Source code for tseda.dataloader.synthetic_series_data_loader
"""Synthetic multi-periodic time-series generator used for testing and demos."""
from .local_dataloader import LocalDataLoader
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
from scipy.stats import norm
from math import ceil
[docs]
class SyntheticSeriesDataLoader(LocalDataLoader):
"""Generate a deterministic synthetic time series with two superimposed periodicities."""
[docs]
def __init__(self, file_path: str = "data/synthetic_series.csv"):
"""Initialize with the synthetic series CSV path (unused; series is generated in memory).
Args:
file_path: Placeholder path; the series is always generated programmatically.
"""
super().__init__(file_path)
[docs]
def get_series(self) -> pd.Series:
"""Generate and return a synthetic multi-periodic signal series.
Returns:
Numeric series indexed by hourly timestamps. Returns an empty float
series only if generation fails.
"""
now = datetime.now()
p1_peak = 5
p2_peak = 7
p1 = 6
p2 = 10
N = ceil(24*90/1) # 90 days of hourly data
time_idx = [float(i) for i in range(N)]
p1_omega = [(2*np.pi* t)/(p1) for t in time_idx]
p2_omega = [(2*np.pi* t)/(p2) for t in time_idx]
p1_vals = [p1_peak*np.sin(w) for w in p1_omega]
p2_vals = [p2_peak*np.sin(w) for w in p2_omega]
noise = norm.rvs(loc=0, scale=0.5, size=N)
level = [20 for _ in range(N)]
signal = np.array(level) + np.array(p1_vals) + np.array(p2_vals) + np.array(noise)
time_vals = [now + timedelta(hours=i) for i in range(N)]
data = {"time": time_vals, "signal": signal}
df = pd.DataFrame.from_dict(data)
df.index = df.time
if not df.empty:
series = df["signal"]
series.index = df.time
return series
else:
print("No data available to extract series.")
return pd.Series(dtype=float)