quantrs2_ml/time_series/utils.rs

//! Utility functions and synthetic data generation for time series
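//!
//! A minimal usage sketch (marked `ignore`; the import path assumes this module is
//! exposed as `quantrs2_ml::time_series::utils`):
//!
//! ```ignore
//! use quantrs2_ml::time_series::utils::generate_synthetic_time_series;
//!
//! // 200 points with a seasonal period of 12, a mild upward trend, and small noise.
//! let series = generate_synthetic_time_series(200, Some(12), 0.05, 1.0);
//! assert_eq!(series.dim(), (200, 1));
//! ```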

use crate::error::{MLError, Result};
use scirs2_core::ndarray::{Array1, Array2};
use serde::{Deserialize, Serialize};
use std::f64::consts::PI;

/// Generate synthetic time series data for testing and benchmarking
pub fn generate_synthetic_time_series(
    length: usize,
    seasonality: Option<usize>,
    trend: f64,
    noise: f64,
) -> Array2<f64> {
    let mut data = Array2::zeros((length, 1));

    for i in 0..length {
        let t = i as f64;

        // Trend component
        let trend_val = trend * t;

        // Seasonal component
        let seasonal_val = if let Some(period) = seasonality {
            10.0 * (2.0 * PI * t / period as f64).sin()
        } else {
            0.0
        };

        // Noise component
        let noise_val = noise * (fastrand::f64() - 0.5);

        data[[i, 0]] = 50.0 + trend_val + seasonal_val + noise_val;
    }

    data
}

/// Generate complex synthetic time series with multiple components
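///
/// # Example
///
/// A minimal sketch (marked `ignore`; the import path assumes the module is exposed
/// as `quantrs2_ml::time_series::utils`, and `SyntheticDataConfig::default()` is
/// defined later in this file):
///
/// ```ignore
/// use quantrs2_ml::time_series::utils::{generate_complex_time_series, SyntheticDataConfig};
///
/// let config = SyntheticDataConfig {
///     num_features: 3,
///     feature_correlations: true,
///     ..SyntheticDataConfig::default()
/// };
/// let data = generate_complex_time_series(500, config).expect("generation failed");
/// assert_eq!(data.dim(), (500, 3));
/// ```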
pub fn generate_complex_time_series(
    length: usize,
    config: SyntheticDataConfig,
) -> Result<Array2<f64>> {
    let mut data = Array2::zeros((length, config.num_features));

    for feature_idx in 0..config.num_features {
        for i in 0..length {
            let t = i as f64;
            let mut value = config.base_level;

            // Add trend components
            for trend in &config.trends {
                value += trend.apply(t);
            }

            // Add seasonal components
            for seasonal in &config.seasonalities {
                value += seasonal.apply(t);
            }

            // Add noise
            value += config.noise_level * (fastrand::f64() - 0.5);

            // Add feature-specific variation
            let feature_factor = 1.0 + 0.1 * feature_idx as f64;
            value *= feature_factor;

            data[[i, feature_idx]] = value;
        }
    }

    // Add correlations between features if specified
    if config.feature_correlations {
        data = add_feature_correlations(data, 0.3)?;
    }

    // Add outliers if specified
    if config.outlier_probability > 0.0 {
        data = add_outliers(data, config.outlier_probability, config.outlier_magnitude)?;
    }

    Ok(data)
}

/// Configuration for synthetic data generation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyntheticDataConfig {
    /// Number of features/variables
    pub num_features: usize,

    /// Base level of the time series
    pub base_level: f64,

    /// Trend components
    pub trends: Vec<TrendComponent>,

    /// Seasonal components
    pub seasonalities: Vec<SeasonalComponent>,

    /// Noise level
    pub noise_level: f64,

    /// Add correlations between features
    pub feature_correlations: bool,

    /// Probability of outliers
    pub outlier_probability: f64,

    /// Magnitude of outliers
    pub outlier_magnitude: f64,
}

/// Trend component specification
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrendComponent {
    /// Type of trend
    pub trend_type: TrendType,

    /// Magnitude/coefficient
    pub magnitude: f64,

    /// Additional parameters
    pub parameters: Vec<f64>,
}

/// Types of trends
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TrendType {
    Linear,
    Quadratic,
    Exponential,
    Logarithmic,
    Sinusoidal,
    Custom(String),
}

/// Seasonal component specification
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeasonalComponent {
    /// Seasonal period
    pub period: usize,

    /// Amplitude
    pub amplitude: f64,

    /// Phase shift
    pub phase: f64,

    /// Seasonal pattern type
    pub pattern_type: SeasonalPattern,
}

/// Types of seasonal patterns
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SeasonalPattern {
    Sinusoidal,
    Triangular,
    Square,
    Sawtooth,
    Custom(Vec<f64>),
}

impl TrendComponent {
    /// Apply trend component at time t
    pub fn apply(&self, t: f64) -> f64 {
        match &self.trend_type {
            TrendType::Linear => self.magnitude * t,
            TrendType::Quadratic => self.magnitude * t * t,
            TrendType::Exponential => {
                let rate = self.parameters.get(0).copied().unwrap_or(0.01);
                self.magnitude * (rate * t).exp()
            }
            TrendType::Logarithmic => {
                if t > 0.0 {
                    self.magnitude * t.ln()
                } else {
                    0.0
                }
            }
            TrendType::Sinusoidal => {
                let frequency = self.parameters.get(0).copied().unwrap_or(0.1);
                self.magnitude * (2.0 * PI * frequency * t).sin()
            }
            TrendType::Custom(_) => {
                // Placeholder for custom trend functions
                self.magnitude * t
            }
        }
    }
}

impl SeasonalComponent {
    /// Apply seasonal component at time t
    pub fn apply(&self, t: f64) -> f64 {
        let normalized_time = (t % self.period as f64) / self.period as f64;
        let phase_adjusted_time = (normalized_time + self.phase / (2.0 * PI)) % 1.0;

        match &self.pattern_type {
            SeasonalPattern::Sinusoidal => self.amplitude * (2.0 * PI * phase_adjusted_time).sin(),
            SeasonalPattern::Triangular => {
                let triangle_val = if phase_adjusted_time < 0.5 {
                    4.0 * phase_adjusted_time - 1.0
                } else {
                    3.0 - 4.0 * phase_adjusted_time
                };
                self.amplitude * triangle_val
            }
            SeasonalPattern::Square => {
                let square_val = if phase_adjusted_time < 0.5 { 1.0 } else { -1.0 };
                self.amplitude * square_val
            }
            SeasonalPattern::Sawtooth => self.amplitude * (2.0 * phase_adjusted_time - 1.0),
            SeasonalPattern::Custom(pattern) => {
                if pattern.is_empty() {
                    0.0
                } else {
                    let index = (phase_adjusted_time * pattern.len() as f64) as usize;
                    let index = index.min(pattern.len() - 1);
                    self.amplitude * pattern[index]
                }
            }
        }
    }
}

/// Add correlations between features
fn add_feature_correlations(
    mut data: Array2<f64>,
    correlation_strength: f64,
) -> Result<Array2<f64>> {
    let (n_samples, n_features) = data.dim();

    if n_features < 2 {
        return Ok(data);
    }

    // Add correlation by blending features
    for i in 1..n_features {
        for j in 0..n_samples {
            let original_value = data[[j, i]];
            let correlated_value = data[[j, i - 1]];

            data[[j, i]] = original_value * (1.0 - correlation_strength)
                + correlated_value * correlation_strength;
        }
    }

    Ok(data)
}

/// Add outliers to the data
fn add_outliers(mut data: Array2<f64>, probability: f64, magnitude: f64) -> Result<Array2<f64>> {
    let (n_samples, n_features) = data.dim();

    for i in 0..n_samples {
        for j in 0..n_features {
            if fastrand::f64() < probability {
                let outlier_direction = if fastrand::bool() { 1.0 } else { -1.0 };
                data[[i, j]] += outlier_direction * magnitude;
            }
        }
    }

    Ok(data)
}

/// Data preprocessing utilities
pub struct DataPreprocessor;

impl DataPreprocessor {
    /// Normalize data to zero mean and unit variance
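    ///
    /// # Example
    ///
    /// A minimal sketch (marked `ignore`; the `data` array and the import path
    /// `quantrs2_ml::time_series::utils::DataPreprocessor` are assumptions):
    ///
    /// ```ignore
    /// use quantrs2_ml::time_series::utils::DataPreprocessor;
    ///
    /// let (normalized, means, stds) = DataPreprocessor::standardize(&data)
    ///     .expect("standardization failed");
    /// // Each column of `normalized` now has approximately zero mean and unit variance;
    /// // `means` and `stds` can be stored to apply the same transform to new data.
    /// ```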
    pub fn standardize(data: &Array2<f64>) -> Result<(Array2<f64>, Array1<f64>, Array1<f64>)> {
        let (n_samples, n_features) = data.dim();
        let mut normalized = data.clone();
        let mut means = Array1::zeros(n_features);
        let mut stds = Array1::zeros(n_features);

        for j in 0..n_features {
            let column = data.column(j);
            let mean = column.mean().unwrap_or(0.0);
            let std = column.std(1.0).max(1e-8); // Avoid division by zero

            means[j] = mean;
            stds[j] = std;

            for i in 0..n_samples {
                normalized[[i, j]] = (data[[i, j]] - mean) / std;
            }
        }

        Ok((normalized, means, stds))
    }

    /// Normalize data to [0, 1] range
    pub fn min_max_scale(data: &Array2<f64>) -> Result<(Array2<f64>, Array1<f64>, Array1<f64>)> {
        let (n_samples, n_features) = data.dim();
        let mut scaled = data.clone();
        let mut mins = Array1::zeros(n_features);
        let mut ranges = Array1::zeros(n_features);

        for j in 0..n_features {
            let column = data.column(j);
            let min_val = column.iter().fold(f64::INFINITY, |a, &b| a.min(b));
            let max_val = column.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
            let range = (max_val - min_val).max(1e-8); // Avoid division by zero

            mins[j] = min_val;
            ranges[j] = range;

            for i in 0..n_samples {
                scaled[[i, j]] = (data[[i, j]] - min_val) / range;
            }
        }

        Ok((scaled, mins, ranges))
    }

    /// Remove outliers using IQR method
    pub fn remove_outliers(data: &Array2<f64>, iqr_multiplier: f64) -> Result<Array2<f64>> {
        let (n_samples, n_features) = data.dim();
        let mut cleaned_data = Vec::new();

        for i in 0..n_samples {
            let mut is_outlier = false;

            for j in 0..n_features {
                let column = data.column(j);
                let mut sorted_values: Vec<f64> = column.iter().cloned().collect();
                sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap());

                let q1_idx = sorted_values.len() / 4;
                let q3_idx = 3 * sorted_values.len() / 4;
                let q1 = sorted_values[q1_idx];
                let q3 = sorted_values[q3_idx];
                let iqr = q3 - q1;

                let lower_bound = q1 - iqr_multiplier * iqr;
                let upper_bound = q3 + iqr_multiplier * iqr;

                if data[[i, j]] < lower_bound || data[[i, j]] > upper_bound {
                    is_outlier = true;
                    break;
                }
            }

            if !is_outlier {
                cleaned_data.push(data.row(i).to_owned());
            }
        }

        if cleaned_data.is_empty() {
            return Err(MLError::DataError(
                "All data points were classified as outliers".to_string(),
            ));
        }

        let cleaned_samples = cleaned_data.len();
        let mut cleaned_array = Array2::zeros((cleaned_samples, n_features));

        for (i, row) in cleaned_data.iter().enumerate() {
            cleaned_array.row_mut(i).assign(row);
        }

        Ok(cleaned_array)
    }

    /// Interpolate missing values
    pub fn interpolate_missing(data: &Array2<f64>) -> Result<Array2<f64>> {
        let mut interpolated = data.clone();
        let (n_samples, n_features) = data.dim();

        for j in 0..n_features {
            for i in 0..n_samples {
                if !data[[i, j]].is_finite() {
                    // Linear interpolation between neighboring valid points
                    let mut left_val = None;
                    let mut right_val = None;

                    // Find left neighbor
                    for k in (0..i).rev() {
                        if data[[k, j]].is_finite() {
                            left_val = Some((k, data[[k, j]]));
                            break;
                        }
                    }

                    // Find right neighbor
                    for k in (i + 1)..n_samples {
                        if data[[k, j]].is_finite() {
                            right_val = Some((k, data[[k, j]]));
                            break;
                        }
                    }

                    // Interpolate
                    let interpolated_value = match (left_val, right_val) {
                        (Some((left_idx, left_val)), Some((right_idx, right_val))) => {
                            let weight = (i - left_idx) as f64 / (right_idx - left_idx) as f64;
                            left_val + weight * (right_val - left_val)
                        }
                        (Some((_, left_val)), None) => left_val,
                        (None, Some((_, right_val))) => right_val,
                        (None, None) => 0.0, // No valid neighbors found
                    };

                    interpolated[[i, j]] = interpolated_value;
                }
            }
        }

        Ok(interpolated)
    }
}

/// Time series analysis utilities
pub struct TimeSeriesAnalyzer;

impl TimeSeriesAnalyzer {
    /// Detect stationarity using a simplified variance-of-differences heuristic
    /// (a rough stand-in for a full Augmented Dickey-Fuller test)
    pub fn is_stationary(data: &Array1<f64>) -> bool {
        // Simplified stationarity test based on variance of differences
        if data.len() < 10 {
            return false;
        }

        let mut differences = Array1::zeros(data.len() - 1);
        for i in 1..data.len() {
            differences[i - 1] = data[i] - data[i - 1];
        }

        let original_var = data.var(1.0);
        let diff_var = differences.var(1.0);

        // If variance of differences is much smaller, likely stationary
        diff_var < 0.8 * original_var
    }

    /// Compute autocorrelation function
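    ///
    /// # Example
    ///
    /// A minimal sketch (marked `ignore`; `series` is assumed to be an `Array1<f64>`
    /// with at least `2 * max_lag` points so the lag cap does not apply):
    ///
    /// ```ignore
    /// let acf = TimeSeriesAnalyzer::autocorrelation(&series, 24);
    /// // `acf` has max_lag + 1 entries; acf[k] estimates the correlation at lag k.
    /// assert_eq!(acf.len(), 25);
    /// ```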
    pub fn autocorrelation(data: &Array1<f64>, max_lag: usize) -> Array1<f64> {
        let n = data.len();
        let max_lag = max_lag.min(n / 2);
        let mut autocorr = Array1::zeros(max_lag + 1);

        let mean = data.mean().unwrap_or(0.0);
        let variance = data.var(1.0);

        for lag in 0..=max_lag {
            let mut sum = 0.0;
            let mut count = 0;

            for t in lag..n {
                sum += (data[t] - mean) * (data[t - lag] - mean);
                count += 1;
            }

            if count > 0 && variance > 1e-10 {
                autocorr[lag] = sum / (count as f64 * variance);
            }
        }

        autocorr
    }

    /// Detect seasonal periods using autocorrelation
    pub fn detect_seasonality(data: &Array1<f64>, max_period: usize) -> Vec<usize> {
        let autocorr = Self::autocorrelation(data, max_period);
        let mut seasonal_periods = Vec::new();

        // Find peaks in autocorrelation function
        for lag in 2..autocorr.len() {
            if lag < autocorr.len() - 1 {
                let current = autocorr[lag];
                let prev = autocorr[lag - 1];
                let next = autocorr[lag + 1];

                // Check if current value is a local maximum above threshold
                if current > prev && current > next && current > 0.3 {
                    seasonal_periods.push(lag);
                }
            }
        }

        seasonal_periods
    }

    /// Estimate trend using simple linear regression
    pub fn estimate_trend(data: &Array1<f64>) -> (f64, f64) {
        let n = data.len() as f64;

        let mut sum_x = 0.0;
        let mut sum_y = 0.0;
        let mut sum_xy = 0.0;
        let mut sum_x2 = 0.0;

        for (i, &y) in data.iter().enumerate() {
            let x = i as f64;
            sum_x += x;
            sum_y += y;
            sum_xy += x * y;
            sum_x2 += x * x;
        }

        let denominator = n * sum_x2 - sum_x * sum_x;

        if denominator.abs() > 1e-10 {
            let slope = (n * sum_xy - sum_x * sum_y) / denominator;
            let intercept = (sum_y - slope * sum_x) / n;
            (slope, intercept)
        } else {
            (0.0, sum_y / n)
        }
    }
}

/// Default configurations for common scenarios
impl Default for SyntheticDataConfig {
    fn default() -> Self {
        Self {
            num_features: 1,
            base_level: 100.0,
            trends: vec![TrendComponent {
                trend_type: TrendType::Linear,
                magnitude: 0.1,
                parameters: Vec::new(),
            }],
            seasonalities: vec![SeasonalComponent {
                period: 12,
                amplitude: 10.0,
                phase: 0.0,
                pattern_type: SeasonalPattern::Sinusoidal,
            }],
            noise_level: 5.0,
            feature_correlations: false,
            outlier_probability: 0.02,
            outlier_magnitude: 20.0,
        }
    }
}

impl SyntheticDataConfig {
    /// Configuration for financial time series
    pub fn financial() -> Self {
        Self {
            num_features: 5, // OHLCV
            base_level: 100.0,
            trends: vec![
                TrendComponent {
                    trend_type: TrendType::Linear,
                    magnitude: 0.05,
                    parameters: Vec::new(),
                },
                TrendComponent {
                    trend_type: TrendType::Sinusoidal,
                    magnitude: 2.0,
                    parameters: vec![0.02], // Low frequency trend
                },
            ],
            seasonalities: vec![
                SeasonalComponent {
                    period: 5, // Weekly seasonality
                    amplitude: 3.0,
                    phase: 0.0,
                    pattern_type: SeasonalPattern::Sinusoidal,
                },
                SeasonalComponent {
                    period: 21, // Monthly seasonality
                    amplitude: 1.5,
                    phase: PI / 4.0,
                    pattern_type: SeasonalPattern::Sinusoidal,
                },
            ],
            noise_level: 2.0,
            feature_correlations: true,
            outlier_probability: 0.05,
            outlier_magnitude: 10.0,
        }
    }

    /// Configuration for IoT sensor data
    pub fn iot_sensor() -> Self {
        Self {
            num_features: 3, // Temperature, humidity, pressure
            base_level: 25.0,
            trends: vec![TrendComponent {
                trend_type: TrendType::Sinusoidal,
                magnitude: 5.0,
                parameters: vec![1.0 / 24.0], // Daily cycle
            }],
            seasonalities: vec![
                SeasonalComponent {
                    period: 24, // Hourly readings, daily pattern
                    amplitude: 8.0,
                    phase: 0.0,
                    pattern_type: SeasonalPattern::Sinusoidal,
                },
                SeasonalComponent {
                    period: 168, // Weekly pattern
                    amplitude: 3.0,
                    phase: PI / 3.0,
                    pattern_type: SeasonalPattern::Sinusoidal,
                },
            ],
            noise_level: 1.0,
            feature_correlations: true,
            outlier_probability: 0.01,
            outlier_magnitude: 15.0,
        }
    }

    /// Configuration for demand forecasting
    pub fn demand() -> Self {
        Self {
            num_features: 1,
            base_level: 1000.0,
            trends: vec![TrendComponent {
                trend_type: TrendType::Linear,
                magnitude: 2.0,
                parameters: Vec::new(),
            }],
            seasonalities: vec![
                SeasonalComponent {
                    period: 7, // Weekly seasonality
                    amplitude: 200.0,
                    phase: 0.0,
                    pattern_type: SeasonalPattern::Sinusoidal,
                },
                SeasonalComponent {
                    period: 365, // Yearly seasonality
                    amplitude: 500.0,
                    phase: PI / 2.0,
                    pattern_type: SeasonalPattern::Sinusoidal,
                },
            ],
            noise_level: 50.0,
            feature_correlations: false,
            outlier_probability: 0.03,
            outlier_magnitude: 300.0,
        }
    }
}
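
// Illustrative sanity checks sketching how the utilities above fit together. These
// tests are added as documentation; the data sizes and tolerances are assumptions,
// not part of the original API contract.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn synthetic_series_has_requested_shape() {
        let data = generate_synthetic_time_series(100, Some(12), 0.1, 1.0);
        assert_eq!(data.dim(), (100, 1));
    }

    #[test]
    fn standardize_centers_each_feature() {
        let data = generate_synthetic_time_series(200, Some(12), 0.1, 1.0);
        let (normalized, _means, _stds) =
            DataPreprocessor::standardize(&data).expect("standardization should succeed");
        let mean = normalized.column(0).mean().unwrap_or(f64::NAN);
        assert!(mean.abs() < 1e-6, "standardized mean should be ~0, got {}", mean);
    }

    #[test]
    fn estimate_trend_recovers_linear_slope() {
        // y = 2x + 1 is an exact linear trend, so the least-squares fit should recover it.
        let series: Array1<f64> = (0..100).map(|i| 2.0 * i as f64 + 1.0).collect();
        let (slope, intercept) = TimeSeriesAnalyzer::estimate_trend(&series);
        assert!((slope - 2.0).abs() < 1e-6);
        assert!((intercept - 1.0).abs() < 1e-6);
    }
}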