// ghostflow_ml/time_series_extended.rs

//! Extended Time Series - SARIMA, STL Decomposition, ACF/PACF

use ghostflow_core::Tensor;

5/// SARIMA - Seasonal ARIMA
6/// SARIMA(p, d, q)(P, D, Q, s)
7pub struct SARIMA {
8    pub p: usize,  // AR order
9    pub d: usize,  // Differencing order
10    pub q: usize,  // MA order
11    pub seasonal_p: usize,  // Seasonal AR order
12    pub seasonal_d: usize,  // Seasonal differencing order
13    pub seasonal_q: usize,  // Seasonal MA order
14    pub seasonal_period: usize,  // Seasonal period (s)
15    pub max_iter: usize,
16    ar_coef_: Option<Vec<f32>>,
17    ma_coef_: Option<Vec<f32>>,
18    seasonal_ar_coef_: Option<Vec<f32>>,
19    seasonal_ma_coef_: Option<Vec<f32>>,
20    intercept_: f32,
21    diff_values_: Option<Vec<f32>>,
22}
24impl SARIMA {
25    pub fn new(p: usize, d: usize, q: usize, seasonal_p: usize, seasonal_d: usize, 
26               seasonal_q: usize, seasonal_period: usize) -> Self {
27        SARIMA {
28            p, d, q,
29            seasonal_p, seasonal_d, seasonal_q, seasonal_period,
30            max_iter: 100,
31            ar_coef_: None,
32            ma_coef_: None,
33            seasonal_ar_coef_: None,
34            seasonal_ma_coef_: None,
35            intercept_: 0.0,
36            diff_values_: None,
37        }
38    }
39
40    fn difference(y: &[f32], d: usize) -> Vec<f32> {
41        let mut result = y.to_vec();
42        for _ in 0..d {
43            let diff: Vec<f32> = result.windows(2).map(|w| w[1] - w[0]).collect();
44            result = diff;
45        }
46        result
47    }
48
49    fn seasonal_difference(y: &[f32], d: usize, period: usize) -> Vec<f32> {
50        let mut result = y.to_vec();
51        for _ in 0..d {
52            if result.len() <= period {
53                break;
54            }
55            let diff: Vec<f32> = (period..result.len())
56                .map(|i| result[i] - result[i - period])
57                .collect();
58            result = diff;
59        }
60        result
61    }
62
63    fn compute_autocorrelation(y: &[f32], lag: usize) -> f32 {
64        let n = y.len();
65        if lag >= n { return 0.0; }
66        
67        let mean = y.iter().sum::<f32>() / n as f32;
68        let var: f32 = y.iter().map(|&v| (v - mean).powi(2)).sum();
69        
70        if var < 1e-10 { return 0.0; }
71        
72        let cov: f32 = (0..n - lag)
73            .map(|i| (y[i] - mean) * (y[i + lag] - mean))
74            .sum();
75        
76        cov / var
77    }
78
79    pub fn fit(&mut self, y: &Tensor) {
80        let y_data = y.data_f32();
81        self.diff_values_ = Some(y_data.clone());
82
83        // Apply regular differencing
84        let mut y_diff = Self::difference(&y_data, self.d);
85        
86        // Apply seasonal differencing
87        y_diff = Self::seasonal_difference(&y_diff, self.seasonal_d, self.seasonal_period);
88
89        if y_diff.len() <= self.p + self.seasonal_p * self.seasonal_period {
90            panic!("Not enough data after differencing");
91        }
92
93        let n = y_diff.len();
94        let mean = y_diff.iter().sum::<f32>() / n as f32;
95        let y_centered: Vec<f32> = y_diff.iter().map(|&v| v - mean).collect();
96
97        // Fit AR coefficients using Yule-Walker
98        if self.p > 0 {
99            let mut r = vec![0.0f32; self.p + 1];
100            for k in 0..=self.p {
101                r[k] = Self::compute_autocorrelation(&y_centered, k);
102            }
103
104            let mut toeplitz = vec![0.0f32; self.p * self.p];
105            for i in 0..self.p {
106                for j in 0..self.p {
107                    toeplitz[i * self.p + j] = r[(i as i32 - j as i32).unsigned_abs() as usize];
108                }
109            }
110
111            let rhs: Vec<f32> = r[1..=self.p].to_vec();
112            self.ar_coef_ = Some(solve_linear(&toeplitz, &rhs, self.p));
113        } else {
114            self.ar_coef_ = Some(vec![]);
115        }
116
117        // Fit seasonal AR coefficients
118        if self.seasonal_p > 0 {
119            let mut r = vec![0.0f32; self.seasonal_p + 1];
120            for k in 0..=self.seasonal_p {
121                r[k] = Self::compute_autocorrelation(&y_centered, k * self.seasonal_period);
122            }
123
124            let mut toeplitz = vec![0.0f32; self.seasonal_p * self.seasonal_p];
125            for i in 0..self.seasonal_p {
126                for j in 0..self.seasonal_p {
127                    toeplitz[i * self.seasonal_p + j] = r[(i as i32 - j as i32).unsigned_abs() as usize];
128                }
129            }
130
131            let rhs: Vec<f32> = r[1..=self.seasonal_p].to_vec();
132            self.seasonal_ar_coef_ = Some(solve_linear(&toeplitz, &rhs, self.seasonal_p));
133        } else {
134            self.seasonal_ar_coef_ = Some(vec![]);
135        }
136
137        // Initialize MA coefficients (simplified)
138        self.ma_coef_ = Some(vec![0.0; self.q]);
139        self.seasonal_ma_coef_ = Some(vec![0.0; self.seasonal_q]);
140        self.intercept_ = mean;
141    }
142
143    pub fn predict(&self, steps: usize) -> Tensor {
144        let ar_coef = self.ar_coef_.as_ref().expect("Model not fitted");
145        let seasonal_ar_coef = self.seasonal_ar_coef_.as_ref().unwrap();
146        let diff_values = self.diff_values_.as_ref().unwrap();
147
148        // Get differenced series
149        let mut y_diff = Self::difference(diff_values, self.d);
150        y_diff = Self::seasonal_difference(&y_diff, self.seasonal_d, self.seasonal_period);
151
152        let _n = y_diff.len();
153        let mut history = y_diff.clone();
154        let mut predictions = Vec::with_capacity(steps);
155
156        for _ in 0..steps {
157            let mut pred = self.intercept_;
158
159            // AR component
160            for (i, &coef) in ar_coef.iter().enumerate() {
161                if i < history.len() {
162                    pred += coef * history[history.len() - 1 - i];
163                }
164            }
165
166            // Seasonal AR component
167            for (i, &coef) in seasonal_ar_coef.iter().enumerate() {
168                let lag = (i + 1) * self.seasonal_period;
169                if lag <= history.len() {
170                    pred += coef * history[history.len() - lag];
171                }
172            }
173
174            predictions.push(pred);
175            history.push(pred);
176        }
177
178        // Inverse differencing
179        let final_pred = self.inverse_difference(&predictions, diff_values);
180
181        Tensor::from_slice(&final_pred, &[steps]).unwrap()
182    }
183
184    fn inverse_difference(&self, predictions: &[f32], original: &[f32]) -> Vec<f32> {
185        let mut result = predictions.to_vec();
186
187        // Inverse seasonal differencing
188        if self.seasonal_d > 0 {
189            let n_orig = original.len();
190            for _ in 0..self.seasonal_d {
191                let mut undiff = Vec::new();
192                for (i, &pred) in result.iter().enumerate() {
193                    let base_idx = n_orig - self.seasonal_period + i % self.seasonal_period;
194                    if base_idx < n_orig {
195                        undiff.push(pred + original[base_idx]);
196                    } else {
197                        undiff.push(pred);
198                    }
199                }
200                result = undiff;
201            }
202        }
203
204        // Inverse regular differencing
205        if self.d > 0 {
206            let last_val = original[original.len() - 1];
207            let mut undiff = vec![last_val];
208            for &pred in &result {
209                undiff.push(undiff.last().unwrap() + pred);
210            }
211            result = undiff[1..].to_vec();
212        }
213
214        result
215    }
216}
218/// STL Decomposition - Seasonal and Trend decomposition using Loess
219pub struct STLDecomposition {
220    pub period: usize,
221    pub seasonal_deg: usize,
222    pub trend_deg: usize,
223    pub robust: bool,
224    pub n_iter: usize,
225    trend_: Option<Vec<f32>>,
226    seasonal_: Option<Vec<f32>>,
227    residual_: Option<Vec<f32>>,
228}
230impl STLDecomposition {
231    pub fn new(period: usize) -> Self {
232        STLDecomposition {
233            period,
234            seasonal_deg: 1,
235            trend_deg: 1,
236            robust: false,
237            n_iter: 2,
238            trend_: None,
239            seasonal_: None,
240            residual_: None,
241        }
242    }
243
244    pub fn robust(mut self, r: bool) -> Self { self.robust = r; self }
245
246    fn loess_smooth(&self, y: &[f32], weights: &[f32], bandwidth: usize) -> Vec<f32> {
247        let n = y.len();
248        let mut smoothed = vec![0.0f32; n];
249
250        for i in 0..n {
251            let start = i.saturating_sub(bandwidth / 2);
252            let end = (i + bandwidth / 2 + 1).min(n);
253
254            let mut sum_w = 0.0f32;
255            let mut sum_wy = 0.0f32;
256
257            for j in start..end {
258                let dist = (i as f32 - j as f32).abs() / (bandwidth as f32 / 2.0);
259                let tricube = if dist < 1.0 { (1.0 - dist.powi(3)).powi(3) } else { 0.0 };
260                let w = tricube * weights[j];
261                sum_w += w;
262                sum_wy += w * y[j];
263            }
264
265            smoothed[i] = if sum_w > 1e-10 { sum_wy / sum_w } else { y[i] };
266        }
267
268        smoothed
269    }
270
271    pub fn fit(&mut self, y: &Tensor) {
272        let y_data = y.data_f32();
273        let n = y_data.len();
274
275        let mut trend = vec![0.0f32; n];
276        let mut seasonal = vec![0.0f32; n];
277        let mut weights = vec![1.0f32; n];
278
279        for _ in 0..self.n_iter {
280            // Step 1: Detrend
281            let detrended: Vec<f32> = y_data.iter().zip(trend.iter())
282                .map(|(&y, &t)| y - t)
283                .collect();
284
285            // Step 2: Compute seasonal component
286            // Average by season
287            let mut seasonal_means = vec![0.0f32; self.period];
288            let mut seasonal_counts = vec![0usize; self.period];
289
290            for (i, &val) in detrended.iter().enumerate() {
291                let season = i % self.period;
292                seasonal_means[season] += val;
293                seasonal_counts[season] += 1;
294            }
295
296            for i in 0..self.period {
297                if seasonal_counts[i] > 0 {
298                    seasonal_means[i] /= seasonal_counts[i] as f32;
299                }
300            }
301
302            // Center seasonal component
303            let seasonal_mean: f32 = seasonal_means.iter().sum::<f32>() / self.period as f32;
304            for s in &mut seasonal_means {
305                *s -= seasonal_mean;
306            }
307
308            // Extend seasonal to full length
309            for i in 0..n {
310                seasonal[i] = seasonal_means[i % self.period];
311            }
312
313            // Step 3: Deseasonalize and compute trend
314            let deseasonalized: Vec<f32> = y_data.iter().zip(seasonal.iter())
315                .map(|(&y, &s)| y - s)
316                .collect();
317
318            // Smooth to get trend
319            let trend_bandwidth = (n / 2).max(3) | 1; // Ensure odd
320            trend = self.loess_smooth(&deseasonalized, &weights, trend_bandwidth);
321
322            // Step 4: Update weights for robust fitting
323            if self.robust {
324                let residual: Vec<f32> = y_data.iter()
325                    .zip(trend.iter())
326                    .zip(seasonal.iter())
327                    .map(|((&y, &t), &s)| y - t - s)
328                    .collect();
329
330                let mut abs_residual: Vec<f32> = residual.iter().map(|&r| r.abs()).collect();
331                abs_residual.sort_by(|a, b| a.partial_cmp(b).unwrap());
332                let h = 6.0 * abs_residual[n / 2]; // 6 * MAD
333
334                for (i, &r) in residual.iter().enumerate() {
335                    let u = r.abs() / h.max(1e-10);
336                    weights[i] = if u < 1.0 { (1.0 - u * u).powi(2) } else { 0.0 };
337                }
338            }
339        }
340
341        // Compute residual
342        let residual: Vec<f32> = y_data.iter()
343            .zip(trend.iter())
344            .zip(seasonal.iter())
345            .map(|((&y, &t), &s)| y - t - s)
346            .collect();
347
348        self.trend_ = Some(trend);
349        self.seasonal_ = Some(seasonal);
350        self.residual_ = Some(residual);
351    }
352
353    pub fn trend(&self) -> Option<&Vec<f32>> { self.trend_.as_ref() }
354    pub fn seasonal(&self) -> Option<&Vec<f32>> { self.seasonal_.as_ref() }
355    pub fn residual(&self) -> Option<&Vec<f32>> { self.residual_.as_ref() }
356}
358/// Autocorrelation Function
359pub fn acf(y: &Tensor, n_lags: usize) -> Vec<f32> {
360    let y_data = y.data_f32();
361    let n = y_data.len();
362    let mean = y_data.iter().sum::<f32>() / n as f32;
363    let var: f32 = y_data.iter().map(|&v| (v - mean).powi(2)).sum();
364
365    if var < 1e-10 {
366        return vec![1.0; n_lags + 1];
367    }
368
369    (0..=n_lags)
370        .map(|lag| {
371            if lag >= n { return 0.0; }
372            let cov: f32 = (0..n - lag)
373                .map(|i| (y_data[i] - mean) * (y_data[i + lag] - mean))
374                .sum();
375            cov / var
376        })
377        .collect()
378}
380/// Partial Autocorrelation Function (using Durbin-Levinson)
381pub fn pacf(y: &Tensor, n_lags: usize) -> Vec<f32> {
382    let acf_values = acf(y, n_lags);
383    let mut pacf_values = vec![0.0f32; n_lags + 1];
384    pacf_values[0] = 1.0;
385
386    if n_lags == 0 { return pacf_values; }
387
388    pacf_values[1] = acf_values[1];
389
390    let mut phi = vec![vec![0.0f32; n_lags + 1]; n_lags + 1];
391    phi[1][1] = acf_values[1];
392
393    for k in 2..=n_lags {
394        let mut num = acf_values[k];
395        let mut den = 1.0f32;
396
397        for j in 1..k {
398            num -= phi[k - 1][j] * acf_values[k - j];
399            den -= phi[k - 1][j] * acf_values[j];
400        }
401
402        phi[k][k] = if den.abs() > 1e-10 { num / den } else { 0.0 };
403        pacf_values[k] = phi[k][k];
404
405        for j in 1..k {
406            phi[k][j] = phi[k - 1][j] - phi[k][k] * phi[k - 1][k - j];
407        }
408    }
409
410    pacf_values
411}
413/// Augmented Dickey-Fuller Test for stationarity
414pub fn adf_test(y: &Tensor, max_lag: Option<usize>) -> (f32, f32) {
415    let y_data = y.data_f32();
416    let n = y_data.len();
417
418    // Compute first difference
419    let dy: Vec<f32> = y_data.windows(2).map(|w| w[1] - w[0]).collect();
420    let n_diff = dy.len();
421
422    // Determine lag order
423    let lag = max_lag.unwrap_or(((n as f32).powf(1.0 / 3.0) * 2.0) as usize);
424    let lag = lag.min(n_diff / 2);
425
426    // Build regression: dy_t = alpha + beta * y_{t-1} + sum(gamma_i * dy_{t-i}) + e_t
427    // We're interested in testing beta = 0
428
429    // Simplified: just compute the t-statistic for the coefficient on y_{t-1}
430    let start = lag + 1;
431    if start >= n_diff { return (0.0, 1.0); }
432
433    let n_obs = n_diff - start;
434    
435    // Compute OLS estimate of beta
436    let mut sum_xy = 0.0f32;
437    let mut sum_xx = 0.0f32;
438    let mut sum_y = 0.0f32;
439    let mut sum_x = 0.0f32;
440
441    for i in start..n_diff {
442        let x = y_data[i]; // y_{t-1}
443        let y = dy[i];
444        sum_xy += x * y;
445        sum_xx += x * x;
446        sum_y += y;
447        sum_x += x;
448    }
449
450    let mean_x = sum_x / n_obs as f32;
451    let mean_y = sum_y / n_obs as f32;
452    
453    let beta = (sum_xy - n_obs as f32 * mean_x * mean_y) / 
454               (sum_xx - n_obs as f32 * mean_x * mean_x).max(1e-10);
455
456    // Compute residual variance
457    let mut sse = 0.0f32;
458    for i in start..n_diff {
459        let x = y_data[i];
460        let y = dy[i];
461        let pred = mean_y + beta * (x - mean_x);
462        sse += (y - pred).powi(2);
463    }
464    let mse = sse / (n_obs - 2).max(1) as f32;
465
466    // Standard error of beta
467    let se_beta = (mse / (sum_xx - n_obs as f32 * mean_x * mean_x).max(1e-10)).sqrt();
468
469    // t-statistic
470    let t_stat = beta / se_beta.max(1e-10);
471
472    // Critical values (approximate for 5% significance)
473    // ADF critical value at 5% is approximately -2.86 for n > 100
474    let critical_value = -2.86f32;
475    let p_value = if t_stat < critical_value { 0.01 } else { 0.10 };
476
477    (t_stat, p_value)
478}
480fn solve_linear(a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
481    if n == 0 { return vec![]; }
482    
483    let mut aug = vec![0.0f32; n * (n + 1)];
484    for i in 0..n {
485        for j in 0..n {
486            aug[i * (n + 1) + j] = a[i * n + j];
487        }
488        aug[i * (n + 1) + n] = b[i];
489    }
490
491    for i in 0..n {
492        let mut max_row = i;
493        for k in (i + 1)..n {
494            if aug[k * (n + 1) + i].abs() > aug[max_row * (n + 1) + i].abs() {
495                max_row = k;
496            }
497        }
498
499        for j in 0..=n {
500            let tmp = aug[i * (n + 1) + j];
501            aug[i * (n + 1) + j] = aug[max_row * (n + 1) + j];
502            aug[max_row * (n + 1) + j] = tmp;
503        }
504
505        let pivot = aug[i * (n + 1) + i];
506        if pivot.abs() < 1e-10 { continue; }
507
508        for j in i..=n {
509            aug[i * (n + 1) + j] /= pivot;
510        }
511
512        for k in 0..n {
513            if k != i {
514                let factor = aug[k * (n + 1) + i];
515                for j in i..=n {
516                    aug[k * (n + 1) + j] -= factor * aug[i * (n + 1) + j];
517                }
518            }
519        }
520    }
521
522    (0..n).map(|i| aug[i * (n + 1) + n]).collect()
523}
525#[cfg(test)]
526mod tests {
527    use super::*;
528
529    #[test]
530    fn test_sarima() {
531        let y = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
532            2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0,
533        ], &[24]).unwrap();
534
535        let mut sarima = SARIMA::new(1, 0, 0, 1, 0, 0, 12);
536        sarima.fit(&y);
537        let pred = sarima.predict(3);
538        assert_eq!(pred.dims(), &[3]);
539    }
540
541    #[test]
542    fn test_stl() {
543        let y = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0,
544        ], &[12]).unwrap();
545
546        let mut stl = STLDecomposition::new(4);
547        stl.fit(&y);
548        
549        assert!(stl.trend().is_some());
550        assert!(stl.seasonal().is_some());
551        assert!(stl.residual().is_some());
552    }
553
554    #[test]
555    fn test_acf_pacf() {
556        let y = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0, 2.0], &[10]).unwrap();
557        
558        let acf_vals = acf(&y, 5);
559        assert_eq!(acf_vals.len(), 6);
560        assert!((acf_vals[0] - 1.0).abs() < 1e-5);
561
562        let pacf_vals = pacf(&y, 5);
563        assert_eq!(pacf_vals.len(), 6);
564    }
565}