// so_tsa/utils.rs
//! Utility functions for time series analysis
//!
//! This module provides utility functions for working with time series,
//! including autocorrelation functions, periodogram estimation,
//! and various transformations.
6
use ndarray::{Array1, Array2};
use so_core::error::{Error, Result};
use std::collections::HashMap;
10
11/// Calculate autocorrelation function (ACF)
12pub fn acf(series: &Array1<f64>, max_lag: usize) -> Array1<f64> {
13    let n = series.len();
14    let mean = series.mean().unwrap_or(0.0);
15    let variance = series.var(1.0);
16
17    if variance <= 0.0 || n <= 1 {
18        return Array1::zeros(max_lag.min(n - 1));
19    }
20
21    let mut acf_values = Array1::zeros(max_lag.min(n - 1));
22
23    for lag in 1..=max_lag.min(n - 1) {
24        let mut autocov = 0.0;
25        for t in lag..n {
26            autocov += (series[t] - mean) * (series[t - lag] - mean);
27        }
28        acf_values[lag - 1] = autocov / (variance * n as f64);
29    }
30
31    acf_values
32}
33
34/// Calculate partial autocorrelation function (PACF)
35pub fn pacf(series: &Array1<f64>, max_lag: usize) -> Result<Array1<f64>> {
36    let n = series.len();
37    if n <= max_lag {
38        return Err(Error::DataError(format!(
39            "Need more observations than max_lag: n={}, max_lag={}",
40            n, max_lag
41        )));
42    }
43
44    let mut pacf_values = Array1::zeros(max_lag);
45
46    // Use Durbin-Levinson algorithm
47    let mut phi = Array1::zeros(max_lag + 1);
48    let mut v = Array1::zeros(max_lag + 1);
49
50    // Initial values
51    phi[0] = 1.0;
52    v[0] = series.var(1.0);
53
54    // Autocorrelations
55    let r = acf(series, max_lag);
56
57    for k in 1..=max_lag {
58        // Compute phi_kk
59        let mut num = r[k - 1];
60        for j in 1..k {
61            num -= phi[j] * r[(k - j - 1).min(max_lag - 1)];
62        }
63
64        let phi_kk = num / v[k - 1];
65        pacf_values[k - 1] = phi_kk;
66
67        // Update phi and v
68        phi[k] = phi_kk;
69        for j in 1..k {
70            phi[j] = phi[j] - phi_kk * phi[k - j];
71        }
72
73        v[k] = v[k - 1] * (1.0 - phi_kk.powi(2));
74    }
75
76    Ok(pacf_values)
77}
78
79/// Calculate cross-correlation function (CCF)
80pub fn ccf(x: &Array1<f64>, y: &Array1<f64>, max_lag: usize) -> Array1<f64> {
81    let n = x.len();
82    if n != y.len() {
83        return Array1::zeros(max_lag * 2 + 1);
84    }
85
86    let x_mean = x.mean().unwrap_or(0.0);
87    let y_mean = y.mean().unwrap_or(0.0);
88    let x_var = x.var(1.0);
89    let y_var = y.var(1.0);
90
91    if x_var <= 0.0 || y_var <= 0.0 {
92        return Array1::zeros(max_lag * 2 + 1);
93    }
94
95    let mut ccf_values = Array1::zeros(max_lag * 2 + 1);
96
97    for lag in -(max_lag as isize)..=max_lag as isize {
98        let idx = (lag + max_lag as isize) as usize;
99        let mut cross_cov = 0.0;
100
101        if lag >= 0 {
102            for t in lag as usize..n {
103                cross_cov += (x[t] - x_mean) * (y[t - lag as usize] - y_mean);
104            }
105        } else {
106            for t in (-lag) as usize..n {
107                cross_cov += (x[t + lag as usize] - x_mean) * (y[t] - y_mean);
108            }
109        }
110
111        ccf_values[idx] = cross_cov / (x_var.sqrt() * y_var.sqrt() * n as f64);
112    }
113
114    ccf_values
115}
116
117/// Calculate periodogram (spectral density estimate)
118pub fn periodogram(series: &Array1<f64>) -> (Array1<f64>, Array1<f64>) {
119    let n = series.len();
120    let n_freq = n / 2 + 1;
121
122    let mut frequencies = Array1::zeros(n_freq);
123    let mut spectrum = Array1::zeros(n_freq);
124
125    // Calculate Fourier frequencies
126    for k in 0..n_freq {
127        frequencies[k] = k as f64 / n as f64;
128
129        // Discrete Fourier Transform (simplified)
130        let mut real = 0.0;
131        let mut imag = 0.0;
132
133        for t in 0..n {
134            let angle = -2.0 * std::f64::consts::PI * k as f64 * t as f64 / n as f64;
135            real += series[t] * angle.cos();
136            imag += series[t] * angle.sin();
137        }
138
139        // Periodogram: squared magnitude / n
140        spectrum[k] = (real.powi(2) + imag.powi(2)) / n as f64;
141    }
142
143    (frequencies, spectrum)
144}
145
146/// Calculate spectral density using smoothed periodogram
147pub fn spectrum(series: &Array1<f64>, window: &str, bandwidth: f64) -> (Array1<f64>, Array1<f64>) {
148    let (freq, mut periodogram) = periodogram(series);
149    let n_freq = freq.len();
150
151    // Apply smoothing window
152    match window {
153        "bartlett" => {
154            let m = (bandwidth * n_freq as f64).round() as usize;
155            for k in 0..n_freq {
156                let mut smoothed = 0.0;
157                let mut weight_sum = 0.0;
158
159                for j in (k as isize - m as isize)..=(k as isize + m as isize) {
160                    if j >= 0 && (j as usize) < n_freq {
161                        let weight = 1.0 - (j - k as isize).abs() as f64 / m as f64;
162                        smoothed += weight * periodogram[j as usize];
163                        weight_sum += weight;
164                    }
165                }
166
167                if weight_sum > 0.0 {
168                    periodogram[k] = smoothed / weight_sum;
169                }
170            }
171        }
172        "parzen" => {
173            let m = (bandwidth * n_freq as f64).round() as usize;
174            for k in 0..n_freq {
175                let mut smoothed = 0.0;
176                let mut weight_sum = 0.0;
177
178                for j in (k as isize - m as isize)..=(k as isize + m as isize) {
179                    if j >= 0 && (j as usize) < n_freq {
180                        let u = (j - k as isize).abs() as f64 / m as f64;
181                        let weight = if u <= 0.5 {
182                            1.0 - 6.0 * u.powi(2) + 6.0 * u.powi(3)
183                        } else {
184                            2.0 * (1.0 - u).powi(3)
185                        };
186
187                        smoothed += weight * periodogram[j as usize];
188                        weight_sum += weight;
189                    }
190                }
191
192                if weight_sum > 0.0 {
193                    periodogram[k] = smoothed / weight_sum;
194                }
195            }
196        }
197        _ => {} // No smoothing
198    }
199
200    (freq, periodogram)
201}
202
203/// De-trend a series using polynomial regression
204pub fn detrend_poly(series: &Array1<f64>, degree: usize) -> Result<Array1<f64>> {
205    let n = series.len();
206    if n <= degree {
207        return Err(Error::DataError(format!(
208            "Need more observations than degree: n={}, degree={}",
209            n, degree
210        )));
211    }
212
213    // Build Vandermonde matrix
214    let mut x = Array2::zeros((n, degree + 1));
215    for i in 0..n {
216        for j in 0..=degree {
217            x[(i, j)] = (i as f64).powi(j as i32);
218        }
219    }
220
221    // Solve least squares: (X'X)β = X'y
222    use so_linalg;
223    let xt = x.t();
224    let xtx = xt.dot(&x);
225    let xty = xt.dot(series);
226
227    let beta = so_linalg::solve(&xtx, &xty)
228        .map_err(|e| Error::DataError(format!("Detrend failed: {}", e)))?;
229
230    // Calculate fitted trend
231    let trend = x.dot(&beta);
232    let detrended = series - &trend;
233
234    Ok(detrended)
235}
236
237/// Apply Box-Cox transformation
238pub fn box_cox(series: &Array1<f64>, lambda: f64) -> Array1<f64> {
239    if lambda.abs() < 1e-10 {
240        // Log transformation (special case)
241        series.mapv(|x| (x + 1e-10).ln())
242    } else {
243        series.mapv(|x| (x.powf(lambda) - 1.0) / lambda)
244    }
245}
246
247/// Find optimal Box-Cox lambda using maximum likelihood
248pub fn box_cox_lambda(series: &Array1<f64>, lambda_range: (f64, f64), steps: usize) -> f64 {
249    let (min_lambda, max_lambda) = lambda_range;
250    let step = (max_lambda - min_lambda) / steps as f64;
251
252    let mut best_lambda = 0.0;
253    let mut best_log_lik = f64::NEG_INFINITY;
254
255    for i in 0..=steps {
256        let lambda = min_lambda + i as f64 * step;
257        let transformed = box_cox(series, lambda);
258
259        // Log-likelihood assuming normality
260        let mean = transformed.mean().unwrap_or(0.0);
261        let variance = transformed.var(1.0);
262        if variance > 0.0 {
263            let log_lik = -0.5 * series.len() as f64 * (2.0 * std::f64::consts::PI * variance).ln()
264                - 0.5 / variance * transformed.mapv(|x| (x - mean).powi(2)).sum()
265                + (lambda - 1.0) * series.mapv(|x| x.ln()).sum();
266
267            if log_lik > best_log_lik {
268                best_log_lik = log_lik;
269                best_lambda = lambda;
270            }
271        }
272    }
273
274    best_lambda
275}
276
277/// Calculate rolling statistics
278pub fn rolling_statistic(
279    series: &Array1<f64>,
280    window: usize,
281    stat: &str,
282    center: bool,
283) -> Array1<f64> {
284    let n = series.len();
285    let mut result = Array1::zeros(n);
286
287    for i in 0..n {
288        let start = if center {
289            i.saturating_sub(window / 2)
290        } else {
291            i.saturating_sub(window - 1).min(i)
292        };
293        let end = if center {
294            (i + window / 2 + 1).min(n)
295        } else {
296            (i + 1).min(n)
297        };
298
299        let window_size = end - start;
300        if window_size > 0 {
301            let window_data = series.slice(ndarray::s![start..end]);
302
303            result[i] = match stat {
304                "mean" => window_data.mean().unwrap_or(0.0),
305                "median" => {
306                    let mut sorted: Vec<f64> = window_data.to_vec();
307                    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
308                    let mid = window_size / 2;
309                    if window_size % 2 == 0 {
310                        (sorted[mid - 1] + sorted[mid]) / 2.0
311                    } else {
312                        sorted[mid]
313                    }
314                }
315                "std" => window_data.std(1.0),
316                "var" => window_data.var(1.0),
317                "min" => window_data.iter().fold(f64::INFINITY, |a, &b| a.min(b)),
318                "max" => window_data.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)),
319                "sum" => window_data.sum(),
320                _ => window_data.mean().unwrap_or(0.0),
321            };
322        }
323    }
324
325    result
326}
327
328/// Calculate exponential weighted moving average (EWMA)
329pub fn ewma(series: &Array1<f64>, alpha: f64) -> Array1<f64> {
330    let n = series.len();
331    let mut ewma_values = Array1::zeros(n);
332
333    if n > 0 {
334        ewma_values[0] = series[0];
335        for i in 1..n {
336            ewma_values[i] = alpha * series[i] + (1.0 - alpha) * ewma_values[i - 1];
337        }
338    }
339
340    ewma_values
341}
342
343/// Calculate seasonal dummy variables
344pub fn seasonal_dummies(n: usize, period: usize, include_all: bool) -> Array2<f64> {
345    let n_dummies = if include_all { period } else { period - 1 };
346    let mut dummies = Array2::zeros((n, n_dummies));
347
348    for i in 0..n {
349        let season = i % period;
350        if season < n_dummies {
351            dummies[(i, season)] = 1.0;
352        }
353    }
354
355    dummies
356}
357
358/// Calculate forecast error metrics
359pub fn forecast_errors(actual: &Array1<f64>, forecast: &Array1<f64>) -> HashMap<String, f64> {
360    use std::collections::HashMap;
361
362    let n = actual.len().min(forecast.len());
363    let mut errors = HashMap::new();
364
365    if n == 0 {
366        return errors;
367    }
368
369    let mut mae = 0.0;
370    let mut mse = 0.0;
371    let mut mape = 0.0;
372    let mut mape_count = 0;
373
374    for i in 0..n {
375        let error = actual[i] - forecast[i];
376        mae += error.abs();
377        mse += error.powi(2);
378
379        if actual[i] != 0.0 {
380            mape += (error.abs() / actual[i].abs()) * 100.0;
381            mape_count += 1;
382        }
383    }
384
385    errors.insert("MAE".to_string(), mae / n as f64);
386    errors.insert("MSE".to_string(), mse / n as f64);
387    errors.insert("RMSE".to_string(), (mse / n as f64).sqrt());
388
389    if mape_count > 0 {
390        errors.insert("MAPE".to_string(), mape / mape_count as f64);
391    }
392
393    // Theil's U statistic
394    let naive_forecast: f64 = if n > 1 {
395        let mut naive_error = 0.0;
396        for i in 1..n {
397            naive_error += (actual[i] - actual[i - 1]).powi(2);
398        }
399        (naive_error / (n - 1) as f64).sqrt()
400    } else {
401        0.0
402    };
403
404    let rmse = (mse / n as f64).sqrt();
405    if naive_forecast > 0.0 {
406        errors.insert("TheilU".to_string(), rmse / naive_forecast);
407    }
408
409    errors
410}
411
/// Compute model-selection information criteria (AIC, BIC, AICc) from a
/// log-likelihood, the number of observations, and the number of
/// estimated parameters.
///
/// AICc falls back to plain AIC when `n_obs <= n_params + 1`, where the
/// small-sample correction would divide by a non-positive quantity.
pub fn information_criteria(
    log_likelihood: f64,
    n_obs: usize,
    n_params: usize,
) -> HashMap<String, f64> {
    let k = n_params as f64;
    let n = n_obs as f64;

    // Akaike information criterion: 2k - 2 ln L.
    let aic = 2.0 * k - 2.0 * log_likelihood;
    // Bayesian information criterion: k ln n - 2 ln L.
    let bic = n.ln() * k - 2.0 * log_likelihood;
    // Small-sample corrected AIC (Hurvich-Tsai correction term).
    let aicc = if n_obs > n_params + 1 {
        aic + 2.0 * k * (k + 1.0) / (n - k - 1.0)
    } else {
        aic
    };

    let mut criteria = HashMap::new();
    criteria.insert("AIC".to_string(), aic);
    criteria.insert("BIC".to_string(), bic);
    criteria.insert("AICc".to_string(), aicc);
    criteria
}
441
442/// Calculate Diebold-Mariano test for forecast comparison
443pub fn diebold_mariano(
444    errors_a: &Array1<f64>,
445    errors_b: &Array1<f64>,
446    loss_fn: &str,
447) -> (f64, f64) {
448    let n = errors_a.len().min(errors_b.len());
449    if n < 2 {
450        return (0.0, 1.0);
451    }
452
453    // Calculate loss differential
454    let mut d = Array1::zeros(n);
455    for i in 0..n {
456        let loss_a = match loss_fn {
457            "squared" => errors_a[i].powi(2),
458            "absolute" => errors_a[i].abs(),
459            "percentage" => {
460                if errors_a[i] != 0.0 {
461                    errors_a[i].abs()
462                } else {
463                    0.0
464                }
465            }
466            _ => errors_a[i].powi(2),
467        };
468
469        let loss_b = match loss_fn {
470            "squared" => errors_b[i].powi(2),
471            "absolute" => errors_b[i].abs(),
472            "percentage" => {
473                if errors_b[i] != 0.0 {
474                    errors_b[i].abs()
475                } else {
476                    0.0
477                }
478            }
479            _ => errors_b[i].powi(2),
480        };
481
482        d[i] = loss_a - loss_b;
483    }
484
485    // Test statistic
486    let mean_d = d.mean().unwrap_or(0.0);
487    let var_d = d.var(1.0);
488
489    if var_d <= 0.0 {
490        return (0.0, 1.0);
491    }
492
493    let dm_stat = mean_d / (var_d / n as f64).sqrt();
494
495    // p-value (two-sided normal test)
496    use super::results::chi2_cdf;
497    let p_value = 2.0 * (1.0 - chi2_cdf(1, dm_stat.abs()));
498
499    (dm_stat, p_value)
500}