scirs2_series/transformations.rs

//! Time series transformations for preprocessing and analysis
//!
//! This module provides comprehensive transformation methods including:
//! - Stationarity transformations (Box-Cox, differencing, detrending)
//! - Normalization and scaling methods
//! - Stationarity tests (ADF, KPSS)
//! - Dimensionality reduction techniques

use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix1, ScalarOperand};
use scirs2_core::numeric::{Float, FromPrimitive};
use std::fmt::{Debug, Display};

use crate::error::{Result, TimeSeriesError};

/// Box-Cox transformation parameters
#[derive(Debug, Clone)]
pub struct BoxCoxTransform<F> {
    /// Lambda parameter for Box-Cox transformation
    pub lambda: F,
    /// Whether lambda was estimated from data
    pub lambda_estimated: bool,
    /// Minimum value adjustment for zero/negative values
    pub min_adjustment: F,
}

/// Differencing transformation parameters
#[derive(Debug, Clone)]
pub struct DifferencingTransform {
    /// Order of differencing (1 = first differences, 2 = second differences, etc.)
    pub order: usize,
    /// Seasonal differencing lag (`None` = no seasonal differencing)
    pub seasonal_lag: Option<usize>,
}

/// Normalization method
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum NormalizationMethod {
    /// Z-score normalization (standardization)
    ZScore,
    /// Min-max scaling to [0, 1]
    MinMax,
    /// Robust scaling using median and IQR
    Robust,
    /// Min-max scaling to custom range
    MinMaxCustom(f64, f64),
}

/// Normalization parameters
#[derive(Debug, Clone)]
pub struct NormalizationParams<F> {
    /// Mean (Z-score), minimum (Min-Max), or median (Robust)
    pub location: F,
    /// Standard deviation (Z-score), range (Min-Max), or IQR (Robust)
    pub scale: F,
    /// Method used for normalization
    pub method: NormalizationMethod,
}

/// Stationarity test results
#[derive(Debug, Clone)]
pub struct StationarityTest<F> {
    /// Test statistic
    pub statistic: F,
    /// p-value
    pub p_value: F,
    /// Critical values at different significance levels
    pub critical_values: Vec<(F, F)>, // (significance_level, critical_value)
    /// Whether the series is considered stationary at the 5% level
    pub is_stationary: bool,
    /// Test type
    pub test_type: StationarityTestType,
}

/// Type of stationarity test
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StationarityTestType {
    /// Augmented Dickey-Fuller test
    ADF,
    /// Kwiatkowski-Phillips-Schmidt-Shin test
    KPSS,
}

/// Apply Box-Cox transformation to time series
///
/// The Box-Cox transformation is defined as:
/// - If λ ≠ 0: y(λ) = (y^λ - 1) / λ
/// - If λ = 0: y(λ) = ln(y)
///
/// # Arguments
///
/// * `ts` - Input time series (non-positive values are automatically shifted to be positive)
/// * `lambda` - Box-Cox parameter. If None, optimal lambda is estimated
///
/// # Returns
///
/// Tuple of (transformed_series, transformation_parameters)
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::Array1;
/// use scirs2_series::transformations::box_cox_transform;
///
/// let ts = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
/// let (transformed, params) = box_cox_transform(&ts, Some(0.5)).unwrap();
/// ```
#[allow(dead_code)]
pub fn box_cox_transform<F, S>(
    ts: &ArrayBase<S, Ix1>,
    lambda: Option<F>,
) -> Result<(Array1<F>, BoxCoxTransform<F>)>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive + Debug + Display + ScalarOperand,
{
    let n = ts.len();
    if n == 0 {
        return Err(TimeSeriesError::InvalidInput(
            "Time series cannot be empty".to_string(),
        ));
    }

    // Check for non-positive values and adjust if necessary
    let min_val = ts.iter().fold(F::infinity(), |acc, &x| acc.min(x));
    let min_adjustment = if min_val <= F::zero() {
        F::one() - min_val
    } else {
        F::zero()
    };

    let adjusted_ts = if min_adjustment > F::zero() {
        ts.mapv(|x| x + min_adjustment)
    } else {
        ts.to_owned()
    };

    // Estimate lambda if not provided
    let lambda_val = if let Some(l) = lambda {
        l
    } else {
        estimate_box_cox_lambda(&adjusted_ts)?
    };

    // Apply Box-Cox transformation
    let transformed = if lambda_val.abs() < F::from(1e-10).unwrap() {
        // Lambda ≈ 0: use natural logarithm
        adjusted_ts.mapv(|x| x.ln())
    } else {
        // Lambda ≠ 0: use power transformation
        adjusted_ts.mapv(|x| (x.powf(lambda_val) - F::one()) / lambda_val)
    };

    let transform_params = BoxCoxTransform {
        lambda: lambda_val,
        lambda_estimated: lambda.is_none(),
        min_adjustment,
    };

    Ok((transformed, transform_params))
}

/// Estimate optimal Box-Cox lambda parameter using maximum likelihood
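///
/// Up to additive constants, the profile log-likelihood being maximized is
/// `ℓ(λ) = -(n/2)·ln(σ̂²(λ)) + (λ - 1)·Σ ln(x_i)`,
/// where `σ̂²(λ)` is the variance of the transformed data and the second term
/// is the Jacobian of the transformation. The maximum is located by a grid
/// search over λ ∈ [-2, 2].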
#[allow(dead_code)]
fn estimate_box_cox_lambda<F>(ts: &Array1<F>) -> Result<F>
where
    F: Float + FromPrimitive + Debug + Display,
{
    let n = ts.len();
    let n_f = F::from(n).unwrap();

    // Search over a range of lambda values
    let lambda_range = Array1::linspace(-2.0, 2.0, 41);
    let mut best_lambda = F::zero();
    let mut best_log_likelihood = F::neg_infinity();

    for &lambda_f64 in lambda_range.iter() {
        let lambda = F::from(lambda_f64).unwrap();

        // Transform the data
        let transformed = if lambda.abs() < F::from(1e-10).unwrap() {
            ts.mapv(|x| x.ln())
        } else {
            ts.mapv(|x| (x.powf(lambda) - F::one()) / lambda)
        };

        // Calculate log-likelihood
        let mean = transformed.sum() / n_f;
        let variance = transformed.mapv(|x| (x - mean) * (x - mean)).sum() / n_f;

        if variance <= F::zero() {
            continue;
        }

        // Log-likelihood for normal distribution
        let log_likelihood = -n_f / F::from(2.0).unwrap()
            * (F::from(2.0 * std::f64::consts::PI).unwrap().ln() + variance.ln())
            - n_f / F::from(2.0).unwrap();

        // Add Jacobian term: (λ - 1) * Σ ln(x_i)
        let jacobian = (lambda - F::one()) * ts.mapv(|x| x.ln()).sum();
        let total_log_likelihood = log_likelihood + jacobian;

        if total_log_likelihood > best_log_likelihood {
            best_log_likelihood = total_log_likelihood;
            best_lambda = lambda;
        }
    }

    Ok(best_lambda)
}

/// Inverse Box-Cox transformation
///
/// # Arguments
///
/// * `transformed_ts` - Box-Cox transformed time series
/// * `params` - Box-Cox transformation parameters
///
/// # Returns
///
/// Original time series (approximately)
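///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::Array1;
/// use scirs2_series::transformations::{box_cox_transform, inverse_box_cox_transform};
///
/// // Round trip: forward transform with a fixed lambda, then invert.
/// let ts = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
/// let (transformed, params) = box_cox_transform(&ts, Some(0.5)).unwrap();
/// let recovered = inverse_box_cox_transform(&transformed, &params).unwrap();
/// ```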
#[allow(dead_code)]
pub fn inverse_box_cox_transform<F, S>(
    transformed_ts: &ArrayBase<S, Ix1>,
    params: &BoxCoxTransform<F>,
) -> Result<Array1<F>>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive + Debug + Display,
{
    let lambda = params.lambda;

    let original = if lambda.abs() < F::from(1e-10).unwrap() {
        // Lambda ≈ 0: inverse of ln(x) is exp(x)
        transformed_ts.mapv(|x| x.exp())
    } else {
        // Lambda ≠ 0: inverse of (x^λ - 1)/λ is (λ*y + 1)^(1/λ)
        transformed_ts.mapv(|x| (lambda * x + F::one()).powf(F::one() / lambda))
    };

    // Remove the minimum adjustment
    let result = if params.min_adjustment > F::zero() {
        original.mapv(|x| x - params.min_adjustment)
    } else {
        original
    };

    Ok(result)
}

/// Apply differencing transformation to make time series stationary
///
/// # Arguments
///
/// * `ts` - Input time series
/// * `order` - Order of differencing (1 = first differences, 2 = second differences, etc.)
/// * `seasonal_lag` - Optional seasonal differencing lag
///
/// # Returns
///
/// Tuple of (differenced_series, transformation_parameters)
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::Array1;
/// use scirs2_series::transformations::difference_transform;
///
/// let ts = Array1::from_vec(vec![1.0, 3.0, 6.0, 10.0, 15.0]);
/// let (differenced, params) = difference_transform(&ts, 1, None).unwrap();
/// // Result: [2.0, 3.0, 4.0, 5.0] (first differences)
/// ```
#[allow(dead_code)]
pub fn difference_transform<F, S>(
    ts: &ArrayBase<S, Ix1>,
    order: usize,
    seasonal_lag: Option<usize>,
) -> Result<(Array1<F>, DifferencingTransform)>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive + Debug + Clone,
{
    if order == 0 && seasonal_lag.is_none() {
        return Ok((
            ts.to_owned(),
            DifferencingTransform {
                order: 0,
                seasonal_lag: None,
            },
        ));
    }

    let mut result = ts.to_owned();

    // Apply seasonal differencing first if specified
    if let Some(lag) = seasonal_lag {
        if lag == 0 {
            return Err(TimeSeriesError::InvalidInput(
                "Seasonal lag must be positive".to_string(),
            ));
        }

        if result.len() <= lag {
            return Err(TimeSeriesError::InsufficientData {
                message: format!(
                    "Time series length {} is not sufficient for seasonal lag {}",
                    result.len(),
                    lag
                ),
                required: lag + 1,
                actual: result.len(),
            });
        }

        let seasonal_diff =
            Array1::from_shape_fn(result.len() - lag, |i| result[i + lag] - result[i]);
        result = seasonal_diff;
    }

    // Apply regular differencing
    for _ in 0..order {
        if result.len() <= 1 {
            return Err(TimeSeriesError::InsufficientData {
                message: "Cannot apply more differences: series too short".to_string(),
                required: 2,
                actual: result.len(),
            });
        }

        let diff = Array1::from_shape_fn(result.len() - 1, |i| result[i + 1] - result[i]);
        result = diff;
    }

    let params = DifferencingTransform {
        order,
        seasonal_lag,
    };
    Ok((result, params))
}

/// Integrate (reverse difference) a time series
///
/// # Arguments
///
/// * `differenced_ts` - Differenced time series
/// * `params` - Differencing transformation parameters
/// * `initial_values` - Initial values needed for integration
///
/// # Returns
///
/// Integrated (original level) time series
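///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::Array1;
/// use scirs2_series::transformations::{difference_transform, integrate_transform};
///
/// // First-difference a series, then integrate back using the first
/// // observation as the initial value.
/// let ts = Array1::from_vec(vec![1.0, 3.0, 6.0, 10.0, 15.0]);
/// let (differenced, params) = difference_transform(&ts, 1, None).unwrap();
/// let initial = Array1::from_vec(vec![1.0]);
/// let restored = integrate_transform(&differenced, &params, &initial).unwrap();
/// ```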
#[allow(dead_code)]
pub fn integrate_transform<F, S>(
    differenced_ts: &ArrayBase<S, Ix1>,
    params: &DifferencingTransform,
    initial_values: &Array1<F>,
) -> Result<Array1<F>>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive + Debug + Clone,
{
    let mut result = differenced_ts.to_owned();

    // Reverse regular differencing
    for _ in 0..params.order {
        let mut integrated = Array1::zeros(result.len() + 1);

        // Set initial value (should be provided in initial_values)
        let init_idx = params.order - 1;
        if init_idx >= initial_values.len() {
            return Err(TimeSeriesError::InvalidInput(
                "Insufficient initial values for integration".to_string(),
            ));
        }
        integrated[0] = initial_values[init_idx];

        // Integrate by cumulative sum
        for i in 0..result.len() {
            integrated[i + 1] = integrated[i] + result[i];
        }
        result = integrated;
    }

    // Reverse seasonal differencing if applied
    if let Some(lag) = params.seasonal_lag {
        let mut seasonal_integrated = Array1::zeros(result.len() + lag);

        // Set initial seasonal values
        for i in 0..lag {
            if i >= initial_values.len() {
                return Err(TimeSeriesError::InvalidInput(
                    "Insufficient initial values for seasonal integration".to_string(),
                ));
            }
            seasonal_integrated[i] = initial_values[i];
        }

        // Integrate seasonally
        for i in 0..result.len() {
            seasonal_integrated[i + lag] = seasonal_integrated[i] + result[i];
        }
        result = seasonal_integrated;
    }

    Ok(result)
}

/// Normalize time series using specified method
///
/// # Arguments
///
/// * `ts` - Input time series
/// * `method` - Normalization method to use
///
/// # Returns
///
/// Tuple of (normalized_series, normalization_parameters)
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::Array1;
/// use scirs2_series::transformations::{normalize_transform, NormalizationMethod};
///
/// let ts = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
/// let (normalized, params) = normalize_transform(&ts, NormalizationMethod::ZScore).unwrap();
/// ```
#[allow(dead_code)]
pub fn normalize_transform<F, S>(
    ts: &ArrayBase<S, Ix1>,
    method: NormalizationMethod,
) -> Result<(Array1<F>, NormalizationParams<F>)>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive + Debug + Display + Clone,
{
    let n = ts.len();
    if n == 0 {
        return Err(TimeSeriesError::InvalidInput(
            "Time series cannot be empty".to_string(),
        ));
    }

    let (location, scale, normalized) = match method {
        NormalizationMethod::ZScore => {
            // Z-score normalization: (x - μ) / σ
            let mean = ts.sum() / F::from(n).unwrap();
            let variance = ts.mapv(|x| (x - mean) * (x - mean)).sum() / F::from(n - 1).unwrap();
            let std_dev = variance.sqrt();

            if std_dev <= F::zero() {
                return Err(TimeSeriesError::InvalidInput(
                    "Cannot normalize: standard deviation is zero".to_string(),
                ));
            }

            let normalized = ts.mapv(|x| (x - mean) / std_dev);
            (mean, std_dev, normalized)
        }

        NormalizationMethod::MinMax => {
            // Min-max normalization: (x - min) / (max - min)
            let min_val = ts.iter().fold(F::infinity(), |acc, &x| acc.min(x));
            let max_val = ts.iter().fold(F::neg_infinity(), |acc, &x| acc.max(x));
            let range = max_val - min_val;

            if range <= F::zero() {
                return Err(TimeSeriesError::InvalidInput(
                    "Cannot normalize: min equals max".to_string(),
                ));
            }

            let normalized = ts.mapv(|x| (x - min_val) / range);
            (min_val, range, normalized)
        }

        NormalizationMethod::MinMaxCustom(min_target, max_target) => {
            // Min-max normalization to custom range
            let min_val = ts.iter().fold(F::infinity(), |acc, &x| acc.min(x));
            let max_val = ts.iter().fold(F::neg_infinity(), |acc, &x| acc.max(x));
            let range = max_val - min_val;

            if range <= F::zero() {
                return Err(TimeSeriesError::InvalidInput(
                    "Cannot normalize: min equals max".to_string(),
                ));
            }

            let min_target_f = F::from(min_target).unwrap();
            let max_target_f = F::from(max_target).unwrap();
            let target_range = max_target_f - min_target_f;

            let normalized = ts.mapv(|x| {
                let normalized_01 = (x - min_val) / range;
                min_target_f + normalized_01 * target_range
            });
            (min_val, range, normalized)
        }

        NormalizationMethod::Robust => {
            // Robust normalization using median and IQR
            let mut sorted_values: Vec<F> = ts.iter().cloned().collect();
            sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

            let median = if n.is_multiple_of(2) {
                (sorted_values[n / 2 - 1] + sorted_values[n / 2]) / F::from(2.0).unwrap()
            } else {
                sorted_values[n / 2]
            };

            let q1_idx = n / 4;
            let q3_idx = 3 * n / 4;
            let q1 = sorted_values[q1_idx];
            let q3 = sorted_values[q3_idx.min(n - 1)];
            let iqr = q3 - q1;

            if iqr <= F::zero() {
                return Err(TimeSeriesError::InvalidInput(
                    "Cannot normalize: IQR is zero".to_string(),
                ));
            }

            let normalized = ts.mapv(|x| (x - median) / iqr);
            (median, iqr, normalized)
        }
    };

    let params = NormalizationParams {
        location,
        scale,
        method,
    };

    Ok((normalized, params))
}

/// Inverse normalization transformation
///
/// # Arguments
///
/// * `normalized_ts` - Normalized time series
/// * `params` - Normalization parameters from the forward transformation
///
/// # Returns
///
/// Original scale time series
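///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::Array1;
/// use scirs2_series::transformations::{
///     inverse_normalize_transform, normalize_transform, NormalizationMethod,
/// };
///
/// // Round trip: normalize, then map back to the original scale.
/// let ts = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
/// let (normalized, params) = normalize_transform(&ts, NormalizationMethod::ZScore).unwrap();
/// let recovered = inverse_normalize_transform(&normalized, &params);
/// ```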
#[allow(dead_code)]
pub fn inverse_normalize_transform<F, S>(
    normalized_ts: &ArrayBase<S, Ix1>,
    params: &NormalizationParams<F>,
) -> Array1<F>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive + Debug + Clone,
{
    match params.method {
        NormalizationMethod::ZScore => {
            // Inverse Z-score: x = (z * σ) + μ
            normalized_ts.mapv(|x| x * params.scale + params.location)
        }

        NormalizationMethod::MinMax => {
            // Inverse min-max: x = (norm * range) + min
            normalized_ts.mapv(|x| x * params.scale + params.location)
        }

        NormalizationMethod::MinMaxCustom(min_target, max_target) => {
            // Inverse custom min-max
            let min_target_f = F::from(min_target).unwrap();
            let max_target_f = F::from(max_target).unwrap();
            let target_range = max_target_f - min_target_f;

            normalized_ts.mapv(|x| {
                let normalized_01 = (x - min_target_f) / target_range;
                normalized_01 * params.scale + params.location
            })
        }

        NormalizationMethod::Robust => {
            // Inverse robust: x = (norm * IQR) + median
            normalized_ts.mapv(|x| x * params.scale + params.location)
        }
    }
}

/// Augmented Dickey-Fuller test for stationarity
///
/// Tests the null hypothesis that a unit root is present in the time series.
/// H0: The series has a unit root (non-stationary)
/// H1: The series is stationary
///
/// # Arguments
///
/// * `ts` - Input time series
/// * `max_lags` - Maximum number of lags to include (auto-selected if None)
/// * `regression_type` - Type of regression ('c' = constant, 'ct' = constant and trend, 'nc' = no constant)
///
/// # Returns
///
/// Stationarity test results
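///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::Array1;
/// use scirs2_series::transformations::adf_test;
///
/// // Constant-only ('c') regression with two lagged difference terms.
/// let ts: Array1<f64> = (0..50)
///     .map(|i| (i as f64 / 10.0).sin() + 0.1 * i as f64)
///     .collect();
/// let result = adf_test(&ts, Some(2), "c").unwrap();
/// assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
/// ```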
#[allow(dead_code)]
pub fn adf_test<F, S>(
    ts: &ArrayBase<S, Ix1>,
    max_lags: Option<usize>,
    regression_type: &str,
) -> Result<StationarityTest<F>>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive + Debug + Display + Clone + 'static,
{
    let n = ts.len();
    if n < 10 {
        return Err(TimeSeriesError::InsufficientData {
            message: "ADF test requires at least 10 observations".to_string(),
            required: 10,
            actual: n,
        });
    }

    // Determine the number of lags (rule of thumb when not specified)
    let lags = max_lags
        .unwrap_or_else(|| {
            // Rule of thumb: 12 * (n/100)^(1/4)
            let lag_estimate = 12.0 * (n as f64 / 100.0).powf(0.25);
            lag_estimate.floor() as usize
        })
        .min((n - 1) / 3);

    // Prepare regression data
    let y_diff = Array1::from_shape_fn(n - 1, |i| ts[i + 1] - ts[i]);
    let y_lag = Array1::from_shape_fn(n - 1, |i| ts[i]);

    let start_idx = lags;
    let regression_length = n - 1 - start_idx;

    if regression_length < 5 {
        return Err(TimeSeriesError::InsufficientData {
            message: "Insufficient data for ADF regression after accounting for lags".to_string(),
            required: start_idx + 5,
            actual: n,
        });
    }

    // Build regression matrix
    let mut n_regressors = 1; // y_{t-1} coefficient
    if regression_type.contains('c') {
        n_regressors += 1; // constant
    }
    if regression_type.contains('t') {
        n_regressors += 1; // trend
    }
    n_regressors += lags; // lagged differences

    let mut x_matrix = Array2::zeros((regression_length, n_regressors));
    let mut y_vector = Array1::zeros(regression_length);

    let mut col_idx = 0;

    // Constant term
    if regression_type.contains('c') {
        for i in 0..regression_length {
            x_matrix[[i, col_idx]] = F::one();
        }
        col_idx += 1;
    }

    // Trend term
    if regression_type.contains('t') {
        for i in 0..regression_length {
            x_matrix[[i, col_idx]] = F::from(i + 1).unwrap();
        }
        col_idx += 1;
    }

    // y_{t-1} term (this is what we test)
    for i in 0..regression_length {
        x_matrix[[i, col_idx]] = y_lag[start_idx + i];
        y_vector[i] = y_diff[start_idx + i];
    }
    col_idx += 1;

    // Lagged difference terms
    for lag in 1..=lags {
        for i in 0..regression_length {
            let diff_idx = start_idx + i - lag;
            x_matrix[[i, col_idx]] = y_diff[diff_idx];
        }
        col_idx += 1;
    }

    // Perform OLS regression
    let xtx = x_matrix.t().dot(&x_matrix);
    let xty = x_matrix.t().dot(&y_vector);

    // Solve normal equations (simplified - would use proper linear algebra in practice)
    let beta = solve_ols_simple(&xtx, &xty)?;

    // Calculate residuals and standard errors
    let y_pred = x_matrix.dot(&beta);
    let residuals = &y_vector - &y_pred;
    let mse = residuals.mapv(|x| x * x).sum() / F::from(regression_length - n_regressors).unwrap();

    // Standard error of the coefficient of interest (y_{t-1})
    let coeff_idx = if regression_type.contains('c') { 1 } else { 0 };
    let coeff_idx = if regression_type.contains('t') {
        coeff_idx + 1
    } else {
        coeff_idx
    };

    let var_coeff = mse * pseudo_inverse_diag(&xtx, coeff_idx)?;
    let se_coeff = var_coeff.sqrt();

    // t-statistic for unit root test
    let t_stat = beta[coeff_idx] / se_coeff;

    // Critical values (approximated)
    let critical_values = get_adf_critical_values(regression_type);

    // Determine if stationary (reject H0 at the 5% level)
    let is_stationary = t_stat < critical_values[1].1;

    // P-value approximation (simplified)
    let p_value = approximate_adf_p_value(t_stat, regression_type);

    Ok(StationarityTest {
        statistic: t_stat,
        p_value,
        critical_values,
        is_stationary,
        test_type: StationarityTestType::ADF,
    })
}

/// Simple OLS solver for small matrices
#[allow(dead_code)]
fn solve_ols_simple<F>(xtx: &Array2<F>, xty: &Array1<F>) -> Result<Array1<F>>
where
    F: Float + FromPrimitive + Debug + Display + Clone,
{
    let n = xtx.nrows();

    // Simple case: 1x1 matrix
    if n == 1 {
        if xtx[[0, 0]].abs() < F::from(1e-12).unwrap() {
            return Err(TimeSeriesError::NumericalInstability(
                "Singular matrix in OLS".to_string(),
            ));
        }
        return Ok(Array1::from_elem(1, xty[0] / xtx[[0, 0]]));
    }

    // For larger matrices, use Gaussian elimination with partial pivoting
    let mut a = xtx.clone();
    let mut b = xty.clone();

    // Forward elimination
    for k in 0..(n - 1) {
        // Find pivot
        let mut max_row = k;
        for i in (k + 1)..n {
            if a[[i, k]].abs() > a[[max_row, k]].abs() {
                max_row = i;
            }
        }

        // Swap rows
        if max_row != k {
            for j in k..n {
                let temp = a[[k, j]];
                a[[k, j]] = a[[max_row, j]];
                a[[max_row, j]] = temp;
            }
            let temp = b[k];
            b[k] = b[max_row];
            b[max_row] = temp;
        }

        // Eliminate
        for i in (k + 1)..n {
            if a[[k, k]].abs() < F::from(1e-12).unwrap() {
                return Err(TimeSeriesError::NumericalInstability(
                    "Near-zero pivot in OLS".to_string(),
                ));
            }
            let factor = a[[i, k]] / a[[k, k]];
            for j in k..n {
                a[[i, j]] = a[[i, j]] - factor * a[[k, j]];
            }
            b[i] = b[i] - factor * b[k];
        }
    }

    // Back substitution
    let mut x = Array1::zeros(n);
    for i in (0..n).rev() {
        let mut sum = F::zero();
        for j in (i + 1)..n {
            sum = sum + a[[i, j]] * x[j];
        }
        x[i] = (b[i] - sum) / a[[i, i]];
    }

    Ok(x)
}

/// Get diagonal element of pseudo-inverse (simplified)
#[allow(dead_code)]
fn pseudo_inverse_diag<F>(matrix: &Array2<F>, idx: usize) -> Result<F>
where
    F: Float + FromPrimitive + Debug,
{
    // Simplified: just return 1/diagonal for the well-conditioned case
    if matrix[[idx, idx]].abs() < F::from(1e-12).unwrap() {
        return Err(TimeSeriesError::NumericalInstability(
            "Matrix is singular".to_string(),
        ));
    }
    Ok(F::one() / matrix[[idx, idx]])
}

/// Get ADF critical values (approximated)
#[allow(dead_code)]
fn get_adf_critical_values<F>(regression_type: &str) -> Vec<(F, F)>
where
    F: Float + FromPrimitive,
{
    // Simplified critical values - in practice these would be more sophisticated
    match regression_type {
        "nc" => vec![
            (F::from(0.01).unwrap(), F::from(-2.58).unwrap()),
            (F::from(0.05).unwrap(), F::from(-1.95).unwrap()),
            (F::from(0.10).unwrap(), F::from(-1.62).unwrap()),
        ],
        "c" => vec![
            (F::from(0.01).unwrap(), F::from(-3.43).unwrap()),
            (F::from(0.05).unwrap(), F::from(-2.86).unwrap()),
            (F::from(0.10).unwrap(), F::from(-2.57).unwrap()),
        ],
        "ct" => vec![
            (F::from(0.01).unwrap(), F::from(-3.96).unwrap()),
            (F::from(0.05).unwrap(), F::from(-3.41).unwrap()),
            (F::from(0.10).unwrap(), F::from(-3.13).unwrap()),
        ],
        _ => vec![
            (F::from(0.01).unwrap(), F::from(-3.43).unwrap()),
            (F::from(0.05).unwrap(), F::from(-2.86).unwrap()),
            (F::from(0.10).unwrap(), F::from(-2.57).unwrap()),
        ],
    }
}

/// Approximate p-value for ADF test (simplified)
#[allow(dead_code)]
fn approximate_adf_p_value<F>(t_stat: F, _regression_type: &str) -> F
where
    F: Float + FromPrimitive,
{
    // Very simplified p-value approximation
    if t_stat < F::from(-3.0).unwrap() {
        F::from(0.01).unwrap()
    } else if t_stat < F::from(-2.5).unwrap() {
        F::from(0.05).unwrap()
    } else if t_stat < F::from(-2.0).unwrap() {
        F::from(0.10).unwrap()
    } else {
        F::from(0.20).unwrap()
    }
}

/// KPSS test for stationarity
///
/// Tests the null hypothesis that the time series is stationary around a deterministic trend.
/// H0: The series is stationary
/// H1: The series has a unit root (non-stationary)
///
/// # Arguments
///
/// * `ts` - Input time series
/// * `regression_type` - Type of regression ('c' = level stationary, 'ct' = trend stationary)
///
/// # Returns
///
/// Stationarity test results
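///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::Array1;
/// use scirs2_series::transformations::kpss_test;
///
/// // Level-stationarity ('c') test on a synthetic series.
/// let ts: Array1<f64> = (0..50).map(|i| (i as f64 / 10.0).sin()).collect();
/// let result = kpss_test(&ts, "c").unwrap();
/// assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
/// ```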
#[allow(dead_code)]
pub fn kpss_test<F, S>(ts: &ArrayBase<S, Ix1>, regression_type: &str) -> Result<StationarityTest<F>>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive + Debug + Display + Clone,
{
    let n = ts.len();
    if n < 10 {
        return Err(TimeSeriesError::InsufficientData {
            message: "KPSS test requires at least 10 observations".to_string(),
            required: 10,
            actual: n,
        });
    }

    // Determine regression type
    let include_trend = regression_type.contains('t');

    // Detrend the series
    let detrended = if include_trend {
        // Remove linear trend
        detrend_linear(ts)?
    } else {
        // Remove mean (level)
        let mean = ts.sum() / F::from(n).unwrap();
        ts.mapv(|x| x - mean)
    };

    // Calculate partial sums
    let mut partial_sums = Array1::zeros(n);
    partial_sums[0] = detrended[0];
    for i in 1..n {
        partial_sums[i] = partial_sums[i - 1] + detrended[i];
    }

    // Calculate the LM statistic from the squared partial sums
    let sum_squares = partial_sums.mapv(|x| x * x).sum();

    // Long-run variance estimation (Newey-West)
    let long_run_variance = estimate_long_run_variance(&detrended)?;

    let lm_stat = sum_squares / (F::from(n * n).unwrap() * long_run_variance);

    // Critical values
    let critical_values = get_kpss_critical_values(include_trend);

    // Determine if stationary (fail to reject H0 at the 5% level)
    let is_stationary = lm_stat < critical_values[1].1;

    // P-value approximation
    let p_value = approximate_kpss_p_value(lm_stat, include_trend);

    Ok(StationarityTest {
        statistic: lm_stat,
        p_value,
        critical_values,
        is_stationary,
        test_type: StationarityTestType::KPSS,
    })
}

/// Remove linear trend from time series
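///
/// Fits `y_t = a + b·t` by ordinary least squares and returns the residuals
/// `y_t - (a + b·t)`, where `b = (Σ t·y - n·t̄·ȳ) / (Σ t² - n·t̄²)` and
/// `a = ȳ - b·t̄`.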
#[allow(dead_code)]
fn detrend_linear<F, S>(ts: &ArrayBase<S, Ix1>) -> Result<Array1<F>>
where
    S: Data<Elem = F>,
    F: Float + FromPrimitive + Debug + Clone,
{
    let n = ts.len();
    let n_f = F::from(n).unwrap();

    // Create time index
    let time_index: Array1<F> = (0..n).map(|i| F::from(i).unwrap()).collect();

    // Calculate linear regression coefficients
    let sum_t = time_index.sum();
    let sum_y = ts.sum();
    let sum_tt = time_index.mapv(|t| t * t).sum();
    let sum_ty = time_index
        .iter()
        .zip(ts.iter())
        .map(|(&t, &y)| t * y)
        .fold(F::zero(), |acc, x| acc + x);

    let mean_t = sum_t / n_f;
    let mean_y = sum_y / n_f;

    let denominator = sum_tt - n_f * mean_t * mean_t;
    if denominator.abs() < F::from(1e-12).unwrap() {
        return Err(TimeSeriesError::NumericalInstability(
            "Cannot detrend: degenerate time series".to_string(),
        ));
    }

    let slope = (sum_ty - n_f * mean_t * mean_y) / denominator;
    let intercept = mean_y - slope * mean_t;

    // Remove trend
    let detrended = time_index
        .iter()
        .zip(ts.iter())
        .map(|(&t, &y)| y - (intercept + slope * t))
        .collect();

    Ok(detrended)
}

/// Estimate long-run variance using Newey-West estimator
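///
/// Computes the Bartlett-kernel (Newey-West) estimator
/// `σ̂² = γ̂(0) + 2·Σ_{l=1}^{L} (1 - l/(L+1))·γ̂(l)`,
/// where `γ̂(l)` is the lag-l autocovariance of the residuals and the
/// bandwidth is `L = ⌊n^(1/3)⌋`.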
#[allow(dead_code)]
fn estimate_long_run_variance<F>(residuals: &Array1<F>) -> Result<F>
where
    F: Float + FromPrimitive + Debug,
{
    let n = residuals.len();
    let n_f = F::from(n).unwrap();

    // Base variance
    let mut variance = residuals.mapv(|x| x * x).sum() / n_f;

    // Add autocovariance terms
    let max_lag = (n as f64).powf(1.0 / 3.0).floor() as usize; // Rule of thumb

    for lag in 1..=max_lag.min(n - 1) {
        let mut autocovariance = F::zero();
        for i in lag..n {
            autocovariance = autocovariance + residuals[i] * residuals[i - lag];
        }
        autocovariance = autocovariance / n_f;

        // Bartlett kernel weights
        let weight = F::one() - F::from(lag).unwrap() / F::from(max_lag + 1).unwrap();
        variance = variance + F::from(2.0).unwrap() * weight * autocovariance;
    }

    Ok(variance.max(F::from(1e-10).unwrap())) // Ensure positive
}

/// Get KPSS critical values
#[allow(dead_code)]
fn get_kpss_critical_values<F>(include_trend: bool) -> Vec<(F, F)>
where
    F: Float + FromPrimitive,
{
    if include_trend {
        vec![
            (F::from(0.01).unwrap(), F::from(0.216).unwrap()),
            (F::from(0.05).unwrap(), F::from(0.146).unwrap()),
            (F::from(0.10).unwrap(), F::from(0.119).unwrap()),
        ]
    } else {
        vec![
            (F::from(0.01).unwrap(), F::from(0.739).unwrap()),
            (F::from(0.05).unwrap(), F::from(0.463).unwrap()),
            (F::from(0.10).unwrap(), F::from(0.347).unwrap()),
        ]
    }
}

/// Approximate p-value for KPSS test
#[allow(dead_code)]
fn approximate_kpss_p_value<F>(lm_stat: F, include_trend: bool) -> F
where
    F: Float + FromPrimitive,
{
    let critical_vals = get_kpss_critical_values::<F>(include_trend);

    if lm_stat > critical_vals[0].1 {
        F::from(0.01).unwrap()
    } else if lm_stat > critical_vals[1].1 {
        F::from(0.05).unwrap()
    } else if lm_stat > critical_vals[2].1 {
        F::from(0.10).unwrap()
    } else {
        F::from(0.20).unwrap()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_box_cox_transform() {
        let ts = array![1.0, 2.0, 3.0, 4.0, 5.0];

        // Test with lambda = 0 (log transformation)
        let (transformed, params) = box_cox_transform(&ts, Some(0.0)).unwrap();
        let expected: Array1<f64> = ts.mapv(|x| x.ln());

        for i in 0..ts.len() {
            assert_relative_eq!(transformed[i], expected[i], epsilon = 1e-10);
        }

        // Test inverse transformation
        let recovered = inverse_box_cox_transform(&transformed, &params).unwrap();
        for i in 0..ts.len() {
            assert_relative_eq!(recovered[i], ts[i], epsilon = 1e-10);
        }
    }

    #[test]
    fn test_box_cox_lambda_estimation() {
        let ts = array![1.0, 4.0, 9.0, 16.0, 25.0]; // Perfect squares
        let (transformed, params) = box_cox_transform(&ts, None).unwrap();

        // Should estimate a lambda that makes the series more normal
        assert!(params.lambda_estimated);
        assert!(transformed.len() == ts.len());
    }

    #[test]
    fn test_difference_transform() {
        let ts = array![1.0, 3.0, 6.0, 10.0, 15.0, 21.0];

        // First differences
        let (diff1, params) = difference_transform(&ts, 1, None).unwrap();
        let expected_diff1 = array![2.0, 3.0, 4.0, 5.0, 6.0];

        assert_eq!(diff1, expected_diff1);
        assert_eq!(params.order, 1);
        assert_eq!(params.seasonal_lag, None);

        // Second differences
        let (diff2, _) = difference_transform(&ts, 2, None).unwrap();
        let expected_diff2 = array![1.0, 1.0, 1.0, 1.0];

        assert_eq!(diff2, expected_diff2);
    }

    #[test]
    fn test_seasonal_difference() {
        let ts = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];

        // Seasonal differencing with lag 4
        let (seasonal_diff, params) = difference_transform(&ts, 0, Some(4)).unwrap();
        let expected = array![4.0, 4.0, 4.0, 4.0]; // [5-1, 6-2, 7-3, 8-4]

        assert_eq!(seasonal_diff, expected);
        assert_eq!(params.seasonal_lag, Some(4));
    }

    #[test]
    fn test_normalize_z_score() {
        let ts = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let (normalized, params) = normalize_transform(&ts, NormalizationMethod::ZScore).unwrap();

        // Check that mean is approximately 0 and std is approximately 1
        let mean = normalized.sum() / normalized.len() as f64;
        let variance =
            normalized.mapv(|x| (x - mean) * (x - mean)).sum() / (normalized.len() - 1) as f64;
        let std = variance.sqrt();

        assert_relative_eq!(mean, 0.0, epsilon = 1e-10);
        assert_relative_eq!(std, 1.0, epsilon = 1e-10);

        // Test inverse transformation
        let recovered = inverse_normalize_transform(&normalized, &params);
        for i in 0..ts.len() {
            assert_relative_eq!(recovered[i], ts[i], epsilon = 1e-10);
        }
    }

    #[test]
    fn test_normalize_min_max() {
        let ts = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let (normalized, params) = normalize_transform(&ts, NormalizationMethod::MinMax).unwrap();

        // Check that min is 0 and max is 1
        let min_val = normalized.iter().fold(f64::INFINITY, |acc, &x| acc.min(x));
        let max_val = normalized
            .iter()
            .fold(f64::NEG_INFINITY, |acc, &x| acc.max(x));

        assert_relative_eq!(min_val, 0.0, epsilon = 1e-10);
        assert_relative_eq!(max_val, 1.0, epsilon = 1e-10);

        // Test inverse transformation
        let recovered = inverse_normalize_transform(&normalized, &params);
        for i in 0..ts.len() {
            assert_relative_eq!(recovered[i], ts[i], epsilon = 1e-10);
        }
    }

    #[test]
    fn test_adf_test() {
        // Create a non-stationary trending series with enough variation
        // to keep the regression well conditioned
        let mut ts = Array1::zeros(100);
        ts[0] = 10.0;
        for i in 1..100 {
            ts[i] = ts[i - 1]
                + 0.5 * (i as f64 / 10.0).sin()
                + 0.1 * (i as f64)
                + 0.01 * ((i * 7) as f64).cos();
        }

        let result = adf_test(&ts, Some(2), "c").unwrap();

        // Should have proper structure
        assert_eq!(result.test_type, StationarityTestType::ADF);
        assert!(result.critical_values.len() >= 3);
        assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
    }

    #[test]
    fn test_kpss_test() {
        // Create a test series (sine plus mild trend)
        let ts: Array1<f64> = (0..50)
            .map(|i| (i as f64 / 10.0).sin() + 0.1 * (i as f64))
            .collect();

        let result = kpss_test(&ts, "c").unwrap();

        // Should have proper structure
        assert_eq!(result.test_type, StationarityTestType::KPSS);
        assert!(result.critical_values.len() >= 3);
        assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
    }

    #[test]
    fn test_detrend_linear() {
        let ts = array![1.0, 3.0, 5.0, 7.0, 9.0]; // Perfect linear trend
        let detrended = detrend_linear(&ts).unwrap();

        // After detrending a perfect linear series, should be approximately zero
        for &val in detrended.iter() {
            assert_relative_eq!(val, 0.0, epsilon = 1e-10);
        }
    }

    #[test]
    fn test_integration_differentiation_inverse() {
        let original = array![1.0, 3.0, 6.0, 10.0, 15.0];

        // Difference then integrate
        let (differenced, params) = difference_transform(&original, 1, None).unwrap();
        let initial_vals = array![original[0]];
        let integrated = integrate_transform(&differenced, &params, &initial_vals).unwrap();

        // Should recover original (approximately)
        for i in 0..original.len() {
            assert_relative_eq!(integrated[i], original[i], epsilon = 1e-10);
        }
    }
}