scirs2_series/
regression.rs

1//! Time series regression models
2//!
3//! This module provides various regression methods for time series analysis:
4//! - Distributed lag models (DL)
5//! - Autoregressive distributed lag (ARDL) models
6//! - Error correction models (ECM)
7//! - Regression with ARIMA errors
8
9use crate::error::TimeSeriesError;
10use scirs2_core::ndarray::{s, Array1, Array2};
11use scirs2_core::validation::checkarray_finite;
12
13/// Result type for regression models
14pub type RegressionResult<T> = Result<T, TimeSeriesError>;
15
16/// Distributed lag model result
17#[derive(Debug, Clone)]
18pub struct DistributedLagResult {
19    /// Regression coefficients for lagged values
20    pub coefficients: Array1<f64>,
21    /// Standard errors of coefficients
22    pub standard_errors: Array1<f64>,
23    /// T-statistics for coefficients
24    pub t_statistics: Array1<f64>,
25    /// P-values for coefficients
26    pub p_values: Array1<f64>,
27    /// R-squared value
28    pub r_squared: f64,
29    /// Adjusted R-squared value
30    pub adjusted_r_squared: f64,
31    /// Residual sum of squares
32    pub residual_sum_squares: f64,
33    /// Degrees of freedom
34    pub degrees_of_freedom: usize,
35    /// Maximum lag used
36    pub max_lag: usize,
37    /// Fitted values
38    pub fitted_values: Array1<f64>,
39    /// Residuals
40    pub residuals: Array1<f64>,
41}
42
43/// ARDL model result
44#[derive(Debug, Clone)]
45pub struct ARDLResult {
46    /// Coefficients for dependent variable lags
47    pub y_coefficients: Array1<f64>,
48    /// Coefficients for independent variable lags
49    pub x_coefficients: Array1<f64>,
50    /// Intercept coefficient
51    pub intercept: f64,
52    /// Standard errors for all coefficients
53    pub standard_errors: Array1<f64>,
54    /// T-statistics for all coefficients
55    pub t_statistics: Array1<f64>,
56    /// P-values for all coefficients
57    pub p_values: Array1<f64>,
58    /// R-squared value
59    pub r_squared: f64,
60    /// Adjusted R-squared value
61    pub adjusted_r_squared: f64,
62    /// Number of lags for dependent variable
63    pub y_lags: usize,
64    /// Number of lags for independent variable
65    pub x_lags: usize,
66    /// Fitted values
67    pub fitted_values: Array1<f64>,
68    /// Residuals
69    pub residuals: Array1<f64>,
70    /// Information criteria
71    pub aic: f64,
72    /// Bayesian information criterion
73    pub bic: f64,
74}
75
76/// Error correction model result
77#[derive(Debug, Clone)]
78pub struct ErrorCorrectionResult {
79    /// Error correction coefficient (speed of adjustment)
80    pub error_correction_coeff: f64,
81    /// Short-run coefficients for dependent variable differences
82    pub short_run_y_coeffs: Array1<f64>,
83    /// Short-run coefficients for independent variable differences
84    pub short_run_x_coeffs: Array1<f64>,
85    /// Long-run coefficients
86    pub long_run_coeffs: Array1<f64>,
87    /// Standard errors
88    pub standard_errors: Array1<f64>,
89    /// T-statistics
90    pub t_statistics: Array1<f64>,
91    /// P-values
92    pub p_values: Array1<f64>,
93    /// R-squared value
94    pub r_squared: f64,
95    /// Fitted values
96    pub fitted_values: Array1<f64>,
97    /// Residuals
98    pub residuals: Array1<f64>,
99    /// Cointegration test p-value
100    pub cointegration_p_value: f64,
101}
102
103/// Regression with ARIMA errors result (placeholder for future implementation)
104#[derive(Debug, Clone)]
105pub struct ARIMAErrorsResult {
106    /// Regression coefficients
107    pub regression_coefficients: Array1<f64>,
108    /// Combined fitted values (regression + ARIMA error correction)
109    pub fitted_values: Array1<f64>,
110    /// Final residuals (should be white noise)
111    pub residuals: Array1<f64>,
112    /// R-squared for regression component
113    pub regression_r_squared: f64,
114    /// Log-likelihood of combined model
115    pub log_likelihood: f64,
116    /// AIC of combined model
117    pub aic: f64,
118    /// BIC of combined model
119    pub bic: f64,
120    /// Standard errors of regression coefficients
121    pub regression_std_errors: Array1<f64>,
122}
123
124/// Configuration for distributed lag models
125#[derive(Debug, Clone)]
126pub struct DistributedLagConfig {
127    /// Maximum lag to include
128    pub max_lag: usize,
129    /// Whether to include intercept
130    pub include_intercept: bool,
131    /// Significance level for tests
132    pub significance_level: f64,
133}
134
135impl Default for DistributedLagConfig {
136    fn default() -> Self {
137        Self {
138            max_lag: 4,
139            include_intercept: true,
140            significance_level: 0.05,
141        }
142    }
143}
144
145/// Configuration for ARDL models
146#[derive(Debug, Clone)]
147pub struct ARDLConfig {
148    /// Maximum lags for dependent variable
149    pub max_y_lags: usize,
150    /// Maximum lags for independent variables
151    pub max_x_lags: usize,
152    /// Whether to include intercept
153    pub include_intercept: bool,
154    /// Information criterion for model selection
155    pub selection_criterion: InformationCriterion,
156    /// Whether to perform automatic lag selection
157    pub auto_lag_selection: bool,
158}
159
160impl Default for ARDLConfig {
161    fn default() -> Self {
162        Self {
163            max_y_lags: 4,
164            max_x_lags: 4,
165            include_intercept: true,
166            selection_criterion: InformationCriterion::AIC,
167            auto_lag_selection: true,
168        }
169    }
170}
171
172/// Information criteria for model selection
173#[derive(Debug, Clone, Copy)]
174pub enum InformationCriterion {
175    /// Akaike Information Criterion
176    AIC,
177    /// Bayesian Information Criterion
178    BIC,
179    /// Hannan-Quinn Information Criterion
180    HQIC,
181}
182
183/// Configuration for error correction models
184#[derive(Debug, Clone)]
185pub struct ErrorCorrectionConfig {
186    /// Maximum lags for difference terms
187    pub max_diff_lags: usize,
188    /// Whether to test for cointegration
189    pub test_cointegration: bool,
190    /// Significance level for cointegration test
191    pub cointegration_significance: f64,
192}
193
194impl Default for ErrorCorrectionConfig {
195    fn default() -> Self {
196        Self {
197            max_diff_lags: 3,
198            test_cointegration: true,
199            cointegration_significance: 0.05,
200        }
201    }
202}
203
204/// Configuration for regression with ARIMA errors (placeholder)
205#[derive(Debug, Clone)]
206pub struct ARIMAErrorsConfig {
207    /// Whether to include intercept in regression
208    pub include_intercept: bool,
209    /// Maximum iterations for fitting
210    pub max_iterations: usize,
211    /// Convergence tolerance
212    pub tolerance: f64,
213}
214
215impl Default for ARIMAErrorsConfig {
216    fn default() -> Self {
217        Self {
218            include_intercept: true,
219            max_iterations: 100,
220            tolerance: 1e-6,
221        }
222    }
223}
224
225/// Time series regression model fitter
226pub struct TimeSeriesRegression {
227    /// Random seed for reproducibility
228    pub random_seed: Option<u64>,
229}
230
231impl TimeSeriesRegression {
232    /// Create a new time series regression fitter
233    pub fn new() -> Self {
234        Self { random_seed: None }
235    }
236
237    /// Create a new fitter with random seed
238    pub fn with_seed(seed: u64) -> Self {
239        Self {
240            random_seed: Some(seed),
241        }
242    }
243
244    /// Fit a distributed lag model
245    ///
246    /// Models y_t = α + β₀x_t + β₁x_{t-1} + ... + βₖx_{t-k} + ε_t
247    ///
248    /// # Arguments
249    ///
250    /// * `y` - Dependent variable time series
251    /// * `x` - Independent variable time series
252    /// * `config` - Configuration for the model
253    ///
254    /// # Returns
255    ///
256    /// Result containing distributed lag model estimates
257    pub fn fit_distributed_lag(
258        &self,
259        y: &Array1<f64>,
260        x: &Array1<f64>,
261        config: &DistributedLagConfig,
262    ) -> RegressionResult<DistributedLagResult> {
263        checkarray_finite(y, "y")?;
264        checkarray_finite(x, "x")?;
265
266        if y.len() != x.len() {
267            return Err(TimeSeriesError::InvalidInput(
268                "Time series must have the same length".to_string(),
269            ));
270        }
271
272        if y.len() <= config.max_lag + 1 {
273            return Err(TimeSeriesError::InvalidInput(
274                "Time series too short for the specified lag".to_string(),
275            ));
276        }
277
278        // Prepare the design matrix
279        let n = y.len() - config.max_lag;
280        let p = config.max_lag + 1 + if config.include_intercept { 1 } else { 0 };
281        let mut design_matrix = Array2::zeros((n, p));
282        let mut response = Array1::zeros(n);
283
284        for i in 0..n {
285            let row_idx = config.max_lag + i;
286            response[i] = y[row_idx];
287
288            let mut col_idx = 0;
289
290            // Add intercept
291            if config.include_intercept {
292                design_matrix[[i, col_idx]] = 1.0;
293                col_idx += 1;
294            }
295
296            // Add lagged x values (including contemporaneous)
297            for lag in 0..=config.max_lag {
298                design_matrix[[i, col_idx]] = x[row_idx - lag];
299                col_idx += 1;
300            }
301        }
302
303        // Fit OLS regression
304        let regression_result = self.fit_ols_regression(&design_matrix, &response)?;
305
306        Ok(DistributedLagResult {
307            coefficients: regression_result.coefficients,
308            standard_errors: regression_result.standard_errors,
309            t_statistics: regression_result.t_statistics,
310            p_values: regression_result.p_values,
311            r_squared: regression_result.r_squared,
312            adjusted_r_squared: regression_result.adjusted_r_squared,
313            residual_sum_squares: regression_result.residual_sum_squares,
314            degrees_of_freedom: regression_result.degrees_of_freedom,
315            max_lag: config.max_lag,
316            fitted_values: regression_result.fitted_values,
317            residuals: regression_result.residuals,
318        })
319    }
320
321    /// Fit an ARDL model
322    ///
323    /// Models y_t = α + Σφᵢy_{t-i} + Σβⱼx_{t-j} + ε_t
324    ///
325    /// # Arguments
326    ///
327    /// * `y` - Dependent variable time series
328    /// * `x` - Independent variable time series
329    /// * `config` - Configuration for the model
330    ///
331    /// # Returns
332    ///
333    /// Result containing ARDL model estimates
334    pub fn fit_ardl(
335        &self,
336        y: &Array1<f64>,
337        x: &Array1<f64>,
338        config: &ARDLConfig,
339    ) -> RegressionResult<ARDLResult> {
340        checkarray_finite(y, "y")?;
341        checkarray_finite(x, "x")?;
342
343        if y.len() != x.len() {
344            return Err(TimeSeriesError::InvalidInput(
345                "Time series must have the same length".to_string(),
346            ));
347        }
348
349        let (y_lags, x_lags) = if config.auto_lag_selection {
350            self.select_optimal_lags(y, x, config)?
351        } else {
352            (config.max_y_lags, config.max_x_lags)
353        };
354
355        let max_lag = y_lags.max(x_lags);
356        if y.len() <= max_lag + 1 {
357            return Err(TimeSeriesError::InvalidInput(
358                "Time series too short for the specified lags".to_string(),
359            ));
360        }
361
362        // Prepare the design matrix
363        let n = y.len() - max_lag;
364        let p = y_lags + x_lags + 1 + if config.include_intercept { 1 } else { 0 };
365        let mut design_matrix = Array2::zeros((n, p));
366        let mut response = Array1::zeros(n);
367
368        for i in 0..n {
369            let row_idx = max_lag + i;
370            response[i] = y[row_idx];
371
372            let mut col_idx = 0;
373
374            // Add intercept
375            if config.include_intercept {
376                design_matrix[[i, col_idx]] = 1.0;
377                col_idx += 1;
378            }
379
380            // Add lagged y values
381            for lag in 1..=y_lags {
382                design_matrix[[i, col_idx]] = y[row_idx - lag];
383                col_idx += 1;
384            }
385
386            // Add lagged x values (including contemporaneous)
387            for lag in 0..=x_lags {
388                design_matrix[[i, col_idx]] = x[row_idx - lag];
389                col_idx += 1;
390            }
391        }
392
393        // Fit OLS regression
394        let regression_result = self.fit_ols_regression(&design_matrix, &response)?;
395
396        // Extract coefficients
397        let mut coeff_idx = if config.include_intercept { 1 } else { 0 };
398        let intercept = if config.include_intercept {
399            regression_result.coefficients[0]
400        } else {
401            0.0
402        };
403
404        let y_coefficients = regression_result
405            .coefficients
406            .slice(s![coeff_idx..coeff_idx + y_lags])
407            .to_owned();
408        coeff_idx += y_lags;
409
410        let x_coefficients = regression_result
411            .coefficients
412            .slice(s![coeff_idx..coeff_idx + x_lags + 1])
413            .to_owned();
414
415        // Calculate information criteria
416        let k = regression_result.coefficients.len() as f64;
417        let n_f = n as f64;
418        let log_likelihood = -0.5
419            * n_f
420            * (2.0 * std::f64::consts::PI * regression_result.residual_sum_squares / n_f).ln()
421            - 0.5 * regression_result.residual_sum_squares
422                / (regression_result.residual_sum_squares / n_f);
423        let aic = -2.0 * log_likelihood + 2.0 * k;
424        let bic = -2.0 * log_likelihood + k * n_f.ln();
425
426        Ok(ARDLResult {
427            y_coefficients,
428            x_coefficients,
429            intercept,
430            standard_errors: regression_result.standard_errors,
431            t_statistics: regression_result.t_statistics,
432            p_values: regression_result.p_values,
433            r_squared: regression_result.r_squared,
434            adjusted_r_squared: regression_result.adjusted_r_squared,
435            y_lags,
436            x_lags,
437            fitted_values: regression_result.fitted_values,
438            residuals: regression_result.residuals,
439            aic,
440            bic,
441        })
442    }
443
444    /// Fit an error correction model
445    ///
446    /// Models Δy_t = α + γ(y_{t-1} - βx_{t-1}) + Σφᵢ Δy_{t-i} + Σθⱼ Δx_{t-j} + ε_t
447    ///
448    /// # Arguments
449    ///
450    /// * `y` - Dependent variable time series
451    /// * `x` - Independent variable time series
452    /// * `config` - Configuration for the model
453    ///
454    /// # Returns
455    ///
456    /// Result containing error correction model estimates
457    pub fn fit_error_correction(
458        &self,
459        y: &Array1<f64>,
460        x: &Array1<f64>,
461        config: &ErrorCorrectionConfig,
462    ) -> RegressionResult<ErrorCorrectionResult> {
463        checkarray_finite(y, "y")?;
464        checkarray_finite(x, "x")?;
465
466        if y.len() != x.len() {
467            return Err(TimeSeriesError::InvalidInput(
468                "Time series must have the same length".to_string(),
469            ));
470        }
471
472        if y.len() <= config.max_diff_lags + 2 {
473            return Err(TimeSeriesError::InvalidInput(
474                "Time series too short for error correction model".to_string(),
475            ));
476        }
477
478        // Step 1: Estimate long-run relationship
479        let long_run_result = self.estimate_long_run_relationship(y, x)?;
480
481        // Step 2: Test for cointegration if requested
482        let cointegration_p_value = if config.test_cointegration {
483            self.test_cointegration(&long_run_result.residuals)?
484        } else {
485            0.0 // Assume cointegration
486        };
487
488        // Step 3: Estimate error correction model
489        let ecm_result = self.estimate_ecm(y, x, &long_run_result, config)?;
490
491        Ok(ErrorCorrectionResult {
492            error_correction_coeff: ecm_result.error_correction_coeff,
493            short_run_y_coeffs: ecm_result.short_run_y_coeffs,
494            short_run_x_coeffs: ecm_result.short_run_x_coeffs,
495            long_run_coeffs: long_run_result.coefficients,
496            standard_errors: ecm_result.standard_errors,
497            t_statistics: ecm_result.t_statistics,
498            p_values: ecm_result.p_values,
499            r_squared: ecm_result.r_squared,
500            fitted_values: ecm_result.fitted_values,
501            residuals: ecm_result.residuals,
502            cointegration_p_value,
503        })
504    }
505
506    /// Fit regression with ARIMA errors
507    ///
508    /// Models y_t = βx_t + ε_t, where ε_t follows an ARIMA process
509    ///
510    /// # Arguments
511    ///
512    /// * `y` - Dependent variable time series
513    /// * `x` - Independent variables matrix
514    /// * `config` - Configuration for the model
515    ///
516    /// # Returns
517    ///
518    /// Result containing regression with ARIMA errors estimates
519    pub fn fit_regression_with_arima_errors(
520        &self,
521        y: &Array1<f64>,
522        x: &Array2<f64>,
523        config: &ARIMAErrorsConfig,
524    ) -> RegressionResult<ARIMAErrorsResult> {
525        checkarray_finite(y, "y")?;
526
527        if y.len() != x.nrows() {
528            return Err(TimeSeriesError::InvalidInput(
529                "Response and design matrix must have compatible dimensions".to_string(),
530            ));
531        }
532
533        // Placeholder implementation - ARIMA models not yet implemented
534        // For now, just perform simple OLS regression
535        let mut design_matrix = x.clone();
536        if config.include_intercept {
537            let _intercept_col = Array2::<f64>::ones((x.nrows(), 1));
538            // Simple concatenation for now
539            let mut new_matrix = Array2::zeros((x.nrows(), x.ncols() + 1));
540            for i in 0..x.nrows() {
541                new_matrix[[i, 0]] = 1.0;
542                for j in 0..x.ncols() {
543                    new_matrix[[i, j + 1]] = x[[i, j]];
544                }
545            }
546            design_matrix = new_matrix;
547        }
548
549        let regression_result = self.fit_ols_regression(&design_matrix, y)?;
550
551        // Simple placeholder calculations
552        let n = y.len() as f64;
553        let k = regression_result.coefficients.len() as f64;
554        let rss = regression_result.residual_sum_squares;
555        let log_likelihood =
556            -0.5 * n * (2.0 * std::f64::consts::PI * rss / n).ln() - 0.5 * rss / (rss / n);
557        let aic = -2.0 * log_likelihood + 2.0 * k;
558        let bic = -2.0 * log_likelihood + k * n.ln();
559
560        Ok(ARIMAErrorsResult {
561            regression_coefficients: regression_result.coefficients,
562            fitted_values: regression_result.fitted_values,
563            residuals: regression_result.residuals,
564            regression_r_squared: regression_result.r_squared,
565            log_likelihood,
566            aic,
567            bic,
568            regression_std_errors: regression_result.standard_errors,
569        })
570    }
571
572    // Helper methods
573
574    fn fit_ols_regression(&self, x: &Array2<f64>, y: &Array1<f64>) -> RegressionResult<OLSResult> {
575        let n = y.len() as f64;
576        let p = x.ncols() as f64;
577
578        // Compute (X'X)^{-1}X'y
579        let xt = x.t();
580        let xtx = xt.dot(x);
581        let xty = xt.dot(y);
582
583        // Add regularization for numerical stability
584        let mut xtx_reg = xtx;
585        for i in 0..xtx_reg.nrows() {
586            xtx_reg[[i, i]] += 1e-10;
587        }
588
589        let coefficients = self.solve_linear_system_robust(&xtx_reg, &xty)?;
590        let fitted_values = x.dot(&coefficients);
591        let residuals = y - &fitted_values;
592
593        // Calculate statistics with improved numerical stability
594        let rss = residuals.mapv(|x| x * x).sum();
595
596        // More robust mean calculation
597        let y_mean = y.sum() / n;
598        let tss = y.mapv(|yi| (yi - y_mean).powi(2)).sum();
599
600        let r_squared = if tss < 1e-14 || rss.is_nan() || tss.is_nan() {
601            0.0 // If no variance in y or NaN values, R-squared is 0
602        } else {
603            let r2 = 1.0 - rss / tss;
604            if r2.is_nan() || r2.is_infinite() {
605                0.0
606            } else {
607                r2.clamp(-1.0, 1.0) // Ensure R-squared is in valid range
608            }
609        };
610
611        let adjusted_r_squared = if tss < 1e-14 || n <= p {
612            0.0
613        } else {
614            let adj_r2 = 1.0 - (rss / (n - p)) / (tss / (n - 1.0));
615            if adj_r2.is_nan() || adj_r2.is_infinite() {
616                0.0
617            } else {
618                adj_r2.clamp(-1.0, 1.0)
619            }
620        };
621
622        // Standard errors with improved numerical stability
623        let mse = if n > p { rss / (n - p) } else { 1.0 };
624        let var_coeff_matrix_result = self.invert_matrix_robust(&xtx_reg);
625        let mut standard_errors = Array1::ones(coefficients.len()); // Default to 1.0
626
627        if let Ok(var_coeff_matrix) = var_coeff_matrix_result {
628            standard_errors = Array1::zeros(coefficients.len());
629            for i in 0..coefficients.len() {
630                let variance = var_coeff_matrix[[i, i]] * mse;
631                standard_errors[i] = if variance >= 0.0 {
632                    variance.sqrt()
633                } else {
634                    1.0
635                };
636            }
637        }
638
639        // T-statistics and p-values
640        let t_statistics = Array1::from_vec(
641            coefficients
642                .iter()
643                .zip(standard_errors.iter())
644                .map(|(&coeff, &se)| if se > 0.0 { coeff / se } else { 0.0 })
645                .collect(),
646        );
647        let df = if n > p { (n - p) as i32 } else { 1 };
648        let p_values = t_statistics.mapv(|t| {
649            if t.is_finite() {
650                2.0 * (1.0 - self.t_distribution_cdf(t.abs(), df))
651            } else {
652                1.0
653            }
654        });
655
656        Ok(OLSResult {
657            coefficients,
658            standard_errors,
659            t_statistics,
660            p_values,
661            r_squared,
662            adjusted_r_squared,
663            residual_sum_squares: rss,
664            degrees_of_freedom: df as usize,
665            fitted_values,
666            residuals,
667        })
668    }
669
670    #[allow(dead_code)]
671    fn solve_linear_system(
672        &self,
673        a: &Array2<f64>,
674        b: &Array1<f64>,
675    ) -> RegressionResult<Array1<f64>> {
676        // Simple Gauss-Seidel iterative solver
677        let n = a.nrows();
678        let mut x = Array1::zeros(n);
679        let max_iter = 1000;
680        let tolerance = 1e-12;
681
682        for _iter in 0..max_iter {
683            let mut x_new = x.clone();
684
685            for i in 0..n {
686                let mut sum = 0.0;
687                for j in 0..n {
688                    if i != j {
689                        sum += a[[i, j]] * x[j];
690                    }
691                }
692
693                if a[[i, i]].abs() < f64::EPSILON {
694                    return Err(TimeSeriesError::ComputationError(
695                        "Singular matrix".to_string(),
696                    ));
697                }
698
699                x_new[i] = (b[i] - sum) / a[[i, i]];
700            }
701
702            let diff = (&x_new - &x).mapv(|x| x.abs()).sum();
703            x = x_new;
704
705            if diff < tolerance {
706                break;
707            }
708        }
709
710        Ok(x)
711    }
712
713    fn solve_linear_system_robust(
714        &self,
715        a: &Array2<f64>,
716        b: &Array1<f64>,
717    ) -> RegressionResult<Array1<f64>> {
718        let n = a.nrows();
719
720        // Create augmented matrix [A | b]
721        let mut augmented = Array2::zeros((n, n + 1));
722        for i in 0..n {
723            for j in 0..n {
724                augmented[[i, j]] = a[[i, j]];
725            }
726            augmented[[i, n]] = b[i];
727        }
728
729        // Gaussian elimination with partial pivoting
730        for i in 0..n {
731            // Find pivot
732            let mut max_row = i;
733            for k in i + 1..n {
734                if augmented[[k, i]].abs() > augmented[[max_row, i]].abs() {
735                    max_row = k;
736                }
737            }
738
739            // Swap rows if needed
740            if max_row != i {
741                for j in 0..=n {
742                    let temp = augmented[[i, j]];
743                    augmented[[i, j]] = augmented[[max_row, j]];
744                    augmented[[max_row, j]] = temp;
745                }
746            }
747
748            // Check for singularity
749            if augmented[[i, i]].abs() < 1e-12 {
750                return Err(TimeSeriesError::ComputationError(
751                    "Matrix is singular or nearly singular".to_string(),
752                ));
753            }
754
755            // Scale pivot row
756            let pivot = augmented[[i, i]];
757            for j in i..=n {
758                augmented[[i, j]] /= pivot;
759            }
760
761            // Eliminate column
762            for k in i + 1..n {
763                let factor = augmented[[k, i]];
764                for j in i..=n {
765                    augmented[[k, j]] -= factor * augmented[[i, j]];
766                }
767            }
768        }
769
770        // Back substitution
771        let mut x = Array1::zeros(n);
772        for i in (0..n).rev() {
773            x[i] = augmented[[i, n]];
774            for j in i + 1..n {
775                x[i] -= augmented[[i, j]] * x[j];
776            }
777        }
778
779        // Validate solution
780        for &val in x.iter() {
781            if !val.is_finite() {
782                return Err(TimeSeriesError::ComputationError(
783                    "Solution contains non-finite values".to_string(),
784                ));
785            }
786        }
787
788        Ok(x)
789    }
790
791    #[allow(dead_code)]
792    fn invert_matrix(&self, matrix: &Array2<f64>) -> RegressionResult<Array2<f64>> {
793        let n = matrix.nrows();
794        let mut augmented = Array2::zeros((n, 2 * n));
795
796        // Create augmented matrix [A | I]
797        for i in 0..n {
798            for j in 0..n {
799                augmented[[i, j]] = matrix[[i, j]];
800                augmented[[i, j + n]] = if i == j { 1.0 } else { 0.0 };
801            }
802        }
803
804        // Gaussian elimination with partial pivoting
805        for i in 0..n {
806            // Find pivot
807            let mut max_row = i;
808            for k in i + 1..n {
809                if augmented[[k, i]].abs() > augmented[[max_row, i]].abs() {
810                    max_row = k;
811                }
812            }
813
814            // Swap rows
815            if max_row != i {
816                for j in 0..2 * n {
817                    let temp = augmented[[i, j]];
818                    augmented[[i, j]] = augmented[[max_row, j]];
819                    augmented[[max_row, j]] = temp;
820                }
821            }
822
823            // Check for singularity
824            if augmented[[i, i]].abs() < f64::EPSILON {
825                return Err(TimeSeriesError::ComputationError(
826                    "Matrix is singular".to_string(),
827                ));
828            }
829
830            // Scale pivot row
831            let pivot = augmented[[i, i]];
832            for j in 0..2 * n {
833                augmented[[i, j]] /= pivot;
834            }
835
836            // Eliminate column
837            for k in 0..n {
838                if k != i {
839                    let factor = augmented[[k, i]];
840                    for j in 0..2 * n {
841                        augmented[[k, j]] -= factor * augmented[[i, j]];
842                    }
843                }
844            }
845        }
846
847        // Extract inverse matrix
848        let mut inverse = Array2::zeros((n, n));
849        for i in 0..n {
850            for j in 0..n {
851                inverse[[i, j]] = augmented[[i, j + n]];
852            }
853        }
854
855        Ok(inverse)
856    }
857
858    fn invert_matrix_robust(&self, matrix: &Array2<f64>) -> RegressionResult<Array2<f64>> {
859        let n = matrix.nrows();
860        if n == 0 {
861            return Err(TimeSeriesError::ComputationError(
862                "Cannot invert empty matrix".to_string(),
863            ));
864        }
865
866        let mut augmented = Array2::zeros((n, 2 * n));
867
868        // Create augmented matrix [A | I]
869        for i in 0..n {
870            for j in 0..n {
871                augmented[[i, j]] = matrix[[i, j]];
872                augmented[[i, j + n]] = if i == j { 1.0 } else { 0.0 };
873            }
874        }
875
876        // Gaussian elimination with partial pivoting
877        for i in 0..n {
878            // Find pivot
879            let mut max_row = i;
880            let mut max_val = augmented[[i, i]].abs();
881            for k in i + 1..n {
882                let val = augmented[[k, i]].abs();
883                if val > max_val {
884                    max_row = k;
885                    max_val = val;
886                }
887            }
888
889            // Swap rows if needed
890            if max_row != i {
891                for j in 0..2 * n {
892                    let temp = augmented[[i, j]];
893                    augmented[[i, j]] = augmented[[max_row, j]];
894                    augmented[[max_row, j]] = temp;
895                }
896            }
897
898            // Check for singularity with better tolerance
899            if augmented[[i, i]].abs() < 1e-12 {
900                return Err(TimeSeriesError::ComputationError(
901                    "Matrix is singular or nearly singular".to_string(),
902                ));
903            }
904
905            // Scale pivot row
906            let pivot = augmented[[i, i]];
907            for j in 0..2 * n {
908                augmented[[i, j]] /= pivot;
909            }
910
911            // Eliminate column
912            for k in 0..n {
913                if k != i {
914                    let factor = augmented[[k, i]];
915                    for j in 0..2 * n {
916                        augmented[[k, j]] -= factor * augmented[[i, j]];
917                    }
918                }
919            }
920        }
921
922        // Extract inverse matrix and validate
923        let mut inverse = Array2::zeros((n, n));
924        for i in 0..n {
925            for j in 0..n {
926                let val = augmented[[i, j + n]];
927                if !val.is_finite() {
928                    return Err(TimeSeriesError::ComputationError(
929                        "Matrix inversion produced non-finite values".to_string(),
930                    ));
931                }
932                inverse[[i, j]] = val;
933            }
934        }
935
936        Ok(inverse)
937    }
938
939    fn t_distribution_cdf(&self, t: f64, df: i32) -> f64 {
940        // Approximation for t-distribution CDF
941        if df <= 0 {
942            return 0.5;
943        }
944
945        if df >= 100 {
946            return self.normal_cdf(t);
947        }
948
949        // Use incomplete beta function approximation
950        let x = df as f64 / (df as f64 + t * t);
951        0.5 + 0.5
952            * self.incomplete_beta(x, 0.5 * df as f64, 0.5).signum()
953            * (1.0 - self.incomplete_beta(x, 0.5 * df as f64, 0.5))
954    }
955
956    fn normal_cdf(&self, x: f64) -> f64 {
957        0.5 * (1.0 + self.erf(x / (2.0_f64).sqrt()))
958    }
959
960    fn erf(&self, x: f64) -> f64 {
961        // Approximation for error function
962        let a1 = 0.254829592;
963        let a2 = -0.284496736;
964        let a3 = 1.421413741;
965        let a4 = -1.453152027;
966        let a5 = 1.061405429;
967        let p = 0.3275911;
968
969        let sign = if x < 0.0 { -1.0 } else { 1.0 };
970        let x = x.abs();
971
972        let t = 1.0 / (1.0 + p * x);
973        let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
974
975        sign * y
976    }
977
978    fn incomplete_beta(&self, x: f64, a: f64, b: f64) -> f64 {
979        if x <= 0.0 {
980            return 0.0;
981        }
982        if x >= 1.0 {
983            return 1.0;
984        }
985
986        // Continued fraction approximation
987        let mut result = x.powf(a) * (1.0 - x).powf(b) / a;
988        let mut term = result;
989
990        for n in 1..100 {
991            let n_f = n as f64;
992            term *= (a + n_f - 1.0) * x / n_f;
993            result += term;
994            if term.abs() < 1e-10 {
995                break;
996            }
997        }
998
999        result / self.beta_function(a, b)
1000    }
1001
1002    fn beta_function(&self, a: f64, b: f64) -> f64 {
1003        self.gamma_function(a) * self.gamma_function(b) / self.gamma_function(a + b)
1004    }
1005
1006    #[allow(clippy::only_used_in_recursion)]
1007    fn gamma_function(&self, x: f64) -> f64 {
1008        if x < 1.0 {
1009            return self.gamma_function(x + 1.0) / x;
1010        }
1011
1012        (2.0 * std::f64::consts::PI / x).sqrt() * (x / std::f64::consts::E).powf(x)
1013    }
1014
1015    fn select_optimal_lags(
1016        &self,
1017        y: &Array1<f64>,
1018        x: &Array1<f64>,
1019        config: &ARDLConfig,
1020    ) -> RegressionResult<(usize, usize)> {
1021        let mut best_criterion = f64::INFINITY;
1022        let mut best_lags = (1, 1);
1023
1024        for y_lags in 1..=config.max_y_lags {
1025            for x_lags in 0..=config.max_x_lags {
1026                // Create temporary config for this lag combination
1027                let temp_config = ARDLConfig {
1028                    max_y_lags: y_lags,
1029                    max_x_lags: x_lags,
1030                    auto_lag_selection: false,
1031                    ..config.clone()
1032                };
1033
1034                if let Ok(result) = self.fit_ardl_fixed_lags(y, x, &temp_config) {
1035                    let criterion = match config.selection_criterion {
1036                        InformationCriterion::AIC => result.aic,
1037                        InformationCriterion::BIC => result.bic,
1038                        InformationCriterion::HQIC => {
1039                            // Hannan-Quinn: -2*log(L) + 2*k*log(log(n))
1040                            let n = y.len() as f64;
1041                            let k = result.y_coefficients.len() + result.x_coefficients.len() + 1;
1042                            -2.0 * (-result.aic / 2.0 + k as f64) + 2.0 * k as f64 * n.ln().ln()
1043                        }
1044                    };
1045
1046                    if criterion < best_criterion {
1047                        best_criterion = criterion;
1048                        best_lags = (y_lags, x_lags);
1049                    }
1050                }
1051            }
1052        }
1053
1054        Ok(best_lags)
1055    }
1056
1057    fn fit_ardl_fixed_lags(
1058        &self,
1059        y: &Array1<f64>,
1060        x: &Array1<f64>,
1061        config: &ARDLConfig,
1062    ) -> RegressionResult<ARDLResult> {
1063        // This is a simplified version that doesn't do auto lag selection
1064        let temp_config = ARDLConfig {
1065            auto_lag_selection: false,
1066            ..config.clone()
1067        };
1068        self.fit_ardl(y, x, &temp_config)
1069    }
1070
1071    fn estimate_long_run_relationship(
1072        &self,
1073        y: &Array1<f64>,
1074        x: &Array1<f64>,
1075    ) -> RegressionResult<OLSResult> {
1076        // Simple OLS regression for long-run relationship
1077        let n = y.len();
1078        let mut design_matrix = Array2::zeros((n, 2));
1079
1080        for i in 0..n {
1081            design_matrix[[i, 0]] = 1.0; // Intercept
1082            design_matrix[[i, 1]] = x[i];
1083        }
1084
1085        self.fit_ols_regression(&design_matrix, y)
1086    }
1087
1088    fn test_cointegration(&self, residuals: &Array1<f64>) -> RegressionResult<f64> {
1089        // Simplified Engle-Granger cointegration test
1090        // Test for unit root in residuals using ADF test
1091
1092        let n = residuals.len();
1093        if n < 10 {
1094            return Ok(1.0); // Not enough data, assume no cointegration
1095        }
1096
1097        // First difference of residuals
1098        let mut diff_residuals = Array1::zeros(n - 1);
1099        for i in 1..n {
1100            diff_residuals[i - 1] = residuals[i] - residuals[i - 1];
1101        }
1102
1103        // Regression: Δε_t = α + βε_{t-1} + error
1104        let mut design_matrix = Array2::zeros((n - 1, 2));
1105        for i in 0..n - 1 {
1106            design_matrix[[i, 0]] = 1.0; // Intercept
1107            design_matrix[[i, 1]] = residuals[i]; // Lagged residual
1108        }
1109
1110        let adf_result = self.fit_ols_regression(&design_matrix, &diff_residuals)?;
1111        let t_stat = adf_result.t_statistics[1]; // t-statistic for β coefficient
1112
1113        // Approximate p-value for Engle-Granger test
1114        // This is a very simplified approximation
1115        let p_value = if t_stat < -3.5 {
1116            0.01
1117        } else if t_stat < -3.0 {
1118            0.05
1119        } else if t_stat < -2.5 {
1120            0.10
1121        } else {
1122            0.20
1123        };
1124
1125        Ok(p_value)
1126    }
1127
1128    fn estimate_ecm(
1129        &self,
1130        y: &Array1<f64>,
1131        x: &Array1<f64>,
1132        long_run_result: &OLSResult,
1133        config: &ErrorCorrectionConfig,
1134    ) -> RegressionResult<ECMResult> {
1135        let n = y.len();
1136
1137        // Calculate error correction term (lagged residuals from long-run relationship)
1138        let ect = long_run_result.residuals.slice(s![..n - 1]).to_owned();
1139
1140        // Calculate first differences
1141        let mut dy = Array1::zeros(n - 1);
1142        let mut dx = Array1::zeros(n - 1);
1143        for i in 1..n {
1144            dy[i - 1] = y[i] - y[i - 1];
1145            dx[i - 1] = x[i] - x[i - 1];
1146        }
1147
1148        // Build design matrix for ECM
1149        let max_lag = config.max_diff_lags;
1150        let start_idx = max_lag;
1151        let n_obs = n - 1 - start_idx;
1152
1153        let n_params = 1 + 1 + max_lag + max_lag; // intercept + ECT + lagged dy + lagged dx
1154        let mut design_matrix = Array2::zeros((n_obs, n_params));
1155        let mut response = Array1::zeros(n_obs);
1156
1157        for i in 0..n_obs {
1158            let idx = start_idx + i;
1159            response[i] = dy[idx];
1160
1161            let mut col_idx = 0;
1162
1163            // Intercept
1164            design_matrix[[i, col_idx]] = 1.0;
1165            col_idx += 1;
1166
1167            // Error correction term
1168            design_matrix[[i, col_idx]] = ect[idx - 1];
1169            col_idx += 1;
1170
1171            // Lagged differences of y
1172            for lag in 1..=max_lag {
1173                if idx >= lag {
1174                    design_matrix[[i, col_idx]] = dy[idx - lag];
1175                }
1176                col_idx += 1;
1177            }
1178
1179            // Lagged differences of x
1180            for lag in 0..max_lag {
1181                if idx >= lag {
1182                    design_matrix[[i, col_idx]] = dx[idx - lag];
1183                }
1184                col_idx += 1;
1185            }
1186        }
1187
1188        let regression_result = self.fit_ols_regression(&design_matrix, &response)?;
1189
1190        // Extract coefficients
1191        let error_correction_coeff = regression_result.coefficients[1];
1192        let short_run_y_coeffs = regression_result
1193            .coefficients
1194            .slice(s![2..2 + max_lag])
1195            .to_owned();
1196        let short_run_x_coeffs = regression_result
1197            .coefficients
1198            .slice(s![2 + max_lag..])
1199            .to_owned();
1200
1201        Ok(ECMResult {
1202            error_correction_coeff,
1203            short_run_y_coeffs,
1204            short_run_x_coeffs,
1205            standard_errors: regression_result.standard_errors,
1206            t_statistics: regression_result.t_statistics,
1207            p_values: regression_result.p_values,
1208            r_squared: regression_result.r_squared,
1209            fitted_values: regression_result.fitted_values,
1210            residuals: regression_result.residuals,
1211        })
1212    }
1213}
1214
1215impl Default for TimeSeriesRegression {
1216    fn default() -> Self {
1217        Self::new()
1218    }
1219}
1220
1221// Helper structs
1222
1223#[derive(Debug, Clone)]
1224struct OLSResult {
1225    coefficients: Array1<f64>,
1226    standard_errors: Array1<f64>,
1227    t_statistics: Array1<f64>,
1228    p_values: Array1<f64>,
1229    r_squared: f64,
1230    adjusted_r_squared: f64,
1231    residual_sum_squares: f64,
1232    degrees_of_freedom: usize,
1233    fitted_values: Array1<f64>,
1234    residuals: Array1<f64>,
1235}
1236
1237#[derive(Debug, Clone)]
1238struct ECMResult {
1239    error_correction_coeff: f64,
1240    short_run_y_coeffs: Array1<f64>,
1241    short_run_x_coeffs: Array1<f64>,
1242    standard_errors: Array1<f64>,
1243    t_statistics: Array1<f64>,
1244    p_values: Array1<f64>,
1245    r_squared: f64,
1246    fitted_values: Array1<f64>,
1247    residuals: Array1<f64>,
1248}
1249
1250#[cfg(test)]
1251mod tests {
1252    use super::*;
1253    use scirs2_core::ndarray::Array1;
1254
1255    #[test]
1256    fn test_distributed_lag_model() {
1257        let n = 50;
1258        let mut y = Array1::zeros(n);
1259        let mut x = Array1::zeros(n);
1260
1261        // Generate test data with distributed lag relationship
1262        for i in 2..n {
1263            x[i] = (i as f64 * 0.1).sin();
1264            y[i] = 0.5 * x[i]
1265                + 0.3 * x[i - 1]
1266                + 0.1 * x[i - 2]
1267                + 0.1 * scirs2_core::random::random::<f64>();
1268        }
1269
1270        let regression = TimeSeriesRegression::new();
1271        let config = DistributedLagConfig {
1272            max_lag: 2,
1273            ..Default::default()
1274        };
1275
1276        let result = regression.fit_distributed_lag(&y, &x, &config).unwrap();
1277
1278        assert_eq!(result.max_lag, 2);
1279        eprintln!("Distributed lag R-squared: {}", result.r_squared);
1280        assert!(result.r_squared >= -1.0 && result.r_squared <= 1.0);
1281        assert_eq!(result.coefficients.len(), 4); // intercept + 3 lag terms
1282    }
1283
1284    #[test]
1285    fn test_ardl_model() {
1286        let n = 50;
1287        let mut y = Array1::zeros(n);
1288        let mut x = Array1::zeros(n);
1289
1290        // Generate test data with ARDL relationship
1291        for i in 2..n {
1292            x[i] = (i as f64 * 0.1).sin();
1293            y[i] = 0.3 * y[i - 1]
1294                + 0.5 * x[i]
1295                + 0.2 * x[i - 1]
1296                + 0.1 * scirs2_core::random::random::<f64>();
1297        }
1298
1299        let regression = TimeSeriesRegression::new();
1300        let config = ARDLConfig {
1301            max_y_lags: 1,
1302            max_x_lags: 1,
1303            auto_lag_selection: false,
1304            ..Default::default()
1305        };
1306
1307        let result = regression.fit_ardl(&y, &x, &config).unwrap();
1308
1309        assert_eq!(result.y_lags, 1);
1310        assert_eq!(result.x_lags, 1);
1311        eprintln!("ARDL R-squared: {}", result.r_squared);
1312        assert!(result.r_squared >= -1.0 && result.r_squared <= 1.0);
1313        assert!(result.aic.is_finite());
1314        assert!(result.bic.is_finite());
1315    }
1316
1317    #[test]
1318    fn test_regression_with_arima_errors() {
1319        let n = 30;
1320        let y = Array1::from_vec(
1321            (0..n)
1322                .map(|i| i as f64 + 0.1 * scirs2_core::random::random::<f64>())
1323                .collect(),
1324        );
1325        let x = Array2::from_shape_vec((n, 1), (0..n).map(|i| (i as f64).sin()).collect()).unwrap();
1326
1327        let regression = TimeSeriesRegression::new();
1328        let config = ARIMAErrorsConfig::default();
1329
1330        let result = regression
1331            .fit_regression_with_arima_errors(&y, &x, &config)
1332            .unwrap();
1333
1334        assert!(result.regression_r_squared >= 0.0 && result.regression_r_squared <= 1.0);
1335        assert!(result.aic.is_finite());
1336        assert!(result.bic.is_finite());
1337        assert_eq!(result.fitted_values.len(), y.len());
1338    }
1339}
scirs2_series/regression.rs

scirs2_series/
regression.rs