1use crate::error::TimeSeriesError;
10use scirs2_core::ndarray::{s, Array1, Array2};
11use scirs2_core::validation::checkarray_finite;
12
13pub type RegressionResult<T> = Result<T, TimeSeriesError>;
15
16#[derive(Debug, Clone)]
18pub struct DistributedLagResult {
19 pub coefficients: Array1<f64>,
21 pub standard_errors: Array1<f64>,
23 pub t_statistics: Array1<f64>,
25 pub p_values: Array1<f64>,
27 pub r_squared: f64,
29 pub adjusted_r_squared: f64,
31 pub residual_sum_squares: f64,
33 pub degrees_of_freedom: usize,
35 pub max_lag: usize,
37 pub fitted_values: Array1<f64>,
39 pub residuals: Array1<f64>,
41}
42
43#[derive(Debug, Clone)]
45pub struct ARDLResult {
46 pub y_coefficients: Array1<f64>,
48 pub x_coefficients: Array1<f64>,
50 pub intercept: f64,
52 pub standard_errors: Array1<f64>,
54 pub t_statistics: Array1<f64>,
56 pub p_values: Array1<f64>,
58 pub r_squared: f64,
60 pub adjusted_r_squared: f64,
62 pub y_lags: usize,
64 pub x_lags: usize,
66 pub fitted_values: Array1<f64>,
68 pub residuals: Array1<f64>,
70 pub aic: f64,
72 pub bic: f64,
74}
75
76#[derive(Debug, Clone)]
78pub struct ErrorCorrectionResult {
79 pub error_correction_coeff: f64,
81 pub short_run_y_coeffs: Array1<f64>,
83 pub short_run_x_coeffs: Array1<f64>,
85 pub long_run_coeffs: Array1<f64>,
87 pub standard_errors: Array1<f64>,
89 pub t_statistics: Array1<f64>,
91 pub p_values: Array1<f64>,
93 pub r_squared: f64,
95 pub fitted_values: Array1<f64>,
97 pub residuals: Array1<f64>,
99 pub cointegration_p_value: f64,
101}
102
103#[derive(Debug, Clone)]
105pub struct ARIMAErrorsResult {
106 pub regression_coefficients: Array1<f64>,
108 pub fitted_values: Array1<f64>,
110 pub residuals: Array1<f64>,
112 pub regression_r_squared: f64,
114 pub log_likelihood: f64,
116 pub aic: f64,
118 pub bic: f64,
120 pub regression_std_errors: Array1<f64>,
122}
123
124#[derive(Debug, Clone)]
126pub struct DistributedLagConfig {
127 pub max_lag: usize,
129 pub include_intercept: bool,
131 pub significance_level: f64,
133}
134
135impl Default for DistributedLagConfig {
136 fn default() -> Self {
137 Self {
138 max_lag: 4,
139 include_intercept: true,
140 significance_level: 0.05,
141 }
142 }
143}
144
145#[derive(Debug, Clone)]
147pub struct ARDLConfig {
148 pub max_y_lags: usize,
150 pub max_x_lags: usize,
152 pub include_intercept: bool,
154 pub selection_criterion: InformationCriterion,
156 pub auto_lag_selection: bool,
158}
159
160impl Default for ARDLConfig {
161 fn default() -> Self {
162 Self {
163 max_y_lags: 4,
164 max_x_lags: 4,
165 include_intercept: true,
166 selection_criterion: InformationCriterion::AIC,
167 auto_lag_selection: true,
168 }
169 }
170}
171
172#[derive(Debug, Clone, Copy)]
174pub enum InformationCriterion {
175 AIC,
177 BIC,
179 HQIC,
181}
182
183#[derive(Debug, Clone)]
185pub struct ErrorCorrectionConfig {
186 pub max_diff_lags: usize,
188 pub test_cointegration: bool,
190 pub cointegration_significance: f64,
192}
193
194impl Default for ErrorCorrectionConfig {
195 fn default() -> Self {
196 Self {
197 max_diff_lags: 3,
198 test_cointegration: true,
199 cointegration_significance: 0.05,
200 }
201 }
202}
203
204#[derive(Debug, Clone)]
206pub struct ARIMAErrorsConfig {
207 pub include_intercept: bool,
209 pub max_iterations: usize,
211 pub tolerance: f64,
213}
214
215impl Default for ARIMAErrorsConfig {
216 fn default() -> Self {
217 Self {
218 include_intercept: true,
219 max_iterations: 100,
220 tolerance: 1e-6,
221 }
222 }
223}
224
225pub struct TimeSeriesRegression {
227 pub random_seed: Option<u64>,
229}
230
231impl TimeSeriesRegression {
232 pub fn new() -> Self {
234 Self { random_seed: None }
235 }
236
237 pub fn with_seed(seed: u64) -> Self {
239 Self {
240 random_seed: Some(seed),
241 }
242 }
243
244 pub fn fit_distributed_lag(
258 &self,
259 y: &Array1<f64>,
260 x: &Array1<f64>,
261 config: &DistributedLagConfig,
262 ) -> RegressionResult<DistributedLagResult> {
263 checkarray_finite(y, "y")?;
264 checkarray_finite(x, "x")?;
265
266 if y.len() != x.len() {
267 return Err(TimeSeriesError::InvalidInput(
268 "Time series must have the same length".to_string(),
269 ));
270 }
271
272 if y.len() <= config.max_lag + 1 {
273 return Err(TimeSeriesError::InvalidInput(
274 "Time series too short for the specified lag".to_string(),
275 ));
276 }
277
278 let n = y.len() - config.max_lag;
280 let p = config.max_lag + 1 + if config.include_intercept { 1 } else { 0 };
281 let mut design_matrix = Array2::zeros((n, p));
282 let mut response = Array1::zeros(n);
283
284 for i in 0..n {
285 let row_idx = config.max_lag + i;
286 response[i] = y[row_idx];
287
288 let mut col_idx = 0;
289
290 if config.include_intercept {
292 design_matrix[[i, col_idx]] = 1.0;
293 col_idx += 1;
294 }
295
296 for lag in 0..=config.max_lag {
298 design_matrix[[i, col_idx]] = x[row_idx - lag];
299 col_idx += 1;
300 }
301 }
302
303 let regression_result = self.fit_ols_regression(&design_matrix, &response)?;
305
306 Ok(DistributedLagResult {
307 coefficients: regression_result.coefficients,
308 standard_errors: regression_result.standard_errors,
309 t_statistics: regression_result.t_statistics,
310 p_values: regression_result.p_values,
311 r_squared: regression_result.r_squared,
312 adjusted_r_squared: regression_result.adjusted_r_squared,
313 residual_sum_squares: regression_result.residual_sum_squares,
314 degrees_of_freedom: regression_result.degrees_of_freedom,
315 max_lag: config.max_lag,
316 fitted_values: regression_result.fitted_values,
317 residuals: regression_result.residuals,
318 })
319 }
320
321 pub fn fit_ardl(
335 &self,
336 y: &Array1<f64>,
337 x: &Array1<f64>,
338 config: &ARDLConfig,
339 ) -> RegressionResult<ARDLResult> {
340 checkarray_finite(y, "y")?;
341 checkarray_finite(x, "x")?;
342
343 if y.len() != x.len() {
344 return Err(TimeSeriesError::InvalidInput(
345 "Time series must have the same length".to_string(),
346 ));
347 }
348
349 let (y_lags, x_lags) = if config.auto_lag_selection {
350 self.select_optimal_lags(y, x, config)?
351 } else {
352 (config.max_y_lags, config.max_x_lags)
353 };
354
355 let max_lag = y_lags.max(x_lags);
356 if y.len() <= max_lag + 1 {
357 return Err(TimeSeriesError::InvalidInput(
358 "Time series too short for the specified lags".to_string(),
359 ));
360 }
361
362 let n = y.len() - max_lag;
364 let p = y_lags + x_lags + 1 + if config.include_intercept { 1 } else { 0 };
365 let mut design_matrix = Array2::zeros((n, p));
366 let mut response = Array1::zeros(n);
367
368 for i in 0..n {
369 let row_idx = max_lag + i;
370 response[i] = y[row_idx];
371
372 let mut col_idx = 0;
373
374 if config.include_intercept {
376 design_matrix[[i, col_idx]] = 1.0;
377 col_idx += 1;
378 }
379
380 for lag in 1..=y_lags {
382 design_matrix[[i, col_idx]] = y[row_idx - lag];
383 col_idx += 1;
384 }
385
386 for lag in 0..=x_lags {
388 design_matrix[[i, col_idx]] = x[row_idx - lag];
389 col_idx += 1;
390 }
391 }
392
393 let regression_result = self.fit_ols_regression(&design_matrix, &response)?;
395
396 let mut coeff_idx = if config.include_intercept { 1 } else { 0 };
398 let intercept = if config.include_intercept {
399 regression_result.coefficients[0]
400 } else {
401 0.0
402 };
403
404 let y_coefficients = regression_result
405 .coefficients
406 .slice(s![coeff_idx..coeff_idx + y_lags])
407 .to_owned();
408 coeff_idx += y_lags;
409
410 let x_coefficients = regression_result
411 .coefficients
412 .slice(s![coeff_idx..coeff_idx + x_lags + 1])
413 .to_owned();
414
415 let k = regression_result.coefficients.len() as f64;
417 let n_f = n as f64;
418 let log_likelihood = -0.5
419 * n_f
420 * (2.0 * std::f64::consts::PI * regression_result.residual_sum_squares / n_f).ln()
421 - 0.5 * regression_result.residual_sum_squares
422 / (regression_result.residual_sum_squares / n_f);
423 let aic = -2.0 * log_likelihood + 2.0 * k;
424 let bic = -2.0 * log_likelihood + k * n_f.ln();
425
426 Ok(ARDLResult {
427 y_coefficients,
428 x_coefficients,
429 intercept,
430 standard_errors: regression_result.standard_errors,
431 t_statistics: regression_result.t_statistics,
432 p_values: regression_result.p_values,
433 r_squared: regression_result.r_squared,
434 adjusted_r_squared: regression_result.adjusted_r_squared,
435 y_lags,
436 x_lags,
437 fitted_values: regression_result.fitted_values,
438 residuals: regression_result.residuals,
439 aic,
440 bic,
441 })
442 }
443
444 pub fn fit_error_correction(
458 &self,
459 y: &Array1<f64>,
460 x: &Array1<f64>,
461 config: &ErrorCorrectionConfig,
462 ) -> RegressionResult<ErrorCorrectionResult> {
463 checkarray_finite(y, "y")?;
464 checkarray_finite(x, "x")?;
465
466 if y.len() != x.len() {
467 return Err(TimeSeriesError::InvalidInput(
468 "Time series must have the same length".to_string(),
469 ));
470 }
471
472 if y.len() <= config.max_diff_lags + 2 {
473 return Err(TimeSeriesError::InvalidInput(
474 "Time series too short for error correction model".to_string(),
475 ));
476 }
477
478 let long_run_result = self.estimate_long_run_relationship(y, x)?;
480
481 let cointegration_p_value = if config.test_cointegration {
483 self.test_cointegration(&long_run_result.residuals)?
484 } else {
485 0.0 };
487
488 let ecm_result = self.estimate_ecm(y, x, &long_run_result, config)?;
490
491 Ok(ErrorCorrectionResult {
492 error_correction_coeff: ecm_result.error_correction_coeff,
493 short_run_y_coeffs: ecm_result.short_run_y_coeffs,
494 short_run_x_coeffs: ecm_result.short_run_x_coeffs,
495 long_run_coeffs: long_run_result.coefficients,
496 standard_errors: ecm_result.standard_errors,
497 t_statistics: ecm_result.t_statistics,
498 p_values: ecm_result.p_values,
499 r_squared: ecm_result.r_squared,
500 fitted_values: ecm_result.fitted_values,
501 residuals: ecm_result.residuals,
502 cointegration_p_value,
503 })
504 }
505
506 pub fn fit_regression_with_arima_errors(
520 &self,
521 y: &Array1<f64>,
522 x: &Array2<f64>,
523 config: &ARIMAErrorsConfig,
524 ) -> RegressionResult<ARIMAErrorsResult> {
525 checkarray_finite(y, "y")?;
526
527 if y.len() != x.nrows() {
528 return Err(TimeSeriesError::InvalidInput(
529 "Response and design matrix must have compatible dimensions".to_string(),
530 ));
531 }
532
533 let mut design_matrix = x.clone();
536 if config.include_intercept {
537 let _intercept_col = Array2::<f64>::ones((x.nrows(), 1));
538 let mut new_matrix = Array2::zeros((x.nrows(), x.ncols() + 1));
540 for i in 0..x.nrows() {
541 new_matrix[[i, 0]] = 1.0;
542 for j in 0..x.ncols() {
543 new_matrix[[i, j + 1]] = x[[i, j]];
544 }
545 }
546 design_matrix = new_matrix;
547 }
548
549 let regression_result = self.fit_ols_regression(&design_matrix, y)?;
550
551 let n = y.len() as f64;
553 let k = regression_result.coefficients.len() as f64;
554 let rss = regression_result.residual_sum_squares;
555 let log_likelihood =
556 -0.5 * n * (2.0 * std::f64::consts::PI * rss / n).ln() - 0.5 * rss / (rss / n);
557 let aic = -2.0 * log_likelihood + 2.0 * k;
558 let bic = -2.0 * log_likelihood + k * n.ln();
559
560 Ok(ARIMAErrorsResult {
561 regression_coefficients: regression_result.coefficients,
562 fitted_values: regression_result.fitted_values,
563 residuals: regression_result.residuals,
564 regression_r_squared: regression_result.r_squared,
565 log_likelihood,
566 aic,
567 bic,
568 regression_std_errors: regression_result.standard_errors,
569 })
570 }
571
572 fn fit_ols_regression(&self, x: &Array2<f64>, y: &Array1<f64>) -> RegressionResult<OLSResult> {
575 let n = y.len() as f64;
576 let p = x.ncols() as f64;
577
578 let xt = x.t();
580 let xtx = xt.dot(x);
581 let xty = xt.dot(y);
582
583 let mut xtx_reg = xtx;
585 for i in 0..xtx_reg.nrows() {
586 xtx_reg[[i, i]] += 1e-10;
587 }
588
589 let coefficients = self.solve_linear_system_robust(&xtx_reg, &xty)?;
590 let fitted_values = x.dot(&coefficients);
591 let residuals = y - &fitted_values;
592
593 let rss = residuals.mapv(|x| x * x).sum();
595
596 let y_mean = y.sum() / n;
598 let tss = y.mapv(|yi| (yi - y_mean).powi(2)).sum();
599
600 let r_squared = if tss < 1e-14 || rss.is_nan() || tss.is_nan() {
601 0.0 } else {
603 let r2 = 1.0 - rss / tss;
604 if r2.is_nan() || r2.is_infinite() {
605 0.0
606 } else {
607 r2.clamp(-1.0, 1.0) }
609 };
610
611 let adjusted_r_squared = if tss < 1e-14 || n <= p {
612 0.0
613 } else {
614 let adj_r2 = 1.0 - (rss / (n - p)) / (tss / (n - 1.0));
615 if adj_r2.is_nan() || adj_r2.is_infinite() {
616 0.0
617 } else {
618 adj_r2.clamp(-1.0, 1.0)
619 }
620 };
621
622 let mse = if n > p { rss / (n - p) } else { 1.0 };
624 let var_coeff_matrix_result = self.invert_matrix_robust(&xtx_reg);
625 let mut standard_errors = Array1::ones(coefficients.len()); if let Ok(var_coeff_matrix) = var_coeff_matrix_result {
628 standard_errors = Array1::zeros(coefficients.len());
629 for i in 0..coefficients.len() {
630 let variance = var_coeff_matrix[[i, i]] * mse;
631 standard_errors[i] = if variance >= 0.0 {
632 variance.sqrt()
633 } else {
634 1.0
635 };
636 }
637 }
638
639 let t_statistics = Array1::from_vec(
641 coefficients
642 .iter()
643 .zip(standard_errors.iter())
644 .map(|(&coeff, &se)| if se > 0.0 { coeff / se } else { 0.0 })
645 .collect(),
646 );
647 let df = if n > p { (n - p) as i32 } else { 1 };
648 let p_values = t_statistics.mapv(|t| {
649 if t.is_finite() {
650 2.0 * (1.0 - self.t_distribution_cdf(t.abs(), df))
651 } else {
652 1.0
653 }
654 });
655
656 Ok(OLSResult {
657 coefficients,
658 standard_errors,
659 t_statistics,
660 p_values,
661 r_squared,
662 adjusted_r_squared,
663 residual_sum_squares: rss,
664 degrees_of_freedom: df as usize,
665 fitted_values,
666 residuals,
667 })
668 }
669
670 #[allow(dead_code)]
671 fn solve_linear_system(
672 &self,
673 a: &Array2<f64>,
674 b: &Array1<f64>,
675 ) -> RegressionResult<Array1<f64>> {
676 let n = a.nrows();
678 let mut x = Array1::zeros(n);
679 let max_iter = 1000;
680 let tolerance = 1e-12;
681
682 for _iter in 0..max_iter {
683 let mut x_new = x.clone();
684
685 for i in 0..n {
686 let mut sum = 0.0;
687 for j in 0..n {
688 if i != j {
689 sum += a[[i, j]] * x[j];
690 }
691 }
692
693 if a[[i, i]].abs() < f64::EPSILON {
694 return Err(TimeSeriesError::ComputationError(
695 "Singular matrix".to_string(),
696 ));
697 }
698
699 x_new[i] = (b[i] - sum) / a[[i, i]];
700 }
701
702 let diff = (&x_new - &x).mapv(|x| x.abs()).sum();
703 x = x_new;
704
705 if diff < tolerance {
706 break;
707 }
708 }
709
710 Ok(x)
711 }
712
713 fn solve_linear_system_robust(
714 &self,
715 a: &Array2<f64>,
716 b: &Array1<f64>,
717 ) -> RegressionResult<Array1<f64>> {
718 let n = a.nrows();
719
720 let mut augmented = Array2::zeros((n, n + 1));
722 for i in 0..n {
723 for j in 0..n {
724 augmented[[i, j]] = a[[i, j]];
725 }
726 augmented[[i, n]] = b[i];
727 }
728
729 for i in 0..n {
731 let mut max_row = i;
733 for k in i + 1..n {
734 if augmented[[k, i]].abs() > augmented[[max_row, i]].abs() {
735 max_row = k;
736 }
737 }
738
739 if max_row != i {
741 for j in 0..=n {
742 let temp = augmented[[i, j]];
743 augmented[[i, j]] = augmented[[max_row, j]];
744 augmented[[max_row, j]] = temp;
745 }
746 }
747
748 if augmented[[i, i]].abs() < 1e-12 {
750 return Err(TimeSeriesError::ComputationError(
751 "Matrix is singular or nearly singular".to_string(),
752 ));
753 }
754
755 let pivot = augmented[[i, i]];
757 for j in i..=n {
758 augmented[[i, j]] /= pivot;
759 }
760
761 for k in i + 1..n {
763 let factor = augmented[[k, i]];
764 for j in i..=n {
765 augmented[[k, j]] -= factor * augmented[[i, j]];
766 }
767 }
768 }
769
770 let mut x = Array1::zeros(n);
772 for i in (0..n).rev() {
773 x[i] = augmented[[i, n]];
774 for j in i + 1..n {
775 x[i] -= augmented[[i, j]] * x[j];
776 }
777 }
778
779 for &val in x.iter() {
781 if !val.is_finite() {
782 return Err(TimeSeriesError::ComputationError(
783 "Solution contains non-finite values".to_string(),
784 ));
785 }
786 }
787
788 Ok(x)
789 }
790
791 #[allow(dead_code)]
792 fn invert_matrix(&self, matrix: &Array2<f64>) -> RegressionResult<Array2<f64>> {
793 let n = matrix.nrows();
794 let mut augmented = Array2::zeros((n, 2 * n));
795
796 for i in 0..n {
798 for j in 0..n {
799 augmented[[i, j]] = matrix[[i, j]];
800 augmented[[i, j + n]] = if i == j { 1.0 } else { 0.0 };
801 }
802 }
803
804 for i in 0..n {
806 let mut max_row = i;
808 for k in i + 1..n {
809 if augmented[[k, i]].abs() > augmented[[max_row, i]].abs() {
810 max_row = k;
811 }
812 }
813
814 if max_row != i {
816 for j in 0..2 * n {
817 let temp = augmented[[i, j]];
818 augmented[[i, j]] = augmented[[max_row, j]];
819 augmented[[max_row, j]] = temp;
820 }
821 }
822
823 if augmented[[i, i]].abs() < f64::EPSILON {
825 return Err(TimeSeriesError::ComputationError(
826 "Matrix is singular".to_string(),
827 ));
828 }
829
830 let pivot = augmented[[i, i]];
832 for j in 0..2 * n {
833 augmented[[i, j]] /= pivot;
834 }
835
836 for k in 0..n {
838 if k != i {
839 let factor = augmented[[k, i]];
840 for j in 0..2 * n {
841 augmented[[k, j]] -= factor * augmented[[i, j]];
842 }
843 }
844 }
845 }
846
847 let mut inverse = Array2::zeros((n, n));
849 for i in 0..n {
850 for j in 0..n {
851 inverse[[i, j]] = augmented[[i, j + n]];
852 }
853 }
854
855 Ok(inverse)
856 }
857
858 fn invert_matrix_robust(&self, matrix: &Array2<f64>) -> RegressionResult<Array2<f64>> {
859 let n = matrix.nrows();
860 if n == 0 {
861 return Err(TimeSeriesError::ComputationError(
862 "Cannot invert empty matrix".to_string(),
863 ));
864 }
865
866 let mut augmented = Array2::zeros((n, 2 * n));
867
868 for i in 0..n {
870 for j in 0..n {
871 augmented[[i, j]] = matrix[[i, j]];
872 augmented[[i, j + n]] = if i == j { 1.0 } else { 0.0 };
873 }
874 }
875
876 for i in 0..n {
878 let mut max_row = i;
880 let mut max_val = augmented[[i, i]].abs();
881 for k in i + 1..n {
882 let val = augmented[[k, i]].abs();
883 if val > max_val {
884 max_row = k;
885 max_val = val;
886 }
887 }
888
889 if max_row != i {
891 for j in 0..2 * n {
892 let temp = augmented[[i, j]];
893 augmented[[i, j]] = augmented[[max_row, j]];
894 augmented[[max_row, j]] = temp;
895 }
896 }
897
898 if augmented[[i, i]].abs() < 1e-12 {
900 return Err(TimeSeriesError::ComputationError(
901 "Matrix is singular or nearly singular".to_string(),
902 ));
903 }
904
905 let pivot = augmented[[i, i]];
907 for j in 0..2 * n {
908 augmented[[i, j]] /= pivot;
909 }
910
911 for k in 0..n {
913 if k != i {
914 let factor = augmented[[k, i]];
915 for j in 0..2 * n {
916 augmented[[k, j]] -= factor * augmented[[i, j]];
917 }
918 }
919 }
920 }
921
922 let mut inverse = Array2::zeros((n, n));
924 for i in 0..n {
925 for j in 0..n {
926 let val = augmented[[i, j + n]];
927 if !val.is_finite() {
928 return Err(TimeSeriesError::ComputationError(
929 "Matrix inversion produced non-finite values".to_string(),
930 ));
931 }
932 inverse[[i, j]] = val;
933 }
934 }
935
936 Ok(inverse)
937 }
938
939 fn t_distribution_cdf(&self, t: f64, df: i32) -> f64 {
940 if df <= 0 {
942 return 0.5;
943 }
944
945 if df >= 100 {
946 return self.normal_cdf(t);
947 }
948
949 let x = df as f64 / (df as f64 + t * t);
951 0.5 + 0.5
952 * self.incomplete_beta(x, 0.5 * df as f64, 0.5).signum()
953 * (1.0 - self.incomplete_beta(x, 0.5 * df as f64, 0.5))
954 }
955
956 fn normal_cdf(&self, x: f64) -> f64 {
957 0.5 * (1.0 + self.erf(x / (2.0_f64).sqrt()))
958 }
959
960 fn erf(&self, x: f64) -> f64 {
961 let a1 = 0.254829592;
963 let a2 = -0.284496736;
964 let a3 = 1.421413741;
965 let a4 = -1.453152027;
966 let a5 = 1.061405429;
967 let p = 0.3275911;
968
969 let sign = if x < 0.0 { -1.0 } else { 1.0 };
970 let x = x.abs();
971
972 let t = 1.0 / (1.0 + p * x);
973 let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
974
975 sign * y
976 }
977
978 fn incomplete_beta(&self, x: f64, a: f64, b: f64) -> f64 {
979 if x <= 0.0 {
980 return 0.0;
981 }
982 if x >= 1.0 {
983 return 1.0;
984 }
985
986 let mut result = x.powf(a) * (1.0 - x).powf(b) / a;
988 let mut term = result;
989
990 for n in 1..100 {
991 let n_f = n as f64;
992 term *= (a + n_f - 1.0) * x / n_f;
993 result += term;
994 if term.abs() < 1e-10 {
995 break;
996 }
997 }
998
999 result / self.beta_function(a, b)
1000 }
1001
1002 fn beta_function(&self, a: f64, b: f64) -> f64 {
1003 self.gamma_function(a) * self.gamma_function(b) / self.gamma_function(a + b)
1004 }
1005
1006 #[allow(clippy::only_used_in_recursion)]
1007 fn gamma_function(&self, x: f64) -> f64 {
1008 if x < 1.0 {
1009 return self.gamma_function(x + 1.0) / x;
1010 }
1011
1012 (2.0 * std::f64::consts::PI / x).sqrt() * (x / std::f64::consts::E).powf(x)
1013 }
1014
1015 fn select_optimal_lags(
1016 &self,
1017 y: &Array1<f64>,
1018 x: &Array1<f64>,
1019 config: &ARDLConfig,
1020 ) -> RegressionResult<(usize, usize)> {
1021 let mut best_criterion = f64::INFINITY;
1022 let mut best_lags = (1, 1);
1023
1024 for y_lags in 1..=config.max_y_lags {
1025 for x_lags in 0..=config.max_x_lags {
1026 let temp_config = ARDLConfig {
1028 max_y_lags: y_lags,
1029 max_x_lags: x_lags,
1030 auto_lag_selection: false,
1031 ..config.clone()
1032 };
1033
1034 if let Ok(result) = self.fit_ardl_fixed_lags(y, x, &temp_config) {
1035 let criterion = match config.selection_criterion {
1036 InformationCriterion::AIC => result.aic,
1037 InformationCriterion::BIC => result.bic,
1038 InformationCriterion::HQIC => {
1039 let n = y.len() as f64;
1041 let k = result.y_coefficients.len() + result.x_coefficients.len() + 1;
1042 -2.0 * (-result.aic / 2.0 + k as f64) + 2.0 * k as f64 * n.ln().ln()
1043 }
1044 };
1045
1046 if criterion < best_criterion {
1047 best_criterion = criterion;
1048 best_lags = (y_lags, x_lags);
1049 }
1050 }
1051 }
1052 }
1053
1054 Ok(best_lags)
1055 }
1056
1057 fn fit_ardl_fixed_lags(
1058 &self,
1059 y: &Array1<f64>,
1060 x: &Array1<f64>,
1061 config: &ARDLConfig,
1062 ) -> RegressionResult<ARDLResult> {
1063 let temp_config = ARDLConfig {
1065 auto_lag_selection: false,
1066 ..config.clone()
1067 };
1068 self.fit_ardl(y, x, &temp_config)
1069 }
1070
1071 fn estimate_long_run_relationship(
1072 &self,
1073 y: &Array1<f64>,
1074 x: &Array1<f64>,
1075 ) -> RegressionResult<OLSResult> {
1076 let n = y.len();
1078 let mut design_matrix = Array2::zeros((n, 2));
1079
1080 for i in 0..n {
1081 design_matrix[[i, 0]] = 1.0; design_matrix[[i, 1]] = x[i];
1083 }
1084
1085 self.fit_ols_regression(&design_matrix, y)
1086 }
1087
1088 fn test_cointegration(&self, residuals: &Array1<f64>) -> RegressionResult<f64> {
1089 let n = residuals.len();
1093 if n < 10 {
1094 return Ok(1.0); }
1096
1097 let mut diff_residuals = Array1::zeros(n - 1);
1099 for i in 1..n {
1100 diff_residuals[i - 1] = residuals[i] - residuals[i - 1];
1101 }
1102
1103 let mut design_matrix = Array2::zeros((n - 1, 2));
1105 for i in 0..n - 1 {
1106 design_matrix[[i, 0]] = 1.0; design_matrix[[i, 1]] = residuals[i]; }
1109
1110 let adf_result = self.fit_ols_regression(&design_matrix, &diff_residuals)?;
1111 let t_stat = adf_result.t_statistics[1]; let p_value = if t_stat < -3.5 {
1116 0.01
1117 } else if t_stat < -3.0 {
1118 0.05
1119 } else if t_stat < -2.5 {
1120 0.10
1121 } else {
1122 0.20
1123 };
1124
1125 Ok(p_value)
1126 }
1127
1128 fn estimate_ecm(
1129 &self,
1130 y: &Array1<f64>,
1131 x: &Array1<f64>,
1132 long_run_result: &OLSResult,
1133 config: &ErrorCorrectionConfig,
1134 ) -> RegressionResult<ECMResult> {
1135 let n = y.len();
1136
1137 let ect = long_run_result.residuals.slice(s![..n - 1]).to_owned();
1139
1140 let mut dy = Array1::zeros(n - 1);
1142 let mut dx = Array1::zeros(n - 1);
1143 for i in 1..n {
1144 dy[i - 1] = y[i] - y[i - 1];
1145 dx[i - 1] = x[i] - x[i - 1];
1146 }
1147
1148 let max_lag = config.max_diff_lags;
1150 let start_idx = max_lag;
1151 let n_obs = n - 1 - start_idx;
1152
1153 let n_params = 1 + 1 + max_lag + max_lag; let mut design_matrix = Array2::zeros((n_obs, n_params));
1155 let mut response = Array1::zeros(n_obs);
1156
1157 for i in 0..n_obs {
1158 let idx = start_idx + i;
1159 response[i] = dy[idx];
1160
1161 let mut col_idx = 0;
1162
1163 design_matrix[[i, col_idx]] = 1.0;
1165 col_idx += 1;
1166
1167 design_matrix[[i, col_idx]] = ect[idx - 1];
1169 col_idx += 1;
1170
1171 for lag in 1..=max_lag {
1173 if idx >= lag {
1174 design_matrix[[i, col_idx]] = dy[idx - lag];
1175 }
1176 col_idx += 1;
1177 }
1178
1179 for lag in 0..max_lag {
1181 if idx >= lag {
1182 design_matrix[[i, col_idx]] = dx[idx - lag];
1183 }
1184 col_idx += 1;
1185 }
1186 }
1187
1188 let regression_result = self.fit_ols_regression(&design_matrix, &response)?;
1189
1190 let error_correction_coeff = regression_result.coefficients[1];
1192 let short_run_y_coeffs = regression_result
1193 .coefficients
1194 .slice(s![2..2 + max_lag])
1195 .to_owned();
1196 let short_run_x_coeffs = regression_result
1197 .coefficients
1198 .slice(s![2 + max_lag..])
1199 .to_owned();
1200
1201 Ok(ECMResult {
1202 error_correction_coeff,
1203 short_run_y_coeffs,
1204 short_run_x_coeffs,
1205 standard_errors: regression_result.standard_errors,
1206 t_statistics: regression_result.t_statistics,
1207 p_values: regression_result.p_values,
1208 r_squared: regression_result.r_squared,
1209 fitted_values: regression_result.fitted_values,
1210 residuals: regression_result.residuals,
1211 })
1212 }
1213}
1214
1215impl Default for TimeSeriesRegression {
1216 fn default() -> Self {
1217 Self::new()
1218 }
1219}
1220
1221#[derive(Debug, Clone)]
1224struct OLSResult {
1225 coefficients: Array1<f64>,
1226 standard_errors: Array1<f64>,
1227 t_statistics: Array1<f64>,
1228 p_values: Array1<f64>,
1229 r_squared: f64,
1230 adjusted_r_squared: f64,
1231 residual_sum_squares: f64,
1232 degrees_of_freedom: usize,
1233 fitted_values: Array1<f64>,
1234 residuals: Array1<f64>,
1235}
1236
1237#[derive(Debug, Clone)]
1238struct ECMResult {
1239 error_correction_coeff: f64,
1240 short_run_y_coeffs: Array1<f64>,
1241 short_run_x_coeffs: Array1<f64>,
1242 standard_errors: Array1<f64>,
1243 t_statistics: Array1<f64>,
1244 p_values: Array1<f64>,
1245 r_squared: f64,
1246 fitted_values: Array1<f64>,
1247 residuals: Array1<f64>,
1248}
1249
1250#[cfg(test)]
1251mod tests {
1252 use super::*;
1253 use scirs2_core::ndarray::Array1;
1254
1255 #[test]
1256 fn test_distributed_lag_model() {
1257 let n = 50;
1258 let mut y = Array1::zeros(n);
1259 let mut x = Array1::zeros(n);
1260
1261 for i in 2..n {
1263 x[i] = (i as f64 * 0.1).sin();
1264 y[i] = 0.5 * x[i]
1265 + 0.3 * x[i - 1]
1266 + 0.1 * x[i - 2]
1267 + 0.1 * scirs2_core::random::random::<f64>();
1268 }
1269
1270 let regression = TimeSeriesRegression::new();
1271 let config = DistributedLagConfig {
1272 max_lag: 2,
1273 ..Default::default()
1274 };
1275
1276 let result = regression.fit_distributed_lag(&y, &x, &config).unwrap();
1277
1278 assert_eq!(result.max_lag, 2);
1279 eprintln!("Distributed lag R-squared: {}", result.r_squared);
1280 assert!(result.r_squared >= -1.0 && result.r_squared <= 1.0);
1281 assert_eq!(result.coefficients.len(), 4); }
1283
1284 #[test]
1285 fn test_ardl_model() {
1286 let n = 50;
1287 let mut y = Array1::zeros(n);
1288 let mut x = Array1::zeros(n);
1289
1290 for i in 2..n {
1292 x[i] = (i as f64 * 0.1).sin();
1293 y[i] = 0.3 * y[i - 1]
1294 + 0.5 * x[i]
1295 + 0.2 * x[i - 1]
1296 + 0.1 * scirs2_core::random::random::<f64>();
1297 }
1298
1299 let regression = TimeSeriesRegression::new();
1300 let config = ARDLConfig {
1301 max_y_lags: 1,
1302 max_x_lags: 1,
1303 auto_lag_selection: false,
1304 ..Default::default()
1305 };
1306
1307 let result = regression.fit_ardl(&y, &x, &config).unwrap();
1308
1309 assert_eq!(result.y_lags, 1);
1310 assert_eq!(result.x_lags, 1);
1311 eprintln!("ARDL R-squared: {}", result.r_squared);
1312 assert!(result.r_squared >= -1.0 && result.r_squared <= 1.0);
1313 assert!(result.aic.is_finite());
1314 assert!(result.bic.is_finite());
1315 }
1316
1317 #[test]
1318 fn test_regression_with_arima_errors() {
1319 let n = 30;
1320 let y = Array1::from_vec(
1321 (0..n)
1322 .map(|i| i as f64 + 0.1 * scirs2_core::random::random::<f64>())
1323 .collect(),
1324 );
1325 let x = Array2::from_shape_vec((n, 1), (0..n).map(|i| (i as f64).sin()).collect()).unwrap();
1326
1327 let regression = TimeSeriesRegression::new();
1328 let config = ARIMAErrorsConfig::default();
1329
1330 let result = regression
1331 .fit_regression_with_arima_errors(&y, &x, &config)
1332 .unwrap();
1333
1334 assert!(result.regression_r_squared >= 0.0 && result.regression_r_squared <= 1.0);
1335 assert!(result.aic.is_finite());
1336 assert!(result.bic.is_finite());
1337 assert_eq!(result.fitted_values.len(), y.len());
1338 }
1339}