1use u_numflow::matrix::Matrix;
20use u_numflow::special;
21use u_numflow::stats;
22
/// Result of [`simple_linear_regression`]: the fitted line
/// `y = intercept + slope * x` plus the standard inferential statistics.
#[derive(Debug, Clone)]
pub struct SimpleRegressionResult {
    /// Estimated slope (beta_1).
    pub slope: f64,
    /// Estimated intercept (beta_0).
    pub intercept: f64,
    /// Coefficient of determination (defined as 1.0 when `y` is constant).
    pub r_squared: f64,
    /// R² adjusted for degrees of freedom (n - 2 residual df).
    pub adjusted_r_squared: f64,
    /// Standard error of the slope estimate.
    pub slope_se: f64,
    /// Standard error of the intercept estimate.
    pub intercept_se: f64,
    /// t statistic for the slope (slope / slope_se).
    pub slope_t: f64,
    /// t statistic for the intercept.
    pub intercept_t: f64,
    /// Two-sided p-value for the slope (t distribution, n - 2 df).
    pub slope_p: f64,
    /// Two-sided p-value for the intercept.
    pub intercept_p: f64,
    /// Residual standard error, sqrt(SS_res / (n - 2)).
    pub residual_se: f64,
    /// Overall F statistic; for simple regression equals slope_t².
    pub f_statistic: f64,
    /// p-value of the overall F test against F(1, n - 2).
    pub f_p_value: f64,
    /// Per-observation residuals, y_i - fitted_i.
    pub residuals: Vec<f64>,
    /// Per-observation fitted values, intercept + slope * x_i.
    pub fitted: Vec<f64>,
    /// Number of observations used in the fit.
    pub n: usize,
}
59
/// Result of [`multiple_linear_regression`]: OLS coefficients for an
/// intercept plus `p` predictors, with the associated inference.
#[derive(Debug, Clone)]
pub struct MultipleRegressionResult {
    /// Fitted coefficients, length p + 1; index 0 is the intercept.
    pub coefficients: Vec<f64>,
    /// Standard error of each coefficient (same order as `coefficients`).
    pub std_errors: Vec<f64>,
    /// t statistic of each coefficient.
    pub t_statistics: Vec<f64>,
    /// Two-sided p-value of each coefficient (t distribution, n - p - 1 df).
    pub p_values: Vec<f64>,
    /// Coefficient of determination (defined as 1.0 when `y` is constant).
    pub r_squared: f64,
    /// R² adjusted for the number of predictors.
    pub adjusted_r_squared: f64,
    /// Overall F statistic, (SS_reg / p) / MSE.
    pub f_statistic: f64,
    /// p-value of the overall F test against F(p, n - p - 1).
    pub f_p_value: f64,
    /// Residual standard error, sqrt(SS_res / (n - p - 1)).
    pub residual_se: f64,
    /// Per-observation residuals, y_i - fitted_i.
    pub residuals: Vec<f64>,
    /// Per-observation fitted values.
    pub fitted: Vec<f64>,
    /// Variance inflation factor per predictor (length p, no intercept entry).
    pub vif: Vec<f64>,
    /// Number of observations.
    pub n: usize,
    /// Number of predictors (excluding the intercept).
    pub p: usize,
}
92
93pub fn simple_linear_regression(x: &[f64], y: &[f64]) -> Option<SimpleRegressionResult> {
126 let n = x.len();
127 if n < 3 || n != y.len() {
128 return None;
129 }
130 if x.iter().any(|v| !v.is_finite()) || y.iter().any(|v| !v.is_finite()) {
131 return None;
132 }
133
134 let x_mean = stats::mean(x)?;
135 let y_mean = stats::mean(y)?;
136 let x_var = stats::variance(x)?;
137 let cov = stats::covariance(x, y)?;
138
139 if x_var < 1e-300 {
140 return None; }
142
143 let slope = cov / x_var;
144 let intercept = y_mean - slope * x_mean;
145
146 let fitted: Vec<f64> = x.iter().map(|&xi| intercept + slope * xi).collect();
148 let residuals: Vec<f64> = y
149 .iter()
150 .zip(fitted.iter())
151 .map(|(&yi, &fi)| yi - fi)
152 .collect();
153
154 let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
156 let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
157
158 let nf = n as f64;
159 let df_res = nf - 2.0;
160
161 let r_squared = if ss_tot > 1e-300 {
162 1.0 - ss_res / ss_tot
163 } else {
164 1.0
165 };
166 let adjusted_r_squared = 1.0 - (1.0 - r_squared) * (nf - 1.0) / df_res;
167
168 let mse = ss_res / df_res;
170 let residual_se = mse.sqrt();
171
172 let ss_x: f64 = x.iter().map(|&xi| (xi - x_mean).powi(2)).sum();
174 let slope_se = (mse / ss_x).sqrt();
175 let intercept_se = (mse * (1.0 / nf + x_mean * x_mean / ss_x)).sqrt();
176
177 let slope_t = if slope_se > 1e-300 {
179 slope / slope_se
180 } else {
181 f64::INFINITY
182 };
183 let intercept_t = if intercept_se > 1e-300 {
184 intercept / intercept_se
185 } else {
186 f64::INFINITY
187 };
188
189 let slope_p = 2.0 * (1.0 - special::t_distribution_cdf(slope_t.abs(), df_res));
191 let intercept_p = 2.0 * (1.0 - special::t_distribution_cdf(intercept_t.abs(), df_res));
192
193 let f_statistic = slope_t * slope_t;
195 let f_p_value = if f_statistic.is_infinite() {
196 0.0
197 } else {
198 1.0 - special::f_distribution_cdf(f_statistic, 1.0, df_res)
199 };
200
201 Some(SimpleRegressionResult {
202 slope,
203 intercept,
204 r_squared,
205 adjusted_r_squared,
206 slope_se,
207 intercept_se,
208 slope_t,
209 intercept_t,
210 slope_p,
211 intercept_p,
212 residual_se,
213 f_statistic,
214 f_p_value,
215 residuals,
216 fitted,
217 n,
218 })
219}
220
221pub fn multiple_linear_regression(
261 predictors: &[&[f64]],
262 y: &[f64],
263) -> Option<MultipleRegressionResult> {
264 let p = predictors.len(); let n = y.len();
266
267 if p == 0 || n < p + 2 {
268 return None;
269 }
270
271 for pred in predictors {
273 if pred.len() != n {
274 return None;
275 }
276 if pred.iter().any(|v| !v.is_finite()) {
277 return None;
278 }
279 }
280 if y.iter().any(|v| !v.is_finite()) {
281 return None;
282 }
283
284 let ncols = p + 1; let mut x_data = Vec::with_capacity(n * ncols);
288 for i in 0..n {
289 x_data.push(1.0); for pred in predictors {
291 x_data.push(pred[i]);
292 }
293 }
294 let x_mat = Matrix::new(n, ncols, x_data).ok()?;
295
296 let xt = x_mat.transpose();
298 let xtx = xt.mul_mat(&x_mat).ok()?;
299
300 let xty = xt.mul_vec(y).ok()?;
302
303 let coefficients = xtx.cholesky_solve(&xty).ok()?;
305
306 let fitted = x_mat.mul_vec(&coefficients).ok()?;
308 let residuals: Vec<f64> = y
309 .iter()
310 .zip(fitted.iter())
311 .map(|(&yi, &fi)| yi - fi)
312 .collect();
313
314 let y_mean = stats::mean(y)?;
316 let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
317 let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
318
319 let nf = n as f64;
320 let pf = p as f64;
321 let df_res = nf - pf - 1.0;
322
323 let r_squared = if ss_tot > 1e-300 {
324 1.0 - ss_res / ss_tot
325 } else {
326 1.0
327 };
328 let adjusted_r_squared = 1.0 - (1.0 - r_squared) * (nf - 1.0) / df_res;
329
330 let mse = ss_res / df_res;
332 let residual_se = mse.sqrt();
333
334 let xtx_inv = xtx.inverse().ok()?;
336 let mut std_errors = Vec::with_capacity(ncols);
337 let mut t_statistics = Vec::with_capacity(ncols);
338 let mut p_values = Vec::with_capacity(ncols);
339
340 for (j, &coeff_j) in coefficients.iter().enumerate() {
341 let se = (xtx_inv.get(j, j) * mse).sqrt();
342 std_errors.push(se);
343 let t = if se > 1e-300 {
344 coeff_j / se
345 } else {
346 f64::INFINITY
347 };
348 t_statistics.push(t);
349 let pv = 2.0 * (1.0 - special::t_distribution_cdf(t.abs(), df_res));
350 p_values.push(pv);
351 }
352
353 let ss_reg = ss_tot - ss_res;
355 let f_statistic = if pf > 0.0 && mse > 1e-300 {
356 (ss_reg / pf) / mse
357 } else {
358 0.0
359 };
360 let f_p_value = if f_statistic.is_infinite() || f_statistic.is_nan() {
361 0.0
362 } else {
363 1.0 - special::f_distribution_cdf(f_statistic, pf, df_res)
364 };
365
366 let vif = compute_vif(predictors);
369
370 Some(MultipleRegressionResult {
371 coefficients,
372 std_errors,
373 t_statistics,
374 p_values,
375 r_squared,
376 adjusted_r_squared,
377 f_statistic,
378 f_p_value,
379 residual_se,
380 residuals,
381 fitted,
382 vif,
383 n,
384 p,
385 })
386}
387
388fn compute_vif(predictors: &[&[f64]]) -> Vec<f64> {
390 let p = predictors.len();
391 if p < 2 {
392 return vec![1.0; p];
393 }
394
395 let mut vif = Vec::with_capacity(p);
396 for j in 0..p {
397 let y_j = predictors[j];
399 let others: Vec<&[f64]> = predictors
400 .iter()
401 .enumerate()
402 .filter(|&(i, _)| i != j)
403 .map(|(_, v)| *v)
404 .collect();
405
406 if let Some(result) = multiple_linear_regression(&others, y_j) {
407 let r2 = result.r_squared;
408 if r2 < 1.0 - 1e-15 {
409 vif.push(1.0 / (1.0 - r2));
410 } else {
411 vif.push(f64::INFINITY); }
413 } else {
414 vif.push(f64::NAN);
415 }
416 }
417 vif
418}
419
420pub fn predict_simple(model: &SimpleRegressionResult, x_new: &[f64]) -> Vec<f64> {
435 x_new
436 .iter()
437 .map(|&xi| model.intercept + model.slope * xi)
438 .collect()
439}
440
441pub fn predict_multiple(
450 model: &MultipleRegressionResult,
451 predictors_new: &[&[f64]],
452) -> Option<Vec<f64>> {
453 if predictors_new.len() != model.p {
454 return None;
455 }
456 let n = predictors_new.first()?.len();
457 for pred in predictors_new {
458 if pred.len() != n {
459 return None;
460 }
461 }
462
463 let mut result = Vec::with_capacity(n);
464 for i in 0..n {
465 let mut yi = model.coefficients[0]; for (j, pred) in predictors_new.iter().enumerate() {
467 yi += model.coefficients[j + 1] * pred[i];
468 }
469 result.push(yi);
470 }
471 Some(result)
472}
473
474pub fn vif(predictors: &[&[f64]]) -> Option<Vec<f64>> {
507 let p = predictors.len();
508 if p == 0 {
509 return None;
510 }
511 let n = predictors[0].len();
512 if !predictors.iter().all(|c| c.len() == n) {
513 return None;
514 }
515 if n <= p {
516 return None;
517 }
518 if predictors
519 .iter()
520 .any(|c| c.iter().any(|v| !v.is_finite()))
521 {
522 return None;
523 }
524 Some(compute_vif(predictors))
525}
526
527pub fn condition_number(predictors: &[&[f64]]) -> Option<f64> {
556 use u_numflow::matrix::Matrix;
557 let p = predictors.len();
558 if p == 0 {
559 return None;
560 }
561 let n = predictors[0].len();
562 if !predictors.iter().all(|c| c.len() == n) {
563 return None;
564 }
565 if n <= p {
566 return None;
567 }
568 if predictors
569 .iter()
570 .any(|c| c.iter().any(|v| !v.is_finite()))
571 {
572 return None;
573 }
574
575 let mut means = vec![0.0_f64; p];
577 for (j, col) in predictors.iter().enumerate() {
578 means[j] = col.iter().sum::<f64>() / n as f64;
579 }
580 let mut cov = vec![0.0_f64; p * p];
581 let denom = (n - 1) as f64;
582 for r in 0..p {
583 let col_r = predictors[r];
584 let mean_r = means[r];
585 for c in r..p {
586 let col_c = predictors[c];
587 let mean_c = means[c];
588 let sum: f64 = col_r
589 .iter()
590 .zip(col_c.iter())
591 .map(|(&a, &b)| (a - mean_r) * (b - mean_c))
592 .sum();
593 let v = sum / denom;
594 cov[r * p + c] = v;
595 cov[c * p + r] = v;
596 }
597 }
598
599 let mat = Matrix::new(p, p, cov).ok()?;
600 let (eigenvalues, _eigenvectors) = mat.eigen_symmetric().ok()?;
601 let lambda_max = eigenvalues
602 .iter()
603 .copied()
604 .fold(f64::NEG_INFINITY, f64::max);
605 let lambda_min = eigenvalues.iter().copied().fold(f64::INFINITY, f64::min);
606
607 if !lambda_max.is_finite() || lambda_max <= 0.0 {
608 return None;
609 }
610 if lambda_min < 1e-15 {
611 return Some(f64::INFINITY);
612 }
613 Some(lambda_max / lambda_min)
614}
615
#[cfg(test)]
mod tests {
    use super::*;

    // ---------- simple_linear_regression ----------

    #[test]
    fn simple_perfect_fit() {
        // y = 1 + 2x exactly: coefficients recovered to machine precision.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [3.0, 5.0, 7.0, 9.0, 11.0];
        let r = simple_linear_regression(&x, &y).expect("should compute");
        assert!((r.slope - 2.0).abs() < 1e-10);
        assert!((r.intercept - 1.0).abs() < 1e-10);
        assert!((r.r_squared - 1.0).abs() < 1e-10);
    }

    #[test]
    fn simple_with_noise() {
        // Slight perturbation of y = 2x: slope stays near 2, fit stays tight.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.1, 3.9, 6.1, 7.9, 10.1];
        let r = simple_linear_regression(&x, &y).expect("should compute");
        assert!((r.slope - 2.0).abs() < 0.1);
        assert!(r.r_squared > 0.99);
        assert_eq!(r.residuals.len(), 5);
        assert_eq!(r.fitted.len(), 5);
    }

    #[test]
    fn simple_residuals_sum_to_zero() {
        // OLS with intercept forces the residuals to sum to zero.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.1, 3.9, 6.1, 7.9, 10.1];
        let r = simple_linear_regression(&x, &y).expect("should compute");
        let sum: f64 = r.residuals.iter().sum();
        assert!(sum.abs() < 1e-10, "residuals sum = {sum}");
    }

    #[test]
    fn simple_f_equals_t_squared() {
        // In simple regression the overall F statistic equals the slope t².
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.1, 3.9, 6.1, 7.9, 10.1];
        let r = simple_linear_regression(&x, &y).expect("should compute");
        assert!(
            (r.f_statistic - r.slope_t * r.slope_t).abs() < 1e-8,
            "F = {}, t² = {}",
            r.f_statistic,
            r.slope_t * r.slope_t
        );
    }

    #[test]
    fn simple_significant_slope() {
        // Exact linear relationship over 20 points: p-values near zero.
        let x: Vec<f64> = (0..20).map(|i| i as f64).collect();
        let y: Vec<f64> = x.iter().map(|&xi| 3.0 + 2.0 * xi).collect();
        let r = simple_linear_regression(&x, &y).expect("should compute");
        assert!(r.slope_p < 1e-10, "slope p = {}", r.slope_p);
        assert!(r.f_p_value < 1e-10, "F p = {}", r.f_p_value);
    }

    #[test]
    fn simple_negative_slope() {
        // Decreasing line y = 12 - 2x.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [10.0, 8.0, 6.0, 4.0, 2.0];
        let r = simple_linear_regression(&x, &y).expect("should compute");
        assert!((r.slope + 2.0).abs() < 1e-10);
        assert!((r.intercept - 12.0).abs() < 1e-10);
    }

    #[test]
    fn simple_edge_cases() {
        // Too few points (n < 3).
        assert!(simple_linear_regression(&[1.0, 2.0], &[3.0, 4.0]).is_none());
        // Mismatched lengths.
        assert!(simple_linear_regression(&[1.0, 2.0, 3.0], &[4.0, 5.0]).is_none());
        // Constant x (zero variance).
        assert!(simple_linear_regression(&[5.0, 5.0, 5.0], &[1.0, 2.0, 3.0]).is_none());
        // Non-finite input.
        assert!(simple_linear_regression(&[1.0, f64::NAN, 3.0], &[4.0, 5.0, 6.0]).is_none());
    }

    #[test]
    fn simple_numeric_reference_ols() {
        // Classic textbook dataset with hand-computed OLS solution:
        // β̂₁ = 0.6, β̂₀ = 2.2, R² = 0.6.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.0, 4.0, 5.0, 4.0, 5.0];
        let r = simple_linear_regression(&x, &y).expect("should compute");

        assert!(
            (r.slope - 0.6).abs() < 1e-10,
            "β̂₁ expected 0.6, got {}",
            r.slope
        );
        assert!(
            (r.intercept - 2.2).abs() < 1e-10,
            "β̂₀ expected 2.2, got {}",
            r.intercept
        );
        assert!(
            (r.r_squared - 0.6).abs() < 1e-3,
            "R² expected 0.6, got {}",
            r.r_squared
        );

        // Fitted values at x = 1..5 under ŷ = 2.2 + 0.6x.
        let expected_fitted = [2.8, 3.4, 4.0, 4.6, 5.2];
        for (i, (&fi, &ef)) in r.fitted.iter().zip(expected_fitted.iter()).enumerate() {
            assert!(
                (fi - ef).abs() < 1e-10,
                "ŷ_{} expected {}, got {}",
                i + 1,
                ef,
                fi
            );
        }
    }

    #[test]
    fn simple_adjusted_r_squared() {
        // Adjusted R² never exceeds R², and stays high for a tight fit.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.1, 3.9, 6.1, 7.9, 10.1];
        let r = simple_linear_regression(&x, &y).expect("should compute");
        assert!(r.adjusted_r_squared <= r.r_squared);
        assert!(r.adjusted_r_squared > 0.98);
    }

    // ---------- multiple_linear_regression ----------

    #[test]
    fn multiple_perfect_fit() {
        // y = 1 + 2·x1 + 3·x2 exactly: all coefficients recovered.
        let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let x2 = [2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0, 5.0];
        let y: Vec<f64> = x1
            .iter()
            .zip(x2.iter())
            .map(|(&a, &b)| 1.0 + 2.0 * a + 3.0 * b)
            .collect();
        let r = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");

        assert!(
            (r.coefficients[0] - 1.0).abs() < 1e-8,
            "β₀ = {}",
            r.coefficients[0]
        );
        assert!(
            (r.coefficients[1] - 2.0).abs() < 1e-8,
            "β₁ = {}",
            r.coefficients[1]
        );
        assert!(
            (r.coefficients[2] - 3.0).abs() < 1e-8,
            "β₂ = {}",
            r.coefficients[2]
        );
        assert!((r.r_squared - 1.0).abs() < 1e-8);
    }

    #[test]
    fn multiple_with_noise() {
        // Noisy two-predictor fit: check output vector shapes and fit quality.
        let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let x2 = [2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0];
        let y = [5.1, 5.0, 9.2, 8.9, 13.1, 12.0, 17.2, 15.9];
        let r = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");
        assert!(r.r_squared > 0.95);
        assert_eq!(r.coefficients.len(), 3);
        assert_eq!(r.std_errors.len(), 3);
        assert_eq!(r.residuals.len(), 8);
        assert_eq!(r.vif.len(), 2);
    }

    #[test]
    fn multiple_residuals_sum_to_zero() {
        // Intercept column forces residuals to sum to zero here too.
        let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let x2 = [2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0];
        let y = [5.1, 5.0, 9.2, 8.9, 13.1, 12.0, 17.2, 15.9];
        let r = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");
        let sum: f64 = r.residuals.iter().sum();
        assert!(sum.abs() < 1e-8, "residuals sum = {sum}");
    }

    #[test]
    fn multiple_single_predictor_matches_simple() {
        // With a single predictor, multiple regression must reproduce the
        // simple-regression slope, intercept, and R².
        let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let y: Vec<f64> = x
            .iter()
            .map(|&xi| 3.0 + 2.5 * xi + 0.1 * (xi - 5.0))
            .collect();

        let simple = simple_linear_regression(&x, &y).expect("simple");
        let multi = multiple_linear_regression(&[&x], &y).expect("multiple");

        assert!(
            (simple.slope - multi.coefficients[1]).abs() < 1e-8,
            "slope: {} vs {}",
            simple.slope,
            multi.coefficients[1]
        );
        assert!(
            (simple.intercept - multi.coefficients[0]).abs() < 1e-8,
            "intercept: {} vs {}",
            simple.intercept,
            multi.coefficients[0]
        );
        assert!((simple.r_squared - multi.r_squared).abs() < 1e-8);
    }

    #[test]
    fn multiple_edge_cases() {
        // Too few observations: n must be at least p + 2.
        let x1 = [1.0, 2.0];
        let y = [3.0, 4.0];
        assert!(multiple_linear_regression(&[&x1], &y).is_none());
        // Mismatched predictor/response lengths.
        let x2 = [1.0, 2.0, 3.0];
        let y2 = [4.0, 5.0];
        assert!(multiple_linear_regression(&[&x2], &y2).is_none());
    }

    #[test]
    fn multiple_vif_independent_predictors() {
        // Orthogonal-ish indicator predictors: VIFs should stay near 1.
        let x1 = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0];
        let x2 = [0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0];
        let y: Vec<f64> = x1
            .iter()
            .zip(x2.iter())
            .map(|(&a, &b)| 1.0 + 2.0 * a + 3.0 * b)
            .collect();
        let r = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");
        for (i, &v) in r.vif.iter().enumerate() {
            assert!(
                v < 2.0,
                "VIF[{i}] = {v}, expected near 1.0 for independent predictors"
            );
        }
    }

    #[test]
    fn multiple_vif_correlated_predictors() {
        // x2 is a noisy affine transform of x1 → strong collinearity,
        // so VIF should be clearly elevated.
        let x1: Vec<f64> = (0..20).map(|i| i as f64).collect();
        let noise = [
            0.1, -0.2, 0.3, -0.1, 0.2, -0.3, 0.1, -0.1, 0.2, -0.2, 0.3, -0.1, 0.1, -0.2, 0.3, -0.3,
            0.1, -0.1, 0.2, -0.2,
        ];
        let x2: Vec<f64> = x1
            .iter()
            .zip(noise.iter())
            .map(|(&v, &n)| v * 0.9 + 1.0 + n)
            .collect();
        let y: Vec<f64> = x1
            .iter()
            .zip(x2.iter())
            .map(|(&a, &b)| 1.0 + a + b)
            .collect();
        let r = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");
        assert!(
            r.vif[0] > 5.0,
            "VIF[0] = {}, expected > 5.0 for correlated predictors",
            r.vif[0]
        );
    }

    // ---------- prediction helpers ----------

    #[test]
    fn predict_simple_basic() {
        // Extrapolate the exact line y = 1 + 2x to new points.
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [3.0, 5.0, 7.0, 9.0, 11.0];
        let model = simple_linear_regression(&x, &y).expect("should compute");
        let pred = predict_simple(&model, &[6.0, 7.0]);
        assert!((pred[0] - 13.0).abs() < 1e-10);
        assert!((pred[1] - 15.0).abs() < 1e-10);
    }

    #[test]
    fn predict_multiple_basic() {
        // Predict from the exact model y = 1 + 2·x1 + 3·x2 at a new point.
        let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let x2 = [2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0, 5.0];
        let y: Vec<f64> = x1
            .iter()
            .zip(x2.iter())
            .map(|(&a, &b)| 1.0 + 2.0 * a + 3.0 * b)
            .collect();
        let model = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");

        let new_x1 = [11.0];
        let new_x2 = [6.0];
        let pred = predict_multiple(&model, &[&new_x1, &new_x2]).expect("should predict");
        let expected = 1.0 + 2.0 * 11.0 + 3.0 * 6.0;
        assert!(
            (pred[0] - expected).abs() < 1e-6,
            "pred = {}, expected = {}",
            pred[0],
            expected
        );
    }

    // ---------- public vif / condition_number ----------

    #[test]
    fn vif_pub_independent_predictors() {
        // Uncorrelated predictors: VIFs near 1.
        let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let x2 = [8.0, 1.0, 6.0, 3.0, 5.0, 7.0, 2.0, 4.0];
        let v = vif(&[&x1, &x2]).expect("should compute");
        for r in &v {
            assert!(*r < 2.0, "VIF should be near 1.0 for independent, got {r}");
        }
    }

    #[test]
    fn vif_pub_collinear_predictors() {
        // x2 ≈ 2·x1: VIF should explode.
        let x1: Vec<f64> = (0..20).map(|i| i as f64).collect();
        let x2: Vec<f64> = x1.iter().map(|&v| v * 2.0 + 0.001).collect();
        let v = vif(&[&x1[..], &x2[..]]).expect("should compute");
        assert!(
            v[0] > 50.0,
            "VIF should explode under near-collinearity, got {}",
            v[0]
        );
    }

    #[test]
    fn vif_pub_rejects_short_input() {
        // n <= p is rejected.
        let x = [1.0, 2.0];
        assert!(vif(&[&x[..], &x[..]]).is_none());
    }

    #[test]
    fn vif_pub_rejects_non_finite() {
        // Any NaN in the predictors is rejected.
        let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let x2 = [1.0, 2.0, f64::NAN, 4.0, 5.0, 6.0];
        assert!(vif(&[&x1[..], &x2[..]]).is_none());
    }

    #[test]
    fn condition_number_orthogonal() {
        // Orthogonal ±1 columns → well-conditioned covariance matrix.
        let x1 = [1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0];
        let x2 = [1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0];
        let c = condition_number(&[&x1[..], &x2[..]]).expect("should compute");
        assert!(c < 5.0, "near-orthogonal: cond should be small, got {c}");
    }

    #[test]
    fn condition_number_collinear_explodes() {
        // Nearly dependent columns → huge (or infinite) condition number.
        let x1: Vec<f64> = (0..20).map(|i| i as f64).collect();
        let x2: Vec<f64> = x1.iter().map(|&v| v * 2.0 + 1e-6).collect();
        let c = condition_number(&[&x1[..], &x2[..]]).expect("should compute");
        assert!(
            c > 1e5 || c.is_infinite(),
            "collinear: cond should be huge, got {c}"
        );
    }

    #[test]
    fn condition_number_rejects_short_input() {
        // n <= p is rejected.
        let x = [1.0, 2.0];
        assert!(condition_number(&[&x[..], &x[..]]).is_none());
    }

    #[test]
    fn predict_multiple_wrong_predictors() {
        // Passing fewer predictor columns than the model expects fails.
        let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let x2 = [2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0, 5.0];
        let y: Vec<f64> = x1.iter().zip(x2.iter()).map(|(&a, &b)| a + b).collect();
        let model = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");

        assert!(predict_multiple(&model, &[&[1.0]]).is_none());
    }
}
1001
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // For arbitrary finite data in ±1e3, R² should stay within [0, 1]
        // up to a small numerical slack.
        #[test]
        fn simple_r_squared_bounded(
            data in proptest::collection::vec(-1e3_f64..1e3, 5..=30)
                .prop_flat_map(|x| {
                    let n = x.len();
                    // Generate a y of exactly the same length as x.
                    (Just(x), proptest::collection::vec(-1e3_f64..1e3, n..=n))
                })
        ) {
            let (x, y) = data;
            if let Some(r) = simple_linear_regression(&x, &y) {
                prop_assert!(r.r_squared >= -0.01 && r.r_squared <= 1.01,
                    "R² = {}", r.r_squared);
            }
        }

        // OLS residuals are orthogonal to the regressor: x·e ≈ 0
        // (checked in normalized form to be scale-invariant).
        #[test]
        fn simple_residuals_orthogonal_to_x(
            data in proptest::collection::vec(-1e3_f64..1e3, 5..=30)
                .prop_flat_map(|x| {
                    let n = x.len();
                    (Just(x), proptest::collection::vec(-1e3_f64..1e3, n..=n))
                })
        ) {
            let (x, y) = data;
            if let Some(r) = simple_linear_regression(&x, &y) {
                let dot: f64 = x.iter().zip(r.residuals.iter()).map(|(&xi, &ei)| xi * ei).sum();
                let norm = r.residuals.iter().map(|e| e * e).sum::<f64>().sqrt();
                let x_norm = x.iter().map(|xi| xi * xi).sum::<f64>().sqrt();
                // Skip degenerate cases where either vector is ≈ zero.
                if norm > 1e-10 && x_norm > 1e-10 {
                    prop_assert!((dot / (norm * x_norm)).abs() < 1e-6,
                        "residuals not orthogonal to x: dot={dot}");
                }
            }
        }

        // Same bound for the two-predictor case; inputs are truncated to a
        // common length before fitting.
        #[test]
        fn multiple_r_squared_bounded(
            x1 in proptest::collection::vec(-1e3_f64..1e3, 8..=20),
            x2_seed in proptest::collection::vec(-1e3_f64..1e3, 8..=20),
            y_seed in proptest::collection::vec(-1e3_f64..1e3, 8..=20),
        ) {
            let n = x1.len().min(x2_seed.len()).min(y_seed.len());
            let x2 = &x2_seed[..n];
            let y = &y_seed[..n];
            let x1 = &x1[..n];
            if let Some(r) = multiple_linear_regression(&[x1, x2], y) {
                prop_assert!(r.r_squared >= -0.01 && r.r_squared <= 1.01,
                    "R² = {}", r.r_squared);
            }
        }
    }
}