use u_numflow::matrix::Matrix;
use u_numflow::special;
use u_numflow::stats;
/// Output of [`simple_linear_regression`]: OLS coefficient estimates plus
/// the usual inference and diagnostic quantities.
#[derive(Debug, Clone)]
pub struct SimpleRegressionResult {
/// Estimated slope (cov(x, y) / var(x)).
pub slope: f64,
/// Estimated intercept (ȳ - slope · x̄).
pub intercept: f64,
/// Coefficient of determination, 1 - SS_res / SS_tot.
pub r_squared: f64,
/// R² penalized for model size: 1 - (1 - R²)(n - 1)/(n - 2).
pub adjusted_r_squared: f64,
/// Standard error of the slope estimate.
pub slope_se: f64,
/// Standard error of the intercept estimate.
pub intercept_se: f64,
/// t statistic for the slope (slope / slope_se).
pub slope_t: f64,
/// t statistic for the intercept (intercept / intercept_se).
pub intercept_t: f64,
/// Two-sided p-value for the slope (t distribution, n - 2 df).
pub slope_p: f64,
/// Two-sided p-value for the intercept (t distribution, n - 2 df).
pub intercept_p: f64,
/// Residual standard error, sqrt(SS_res / (n - 2)).
pub residual_se: f64,
/// Overall F statistic; with one predictor this equals slope_t².
pub f_statistic: f64,
/// p-value of the F statistic (F distribution with (1, n - 2) df).
pub f_p_value: f64,
/// Per-observation residuals y_i - ŷ_i.
pub residuals: Vec<f64>,
/// Fitted values ŷ_i = intercept + slope · x_i.
pub fitted: Vec<f64>,
/// Number of observations.
pub n: usize,
}
/// Output of [`multiple_linear_regression`]: coefficient vector (intercept
/// first), per-coefficient inference, fit diagnostics, and collinearity
/// measures.
#[derive(Debug, Clone)]
pub struct MultipleRegressionResult {
/// Estimated coefficients; index 0 is the intercept, index j (j >= 1)
/// corresponds to predictor j - 1.
pub coefficients: Vec<f64>,
/// Standard error for each coefficient (same ordering as `coefficients`).
pub std_errors: Vec<f64>,
/// t statistic for each coefficient (coefficient / standard error).
pub t_statistics: Vec<f64>,
/// Two-sided p-value for each coefficient (t distribution, n - p - 1 df).
pub p_values: Vec<f64>,
/// Coefficient of determination, 1 - SS_res / SS_tot.
pub r_squared: f64,
/// R² penalized for model size: 1 - (1 - R²)(n - 1)/(n - p - 1).
pub adjusted_r_squared: f64,
/// Overall F statistic, (SS_reg / p) / MSE.
pub f_statistic: f64,
/// p-value of the F statistic (F distribution with (p, n - p - 1) df).
pub f_p_value: f64,
/// Residual standard error, sqrt(SS_res / (n - p - 1)).
pub residual_se: f64,
/// Per-observation residuals y_i - ŷ_i.
pub residuals: Vec<f64>,
/// Fitted values ŷ_i.
pub fitted: Vec<f64>,
/// Variance inflation factor for each predictor (length p).
pub vif: Vec<f64>,
/// Number of observations.
pub n: usize,
/// Number of predictors (excluding the intercept).
pub p: usize,
}
/// Fits an ordinary least-squares line `y = intercept + slope * x`.
///
/// Returns `None` when there are fewer than 3 points, the slices have
/// different lengths, any value is non-finite, or `x` has (near-)zero
/// variance, which would make the slope undefined.
///
/// Inference (standard errors, t and F statistics, p-values) uses
/// `n - 2` residual degrees of freedom.
pub fn simple_linear_regression(x: &[f64], y: &[f64]) -> Option<SimpleRegressionResult> {
    let n = x.len();
    // Need at least 3 points so df_res = n - 2 >= 1.
    if n < 3 || n != y.len() {
        return None;
    }
    if x.iter().any(|v| !v.is_finite()) || y.iter().any(|v| !v.is_finite()) {
        return None;
    }
    let x_mean = stats::mean(x)?;
    let y_mean = stats::mean(y)?;
    let x_var = stats::variance(x)?;
    let cov = stats::covariance(x, y)?;
    // Degenerate predictor: all x values effectively identical.
    if x_var < 1e-300 {
        return None;
    }
    let slope = cov / x_var;
    let intercept = y_mean - slope * x_mean;
    let fitted: Vec<f64> = x.iter().map(|&xi| intercept + slope * xi).collect();
    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted.iter())
        .map(|(&yi, &fi)| yi - fi)
        .collect();
    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let nf = n as f64;
    let df_res = nf - 2.0;
    // Constant y (SS_tot ~ 0): define R² = 1 by convention.
    let r_squared = if ss_tot > 1e-300 {
        1.0 - ss_res / ss_tot
    } else {
        1.0
    };
    let adjusted_r_squared = 1.0 - (1.0 - r_squared) * (nf - 1.0) / df_res;
    let mse = ss_res / df_res;
    let residual_se = mse.sqrt();
    let ss_x: f64 = x.iter().map(|&xi| (xi - x_mean).powi(2)).sum();
    let slope_se = (mse / ss_x).sqrt();
    let intercept_se = (mse * (1.0 / nf + x_mean * x_mean / ss_x)).sqrt();
    // A (near-)zero standard error means a perfect fit; report a signed
    // infinite t statistic so the direction of the effect is preserved
    // (p-values and F use abs()/square, so they are unaffected).
    let slope_t = if slope_se > 1e-300 {
        slope / slope_se
    } else {
        f64::INFINITY.copysign(slope)
    };
    let intercept_t = if intercept_se > 1e-300 {
        intercept / intercept_se
    } else {
        f64::INFINITY.copysign(intercept)
    };
    // Two-sided p-values from the t distribution with n - 2 df.
    let slope_p = 2.0 * (1.0 - special::t_distribution_cdf(slope_t.abs(), df_res));
    let intercept_p = 2.0 * (1.0 - special::t_distribution_cdf(intercept_t.abs(), df_res));
    // With a single predictor, F = t² for the slope.
    let f_statistic = slope_t * slope_t;
    let f_p_value = if f_statistic.is_infinite() {
        0.0
    } else {
        1.0 - special::f_distribution_cdf(f_statistic, 1.0, df_res)
    };
    Some(SimpleRegressionResult {
        slope,
        intercept,
        r_squared,
        adjusted_r_squared,
        slope_se,
        intercept_se,
        slope_t,
        intercept_t,
        slope_p,
        intercept_p,
        residual_se,
        f_statistic,
        f_p_value,
        residuals,
        fitted,
        n,
    })
}
/// Fits an OLS model y = β₀ + β₁x₁ + … + β_p x_p via the normal equations
/// (XᵀX)β = Xᵀy, solved with a Cholesky factorization.
///
/// `predictors` holds one column (slice) per predictor. Returns `None`
/// when there are no predictors, too few observations (n < p + 2),
/// mismatched column lengths, non-finite data, or when the solver /
/// inverse fails (e.g. perfectly collinear predictors).
pub fn multiple_linear_regression(
    predictors: &[&[f64]],
    y: &[f64],
) -> Option<MultipleRegressionResult> {
    let p = predictors.len();
    let n = y.len();
    // n >= p + 2 keeps df_res = n - p - 1 >= 1.
    if p == 0 || n < p + 2 {
        return None;
    }
    for pred in predictors {
        if pred.len() != n {
            return None;
        }
        if pred.iter().any(|v| !v.is_finite()) {
            return None;
        }
    }
    if y.iter().any(|v| !v.is_finite()) {
        return None;
    }
    // Design matrix X, row-major: leading column of ones for the intercept.
    let ncols = p + 1;
    let mut x_data = Vec::with_capacity(n * ncols);
    for i in 0..n {
        x_data.push(1.0);
        for pred in predictors {
            x_data.push(pred[i]);
        }
    }
    let x_mat = Matrix::new(n, ncols, x_data).ok()?;
    let xt = x_mat.transpose();
    let xtx = xt.mul_mat(&x_mat).ok()?;
    let xty = xt.mul_vec(y).ok()?;
    // Cholesky fails on a singular XᵀX; that maps to None.
    let coefficients = xtx.cholesky_solve(&xty).ok()?;
    let fitted = x_mat.mul_vec(&coefficients).ok()?;
    let residuals: Vec<f64> = y
        .iter()
        .zip(fitted.iter())
        .map(|(&yi, &fi)| yi - fi)
        .collect();
    let y_mean = stats::mean(y)?;
    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
    let ss_tot: f64 = y.iter().map(|&yi| (yi - y_mean).powi(2)).sum();
    let nf = n as f64;
    let pf = p as f64;
    let df_res = nf - pf - 1.0;
    // Constant y (SS_tot ~ 0): define R² = 1 by convention.
    let r_squared = if ss_tot > 1e-300 {
        1.0 - ss_res / ss_tot
    } else {
        1.0
    };
    let adjusted_r_squared = 1.0 - (1.0 - r_squared) * (nf - 1.0) / df_res;
    let mse = ss_res / df_res;
    let residual_se = mse.sqrt();
    // Var(β̂) = MSE · (XᵀX)⁻¹; standard errors come from its diagonal.
    let xtx_inv = xtx.inverse().ok()?;
    let mut std_errors = Vec::with_capacity(ncols);
    let mut t_statistics = Vec::with_capacity(ncols);
    let mut p_values = Vec::with_capacity(ncols);
    for (j, &coeff_j) in coefficients.iter().enumerate() {
        let se = (xtx_inv.get(j, j) * mse).sqrt();
        std_errors.push(se);
        // A (near-)zero standard error means a perfect fit; keep the sign
        // of the coefficient on the infinite t statistic.
        let t = if se > 1e-300 {
            coeff_j / se
        } else {
            f64::INFINITY.copysign(coeff_j)
        };
        t_statistics.push(t);
        // Two-sided p-value with n - p - 1 df.
        let pv = 2.0 * (1.0 - special::t_distribution_cdf(t.abs(), df_res));
        p_values.push(pv);
    }
    // Overall F test: (SS_reg / p) / MSE with (p, n - p - 1) df.
    let ss_reg = ss_tot - ss_res;
    let f_statistic = if pf > 0.0 && mse > 1e-300 {
        (ss_reg / pf) / mse
    } else {
        0.0
    };
    let f_p_value = if f_statistic.is_infinite() || f_statistic.is_nan() {
        0.0
    } else {
        1.0 - special::f_distribution_cdf(f_statistic, pf, df_res)
    };
    let vif = compute_vif(predictors);
    Some(MultipleRegressionResult {
        coefficients,
        std_errors,
        t_statistics,
        p_values,
        r_squared,
        adjusted_r_squared,
        f_statistic,
        f_p_value,
        residual_se,
        residuals,
        fitted,
        vif,
        n,
        p,
    })
}
/// Variance inflation factor for each predictor: VIF_j = 1 / (1 - R²_j),
/// where R²_j is obtained by regressing predictor j on all the others.
///
/// With fewer than two predictors every VIF is 1 by definition. A
/// regression that cannot be computed yields NaN for that predictor, and
/// (near-)perfect collinearity yields infinity.
fn compute_vif(predictors: &[&[f64]]) -> Vec<f64> {
    let p = predictors.len();
    if p < 2 {
        return vec![1.0; p];
    }
    (0..p)
        .map(|j| {
            // Regress column j on every other column.
            let target = predictors[j];
            let rest: Vec<&[f64]> = (0..p)
                .filter(|&i| i != j)
                .map(|i| predictors[i])
                .collect();
            match multiple_linear_regression(&rest, target) {
                Some(fit) if fit.r_squared < 1.0 - 1e-15 => 1.0 / (1.0 - fit.r_squared),
                Some(_) => f64::INFINITY,
                None => f64::NAN,
            }
        })
        .collect()
}
/// Evaluates the fitted line ŷ = intercept + slope · x at each new x value.
pub fn predict_simple(model: &SimpleRegressionResult, x_new: &[f64]) -> Vec<f64> {
    let mut predictions = Vec::with_capacity(x_new.len());
    for &xi in x_new {
        predictions.push(model.intercept + model.slope * xi);
    }
    predictions
}
/// Applies a fitted multiple-regression model to new predictor columns.
///
/// Returns `None` when the number of columns differs from the model's
/// predictor count, the input is empty, or the columns have unequal
/// lengths.
pub fn predict_multiple(
    model: &MultipleRegressionResult,
    predictors_new: &[&[f64]],
) -> Option<Vec<f64>> {
    if predictors_new.len() != model.p {
        return None;
    }
    let n = predictors_new.first()?.len();
    if predictors_new.iter().any(|col| col.len() != n) {
        return None;
    }
    // coefficients[0] is the intercept; coefficients[1..] line up with the
    // predictor columns in order.
    let predictions = (0..n)
        .map(|i| {
            predictors_new
                .iter()
                .zip(model.coefficients[1..].iter())
                .fold(model.coefficients[0], |acc, (col, &beta)| acc + beta * col[i])
        })
        .collect();
    Some(predictions)
}
/// Public entry point for VIF computation with input validation.
///
/// Requires at least one predictor, equal column lengths, strictly more
/// observations than predictors, and all-finite data; otherwise `None`.
pub fn vif(predictors: &[&[f64]]) -> Option<Vec<f64>> {
    // Empty input short-circuits to None via `first()?`.
    let n = predictors.first()?.len();
    let p = predictors.len();
    let lengths_ok = predictors.iter().all(|col| col.len() == n);
    let all_finite = predictors
        .iter()
        .all(|col| col.iter().all(|v| v.is_finite()));
    if !lengths_ok || n <= p || !all_finite {
        return None;
    }
    Some(compute_vif(predictors))
}
pub fn condition_number(predictors: &[&[f64]]) -> Option<f64> {
use u_numflow::matrix::Matrix;
let p = predictors.len();
if p == 0 {
return None;
}
let n = predictors[0].len();
if !predictors.iter().all(|c| c.len() == n) {
return None;
}
if n <= p {
return None;
}
if predictors
.iter()
.any(|c| c.iter().any(|v| !v.is_finite()))
{
return None;
}
let mut means = vec![0.0_f64; p];
for (j, col) in predictors.iter().enumerate() {
means[j] = col.iter().sum::<f64>() / n as f64;
}
let mut cov = vec![0.0_f64; p * p];
let denom = (n - 1) as f64;
for r in 0..p {
let col_r = predictors[r];
let mean_r = means[r];
for c in r..p {
let col_c = predictors[c];
let mean_c = means[c];
let sum: f64 = col_r
.iter()
.zip(col_c.iter())
.map(|(&a, &b)| (a - mean_r) * (b - mean_c))
.sum();
let v = sum / denom;
cov[r * p + c] = v;
cov[c * p + r] = v;
}
}
let mat = Matrix::new(p, p, cov).ok()?;
let (eigenvalues, _eigenvectors) = mat.eigen_symmetric().ok()?;
let lambda_max = eigenvalues
.iter()
.copied()
.fold(f64::NEG_INFINITY, f64::max);
let lambda_min = eigenvalues.iter().copied().fold(f64::INFINITY, f64::min);
if !lambda_max.is_finite() || lambda_max <= 0.0 {
return None;
}
if lambda_min < 1e-15 {
return Some(f64::INFINITY);
}
Some(lambda_max / lambda_min)
}
// Unit tests covering the regression fitters, prediction helpers, and
// collinearity diagnostics against known analytical results.
#[cfg(test)]
mod tests {
use super::*;
// Exactly linear data: slope/intercept recovered exactly, R² = 1.
#[test]
fn simple_perfect_fit() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [3.0, 5.0, 7.0, 9.0, 11.0]; let r = simple_linear_regression(&x, &y).expect("should compute");
assert!((r.slope - 2.0).abs() < 1e-10);
assert!((r.intercept - 1.0).abs() < 1e-10);
assert!((r.r_squared - 1.0).abs() < 1e-10);
}
// Slightly noisy linear data: estimates close to truth, high R².
#[test]
fn simple_with_noise() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [2.1, 3.9, 6.1, 7.9, 10.1]; let r = simple_linear_regression(&x, &y).expect("should compute");
assert!((r.slope - 2.0).abs() < 0.1);
assert!(r.r_squared > 0.99);
assert_eq!(r.residuals.len(), 5);
assert_eq!(r.fitted.len(), 5);
}
// OLS with an intercept guarantees residuals sum to zero.
#[test]
fn simple_residuals_sum_to_zero() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [2.1, 3.9, 6.1, 7.9, 10.1];
let r = simple_linear_regression(&x, &y).expect("should compute");
let sum: f64 = r.residuals.iter().sum();
assert!(sum.abs() < 1e-10, "residuals sum = {sum}");
}
// For a single predictor the overall F statistic equals the slope t².
#[test]
fn simple_f_equals_t_squared() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [2.1, 3.9, 6.1, 7.9, 10.1];
let r = simple_linear_regression(&x, &y).expect("should compute");
assert!(
(r.f_statistic - r.slope_t * r.slope_t).abs() < 1e-8,
"F = {}, t² = {}",
r.f_statistic,
r.slope_t * r.slope_t
);
}
// A strong exact linear relationship should produce tiny p-values.
#[test]
fn simple_significant_slope() {
let x: Vec<f64> = (0..20).map(|i| i as f64).collect();
let y: Vec<f64> = x.iter().map(|&xi| 3.0 + 2.0 * xi).collect();
let r = simple_linear_regression(&x, &y).expect("should compute");
assert!(r.slope_p < 1e-10, "slope p = {}", r.slope_p);
assert!(r.f_p_value < 1e-10, "F p = {}", r.f_p_value);
}
// Decreasing data: slope and intercept recovered with correct signs.
#[test]
fn simple_negative_slope() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [10.0, 8.0, 6.0, 4.0, 2.0]; let r = simple_linear_regression(&x, &y).expect("should compute");
assert!((r.slope + 2.0).abs() < 1e-10);
assert!((r.intercept - 12.0).abs() < 1e-10);
}
// Rejections: too few points, mismatched lengths, constant x, NaN input.
#[test]
fn simple_edge_cases() {
assert!(simple_linear_regression(&[1.0, 2.0], &[3.0, 4.0]).is_none()); assert!(simple_linear_regression(&[1.0, 2.0, 3.0], &[4.0, 5.0]).is_none()); assert!(simple_linear_regression(&[5.0, 5.0, 5.0], &[1.0, 2.0, 3.0]).is_none()); assert!(simple_linear_regression(&[1.0, f64::NAN, 3.0], &[4.0, 5.0, 6.0]).is_none());
}
// Textbook OLS example with hand-computed coefficients and fitted values.
#[test]
fn simple_numeric_reference_ols() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [2.0, 4.0, 5.0, 4.0, 5.0];
let r = simple_linear_regression(&x, &y).expect("should compute");
assert!(
(r.slope - 0.6).abs() < 1e-10,
"β̂₁ expected 0.6, got {}",
r.slope
);
assert!(
(r.intercept - 2.2).abs() < 1e-10,
"β̂₀ expected 2.2, got {}",
r.intercept
);
assert!(
(r.r_squared - 0.6).abs() < 1e-3,
"R² expected 0.6, got {}",
r.r_squared
);
let expected_fitted = [2.8, 3.4, 4.0, 4.6, 5.2];
for (i, (&fi, &ef)) in r.fitted.iter().zip(expected_fitted.iter()).enumerate() {
assert!(
(fi - ef).abs() < 1e-10,
"ŷ_{} expected {}, got {}",
i + 1,
ef,
fi
);
}
}
// Adjusted R² is always <= R² and stays high for a good fit.
#[test]
fn simple_adjusted_r_squared() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [2.1, 3.9, 6.1, 7.9, 10.1];
let r = simple_linear_regression(&x, &y).expect("should compute");
assert!(r.adjusted_r_squared <= r.r_squared);
assert!(r.adjusted_r_squared > 0.98);
}
// y constructed exactly from two predictors: coefficients recovered.
#[test]
fn multiple_perfect_fit() {
let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let x2 = [2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0, 5.0];
let y: Vec<f64> = x1
.iter()
.zip(x2.iter())
.map(|(&a, &b)| 1.0 + 2.0 * a + 3.0 * b)
.collect();
let r = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");
assert!(
(r.coefficients[0] - 1.0).abs() < 1e-8,
"β₀ = {}",
r.coefficients[0]
);
assert!(
(r.coefficients[1] - 2.0).abs() < 1e-8,
"β₁ = {}",
r.coefficients[1]
);
assert!(
(r.coefficients[2] - 3.0).abs() < 1e-8,
"β₂ = {}",
r.coefficients[2]
);
assert!((r.r_squared - 1.0).abs() < 1e-8);
}
// Noisy two-predictor fit: sane R² and correctly sized output vectors.
#[test]
fn multiple_with_noise() {
let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
let x2 = [2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0];
let y = [5.1, 5.0, 9.2, 8.9, 13.1, 12.0, 17.2, 15.9];
let r = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");
assert!(r.r_squared > 0.95);
assert_eq!(r.coefficients.len(), 3);
assert_eq!(r.std_errors.len(), 3);
assert_eq!(r.residuals.len(), 8);
assert_eq!(r.vif.len(), 2);
}
// Residuals sum to zero for the multiple-regression fit as well.
#[test]
fn multiple_residuals_sum_to_zero() {
let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
let x2 = [2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0];
let y = [5.1, 5.0, 9.2, 8.9, 13.1, 12.0, 17.2, 15.9];
let r = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");
let sum: f64 = r.residuals.iter().sum();
assert!(sum.abs() < 1e-8, "residuals sum = {sum}");
}
// Single-predictor multiple regression must agree with the simple fitter.
#[test]
fn multiple_single_predictor_matches_simple() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let y: Vec<f64> = x
.iter()
.map(|&xi| 3.0 + 2.5 * xi + 0.1 * (xi - 5.0))
.collect();
let simple = simple_linear_regression(&x, &y).expect("simple");
let multi = multiple_linear_regression(&[&x], &y).expect("multiple");
assert!(
(simple.slope - multi.coefficients[1]).abs() < 1e-8,
"slope: {} vs {}",
simple.slope,
multi.coefficients[1]
);
assert!(
(simple.intercept - multi.coefficients[0]).abs() < 1e-8,
"intercept: {} vs {}",
simple.intercept,
multi.coefficients[0]
);
assert!((simple.r_squared - multi.r_squared).abs() < 1e-8);
}
// Rejections: too few observations and mismatched y length.
#[test]
fn multiple_edge_cases() {
let x1 = [1.0, 2.0];
let y = [3.0, 4.0];
assert!(multiple_linear_regression(&[&x1], &y).is_none());
let x2 = [1.0, 2.0, 3.0];
let y2 = [4.0, 5.0];
assert!(multiple_linear_regression(&[&x2], &y2).is_none()); }
// Uncorrelated predictors should have VIF near 1.
#[test]
fn multiple_vif_independent_predictors() {
let x1 = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0];
let x2 = [0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0];
let y: Vec<f64> = x1
.iter()
.zip(x2.iter())
.map(|(&a, &b)| 1.0 + 2.0 * a + 3.0 * b)
.collect();
let r = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");
for (i, &v) in r.vif.iter().enumerate() {
assert!(
v < 2.0,
"VIF[{i}] = {v}, expected near 1.0 for independent predictors"
);
}
}
// Strongly correlated predictors should inflate the VIF.
#[test]
fn multiple_vif_correlated_predictors() {
let x1: Vec<f64> = (0..20).map(|i| i as f64).collect();
let noise = [
0.1, -0.2, 0.3, -0.1, 0.2, -0.3, 0.1, -0.1, 0.2, -0.2, 0.3, -0.1, 0.1, -0.2, 0.3, -0.3,
0.1, -0.1, 0.2, -0.2,
];
let x2: Vec<f64> = x1
.iter()
.zip(noise.iter())
.map(|(&v, &n)| v * 0.9 + 1.0 + n)
.collect();
let y: Vec<f64> = x1
.iter()
.zip(x2.iter())
.map(|(&a, &b)| 1.0 + a + b)
.collect();
let r = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");
assert!(
r.vif[0] > 5.0,
"VIF[0] = {}, expected > 5.0 for correlated predictors",
r.vif[0]
);
}
// Predictions extend the fitted line beyond the training range.
#[test]
fn predict_simple_basic() {
let x = [1.0, 2.0, 3.0, 4.0, 5.0];
let y = [3.0, 5.0, 7.0, 9.0, 11.0]; let model = simple_linear_regression(&x, &y).expect("should compute");
let pred = predict_simple(&model, &[6.0, 7.0]);
assert!((pred[0] - 13.0).abs() < 1e-10);
assert!((pred[1] - 15.0).abs() < 1e-10);
}
// Multiple-regression prediction at a new point matches the true model.
#[test]
fn predict_multiple_basic() {
let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let x2 = [2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0, 5.0];
let y: Vec<f64> = x1
.iter()
.zip(x2.iter())
.map(|(&a, &b)| 1.0 + 2.0 * a + 3.0 * b)
.collect();
let model = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");
let new_x1 = [11.0];
let new_x2 = [6.0];
let pred = predict_multiple(&model, &[&new_x1, &new_x2]).expect("should predict");
let expected = 1.0 + 2.0 * 11.0 + 3.0 * 6.0;
assert!(
(pred[0] - expected).abs() < 1e-6,
"pred = {}, expected = {}",
pred[0],
expected
);
}
// Public vif(): near-1 values for unrelated predictors.
#[test]
fn vif_pub_independent_predictors() {
let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
let x2 = [8.0, 1.0, 6.0, 3.0, 5.0, 7.0, 2.0, 4.0];
let v = vif(&[&x1, &x2]).expect("should compute");
for r in &v {
assert!(*r < 2.0, "VIF should be near 1.0 for independent, got {r}");
}
}
// Public vif(): near-collinear predictors give huge VIF.
#[test]
fn vif_pub_collinear_predictors() {
let x1: Vec<f64> = (0..20).map(|i| i as f64).collect();
let x2: Vec<f64> = x1.iter().map(|&v| v * 2.0 + 0.001).collect();
let v = vif(&[&x1[..], &x2[..]]).expect("should compute");
assert!(
v[0] > 50.0,
"VIF should explode under near-collinearity, got {}",
v[0]
);
}
// Public vif(): rejects n <= p.
#[test]
fn vif_pub_rejects_short_input() {
let x = [1.0, 2.0];
assert!(vif(&[&x[..], &x[..]]).is_none());
}
// Public vif(): rejects NaN input.
#[test]
fn vif_pub_rejects_non_finite() {
let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
let x2 = [1.0, 2.0, f64::NAN, 4.0, 5.0, 6.0];
assert!(vif(&[&x1[..], &x2[..]]).is_none());
}
// Orthogonal columns: covariance matrix well-conditioned.
#[test]
fn condition_number_orthogonal() {
let x1 = [1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0];
let x2 = [1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0];
let c = condition_number(&[&x1[..], &x2[..]]).expect("should compute");
assert!(c < 5.0, "near-orthogonal: cond should be small, got {c}");
}
// Near-collinear columns: condition number huge or infinite.
#[test]
fn condition_number_collinear_explodes() {
let x1: Vec<f64> = (0..20).map(|i| i as f64).collect();
let x2: Vec<f64> = x1.iter().map(|&v| v * 2.0 + 1e-6).collect();
let c = condition_number(&[&x1[..], &x2[..]]).expect("should compute");
assert!(
c > 1e5 || c.is_infinite(),
"collinear: cond should be huge, got {c}"
);
}
// Condition number: rejects n <= p.
#[test]
fn condition_number_rejects_short_input() {
let x = [1.0, 2.0];
assert!(condition_number(&[&x[..], &x[..]]).is_none());
}
// predict_multiple rejects a mismatched number of predictor columns.
#[test]
fn predict_multiple_wrong_predictors() {
let x1 = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let x2 = [2.0, 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0, 5.0];
let y: Vec<f64> = x1.iter().zip(x2.iter()).map(|(&a, &b)| a + b).collect();
let model = multiple_linear_regression(&[&x1, &x2], &y).expect("should compute");
assert!(predict_multiple(&model, &[&[1.0]]).is_none());
}
}
// Property-based tests (proptest crate): invariants that must hold for
// arbitrary finite data, not just hand-picked fixtures.
#[cfg(test)]
mod proptests {
use super::*;
use proptest::prelude::*;
proptest! {
// R² from a valid fit stays within [0, 1] (with float slack).
#[test]
fn simple_r_squared_bounded(
data in proptest::collection::vec(-1e3_f64..1e3, 5..=30)
.prop_flat_map(|x| {
let n = x.len();
(Just(x), proptest::collection::vec(-1e3_f64..1e3, n..=n))
})
) {
let (x, y) = data;
if let Some(r) = simple_linear_regression(&x, &y) {
prop_assert!(r.r_squared >= -0.01 && r.r_squared <= 1.01,
"R² = {}", r.r_squared);
}
}
// OLS normal equations imply residuals are orthogonal to the predictor.
#[test]
fn simple_residuals_orthogonal_to_x(
data in proptest::collection::vec(-1e3_f64..1e3, 5..=30)
.prop_flat_map(|x| {
let n = x.len();
(Just(x), proptest::collection::vec(-1e3_f64..1e3, n..=n))
})
) {
let (x, y) = data;
if let Some(r) = simple_linear_regression(&x, &y) {
let dot: f64 = x.iter().zip(r.residuals.iter()).map(|(&xi, &ei)| xi * ei).sum();
let norm = r.residuals.iter().map(|e| e * e).sum::<f64>().sqrt();
let x_norm = x.iter().map(|xi| xi * xi).sum::<f64>().sqrt();
if norm > 1e-10 && x_norm > 1e-10 {
// Compare the normalized dot product to tolerate scale differences.
prop_assert!((dot / (norm * x_norm)).abs() < 1e-6,
"residuals not orthogonal to x: dot={dot}");
}
}
}
// Same boundedness invariant for the multiple-regression R².
#[test]
fn multiple_r_squared_bounded(
x1 in proptest::collection::vec(-1e3_f64..1e3, 8..=20),
x2_seed in proptest::collection::vec(-1e3_f64..1e3, 8..=20),
y_seed in proptest::collection::vec(-1e3_f64..1e3, 8..=20),
) {
// Truncate all three vectors to a common length n.
let n = x1.len().min(x2_seed.len()).min(y_seed.len());
let x2 = &x2_seed[..n];
let y = &y_seed[..n];
let x1 = &x1[..n];
if let Some(r) = multiple_linear_regression(&[x1, x2], y) {
prop_assert!(r.r_squared >= -0.01 && r.r_squared <= 1.01,
"R² = {}", r.r_squared);
}
}
}
}