use faer::Mat;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConditionSeverity {
WellConditioned,
Moderate,
High,
Severe,
}
impl ConditionSeverity {
pub fn description(&self) -> &'static str {
match self {
Self::WellConditioned => "Well-conditioned: numerically stable",
Self::Moderate => "Moderate collinearity: some instability possible",
Self::High => "High collinearity: numerical instability likely",
Self::Severe => "Severe collinearity: coefficients may be unreliable",
}
}
}
#[derive(Debug, Clone)]
pub struct ConditionDiagnostic {
pub condition_number: f64,
pub condition_number_xtx: f64,
pub singular_values: Vec<f64>,
pub condition_indices: Vec<f64>,
pub severity: ConditionSeverity,
pub warning: Option<String>,
}
pub fn condition_number(x: &Mat<f64>, with_intercept: bool) -> f64 {
let x_design = if with_intercept {
let n = x.nrows();
let p = x.ncols();
let mut x_aug = Mat::zeros(n, p + 1);
for i in 0..n {
x_aug[(i, 0)] = 1.0;
for j in 0..p {
x_aug[(i, j + 1)] = x[(i, j)];
}
}
x_aug
} else {
x.clone()
};
let svd = match x_design.svd() {
Ok(svd) => svd,
Err(_) => return f64::INFINITY,
};
let s = svd.S();
let s_col = s.column_vector();
let n_params = s_col.nrows();
if n_params == 0 {
return f64::INFINITY;
}
let mut s_max = f64::NEG_INFINITY;
let mut s_min = f64::INFINITY;
for i in 0..n_params {
let si = s_col[i];
if si > s_max {
s_max = si;
}
if si > 0.0 && si < s_min {
s_min = si;
}
}
if s_min <= 0.0 {
f64::INFINITY
} else {
s_max / s_min
}
}
pub fn condition_diagnostic(x: &Mat<f64>, with_intercept: bool) -> ConditionDiagnostic {
let x_design = if with_intercept {
let n = x.nrows();
let p = x.ncols();
let mut x_aug = Mat::zeros(n, p + 1);
for i in 0..n {
x_aug[(i, 0)] = 1.0;
for j in 0..p {
x_aug[(i, j + 1)] = x[(i, j)];
}
}
x_aug
} else {
x.clone()
};
let svd = match x_design.svd() {
Ok(svd) => svd,
Err(_) => {
return ConditionDiagnostic {
condition_number: f64::INFINITY,
condition_number_xtx: f64::INFINITY,
singular_values: vec![],
condition_indices: vec![],
severity: ConditionSeverity::Severe,
warning: Some("SVD computation failed".to_string()),
};
}
};
let s = svd.S();
let s_col = s.column_vector();
let n_params = s_col.nrows();
if n_params == 0 {
return ConditionDiagnostic {
condition_number: f64::INFINITY,
condition_number_xtx: f64::INFINITY,
singular_values: vec![],
condition_indices: vec![],
severity: ConditionSeverity::Severe,
warning: Some("Empty design matrix".to_string()),
};
}
let mut singular_values: Vec<f64> = (0..n_params).map(|i| s_col[i]).collect();
singular_values.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
let s_max = singular_values[0];
let s_min = *singular_values.iter().rfind(|&&v| v > 0.0).unwrap_or(&0.0);
let condition_number = if s_min > 0.0 {
s_max / s_min
} else {
f64::INFINITY
};
let condition_number_xtx = condition_number * condition_number;
let condition_indices: Vec<f64> = singular_values
.iter()
.map(|&si| if si > 0.0 { s_max / si } else { f64::INFINITY })
.collect();
let severity = classify_condition_number(condition_number);
let warning = match severity {
ConditionSeverity::WellConditioned => None,
ConditionSeverity::Moderate => Some(format!(
"Moderate collinearity detected (κ = {:.1}). Results should be reliable but verify.",
condition_number
)),
ConditionSeverity::High => Some(format!(
"High collinearity detected (κ = {:.1}). Coefficients may be unstable. \
Consider removing collinear features or using regularization.",
condition_number
)),
ConditionSeverity::Severe => Some(format!(
"Severe collinearity detected (κ = {:.1}). Coefficients are likely unreliable. \
Use regularization (Ridge/Lasso) or remove highly correlated features.",
condition_number
)),
};
ConditionDiagnostic {
condition_number,
condition_number_xtx,
singular_values,
condition_indices,
severity,
warning,
}
}
pub fn classify_condition_number(cond: f64) -> ConditionSeverity {
if cond < 30.0 {
ConditionSeverity::WellConditioned
} else if cond < 100.0 {
ConditionSeverity::Moderate
} else if cond < 1000.0 {
ConditionSeverity::High
} else {
ConditionSeverity::Severe
}
}
pub fn variance_decomposition_proportions(x: &Mat<f64>, with_intercept: bool) -> Mat<f64> {
let x_design = if with_intercept {
let n = x.nrows();
let p = x.ncols();
let mut x_aug = Mat::zeros(n, p + 1);
for i in 0..n {
x_aug[(i, 0)] = 1.0;
for j in 0..p {
x_aug[(i, j + 1)] = x[(i, j)];
}
}
x_aug
} else {
x.clone()
};
let svd = match x_design.svd() {
Ok(svd) => svd,
Err(_) => return Mat::zeros(0, 0),
};
let v = svd.V().to_owned();
let s = svd.S();
let s_col = s.column_vector();
let n_params = s_col.nrows();
if n_params == 0 {
return Mat::zeros(0, 0);
}
let mut phi = Mat::zeros(n_params, n_params);
for j in 0..n_params {
for k in 0..n_params {
let s_k = s_col[k];
if s_k > 0.0 {
phi[(j, k)] = v[(j, k)] * v[(j, k)] / (s_k * s_k);
}
}
}
let mut proportions = Mat::zeros(n_params, n_params);
for j in 0..n_params {
let row_sum: f64 = (0..n_params).map(|k| phi[(j, k)]).sum();
if row_sum > 0.0 {
for k in 0..n_params {
proportions[(j, k)] = phi[(j, k)] / row_sum;
}
}
}
proportions
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_condition_number_orthogonal() {
let n = 100;
let x = Mat::from_fn(
n,
2,
|i, j| if j == 0 { i as f64 } else { 100.0 - i as f64 },
);
let cond = condition_number(&x, false);
assert!(cond < 10.0, "Expected low condition number, got {}", cond);
}
#[test]
fn test_condition_number_collinear() {
let n = 100;
let x = Mat::from_fn(n, 2, |i, j| {
if j == 0 {
i as f64
} else {
i as f64 + 0.001 }
});
let cond = condition_number(&x, false);
assert!(
cond > 100.0,
"Expected high condition number for collinear data, got {}",
cond
);
}
#[test]
fn test_condition_diagnostic() {
let n = 100;
let x = Mat::from_fn(n, 2, |i, j| (i + j) as f64);
let diag = condition_diagnostic(&x, true);
assert!(diag.condition_number > 0.0);
assert_eq!(diag.singular_values.len(), 3); assert_eq!(diag.condition_indices.len(), 3);
assert!((diag.condition_indices[0] - 1.0).abs() < 1e-10);
for ci in &diag.condition_indices {
assert!(*ci >= 1.0);
}
}
#[test]
fn test_classify_condition_number() {
assert_eq!(
classify_condition_number(10.0),
ConditionSeverity::WellConditioned
);
assert_eq!(classify_condition_number(50.0), ConditionSeverity::Moderate);
assert_eq!(classify_condition_number(500.0), ConditionSeverity::High);
assert_eq!(classify_condition_number(5000.0), ConditionSeverity::Severe);
}
#[test]
fn test_severity_descriptions() {
assert_eq!(
ConditionSeverity::WellConditioned.description(),
"Well-conditioned: numerically stable"
);
assert_eq!(
ConditionSeverity::Moderate.description(),
"Moderate collinearity: some instability possible"
);
assert_eq!(
ConditionSeverity::High.description(),
"High collinearity: numerical instability likely"
);
assert_eq!(
ConditionSeverity::Severe.description(),
"Severe collinearity: coefficients may be unreliable"
);
}
#[test]
fn test_condition_number_empty_matrix() {
let x = Mat::<f64>::zeros(0, 0);
let cond = condition_number(&x, false);
assert!(cond.is_infinite());
}
#[test]
fn test_condition_number_with_intercept() {
let n = 50;
let x = Mat::from_fn(n, 2, |i, j| (i + j) as f64 / 10.0);
let cond_no_int = condition_number(&x, false);
let cond_with_int = condition_number(&x, true);
assert!(cond_no_int.is_finite());
assert!(cond_with_int.is_finite());
}
#[test]
fn test_condition_diagnostic_warnings() {
let n = 100;
let x = Mat::from_fn(n, 2, |i, j| {
if j == 0 {
i as f64
} else {
i as f64 * 1.5 + (i % 5) as f64 }
});
let diag = condition_diagnostic(&x, true);
assert!(diag.condition_number > 0.0);
assert_eq!(diag.condition_number_xtx, diag.condition_number.powi(2));
}
#[test]
fn test_condition_diagnostic_severe_collinearity() {
let n = 100;
let x = Mat::from_fn(n, 2, |i, j| {
if j == 0 {
i as f64
} else {
i as f64 + 0.0001 }
});
let diag = condition_diagnostic(&x, false);
assert_eq!(diag.severity, ConditionSeverity::Severe);
assert!(diag.warning.is_some());
let warning = diag.warning.unwrap();
assert!(warning.contains("Severe collinearity"));
}
#[test]
fn test_condition_diagnostic_moderate_collinearity() {
let n = 100;
let x = Mat::from_fn(n, 2, |i, j| {
if j == 0 {
(i as f64).sin()
} else {
(i as f64 * 0.1).cos() * 10.0
}
});
let diag = condition_diagnostic(&x, true);
assert!(diag.condition_number > 0.0);
assert!(!diag.singular_values.is_empty());
}
#[test]
fn test_condition_diagnostic_high_collinearity() {
let n = 100;
let x = Mat::from_fn(n, 2, |i, j| {
if j == 0 {
i as f64
} else {
i as f64 + 0.1 * ((i % 10) as f64)
}
});
let diag = condition_diagnostic(&x, false);
if diag.severity != ConditionSeverity::WellConditioned {
assert!(diag.warning.is_some());
}
}
#[test]
fn test_variance_decomposition_proportions() {
let n = 50;
let x = Mat::from_fn(n, 2, |i, j| (i + j) as f64);
let vdp = variance_decomposition_proportions(&x, true);
assert_eq!(vdp.nrows(), 3);
assert_eq!(vdp.ncols(), 3);
for j in 0..vdp.nrows() {
let row_sum: f64 = (0..vdp.ncols()).map(|k| vdp[(j, k)]).sum();
assert!(
(row_sum - 1.0).abs() < 1e-6,
"Row {} sum = {}, expected 1.0",
j,
row_sum
);
}
}
#[test]
fn test_variance_decomposition_proportions_empty() {
let x = Mat::<f64>::zeros(0, 0);
let vdp = variance_decomposition_proportions(&x, false);
assert_eq!(vdp.nrows(), 0);
assert_eq!(vdp.ncols(), 0);
}
#[test]
fn test_variance_decomposition_proportions_no_intercept() {
let n = 50;
let x = Mat::from_fn(n, 2, |i, j| (i * (j + 1)) as f64);
let vdp = variance_decomposition_proportions(&x, false);
assert_eq!(vdp.nrows(), 2);
assert_eq!(vdp.ncols(), 2);
}
#[test]
fn test_condition_number_rank_deficient() {
let n = 50;
let x = Mat::from_fn(n, 3, |i, j| {
if j == 2 {
i as f64
} else {
i as f64 + j as f64
}
});
let diag = condition_diagnostic(&x, false);
assert!(
diag.condition_number > 1000.0 || diag.condition_number.is_infinite(),
"Expected high condition number for rank-deficient matrix"
);
}
#[test]
fn test_condition_indices_ordering() {
let n = 100;
let x = Mat::from_fn(n, 2, |i, j| (i + j) as f64);
let diag = condition_diagnostic(&x, true);
assert!((diag.condition_indices[0] - 1.0).abs() < 1e-10);
for i in 1..diag.condition_indices.len() {
assert!(
diag.condition_indices[i] >= diag.condition_indices[i - 1],
"Condition indices should be non-decreasing"
);
}
}
#[test]
fn test_singular_values_sorted() {
let n = 100;
let x = Mat::from_fn(n, 3, |i, j| (i * j + 1) as f64);
let diag = condition_diagnostic(&x, false);
for i in 1..diag.singular_values.len() {
assert!(
diag.singular_values[i] <= diag.singular_values[i - 1],
"Singular values should be sorted descending"
);
}
}
#[test]
fn test_condition_diagnostic_boundary_values() {
assert_eq!(
classify_condition_number(29.9),
ConditionSeverity::WellConditioned
);
assert_eq!(classify_condition_number(30.0), ConditionSeverity::Moderate);
assert_eq!(classify_condition_number(99.9), ConditionSeverity::Moderate);
assert_eq!(classify_condition_number(100.0), ConditionSeverity::High);
assert_eq!(classify_condition_number(999.9), ConditionSeverity::High);
assert_eq!(classify_condition_number(1000.0), ConditionSeverity::Severe);
}
}