use super::helpers::two_tailed_p_value;
use super::types::{DiagnosticTestResult, HarveyCollierMethod};
use crate::error::{Error, Result};
use crate::linalg::{vec_mean, Matrix};
pub fn harvey_collier_test(
y: &[f64],
x_vars: &[Vec<f64>],
method: HarveyCollierMethod,
) -> Result<DiagnosticTestResult> {
match method {
HarveyCollierMethod::R => harvey_collier_test_r(y, x_vars),
HarveyCollierMethod::Python => harvey_collier_test_python(y, x_vars),
}
}
#[allow(clippy::needless_range_loop)]
fn harvey_collier_test_r(y: &[f64], x_vars: &[Vec<f64>]) -> Result<DiagnosticTestResult> {
let n = y.len();
let k = x_vars.len(); let p = k + 1;
if n <= p + 1 {
return Err(Error::InsufficientData {
required: p + 2,
available: n,
});
}
super::helpers::validate_regression_data(y, x_vars)?;
let mut x_data = vec![0.0; n * p];
for row in 0..n {
x_data[row * p] = 1.0;
for (col, x_var) in x_vars.iter().enumerate() {
x_data[row * p + col + 1] = x_var[row];
}
}
let y_sorted = y.to_vec();
let x_sorted = x_data;
let x_r1 = Matrix::new(p, p, x_sorted[0..(p * p)].to_vec());
let mut x1 = match x_r1.chol2inv_from_qr() {
Some(inv) => inv,
None => return Err(Error::SingularMatrix),
};
let xt = x_r1.transpose();
let xty = xt.mul_vec(&y_sorted[0..p]);
let mut betar = x1.mul_vec(&xty);
let mut xr: Vec<f64> = (0..p).map(|j| x_sorted[p * p + j]).collect();
let mut fr = 1.0;
for i in 0..p {
for j in 0..p {
fr += xr[i] * x1.get(i, j) * xr[j];
}
}
let mut w = vec![0.0_f64; n - p];
let y_pred = (0..p).map(|j| betar[j] * xr[j]).sum::<f64>();
w[0] = (y_sorted[p] - y_pred) / fr.sqrt();
for r in (p + 1)..n {
let mut u = vec![0.0; p];
for i in 0..p {
for j in 0..p {
u[i] += x1.get(i, j) * xr[j];
}
}
for i in 0..p {
for j in 0..p {
let update = (u[i] * u[j]) / fr;
x1.set(i, j, x1.get(i, j) - update);
}
}
let w_prev = w[r - p - 1];
let scale = w_prev * fr.sqrt();
let mut v = vec![0.0; p];
for i in 0..p {
for j in 0..p {
v[i] += x1.get(i, j) * xr[j];
}
}
for i in 0..p {
betar[i] += v[i] * scale;
}
xr = (0..p).map(|j| x_sorted[r * p + j]).collect();
fr = 1.0;
for i in 0..p {
for j in 0..p {
fr += xr[i] * x1.get(i, j) * xr[j];
}
}
let y_pred_r = (0..p).map(|j| betar[j] * xr[j]).sum::<f64>();
w[r - p] = (y_sorted[r] - y_pred_r) / fr.sqrt();
}
let n_rr = w.len();
if n_rr < 2 {
return Err(Error::InsufficientData {
required: 2,
available: n_rr,
});
}
let mean_w = vec_mean(&w);
let var_w = w.iter().map(|&v| (v - mean_w).powi(2)).sum::<f64>() / ((n_rr - 1) as f64);
if !var_w.is_finite() || var_w <= 0.0 {
return Err(Error::InvalidInput(
"Invalid variance in Harvey-Collier test".to_string(),
));
}
let df = (n - p - 1) as f64; let sigma = (var_w * ((n_rr - 1) as f64) / df).sqrt();
let resr: Vec<f64> = w.iter().map(|&v| v / sigma).collect();
let sum_resr = resr.iter().sum::<f64>();
let mean_resr = sum_resr / (n_rr as f64);
let var_resr = resr.iter().map(|&v| (v - mean_resr).powi(2)).sum::<f64>() / ((n_rr - 1) as f64);
if !var_resr.is_finite() || var_resr <= 0.0 {
return Err(Error::InvalidInput(
"Invalid scaled variance in Harvey-Collier test".to_string(),
));
}
let harv = (sum_resr / ((n - p) as f64).sqrt()).abs() / var_resr.sqrt();
let p_value = two_tailed_p_value(harv, df);
let alpha = 0.05;
let passed = p_value > alpha;
let (interpretation, guidance) = if passed {
(
format!(
"p-value = {:.4} is greater than {:.2}. Cannot reject H0. No significant evidence of functional misspecification.",
p_value, alpha
),
"The linear model specification appears appropriate."
)
} else {
(
format!(
"p-value = {:.4} is less than or equal to {:.2}. Reject H0. Significant evidence of functional misspecification.",
p_value, alpha
),
"Consider adding polynomial terms, transforming variables, or using non-linear modeling."
)
};
Ok(DiagnosticTestResult {
test_name: "Harvey-Collier Test for Linearity".to_string(),
statistic: harv,
p_value,
passed,
interpretation,
guidance: guidance.to_string(),
})
}
#[allow(clippy::needless_range_loop)]
fn harvey_collier_test_python(y: &[f64], x_vars: &[Vec<f64>]) -> Result<DiagnosticTestResult> {
let n = y.len();
let k = x_vars.len(); let p = k + 1;
if n <= p + 1 {
return Err(Error::InsufficientData {
required: p + 2,
available: n,
});
}
super::helpers::validate_regression_data(y, x_vars)?;
let mut x_data = vec![0.0; n * p];
for row in 0..n {
x_data[row * p] = 1.0;
for (col, x_var) in x_vars.iter().enumerate() {
x_data[row * p + col + 1] = x_var[row];
}
}
let y_sorted = y.to_vec();
let x_sorted = x_data;
let x_r1 = Matrix::new(p, p, x_sorted[0..(p * p)].to_vec());
let mut x1 = match x_r1.chol2inv_from_qr() {
Some(inv) => inv,
None => return Err(Error::SingularMatrix),
};
let xt = x_r1.transpose();
let xty = xt.mul_vec(&y_sorted[0..p]);
let mut betar = x1.mul_vec(&xty);
let mut xr: Vec<f64> = (0..p).map(|j| x_sorted[p * p + j]).collect();
let mut fr = 1.0;
for i in 0..p {
for j in 0..p {
fr += xr[i] * x1.get(i, j) * xr[j];
}
}
let mut w = vec![0.0_f64; n - p];
let y_pred = (0..p).map(|j| betar[j] * xr[j]).sum::<f64>();
w[0] = (y_sorted[p] - y_pred) / fr.sqrt();
for r in (p + 1)..n {
let mut u = vec![0.0; p];
for i in 0..p {
for j in 0..p {
u[i] += x1.get(i, j) * xr[j];
}
}
for i in 0..p {
for j in 0..p {
let update = (u[i] * u[j]) / fr;
x1.set(i, j, x1.get(i, j) - update);
}
}
let w_prev = w[r - p - 1];
let scale = w_prev * fr.sqrt();
let mut v = vec![0.0; p];
for i in 0..p {
for j in 0..p {
v[i] += x1.get(i, j) * xr[j];
}
}
for i in 0..p {
betar[i] += v[i] * scale;
}
xr = (0..p).map(|j| x_sorted[r * p + j]).collect();
fr = 1.0;
for i in 0..p {
for j in 0..p {
fr += xr[i] * x1.get(i, j) * xr[j];
}
}
let y_pred_r = (0..p).map(|j| betar[j] * xr[j]).sum::<f64>();
w[r - p] = (y_sorted[r] - y_pred_r) / fr.sqrt();
}
let n_rr = w.len();
if n_rr < 4 {
return Err(Error::InsufficientData {
required: 4,
available: n,
});
}
let mean_w = vec_mean(&w);
let var_w = w.iter().map(|&v| (v - mean_w).powi(2)).sum::<f64>() / ((n_rr - 1) as f64);
if !var_w.is_finite() || var_w <= 0.0 {
return Err(Error::InvalidInput(
"Invalid variance in Harvey-Collier test".to_string(),
));
}
let df = (n - p - 1) as f64;
let sigma = (var_w * ((n_rr - 1) as f64) / df).sqrt();
let resr: Vec<f64> = w.iter().map(|&v| v / sigma).collect();
let skip_offset = 3usize.saturating_sub(p);
let resr_test: Vec<f64> = resr.iter().skip(skip_offset).cloned().collect();
let n_test = resr_test.len();
if n_test < 2 {
return Err(Error::InsufficientData {
required: 5, available: n,
});
}
let sum_resr = resr_test.iter().sum::<f64>();
let mean_resr = sum_resr / (n_test as f64);
let var_resr = resr_test
.iter()
.map(|&v| (v - mean_resr).powi(2))
.sum::<f64>()
/ ((n_test - 1) as f64);
if !var_resr.is_finite() || var_resr <= 0.0 {
return Err(Error::InvalidInput(
"Invalid scaled variance in Harvey-Collier test (Python)".to_string(),
));
}
let harv = sum_resr / (n_test as f64).sqrt() / var_resr.sqrt();
let p_value = two_tailed_p_value(harv.abs(), df);
let alpha = 0.05;
let passed = p_value > alpha;
let (interpretation, guidance) = if passed {
(
format!(
"p-value = {:.4} is greater than {:.2}. Cannot reject H0. No significant evidence of functional misspecification.",
p_value, alpha
),
"The linear model specification appears appropriate."
)
} else {
(
format!(
"p-value = {:.4} is less than or equal to {:.2}. Reject H0. Significant evidence of functional misspecification.",
p_value, alpha
),
"Consider adding polynomial terms, transforming variables, or using non-linear modeling."
)
};
Ok(DiagnosticTestResult {
test_name: "Harvey-Collier Test for Linearity (Python variant)".to_string(),
statistic: harv,
p_value,
passed,
interpretation,
guidance: guidance.to_string(),
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_harvey_collier_insufficient_data() {
let y = vec![1.0, 2.0];
let x = vec![1.0, 2.0];
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R);
assert!(result.is_err());
if let Err(Error::InsufficientData { required, available }) = result {
assert_eq!(required, 4); assert_eq!(available, 2);
} else {
panic!("Expected InsufficientData error");
}
}
#[test]
fn test_harvey_collier_exact_minimum_data() {
let y = vec![1.001, 1.999, 3.002, 3.998];
let x = vec![1.0, 2.0, 3.0, 4.0];
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R);
assert!(result.is_ok());
let result = result.unwrap();
assert!(result.p_value > 0.0);
assert!(result.statistic >= 0.0);
}
#[test]
fn test_harvey_collier_linear_relationship() {
let y: Vec<f64> = (1..=30).map(|i| {
1.0 + 2.0 * i as f64 + 0.01 * ((i % 7) as f64 - 3.0) }).collect();
let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R).unwrap();
assert!(result.p_value > 0.05);
assert!(result.passed);
}
#[test]
fn test_harvey_collier_quadratic_relationship() {
let y: Vec<f64> = (1..=30).map(|i| 1.0 + i as f64 + 0.1 * i as f64 * i as f64).collect();
let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R).unwrap();
assert!(result.p_value < 0.1);
}
#[test]
fn test_harvey_collier_multiple_predictors() {
let y: Vec<f64> = (1..=30).map(|i| {
1.0 + 2.0 * i as f64 + 0.3 * (i as f64) * (i as f64) + 0.01 * ((i % 5) as f64 - 2.0)
}).collect();
let x1: Vec<f64> = (1..=30).map(|i| i as f64).collect();
let x2: Vec<f64> = (1..=30).map(|i| (i as f64) * (i as f64)).collect();
let result = harvey_collier_test(&y, &[x1, x2], HarveyCollierMethod::R).unwrap();
assert!(result.p_value > 0.01);
}
#[test]
fn test_harvey_collier_multiple_predictors_nonlinear() {
let y: Vec<f64> = (1..=30).map(|i| {
1.0 + i as f64 + (i as f64 / 2.0) + 0.05 * i as f64 * i as f64
}).collect();
let x1: Vec<f64> = (1..=30).map(|i| i as f64).collect();
let x2: Vec<f64> = (1..=30).map(|i| (i as f64) * (i as f64)).collect();
let result = harvey_collier_test(&y, &[x1, x2], HarveyCollierMethod::R).unwrap();
assert!(result.p_value < 0.15);
}
#[test]
fn test_harvey_collier_three_predictors() {
let y: Vec<f64> = (1..=20).map(|i| {
1.0 + 2.0 * i as f64 + 0.3 * (i as f64).powi(2) + 0.5 * ((i % 4) as f64)
}).collect();
let x1: Vec<f64> = (1..=20).map(|i| i as f64).collect();
let x2: Vec<f64> = (1..=20).map(|i| (i as f64) * (i as f64)).collect();
let x3: Vec<f64> = (1..=20).map(|i| (i % 4) as f64).collect();
let result = harvey_collier_test(&y, &[x1, x2, x3], HarveyCollierMethod::R);
assert!(result.is_ok());
}
#[test]
fn test_harvey_collier_small_dataset() {
let y = vec![1.0, 2.5, 4.0, 5.5, 7.0];
let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R);
assert!(result.is_ok());
let result = result.unwrap();
assert!(result.p_value > 0.0);
assert!(result.statistic.is_finite());
}
#[test]
fn test_harvey_collier_constant_y() {
let y = vec![5.0; 30];
let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R);
assert!(result.is_err() || result.unwrap().p_value >= 0.0);
}
#[test]
fn test_harvey_collier_interpretation_and_guidance() {
let y: Vec<f64> = (1..=30).map(|i| 1.0 + 2.0 * i as f64).collect();
let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R).unwrap();
assert!(result.interpretation.contains("p-value"));
assert!(result.guidance.contains("appropriate") || result.guidance.contains("polynomial"));
assert_eq!(result.test_name, "Harvey-Collier Test for Linearity");
}
#[test]
fn test_harvey_collier_perfect_collinearity() {
let y: Vec<f64> = (1..=10).map(|i| 1.0 + 2.0 * i as f64).collect();
let x1: Vec<f64> = (1..=10).map(|i| i as f64).collect();
let x2: Vec<f64> = (1..=10).map(|i| 2.0 * i as f64).collect();
let result = harvey_collier_test(&y, &[x1, x2], HarveyCollierMethod::R);
match result {
Ok(r) => {
assert!(r.p_value >= 0.0);
}
Err(Error::SingularMatrix) => {
}
Err(_) => {
panic!("Unexpected error type");
}
}
}
#[test]
fn test_harvey_collier_exponential_relationship() {
let y: Vec<f64> = (0..30).map(|i| (1.0 + 0.1 * i as f64).exp()).collect();
let x: Vec<f64> = (0..30).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R).unwrap();
assert!(result.p_value < 0.05);
assert!(!result.passed);
}
#[test]
fn test_harvey_collier_logarithmic_relationship() {
let y: Vec<f64> = (1..=30).map(|i| 1.0 + 2.0 * (i as f64).ln()).collect();
let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R).unwrap();
assert!(result.p_value < 0.15);
}
#[test]
#[cfg(not(target_arch = "wasm32"))]
fn test_harvey_collier_noisy_linear() {
use rand::Rng;
use rand::SeedableRng;
let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(42);
let y: Vec<f64> = (1..=50).map(|i| {
1.0 + 2.0 * i as f64 + (rng.gen::<f64>() - 0.5) * 2.0
}).collect();
let x: Vec<f64> = (1..=50).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R).unwrap();
assert!(result.p_value > 0.0);
}
#[test]
fn test_harvey_collier_sin_relationship() {
let y: Vec<f64> = (1..=40).map(|i| 5.0 + 2.0 * (0.2 * i as f64).sin()).collect();
let x: Vec<f64> = (1..=40).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R).unwrap();
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
assert!(result.statistic.is_finite());
}
#[test]
fn test_harvey_collier_single_predictor_minimum_obs() {
let y = vec![1.0, 2.0, 4.0, 5.0];
let x = vec![1.0, 2.0, 3.0, 4.0];
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R);
assert!(result.is_ok());
}
#[test]
fn test_harvey_collier_result_structure() {
let y: Vec<f64> = (1..=20).map(|i| 1.0 + 2.0 * i as f64).collect();
let x: Vec<f64> = (1..=20).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R).unwrap();
assert!(!result.test_name.is_empty());
assert!(result.statistic >= 0.0);
assert!(result.p_value >= 0.0);
assert!(result.p_value <= 1.0);
assert!(result.interpretation.contains("p-value"));
assert!(!result.guidance.is_empty());
}
#[test]
fn test_harvey_collier_large_dataset() {
let y: Vec<f64> = (1..=200).map(|i| 1.0 + 2.0 * i as f64 + 0.001 * i as f64 * i as f64).collect();
let x: Vec<f64> = (1..=200).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R).unwrap();
assert!(result.p_value < 0.05);
}
#[test]
fn test_harvey_collier_negative_values() {
let y: Vec<f64> = (-10..=10).map(|i| 1.0 + 2.0 * i as f64 + 0.01 * ((i as f64) % 3.0 - 1.0)).collect();
let x: Vec<f64> = (-10..=10).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R).unwrap();
assert!(result.p_value > 0.01); }
#[test]
fn test_harvey_collier_step_function() {
let y: Vec<f64> = (1..=30).map(|i| if i < 15 { 5.0 } else { 10.0 }).collect();
let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
let result = harvey_collier_test(&y, &[x], HarveyCollierMethod::R).unwrap();
assert!(result.p_value >= 0.0 && result.p_value <= 1.0);
}
}