use super::helpers::{compute_rss, f_p_value, fit_ols};
use super::types::DiagnosticTestResult;
use crate::error::{Error, Result};
use crate::linalg::Matrix;
/// Selects which auxiliary regressors the RESET test raises to powers.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ResetType {
    /// Powers of the restricted model's fitted values (classic Ramsey RESET).
    Fitted,
    /// Powers of each individual regressor.
    Regressor,
    /// Powers of the scores on the first principal component of the regressors.
    PrincipalComponent,
}

impl ResetType {
    /// Short lowercase label for this variant, used when formatting the
    /// test name (e.g. `"fitted"`, `"regressor"`, `"princomp"`).
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match self {
            Self::Fitted => "fitted",
            Self::Regressor => "regressor",
            Self::PrincipalComponent => "princomp",
        }
    }
}
/// Ramsey RESET test for functional-form misspecification.
///
/// Fits the restricted model `y ~ [1 | X]`, augments it with auxiliary
/// regressors `Z` (powers of fitted values, regressors, or the first
/// principal component, per `type_`), and compares the two fits with an
/// F-test on the joint significance of `Z`.
///
/// # Errors
/// Returns `Error::InvalidInput` when `powers` is empty or contains a value
/// `<= 1`, propagates validation errors for the regression data, and returns
/// `Error::InsufficientData` when `n <= p + q` so the unrestricted model
/// cannot be estimated.
#[allow(clippy::similar_names)]
pub fn reset_test(
    y: &[f64],
    x_vars: &[Vec<f64>],
    powers: &[usize],
    type_: ResetType,
) -> Result<DiagnosticTestResult> {
    // Validate the requested powers before touching the data.
    if powers.is_empty() {
        return Err(Error::InvalidInput(
            "Powers array cannot be empty".to_string(),
        ));
    }
    if let Some(&power) = powers.iter().find(|&&p| p <= 1) {
        return Err(Error::InvalidInput(format!(
            "All powers must be greater than 1, got {power}"
        )));
    }
    super::helpers::validate_regression_data(y, x_vars)?;

    let n = y.len();
    let k = x_vars.len();
    let p = k + 1; // intercept + regressors

    // Row-major design matrix [1 | X].
    let mut x_data = Vec::with_capacity(n * p);
    for row in 0..n {
        x_data.push(1.0);
        x_data.extend(x_vars.iter().map(|col| col[row]));
    }
    let x = Matrix::new(n, p, x_data);

    // Restricted fit: the model under test.
    let beta_restricted = fit_ols(y, &x)?;
    let rss_restricted = compute_rss(y, &x, &beta_restricted)?;

    // Auxiliary regressors for the chosen RESET variant.
    let z = generate_z_matrix(y, x_vars, &x, powers, type_, &beta_restricted)?;
    let q = z.cols;
    if n <= p + q {
        return Err(Error::InsufficientData {
            required: p + q + 1,
            available: n,
        });
    }

    // Unrestricted fit on the augmented design [X | Z].
    let xz = Matrix::new(n, p + q, augment_x_with_z(&x, &z));
    let beta_unrestricted = fit_ols(y, &xz)?;
    let rss_unrestricted = compute_rss(y, &xz, &beta_unrestricted)?;

    // F statistic with (q, n - p - q) degrees of freedom.
    let df1 = q as f64;
    let df2 = (n - p - q) as f64;
    let raw_f = (df2 / df1) * ((rss_restricted - rss_unrestricted) / rss_unrestricted);
    // Guard against numerical artefacts (NaN/inf or slightly negative values).
    let f_stat = if raw_f.is_finite() && raw_f >= 0.0 {
        raw_f
    } else {
        0.0
    };
    let p_value = f_p_value(f_stat, df1, df2);

    let alpha = 0.05;
    let passed = p_value > alpha;
    let powers_str = powers
        .iter()
        .map(ToString::to_string)
        .collect::<Vec<_>>()
        .join(", ");
    let (interpretation, guidance) = if passed {
        (
            format!(
                "p-value = {p_value:.4} is greater than {alpha:.2}. Cannot reject H0. No significant evidence of model misspecification."
            ),
            "The current model specification appears adequate. Consider keeping the current functional form.",
        )
    } else {
        (
            format!(
                "p-value = {p_value:.4} is less than or equal to {alpha:.2}. Reject H0. Significant evidence of model misspecification."
            ),
            "Consider adding polynomial terms, transforming variables, including omitted predictors, or using non-linear modeling.",
        )
    };
    Ok(DiagnosticTestResult {
        test_name: format!(
            "RESET Test (type={}, powers=[{}])",
            type_.as_str(),
            powers_str
        ),
        statistic: f_stat,
        p_value,
        passed,
        interpretation,
        guidance: guidance.to_string(),
    })
}
/// Builds the auxiliary regressor matrix `Z` whose joint significance the
/// RESET test evaluates.
///
/// Depending on `type_`, the columns of `Z` are powers of:
/// - the fitted values of the restricted model (`Fitted`),
/// - each individual regressor (`Regressor`), or
/// - the scores on the first principal component of X (`PrincipalComponent`).
///
/// # Errors
/// Propagates `Error::SingularMatrix` from the principal-component power
/// iteration when the centered cross-product matrix is degenerate.
#[allow(clippy::needless_range_loop)]
fn generate_z_matrix(
    y: &[f64],
    x_vars: &[Vec<f64>],
    x: &Matrix,
    powers: &[usize],
    type_: ResetType,
    beta: &[f64],
) -> Result<Matrix> {
    let n = y.len();
    let k = x_vars.len();
    match type_ {
        ResetType::Fitted => {
            // One column per requested power of the fitted values, stored
            // row-major. (A previous version also filled a column-major
            // buffer that was never read; that dead work is removed.)
            let fitted = x.mul_vec(beta);
            let mut z_data = vec![0.0; n * powers.len()];
            for row in 0..n {
                for (col, &power) in powers.iter().enumerate() {
                    z_data[row * powers.len() + col] = fitted[row].powi(power as i32);
                }
            }
            Ok(Matrix::new(n, powers.len(), z_data))
        }
        ResetType::Regressor => {
            // One column per (regressor, power) pair.
            let q = powers.len() * k;
            let mut z_data = vec![0.0; n * q];
            for (var_idx, x_var) in x_vars.iter().enumerate() {
                for (power_idx, &power) in powers.iter().enumerate() {
                    let col_idx = var_idx * powers.len() + power_idx;
                    for (row_idx, &x_val) in x_var.iter().enumerate() {
                        z_data[row_idx * q + col_idx] = x_val.powi(power as i32);
                    }
                }
            }
            Ok(Matrix::new(n, q, z_data))
        }
        ResetType::PrincipalComponent => {
            // Assemble the raw regressor matrix (no intercept column).
            let mut x_reg_data = vec![0.0; n * k];
            for row in 0..n {
                for (col, x_var) in x_vars.iter().enumerate() {
                    x_reg_data[row * k + col] = x_var[row];
                }
            }
            let x_reg = Matrix::new(n, k, x_reg_data.clone());
            // Center each column so the principal direction reflects the
            // covariance structure rather than raw column magnitudes.
            let mut x_centered_data = x_reg_data;
            for col in 0..k {
                let col_mean =
                    (0..n).map(|row| x_reg.get(row, col)).sum::<f64>() / n as f64;
                for row in 0..n {
                    x_centered_data[row * k + col] -= col_mean;
                }
            }
            let x_centered = Matrix::new(n, k, x_centered_data);
            let xt_x = x_centered.transpose().matmul(&x_centered);
            let pc1 = first_principal_component(&xt_x, k)?;
            // Scores: projection of each observation onto PC1.
            // NOTE(review): scores use the *uncentered* X while the direction
            // comes from centered X — preserved from the original; confirm
            // this is intentional.
            let mut pc1_scores = vec![0.0; n];
            for row in 0..n {
                let mut sum = 0.0;
                for col in 0..k {
                    sum += x_reg.get(row, col) * pc1[col];
                }
                pc1_scores[row] = sum;
            }
            // One column per requested power of the PC1 scores.
            let mut z_data = vec![0.0; n * powers.len()];
            for (power_idx, &power) in powers.iter().enumerate() {
                for row in 0..n {
                    z_data[row * powers.len() + power_idx] =
                        pc1_scores[row].powi(power as i32);
                }
            }
            Ok(Matrix::new(n, powers.len(), z_data))
        }
    }
}
/// Approximates the dominant eigenvector of a symmetric `k x k` matrix via
/// power iteration, returning a unit-norm direction vector.
///
/// # Errors
/// Returns `Error::SingularMatrix` when the iterate collapses to (near) zero,
/// i.e. the matrix is effectively singular along the search direction.
#[allow(clippy::needless_range_loop)]
fn first_principal_component(matrix: &Matrix, k: usize) -> Result<Vec<f64>> {
    const MAX_ITER: usize = 1000;
    const TOLERANCE: f64 = 1e-10;
    // Start from a uniform direction; repeated multiplication by the matrix
    // pulls the iterate toward the dominant eigenvector.
    let mut v = vec![1.0 / k as f64; k];
    for _ in 0..MAX_ITER {
        let previous = v.clone();
        let next = matrix.mul_vec(&v);
        let norm = next.iter().map(|&c| c * c).sum::<f64>().sqrt();
        if norm < 1e-14 {
            return Err(Error::SingularMatrix);
        }
        v = next.into_iter().map(|c| c / norm).collect();
        // Stop once successive normalized iterates agree element-wise.
        let delta: f64 = v
            .iter()
            .zip(&previous)
            .map(|(a, b)| (a - b).abs())
            .sum();
        if delta < TOLERANCE {
            break;
        }
    }
    Ok(v)
}
/// Concatenates `x` and `z` column-wise, returning the row-major data of the
/// augmented design matrix `[X | Z]`.
fn augment_x_with_z(x: &Matrix, z: &Matrix) -> Vec<f64> {
    let n = x.rows;
    let p = x.cols;
    let q = z.cols;
    let mut combined = Vec::with_capacity(n * (p + q));
    for row in 0..n {
        // X columns first, then the Z columns for the same row.
        combined.extend((0..p).map(|col| x.get(row, col)));
        combined.extend((0..q).map(|col| z.get(row, col)));
    }
    combined
}
#[cfg(test)]
mod tests {
    use super::*;

    // A perfectly linear relationship should not be flagged as misspecified.
    #[test]
    fn test_reset_test_fitted() {
        let y: Vec<f64> = (1..=30).map(|i| 1.0 + 2.0 * i as f64).collect();
        let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
        let result = reset_test(&y, &[x], &[2, 3], ResetType::Fitted).unwrap();
        assert!(result.p_value > 0.01);
    }

    // A quadratic data-generating process fitted with a linear model should
    // be detected (small p-value).
    #[test]
    fn test_reset_test_quadratic() {
        let y: Vec<f64> = (1..=30).map(|i| 1.0 + i as f64 + 0.1 * i as f64 * i as f64).collect();
        let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
        let result = reset_test(&y, &[x], &[2, 3], ResetType::Fitted).unwrap();
        assert!(result.p_value < 0.05);
    }

    // Powers of 1 and an empty powers list are both rejected as invalid input.
    #[test]
    fn test_reset_invalid_powers() {
        let y = vec![1.0, 2.0, 3.0];
        let x = vec![1.0, 2.0, 3.0];
        let result = reset_test(&y, &[x.clone()], &[1], ResetType::Fitted);
        assert!(result.is_err());
        let result = reset_test(&y, &[x], &[], ResetType::Fitted);
        assert!(result.is_err());
    }

    // Power iteration on the identity matrix must return a unit-norm vector.
    #[test]
    fn test_first_principal_component() {
        let m = Matrix::new(2, 2, vec![1.0, 0.0, 0.0, 1.0]);
        let pc1 = first_principal_component(&m, 2).unwrap();
        let norm: f64 = pc1.iter().map(|&x| x * x).sum::<f64>().sqrt();
        assert!((norm - 1.0).abs() < 0.01);
    }

    // Each variant maps to its expected display label.
    #[test]
    fn test_reset_type_as_str() {
        assert_eq!(ResetType::Fitted.as_str(), "fitted");
        assert_eq!(ResetType::Regressor.as_str(), "regressor");
        assert_eq!(ResetType::PrincipalComponent.as_str(), "princomp");
    }

    // n = 3 observations cannot support p = 3 base terms plus q = 3 added
    // terms; expects InsufficientData with required = p + q + 1 = 7.
    #[test]
    fn test_reset_insufficient_data_for_additional_terms() {
        let y = vec![1.0, 2.0, 3.0];
        let x1 = vec![1.0, 2.0, 3.0];
        let x2 = vec![2.0, 4.0, 6.0];
        let result = reset_test(&y, &[x1, x2], &[2, 3, 4], ResetType::Fitted);
        assert!(result.is_err());
        if let Err(Error::InsufficientData { required, available }) = result {
            assert_eq!(required, 7); assert_eq!(available, 3);
        } else {
            panic!("Expected InsufficientData error");
        }
    }

    // Regressor variant: linear data passes and the test name records the type.
    #[test]
    fn test_reset_regressor_type() {
        let y: Vec<f64> = (1..=30).map(|i| 1.0 + 2.0 * i as f64).collect();
        let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
        let result = reset_test(&y, &[x.clone()], &[2, 3], ResetType::Regressor).unwrap();
        assert!(result.p_value > 0.01);
        assert!(result.test_name.contains("regressor"));
    }

    // Regressor variant also detects quadratic misspecification.
    #[test]
    fn test_reset_regressor_type_quadratic() {
        let y: Vec<f64> = (1..=30).map(|i| 1.0 + i as f64 + 0.1 * i as f64 * i as f64).collect();
        let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
        let result = reset_test(&y, &[x], &[2, 3], ResetType::Regressor).unwrap();
        assert!(result.p_value < 0.05);
    }

    // Regressor variant with two (collinear-in-effect) predictors on linear data.
    #[test]
    fn test_reset_regressor_multiple_predictors() {
        let y: Vec<f64> = (1..=30).map(|i| 1.0 + 2.0 * i as f64 + 0.5 * (i as f64 / 2.0)).collect();
        let x1: Vec<f64> = (1..=30).map(|i| i as f64).collect();
        let x2: Vec<f64> = (1..=30).map(|i| i as f64 / 2.0).collect();
        let result = reset_test(&y, &[x1, x2], &[2], ResetType::Regressor).unwrap();
        assert!(result.p_value > 0.01);
    }

    // Principal-component variant: linear data passes; name contains "princomp".
    #[test]
    fn test_reset_principal_component_type() {
        let y: Vec<f64> = (1..=30).map(|i| 1.0 + 2.0 * i as f64).collect();
        let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
        let result = reset_test(&y, &[x.clone()], &[2, 3], ResetType::PrincipalComponent).unwrap();
        assert!(result.p_value > 0.01);
        assert!(result.test_name.contains("princomp"));
    }

    // Principal-component variant also detects quadratic misspecification.
    #[test]
    fn test_reset_principal_component_quadratic() {
        let y: Vec<f64> = (1..=30).map(|i| 1.0 + i as f64 + 0.1 * i as f64 * i as f64).collect();
        let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
        let result = reset_test(&y, &[x], &[2, 3], ResetType::PrincipalComponent).unwrap();
        assert!(result.p_value < 0.05);
    }

    // Principal-component variant with two predictors on linear data.
    #[test]
    fn test_reset_principal_component_multiple_predictors() {
        let y: Vec<f64> = (1..=30).map(|i| 1.0 + 2.0 * i as f64 + 0.5 * (i as f64 / 2.0)).collect();
        let x1: Vec<f64> = (1..=30).map(|i| i as f64).collect();
        let x2: Vec<f64> = (1..=30).map(|i| i as f64 / 2.0).collect();
        let result = reset_test(&y, &[x1, x2], &[2], ResetType::PrincipalComponent).unwrap();
        assert!(result.p_value > 0.01);
    }

    // A single (non-contiguous) power works for all three variants.
    #[test]
    fn test_reset_single_power() {
        let y: Vec<f64> = (1..=30).map(|i| 1.0 + 2.0 * i as f64).collect();
        let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
        let result_fitted = reset_test(&y, &[x.clone()], &[4], ResetType::Fitted).unwrap();
        assert!(result_fitted.p_value > 0.01);
        let result_regressor = reset_test(&y, &[x.clone()], &[4], ResetType::Regressor).unwrap();
        assert!(result_regressor.p_value > 0.01);
        let result_pc = reset_test(&y, &[x], &[4], ResetType::PrincipalComponent).unwrap();
        assert!(result_pc.p_value > 0.01);
    }

    // Higher powers (3..5) still pick up a mild quadratic term.
    #[test]
    fn test_reset_higher_powers() {
        let y: Vec<f64> = (1..=50).map(|i| 1.0 + 2.0 * i as f64 + 0.01 * i as f64 * i as f64).collect();
        let x: Vec<f64> = (1..=50).map(|i| i as f64).collect();
        let result = reset_test(&y, &[x], &[3, 4, 5], ResetType::Fitted).unwrap();
        assert!(result.p_value < 0.1);
    }

    // A near-zero matrix drives the iterate's norm below the 1e-14 cutoff.
    #[test]
    fn test_first_principal_component_singular_matrix() {
        let m = Matrix::new(2, 2, vec![1e-20, 0.0, 0.0, 1e-20]);
        let result = first_principal_component(&m, 2);
        assert!(result.is_err());
        if let Err(Error::SingularMatrix) = result {
        } else {
            panic!("Expected SingularMatrix error");
        }
    }

    // On clean linear data all three variants should agree that the
    // specification is adequate.
    #[test]
    fn test_reset_all_three_types_consistent() {
        let y: Vec<f64> = (1..=40).map(|i| 1.0 + 2.0 * i as f64).collect();
        let x: Vec<f64> = (1..=40).map(|i| i as f64).collect();
        let result_fitted = reset_test(&y, &[x.clone()], &[2, 3], ResetType::Fitted).unwrap();
        let result_regressor = reset_test(&y, &[x.clone()], &[2, 3], ResetType::Regressor).unwrap();
        let result_pc = reset_test(&y, &[x], &[2, 3], ResetType::PrincipalComponent).unwrap();
        assert!(result_fitted.p_value > 0.05);
        assert!(result_regressor.p_value > 0.05);
        assert!(result_pc.p_value > 0.05);
    }

    // The result's human-readable fields mention the key facts: p-value,
    // guidance text, test type, and the powers used.
    #[test]
    fn test_reset_interpretation_and_guidance() {
        let y: Vec<f64> = (1..=30).map(|i| 1.0 + 2.0 * i as f64).collect();
        let x: Vec<f64> = (1..=30).map(|i| i as f64).collect();
        let result = reset_test(&y, &[x], &[2, 3], ResetType::Fitted).unwrap();
        assert!(result.interpretation.contains("p-value"));
        assert!(result.guidance.contains("adequate") || result.guidance.contains("polynomial"));
        assert!(result.test_name.contains("fitted"));
        assert!(result.test_name.contains("2"));
        assert!(result.test_name.contains("3"));
    }

    // Power boundary: 0 and 1 are rejected; 2 is the smallest accepted power.
    #[test]
    fn test_reset_boundary_power_values() {
        let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let result = reset_test(&y, &[x.clone()], &[0], ResetType::Fitted);
        assert!(result.is_err());
        let result = reset_test(&y, &[x.clone()], &[1], ResetType::Fitted);
        assert!(result.is_err());
        let result = reset_test(&y, &[x], &[2], ResetType::Fitted);
        assert!(result.is_ok());
    }
}