use super::helpers::fit_ols;
use super::types::DiagnosticTestResult;
use crate::distributions::normal_cdf;
use crate::error::{Error, Result};
use crate::linalg::{vec_mean, Matrix};
/// Numerically stable `ln(Φ(z))` for the standard normal CDF.
fn log_normal_cdf(z: f64) -> f64 {
    // Floor slightly above ln(smallest positive f64) so downstream sums
    // never see -inf.
    const MIN_LOG: f64 = -745.0;
    if z >= 0.0 {
        // Φ(z) >= 0.5 here, so taking ln directly is well conditioned.
        normal_cdf(z).ln()
    } else {
        // ln Φ(z) = ln(1 - Φ(-z)); ln_1p evaluates ln(1 - tail) without
        // forming the difference explicitly, and the clamp guards against
        // -inf when Φ(-z) rounds to exactly 1.
        let tail = normal_cdf(-z);
        (-tail).ln_1p().max(MIN_LOG)
    }
}
/// Numerically stable `ln(1 - Φ(z))` (log of the normal survival function).
fn log_normal_cdf_complement(z: f64) -> f64 {
    const MIN_LOG: f64 = -745.0;
    // By symmetry of the standard normal, 1 - Φ(z) = Φ(-z).
    // The clamp here is defensive: log_normal_cdf already floors its
    // negative-argument branch at MIN_LOG.
    let log_sf = log_normal_cdf(-z);
    log_sf.max(MIN_LOG)
}
/// Anderson-Darling test for normality of OLS residuals.
///
/// Fits `y = b0 + b1*x_1 + ... + bk*x_k` by ordinary least squares, then
/// tests the null hypothesis that the residuals are normally distributed
/// (with mean and variance estimated from the residuals themselves).
///
/// # Errors
///
/// * `Error::InsufficientData` when fewer than 8 observations are supplied.
/// * `Error::InvalidInput` when the inputs fail validation or the residuals
///   have zero / non-finite variance.
/// * Any error propagated from the OLS fit.
pub fn anderson_darling_test(y: &[f64], x_vars: &[Vec<f64>]) -> Result<DiagnosticTestResult> {
    let n = y.len();
    let k = x_vars.len();
    let p = k + 1; // intercept column + k regressors
    if n < 8 {
        return Err(Error::InsufficientData {
            required: 8,
            available: n,
        });
    }
    super::helpers::validate_regression_data(y, x_vars)?;

    // Row-major design matrix [1 | x_1 ... x_k]. The vec![1.0; ...] fill
    // already initializes the intercept column, so only the regressor entries
    // need writing (removes a redundant per-row re-store of the 1.0).
    let mut x_data = vec![1.0; n * p];
    for row in 0..n {
        for (col, x_var) in x_vars.iter().enumerate() {
            x_data[row * p + col + 1] = x_var[row];
        }
    }
    let x_full = Matrix::new(n, p, x_data);

    // OLS residuals e_i = y_i - y_hat_i, sorted ascending for the
    // order-statistic sum below. Residuals are finite after validation,
    // so the unwrap_or fallback in the comparator is never exercised.
    let beta = fit_ols(y, &x_full)?;
    let predictions = x_full.mul_vec(&beta);
    let mut residuals: Vec<f64> = y
        .iter()
        .zip(predictions.iter())
        .map(|(&yi, &yi_hat)| yi - yi_hat)
        .collect();
    residuals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    // Standardize with the sample mean and (n-1)-denominator variance.
    let mean = vec_mean(&residuals);
    let variance: f64 = residuals
        .iter()
        .map(|&r| {
            let diff = r - mean;
            diff * diff
        })
        .sum::<f64>()
        / (n as f64 - 1.0);
    if variance <= 0.0 || !variance.is_finite() {
        return Err(Error::InvalidInput(
            "Invalid residual variance (all values identical)".to_string(),
        ));
    }
    let std_dev = variance.sqrt();
    let z: Vec<f64> = residuals.iter().map(|&r| (r - mean) / std_dev).collect();

    // A^2 = -n - (1/n) * sum_{i=1..n} (2i-1) [ln F(z_(i)) + ln(1 - F(z_(n+1-i)))],
    // evaluated in log space so extreme tail probabilities do not underflow.
    let logp1: Vec<f64> = z.iter().map(|&zi| log_normal_cdf(zi)).collect();
    let logp2: Vec<f64> = z.iter().map(|&zi| log_normal_cdf_complement(zi)).collect();
    let mut h_sum = 0.0;
    for i in 0..n {
        let weight = 2.0 * (i as f64 + 1.0) - 1.0;
        h_sum += weight * (logp1[i] + logp2[n - 1 - i]);
    }
    let ad_stat = -(n as f64) - h_sum / (n as f64);

    // Small-sample correction A*^2 = A^2 (1 + 0.75/n + 2.25/n^2) for the case
    // where mean and variance are estimated (D'Agostino & Stephens, 1986).
    let n_f = n as f64;
    let correction = 1.0 + 0.75 / n_f + 2.25 / (n_f * n_f);
    let ad_modified = ad_stat * correction;
    // The p-value comes from the corrected statistic; the reported statistic
    // is the uncorrected A^2.
    let p_value = anderson_darling_p_value(ad_modified);

    let alpha = 0.05;
    let passed = p_value > alpha;
    let (interpretation, guidance) = if passed {
        (
            format!(
                "p-value = {:.4} is greater than {:.2}. Cannot reject H0. No significant evidence that residuals deviate from normality.",
                p_value, alpha
            ),
            "The normality assumption appears to be met. Anderson-Darling test does not detect significant deviation from normal distribution, including in the tails."
        )
    } else {
        (
            format!(
                "p-value = {:.4} is less than or equal to {:.2}. Reject H0. Significant evidence that residuals deviate from normality.",
                p_value, alpha
            ),
            "Consider transforming the dependent variable (e.g., log, Box-Cox transformation), using robust standard errors, or applying a different estimation method. The Anderson-Darling test is particularly sensitive to tail deviations."
        )
    };

    Ok(DiagnosticTestResult {
        test_name: "Anderson-Darling Test for Normality".to_string(),
        statistic: ad_stat,
        p_value,
        passed,
        interpretation,
        guidance: guidance.to_string(),
    })
}
/// Anderson-Darling normality test on a raw sample (no regression step).
///
/// H0: the sample is drawn from a normal distribution, with mean and
/// variance estimated from the data. Requires at least 8 finite values.
pub fn anderson_darling_test_raw(sample: &[f64]) -> Result<DiagnosticTestResult> {
    let n = sample.len();
    if n < 8 {
        return Err(Error::InsufficientData {
            required: 8,
            available: n,
        });
    }
    // Reject NaN/inf up front so the sort and log-CDF evaluations are safe.
    if let Some((idx, &bad)) = sample.iter().enumerate().find(|(_, v)| !v.is_finite()) {
        return Err(Error::InvalidInput(format!(
            "Sample contains non-finite value at index {}: {}",
            idx, bad
        )));
    }

    // Order statistics of the sample.
    let mut sorted = sample.to_vec();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    // Standardize with the sample mean and (n-1)-denominator variance.
    let n_f = n as f64;
    let mean = vec_mean(&sorted);
    let variance = sorted
        .iter()
        .map(|&x| (x - mean) * (x - mean))
        .sum::<f64>()
        / (n_f - 1.0);
    if !(variance.is_finite() && variance > 0.0) {
        return Err(Error::InvalidInput(
            "Invalid sample variance (all values identical)".to_string(),
        ));
    }
    let std_dev = variance.sqrt();
    let z: Vec<f64> = sorted.iter().map(|&x| (x - mean) / std_dev).collect();

    // A^2 = -n - (1/n) * sum_{i=1..n} (2i-1) [ln F(z_(i)) + ln(1 - F(z_(n+1-i)))],
    // accumulated in log space for tail stability. Walking z forward and
    // backward simultaneously pairs z_(i) with z_(n+1-i).
    let mut weighted_sum = 0.0;
    for (i, (&lo, &hi)) in z.iter().zip(z.iter().rev()).enumerate() {
        let weight = (2 * i + 1) as f64;
        weighted_sum += weight * (log_normal_cdf(lo) + log_normal_cdf_complement(hi));
    }
    let ad_stat = -n_f - weighted_sum / n_f;

    // Small-sample correction for the estimated-parameters case.
    let ad_modified = ad_stat * (1.0 + 0.75 / n_f + 2.25 / (n_f * n_f));
    let p_value = anderson_darling_p_value(ad_modified);

    let alpha = 0.05;
    let passed = p_value > alpha;
    let interpretation = if passed {
        format!(
            "p-value = {:.4} is greater than {:.2}. Cannot reject H0.",
            p_value, alpha
        )
    } else {
        format!(
            "p-value = {:.4} is less than or equal to {:.2}. Reject H0.",
            p_value, alpha
        )
    };
    let guidance = if passed {
        "No significant evidence that the sample deviates from normality."
    } else {
        "Significant evidence that the sample deviates from normality."
    };

    Ok(DiagnosticTestResult {
        test_name: "Anderson-Darling Test for Normality".to_string(),
        statistic: ad_stat,
        p_value,
        passed,
        interpretation,
        guidance: guidance.to_string(),
    })
}
/// Maps the corrected Anderson-Darling statistic `A*^2` to an approximate
/// p-value using the piecewise exponential fits of D'Agostino & Stephens
/// (1986) for the case where mean and variance are estimated from the data.
fn anderson_darling_p_value(ad_modified: f64) -> f64 {
    let a = ad_modified;
    let a2 = a * a;
    // Each region fits p as exp(c0 + c1*a + c2*a^2), optionally complemented.
    let region = if a < 0.2 {
        Some((-13.436, 101.14, -223.73, true))
    } else if a < 0.34 {
        Some((-8.318, 42.796, -59.938, true))
    } else if a < 0.6 {
        Some((0.9177, -4.279, -1.38, false))
    } else if a < 10.0 {
        Some((1.2937, -5.709, 0.0186, false))
    } else {
        // Far beyond the fitted range: exp() underflows, so return a floor
        // value representing essentially certain rejection.
        None
    };
    let p_val = match region {
        Some((c0, c1, c2, complement)) => {
            let e = (c0 + c1 * a + c2 * a2).exp();
            if complement {
                1.0 - e
            } else {
                e
            }
        }
        None => 3.7e-24,
    };
    p_val.clamp(0.0, 1.0)
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_anderson_darling_normal_data() {
        // A roughly symmetric, bell-shaped sample: the test should not
        // strongly reject normality.
        let normal_data = vec![
            0.1, -0.5, 0.3, 1.2, -0.8, 0.4, -0.2, 0.9, -0.3, 0.6, -0.1, 0.7, -0.4, 0.2, 1.1,
            -0.6, 0.8, -0.9, 0.5, -0.7, 0.0, 0.3, -0.4, 0.6, -0.2,
        ];
        let result = anderson_darling_test_raw(&normal_data).expect("valid sample");
        assert!(result.p_value > 0.01, "p-value = {}", result.p_value);
        assert!(result.statistic >= 0.0);
    }

    #[test]
    fn test_anderson_darling_uniform_data() {
        // Evenly spaced points are uniform, not normal: expect a small p-value.
        let uniform: Vec<f64> = (0..50).map(|i| f64::from(i) / 50.0).collect();
        let result = anderson_darling_test_raw(&uniform).expect("valid sample");
        assert!(result.p_value < 0.25, "p-value = {}", result.p_value);
    }

    #[test]
    fn test_anderson_darling_small_sample() {
        // Exactly the minimum sample size (8) must be accepted.
        let data: Vec<f64> = (1..=8).map(f64::from).collect();
        assert!(anderson_darling_test_raw(&data).is_ok());
    }

    #[test]
    fn test_anderson_darling_too_small() {
        // One observation below the minimum must be rejected.
        let data: Vec<f64> = (1..=7).map(f64::from).collect();
        assert!(anderson_darling_test_raw(&data).is_err());
    }

    #[test]
    fn test_anderson_darling_constant_data() {
        // Zero variance is reported as an error, not a panic.
        assert!(anderson_darling_test_raw(&[5.0; 100]).is_err());
    }

    #[test]
    fn test_anderson_darling_with_regression() {
        // Near-linear y on x: the regression variant should run end to end
        // and produce a statistic and p-value in valid ranges.
        let x1: Vec<f64> = (1..=20).map(f64::from).collect();
        let y = vec![
            10.5, 12.3, 11.8, 14.2, 13.5, 15.1, 14.8, 16.3, 15.9, 17.2, 16.8, 18.5, 18.1, 19.3,
            19.0, 20.5, 20.1, 21.8, 21.3, 22.5,
        ];
        let r = anderson_darling_test(&y, &[x1]).expect("regression AD test");
        assert!(r.statistic >= 0.0);
        assert!(r.p_value >= 0.0 && r.p_value <= 1.0);
    }
}