use crate::error::{EvalError, EvalResult};
use serde::{Deserialize, Serialize};
/// Theoretical distribution family a sample is tested against.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum TargetDistribution {
    /// Gaussian with mean and standard deviation estimated from the sample (the default).
    #[default]
    Normal,
    /// Normality test applied to ln(x) over the strictly positive sample values.
    LogNormal,
    /// Exponential with rate λ estimated as 1 / sample mean.
    Exponential,
    /// Uniform over the sample's observed [min, max] range.
    Uniform,
}
/// Distribution parameters estimated from the sample during the fit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FittedParameters {
    /// Sample mean and standard deviation.
    Normal {
        mean: f64,
        std_dev: f64,
    },
    /// Mean and standard deviation of ln(x) over the positive values.
    LogNormal {
        mu: f64,
        sigma: f64,
    },
    /// Rate parameter, estimated as 1 / sample mean.
    Exponential {
        lambda: f64,
    },
    /// Observed sample minimum and maximum.
    Uniform {
        min: f64,
        max: f64,
    },
}
/// Full result of one Anderson-Darling goodness-of-fit analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AndersonDarlingAnalysis {
    /// Number of finite values actually used in the test.
    pub sample_size: usize,
    /// Distribution family the sample was tested against.
    pub target_distribution: TargetDistribution,
    /// The (small-sample-corrected) A² test statistic.
    pub statistic: f64,
    /// Tabulated critical values used to judge the statistic.
    pub critical_values: CriticalValues,
    /// Approximate p-value for the statistic (see `approximate_p_value`).
    pub p_value: f64,
    /// Significance level α the pass/fail decision was made at.
    pub significance_level: f64,
    /// True when `statistic` does not exceed the critical value at α.
    pub passes: bool,
    /// Parameters estimated from the sample while fitting the target.
    pub fitted_params: FittedParameters,
    /// Human-readable diagnostics (skipped values, failure details).
    pub issues: Vec<String>,
}
/// Critical values of the A² statistic at common significance levels.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CriticalValues {
    /// Critical value at α = 0.15.
    pub cv_15: f64,
    /// Critical value at α = 0.10.
    pub cv_10: f64,
    /// Critical value at α = 0.05.
    pub cv_05: f64,
    /// Critical value at α = 0.025.
    pub cv_025: f64,
    /// Critical value at α = 0.01.
    pub cv_01: f64,
}
impl CriticalValues {
    /// Critical-value table for the normality case with mean and variance
    /// estimated from the sample; also reused for the log-normal and uniform
    /// targets (see `analyze`). Values match the published parameters-estimated
    /// tables (cf. Stephens 1974) — TODO confirm against the cited source.
    pub fn normal() -> Self {
        Self {
            cv_01: 1.092,
            cv_025: 0.918,
            cv_05: 0.787,
            cv_10: 0.656,
            cv_15: 0.576,
        }
    }

    /// Critical-value table for the exponential case with the rate estimated
    /// from the sample.
    pub fn exponential() -> Self {
        Self {
            cv_01: 1.957,
            cv_025: 1.606,
            cv_05: 1.341,
            cv_10: 1.078,
            cv_15: 0.922,
        }
    }
}
/// Configurable Anderson-Darling goodness-of-fit analyzer.
///
/// Built with [`AndersonDarlingAnalyzer::new`] and customized through the
/// `with_*` builder methods before calling `analyze`.
pub struct AndersonDarlingAnalyzer {
    /// Distribution family samples are tested against.
    target_distribution: TargetDistribution,
    /// Significance level α for the pass/fail decision.
    significance_level: f64,
}
impl AndersonDarlingAnalyzer {
/// Creates an analyzer with the default configuration: normal target
/// distribution tested at significance level α = 0.05.
pub fn new() -> Self {
    Self {
        target_distribution: TargetDistribution::Normal,
        significance_level: 0.05,
    }
}

/// Builder-style setter: chooses the distribution family to test against.
pub fn with_target_distribution(self, dist: TargetDistribution) -> Self {
    Self {
        target_distribution: dist,
        ..self
    }
}

/// Builder-style setter: chooses the significance level α used by `analyze`.
pub fn with_significance_level(self, level: f64) -> Self {
    Self {
        significance_level: level,
        ..self
    }
}
/// Runs the Anderson-Darling goodness-of-fit test against the configured
/// target distribution.
///
/// Non-finite values (NaN, ±∞) are dropped before testing. At least 8
/// finite values are required.
///
/// # Errors
///
/// Returns `EvalError::InsufficientData` when fewer than 8 finite values
/// are available, and propagates errors from the log-normal/exponential
/// fits (which additionally require 8 positive values).
pub fn analyze(&self, values: &[f64]) -> EvalResult<AndersonDarlingAnalysis> {
    if values.len() < 8 {
        return Err(EvalError::InsufficientData {
            required: 8,
            actual: values.len(),
        });
    }
    let finite: Vec<f64> = values.iter().copied().filter(|v| v.is_finite()).collect();
    if finite.len() < 8 {
        return Err(EvalError::InsufficientData {
            required: 8,
            actual: finite.len(),
        });
    }
    let mut issues = Vec::new();
    let (statistic, fitted_params) = match self.target_distribution {
        TargetDistribution::Normal => self.test_normal(&finite),
        TargetDistribution::LogNormal => self.test_lognormal(&finite, &mut issues)?,
        TargetDistribution::Exponential => self.test_exponential(&finite, &mut issues)?,
        TargetDistribution::Uniform => self.test_uniform(&finite),
    };
    // Only the exponential case has its own critical-value table; every
    // other target reuses the normality-case table.
    let critical_values = if self.target_distribution == TargetDistribution::Exponential {
        CriticalValues::exponential()
    } else {
        CriticalValues::normal()
    };
    let p_value = self.approximate_p_value(statistic, self.target_distribution);
    // Use the tabulated entry for the largest tabulated α not above the
    // configured level; anything below 0.025 falls through to the 1% entry.
    let alpha = self.significance_level;
    let critical_threshold = if alpha >= 0.15 {
        critical_values.cv_15
    } else if alpha >= 0.10 {
        critical_values.cv_10
    } else if alpha >= 0.05 {
        critical_values.cv_05
    } else if alpha >= 0.025 {
        critical_values.cv_025
    } else {
        critical_values.cv_01
    };
    let passes = statistic <= critical_threshold;
    if !passes {
        issues.push(format!(
            "A² = {:.4} exceeds critical value {:.4} at α = {:.2}",
            statistic, critical_threshold, alpha
        ));
    }
    Ok(AndersonDarlingAnalysis {
        sample_size: finite.len(),
        target_distribution: self.target_distribution,
        statistic,
        critical_values,
        p_value,
        significance_level: alpha,
        passes,
        fitted_params,
        issues,
    })
}
/// Anderson-Darling normality test with mean and standard deviation
/// estimated from the sample (the "parameters estimated" case).
///
/// Returns the small-sample-corrected A² statistic together with the
/// fitted `Normal { mean, std_dev }` parameters.
fn test_normal(&self, values: &[f64]) -> (f64, FittedParameters) {
    let n = values.len();
    let n_f = n as f64;
    let mean = values.iter().sum::<f64>() / n_f;
    // Use the unbiased sample variance (n - 1 denominator): the small-sample
    // correction applied below is tabulated for parameters estimated with the
    // sample standard deviation, not the population one. Callers guarantee
    // n >= 8, so n - 1 > 0.
    let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n_f - 1.0);
    let std_dev = variance.sqrt();
    // Standardize and sort ascending; floor the spread to avoid division by
    // zero on a constant sample.
    let mut z: Vec<f64> = values
        .iter()
        .map(|&x| (x - mean) / std_dev.max(1e-10))
        .collect();
    z.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let a2 = self.compute_a2_statistic(&z, Self::normal_cdf);
    // Small-sample correction for the estimated-parameters normality case.
    let a2_corrected = a2 * (1.0 + 0.75 / n_f + 2.25 / (n_f * n_f));
    (a2_corrected, FittedParameters::Normal { mean, std_dev })
}
/// Tests log-normality by running the normality test on ln(x) over the
/// strictly positive sample values.
///
/// Non-positive values are skipped and reported through `issues`; at least
/// 8 positive values must remain or `EvalError::InvalidParameter` is
/// returned.
fn test_lognormal(
    &self,
    values: &[f64],
    issues: &mut Vec<String>,
) -> EvalResult<(f64, FittedParameters)> {
    let positive: Vec<f64> = values.iter().copied().filter(|&v| v > 0.0).collect();
    if positive.len() < 8 {
        return Err(EvalError::InvalidParameter(
            "Log-normal test requires at least 8 positive values".to_string(),
        ));
    }
    let skipped = values.len() - positive.len();
    if skipped > 0 {
        issues.push(format!(
            "Skipped {skipped} non-positive values for log-normal test"
        ));
    }
    let logs: Vec<f64> = positive.iter().map(|&x| x.ln()).collect();
    let (a2, fitted) = self.test_normal(&logs);
    // The normal fit on ln(x) yields the log-normal (mu, sigma) directly.
    if let FittedParameters::Normal { mean, std_dev } = fitted {
        Ok((
            a2,
            FittedParameters::LogNormal {
                mu: mean,
                sigma: std_dev,
            },
        ))
    } else {
        // test_normal only ever returns the Normal variant.
        unreachable!()
    }
}
/// Tests an exponential fit with the rate λ = 1 / mean estimated from the
/// strictly positive sample values.
///
/// Non-positive values are skipped and reported through `issues`; at least
/// 8 positive values must remain or `EvalError::InvalidParameter` is
/// returned.
fn test_exponential(
    &self,
    values: &[f64],
    issues: &mut Vec<String>,
) -> EvalResult<(f64, FittedParameters)> {
    let mut positive: Vec<f64> = values.iter().copied().filter(|&v| v > 0.0).collect();
    if positive.len() < 8 {
        return Err(EvalError::InvalidParameter(
            "Exponential test requires at least 8 positive values".to_string(),
        ));
    }
    let skipped = values.len() - positive.len();
    if skipped > 0 {
        issues.push(format!(
            "Skipped {skipped} non-positive values for exponential test"
        ));
    }
    let n_f = positive.len() as f64;
    let mean = positive.iter().sum::<f64>() / n_f;
    let lambda = 1.0 / mean;
    positive.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    // Probability-integral transform: U = F(x) = 1 - exp(-λx).
    let u: Vec<f64> = positive
        .iter()
        .map(|&x| 1.0 - (-lambda * x).exp())
        .collect();
    let a2 = self.compute_a2_uniform(&u);
    // Small-sample correction for the estimated-rate exponential case.
    let a2_corrected = a2 * (1.0 + 0.6 / n_f);
    Ok((a2_corrected, FittedParameters::Exponential { lambda }))
}
/// Tests uniformity over the sample's own observed [min, max] range.
///
/// NOTE(review): because min/max come from the sample itself, the extreme
/// order statistics map to exactly 0 and 1 and are rescued only by the
/// clamping inside `compute_a2_uniform`, which inflates A² somewhat —
/// consider a plotting-position transform if this case matters.
fn test_uniform(&self, values: &[f64]) -> (f64, FittedParameters) {
    let min = values.iter().copied().fold(f64::INFINITY, f64::min);
    let max = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);
    // Floor the range to avoid dividing by zero on a constant sample.
    let range = (max - min).max(1e-10);
    // Rescale to [0, 1], then sort (the rescaling is monotone, so mapping
    // before sorting gives the same sequence as sorting first).
    let mut u: Vec<f64> = values.iter().map(|&x| (x - min) / range).collect();
    u.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let a2 = self.compute_a2_uniform(&u);
    (a2, FittedParameters::Uniform { min, max })
}
/// Computes the A² statistic for ascending-sorted standardized values `z`
/// under the supplied CDF.
///
/// Maps each value through the CDF (the probability-integral transform) and
/// delegates to `compute_a2_uniform`, which applies the same clamping of
/// each probability into [1e-10, 1 - 1e-10] and the same summation order as
/// the formula previously inlined here — the two bodies were duplicates.
fn compute_a2_statistic<F>(&self, z: &[f64], cdf: F) -> f64
where
    F: Fn(f64) -> f64,
{
    let u: Vec<f64> = z.iter().map(|&zi| cdf(zi)).collect();
    self.compute_a2_uniform(&u)
}
/// Computes A² = -n - (1/n) Σ (2i + 1)(ln uᵢ + ln(1 - u_{n-1-i})) for
/// ascending-sorted probabilities `u` in [0, 1].
///
/// Each probability is clamped into [1e-10, 1 - 1e-10] so the logarithms
/// stay finite at the endpoints.
fn compute_a2_uniform(&self, u: &[f64]) -> f64 {
    let n_f = u.len() as f64;
    let sum: f64 = u
        .iter()
        .zip(u.iter().rev())
        .enumerate()
        .map(|(i, (&lo, &hi))| {
            let lo = lo.clamp(1e-10, 1.0 - 1e-10);
            let hi = hi.clamp(1e-10, 1.0 - 1e-10);
            (2.0 * (i as f64) + 1.0) * (lo.ln() + (1.0 - hi).ln())
        })
        .sum();
    -n_f - sum / n_f
}
/// Standard normal CDF: Φ(x) = (1 + erf(x / √2)) / 2.
fn normal_cdf(x: f64) -> f64 {
    0.5 * (1.0 + erf(x / std::f64::consts::SQRT_2))
}
/// Approximates the p-value for a (corrected) A² statistic.
///
/// The normal target uses a piecewise-exponential fit for the
/// estimated-parameters case. The log-normal test is exactly the normality
/// test applied to ln(x) (see `test_lognormal`), so its statistic follows
/// the same null distribution and now reuses the same formula instead of
/// the coarse critical-value bracket it previously fell into.
fn approximate_p_value(&self, a2: f64, dist: TargetDistribution) -> f64 {
    match dist {
        TargetDistribution::Normal | TargetDistribution::LogNormal => {
            if a2 < 0.2 {
                1.0 - (-13.436 + 101.14 * a2 - 223.73 * a2.powi(2)).exp()
            } else if a2 < 0.34 {
                1.0 - (-8.318 + 42.796 * a2 - 59.938 * a2.powi(2)).exp()
            } else if a2 < 0.6 {
                (0.9177 - 4.279 * a2 - 1.38 * a2.powi(2)).exp()
            } else if a2 < 13.0 {
                (1.2937 - 5.709 * a2 + 0.0186 * a2.powi(2)).exp()
            } else {
                0.0
            }
        }
        TargetDistribution::Exponential => {
            // NOTE(review): these piecewise coefficients look like a rough
            // fit for the estimated-rate case — verify against a published
            // table before relying on exact exponential p-values.
            if a2 < 0.26 {
                1.0 - (-12.0 + 70.0 * a2 - 100.0 * a2.powi(2)).exp()
            } else if a2 < 0.51 {
                1.0 - (-6.0 + 24.0 * a2 - 24.0 * a2.powi(2)).exp()
            } else if a2 < 0.95 {
                (0.7 - 3.5 * a2 + 0.6 * a2.powi(2)).exp()
            } else if a2 < 10.0 {
                (0.9 - 4.0 * a2 + 0.01 * a2.powi(2)).exp()
            } else {
                0.0
            }
        }
        TargetDistribution::Uniform => {
            // No closed-form fit wired up for this case: bracket the p-value
            // with the tabulated critical values, tailing off with the
            // large-A² normal formula.
            let cv = CriticalValues::normal();
            if a2 <= cv.cv_15 {
                0.25
            } else if a2 <= cv.cv_10 {
                0.15
            } else if a2 <= cv.cv_05 {
                0.10
            } else if a2 <= cv.cv_025 {
                0.05
            } else if a2 <= cv.cv_01 {
                0.025
            } else {
                (1.2937 - 5.709 * a2 + 0.0186 * a2.powi(2)).exp().max(0.0)
            }
        }
    }
}
}
impl Default for AndersonDarlingAnalyzer {
    /// Equivalent to [`AndersonDarlingAnalyzer::new`]: normal target, α = 0.05.
    fn default() -> Self {
        Self::new()
    }
}
/// Error function via the Abramowitz & Stegun 7.1.26 rational approximation
/// (maximum absolute error ≈ 1.5e-7). Odd symmetry is enforced explicitly:
/// the polynomial is evaluated on |x| and the sign reapplied.
fn erf(x: f64) -> f64 {
    const P: f64 = 0.3275911;
    // Polynomial coefficients a1..a5, lowest order first.
    const A: [f64; 5] = [
        0.254829592,
        -0.284496736,
        1.421413741,
        -1.453152027,
        1.061405429,
    ];
    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let ax = x.abs();
    let t = 1.0 / (1.0 + P * ax);
    // Horner evaluation: ((((a5·t + a4)·t + a3)·t + a2)·t + a1)·t.
    let poly = A.iter().rev().fold(0.0, |acc, &c| acc * t + c) * t;
    sign * (1.0 - poly * (-ax * ax).exp())
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;
    use rand_distr::{Distribution, Exp, LogNormal, Normal, Uniform};

    /// Draws `count` samples from `dist` with a fixed-seed ChaCha8 RNG so
    /// every test is deterministic.
    fn draw<D: Distribution<f64>>(dist: D, count: usize) -> Vec<f64> {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        (0..count).map(|_| dist.sample(&mut rng)).collect()
    }

    #[test]
    fn test_normal_sample() {
        let sample = draw(Normal::new(0.0, 1.0).unwrap(), 500);
        let analysis = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Normal)
            .with_significance_level(0.05)
            .analyze(&sample)
            .unwrap();
        assert!(analysis.passes, "Normal sample should pass normality test");
        assert!(analysis.p_value > 0.05);
    }

    #[test]
    fn test_non_normal_sample() {
        let sample = draw(Exp::new(1.0).unwrap(), 500);
        let analysis = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Normal)
            .with_significance_level(0.05)
            .analyze(&sample)
            .unwrap();
        assert!(
            !analysis.passes,
            "Exponential sample should fail normality test"
        );
    }

    #[test]
    fn test_lognormal_sample() {
        let sample = draw(LogNormal::new(3.0, 0.5).unwrap(), 500);
        let analysis = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::LogNormal)
            .with_significance_level(0.05)
            .analyze(&sample)
            .unwrap();
        assert!(
            analysis.passes,
            "Log-normal sample should pass log-normality test"
        );
        match analysis.fitted_params {
            FittedParameters::LogNormal { mu, sigma } => {
                assert!((mu - 3.0).abs() < 0.5, "Mu should be close to 3.0");
                assert!((sigma - 0.5).abs() < 0.2, "Sigma should be close to 0.5");
            }
            _ => panic!("Expected LogNormal parameters"),
        }
    }

    #[test]
    fn test_exponential_sample() {
        let sample = draw(Exp::new(2.0).unwrap(), 500);
        let analysis = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Exponential)
            .with_significance_level(0.05)
            .analyze(&sample)
            .unwrap();
        assert!(
            analysis.passes,
            "Exponential sample should pass exponential test"
        );
        match analysis.fitted_params {
            FittedParameters::Exponential { lambda } => {
                assert!(
                    (lambda - 2.0).abs() < 0.5,
                    "Lambda should be close to 2.0, got {}",
                    lambda
                );
            }
            _ => panic!("Expected Exponential parameters"),
        }
    }

    #[test]
    fn test_uniform_sample() {
        let sample = draw(Uniform::new(0.0, 10.0).unwrap(), 500);
        let analysis = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Uniform)
            .with_significance_level(0.05)
            .analyze(&sample)
            .unwrap();
        assert!(analysis.sample_size == 500);
    }

    #[test]
    fn test_insufficient_data() {
        let too_few = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let outcome = AndersonDarlingAnalyzer::new().analyze(&too_few);
        assert!(matches!(
            outcome,
            Err(EvalError::InsufficientData {
                required: 8,
                actual: 5
            })
        ));
    }

    #[test]
    fn test_critical_values() {
        // The table must be strictly increasing as α shrinks.
        let cv = CriticalValues::normal();
        assert!(cv.cv_15 < cv.cv_10);
        assert!(cv.cv_10 < cv.cv_05);
        assert!(cv.cv_05 < cv.cv_025);
        assert!(cv.cv_025 < cv.cv_01);
    }
}