1use crate::error::{EvalError, EvalResult};
7use serde::{Deserialize, Serialize};
8
/// Distribution families a sample can be tested against with the
/// Anderson-Darling goodness-of-fit statistic.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum TargetDistribution {
    /// Gaussian distribution (the default target).
    #[default]
    Normal,
    /// Log-normal distribution; only strictly positive samples are usable.
    LogNormal,
    /// Exponential distribution; only strictly positive samples are usable.
    Exponential,
    /// Continuous uniform distribution over the sample's observed range.
    Uniform,
}
22
/// Distribution parameters estimated from the sample while computing the
/// Anderson-Darling statistic; variant matches the chosen target.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FittedParameters {
    /// Fitted normal distribution.
    Normal {
        /// Sample mean.
        mean: f64,
        /// Sample standard deviation.
        std_dev: f64,
    },
    /// Fitted log-normal distribution (parameters of ln(X)).
    LogNormal {
        /// Mean of the log-transformed values.
        mu: f64,
        /// Standard deviation of the log-transformed values.
        sigma: f64,
    },
    /// Fitted exponential distribution.
    Exponential {
        /// Rate parameter, estimated as 1 / sample mean.
        lambda: f64,
    },
    /// Fitted uniform distribution.
    Uniform {
        /// Smallest observed value.
        min: f64,
        /// Largest observed value.
        max: f64,
    },
}
53
/// Full result of one Anderson-Darling analysis run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AndersonDarlingAnalysis {
    /// Number of finite values that were actually tested.
    pub sample_size: usize,
    /// Distribution family the sample was tested against.
    pub target_distribution: TargetDistribution,
    /// The (small-sample corrected) A² statistic.
    pub statistic: f64,
    /// Critical-value table used for the pass/fail decision.
    pub critical_values: CriticalValues,
    /// Approximate p-value for the statistic.
    pub p_value: f64,
    /// Significance level α the test was run at.
    pub significance_level: f64,
    /// True when the statistic does not exceed the critical value at α.
    pub passes: bool,
    /// Parameters estimated from the sample during the test.
    pub fitted_params: FittedParameters,
    /// Human-readable notes (skipped values, failure explanation, ...).
    pub issues: Vec<String>,
}
76
/// Tabulated Anderson-Darling critical values at the common significance
/// levels; the statistic is compared against the entry matching α.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CriticalValues {
    /// Critical value at α = 0.15.
    pub cv_15: f64,
    /// Critical value at α = 0.10.
    pub cv_10: f64,
    /// Critical value at α = 0.05.
    pub cv_05: f64,
    /// Critical value at α = 0.025.
    pub cv_025: f64,
    /// Critical value at α = 0.01.
    pub cv_01: f64,
}
91
92impl CriticalValues {
93 pub fn normal() -> Self {
95 Self {
97 cv_15: 0.576,
98 cv_10: 0.656,
99 cv_05: 0.787,
100 cv_025: 0.918,
101 cv_01: 1.092,
102 }
103 }
104
105 pub fn exponential() -> Self {
107 Self {
108 cv_15: 0.922,
109 cv_10: 1.078,
110 cv_05: 1.341,
111 cv_025: 1.606,
112 cv_01: 1.957,
113 }
114 }
115}
116
/// Configurable Anderson-Darling goodness-of-fit analyzer.
///
/// Construct with [`AndersonDarlingAnalyzer::new`], then adjust via the
/// builder-style `with_*` methods and call `analyze`.
pub struct AndersonDarlingAnalyzer {
    /// Distribution family samples are tested against.
    target_distribution: TargetDistribution,
    /// Significance level α used for the pass/fail decision.
    significance_level: f64,
}
124
125impl AndersonDarlingAnalyzer {
126 pub fn new() -> Self {
128 Self {
129 target_distribution: TargetDistribution::Normal,
130 significance_level: 0.05,
131 }
132 }
133
134 pub fn with_target_distribution(mut self, dist: TargetDistribution) -> Self {
136 self.target_distribution = dist;
137 self
138 }
139
140 pub fn with_significance_level(mut self, level: f64) -> Self {
142 self.significance_level = level;
143 self
144 }
145
146 pub fn analyze(&self, values: &[f64]) -> EvalResult<AndersonDarlingAnalysis> {
148 let n = values.len();
149 if n < 8 {
150 return Err(EvalError::InsufficientData {
151 required: 8,
152 actual: n,
153 });
154 }
155
156 let valid_values: Vec<f64> = values.iter().filter(|&&v| v.is_finite()).copied().collect();
158
159 if valid_values.len() < 8 {
160 return Err(EvalError::InsufficientData {
161 required: 8,
162 actual: valid_values.len(),
163 });
164 }
165
166 let mut issues = Vec::new();
167
168 let (statistic, fitted_params) = match self.target_distribution {
170 TargetDistribution::Normal => self.test_normal(&valid_values),
171 TargetDistribution::LogNormal => self.test_lognormal(&valid_values, &mut issues)?,
172 TargetDistribution::Exponential => self.test_exponential(&valid_values, &mut issues)?,
173 TargetDistribution::Uniform => self.test_uniform(&valid_values),
174 };
175
176 let critical_values = match self.target_distribution {
178 TargetDistribution::Exponential => CriticalValues::exponential(),
179 _ => CriticalValues::normal(),
180 };
181
182 let p_value = self.approximate_p_value(statistic, self.target_distribution);
184
185 let critical_threshold = match self.significance_level {
187 s if s >= 0.15 => critical_values.cv_15,
188 s if s >= 0.10 => critical_values.cv_10,
189 s if s >= 0.05 => critical_values.cv_05,
190 s if s >= 0.025 => critical_values.cv_025,
191 _ => critical_values.cv_01,
192 };
193 let passes = statistic <= critical_threshold;
194
195 if !passes {
196 issues.push(format!(
197 "A² = {:.4} exceeds critical value {:.4} at α = {:.2}",
198 statistic, critical_threshold, self.significance_level
199 ));
200 }
201
202 Ok(AndersonDarlingAnalysis {
203 sample_size: valid_values.len(),
204 target_distribution: self.target_distribution,
205 statistic,
206 critical_values,
207 p_value,
208 significance_level: self.significance_level,
209 passes,
210 fitted_params,
211 issues,
212 })
213 }
214
215 fn test_normal(&self, values: &[f64]) -> (f64, FittedParameters) {
217 let n = values.len();
218 let n_f = n as f64;
219
220 let mean = values.iter().sum::<f64>() / n_f;
222 let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n_f;
223 let std_dev = variance.sqrt();
224
225 let mut z: Vec<f64> = values
227 .iter()
228 .map(|&x| (x - mean) / std_dev.max(1e-10))
229 .collect();
230 z.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
231
232 let a2 = self.compute_a2_statistic(&z, Self::normal_cdf);
234
235 let a2_corrected = a2 * (1.0 + 0.75 / n_f + 2.25 / (n_f * n_f));
237
238 (a2_corrected, FittedParameters::Normal { mean, std_dev })
239 }
240
241 fn test_lognormal(
243 &self,
244 values: &[f64],
245 issues: &mut Vec<String>,
246 ) -> EvalResult<(f64, FittedParameters)> {
247 let positive_values: Vec<f64> = values.iter().filter(|&&v| v > 0.0).copied().collect();
249
250 if positive_values.len() < 8 {
251 return Err(EvalError::InvalidParameter(
252 "Log-normal test requires at least 8 positive values".to_string(),
253 ));
254 }
255
256 let skipped = values.len() - positive_values.len();
257 if skipped > 0 {
258 issues.push(format!(
259 "Skipped {} non-positive values for log-normal test",
260 skipped
261 ));
262 }
263
264 let log_values: Vec<f64> = positive_values.iter().map(|&x| x.ln()).collect();
266
267 let (a2, normal_params) = self.test_normal(&log_values);
269
270 let (mu, sigma) = match normal_params {
272 FittedParameters::Normal { mean, std_dev } => (mean, std_dev),
273 _ => unreachable!(),
274 };
275
276 Ok((a2, FittedParameters::LogNormal { mu, sigma }))
277 }
278
279 fn test_exponential(
281 &self,
282 values: &[f64],
283 issues: &mut Vec<String>,
284 ) -> EvalResult<(f64, FittedParameters)> {
285 let positive_values: Vec<f64> = values.iter().filter(|&&v| v > 0.0).copied().collect();
287
288 if positive_values.len() < 8 {
289 return Err(EvalError::InvalidParameter(
290 "Exponential test requires at least 8 positive values".to_string(),
291 ));
292 }
293
294 let skipped = values.len() - positive_values.len();
295 if skipped > 0 {
296 issues.push(format!(
297 "Skipped {} non-positive values for exponential test",
298 skipped
299 ));
300 }
301
302 let n = positive_values.len();
303 let n_f = n as f64;
304
305 let mean = positive_values.iter().sum::<f64>() / n_f;
307 let lambda = 1.0 / mean;
308
309 let mut sorted: Vec<f64> = positive_values;
311 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
312
313 let u: Vec<f64> = sorted.iter().map(|&x| 1.0 - (-lambda * x).exp()).collect();
315
316 let a2 = self.compute_a2_uniform(&u);
318
319 let a2_corrected = a2 * (1.0 + 0.6 / n_f);
321
322 Ok((a2_corrected, FittedParameters::Exponential { lambda }))
323 }
324
325 fn test_uniform(&self, values: &[f64]) -> (f64, FittedParameters) {
327 let min = values.iter().cloned().fold(f64::INFINITY, f64::min);
328 let max = values.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
329
330 let mut sorted: Vec<f64> = values.to_vec();
332 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
333
334 let range = (max - min).max(1e-10);
336 let u: Vec<f64> = sorted.iter().map(|&x| (x - min) / range).collect();
337
338 let a2 = self.compute_a2_uniform(&u);
339
340 (a2, FittedParameters::Uniform { min, max })
341 }
342
343 fn compute_a2_statistic<F>(&self, z: &[f64], cdf: F) -> f64
345 where
346 F: Fn(f64) -> f64,
347 {
348 let n = z.len();
349 let n_f = n as f64;
350
351 let mut sum = 0.0;
352 for (i, &zi) in z.iter().enumerate() {
353 let fi = cdf(zi).clamp(1e-10, 1.0 - 1e-10);
354 let fn_minus_i = cdf(z[n - 1 - i]).clamp(1e-10, 1.0 - 1e-10);
355
356 let term = (2.0 * (i as f64) + 1.0) * (fi.ln() + (1.0 - fn_minus_i).ln());
357 sum += term;
358 }
359
360 -n_f - sum / n_f
361 }
362
363 fn compute_a2_uniform(&self, u: &[f64]) -> f64 {
365 let n = u.len();
366 let n_f = n as f64;
367
368 let mut sum = 0.0;
369 for (i, &ui) in u.iter().enumerate() {
370 let ui = ui.clamp(1e-10, 1.0 - 1e-10);
371 let u_n_minus_i = u[n - 1 - i].clamp(1e-10, 1.0 - 1e-10);
372
373 let term = (2.0 * (i as f64) + 1.0) * (ui.ln() + (1.0 - u_n_minus_i).ln());
374 sum += term;
375 }
376
377 -n_f - sum / n_f
378 }
379
380 fn normal_cdf(x: f64) -> f64 {
382 0.5 * (1.0 + erf(x / std::f64::consts::SQRT_2))
383 }
384
385 fn approximate_p_value(&self, a2: f64, dist: TargetDistribution) -> f64 {
387 match dist {
388 TargetDistribution::Normal => {
389 if a2 < 0.2 {
391 1.0 - (-13.436 + 101.14 * a2 - 223.73 * a2.powi(2)).exp()
392 } else if a2 < 0.34 {
393 1.0 - (-8.318 + 42.796 * a2 - 59.938 * a2.powi(2)).exp()
394 } else if a2 < 0.6 {
395 (0.9177 - 4.279 * a2 - 1.38 * a2.powi(2)).exp()
396 } else if a2 < 13.0 {
397 (1.2937 - 5.709 * a2 + 0.0186 * a2.powi(2)).exp()
398 } else {
399 0.0
400 }
401 }
402 TargetDistribution::Exponential => {
403 if a2 < 0.26 {
405 1.0 - (-12.0 + 70.0 * a2 - 100.0 * a2.powi(2)).exp()
406 } else if a2 < 0.51 {
407 1.0 - (-6.0 + 24.0 * a2 - 24.0 * a2.powi(2)).exp()
408 } else if a2 < 0.95 {
409 (0.7 - 3.5 * a2 + 0.6 * a2.powi(2)).exp()
410 } else if a2 < 10.0 {
411 (0.9 - 4.0 * a2 + 0.01 * a2.powi(2)).exp()
412 } else {
413 0.0
414 }
415 }
416 _ => {
417 if a2 < 2.0 {
419 (-a2 + 0.5).exp().clamp(0.0, 1.0)
420 } else {
421 0.0
422 }
423 }
424 }
425 }
426}
427
428impl Default for AndersonDarlingAnalyzer {
429 fn default() -> Self {
430 Self::new()
431 }
432}
433
/// Error function via the Abramowitz & Stegun 7.1.26 rational
/// approximation (absolute error below ~1.5e-7), using the odd symmetry
/// erf(-x) = -erf(x).
fn erf(x: f64) -> f64 {
    const P: f64 = 0.3275911;
    // Polynomial coefficients a1..a5, lowest order first.
    const A: [f64; 5] = [
        0.254829592,
        -0.284496736,
        1.421413741,
        -1.453152027,
        1.061405429,
    ];

    let negative = x < 0.0;
    let ax = x.abs();

    let t = 1.0 / (1.0 + P * ax);

    // Horner evaluation of a5·t⁴ + a4·t³ + a3·t² + a2·t + a1, then one
    // final multiply by t — same association order as the closed form.
    let mut poly = A[4];
    for &coeff in A[..4].iter().rev() {
        poly = poly * t + coeff;
    }
    let magnitude = 1.0 - poly * t * (-ax * ax).exp();

    if negative { -magnitude } else { magnitude }
}
451
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;
    use rand_distr::{Distribution, Exp, LogNormal, Normal, Uniform};

    /// Draws `count` f64 samples from `dist` with a fixed-seed ChaCha RNG
    /// so every test run sees the same data.
    fn draw<D: Distribution<f64>>(dist: D, count: usize) -> Vec<f64> {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        (0..count).map(|_| dist.sample(&mut rng)).collect()
    }

    /// Builds an analyzer targeting `dist` at α = 0.05.
    fn analyzer_for(dist: TargetDistribution) -> AndersonDarlingAnalyzer {
        AndersonDarlingAnalyzer::new()
            .with_target_distribution(dist)
            .with_significance_level(0.05)
    }

    #[test]
    fn test_normal_sample() {
        let data = draw(Normal::new(0.0, 1.0).unwrap(), 500);
        let report = analyzer_for(TargetDistribution::Normal)
            .analyze(&data)
            .unwrap();

        assert!(report.passes, "Normal sample should pass normality test");
        assert!(report.p_value > 0.05);
    }

    #[test]
    fn test_non_normal_sample() {
        let data = draw(Exp::new(1.0).unwrap(), 500);
        let report = analyzer_for(TargetDistribution::Normal)
            .analyze(&data)
            .unwrap();

        assert!(
            !report.passes,
            "Exponential sample should fail normality test"
        );
    }

    #[test]
    fn test_lognormal_sample() {
        let data = draw(LogNormal::new(3.0, 0.5).unwrap(), 500);
        let report = analyzer_for(TargetDistribution::LogNormal)
            .analyze(&data)
            .unwrap();

        assert!(
            report.passes,
            "Log-normal sample should pass log-normality test"
        );

        match report.fitted_params {
            FittedParameters::LogNormal { mu, sigma } => {
                assert!((mu - 3.0).abs() < 0.5, "Mu should be close to 3.0");
                assert!((sigma - 0.5).abs() < 0.2, "Sigma should be close to 0.5");
            }
            _ => panic!("Expected LogNormal parameters"),
        }
    }

    #[test]
    fn test_exponential_sample() {
        let data = draw(Exp::new(2.0).unwrap(), 500);
        let report = analyzer_for(TargetDistribution::Exponential)
            .analyze(&data)
            .unwrap();

        assert!(
            report.passes,
            "Exponential sample should pass exponential test"
        );

        match report.fitted_params {
            FittedParameters::Exponential { lambda } => assert!(
                (lambda - 2.0).abs() < 0.5,
                "Lambda should be close to 2.0, got {}",
                lambda
            ),
            _ => panic!("Expected Exponential parameters"),
        }
    }

    #[test]
    fn test_uniform_sample() {
        let data = draw(Uniform::new(0.0, 10.0), 500);
        let report = analyzer_for(TargetDistribution::Uniform)
            .analyze(&data)
            .unwrap();

        // Only the bookkeeping is pinned here, not the pass/fail outcome.
        assert!(report.sample_size == 500);
    }

    #[test]
    fn test_insufficient_data() {
        let too_few = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let outcome = AndersonDarlingAnalyzer::new().analyze(&too_few);

        assert!(matches!(
            outcome,
            Err(EvalError::InsufficientData {
                required: 8,
                actual: 5
            })
        ));
    }

    #[test]
    fn test_critical_values() {
        // Thresholds must grow strictly as α shrinks.
        let cv = CriticalValues::normal();
        let ascending = [cv.cv_15, cv.cv_10, cv.cv_05, cv.cv_025, cv.cv_01];
        assert!(ascending.windows(2).all(|pair| pair[0] < pair[1]));
    }
}