1use crate::error::{EvalError, EvalResult};
7use serde::{Deserialize, Serialize};
8
/// Theoretical distribution a sample is tested against.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum TargetDistribution {
    /// Gaussian distribution (the default target).
    #[default]
    Normal,
    /// Log-normal: ln(x) is tested for normality.
    LogNormal,
    /// Exponential with rate fitted as 1 / sample mean.
    Exponential,
    /// Uniform over the sample's [min, max] range.
    Uniform,
}
22
/// Parameters estimated from the sample for the distribution under test.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FittedParameters {
    Normal {
        /// Sample mean.
        mean: f64,
        /// Population (divide-by-n) standard deviation.
        std_dev: f64,
    },
    LogNormal {
        /// Mean of ln(x).
        mu: f64,
        /// Standard deviation of ln(x).
        sigma: f64,
    },
    Exponential {
        /// Rate parameter, fitted as 1 / sample mean.
        lambda: f64,
    },
    Uniform {
        /// Sample minimum.
        min: f64,
        /// Sample maximum.
        max: f64,
    },
}
53
/// Full result of an Anderson-Darling goodness-of-fit analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AndersonDarlingAnalysis {
    /// Number of finite values retained for the analysis.
    pub sample_size: usize,
    /// Distribution the sample was tested against.
    pub target_distribution: TargetDistribution,
    /// The (small-sample-corrected) A² test statistic.
    pub statistic: f64,
    /// Critical-value table used for the pass/fail decision.
    pub critical_values: CriticalValues,
    /// Approximate p-value for the observed statistic.
    pub p_value: f64,
    /// Significance level α requested by the caller.
    pub significance_level: f64,
    /// True when the statistic does not exceed the critical value at α.
    pub passes: bool,
    /// Parameters fitted from the sample for the target distribution.
    pub fitted_params: FittedParameters,
    /// Human-readable notes (skipped values, failure explanation, …).
    pub issues: Vec<String>,
}
76
/// A² critical values at the five tabulated significance levels.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CriticalValues {
    /// Critical value at α = 0.15.
    pub cv_15: f64,
    /// Critical value at α = 0.10.
    pub cv_10: f64,
    /// Critical value at α = 0.05.
    pub cv_05: f64,
    /// Critical value at α = 0.025.
    pub cv_025: f64,
    /// Critical value at α = 0.01.
    pub cv_01: f64,
}
91
impl CriticalValues {
    /// Critical values for the normal test with mean and variance
    /// estimated from the data, applied to the corrected statistic.
    // NOTE(review): constants appear to follow the D'Agostino & Stephens
    // case-3 normal table — confirm against the published table.
    pub fn normal() -> Self {
        Self {
            cv_15: 0.576,
            cv_10: 0.656,
            cv_05: 0.787,
            cv_025: 0.918,
            cv_01: 1.092,
        }
    }

    /// Critical values for the exponential test with the rate estimated
    /// from the data.
    // NOTE(review): constants appear to follow the Stephens table for the
    // exponential case with estimated scale — confirm against the source.
    pub fn exponential() -> Self {
        Self {
            cv_15: 0.922,
            cv_10: 1.078,
            cv_05: 1.341,
            cv_025: 1.606,
            cv_01: 1.957,
        }
    }
}
116
/// Configurable Anderson-Darling goodness-of-fit analyzer.
///
/// Construct with [`AndersonDarlingAnalyzer::new`], customize via the
/// `with_*` builder methods, then call `analyze` on a sample.
pub struct AndersonDarlingAnalyzer {
    // Distribution the sample is tested against.
    target_distribution: TargetDistribution,
    // Significance level α for the pass/fail decision.
    significance_level: f64,
}
124
125impl AndersonDarlingAnalyzer {
126 pub fn new() -> Self {
128 Self {
129 target_distribution: TargetDistribution::Normal,
130 significance_level: 0.05,
131 }
132 }
133
134 pub fn with_target_distribution(mut self, dist: TargetDistribution) -> Self {
136 self.target_distribution = dist;
137 self
138 }
139
140 pub fn with_significance_level(mut self, level: f64) -> Self {
142 self.significance_level = level;
143 self
144 }
145
146 pub fn analyze(&self, values: &[f64]) -> EvalResult<AndersonDarlingAnalysis> {
148 let n = values.len();
149 if n < 8 {
150 return Err(EvalError::InsufficientData {
151 required: 8,
152 actual: n,
153 });
154 }
155
156 let valid_values: Vec<f64> = values.iter().filter(|&&v| v.is_finite()).copied().collect();
158
159 if valid_values.len() < 8 {
160 return Err(EvalError::InsufficientData {
161 required: 8,
162 actual: valid_values.len(),
163 });
164 }
165
166 let mut issues = Vec::new();
167
168 let (statistic, fitted_params) = match self.target_distribution {
170 TargetDistribution::Normal => self.test_normal(&valid_values),
171 TargetDistribution::LogNormal => self.test_lognormal(&valid_values, &mut issues)?,
172 TargetDistribution::Exponential => self.test_exponential(&valid_values, &mut issues)?,
173 TargetDistribution::Uniform => self.test_uniform(&valid_values),
174 };
175
176 let critical_values = match self.target_distribution {
178 TargetDistribution::Exponential => CriticalValues::exponential(),
179 _ => CriticalValues::normal(),
180 };
181
182 let p_value = self.approximate_p_value(statistic, self.target_distribution);
184
185 let critical_threshold = match self.significance_level {
187 s if s >= 0.15 => critical_values.cv_15,
188 s if s >= 0.10 => critical_values.cv_10,
189 s if s >= 0.05 => critical_values.cv_05,
190 s if s >= 0.025 => critical_values.cv_025,
191 _ => critical_values.cv_01,
192 };
193 let passes = statistic <= critical_threshold;
194
195 if !passes {
196 issues.push(format!(
197 "A² = {:.4} exceeds critical value {:.4} at α = {:.2}",
198 statistic, critical_threshold, self.significance_level
199 ));
200 }
201
202 Ok(AndersonDarlingAnalysis {
203 sample_size: valid_values.len(),
204 target_distribution: self.target_distribution,
205 statistic,
206 critical_values,
207 p_value,
208 significance_level: self.significance_level,
209 passes,
210 fitted_params,
211 issues,
212 })
213 }
214
215 fn test_normal(&self, values: &[f64]) -> (f64, FittedParameters) {
217 let n = values.len();
218 let n_f = n as f64;
219
220 let mean = values.iter().sum::<f64>() / n_f;
222 let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n_f;
223 let std_dev = variance.sqrt();
224
225 let mut z: Vec<f64> = values
227 .iter()
228 .map(|&x| (x - mean) / std_dev.max(1e-10))
229 .collect();
230 z.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
231
232 let a2 = self.compute_a2_statistic(&z, Self::normal_cdf);
234
235 let a2_corrected = a2 * (1.0 + 0.75 / n_f + 2.25 / (n_f * n_f));
237
238 (a2_corrected, FittedParameters::Normal { mean, std_dev })
239 }
240
241 fn test_lognormal(
243 &self,
244 values: &[f64],
245 issues: &mut Vec<String>,
246 ) -> EvalResult<(f64, FittedParameters)> {
247 let positive_values: Vec<f64> = values.iter().filter(|&&v| v > 0.0).copied().collect();
249
250 if positive_values.len() < 8 {
251 return Err(EvalError::InvalidParameter(
252 "Log-normal test requires at least 8 positive values".to_string(),
253 ));
254 }
255
256 let skipped = values.len() - positive_values.len();
257 if skipped > 0 {
258 issues.push(format!(
259 "Skipped {} non-positive values for log-normal test",
260 skipped
261 ));
262 }
263
264 let log_values: Vec<f64> = positive_values.iter().map(|&x| x.ln()).collect();
266
267 let (a2, normal_params) = self.test_normal(&log_values);
269
270 let (mu, sigma) = match normal_params {
272 FittedParameters::Normal { mean, std_dev } => (mean, std_dev),
273 _ => unreachable!(),
274 };
275
276 Ok((a2, FittedParameters::LogNormal { mu, sigma }))
277 }
278
279 fn test_exponential(
281 &self,
282 values: &[f64],
283 issues: &mut Vec<String>,
284 ) -> EvalResult<(f64, FittedParameters)> {
285 let positive_values: Vec<f64> = values.iter().filter(|&&v| v > 0.0).copied().collect();
287
288 if positive_values.len() < 8 {
289 return Err(EvalError::InvalidParameter(
290 "Exponential test requires at least 8 positive values".to_string(),
291 ));
292 }
293
294 let skipped = values.len() - positive_values.len();
295 if skipped > 0 {
296 issues.push(format!(
297 "Skipped {} non-positive values for exponential test",
298 skipped
299 ));
300 }
301
302 let n = positive_values.len();
303 let n_f = n as f64;
304
305 let mean = positive_values.iter().sum::<f64>() / n_f;
307 let lambda = 1.0 / mean;
308
309 let mut sorted: Vec<f64> = positive_values;
311 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
312
313 let u: Vec<f64> = sorted.iter().map(|&x| 1.0 - (-lambda * x).exp()).collect();
315
316 let a2 = self.compute_a2_uniform(&u);
318
319 let a2_corrected = a2 * (1.0 + 0.6 / n_f);
321
322 Ok((a2_corrected, FittedParameters::Exponential { lambda }))
323 }
324
325 fn test_uniform(&self, values: &[f64]) -> (f64, FittedParameters) {
327 let min = values.iter().cloned().fold(f64::INFINITY, f64::min);
328 let max = values.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
329
330 let mut sorted: Vec<f64> = values.to_vec();
332 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
333
334 let range = (max - min).max(1e-10);
336 let u: Vec<f64> = sorted.iter().map(|&x| (x - min) / range).collect();
337
338 let a2 = self.compute_a2_uniform(&u);
339
340 (a2, FittedParameters::Uniform { min, max })
341 }
342
343 fn compute_a2_statistic<F>(&self, z: &[f64], cdf: F) -> f64
345 where
346 F: Fn(f64) -> f64,
347 {
348 let n = z.len();
349 let n_f = n as f64;
350
351 let mut sum = 0.0;
352 for (i, &zi) in z.iter().enumerate() {
353 let fi = cdf(zi).clamp(1e-10, 1.0 - 1e-10);
354 let fn_minus_i = cdf(z[n - 1 - i]).clamp(1e-10, 1.0 - 1e-10);
355
356 let term = (2.0 * (i as f64) + 1.0) * (fi.ln() + (1.0 - fn_minus_i).ln());
357 sum += term;
358 }
359
360 -n_f - sum / n_f
361 }
362
363 fn compute_a2_uniform(&self, u: &[f64]) -> f64 {
365 let n = u.len();
366 let n_f = n as f64;
367
368 let mut sum = 0.0;
369 for (i, &ui) in u.iter().enumerate() {
370 let ui = ui.clamp(1e-10, 1.0 - 1e-10);
371 let u_n_minus_i = u[n - 1 - i].clamp(1e-10, 1.0 - 1e-10);
372
373 let term = (2.0 * (i as f64) + 1.0) * (ui.ln() + (1.0 - u_n_minus_i).ln());
374 sum += term;
375 }
376
377 -n_f - sum / n_f
378 }
379
380 fn normal_cdf(x: f64) -> f64 {
382 0.5 * (1.0 + erf(x / std::f64::consts::SQRT_2))
383 }
384
385 fn approximate_p_value(&self, a2: f64, dist: TargetDistribution) -> f64 {
387 match dist {
388 TargetDistribution::Normal => {
389 if a2 < 0.2 {
391 1.0 - (-13.436 + 101.14 * a2 - 223.73 * a2.powi(2)).exp()
392 } else if a2 < 0.34 {
393 1.0 - (-8.318 + 42.796 * a2 - 59.938 * a2.powi(2)).exp()
394 } else if a2 < 0.6 {
395 (0.9177 - 4.279 * a2 - 1.38 * a2.powi(2)).exp()
396 } else if a2 < 13.0 {
397 (1.2937 - 5.709 * a2 + 0.0186 * a2.powi(2)).exp()
398 } else {
399 0.0
400 }
401 }
402 TargetDistribution::Exponential => {
403 if a2 < 0.26 {
405 1.0 - (-12.0 + 70.0 * a2 - 100.0 * a2.powi(2)).exp()
406 } else if a2 < 0.51 {
407 1.0 - (-6.0 + 24.0 * a2 - 24.0 * a2.powi(2)).exp()
408 } else if a2 < 0.95 {
409 (0.7 - 3.5 * a2 + 0.6 * a2.powi(2)).exp()
410 } else if a2 < 10.0 {
411 (0.9 - 4.0 * a2 + 0.01 * a2.powi(2)).exp()
412 } else {
413 0.0
414 }
415 }
416 _ => {
417 let cv = CriticalValues::normal();
421 if a2 <= cv.cv_15 {
422 0.25 } else if a2 <= cv.cv_10 {
424 0.15
425 } else if a2 <= cv.cv_05 {
426 0.10
427 } else if a2 <= cv.cv_025 {
428 0.05
429 } else if a2 <= cv.cv_01 {
430 0.025
431 } else {
432 (1.2937 - 5.709 * a2 + 0.0186 * a2.powi(2)).exp().max(0.0)
434 }
435 }
436 }
437 }
438}
439
impl Default for AndersonDarlingAnalyzer {
    /// Equivalent to [`AndersonDarlingAnalyzer::new`]: normal target
    /// distribution at α = 0.05.
    fn default() -> Self {
        Self::new()
    }
}
445
/// Error function via the Abramowitz & Stegun 7.1.26 rational
/// approximation (maximum absolute error about 1.5e-7).
fn erf(x: f64) -> f64 {
    // Polynomial coefficients and scale constant from A&S 7.1.26.
    const A1: f64 = 0.254829592;
    const A2: f64 = -0.284496736;
    const A3: f64 = 1.421413741;
    const A4: f64 = -1.453152027;
    const A5: f64 = 1.061405429;
    const P: f64 = 0.3275911;

    // erf is odd: evaluate on |x| and restore the sign at the end.
    let ax = x.abs();
    let t = 1.0 / (1.0 + P * ax);
    let poly = ((((A5 * t + A4) * t + A3) * t + A2) * t + A1) * t;
    let y = 1.0 - poly * (-ax * ax).exp();

    if x < 0.0 {
        -y
    } else {
        y
    }
}
463
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;
    use rand_distr::{Distribution, Exp, LogNormal, Normal, Uniform};

    /// A standard-normal sample should be accepted by the normality test.
    #[test]
    fn test_normal_sample() {
        // Fixed seed keeps the sampled sequence (and thus A²) deterministic.
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let normal = Normal::new(0.0, 1.0).unwrap();
        let values: Vec<f64> = (0..500).map(|_| normal.sample(&mut rng)).collect();

        let analyzer = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Normal)
            .with_significance_level(0.05);

        let result = analyzer.analyze(&values).unwrap();
        assert!(result.passes, "Normal sample should pass normality test");
        assert!(result.p_value > 0.05);
    }

    /// A strongly skewed (exponential) sample should fail normality.
    #[test]
    fn test_non_normal_sample() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let exp = Exp::new(1.0).unwrap();
        let values: Vec<f64> = (0..500).map(|_| exp.sample(&mut rng)).collect();

        let analyzer = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Normal)
            .with_significance_level(0.05);

        let result = analyzer.analyze(&values).unwrap();
        assert!(
            !result.passes,
            "Exponential sample should fail normality test"
        );
    }

    /// Log-normal data should pass the log-normal test and recover the
    /// generating parameters (mu = 3.0, sigma = 0.5) within tolerance.
    #[test]
    fn test_lognormal_sample() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let lognormal = LogNormal::new(3.0, 0.5).unwrap();
        let values: Vec<f64> = (0..500).map(|_| lognormal.sample(&mut rng)).collect();

        let analyzer = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::LogNormal)
            .with_significance_level(0.05);

        let result = analyzer.analyze(&values).unwrap();
        assert!(
            result.passes,
            "Log-normal sample should pass log-normality test"
        );

        if let FittedParameters::LogNormal { mu, sigma } = result.fitted_params {
            assert!((mu - 3.0).abs() < 0.5, "Mu should be close to 3.0");
            assert!((sigma - 0.5).abs() < 0.2, "Sigma should be close to 0.5");
        } else {
            panic!("Expected LogNormal parameters");
        }
    }

    /// Exponential data should pass the exponential test and recover
    /// lambda = 2.0 within tolerance.
    #[test]
    fn test_exponential_sample() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let exp = Exp::new(2.0).unwrap();
        let values: Vec<f64> = (0..500).map(|_| exp.sample(&mut rng)).collect();

        let analyzer = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Exponential)
            .with_significance_level(0.05);

        let result = analyzer.analyze(&values).unwrap();
        assert!(
            result.passes,
            "Exponential sample should pass exponential test"
        );

        if let FittedParameters::Exponential { lambda } = result.fitted_params {
            assert!(
                (lambda - 2.0).abs() < 0.5,
                "Lambda should be close to 2.0, got {}",
                lambda
            );
        } else {
            panic!("Expected Exponential parameters");
        }
    }

    /// Uniform target: only the sample count is asserted here — the
    /// uniform case uses approximate critical values, so pass/fail is
    /// deliberately not pinned.
    #[test]
    fn test_uniform_sample() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let uniform = Uniform::new(0.0, 10.0).unwrap();
        let values: Vec<f64> = (0..500).map(|_| uniform.sample(&mut rng)).collect();

        let analyzer = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Uniform)
            .with_significance_level(0.05);

        let result = analyzer.analyze(&values).unwrap();
        assert!(result.sample_size == 500);
    }

    /// Fewer than 8 values must be rejected with `InsufficientData`.
    #[test]
    fn test_insufficient_data() {
        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let analyzer = AndersonDarlingAnalyzer::new();
        let result = analyzer.analyze(&values);

        assert!(matches!(
            result,
            Err(EvalError::InsufficientData {
                required: 8,
                actual: 5
            })
        ));
    }

    /// Critical values must be strictly increasing as α decreases.
    #[test]
    fn test_critical_values() {
        let cv = CriticalValues::normal();
        assert!(cv.cv_15 < cv.cv_10);
        assert!(cv.cv_10 < cv.cv_05);
        assert!(cv.cv_05 < cv.cv_025);
        assert!(cv.cv_025 < cv.cv_01);
    }
}