1use crate::error::{EvalError, EvalResult};
7use serde::{Deserialize, Serialize};
8
/// Distribution families a sample can be tested against.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum TargetDistribution {
    /// Gaussian distribution (the default target).
    #[default]
    Normal,
    /// Distribution whose natural logarithm is normally distributed.
    LogNormal,
    /// Exponential distribution with rate parameter lambda.
    Exponential,
    /// Continuous uniform distribution on [min, max].
    Uniform,
}
22
/// Distribution parameters estimated from the sample during the test.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FittedParameters {
    /// Sample mean and standard deviation.
    Normal {
        mean: f64,
        std_dev: f64,
    },
    /// Mean and standard deviation of the logs of the positive values.
    LogNormal {
        mu: f64,
        sigma: f64,
    },
    /// Rate parameter (reciprocal of the sample mean).
    Exponential {
        lambda: f64,
    },
    /// Observed sample minimum and maximum.
    Uniform {
        min: f64,
        max: f64,
    },
}
53
/// Full result of one Anderson-Darling goodness-of-fit analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AndersonDarlingAnalysis {
    /// Number of finite values actually used in the test.
    pub sample_size: usize,
    /// Distribution family the sample was tested against.
    pub target_distribution: TargetDistribution,
    /// The A² test statistic (with small-sample correction where applied).
    pub statistic: f64,
    /// Critical-value table used for the pass/fail decision.
    pub critical_values: CriticalValues,
    /// Approximate p-value for the statistic.
    pub p_value: f64,
    /// Significance level α the verdict was taken at.
    pub significance_level: f64,
    /// True when the statistic does not exceed the critical value at α.
    pub passes: bool,
    /// Distribution parameters estimated from the sample.
    pub fitted_params: FittedParameters,
    /// Human-readable diagnostics (skipped values, failed verdict, …).
    pub issues: Vec<String>,
}
76
/// Critical values of the A² statistic at the five tabulated
/// significance levels.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CriticalValues {
    /// Critical value at α = 0.15.
    pub cv_15: f64,
    /// Critical value at α = 0.10.
    pub cv_10: f64,
    /// Critical value at α = 0.05.
    pub cv_05: f64,
    /// Critical value at α = 0.025.
    pub cv_025: f64,
    /// Critical value at α = 0.01.
    pub cv_01: f64,
}
91
impl CriticalValues {
    /// Tabulated critical values for the normality test with both
    /// parameters estimated from the sample (applied to the corrected A²).
    // NOTE(review): these match the standard case-3 normality table
    // (Stephens) — confirm against the published reference if precision
    // matters.
    pub fn normal() -> Self {
        Self {
            cv_15: 0.576,
            cv_10: 0.656,
            cv_05: 0.787,
            cv_025: 0.918,
            cv_01: 1.092,
        }
    }

    /// Tabulated critical values for the exponential test with the rate
    /// estimated from the sample.
    pub fn exponential() -> Self {
        Self {
            cv_15: 0.922,
            cv_10: 1.078,
            cv_05: 1.341,
            cv_025: 1.606,
            cv_01: 1.957,
        }
    }
}
116
/// Builder-style Anderson-Darling goodness-of-fit analyzer.
pub struct AndersonDarlingAnalyzer {
    /// Distribution family samples are tested against.
    target_distribution: TargetDistribution,
    /// Significance level α used for the pass/fail verdict.
    significance_level: f64,
}
124
125impl AndersonDarlingAnalyzer {
126 pub fn new() -> Self {
128 Self {
129 target_distribution: TargetDistribution::Normal,
130 significance_level: 0.05,
131 }
132 }
133
134 pub fn with_target_distribution(mut self, dist: TargetDistribution) -> Self {
136 self.target_distribution = dist;
137 self
138 }
139
140 pub fn with_significance_level(mut self, level: f64) -> Self {
142 self.significance_level = level;
143 self
144 }
145
146 pub fn analyze(&self, values: &[f64]) -> EvalResult<AndersonDarlingAnalysis> {
148 let n = values.len();
149 if n < 8 {
150 return Err(EvalError::InsufficientData {
151 required: 8,
152 actual: n,
153 });
154 }
155
156 let valid_values: Vec<f64> = values.iter().filter(|&&v| v.is_finite()).copied().collect();
158
159 if valid_values.len() < 8 {
160 return Err(EvalError::InsufficientData {
161 required: 8,
162 actual: valid_values.len(),
163 });
164 }
165
166 let mut issues = Vec::new();
167
168 let (statistic, fitted_params) = match self.target_distribution {
170 TargetDistribution::Normal => self.test_normal(&valid_values),
171 TargetDistribution::LogNormal => self.test_lognormal(&valid_values, &mut issues)?,
172 TargetDistribution::Exponential => self.test_exponential(&valid_values, &mut issues)?,
173 TargetDistribution::Uniform => self.test_uniform(&valid_values),
174 };
175
176 let critical_values = match self.target_distribution {
178 TargetDistribution::Exponential => CriticalValues::exponential(),
179 _ => CriticalValues::normal(),
180 };
181
182 let p_value = self.approximate_p_value(statistic, self.target_distribution);
184
185 let critical_threshold = match self.significance_level {
187 s if s >= 0.15 => critical_values.cv_15,
188 s if s >= 0.10 => critical_values.cv_10,
189 s if s >= 0.05 => critical_values.cv_05,
190 s if s >= 0.025 => critical_values.cv_025,
191 _ => critical_values.cv_01,
192 };
193 let passes = statistic <= critical_threshold;
194
195 if !passes {
196 issues.push(format!(
197 "A² = {:.4} exceeds critical value {:.4} at α = {:.2}",
198 statistic, critical_threshold, self.significance_level
199 ));
200 }
201
202 Ok(AndersonDarlingAnalysis {
203 sample_size: valid_values.len(),
204 target_distribution: self.target_distribution,
205 statistic,
206 critical_values,
207 p_value,
208 significance_level: self.significance_level,
209 passes,
210 fitted_params,
211 issues,
212 })
213 }
214
215 fn test_normal(&self, values: &[f64]) -> (f64, FittedParameters) {
217 let n = values.len();
218 let n_f = n as f64;
219
220 let mean = values.iter().sum::<f64>() / n_f;
222 let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n_f;
223 let std_dev = variance.sqrt();
224
225 let mut z: Vec<f64> = values
227 .iter()
228 .map(|&x| (x - mean) / std_dev.max(1e-10))
229 .collect();
230 z.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
231
232 let a2 = self.compute_a2_statistic(&z, Self::normal_cdf);
234
235 let a2_corrected = a2 * (1.0 + 0.75 / n_f + 2.25 / (n_f * n_f));
237
238 (a2_corrected, FittedParameters::Normal { mean, std_dev })
239 }
240
241 fn test_lognormal(
243 &self,
244 values: &[f64],
245 issues: &mut Vec<String>,
246 ) -> EvalResult<(f64, FittedParameters)> {
247 let positive_values: Vec<f64> = values.iter().filter(|&&v| v > 0.0).copied().collect();
249
250 if positive_values.len() < 8 {
251 return Err(EvalError::InvalidParameter(
252 "Log-normal test requires at least 8 positive values".to_string(),
253 ));
254 }
255
256 let skipped = values.len() - positive_values.len();
257 if skipped > 0 {
258 issues.push(format!(
259 "Skipped {} non-positive values for log-normal test",
260 skipped
261 ));
262 }
263
264 let log_values: Vec<f64> = positive_values.iter().map(|&x| x.ln()).collect();
266
267 let (a2, normal_params) = self.test_normal(&log_values);
269
270 let (mu, sigma) = match normal_params {
272 FittedParameters::Normal { mean, std_dev } => (mean, std_dev),
273 _ => unreachable!(),
274 };
275
276 Ok((a2, FittedParameters::LogNormal { mu, sigma }))
277 }
278
279 fn test_exponential(
281 &self,
282 values: &[f64],
283 issues: &mut Vec<String>,
284 ) -> EvalResult<(f64, FittedParameters)> {
285 let positive_values: Vec<f64> = values.iter().filter(|&&v| v > 0.0).copied().collect();
287
288 if positive_values.len() < 8 {
289 return Err(EvalError::InvalidParameter(
290 "Exponential test requires at least 8 positive values".to_string(),
291 ));
292 }
293
294 let skipped = values.len() - positive_values.len();
295 if skipped > 0 {
296 issues.push(format!(
297 "Skipped {} non-positive values for exponential test",
298 skipped
299 ));
300 }
301
302 let n = positive_values.len();
303 let n_f = n as f64;
304
305 let mean = positive_values.iter().sum::<f64>() / n_f;
307 let lambda = 1.0 / mean;
308
309 let mut sorted: Vec<f64> = positive_values;
311 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
312
313 let u: Vec<f64> = sorted.iter().map(|&x| 1.0 - (-lambda * x).exp()).collect();
315
316 let a2 = self.compute_a2_uniform(&u);
318
319 let a2_corrected = a2 * (1.0 + 0.6 / n_f);
321
322 Ok((a2_corrected, FittedParameters::Exponential { lambda }))
323 }
324
325 fn test_uniform(&self, values: &[f64]) -> (f64, FittedParameters) {
327 let min = values.iter().cloned().fold(f64::INFINITY, f64::min);
328 let max = values.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
329
330 let mut sorted: Vec<f64> = values.to_vec();
332 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
333
334 let range = (max - min).max(1e-10);
336 let u: Vec<f64> = sorted.iter().map(|&x| (x - min) / range).collect();
337
338 let a2 = self.compute_a2_uniform(&u);
339
340 (a2, FittedParameters::Uniform { min, max })
341 }
342
343 fn compute_a2_statistic<F>(&self, z: &[f64], cdf: F) -> f64
345 where
346 F: Fn(f64) -> f64,
347 {
348 let n = z.len();
349 let n_f = n as f64;
350
351 let mut sum = 0.0;
352 for (i, &zi) in z.iter().enumerate() {
353 let fi = cdf(zi).clamp(1e-10, 1.0 - 1e-10);
354 let fn_minus_i = cdf(z[n - 1 - i]).clamp(1e-10, 1.0 - 1e-10);
355
356 let term = (2.0 * (i as f64) + 1.0) * (fi.ln() + (1.0 - fn_minus_i).ln());
357 sum += term;
358 }
359
360 -n_f - sum / n_f
361 }
362
363 fn compute_a2_uniform(&self, u: &[f64]) -> f64 {
365 let n = u.len();
366 let n_f = n as f64;
367
368 let mut sum = 0.0;
369 for (i, &ui) in u.iter().enumerate() {
370 let ui = ui.clamp(1e-10, 1.0 - 1e-10);
371 let u_n_minus_i = u[n - 1 - i].clamp(1e-10, 1.0 - 1e-10);
372
373 let term = (2.0 * (i as f64) + 1.0) * (ui.ln() + (1.0 - u_n_minus_i).ln());
374 sum += term;
375 }
376
377 -n_f - sum / n_f
378 }
379
380 fn normal_cdf(x: f64) -> f64 {
382 0.5 * (1.0 + erf(x / std::f64::consts::SQRT_2))
383 }
384
385 fn approximate_p_value(&self, a2: f64, dist: TargetDistribution) -> f64 {
387 match dist {
388 TargetDistribution::Normal => {
389 if a2 < 0.2 {
391 1.0 - (-13.436 + 101.14 * a2 - 223.73 * a2.powi(2)).exp()
392 } else if a2 < 0.34 {
393 1.0 - (-8.318 + 42.796 * a2 - 59.938 * a2.powi(2)).exp()
394 } else if a2 < 0.6 {
395 (0.9177 - 4.279 * a2 - 1.38 * a2.powi(2)).exp()
396 } else if a2 < 13.0 {
397 (1.2937 - 5.709 * a2 + 0.0186 * a2.powi(2)).exp()
398 } else {
399 0.0
400 }
401 }
402 TargetDistribution::Exponential => {
403 if a2 < 0.26 {
405 1.0 - (-12.0 + 70.0 * a2 - 100.0 * a2.powi(2)).exp()
406 } else if a2 < 0.51 {
407 1.0 - (-6.0 + 24.0 * a2 - 24.0 * a2.powi(2)).exp()
408 } else if a2 < 0.95 {
409 (0.7 - 3.5 * a2 + 0.6 * a2.powi(2)).exp()
410 } else if a2 < 10.0 {
411 (0.9 - 4.0 * a2 + 0.01 * a2.powi(2)).exp()
412 } else {
413 0.0
414 }
415 }
416 _ => {
417 if a2 < 2.0 {
419 (-a2 + 0.5).exp().clamp(0.0, 1.0)
420 } else {
421 0.0
422 }
423 }
424 }
425 }
426}
427
428impl Default for AndersonDarlingAnalyzer {
429 fn default() -> Self {
430 Self::new()
431 }
432}
433
/// Rational approximation to the error function (Abramowitz & Stegun,
/// formula 7.1.26); maximum absolute error about 1.5e-7.
fn erf(x: f64) -> f64 {
    const P: f64 = 0.3275911;
    // Polynomial coefficients, highest degree first (Horner order).
    const COEFFS: [f64; 5] = [
        1.061405429,
        -1.453152027,
        1.421413741,
        -0.284496736,
        0.254829592,
    ];

    // erf is odd: evaluate on |x| and restore the sign at the end.
    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let x = x.abs();

    let t = 1.0 / (1.0 + P * x);
    // Horner evaluation of the degree-5 polynomial in t.
    let poly = COEFFS.iter().fold(0.0, |acc, &c| (acc + c) * t);

    sign * (1.0 - poly * (-x * x).exp())
}
451
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;
    use rand_distr::{Distribution, Exp, LogNormal, Normal, Uniform};

    // A seeded N(0, 1) sample should be accepted by the normality test.
    #[test]
    fn test_normal_sample() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let normal = Normal::new(0.0, 1.0).unwrap();
        let values: Vec<f64> = (0..500).map(|_| normal.sample(&mut rng)).collect();

        let analyzer = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Normal)
            .with_significance_level(0.05);

        let result = analyzer.analyze(&values).unwrap();
        assert!(result.passes, "Normal sample should pass normality test");
        assert!(result.p_value > 0.05);
    }

    // A heavily skewed (exponential) sample must be rejected as non-normal.
    #[test]
    fn test_non_normal_sample() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let exp = Exp::new(1.0).unwrap();
        let values: Vec<f64> = (0..500).map(|_| exp.sample(&mut rng)).collect();

        let analyzer = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Normal)
            .with_significance_level(0.05);

        let result = analyzer.analyze(&values).unwrap();
        assert!(
            !result.passes,
            "Exponential sample should fail normality test"
        );
    }

    // A log-normal sample should pass and recover (mu, sigma) roughly.
    #[test]
    fn test_lognormal_sample() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let lognormal = LogNormal::new(3.0, 0.5).unwrap();
        let values: Vec<f64> = (0..500).map(|_| lognormal.sample(&mut rng)).collect();

        let analyzer = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::LogNormal)
            .with_significance_level(0.05);

        let result = analyzer.analyze(&values).unwrap();
        assert!(
            result.passes,
            "Log-normal sample should pass log-normality test"
        );

        // Fitted parameters should be near the generating distribution.
        if let FittedParameters::LogNormal { mu, sigma } = result.fitted_params {
            assert!((mu - 3.0).abs() < 0.5, "Mu should be close to 3.0");
            assert!((sigma - 0.5).abs() < 0.2, "Sigma should be close to 0.5");
        } else {
            panic!("Expected LogNormal parameters");
        }
    }

    // An Exp(2) sample should pass and recover lambda ≈ 2.
    #[test]
    fn test_exponential_sample() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let exp = Exp::new(2.0).unwrap();
        let values: Vec<f64> = (0..500).map(|_| exp.sample(&mut rng)).collect();

        let analyzer = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Exponential)
            .with_significance_level(0.05);

        let result = analyzer.analyze(&values).unwrap();
        assert!(
            result.passes,
            "Exponential sample should pass exponential test"
        );

        if let FittedParameters::Exponential { lambda } = result.fitted_params {
            assert!(
                (lambda - 2.0).abs() < 0.5,
                "Lambda should be close to 2.0, got {}",
                lambda
            );
        } else {
            panic!("Expected Exponential parameters");
        }
    }

    // Only the bookkeeping is asserted for the uniform target; the
    // pass/fail verdict is deliberately not checked here.
    #[test]
    fn test_uniform_sample() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let uniform = Uniform::new(0.0, 10.0);
        let values: Vec<f64> = (0..500).map(|_| uniform.sample(&mut rng)).collect();

        let analyzer = AndersonDarlingAnalyzer::new()
            .with_target_distribution(TargetDistribution::Uniform)
            .with_significance_level(0.05);

        let result = analyzer.analyze(&values).unwrap();
        assert!(result.sample_size == 500);
    }

    // Fewer than 8 values must be rejected up front.
    #[test]
    fn test_insufficient_data() {
        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0]; let analyzer = AndersonDarlingAnalyzer::new();
        let result = analyzer.analyze(&values);

        assert!(matches!(
            result,
            Err(EvalError::InsufficientData {
                required: 8,
                actual: 5
            })
        ));
    }

    // Critical values must grow as the significance level shrinks.
    #[test]
    fn test_critical_values() {
        let cv = CriticalValues::normal();
        assert!(cv.cv_15 < cv.cv_10);
        assert!(cv.cv_10 < cv.cv_05);
        assert!(cv.cv_05 < cv.cv_025);
        assert!(cv.cv_025 < cv.cv_01);
    }
}