// datasynth_core/distributions/validation.rs

//! Statistical validation runner for generated amount distributions
//! (v3.5.1+).
//!
//! Executes the tests declared in
//! `distributions.validation.tests` (schema-side [`StatisticalTest
//! Config`](../../../../datasynth-config/src/schema.rs)) against a slice
//! of sampled amounts and emits a [`StatisticalValidationReport`]
//! summarising which tests passed, warned, or failed.
//!
//! This module deliberately keeps the surface minimal. v3.5.1 implements
//! Benford first-digit, chi-squared goodness-of-fit, a lightweight
//! Kolmogorov-Smirnov distribution-fit check, a Spearman correlation
//! check, and an Anderson-Darling log-normality test — enough to catch
//! the most common realism regressions without pulling in a heavyweight
//! stats dependency.
16
17use rust_decimal::prelude::ToPrimitive;
18use rust_decimal::Decimal;
19use serde::{Deserialize, Serialize};
20
21use super::benford::get_first_digit;
22
/// Outcome of a single statistical test.
///
/// Serialized in snake_case (`passed`, `warning`, `failed`, `skipped`)
/// via the `serde(rename_all)` attribute below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TestOutcome {
    /// Test passed all thresholds.
    Passed,
    /// Test passed the hard threshold but exceeded a warning band.
    Warning,
    /// Test failed the hard threshold.
    Failed,
    /// Test was skipped (e.g. too few samples, not-yet-implemented variant).
    Skipped,
}
36
/// Result of running a single statistical test.
///
/// `statistic` and `threshold` are meaningful only for `Passed`,
/// `Warning`, and `Failed` outcomes; `Skipped` results carry a zero
/// statistic and explain the reason in `message`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalTestResult {
    /// Human-readable test name (e.g. "benford_first_digit").
    pub name: String,
    /// Test outcome.
    pub outcome: TestOutcome,
    /// Key measured statistic (e.g. MAD for Benford, chi-squared value).
    pub statistic: f64,
    /// Threshold compared against (typically the hard-fail threshold).
    pub threshold: f64,
    /// One-line human description of what happened.
    pub message: String,
}
51
/// Aggregate report covering every test run.
///
/// `Default` yields an empty report (zero samples, no results), which
/// trivially satisfies [`StatisticalValidationReport::all_passed`].
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct StatisticalValidationReport {
    /// Number of samples the report was computed over.
    pub sample_count: usize,
    /// Per-test results in input order.
    pub results: Vec<StatisticalTestResult>,
}
60
61impl StatisticalValidationReport {
62    /// Did every test pass? (Warnings do not count as failures.)
63    pub fn all_passed(&self) -> bool {
64        self.results
65            .iter()
66            .all(|r| !matches!(r.outcome, TestOutcome::Failed))
67    }
68
69    /// Is there at least one warning?
70    pub fn has_warnings(&self) -> bool {
71        self.results
72            .iter()
73            .any(|r| matches!(r.outcome, TestOutcome::Warning))
74    }
75
76    /// Collect all failed test names.
77    pub fn failed_names(&self) -> Vec<String> {
78        self.results
79            .iter()
80            .filter(|r| matches!(r.outcome, TestOutcome::Failed))
81            .map(|r| r.name.clone())
82            .collect()
83    }
84}
85
86/// Benford first-digit mean-absolute-deviation (MAD) test.
87///
88/// Returns a [`StatisticalTestResult`] where `statistic` is the MAD and
89/// `threshold` is the hard-fail threshold. `Warning` when MAD > warning
90/// threshold but <= hard threshold. `Skipped` when fewer than 100
91/// positive amounts are available (sample too small for stable MAD).
92pub fn run_benford_first_digit(
93    amounts: &[Decimal],
94    threshold_mad: f64,
95    warning_mad: f64,
96) -> StatisticalTestResult {
97    let mut counts = [0u32; 10]; // index 0 unused; 1..=9 used
98    let mut total = 0u32;
99    for amount in amounts {
100        if let Some(d) = get_first_digit(*amount) {
101            counts[d as usize] += 1;
102            total += 1;
103        }
104    }
105
106    if total < 100 {
107        return StatisticalTestResult {
108            name: "benford_first_digit".to_string(),
109            outcome: TestOutcome::Skipped,
110            statistic: 0.0,
111            threshold: threshold_mad,
112            message: format!("only {total} samples with valid first digit; need ≥100"),
113        };
114    }
115
116    // Expected Benford probability for digit d: log10(1 + 1/d).
117    // Index 0 is unused; values for d ∈ {1..=9}.
118    const EXPECTED: [f64; 10] = [
119        0.0,
120        std::f64::consts::LOG10_2, // log10(2)
121        0.17609125905568124,       // log10(3/2)
122        0.12493873660829995,
123        0.09691001300805642,
124        0.07918124604762482,
125        0.06694678963061322,
126        0.057991946977686726,
127        0.05115252244738129,
128        0.04575749056067514,
129    ];
130
131    let total_f = total as f64;
132    let mad: f64 = (1..=9)
133        .map(|d| (counts[d] as f64 / total_f - EXPECTED[d]).abs())
134        .sum::<f64>()
135        / 9.0;
136
137    let outcome = if mad > threshold_mad {
138        TestOutcome::Failed
139    } else if mad > warning_mad {
140        TestOutcome::Warning
141    } else {
142        TestOutcome::Passed
143    };
144
145    StatisticalTestResult {
146        name: "benford_first_digit".to_string(),
147        outcome,
148        statistic: mad,
149        threshold: threshold_mad,
150        message: format!(
151            "MAD={mad:.4} over {total} first digits (threshold={threshold_mad:.4}, warn={warning_mad:.4})"
152        ),
153    }
154}
155
156/// Chi-squared goodness-of-fit test against a uniform binning of
157/// log-scale amounts.
158///
159/// This is intentionally lightweight — it checks that amounts aren't
160/// concentrated in one log-bin (which would indicate a broken mixture
161/// or collapsed distribution). Hard fails when the chi-squared statistic
162/// exceeds the critical value at the configured significance.
163pub fn run_chi_squared(
164    amounts: &[Decimal],
165    bins: usize,
166    significance: f64,
167) -> StatisticalTestResult {
168    if amounts.len() < 100 {
169        return StatisticalTestResult {
170            name: "chi_squared".to_string(),
171            outcome: TestOutcome::Skipped,
172            statistic: 0.0,
173            threshold: 0.0,
174            message: format!("only {} samples; need ≥100", amounts.len()),
175        };
176    }
177
178    let bins = bins.max(2);
179    let positives: Vec<f64> = amounts
180        .iter()
181        .filter_map(|a| a.to_f64())
182        .filter(|v| *v > 0.0)
183        .collect();
184    if positives.len() < 100 {
185        return StatisticalTestResult {
186            name: "chi_squared".to_string(),
187            outcome: TestOutcome::Skipped,
188            statistic: 0.0,
189            threshold: 0.0,
190            message: format!("only {} positive samples; need ≥100", positives.len()),
191        };
192    }
193
194    let logs: Vec<f64> = positives.iter().map(|v| v.ln()).collect();
195    let min = logs.iter().cloned().fold(f64::INFINITY, f64::min);
196    let max = logs.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
197    if !min.is_finite() || !max.is_finite() || max <= min {
198        return StatisticalTestResult {
199            name: "chi_squared".to_string(),
200            outcome: TestOutcome::Skipped,
201            statistic: 0.0,
202            threshold: 0.0,
203            message: "degenerate log-range".to_string(),
204        };
205    }
206
207    let bin_width = (max - min) / bins as f64;
208    let mut observed = vec![0u32; bins];
209    for v in &logs {
210        let idx = (((v - min) / bin_width) as usize).min(bins - 1);
211        observed[idx] += 1;
212    }
213
214    let n = logs.len() as f64;
215    let expected_per_bin = n / bins as f64;
216    let chi_sq: f64 = observed
217        .iter()
218        .map(|o| {
219            let diff = *o as f64 - expected_per_bin;
220            diff * diff / expected_per_bin
221        })
222        .sum();
223
224    // Approximate chi-squared critical value for df = bins - 1 at the
225    // configured significance. We ship hard-coded tables for the common
226    // cases (α ∈ {0.01, 0.05, 0.10}, df ∈ {4,5,6,7,8,9,10,14,19,24,29})
227    // and fall back to a generous ceiling otherwise.
228    let df = bins - 1;
229    let critical = chi_sq_critical(df, significance);
230
231    let outcome = if chi_sq > critical {
232        TestOutcome::Failed
233    } else {
234        TestOutcome::Passed
235    };
236
237    StatisticalTestResult {
238        name: "chi_squared".to_string(),
239        outcome,
240        statistic: chi_sq,
241        threshold: critical,
242        message: format!(
243            "χ²={chi_sq:.2} over {bins} log-bins ({n} samples), critical={critical:.2} at α={significance}"
244        ),
245    }
246}
247
248/// Kolmogorov-Smirnov goodness-of-fit against a uniform CDF on the
249/// log-scale of amounts.
250///
251/// This is the simplest version — compares the empirical log-amount CDF
252/// against a uniform CDF on `[min_log, max_log]`. Useful for detecting
253/// grossly skewed outputs; more sophisticated target-distribution fits
254/// (Normal/LogNormal/Exponential) ship in v3.5.2.
255pub fn run_ks_uniform_log(amounts: &[Decimal], significance: f64) -> StatisticalTestResult {
256    let positives: Vec<f64> = amounts
257        .iter()
258        .filter_map(|a| a.to_f64())
259        .filter(|v| *v > 0.0)
260        .collect();
261    if positives.len() < 100 {
262        return StatisticalTestResult {
263            name: "ks_uniform_log".to_string(),
264            outcome: TestOutcome::Skipped,
265            statistic: 0.0,
266            threshold: 0.0,
267            message: format!("only {} positive samples; need ≥100", positives.len()),
268        };
269    }
270
271    let mut logs: Vec<f64> = positives.iter().map(|v| v.ln()).collect();
272    logs.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
273    let min = logs[0];
274    let max = logs[logs.len() - 1];
275    if max <= min {
276        return StatisticalTestResult {
277            name: "ks_uniform_log".to_string(),
278            outcome: TestOutcome::Skipped,
279            statistic: 0.0,
280            threshold: 0.0,
281            message: "degenerate log-range".to_string(),
282        };
283    }
284
285    let n = logs.len() as f64;
286    let mut max_diff: f64 = 0.0;
287    for (i, v) in logs.iter().enumerate() {
288        let empirical = (i as f64 + 1.0) / n;
289        let uniform = (v - min) / (max - min);
290        let diff = (empirical - uniform).abs();
291        if diff > max_diff {
292            max_diff = diff;
293        }
294    }
295
296    // Approximate KS critical value at large n (Kolmogorov):
297    //   D_α ≈ c(α) / sqrt(n)
298    // where c(0.05) ≈ 1.358, c(0.01) ≈ 1.628, c(0.10) ≈ 1.224.
299    let c = if significance <= 0.011 {
300        1.628
301    } else if significance <= 0.051 {
302        1.358
303    } else {
304        1.224
305    };
306    let critical = c / n.sqrt();
307
308    let outcome = if max_diff > critical {
309        TestOutcome::Failed
310    } else {
311        TestOutcome::Passed
312    };
313
314    StatisticalTestResult {
315        name: "ks_uniform_log".to_string(),
316        outcome,
317        statistic: max_diff,
318        threshold: critical,
319        message: format!(
320            "D={max_diff:.4} over {n} samples, critical={critical:.4} at α={significance}"
321        ),
322    }
323}
324
/// Spearman rank correlation between two equal-length samples.
///
/// Returns a value in `[-1, 1]`; ties receive average ranks; mismatched
/// lengths are truncated to the shorter slice; fewer than two pairs
/// (or zero rank variance) yields `0.0`. Used by
/// [`run_correlation_check`] and [`run_expected_correlations`].
pub fn spearman_rank_correlation(xs: &[f64], ys: &[f64]) -> f64 {
    let n = xs.len().min(ys.len());
    if n < 2 {
        return 0.0;
    }

    // Average-rank assignment: sort indices by value, then give every
    // run of equal values the mean of the 1-based ranks it spans.
    fn average_ranks(values: &[f64]) -> Vec<f64> {
        let mut order: Vec<usize> = (0..values.len()).collect();
        order.sort_by(|&a, &b| {
            values[a]
                .partial_cmp(&values[b])
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        let mut ranks = vec![0.0; values.len()];
        let mut start = 0;
        while start < order.len() {
            let mut end = start;
            while end + 1 < order.len() && values[order[end + 1]] == values[order[start]] {
                end += 1;
            }
            // Sorted positions start..=end are tied; their shared rank
            // is the average of (start+1)..=(end+1).
            let shared_rank = (start + end) as f64 / 2.0 + 1.0;
            for &slot in &order[start..=end] {
                ranks[slot] = shared_rank;
            }
            start = end + 1;
        }
        ranks
    }

    let rx = average_ranks(&xs[..n]);
    let ry = average_ranks(&ys[..n]);

    // Pearson correlation of the two rank vectors.
    let mean_x = rx.iter().sum::<f64>() / n as f64;
    let mean_y = ry.iter().sum::<f64>() / n as f64;
    let (mut num, mut den_x, mut den_y) = (0.0, 0.0, 0.0);
    for (a, b) in rx.iter().zip(&ry) {
        let dx = a - mean_x;
        let dy = b - mean_y;
        num += dx * dy;
        den_x += dx * dx;
        den_y += dy * dy;
    }

    let denom = (den_x * den_y).sqrt();
    if denom == 0.0 {
        0.0
    } else {
        num / denom
    }
}
375
376/// Correlation check: assert that the empirical Spearman correlation
377/// between two paired samples falls within `±tolerance` of the
378/// expected value. Used by the v3.5.1 statistical-validation phase to
379/// honour `distributions.correlations.expected_correlations`.
380pub fn run_correlation_check(
381    name: &str,
382    xs: &[f64],
383    ys: &[f64],
384    expected: f64,
385    tolerance: f64,
386) -> StatisticalTestResult {
387    if xs.len().min(ys.len()) < 100 {
388        return StatisticalTestResult {
389            name: format!("correlation_check_{name}"),
390            outcome: TestOutcome::Skipped,
391            statistic: 0.0,
392            threshold: tolerance,
393            message: format!("only {} paired samples; need ≥100", xs.len().min(ys.len())),
394        };
395    }
396    let rho = spearman_rank_correlation(xs, ys);
397    let diff = (rho - expected).abs();
398    let outcome = if diff > tolerance {
399        TestOutcome::Failed
400    } else {
401        TestOutcome::Passed
402    };
403    StatisticalTestResult {
404        name: format!("correlation_check_{name}"),
405        outcome,
406        statistic: rho,
407        threshold: tolerance,
408        message: format!(
409            "Spearman ρ={rho:.4} (expected {expected:.4} ±{tolerance:.4}; diff {diff:.4})"
410        ),
411    }
412}
413
/// Anderson-Darling test for log-normality on the log-scale.
///
/// Applies the classical A² statistic to the standardised log-amounts
/// and compares against the critical value at the configured
/// significance. Log-normal fit ≡ Gaussian fit on `ln(amount)`.
///
/// Returns `Skipped` when fewer than 100 positive amounts survive the
/// `Decimal → f64` conversion, or when the log-variance is zero.
pub fn run_anderson_darling(amounts: &[Decimal], significance: f64) -> StatisticalTestResult {
    // Only strictly-positive amounts have a defined logarithm.
    let positives: Vec<f64> = amounts
        .iter()
        .filter_map(|a| a.to_f64())
        .filter(|v| *v > 0.0)
        .collect();
    if positives.len() < 100 {
        return StatisticalTestResult {
            name: "anderson_darling".to_string(),
            outcome: TestOutcome::Skipped,
            statistic: 0.0,
            threshold: 0.0,
            message: format!("only {} positive samples; need ≥100", positives.len()),
        };
    }
    let mut logs: Vec<f64> = positives.iter().map(|v| v.ln()).collect();
    let n = logs.len() as f64;
    let mean = logs.iter().sum::<f64>() / n;
    // Sample variance (n − 1 denominator): mean and sd are both
    // estimated from the data, which is why the case-corrected A*²
    // and its critical values below apply.
    let var = logs.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / (n - 1.0);
    let sd = var.sqrt();
    if sd == 0.0 {
        return StatisticalTestResult {
            name: "anderson_darling".to_string(),
            outcome: TestOutcome::Skipped,
            statistic: 0.0,
            threshold: 0.0,
            message: "zero log-variance (degenerate input)".to_string(),
        };
    }
    logs.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    // Standardise + CDF-map via standard normal.
    let standard_normal_cdf = |x: f64| -> f64 { 0.5 * (1.0 + erf(x / std::f64::consts::SQRT_2)) };
    // Accumulate S = Σ (2i − 1)·[ln F(x_i) + ln(1 − F(x_{n+1−i}))]
    // over the sorted sample; CDF values are clamped away from 0 and 1
    // so the logarithms stay finite.
    let mut s = 0.0;
    for (i, v) in logs.iter().enumerate() {
        let z = (v - mean) / sd;
        let p = standard_normal_cdf(z).clamp(1e-12, 1.0 - 1e-12);
        // Mirror-index term: F of the (n+1−i)-th order statistic.
        let q = 1.0
            - standard_normal_cdf((logs[logs.len() - 1 - i] - mean) / sd).clamp(1e-12, 1.0 - 1e-12);
        s += (2.0 * (i + 1) as f64 - 1.0) * (p.ln() + q.ln());
    }
    // A² = −n − S/n.
    let a_sq = -n - s / n;
    // Case-corrected A² for mean+sd both estimated: A*² = A²(1 + 0.75/n + 2.25/n²).
    let a_sq_star = a_sq * (1.0 + 0.75 / n + 2.25 / n.powi(2));
    // Critical values for mean+sd-estimated A* at common α (D'Agostino & Stephens).
    let critical = if significance <= 0.011 {
        1.035
    } else if significance <= 0.026 {
        0.873
    } else if significance <= 0.051 {
        0.752
    } else if significance <= 0.101 {
        0.631
    } else {
        0.500
    };
    let outcome = if a_sq_star > critical {
        TestOutcome::Failed
    } else {
        TestOutcome::Passed
    };
    StatisticalTestResult {
        name: "anderson_darling".to_string(),
        outcome,
        statistic: a_sq_star,
        threshold: critical,
        message: format!(
            "A*²={a_sq_star:.4} vs log-normal, critical={critical:.4} at α={significance} (n={n})"
        ),
    }
}
489
/// Error function approximation for the standard normal CDF.
/// Accurate to ~1e-7 (Abramowitz & Stegun 7.1.26).
fn erf(x: f64) -> f64 {
    // erf is odd: evaluate on |x| and restore the sign at the end.
    let negative = x < 0.0;
    let x = x.abs();

    // Horner evaluation of the degree-5 polynomial in t = 1/(1 + p·x).
    let t = 1.0 / (1.0 + 0.3275911 * x);
    let mut poly = 1.061405429;
    poly = poly * t - 1.453152027;
    poly = poly * t + 1.421413741;
    poly = poly * t - 0.284496736;
    poly = poly * t + 0.254829592;

    let magnitude = 1.0 - poly * t * (-x * x).exp();
    if negative {
        -magnitude
    } else {
        magnitude
    }
}
503
/// Chi-squared critical values for common (df, α) combinations.
///
/// Looks up the exact row when `df` is tabulated; otherwise rounds the
/// degrees of freedom *up* to the next tabulated row, which yields a
/// larger (more generous) critical value. For df beyond the table,
/// returns a very large ceiling so the test defaults to passing in
/// ambiguous cases.
fn chi_sq_critical(df: usize, alpha: f64) -> f64 {
    // Rows: (df, α=0.10, α=0.05, α=0.01), ascending df.
    const TABLE: &[(usize, f64, f64, f64)] = &[
        (1, 2.706, 3.841, 6.635),
        (2, 4.605, 5.991, 9.210),
        (3, 6.251, 7.815, 11.345),
        (4, 7.779, 9.488, 13.277),
        (5, 9.236, 11.070, 15.086),
        (6, 10.645, 12.592, 16.812),
        (7, 12.017, 14.067, 18.475),
        (8, 13.362, 15.507, 20.090),
        (9, 14.684, 16.919, 21.666),
        (10, 15.987, 18.307, 23.209),
        (14, 21.064, 23.685, 29.141),
        (19, 27.204, 30.144, 36.191),
        (24, 33.196, 36.415, 42.980),
        (29, 39.087, 42.557, 49.588),
    ];

    // Smallest tabulated df >= the requested df. The previous
    // nearest-neighbour lookup could round *down* (e.g. df=12 → the
    // df=10 row), silently tightening the test — the opposite of the
    // documented "generous fallback" contract.
    match TABLE.iter().find(|(d, _, _, _)| *d >= df) {
        Some(&(_, c_10, c_05, c_01)) => {
            if alpha <= 0.011 {
                c_01
            } else if alpha <= 0.051 {
                c_05
            } else {
                c_10
            }
        }
        // df > 29: very generous ceiling — don't fail tests on exotic
        // df values we have no table entry for.
        None => 1_000_000.0,
    }
}
542
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;
    use rand_distr::{Distribution, LogNormal};

    /// Deterministic log-normal sample vector (seeded ChaCha8); used by
    /// the Benford and Anderson-Darling happy-path tests so results are
    /// reproducible run-to-run.
    fn lognormal_samples(n: usize, mu: f64, sigma: f64, seed: u64) -> Vec<Decimal> {
        let mut rng = ChaCha8Rng::seed_from_u64(seed);
        let ln = LogNormal::new(mu, sigma).unwrap();
        (0..n)
            .map(|_| Decimal::from_f64_retain(ln.sample(&mut rng)).unwrap_or(Decimal::ONE))
            .collect()
    }

    #[test]
    fn benford_passes_for_lognormal() {
        // A wide log-normal (σ=2) spans several orders of magnitude,
        // so first digits should track Benford closely.
        let samples = lognormal_samples(2000, 7.0, 2.0, 42);
        let r = run_benford_first_digit(&samples, 0.015, 0.010);
        assert!(
            !matches!(r.outcome, TestOutcome::Failed),
            "expected pass/warning, got {:?}: {}",
            r.outcome,
            r.message
        );
    }

    #[test]
    fn benford_fails_for_concentrated_single_digit() {
        // All values start with 5 — catastrophic Benford violation.
        let samples: Vec<Decimal> = (0..500).map(|i| Decimal::from(5000 + i)).collect();
        let r = run_benford_first_digit(&samples, 0.015, 0.010);
        assert!(matches!(r.outcome, TestOutcome::Failed));
    }

    #[test]
    fn benford_skipped_below_100_samples() {
        let samples: Vec<Decimal> = (0..50).map(Decimal::from).collect();
        let r = run_benford_first_digit(&samples, 0.015, 0.010);
        assert!(matches!(r.outcome, TestOutcome::Skipped));
    }

    #[test]
    fn chi_squared_passes_for_log_uniform() {
        // chi_squared tests uniformity on log scale. Feed it data that is
        // uniform-on-log (i.e. log-uniform) to get the expected pass.
        // A log-normal would — correctly — fail uniformity.
        let samples: Vec<Decimal> = (0..1000)
            .map(|i| {
                // Evenly-spaced log values → exactly uniform on log scale.
                let log_val = (i as f64 / 1000.0) * 10.0;
                let v = log_val.exp();
                Decimal::from_f64_retain(v).unwrap_or(Decimal::ONE)
            })
            .collect();
        let r = run_chi_squared(&samples, 10, 0.05);
        assert!(
            !matches!(r.outcome, TestOutcome::Failed),
            "expected pass, got {:?}: {}",
            r.outcome,
            r.message
        );
    }

    #[test]
    fn chi_squared_fails_for_bimodal_concentration() {
        // Bimodal: 450 small values, 50 huge values. Every mid bin empty.
        // Chi-squared against a uniform expectation will fail hard.
        let mut samples: Vec<Decimal> = (0..450).map(|_| Decimal::from(1000)).collect();
        samples.extend((0..50).map(|_| Decimal::from(1_000_000)));
        let r = run_chi_squared(&samples, 10, 0.05);
        assert!(
            matches!(r.outcome, TestOutcome::Failed),
            "expected Failed for bimodal, got {:?}: {}",
            r.outcome,
            r.message
        );
    }

    #[test]
    fn spearman_rho_perfect_positive() {
        let xs: Vec<f64> = (1..=100).map(|i| i as f64).collect();
        let ys: Vec<f64> = (1..=100).map(|i| i as f64).collect();
        let rho = spearman_rank_correlation(&xs, &ys);
        assert!((rho - 1.0).abs() < 1e-6, "expected ρ=1.0, got {rho}");
    }

    #[test]
    fn spearman_rho_perfect_negative() {
        let xs: Vec<f64> = (1..=100).map(|i| i as f64).collect();
        let ys: Vec<f64> = (1..=100).rev().map(|i| i as f64).collect();
        let rho = spearman_rank_correlation(&xs, &ys);
        assert!((rho + 1.0).abs() < 1e-6, "expected ρ=-1.0, got {rho}");
    }

    #[test]
    fn correlation_check_passes_when_within_tolerance() {
        // A constant offset preserves ranks, so ρ stays exactly 1.
        let xs: Vec<f64> = (1..=200).map(|i| i as f64).collect();
        let ys: Vec<f64> = xs.iter().map(|v| v + 0.5).collect();
        let r = run_correlation_check("test", &xs, &ys, 1.0, 0.05);
        assert!(matches!(r.outcome, TestOutcome::Passed));
    }

    #[test]
    fn correlation_check_fails_when_off_target() {
        // Reversed pairing gives ρ=-1 against an expected +1.
        let xs: Vec<f64> = (1..=200).map(|i| i as f64).collect();
        let ys: Vec<f64> = xs.iter().rev().copied().collect();
        let r = run_correlation_check("test", &xs, &ys, 1.0, 0.05);
        assert!(matches!(r.outcome, TestOutcome::Failed));
    }

    #[test]
    fn anderson_darling_passes_for_lognormal() {
        let samples = lognormal_samples(2000, 7.0, 1.5, 42);
        let r = run_anderson_darling(&samples, 0.05);
        assert!(
            !matches!(r.outcome, TestOutcome::Failed),
            "expected pass/warning for log-normal, got {:?}: {}",
            r.outcome,
            r.message
        );
    }

    #[test]
    fn anderson_darling_fails_for_uniform() {
        // Uniform data is very unlike log-normal.
        let samples: Vec<Decimal> = (0..2000)
            .map(|i| Decimal::from(1000 + (i % 500) * 20))
            .collect();
        let r = run_anderson_darling(&samples, 0.05);
        assert!(
            matches!(r.outcome, TestOutcome::Failed),
            "expected fail for uniform-like data, got {:?}: {}",
            r.outcome,
            r.message
        );
    }

    #[test]
    fn report_all_passed_tracks_failures() {
        let rep = StatisticalValidationReport {
            sample_count: 100,
            results: vec![
                StatisticalTestResult {
                    name: "a".into(),
                    outcome: TestOutcome::Passed,
                    statistic: 0.0,
                    threshold: 1.0,
                    message: "".into(),
                },
                StatisticalTestResult {
                    name: "b".into(),
                    outcome: TestOutcome::Warning,
                    statistic: 0.0,
                    threshold: 1.0,
                    message: "".into(),
                },
            ],
        };
        assert!(rep.all_passed()); // warnings don't count
        assert!(rep.has_warnings());

        let rep_failed = StatisticalValidationReport {
            sample_count: 100,
            results: vec![StatisticalTestResult {
                name: "c".into(),
                outcome: TestOutcome::Failed,
                statistic: 2.0,
                threshold: 1.0,
                message: "".into(),
            }],
        };
        assert!(!rep_failed.all_passed());
        assert_eq!(rep_failed.failed_names(), vec!["c".to_string()]);
    }
}