Skip to main content

datasynth_eval/statistical/
mod.rs

1//! Statistical quality evaluation module.
2//!
3//! Provides statistical tests and analyses for validating that generated
4//! synthetic data follows expected distributions.
5
6mod amount_distribution;
7mod benford;
8mod line_item;
9mod temporal;
10
11pub use amount_distribution::{AmountDistributionAnalysis, AmountDistributionAnalyzer};
12pub use benford::{BenfordAnalysis, BenfordAnalyzer, BenfordConformity};
13pub use line_item::{LineItemAnalysis, LineItemAnalyzer, LineItemEntry};
14pub use temporal::{TemporalAnalysis, TemporalAnalyzer, TemporalEntry};
15
16use serde::{Deserialize, Serialize};
17
18/// Combined statistical evaluation results.
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct StatisticalEvaluation {
21    /// Benford's Law analysis results.
22    pub benford: Option<BenfordAnalysis>,
23    /// Amount distribution analysis results.
24    pub amount_distribution: Option<AmountDistributionAnalysis>,
25    /// Line item distribution analysis results.
26    pub line_item: Option<LineItemAnalysis>,
27    /// Temporal pattern analysis results.
28    pub temporal: Option<TemporalAnalysis>,
29    /// Overall pass/fail status.
30    pub passes: bool,
31    /// Summary of failed checks.
32    pub failures: Vec<String>,
33    /// Summary of issues (alias for failures).
34    pub issues: Vec<String>,
35    /// Overall statistical quality score (0.0-1.0).
36    pub overall_score: f64,
37}
38
39impl StatisticalEvaluation {
40    /// Create a new empty evaluation.
41    pub fn new() -> Self {
42        Self {
43            benford: None,
44            amount_distribution: None,
45            line_item: None,
46            temporal: None,
47            passes: true,
48            failures: Vec::new(),
49            issues: Vec::new(),
50            overall_score: 1.0,
51        }
52    }
53
54    /// Check all results against thresholds and update pass status.
55    pub fn check_thresholds(&mut self, thresholds: &crate::config::EvaluationThresholds) {
56        self.failures.clear();
57        self.issues.clear();
58        let mut scores = Vec::new();
59
60        if let Some(ref benford) = self.benford {
61            if benford.p_value < thresholds.benford_p_value_min {
62                self.failures.push(format!(
63                    "Benford p-value {} < {} (threshold)",
64                    benford.p_value, thresholds.benford_p_value_min
65                ));
66            }
67            if benford.mad > thresholds.benford_mad_max {
68                self.failures.push(format!(
69                    "Benford MAD {} > {} (threshold)",
70                    benford.mad, thresholds.benford_mad_max
71                ));
72            }
73            // Benford score: higher p-value and lower MAD are better
74            let p_score = (benford.p_value / 0.5).min(1.0);
75            let mad_score = 1.0 - (benford.mad / 0.05).min(1.0);
76            scores.push((p_score + mad_score) / 2.0);
77        }
78
79        if let Some(ref amount) = self.amount_distribution {
80            if let Some(p_value) = amount.lognormal_ks_pvalue {
81                if p_value < thresholds.amount_ks_p_value_min {
82                    self.failures.push(format!(
83                        "Amount KS p-value {} < {} (threshold)",
84                        p_value, thresholds.amount_ks_p_value_min
85                    ));
86                }
87                scores.push((p_value / 0.5).min(1.0));
88            }
89        }
90
91        if let Some(ref temporal) = self.temporal {
92            if temporal.pattern_correlation < thresholds.temporal_correlation_min {
93                self.failures.push(format!(
94                    "Temporal correlation {} < {} (threshold)",
95                    temporal.pattern_correlation, thresholds.temporal_correlation_min
96                ));
97            }
98            scores.push(temporal.pattern_correlation);
99        }
100
101        // Sync issues with failures
102        self.issues = self.failures.clone();
103        self.passes = self.failures.is_empty();
104
105        // Calculate overall score
106        self.overall_score = if scores.is_empty() {
107            1.0
108        } else {
109            scores.iter().sum::<f64>() / scores.len() as f64
110        };
111    }
112}
113
114impl Default for StatisticalEvaluation {
115    fn default() -> Self {
116        Self::new()
117    }
118}