datasynth_eval/statistical/
mod.rs

1//! Statistical quality evaluation module.
2//!
3//! Provides statistical tests and analyses for validating that generated
4//! synthetic data follows expected distributions.
5//!
6//! # Modules
7//!
8//! - **amount_distribution**: Log-normal amount distribution analysis
9//! - **benford**: Benford's Law compliance testing
10//! - **line_item**: Line item distribution analysis
11//! - **temporal**: Temporal pattern analysis
12//! - **correlation**: Cross-field correlation analysis
13//! - **anderson_darling**: Anderson-Darling goodness-of-fit test
14//! - **chi_squared**: Chi-squared goodness-of-fit test
15//! - **drift_detection**: Drift detection evaluation and ground truth validation
16
17mod amount_distribution;
18mod anderson_darling;
19mod benford;
20mod chi_squared;
21mod correlation;
22mod drift_detection;
23mod line_item;
24mod temporal;
25
26pub use amount_distribution::{AmountDistributionAnalysis, AmountDistributionAnalyzer};
27pub use anderson_darling::{
28    AndersonDarlingAnalysis, AndersonDarlingAnalyzer, CriticalValues, FittedParameters,
29    TargetDistribution,
30};
31pub use benford::{BenfordAnalysis, BenfordAnalyzer, BenfordConformity};
32pub use chi_squared::{
33    BinFrequency, BinningStrategy, ChiSquaredAnalysis, ChiSquaredAnalyzer, ExpectedDistribution,
34};
35pub use correlation::{
36    pearson_correlation, spearman_correlation, CorrelationAnalysis, CorrelationAnalyzer,
37    CorrelationCheckResult, ExpectedCorrelation,
38};
39pub use drift_detection::{
40    DetectionDifficulty, DriftDetectionAnalysis, DriftDetectionAnalyzer, DriftDetectionEntry,
41    DriftDetectionMetrics, DriftEventCategory, LabeledDriftEvent, LabeledEventAnalysis,
42};
43pub use line_item::{LineItemAnalysis, LineItemAnalyzer, LineItemEntry};
44pub use temporal::{TemporalAnalysis, TemporalAnalyzer, TemporalEntry};
45
46use serde::{Deserialize, Serialize};
47
/// Combined statistical evaluation results.
///
/// Each analysis slot is optional: a `None` slot is simply skipped by
/// [`StatisticalEvaluation::check_thresholds`], which is the method that
/// derives `passes`, `failures`, `issues` and `overall_score` from whichever
/// analyses are present.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StatisticalEvaluation {
    /// Benford's Law analysis results.
    pub benford: Option<BenfordAnalysis>,
    /// Amount distribution analysis results.
    pub amount_distribution: Option<AmountDistributionAnalysis>,
    /// Line item distribution analysis results.
    pub line_item: Option<LineItemAnalysis>,
    /// Temporal pattern analysis results.
    pub temporal: Option<TemporalAnalysis>,
    /// Correlation analysis results.
    pub correlation: Option<CorrelationAnalysis>,
    /// Anderson-Darling goodness-of-fit test results.
    pub anderson_darling: Option<AndersonDarlingAnalysis>,
    /// Chi-squared goodness-of-fit test results.
    pub chi_squared: Option<ChiSquaredAnalysis>,
    /// Drift detection analysis results.
    pub drift_detection: Option<DriftDetectionAnalysis>,
    /// Labeled drift event analysis results.
    pub drift_events: Option<LabeledEventAnalysis>,
    /// Overall pass/fail status: `true` exactly when `failures` is empty
    /// after the last threshold check (and `true` for a fresh evaluation).
    pub passes: bool,
    /// Human-readable description of every failed check, in the order the
    /// checks were evaluated.
    pub failures: Vec<String>,
    /// Summary of issues (alias for failures); kept as a copy of `failures`
    /// by the threshold check.
    pub issues: Vec<String>,
    /// Overall statistical quality score, nominally in 0.0-1.0: the mean of
    /// the individual check scores, or 1.0 when no check produced a score.
    pub overall_score: f64,
}
78
79impl StatisticalEvaluation {
80    /// Create a new empty evaluation.
81    pub fn new() -> Self {
82        Self {
83            benford: None,
84            amount_distribution: None,
85            line_item: None,
86            temporal: None,
87            correlation: None,
88            anderson_darling: None,
89            chi_squared: None,
90            drift_detection: None,
91            drift_events: None,
92            passes: true,
93            failures: Vec::new(),
94            issues: Vec::new(),
95            overall_score: 1.0,
96        }
97    }
98
99    /// Check all results against thresholds and update pass status.
100    pub fn check_thresholds(&mut self, thresholds: &crate::config::EvaluationThresholds) {
101        self.failures.clear();
102        self.issues.clear();
103        let mut scores = Vec::new();
104
105        if let Some(ref benford) = self.benford {
106            if benford.p_value < thresholds.benford_p_value_min {
107                self.failures.push(format!(
108                    "Benford p-value {} < {} (threshold)",
109                    benford.p_value, thresholds.benford_p_value_min
110                ));
111            }
112            if benford.mad > thresholds.benford_mad_max {
113                self.failures.push(format!(
114                    "Benford MAD {} > {} (threshold)",
115                    benford.mad, thresholds.benford_mad_max
116                ));
117            }
118            // Benford score: higher p-value and lower MAD are better
119            let p_score = (benford.p_value / 0.5).min(1.0);
120            let mad_score = 1.0 - (benford.mad / 0.05).min(1.0);
121            scores.push((p_score + mad_score) / 2.0);
122        }
123
124        if let Some(ref amount) = self.amount_distribution {
125            if let Some(p_value) = amount.lognormal_ks_pvalue {
126                if p_value < thresholds.amount_ks_p_value_min {
127                    self.failures.push(format!(
128                        "Amount KS p-value {} < {} (threshold)",
129                        p_value, thresholds.amount_ks_p_value_min
130                    ));
131                }
132                scores.push((p_value / 0.5).min(1.0));
133            }
134        }
135
136        if let Some(ref temporal) = self.temporal {
137            if temporal.pattern_correlation < thresholds.temporal_correlation_min {
138                self.failures.push(format!(
139                    "Temporal correlation {} < {} (threshold)",
140                    temporal.pattern_correlation, thresholds.temporal_correlation_min
141                ));
142            }
143            scores.push(temporal.pattern_correlation);
144        }
145
146        // Check correlation analysis
147        if let Some(ref correlation) = self.correlation {
148            if !correlation.passes {
149                for issue in &correlation.issues {
150                    self.failures.push(format!("Correlation: {}", issue));
151                }
152            }
153            // Score based on pass rate
154            let total_checks = correlation.checks_passed + correlation.checks_failed;
155            if total_checks > 0 {
156                scores.push(correlation.checks_passed as f64 / total_checks as f64);
157            }
158        }
159
160        // Check Anderson-Darling test
161        if let Some(ref ad) = self.anderson_darling {
162            if !ad.passes {
163                for issue in &ad.issues {
164                    self.failures.push(format!("Anderson-Darling: {}", issue));
165                }
166            }
167            // Score based on p-value (higher is better for goodness-of-fit)
168            scores.push((ad.p_value / 0.5).min(1.0));
169        }
170
171        // Check Chi-squared test
172        if let Some(ref chi_sq) = self.chi_squared {
173            if !chi_sq.passes {
174                for issue in &chi_sq.issues {
175                    self.failures.push(format!("Chi-squared: {}", issue));
176                }
177            }
178            // Score based on p-value (higher is better for goodness-of-fit)
179            scores.push((chi_sq.p_value / 0.5).min(1.0));
180        }
181
182        // Check drift detection
183        if let Some(ref drift) = self.drift_detection {
184            if !drift.passes {
185                for issue in &drift.issues {
186                    self.failures.push(format!("Drift detection: {}", issue));
187                }
188            }
189            // Score based on F1 score if drift was significant
190            if drift.drift_magnitude >= thresholds.drift_magnitude_min {
191                scores.push(drift.detection_metrics.f1_score);
192            }
193            // Check Hellinger distance threshold
194            if let Some(hellinger) = drift.hellinger_distance {
195                if hellinger > thresholds.drift_hellinger_max {
196                    self.failures.push(format!(
197                        "Drift Hellinger distance {} > {} (threshold)",
198                        hellinger, thresholds.drift_hellinger_max
199                    ));
200                }
201            }
202            // Check PSI threshold
203            if let Some(psi) = drift.psi {
204                if psi > thresholds.drift_psi_max {
205                    self.failures.push(format!(
206                        "Drift PSI {} > {} (threshold)",
207                        psi, thresholds.drift_psi_max
208                    ));
209                }
210            }
211        }
212
213        // Check labeled drift events
214        if let Some(ref events) = self.drift_events {
215            if !events.passes {
216                for issue in &events.issues {
217                    self.failures.push(format!("Drift events: {}", issue));
218                }
219            }
220            // Score based on event coverage
221            if events.total_events > 0 {
222                let difficulty_score = 1.0 - events.avg_difficulty;
223                scores.push(difficulty_score);
224            }
225        }
226
227        // Sync issues with failures
228        self.issues = self.failures.clone();
229        self.passes = self.failures.is_empty();
230
231        // Calculate overall score
232        self.overall_score = if scores.is_empty() {
233            1.0
234        } else {
235            scores.iter().sum::<f64>() / scores.len() as f64
236        };
237    }
238}
239
240impl Default for StatisticalEvaluation {
241    fn default() -> Self {
242        Self::new()
243    }
244}
datasynth_eval/statistical/mod.rs

datasynth_eval/statistical/
mod.rs