datasynth_eval/statistical/
mod.rs

//! Statistical quality evaluation module.
//!
//! Provides statistical tests and analyses for validating that generated
//! synthetic data follows expected distributions.
//!
//! # Modules
//!
//! - **amount_distribution**: Log-normal amount distribution analysis
//! - **benford**: Benford's Law compliance testing
//! - **line_item**: Line item distribution analysis
//! - **temporal**: Temporal pattern analysis
//! - **correlation**: Cross-field correlation analysis
//! - **anderson_darling**: Anderson-Darling goodness-of-fit test
//! - **chi_squared**: Chi-squared goodness-of-fit test
//! - **drift_detection**: Drift detection evaluation and ground truth validation
//! - **anomaly_realism**: Anomaly injection realism analysis
16
17mod amount_distribution;
18mod anderson_darling;
19mod anomaly_realism;
20mod benford;
21mod chi_squared;
22mod correlation;
23mod drift_detection;
24mod line_item;
25mod temporal;
26
27pub use amount_distribution::{AmountDistributionAnalysis, AmountDistributionAnalyzer};
28pub use anderson_darling::{
29    AndersonDarlingAnalysis, AndersonDarlingAnalyzer, CriticalValues, FittedParameters,
30    TargetDistribution,
31};
32pub use benford::{BenfordAnalysis, BenfordAnalyzer, BenfordConformity};
33pub use chi_squared::{
34    BinFrequency, BinningStrategy, ChiSquaredAnalysis, ChiSquaredAnalyzer, ExpectedDistribution,
35};
36pub use correlation::{
37    pearson_correlation, spearman_correlation, CorrelationAnalysis, CorrelationAnalyzer,
38    CorrelationCheckResult, ExpectedCorrelation,
39};
40pub use drift_detection::{
41    DetectionDifficulty, DriftDetectionAnalysis, DriftDetectionAnalyzer, DriftDetectionEntry,
42    DriftDetectionMetrics, DriftEventCategory, LabeledDriftEvent, LabeledEventAnalysis,
43};
44pub use line_item::{LineItemAnalysis, LineItemAnalyzer, LineItemEntry};
45pub use temporal::{TemporalAnalysis, TemporalAnalyzer, TemporalEntry};
46
47pub use anomaly_realism::{
48    AnomalyData, AnomalyRealismEvaluation, AnomalyRealismEvaluator, AnomalyRealismThresholds,
49};
50
51use serde::{Deserialize, Serialize};
52
53/// Combined statistical evaluation results.
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct StatisticalEvaluation {
56    /// Benford's Law analysis results.
57    pub benford: Option<BenfordAnalysis>,
58    /// Amount distribution analysis results.
59    pub amount_distribution: Option<AmountDistributionAnalysis>,
60    /// Line item distribution analysis results.
61    pub line_item: Option<LineItemAnalysis>,
62    /// Temporal pattern analysis results.
63    pub temporal: Option<TemporalAnalysis>,
64    /// Correlation analysis results.
65    pub correlation: Option<CorrelationAnalysis>,
66    /// Anderson-Darling goodness-of-fit test results.
67    pub anderson_darling: Option<AndersonDarlingAnalysis>,
68    /// Chi-squared goodness-of-fit test results.
69    pub chi_squared: Option<ChiSquaredAnalysis>,
70    /// Drift detection analysis results.
71    pub drift_detection: Option<DriftDetectionAnalysis>,
72    /// Labeled drift event analysis results.
73    pub drift_events: Option<LabeledEventAnalysis>,
74    /// Anomaly injection realism analysis results.
75    #[serde(default, skip_serializing_if = "Option::is_none")]
76    pub anomaly_realism: Option<AnomalyRealismEvaluation>,
77    /// Overall pass/fail status.
78    pub passes: bool,
79    /// Summary of failed checks.
80    pub failures: Vec<String>,
81    /// Summary of issues (alias for failures).
82    pub issues: Vec<String>,
83    /// Overall statistical quality score (0.0-1.0).
84    pub overall_score: f64,
85}
86
87impl StatisticalEvaluation {
88    /// Create a new empty evaluation.
89    pub fn new() -> Self {
90        Self {
91            benford: None,
92            amount_distribution: None,
93            line_item: None,
94            temporal: None,
95            correlation: None,
96            anderson_darling: None,
97            chi_squared: None,
98            drift_detection: None,
99            drift_events: None,
100            anomaly_realism: None,
101            passes: true,
102            failures: Vec::new(),
103            issues: Vec::new(),
104            overall_score: 1.0,
105        }
106    }
107
108    /// Check all results against thresholds and update pass status.
109    pub fn check_thresholds(&mut self, thresholds: &crate::config::EvaluationThresholds) {
110        self.failures.clear();
111        self.issues.clear();
112        let mut scores = Vec::new();
113
114        if let Some(ref benford) = self.benford {
115            if benford.p_value < thresholds.benford_p_value_min {
116                self.failures.push(format!(
117                    "Benford p-value {} < {} (threshold)",
118                    benford.p_value, thresholds.benford_p_value_min
119                ));
120            }
121            if benford.mad > thresholds.benford_mad_max {
122                self.failures.push(format!(
123                    "Benford MAD {} > {} (threshold)",
124                    benford.mad, thresholds.benford_mad_max
125                ));
126            }
127            // Benford score: higher p-value and lower MAD are better
128            let p_score = (benford.p_value / 0.5).min(1.0);
129            let mad_score = 1.0 - (benford.mad / 0.05).min(1.0);
130            scores.push((p_score + mad_score) / 2.0);
131        }
132
133        if let Some(ref amount) = self.amount_distribution {
134            if let Some(p_value) = amount.lognormal_ks_pvalue {
135                if p_value < thresholds.amount_ks_p_value_min {
136                    self.failures.push(format!(
137                        "Amount KS p-value {} < {} (threshold)",
138                        p_value, thresholds.amount_ks_p_value_min
139                    ));
140                }
141                scores.push((p_value / 0.5).min(1.0));
142            }
143        }
144
145        if let Some(ref temporal) = self.temporal {
146            if temporal.pattern_correlation < thresholds.temporal_correlation_min {
147                self.failures.push(format!(
148                    "Temporal correlation {} < {} (threshold)",
149                    temporal.pattern_correlation, thresholds.temporal_correlation_min
150                ));
151            }
152            scores.push(temporal.pattern_correlation);
153        }
154
155        // Check correlation analysis
156        if let Some(ref correlation) = self.correlation {
157            if !correlation.passes {
158                for issue in &correlation.issues {
159                    self.failures.push(format!("Correlation: {}", issue));
160                }
161            }
162            // Score based on pass rate
163            let total_checks = correlation.checks_passed + correlation.checks_failed;
164            if total_checks > 0 {
165                scores.push(correlation.checks_passed as f64 / total_checks as f64);
166            }
167        }
168
169        // Check Anderson-Darling test
170        if let Some(ref ad) = self.anderson_darling {
171            if !ad.passes {
172                for issue in &ad.issues {
173                    self.failures.push(format!("Anderson-Darling: {}", issue));
174                }
175            }
176            // Score based on p-value (higher is better for goodness-of-fit)
177            scores.push((ad.p_value / 0.5).min(1.0));
178        }
179
180        // Check Chi-squared test
181        if let Some(ref chi_sq) = self.chi_squared {
182            if !chi_sq.passes {
183                for issue in &chi_sq.issues {
184                    self.failures.push(format!("Chi-squared: {}", issue));
185                }
186            }
187            // Score based on p-value (higher is better for goodness-of-fit)
188            scores.push((chi_sq.p_value / 0.5).min(1.0));
189        }
190
191        // Check drift detection
192        if let Some(ref drift) = self.drift_detection {
193            if !drift.passes {
194                for issue in &drift.issues {
195                    self.failures.push(format!("Drift detection: {}", issue));
196                }
197            }
198            // Score based on F1 score if drift was significant
199            if drift.drift_magnitude >= thresholds.drift_magnitude_min {
200                scores.push(drift.detection_metrics.f1_score);
201            }
202            // Check Hellinger distance threshold
203            if let Some(hellinger) = drift.hellinger_distance {
204                if hellinger > thresholds.drift_hellinger_max {
205                    self.failures.push(format!(
206                        "Drift Hellinger distance {} > {} (threshold)",
207                        hellinger, thresholds.drift_hellinger_max
208                    ));
209                }
210            }
211            // Check PSI threshold
212            if let Some(psi) = drift.psi {
213                if psi > thresholds.drift_psi_max {
214                    self.failures.push(format!(
215                        "Drift PSI {} > {} (threshold)",
216                        psi, thresholds.drift_psi_max
217                    ));
218                }
219            }
220        }
221
222        // Check labeled drift events
223        if let Some(ref events) = self.drift_events {
224            if !events.passes {
225                for issue in &events.issues {
226                    self.failures.push(format!("Drift events: {}", issue));
227                }
228            }
229            // Score based on event coverage
230            if events.total_events > 0 {
231                let difficulty_score = 1.0 - events.avg_difficulty;
232                scores.push(difficulty_score);
233            }
234        }
235
236        // Check anomaly realism
237        if let Some(ref anomaly_realism) = self.anomaly_realism {
238            if !anomaly_realism.passes {
239                for issue in &anomaly_realism.issues {
240                    self.failures.push(format!("Anomaly realism: {}", issue));
241                }
242            }
243            // Score based on detectability
244            scores.push(anomaly_realism.statistical_detectability);
245        }
246
247        // Sync issues with failures
248        self.issues = self.failures.clone();
249        self.passes = self.failures.is_empty();
250
251        // Calculate overall score
252        self.overall_score = if scores.is_empty() {
253            1.0
254        } else {
255            scores.iter().sum::<f64>() / scores.len() as f64
256        };
257    }
258}
259
260impl Default for StatisticalEvaluation {
261    fn default() -> Self {
262        Self::new()
263    }
264}
datasynth_eval/statistical/mod.rs

datasynth_eval/statistical/
mod.rs