datasynth_eval/statistical/
mod.rs

1//! Statistical quality evaluation module.
2//!
3//! Provides statistical tests and analyses for validating that generated
4//! synthetic data follows expected distributions.
5//!
6//! # Modules
7//!
//! - **amount_distribution**: Log-normal amount distribution analysis
//! - **benford**: Benford's Law compliance testing
//! - **line_item**: Line item distribution analysis
//! - **temporal**: Temporal pattern analysis
//! - **correlation**: Cross-field correlation analysis
//! - **anderson_darling**: Anderson-Darling goodness-of-fit test
//! - **chi_squared**: Chi-squared goodness-of-fit test
//! - **drift_detection**: Drift detection evaluation and ground truth validation
//! - **anomaly_realism**: Anomaly injection realism analysis
//! - **relational_fidelity**: Relational fidelity analysis
16
17mod amount_distribution;
18mod anderson_darling;
19mod anomaly_realism;
20mod benford;
21mod chi_squared;
22mod correlation;
23mod drift_detection;
24mod line_item;
25mod relational_fidelity;
26mod temporal;
27
28pub use amount_distribution::{AmountDistributionAnalysis, AmountDistributionAnalyzer};
29pub use anderson_darling::{
30    AndersonDarlingAnalysis, AndersonDarlingAnalyzer, CriticalValues, FittedParameters,
31    TargetDistribution,
32};
33pub use benford::{BenfordAnalysis, BenfordAnalyzer, BenfordConformity, SecondDigitAnalysis};
34pub use chi_squared::{
35    BinFrequency, BinningStrategy, ChiSquaredAnalysis, ChiSquaredAnalyzer, ExpectedDistribution,
36};
37pub use correlation::{
38    pearson_correlation, spearman_correlation, CorrelationAnalysis, CorrelationAnalyzer,
39    CorrelationCheckResult, ExpectedCorrelation,
40};
41pub use drift_detection::{
42    DetectionDifficulty, DriftDetectionAnalysis, DriftDetectionAnalyzer, DriftDetectionEntry,
43    DriftDetectionMetrics, DriftEventCategory, LabeledDriftEvent, LabeledEventAnalysis,
44};
45pub use line_item::{LineItemAnalysis, LineItemAnalyzer, LineItemEntry};
46pub use relational_fidelity::{
47    flow_edges_from_entries, FlowEdge, RelationalFidelityAnalyzer, RelationalFidelityReport,
48    RelationalFidelityThresholds,
49};
50pub use temporal::{TemporalAnalysis, TemporalAnalyzer, TemporalEntry};
51
52pub use anomaly_realism::{
53    AnomalyData, AnomalyRealismEvaluation, AnomalyRealismEvaluator, AnomalyRealismThresholds,
54};
55
56use serde::{Deserialize, Serialize};
57
58/// Combined statistical evaluation results.
59#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct StatisticalEvaluation {
61    /// Benford's Law analysis results.
62    pub benford: Option<BenfordAnalysis>,
63    /// Amount distribution analysis results.
64    pub amount_distribution: Option<AmountDistributionAnalysis>,
65    /// Line item distribution analysis results.
66    pub line_item: Option<LineItemAnalysis>,
67    /// Temporal pattern analysis results.
68    pub temporal: Option<TemporalAnalysis>,
69    /// Correlation analysis results.
70    pub correlation: Option<CorrelationAnalysis>,
71    /// Anderson-Darling goodness-of-fit test results.
72    pub anderson_darling: Option<AndersonDarlingAnalysis>,
73    /// Chi-squared goodness-of-fit test results.
74    pub chi_squared: Option<ChiSquaredAnalysis>,
75    /// Drift detection analysis results.
76    pub drift_detection: Option<DriftDetectionAnalysis>,
77    /// Labeled drift event analysis results.
78    pub drift_events: Option<LabeledEventAnalysis>,
79    /// Anomaly injection realism analysis results.
80    #[serde(default, skip_serializing_if = "Option::is_none")]
81    pub anomaly_realism: Option<AnomalyRealismEvaluation>,
82    /// Overall pass/fail status.
83    pub passes: bool,
84    /// Summary of failed checks.
85    pub failures: Vec<String>,
86    /// Summary of issues (alias for failures).
87    pub issues: Vec<String>,
88    /// Overall statistical quality score (0.0-1.0).
89    pub overall_score: f64,
90}
91
92impl StatisticalEvaluation {
93    /// Create a new empty evaluation.
94    pub fn new() -> Self {
95        Self {
96            benford: None,
97            amount_distribution: None,
98            line_item: None,
99            temporal: None,
100            correlation: None,
101            anderson_darling: None,
102            chi_squared: None,
103            drift_detection: None,
104            drift_events: None,
105            anomaly_realism: None,
106            passes: true,
107            failures: Vec::new(),
108            issues: Vec::new(),
109            overall_score: 1.0,
110        }
111    }
112
113    /// Check all results against thresholds and update pass status.
114    pub fn check_thresholds(&mut self, thresholds: &crate::config::EvaluationThresholds) {
115        self.failures.clear();
116        self.issues.clear();
117        let mut scores = Vec::new();
118
119        if let Some(ref benford) = self.benford {
120            if benford.p_value < thresholds.benford_p_value_min {
121                self.failures.push(format!(
122                    "Benford p-value {} < {} (threshold)",
123                    benford.p_value, thresholds.benford_p_value_min
124                ));
125            }
126            if benford.mad > thresholds.benford_mad_max {
127                self.failures.push(format!(
128                    "Benford MAD {} > {} (threshold)",
129                    benford.mad, thresholds.benford_mad_max
130                ));
131            }
132            // Benford score: higher p-value and lower MAD are better
133            let p_score = (benford.p_value / 0.5).min(1.0);
134            let mad_score = 1.0 - (benford.mad / 0.05).min(1.0);
135            scores.push((p_score + mad_score) / 2.0);
136        }
137
138        if let Some(ref amount) = self.amount_distribution {
139            if let Some(p_value) = amount.lognormal_ks_pvalue {
140                if p_value < thresholds.amount_ks_p_value_min {
141                    self.failures.push(format!(
142                        "Amount KS p-value {} < {} (threshold)",
143                        p_value, thresholds.amount_ks_p_value_min
144                    ));
145                }
146                scores.push((p_value / 0.5).min(1.0));
147            }
148        }
149
150        if let Some(ref temporal) = self.temporal {
151            if temporal.pattern_correlation < thresholds.temporal_correlation_min {
152                self.failures.push(format!(
153                    "Temporal correlation {} < {} (threshold)",
154                    temporal.pattern_correlation, thresholds.temporal_correlation_min
155                ));
156            }
157            scores.push(temporal.pattern_correlation);
158        }
159
160        // Check correlation analysis
161        if let Some(ref correlation) = self.correlation {
162            if !correlation.passes {
163                for issue in &correlation.issues {
164                    self.failures.push(format!("Correlation: {issue}"));
165                }
166            }
167            // Score based on pass rate
168            let total_checks = correlation.checks_passed + correlation.checks_failed;
169            if total_checks > 0 {
170                scores.push(correlation.checks_passed as f64 / total_checks as f64);
171            }
172        }
173
174        // Check Anderson-Darling test
175        if let Some(ref ad) = self.anderson_darling {
176            if !ad.passes {
177                for issue in &ad.issues {
178                    self.failures.push(format!("Anderson-Darling: {issue}"));
179                }
180            }
181            // Score based on p-value (higher is better for goodness-of-fit)
182            scores.push((ad.p_value / 0.5).min(1.0));
183        }
184
185        // Check Chi-squared test
186        if let Some(ref chi_sq) = self.chi_squared {
187            if !chi_sq.passes {
188                for issue in &chi_sq.issues {
189                    self.failures.push(format!("Chi-squared: {issue}"));
190                }
191            }
192            // Score based on p-value (higher is better for goodness-of-fit)
193            scores.push((chi_sq.p_value / 0.5).min(1.0));
194        }
195
196        // Check drift detection
197        if let Some(ref drift) = self.drift_detection {
198            if !drift.passes {
199                for issue in &drift.issues {
200                    self.failures.push(format!("Drift detection: {issue}"));
201                }
202            }
203            // Score based on F1 score if drift was significant
204            if drift.drift_magnitude >= thresholds.drift_magnitude_min {
205                scores.push(drift.detection_metrics.f1_score);
206            }
207            // Check Hellinger distance threshold
208            if let Some(hellinger) = drift.hellinger_distance {
209                if hellinger > thresholds.drift_hellinger_max {
210                    self.failures.push(format!(
211                        "Drift Hellinger distance {} > {} (threshold)",
212                        hellinger, thresholds.drift_hellinger_max
213                    ));
214                }
215            }
216            // Check PSI threshold
217            if let Some(psi) = drift.psi {
218                if psi > thresholds.drift_psi_max {
219                    self.failures.push(format!(
220                        "Drift PSI {} > {} (threshold)",
221                        psi, thresholds.drift_psi_max
222                    ));
223                }
224            }
225        }
226
227        // Check labeled drift events
228        if let Some(ref events) = self.drift_events {
229            if !events.passes {
230                for issue in &events.issues {
231                    self.failures.push(format!("Drift events: {issue}"));
232                }
233            }
234            // Score based on event coverage
235            if events.total_events > 0 {
236                let difficulty_score = 1.0 - events.avg_difficulty;
237                scores.push(difficulty_score);
238            }
239        }
240
241        // Check anomaly realism
242        if let Some(ref anomaly_realism) = self.anomaly_realism {
243            if !anomaly_realism.passes {
244                for issue in &anomaly_realism.issues {
245                    self.failures.push(format!("Anomaly realism: {issue}"));
246                }
247            }
248            // Score based on detectability
249            scores.push(anomaly_realism.statistical_detectability);
250        }
251
252        // Sync issues with failures
253        self.issues = self.failures.clone();
254        self.passes = self.failures.is_empty();
255
256        // Calculate overall score
257        self.overall_score = if scores.is_empty() {
258            1.0
259        } else {
260            scores.iter().sum::<f64>() / scores.len() as f64
261        };
262    }
263}
264
265impl Default for StatisticalEvaluation {
266    fn default() -> Self {
267        Self::new()
268    }
269}
datasynth_eval/statistical/mod.rs

datasynth_eval/statistical/
mod.rs