datasynth_eval/statistical/
mod.rs1mod amount_distribution;
7mod benford;
8mod line_item;
9mod temporal;
10
11pub use amount_distribution::{AmountDistributionAnalysis, AmountDistributionAnalyzer};
12pub use benford::{BenfordAnalysis, BenfordAnalyzer, BenfordConformity};
13pub use line_item::{LineItemAnalysis, LineItemAnalyzer, LineItemEntry};
14pub use temporal::{TemporalAnalysis, TemporalAnalyzer, TemporalEntry};
15
16use serde::{Deserialize, Serialize};
17
18#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct StatisticalEvaluation {
21 pub benford: Option<BenfordAnalysis>,
23 pub amount_distribution: Option<AmountDistributionAnalysis>,
25 pub line_item: Option<LineItemAnalysis>,
27 pub temporal: Option<TemporalAnalysis>,
29 pub passes: bool,
31 pub failures: Vec<String>,
33 pub issues: Vec<String>,
35 pub overall_score: f64,
37}
38
39impl StatisticalEvaluation {
40 pub fn new() -> Self {
42 Self {
43 benford: None,
44 amount_distribution: None,
45 line_item: None,
46 temporal: None,
47 passes: true,
48 failures: Vec::new(),
49 issues: Vec::new(),
50 overall_score: 1.0,
51 }
52 }
53
54 pub fn check_thresholds(&mut self, thresholds: &crate::config::EvaluationThresholds) {
56 self.failures.clear();
57 self.issues.clear();
58 let mut scores = Vec::new();
59
60 if let Some(ref benford) = self.benford {
61 if benford.p_value < thresholds.benford_p_value_min {
62 self.failures.push(format!(
63 "Benford p-value {} < {} (threshold)",
64 benford.p_value, thresholds.benford_p_value_min
65 ));
66 }
67 if benford.mad > thresholds.benford_mad_max {
68 self.failures.push(format!(
69 "Benford MAD {} > {} (threshold)",
70 benford.mad, thresholds.benford_mad_max
71 ));
72 }
73 let p_score = (benford.p_value / 0.5).min(1.0);
75 let mad_score = 1.0 - (benford.mad / 0.05).min(1.0);
76 scores.push((p_score + mad_score) / 2.0);
77 }
78
79 if let Some(ref amount) = self.amount_distribution {
80 if let Some(p_value) = amount.lognormal_ks_pvalue {
81 if p_value < thresholds.amount_ks_p_value_min {
82 self.failures.push(format!(
83 "Amount KS p-value {} < {} (threshold)",
84 p_value, thresholds.amount_ks_p_value_min
85 ));
86 }
87 scores.push((p_value / 0.5).min(1.0));
88 }
89 }
90
91 if let Some(ref temporal) = self.temporal {
92 if temporal.pattern_correlation < thresholds.temporal_correlation_min {
93 self.failures.push(format!(
94 "Temporal correlation {} < {} (threshold)",
95 temporal.pattern_correlation, thresholds.temporal_correlation_min
96 ));
97 }
98 scores.push(temporal.pattern_correlation);
99 }
100
101 self.issues = self.failures.clone();
103 self.passes = self.failures.is_empty();
104
105 self.overall_score = if scores.is_empty() {
107 1.0
108 } else {
109 scores.iter().sum::<f64>() / scores.len() as f64
110 };
111 }
112}
113
114impl Default for StatisticalEvaluation {
115 fn default() -> Self {
116 Self::new()
117 }
118}