Skip to main content

datasynth_eval/report/
thresholds.rs

1//! Threshold checking for pass/fail determination.
2//!
3//! Validates metrics against configured thresholds and generates
4//! pass/fail results with detailed feedback.
5
6use crate::config::EvaluationThresholds;
7use serde::{Deserialize, Serialize};
8
9/// Result of threshold checking.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct ThresholdResult {
12    /// Metric name.
13    pub metric_name: String,
14    /// Actual value.
15    pub actual_value: f64,
16    /// Threshold value.
17    pub threshold_value: f64,
18    /// Comparison operator.
19    pub operator: ThresholdOperator,
20    /// Whether threshold was met.
21    pub passed: bool,
22    /// Human-readable explanation.
23    pub explanation: String,
24}
25
26/// Threshold comparison operator.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
28pub enum ThresholdOperator {
29    /// Greater than or equal.
30    GreaterOrEqual,
31    /// Less than or equal.
32    LessOrEqual,
33    /// Greater than.
34    GreaterThan,
35    /// Less than.
36    LessThan,
37    /// Equal (with tolerance).
38    Equal,
39    /// Within range.
40    InRange,
41}
42
43/// Checker for threshold validation.
44pub struct ThresholdChecker {
45    /// Thresholds to check against.
46    thresholds: EvaluationThresholds,
47}
48
49impl ThresholdChecker {
50    /// Create a new checker with the specified thresholds.
51    pub fn new(thresholds: EvaluationThresholds) -> Self {
52        Self { thresholds }
53    }
54
55    /// Check a single metric against a minimum threshold.
56    pub fn check_min(&self, name: &str, actual: f64, threshold: f64) -> ThresholdResult {
57        let passed = actual >= threshold;
58        ThresholdResult {
59            metric_name: name.to_string(),
60            actual_value: actual,
61            threshold_value: threshold,
62            operator: ThresholdOperator::GreaterOrEqual,
63            passed,
64            explanation: if passed {
65                format!("{} ({:.4}) >= {} (threshold)", name, actual, threshold)
66            } else {
67                format!(
68                    "{} ({:.4}) < {} (threshold) - FAILED",
69                    name, actual, threshold
70                )
71            },
72        }
73    }
74
75    /// Check a single metric against a maximum threshold.
76    pub fn check_max(&self, name: &str, actual: f64, threshold: f64) -> ThresholdResult {
77        let passed = actual <= threshold;
78        ThresholdResult {
79            metric_name: name.to_string(),
80            actual_value: actual,
81            threshold_value: threshold,
82            operator: ThresholdOperator::LessOrEqual,
83            passed,
84            explanation: if passed {
85                format!("{} ({:.4}) <= {} (threshold)", name, actual, threshold)
86            } else {
87                format!(
88                    "{} ({:.4}) > {} (threshold) - FAILED",
89                    name, actual, threshold
90                )
91            },
92        }
93    }
94
95    /// Check a metric is within a range.
96    pub fn check_range(&self, name: &str, actual: f64, min: f64, max: f64) -> ThresholdResult {
97        let passed = actual >= min && actual <= max;
98        ThresholdResult {
99            metric_name: name.to_string(),
100            actual_value: actual,
101            threshold_value: (min + max) / 2.0,
102            operator: ThresholdOperator::InRange,
103            passed,
104            explanation: if passed {
105                format!("{} ({:.4}) in range [{}, {}]", name, actual, min, max)
106            } else {
107                format!(
108                    "{} ({:.4}) outside range [{}, {}] - FAILED",
109                    name, actual, min, max
110                )
111            },
112        }
113    }
114
115    /// Check all statistical thresholds.
116    pub fn check_statistical(
117        &self,
118        benford_p: Option<f64>,
119        benford_mad: Option<f64>,
120        temporal_corr: Option<f64>,
121    ) -> Vec<ThresholdResult> {
122        let mut results = Vec::new();
123
124        if let Some(p) = benford_p {
125            results.push(self.check_min("benford_p_value", p, self.thresholds.benford_p_value_min));
126        }
127
128        if let Some(mad) = benford_mad {
129            results.push(self.check_max("benford_mad", mad, self.thresholds.benford_mad_max));
130        }
131
132        if let Some(corr) = temporal_corr {
133            results.push(self.check_min(
134                "temporal_correlation",
135                corr,
136                self.thresholds.temporal_correlation_min,
137            ));
138        }
139
140        results
141    }
142
143    /// Check all coherence thresholds.
144    pub fn check_coherence(
145        &self,
146        balance_imbalance: Option<f64>,
147        subledger_rate: Option<f64>,
148        doc_chain_rate: Option<f64>,
149        ic_match_rate: Option<f64>,
150    ) -> Vec<ThresholdResult> {
151        let mut results = Vec::new();
152
153        if let Some(imb) = balance_imbalance {
154            let tolerance = self
155                .thresholds
156                .balance_tolerance
157                .to_string()
158                .parse::<f64>()
159                .unwrap_or(0.01);
160            results.push(self.check_max("balance_imbalance", imb, tolerance));
161        }
162
163        if let Some(rate) = subledger_rate {
164            results.push(self.check_min(
165                "subledger_reconciliation",
166                rate,
167                self.thresholds.subledger_reconciliation_rate_min,
168            ));
169        }
170
171        if let Some(rate) = doc_chain_rate {
172            results.push(self.check_min(
173                "document_chain_completion",
174                rate,
175                self.thresholds.document_chain_completion_min,
176            ));
177        }
178
179        if let Some(rate) = ic_match_rate {
180            results.push(self.check_min("ic_match_rate", rate, self.thresholds.ic_match_rate_min));
181        }
182
183        results
184    }
185
186    /// Check all quality thresholds.
187    pub fn check_quality(
188        &self,
189        duplicate_rate: Option<f64>,
190        completeness: Option<f64>,
191        format_consistency: Option<f64>,
192    ) -> Vec<ThresholdResult> {
193        let mut results = Vec::new();
194
195        if let Some(rate) = duplicate_rate {
196            results.push(self.check_max(
197                "duplicate_rate",
198                rate,
199                self.thresholds.duplicate_rate_max,
200            ));
201        }
202
203        if let Some(comp) = completeness {
204            results.push(self.check_min(
205                "completeness",
206                comp,
207                self.thresholds.completeness_rate_min,
208            ));
209        }
210
211        if let Some(fmt) = format_consistency {
212            results.push(self.check_min(
213                "format_consistency",
214                fmt,
215                self.thresholds.format_consistency_min,
216            ));
217        }
218
219        results
220    }
221
222    /// Check all ML thresholds.
223    pub fn check_ml(
224        &self,
225        anomaly_rate: Option<f64>,
226        label_coverage: Option<f64>,
227        graph_connectivity: Option<f64>,
228    ) -> Vec<ThresholdResult> {
229        let mut results = Vec::new();
230
231        if let Some(rate) = anomaly_rate {
232            results.push(self.check_range(
233                "anomaly_rate",
234                rate,
235                self.thresholds.anomaly_rate_min,
236                self.thresholds.anomaly_rate_max,
237            ));
238        }
239
240        if let Some(cov) = label_coverage {
241            results.push(self.check_min("label_coverage", cov, self.thresholds.label_coverage_min));
242        }
243
244        if let Some(conn) = graph_connectivity {
245            results.push(self.check_min(
246                "graph_connectivity",
247                conn,
248                self.thresholds.graph_connectivity_min,
249            ));
250        }
251
252        results
253    }
254
255    /// Get all threshold results.
256    pub fn check_all(
257        &self,
258        benford_p: Option<f64>,
259        benford_mad: Option<f64>,
260        temporal_corr: Option<f64>,
261        balance_imbalance: Option<f64>,
262        subledger_rate: Option<f64>,
263        doc_chain_rate: Option<f64>,
264        ic_match_rate: Option<f64>,
265        duplicate_rate: Option<f64>,
266        completeness: Option<f64>,
267        format_consistency: Option<f64>,
268        anomaly_rate: Option<f64>,
269        label_coverage: Option<f64>,
270        graph_connectivity: Option<f64>,
271    ) -> Vec<ThresholdResult> {
272        let mut all = Vec::new();
273        all.extend(self.check_statistical(benford_p, benford_mad, temporal_corr));
274        all.extend(self.check_coherence(
275            balance_imbalance,
276            subledger_rate,
277            doc_chain_rate,
278            ic_match_rate,
279        ));
280        all.extend(self.check_quality(duplicate_rate, completeness, format_consistency));
281        all.extend(self.check_ml(anomaly_rate, label_coverage, graph_connectivity));
282        all
283    }
284
285    /// Check if all results pass.
286    pub fn all_pass(results: &[ThresholdResult]) -> bool {
287        results.iter().all(|r| r.passed)
288    }
289}
290
291impl Default for ThresholdChecker {
292    fn default() -> Self {
293        Self::new(EvaluationThresholds::default())
294    }
295}
296
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: a checker built from the default thresholds.
    fn checker() -> ThresholdChecker {
        ThresholdChecker::default()
    }

    /// A value above the minimum threshold passes.
    #[test]
    fn test_check_min() {
        let outcome = checker().check_min("test_metric", 0.95, 0.90);
        assert!(outcome.passed);
    }

    /// A value below the minimum threshold fails.
    #[test]
    fn test_check_min_fail() {
        let outcome = checker().check_min("test_metric", 0.85, 0.90);
        assert!(!outcome.passed);
    }

    /// A value below the maximum threshold passes.
    #[test]
    fn test_check_max() {
        let outcome = checker().check_max("test_metric", 0.05, 0.10);
        assert!(outcome.passed);
    }

    /// A value inside the range passes and a value above it fails.
    #[test]
    fn test_check_range() {
        let subject = checker();

        let inside = subject.check_range("test_metric", 0.10, 0.05, 0.15);
        assert!(inside.passed);

        let above = subject.check_range("test_metric", 0.20, 0.05, 0.15);
        assert!(!above.passed);
    }
}
331}