datasynth_eval/gates/
engine.rs

1//! Quality gate evaluation engine.
2//!
3//! Evaluates generation results against configurable pass/fail criteria.
4
5use serde::{Deserialize, Serialize};
6
7use crate::ComprehensiveEvaluation;
8
9/// A quality metric that can be checked by a gate.
10#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
11#[serde(rename_all = "snake_case")]
12pub enum QualityMetric {
13    /// Benford's Law Mean Absolute Deviation.
14    BenfordMad,
15    /// Balance sheet coherence rate (0.0–1.0).
16    BalanceCoherence,
17    /// Document chain integrity rate (0.0–1.0).
18    DocumentChainIntegrity,
19    /// Correlation preservation score (0.0–1.0).
20    CorrelationPreservation,
21    /// Temporal consistency score (0.0–1.0).
22    TemporalConsistency,
23    /// Privacy MIA AUC-ROC score.
24    PrivacyMiaAuc,
25    /// Data completion rate (0.0–1.0).
26    CompletionRate,
27    /// Duplicate rate (0.0–1.0).
28    DuplicateRate,
29    /// Referential integrity rate (0.0–1.0).
30    ReferentialIntegrity,
31    /// Intercompany match rate (0.0–1.0).
32    IcMatchRate,
33    /// S2C chain completion rate.
34    S2CChainCompletion,
35    /// Payroll calculation accuracy.
36    PayrollAccuracy,
37    /// Manufacturing yield rate.
38    ManufacturingYield,
39    /// Bank reconciliation balance accuracy.
40    BankReconciliationBalance,
41    /// Financial reporting tie-back rate.
42    FinancialReportingTieBack,
43    /// AML detectability coverage.
44    AmlDetectability,
45    /// Process mining event coverage.
46    ProcessMiningCoverage,
47    /// Audit evidence coverage.
48    AuditEvidenceCoverage,
49    /// Anomaly separability (AUC-ROC).
50    AnomalySeparability,
51    /// Feature quality score.
52    FeatureQualityScore,
53    /// GNN readiness score.
54    GnnReadinessScore,
55    /// Domain gap score.
56    DomainGapScore,
57    /// Custom metric identified by name.
58    Custom(String),
59}
60
61impl std::fmt::Display for QualityMetric {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        match self {
64            Self::BenfordMad => write!(f, "benford_mad"),
65            Self::BalanceCoherence => write!(f, "balance_coherence"),
66            Self::DocumentChainIntegrity => write!(f, "document_chain_integrity"),
67            Self::CorrelationPreservation => write!(f, "correlation_preservation"),
68            Self::TemporalConsistency => write!(f, "temporal_consistency"),
69            Self::PrivacyMiaAuc => write!(f, "privacy_mia_auc"),
70            Self::CompletionRate => write!(f, "completion_rate"),
71            Self::DuplicateRate => write!(f, "duplicate_rate"),
72            Self::ReferentialIntegrity => write!(f, "referential_integrity"),
73            Self::IcMatchRate => write!(f, "ic_match_rate"),
74            Self::S2CChainCompletion => write!(f, "s2c_chain_completion"),
75            Self::PayrollAccuracy => write!(f, "payroll_accuracy"),
76            Self::ManufacturingYield => write!(f, "manufacturing_yield"),
77            Self::BankReconciliationBalance => write!(f, "bank_reconciliation_balance"),
78            Self::FinancialReportingTieBack => write!(f, "financial_reporting_tie_back"),
79            Self::AmlDetectability => write!(f, "aml_detectability"),
80            Self::ProcessMiningCoverage => write!(f, "process_mining_coverage"),
81            Self::AuditEvidenceCoverage => write!(f, "audit_evidence_coverage"),
82            Self::AnomalySeparability => write!(f, "anomaly_separability"),
83            Self::FeatureQualityScore => write!(f, "feature_quality_score"),
84            Self::GnnReadinessScore => write!(f, "gnn_readiness_score"),
85            Self::DomainGapScore => write!(f, "domain_gap_score"),
86            Self::Custom(name) => write!(f, "custom:{name}"),
87        }
88    }
89}
90
91/// Comparison operator for threshold checks.
92#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
93#[serde(rename_all = "snake_case")]
94pub enum Comparison {
95    /// Greater than or equal to threshold.
96    Gte,
97    /// Less than or equal to threshold.
98    Lte,
99    /// Equal to threshold (with epsilon).
100    Eq,
101    /// Between two thresholds (inclusive). Uses `threshold` as lower and `upper_threshold` as upper.
102    Between,
103}
104
105/// Strategy for handling gate failures.
106#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
107#[serde(rename_all = "snake_case")]
108pub enum FailStrategy {
109    /// Stop checking on first failure.
110    FailFast,
111    /// Check all gates and collect all failures.
112    #[default]
113    CollectAll,
114}
115
116/// A single quality gate with a metric, threshold, and comparison.
117#[derive(Debug, Clone, Serialize, Deserialize)]
118pub struct QualityGate {
119    /// Human-readable name for this gate.
120    pub name: String,
121    /// The metric to check.
122    pub metric: QualityMetric,
123    /// Threshold value for comparison.
124    pub threshold: f64,
125    /// Upper threshold for Between comparison.
126    #[serde(default, skip_serializing_if = "Option::is_none")]
127    pub upper_threshold: Option<f64>,
128    /// How to compare the metric value against the threshold.
129    pub comparison: Comparison,
130}
131
132impl QualityGate {
133    /// Create a new quality gate.
134    pub fn new(
135        name: impl Into<String>,
136        metric: QualityMetric,
137        threshold: f64,
138        comparison: Comparison,
139    ) -> Self {
140        Self {
141            name: name.into(),
142            metric,
143            threshold,
144            upper_threshold: None,
145            comparison,
146        }
147    }
148
149    /// Create a gate that requires metric >= threshold.
150    pub fn gte(name: impl Into<String>, metric: QualityMetric, threshold: f64) -> Self {
151        Self::new(name, metric, threshold, Comparison::Gte)
152    }
153
154    /// Create a gate that requires metric <= threshold.
155    pub fn lte(name: impl Into<String>, metric: QualityMetric, threshold: f64) -> Self {
156        Self::new(name, metric, threshold, Comparison::Lte)
157    }
158
159    /// Create a gate that requires metric between lower and upper (inclusive).
160    pub fn between(name: impl Into<String>, metric: QualityMetric, lower: f64, upper: f64) -> Self {
161        Self {
162            name: name.into(),
163            metric,
164            threshold: lower,
165            upper_threshold: Some(upper),
166            comparison: Comparison::Between,
167        }
168    }
169
170    /// Check if an actual value passes this gate.
171    pub fn check(&self, actual: f64) -> bool {
172        match self.comparison {
173            Comparison::Gte => actual >= self.threshold,
174            Comparison::Lte => actual <= self.threshold,
175            Comparison::Eq => (actual - self.threshold).abs() < 1e-9,
176            Comparison::Between => {
177                let upper = self.upper_threshold.unwrap_or(self.threshold);
178                actual >= self.threshold && actual <= upper
179            }
180        }
181    }
182}
183
184/// A named collection of quality gates.
185#[derive(Debug, Clone, Serialize, Deserialize)]
186pub struct GateProfile {
187    /// Profile name (e.g., "strict", "default", "lenient").
188    pub name: String,
189    /// List of quality gates in this profile.
190    pub gates: Vec<QualityGate>,
191    /// Strategy for handling failures.
192    #[serde(default)]
193    pub fail_strategy: FailStrategy,
194}
195
196impl GateProfile {
197    /// Create a new gate profile.
198    pub fn new(name: impl Into<String>, gates: Vec<QualityGate>) -> Self {
199        Self {
200            name: name.into(),
201            gates,
202            fail_strategy: FailStrategy::default(),
203        }
204    }
205
206    /// Set the fail strategy.
207    pub fn with_fail_strategy(mut self, strategy: FailStrategy) -> Self {
208        self.fail_strategy = strategy;
209        self
210    }
211}
212
213/// Result of checking a single gate.
214#[derive(Debug, Clone, Serialize, Deserialize)]
215pub struct GateCheckResult {
216    /// Gate name.
217    pub gate_name: String,
218    /// Metric checked.
219    pub metric: QualityMetric,
220    /// Whether the gate passed.
221    pub passed: bool,
222    /// Actual metric value.
223    pub actual_value: Option<f64>,
224    /// Expected threshold.
225    pub threshold: f64,
226    /// Comparison used.
227    pub comparison: Comparison,
228    /// Human-readable message.
229    pub message: String,
230}
231
232/// Overall result of evaluating all gates in a profile.
233#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct GateResult {
235    /// Whether all gates passed.
236    pub passed: bool,
237    /// Profile name used.
238    pub profile_name: String,
239    /// Individual gate results.
240    pub results: Vec<GateCheckResult>,
241    /// Summary message.
242    pub summary: String,
243    /// Number of gates that passed.
244    pub gates_passed: usize,
245    /// Total number of gates checked.
246    pub gates_total: usize,
247}
248
/// Engine that evaluates quality gates against a comprehensive evaluation.
///
/// This is a stateless unit type; the derives make it printable, copyable and
/// default-constructible like any other public value type.
#[derive(Debug, Clone, Copy, Default)]
pub struct GateEngine;
251
252impl GateEngine {
253    /// Evaluate a comprehensive evaluation against a gate profile.
254    pub fn evaluate(evaluation: &ComprehensiveEvaluation, profile: &GateProfile) -> GateResult {
255        let mut results = Vec::new();
256        let mut all_passed = true;
257
258        for gate in &profile.gates {
259            let (actual_value, message) = Self::extract_metric(evaluation, gate);
260
261            let check_result = match actual_value {
262                Some(value) => {
263                    let passed = gate.check(value);
264                    if !passed {
265                        all_passed = false;
266                    }
267                    GateCheckResult {
268                        gate_name: gate.name.clone(),
269                        metric: gate.metric.clone(),
270                        passed,
271                        actual_value: Some(value),
272                        threshold: gate.threshold,
273                        comparison: gate.comparison.clone(),
274                        message: if passed {
275                            format!(
276                                "{}: {:.4} passes {:?} {:.4}",
277                                gate.name, value, gate.comparison, gate.threshold
278                            )
279                        } else {
280                            format!(
281                                "{}: {:.4} fails {:?} {:.4}",
282                                gate.name, value, gate.comparison, gate.threshold
283                            )
284                        },
285                    }
286                }
287                None => {
288                    // Metric not available - treat as not applicable (pass)
289                    GateCheckResult {
290                        gate_name: gate.name.clone(),
291                        metric: gate.metric.clone(),
292                        passed: true,
293                        actual_value: None,
294                        threshold: gate.threshold,
295                        comparison: gate.comparison.clone(),
296                        message: format!("{}: metric not available ({})", gate.name, message),
297                    }
298                }
299            };
300
301            let failed = !check_result.passed;
302            results.push(check_result);
303
304            if failed && profile.fail_strategy == FailStrategy::FailFast {
305                break;
306            }
307        }
308
309        let gates_passed = results.iter().filter(|r| r.passed).count();
310        let gates_total = results.len();
311
312        let summary = if all_passed {
313            format!(
314                "All {}/{} quality gates passed (profile: {})",
315                gates_passed, gates_total, profile.name
316            )
317        } else {
318            let failed_names: Vec<_> = results
319                .iter()
320                .filter(|r| !r.passed)
321                .map(|r| r.gate_name.as_str())
322                .collect();
323            format!(
324                "{}/{} quality gates passed, {} failed: {} (profile: {})",
325                gates_passed,
326                gates_total,
327                gates_total - gates_passed,
328                failed_names.join(", "),
329                profile.name
330            )
331        };
332
333        GateResult {
334            passed: all_passed,
335            profile_name: profile.name.clone(),
336            results,
337            summary,
338            gates_passed,
339            gates_total,
340        }
341    }
342
343    /// Extract a metric value from a comprehensive evaluation.
344    fn extract_metric(
345        evaluation: &ComprehensiveEvaluation,
346        gate: &QualityGate,
347    ) -> (Option<f64>, String) {
348        match &gate.metric {
349            QualityMetric::BenfordMad => {
350                let mad = evaluation.statistical.benford.as_ref().map(|b| b.mad);
351                (mad, "benford analysis not available".to_string())
352            }
353            QualityMetric::BalanceCoherence => {
354                let rate = evaluation.coherence.balance.as_ref().map(|b| {
355                    if b.periods_evaluated == 0 {
356                        0.0
357                    } else {
358                        (b.periods_evaluated - b.periods_imbalanced) as f64
359                            / b.periods_evaluated as f64
360                    }
361                });
362                (rate, "balance sheet evaluation not available".to_string())
363            }
364            QualityMetric::DocumentChainIntegrity => {
365                let rate = evaluation
366                    .coherence
367                    .document_chain
368                    .as_ref()
369                    .map(|d| d.p2p_completion_rate);
370                (rate, "document chain evaluation not available".to_string())
371            }
372            QualityMetric::CorrelationPreservation => {
373                let rate = evaluation.statistical.correlation.as_ref().map(|c| {
374                    let total = c.checks_passed + c.checks_failed;
375                    if total > 0 {
376                        c.checks_passed as f64 / total as f64
377                    } else {
378                        1.0 // No checks = perfect score
379                    }
380                });
381                (rate, "correlation analysis not available".to_string())
382            }
383            QualityMetric::TemporalConsistency => {
384                let rate = evaluation
385                    .statistical
386                    .temporal
387                    .as_ref()
388                    .map(|t| t.pattern_correlation);
389                (rate, "temporal analysis not available".to_string())
390            }
391            QualityMetric::PrivacyMiaAuc => {
392                let auc = evaluation
393                    .privacy
394                    .as_ref()
395                    .and_then(|p| p.membership_inference.as_ref())
396                    .map(|m| m.auc_roc);
397                (auc, "privacy MIA evaluation not available".to_string())
398            }
399            QualityMetric::CompletionRate => {
400                let rate = evaluation
401                    .quality
402                    .completeness
403                    .as_ref()
404                    .map(|c| c.overall_completeness);
405                (rate, "completeness analysis not available".to_string())
406            }
407            QualityMetric::DuplicateRate => {
408                let rate = evaluation
409                    .quality
410                    .uniqueness
411                    .as_ref()
412                    .map(|u| u.duplicate_rate);
413                (rate, "uniqueness analysis not available".to_string())
414            }
415            QualityMetric::ReferentialIntegrity => {
416                let rate = evaluation
417                    .coherence
418                    .referential
419                    .as_ref()
420                    .map(|r| r.overall_integrity_score);
421                (
422                    rate,
423                    "referential integrity evaluation not available".to_string(),
424                )
425            }
426            QualityMetric::IcMatchRate => {
427                let rate = evaluation
428                    .coherence
429                    .intercompany
430                    .as_ref()
431                    .map(|ic| ic.match_rate);
432                (rate, "IC matching evaluation not available".to_string())
433            }
434            QualityMetric::S2CChainCompletion => {
435                let rate = evaluation
436                    .coherence
437                    .sourcing
438                    .as_ref()
439                    .map(|s| s.rfx_completion_rate);
440                (rate, "sourcing evaluation not available".to_string())
441            }
442            QualityMetric::PayrollAccuracy => {
443                let rate = evaluation
444                    .coherence
445                    .hr_payroll
446                    .as_ref()
447                    .map(|h| h.gross_to_net_accuracy);
448                (rate, "HR/payroll evaluation not available".to_string())
449            }
450            QualityMetric::ManufacturingYield => {
451                let rate = evaluation
452                    .coherence
453                    .manufacturing
454                    .as_ref()
455                    .map(|m| m.yield_rate_consistency);
456                (rate, "manufacturing evaluation not available".to_string())
457            }
458            QualityMetric::BankReconciliationBalance => {
459                let rate = evaluation
460                    .coherence
461                    .bank_reconciliation
462                    .as_ref()
463                    .map(|b| b.balance_accuracy);
464                (
465                    rate,
466                    "bank reconciliation evaluation not available".to_string(),
467                )
468            }
469            QualityMetric::FinancialReportingTieBack => {
470                let rate = evaluation
471                    .coherence
472                    .financial_reporting
473                    .as_ref()
474                    .map(|fr| fr.statement_tb_tie_rate);
475                (
476                    rate,
477                    "financial reporting evaluation not available".to_string(),
478                )
479            }
480            QualityMetric::AmlDetectability => {
481                let rate = evaluation
482                    .banking
483                    .as_ref()
484                    .and_then(|b| b.aml.as_ref())
485                    .map(|a| a.typology_coverage);
486                (
487                    rate,
488                    "AML detectability evaluation not available".to_string(),
489                )
490            }
491            QualityMetric::ProcessMiningCoverage => {
492                let rate = evaluation
493                    .process_mining
494                    .as_ref()
495                    .and_then(|pm| pm.event_sequence.as_ref())
496                    .map(|es| es.timestamp_monotonicity);
497                (rate, "process mining evaluation not available".to_string())
498            }
499            QualityMetric::AuditEvidenceCoverage => {
500                let rate = evaluation
501                    .coherence
502                    .audit
503                    .as_ref()
504                    .map(|a| a.evidence_to_finding_rate);
505                (rate, "audit evaluation not available".to_string())
506            }
507            QualityMetric::AnomalySeparability => {
508                let score = evaluation
509                    .ml_readiness
510                    .anomaly_scoring
511                    .as_ref()
512                    .map(|a| a.anomaly_separability);
513                (
514                    score,
515                    "anomaly scoring evaluation not available".to_string(),
516                )
517            }
518            QualityMetric::FeatureQualityScore => {
519                let score = evaluation
520                    .ml_readiness
521                    .feature_quality
522                    .as_ref()
523                    .map(|f| f.feature_quality_score);
524                (
525                    score,
526                    "feature quality evaluation not available".to_string(),
527                )
528            }
529            QualityMetric::GnnReadinessScore => {
530                let score = evaluation
531                    .ml_readiness
532                    .gnn_readiness
533                    .as_ref()
534                    .map(|g| g.gnn_readiness_score);
535                (score, "GNN readiness evaluation not available".to_string())
536            }
537            QualityMetric::DomainGapScore => {
538                let score = evaluation
539                    .ml_readiness
540                    .domain_gap
541                    .as_ref()
542                    .map(|d| d.domain_gap_score);
543                (score, "domain gap evaluation not available".to_string())
544            }
545            QualityMetric::Custom(name) => {
546                tracing::error!(
547                    "Custom metric '{}' gate '{}' cannot be evaluated — custom metrics not implemented",
548                    name, gate.name
549                );
550                (
551                    None,
552                    format!("custom metric '{name}' not implemented — gate cannot be evaluated"),
553                )
554            }
555        }
556    }
557}
558
#[cfg(test)]
// Test code may unwrap freely; a panic is the desired failure mode here.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Two-gate profile (Benford MAD upper limit, completeness lower limit)
    /// shared by several tests.
    fn sample_profile() -> GateProfile {
        GateProfile::new(
            "test",
            vec![
                QualityGate::lte("benford_compliance", QualityMetric::BenfordMad, 0.015),
                QualityGate::gte("completeness", QualityMetric::CompletionRate, 0.95),
            ],
        )
    }

    #[test]
    fn test_gate_check_gte() {
        let gate = QualityGate::gte("test", QualityMetric::CompletionRate, 0.95);
        assert!(gate.check(0.96));
        assert!(gate.check(0.95));
        assert!(!gate.check(0.94));
        // NaN never satisfies a comparison.
        assert!(!gate.check(f64::NAN));
    }

    #[test]
    fn test_gate_check_lte() {
        let gate = QualityGate::lte("test", QualityMetric::BenfordMad, 0.015);
        assert!(gate.check(0.01));
        assert!(gate.check(0.015));
        assert!(!gate.check(0.016));
    }

    #[test]
    fn test_gate_check_between() {
        let gate = QualityGate::between("test", QualityMetric::DuplicateRate, 0.0, 0.05);
        assert!(gate.check(0.0));
        assert!(gate.check(0.03));
        assert!(gate.check(0.05));
        assert!(!gate.check(0.06));
        // The lower bound is enforced as well.
        assert!(!gate.check(-0.01));
    }

    #[test]
    fn test_gate_check_eq() {
        let gate = QualityGate::new("test", QualityMetric::BalanceCoherence, 1.0, Comparison::Eq);
        assert!(gate.check(1.0));
        assert!(!gate.check(0.99));
        // Differences below the 1e-9 tolerance still count as equal.
        assert!(gate.check(1.0 + 1e-12));
    }

    #[test]
    fn test_evaluate_empty_evaluation() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = sample_profile();
        let result = GateEngine::evaluate(&evaluation, &profile);
        // All metrics unavailable → treated as pass
        assert!(result.passed);
        assert_eq!(result.gates_total, 2);
        assert_eq!(result.gates_passed, 2);
        assert_eq!(result.profile_name, "test");
    }

    #[test]
    fn test_fail_fast_stops_on_first_failure() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = GateProfile::new(
            "strict",
            vec![
                // Custom metrics cannot be evaluated, so the engine reports
                // them as unavailable, and unavailable metrics count as
                // passed. Neither gate fails, which means FailFast must not
                // cut the run short here.
                QualityGate::gte(
                    "custom_gate",
                    QualityMetric::Custom("nonexistent".to_string()),
                    0.99,
                ),
                QualityGate::gte(
                    "another",
                    QualityMetric::Custom("also_nonexistent".to_string()),
                    0.99,
                ),
            ],
        )
        .with_fail_strategy(FailStrategy::FailFast);

        let result = GateEngine::evaluate(&evaluation, &profile);
        // Custom metrics unavailable are treated as pass, so both pass
        assert!(result.passed);
        assert_eq!(result.results.len(), 2);
        assert!(result
            .results
            .iter()
            .all(|r| r.passed && r.actual_value.is_none()));
    }

    #[test]
    fn test_collect_all_reports_all_failures() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = GateProfile::new(
            "test",
            vec![
                QualityGate::lte("mad", QualityMetric::BenfordMad, 0.015),
                QualityGate::gte("completion", QualityMetric::CompletionRate, 0.95),
            ],
        )
        .with_fail_strategy(FailStrategy::CollectAll);

        let result = GateEngine::evaluate(&evaluation, &profile);
        // CollectAll records one result per gate, in profile order.
        assert_eq!(result.results.len(), 2);
        assert_eq!(result.results[0].gate_name, "mad");
        assert_eq!(result.results[1].gate_name, "completion");
    }

    #[test]
    fn test_gate_result_summary() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = sample_profile();
        let result = GateEngine::evaluate(&evaluation, &profile);
        assert!(result.summary.contains("test"));
        assert!(result.summary.contains("2/2"));
    }

    #[test]
    fn test_quality_metric_display() {
        assert_eq!(QualityMetric::BenfordMad.to_string(), "benford_mad");
        assert_eq!(
            QualityMetric::BalanceCoherence.to_string(),
            "balance_coherence"
        );
        assert_eq!(
            QualityMetric::Custom("my_metric".to_string()).to_string(),
            "custom:my_metric"
        );
    }

    #[test]
    fn test_gate_profile_serialization() {
        let profile = sample_profile();
        let json = serde_json::to_string(&profile).expect("serialize");
        // Unset upper thresholds are skipped during serialization.
        assert!(!json.contains("upper_threshold"));
        let deserialized: GateProfile = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(deserialized.name, "test");
        assert_eq!(deserialized.gates.len(), 2);
        assert_eq!(deserialized.fail_strategy, FailStrategy::CollectAll);
    }

    #[test]
    fn test_between_gate_serialization_keeps_upper_threshold() {
        let gate = QualityGate::between("dup", QualityMetric::DuplicateRate, 0.0, 0.05);
        let json = serde_json::to_string(&gate).expect("serialize");
        let deserialized: QualityGate = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(deserialized.comparison, Comparison::Between);
        assert_eq!(deserialized.upper_threshold, Some(0.05));
    }

    #[test]
    fn test_gate_profile_deserialization_defaults() {
        // `fail_strategy` and `upper_threshold` are optional in the input.
        let json = r#"{
            "name": "minimal",
            "gates": [
                {"name": "g", "metric": "benford_mad", "threshold": 0.015, "comparison": "lte"}
            ]
        }"#;
        let profile: GateProfile = serde_json::from_str(json).expect("deserialize");
        assert_eq!(profile.fail_strategy, FailStrategy::CollectAll);
        assert_eq!(profile.gates[0].metric, QualityMetric::BenfordMad);
        assert_eq!(profile.gates[0].comparison, Comparison::Lte);
        assert_eq!(profile.gates[0].upper_threshold, None);
    }
}
datasynth_eval/gates/engine.rs

datasynth_eval/gates/
engine.rs