Skip to main content

datasynth_eval/gates/
engine.rs

1//! Quality gate evaluation engine.
2//!
3//! Evaluates generation results against configurable pass/fail criteria.
4
5use serde::{Deserialize, Serialize};
6
7use crate::ComprehensiveEvaluation;
8
9/// A quality metric that can be checked by a gate.
10#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
11#[serde(rename_all = "snake_case")]
12pub enum QualityMetric {
13    /// Benford's Law Mean Absolute Deviation.
14    BenfordMad,
15    /// Balance sheet coherence rate (0.0–1.0).
16    BalanceCoherence,
17    /// Document chain integrity rate (0.0–1.0).
18    DocumentChainIntegrity,
19    /// Correlation preservation score (0.0–1.0).
20    CorrelationPreservation,
21    /// Temporal consistency score (0.0–1.0).
22    TemporalConsistency,
23    /// Privacy MIA AUC-ROC score.
24    PrivacyMiaAuc,
25    /// Data completion rate (0.0–1.0).
26    CompletionRate,
27    /// Duplicate rate (0.0–1.0).
28    DuplicateRate,
29    /// Referential integrity rate (0.0–1.0).
30    ReferentialIntegrity,
31    /// Intercompany match rate (0.0–1.0).
32    IcMatchRate,
33    /// S2C chain completion rate.
34    S2CChainCompletion,
35    /// Payroll calculation accuracy.
36    PayrollAccuracy,
37    /// Manufacturing yield rate.
38    ManufacturingYield,
39    /// Bank reconciliation balance accuracy.
40    BankReconciliationBalance,
41    /// Financial reporting tie-back rate.
42    FinancialReportingTieBack,
43    /// AML detectability coverage.
44    AmlDetectability,
45    /// Process mining event coverage.
46    ProcessMiningCoverage,
47    /// Audit evidence coverage.
48    AuditEvidenceCoverage,
49    /// Anomaly separability (AUC-ROC).
50    AnomalySeparability,
51    /// Feature quality score.
52    FeatureQualityScore,
53    /// GNN readiness score.
54    GnnReadinessScore,
55    /// Domain gap score.
56    DomainGapScore,
57    /// Custom metric identified by name.
58    Custom(String),
59}
60
61impl std::fmt::Display for QualityMetric {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        match self {
64            Self::BenfordMad => write!(f, "benford_mad"),
65            Self::BalanceCoherence => write!(f, "balance_coherence"),
66            Self::DocumentChainIntegrity => write!(f, "document_chain_integrity"),
67            Self::CorrelationPreservation => write!(f, "correlation_preservation"),
68            Self::TemporalConsistency => write!(f, "temporal_consistency"),
69            Self::PrivacyMiaAuc => write!(f, "privacy_mia_auc"),
70            Self::CompletionRate => write!(f, "completion_rate"),
71            Self::DuplicateRate => write!(f, "duplicate_rate"),
72            Self::ReferentialIntegrity => write!(f, "referential_integrity"),
73            Self::IcMatchRate => write!(f, "ic_match_rate"),
74            Self::S2CChainCompletion => write!(f, "s2c_chain_completion"),
75            Self::PayrollAccuracy => write!(f, "payroll_accuracy"),
76            Self::ManufacturingYield => write!(f, "manufacturing_yield"),
77            Self::BankReconciliationBalance => write!(f, "bank_reconciliation_balance"),
78            Self::FinancialReportingTieBack => write!(f, "financial_reporting_tie_back"),
79            Self::AmlDetectability => write!(f, "aml_detectability"),
80            Self::ProcessMiningCoverage => write!(f, "process_mining_coverage"),
81            Self::AuditEvidenceCoverage => write!(f, "audit_evidence_coverage"),
82            Self::AnomalySeparability => write!(f, "anomaly_separability"),
83            Self::FeatureQualityScore => write!(f, "feature_quality_score"),
84            Self::GnnReadinessScore => write!(f, "gnn_readiness_score"),
85            Self::DomainGapScore => write!(f, "domain_gap_score"),
86            Self::Custom(name) => write!(f, "custom:{}", name),
87        }
88    }
89}
90
/// Comparison operator for threshold checks.
///
/// Selects how `QualityGate::check` relates an observed metric value to the
/// gate's `threshold` (and, for [`Comparison::Between`], `upper_threshold`).
/// (De)serialized in `snake_case`, e.g. `"gte"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum Comparison {
    /// Greater than or equal to threshold.
    Gte,
    /// Less than or equal to threshold.
    Lte,
    /// Equal to threshold, within the absolute tolerance of `1e-9` applied by
    /// `QualityGate::check`.
    Eq,
    /// Between two thresholds (inclusive). Uses `threshold` as lower and `upper_threshold` as upper.
    Between,
}
104
/// Strategy for handling gate failures.
///
/// Consulted by `GateEngine::evaluate` after each gate is checked. Defaults to
/// [`FailStrategy::CollectAll`], which is also what serde uses when the field
/// is omitted from a `GateProfile`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum FailStrategy {
    /// Stop checking on first failure; later gates are not evaluated.
    FailFast,
    /// Check all gates and collect all failures.
    #[default]
    CollectAll,
}
115
/// A single quality gate with a metric, threshold, and comparison.
///
/// A gate passes when the observed metric value satisfies `comparison`
/// relative to `threshold` (see `QualityGate::check`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityGate {
    /// Human-readable name for this gate.
    pub name: String,
    /// The metric to check.
    pub metric: QualityMetric,
    /// Threshold value for comparison. Acts as the lower bound for
    /// `Comparison::Between`.
    pub threshold: f64,
    /// Upper threshold for Between comparison.
    ///
    /// Omitted from serialized output when `None` and defaults to `None` when
    /// absent on input. If a `Between` gate has no upper threshold, `check`
    /// falls back to `threshold` as the upper bound.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub upper_threshold: Option<f64>,
    /// How to compare the metric value against the threshold.
    pub comparison: Comparison,
}
131
132impl QualityGate {
133    /// Create a new quality gate.
134    pub fn new(
135        name: impl Into<String>,
136        metric: QualityMetric,
137        threshold: f64,
138        comparison: Comparison,
139    ) -> Self {
140        Self {
141            name: name.into(),
142            metric,
143            threshold,
144            upper_threshold: None,
145            comparison,
146        }
147    }
148
149    /// Create a gate that requires metric >= threshold.
150    pub fn gte(name: impl Into<String>, metric: QualityMetric, threshold: f64) -> Self {
151        Self::new(name, metric, threshold, Comparison::Gte)
152    }
153
154    /// Create a gate that requires metric <= threshold.
155    pub fn lte(name: impl Into<String>, metric: QualityMetric, threshold: f64) -> Self {
156        Self::new(name, metric, threshold, Comparison::Lte)
157    }
158
159    /// Create a gate that requires metric between lower and upper (inclusive).
160    pub fn between(name: impl Into<String>, metric: QualityMetric, lower: f64, upper: f64) -> Self {
161        Self {
162            name: name.into(),
163            metric,
164            threshold: lower,
165            upper_threshold: Some(upper),
166            comparison: Comparison::Between,
167        }
168    }
169
170    /// Check if an actual value passes this gate.
171    pub fn check(&self, actual: f64) -> bool {
172        match self.comparison {
173            Comparison::Gte => actual >= self.threshold,
174            Comparison::Lte => actual <= self.threshold,
175            Comparison::Eq => (actual - self.threshold).abs() < 1e-9,
176            Comparison::Between => {
177                let upper = self.upper_threshold.unwrap_or(self.threshold);
178                actual >= self.threshold && actual <= upper
179            }
180        }
181    }
182}
183
/// A named collection of quality gates.
///
/// Gates are evaluated in the order they appear in `gates`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateProfile {
    /// Profile name (e.g., "strict", "default", "lenient").
    pub name: String,
    /// List of quality gates in this profile.
    pub gates: Vec<QualityGate>,
    /// Strategy for handling failures. Falls back to `FailStrategy::default()`
    /// when missing from the serialized form.
    #[serde(default)]
    pub fail_strategy: FailStrategy,
}
195
196impl GateProfile {
197    /// Create a new gate profile.
198    pub fn new(name: impl Into<String>, gates: Vec<QualityGate>) -> Self {
199        Self {
200            name: name.into(),
201            gates,
202            fail_strategy: FailStrategy::default(),
203        }
204    }
205
206    /// Set the fail strategy.
207    pub fn with_fail_strategy(mut self, strategy: FailStrategy) -> Self {
208        self.fail_strategy = strategy;
209        self
210    }
211}
212
/// Result of checking a single gate.
///
/// Produced by `GateEngine::evaluate`, one per gate that was checked.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateCheckResult {
    /// Gate name.
    pub gate_name: String,
    /// Metric checked.
    pub metric: QualityMetric,
    /// Whether the gate passed. Gates whose metric is unavailable are
    /// recorded as passed.
    pub passed: bool,
    /// Actual metric value; `None` when the metric was not available in the
    /// evaluation.
    pub actual_value: Option<f64>,
    /// Expected threshold (copied from the gate's `threshold`).
    pub threshold: f64,
    /// Comparison used.
    pub comparison: Comparison,
    /// Human-readable message.
    pub message: String,
}
231
/// Overall result of evaluating all gates in a profile.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateResult {
    /// Whether all gates passed.
    pub passed: bool,
    /// Profile name used.
    pub profile_name: String,
    /// Individual gate results, in evaluation order.
    pub results: Vec<GateCheckResult>,
    /// Summary message.
    pub summary: String,
    /// Number of gates that passed.
    pub gates_passed: usize,
    /// Total number of gates checked. This is the length of `results`, so it
    /// can be smaller than the profile's gate count when evaluation stopped
    /// early under `FailStrategy::FailFast`.
    pub gates_total: usize,
}
248
249/// Engine that evaluates quality gates against a comprehensive evaluation.
250pub struct GateEngine;
251
252impl GateEngine {
253    /// Evaluate a comprehensive evaluation against a gate profile.
254    pub fn evaluate(evaluation: &ComprehensiveEvaluation, profile: &GateProfile) -> GateResult {
255        let mut results = Vec::new();
256        let mut all_passed = true;
257
258        for gate in &profile.gates {
259            let (actual_value, message) = Self::extract_metric(evaluation, &gate.metric);
260
261            let check_result = match actual_value {
262                Some(value) => {
263                    let passed = gate.check(value);
264                    if !passed {
265                        all_passed = false;
266                    }
267                    GateCheckResult {
268                        gate_name: gate.name.clone(),
269                        metric: gate.metric.clone(),
270                        passed,
271                        actual_value: Some(value),
272                        threshold: gate.threshold,
273                        comparison: gate.comparison.clone(),
274                        message: if passed {
275                            format!(
276                                "{}: {:.4} passes {:?} {:.4}",
277                                gate.name, value, gate.comparison, gate.threshold
278                            )
279                        } else {
280                            format!(
281                                "{}: {:.4} fails {:?} {:.4}",
282                                gate.name, value, gate.comparison, gate.threshold
283                            )
284                        },
285                    }
286                }
287                None => {
288                    // Metric not available - treat as not applicable (pass)
289                    GateCheckResult {
290                        gate_name: gate.name.clone(),
291                        metric: gate.metric.clone(),
292                        passed: true,
293                        actual_value: None,
294                        threshold: gate.threshold,
295                        comparison: gate.comparison.clone(),
296                        message: format!("{}: metric not available ({})", gate.name, message),
297                    }
298                }
299            };
300
301            let failed = !check_result.passed;
302            results.push(check_result);
303
304            if failed && profile.fail_strategy == FailStrategy::FailFast {
305                break;
306            }
307        }
308
309        let gates_passed = results.iter().filter(|r| r.passed).count();
310        let gates_total = results.len();
311
312        let summary = if all_passed {
313            format!(
314                "All {}/{} quality gates passed (profile: {})",
315                gates_passed, gates_total, profile.name
316            )
317        } else {
318            let failed_names: Vec<_> = results
319                .iter()
320                .filter(|r| !r.passed)
321                .map(|r| r.gate_name.as_str())
322                .collect();
323            format!(
324                "{}/{} quality gates passed, {} failed: {} (profile: {})",
325                gates_passed,
326                gates_total,
327                gates_total - gates_passed,
328                failed_names.join(", "),
329                profile.name
330            )
331        };
332
333        GateResult {
334            passed: all_passed,
335            profile_name: profile.name.clone(),
336            results,
337            summary,
338            gates_passed,
339            gates_total,
340        }
341    }
342
343    /// Extract a metric value from a comprehensive evaluation.
344    fn extract_metric(
345        evaluation: &ComprehensiveEvaluation,
346        metric: &QualityMetric,
347    ) -> (Option<f64>, String) {
348        match metric {
349            QualityMetric::BenfordMad => {
350                let mad = evaluation.statistical.benford.as_ref().map(|b| b.mad);
351                (mad, "benford analysis not available".to_string())
352            }
353            QualityMetric::BalanceCoherence => {
354                let rate = evaluation.coherence.balance.as_ref().map(|b| {
355                    if b.periods_evaluated == 0 {
356                        0.0
357                    } else {
358                        (b.periods_evaluated - b.periods_imbalanced) as f64
359                            / b.periods_evaluated as f64
360                    }
361                });
362                (rate, "balance sheet evaluation not available".to_string())
363            }
364            QualityMetric::DocumentChainIntegrity => {
365                let rate = evaluation
366                    .coherence
367                    .document_chain
368                    .as_ref()
369                    .map(|d| d.p2p_completion_rate);
370                (rate, "document chain evaluation not available".to_string())
371            }
372            QualityMetric::CorrelationPreservation => {
373                // Not directly available in ComprehensiveEvaluation - return None
374                (
375                    None,
376                    "correlation preservation metric not available".to_string(),
377                )
378            }
379            QualityMetric::TemporalConsistency => {
380                let rate = evaluation
381                    .statistical
382                    .temporal
383                    .as_ref()
384                    .map(|t| t.pattern_correlation);
385                (rate, "temporal analysis not available".to_string())
386            }
387            QualityMetric::PrivacyMiaAuc => {
388                let auc = evaluation
389                    .privacy
390                    .as_ref()
391                    .and_then(|p| p.membership_inference.as_ref())
392                    .map(|m| m.auc_roc);
393                (auc, "privacy MIA evaluation not available".to_string())
394            }
395            QualityMetric::CompletionRate => {
396                let rate = evaluation
397                    .quality
398                    .completeness
399                    .as_ref()
400                    .map(|c| c.overall_completeness);
401                (rate, "completeness analysis not available".to_string())
402            }
403            QualityMetric::DuplicateRate => {
404                let rate = evaluation
405                    .quality
406                    .uniqueness
407                    .as_ref()
408                    .map(|u| u.duplicate_rate);
409                (rate, "uniqueness analysis not available".to_string())
410            }
411            QualityMetric::ReferentialIntegrity => {
412                let rate = evaluation
413                    .coherence
414                    .referential
415                    .as_ref()
416                    .map(|r| r.overall_integrity_score);
417                (
418                    rate,
419                    "referential integrity evaluation not available".to_string(),
420                )
421            }
422            QualityMetric::IcMatchRate => {
423                let rate = evaluation
424                    .coherence
425                    .intercompany
426                    .as_ref()
427                    .map(|ic| ic.match_rate);
428                (rate, "IC matching evaluation not available".to_string())
429            }
430            QualityMetric::S2CChainCompletion => {
431                let rate = evaluation
432                    .coherence
433                    .sourcing
434                    .as_ref()
435                    .map(|s| s.rfx_completion_rate);
436                (rate, "sourcing evaluation not available".to_string())
437            }
438            QualityMetric::PayrollAccuracy => {
439                let rate = evaluation
440                    .coherence
441                    .hr_payroll
442                    .as_ref()
443                    .map(|h| h.gross_to_net_accuracy);
444                (rate, "HR/payroll evaluation not available".to_string())
445            }
446            QualityMetric::ManufacturingYield => {
447                let rate = evaluation
448                    .coherence
449                    .manufacturing
450                    .as_ref()
451                    .map(|m| m.yield_rate_consistency);
452                (rate, "manufacturing evaluation not available".to_string())
453            }
454            QualityMetric::BankReconciliationBalance => {
455                let rate = evaluation
456                    .coherence
457                    .bank_reconciliation
458                    .as_ref()
459                    .map(|b| b.balance_accuracy);
460                (
461                    rate,
462                    "bank reconciliation evaluation not available".to_string(),
463                )
464            }
465            QualityMetric::FinancialReportingTieBack => {
466                let rate = evaluation
467                    .coherence
468                    .financial_reporting
469                    .as_ref()
470                    .map(|fr| fr.statement_tb_tie_rate);
471                (
472                    rate,
473                    "financial reporting evaluation not available".to_string(),
474                )
475            }
476            QualityMetric::AmlDetectability => {
477                let rate = evaluation
478                    .banking
479                    .as_ref()
480                    .and_then(|b| b.aml.as_ref())
481                    .map(|a| a.typology_coverage);
482                (
483                    rate,
484                    "AML detectability evaluation not available".to_string(),
485                )
486            }
487            QualityMetric::ProcessMiningCoverage => {
488                let rate = evaluation
489                    .process_mining
490                    .as_ref()
491                    .and_then(|pm| pm.event_sequence.as_ref())
492                    .map(|es| es.timestamp_monotonicity);
493                (rate, "process mining evaluation not available".to_string())
494            }
495            QualityMetric::AuditEvidenceCoverage => {
496                let rate = evaluation
497                    .coherence
498                    .audit
499                    .as_ref()
500                    .map(|a| a.evidence_to_finding_rate);
501                (rate, "audit evaluation not available".to_string())
502            }
503            QualityMetric::AnomalySeparability => {
504                let score = evaluation
505                    .ml_readiness
506                    .anomaly_scoring
507                    .as_ref()
508                    .map(|a| a.anomaly_separability);
509                (
510                    score,
511                    "anomaly scoring evaluation not available".to_string(),
512                )
513            }
514            QualityMetric::FeatureQualityScore => {
515                let score = evaluation
516                    .ml_readiness
517                    .feature_quality
518                    .as_ref()
519                    .map(|f| f.feature_quality_score);
520                (
521                    score,
522                    "feature quality evaluation not available".to_string(),
523                )
524            }
525            QualityMetric::GnnReadinessScore => {
526                let score = evaluation
527                    .ml_readiness
528                    .gnn_readiness
529                    .as_ref()
530                    .map(|g| g.gnn_readiness_score);
531                (score, "GNN readiness evaluation not available".to_string())
532            }
533            QualityMetric::DomainGapScore => {
534                let score = evaluation
535                    .ml_readiness
536                    .domain_gap
537                    .as_ref()
538                    .map(|d| d.domain_gap_score);
539                (score, "domain gap evaluation not available".to_string())
540            }
541            QualityMetric::Custom(name) => (
542                None,
543                format!(
544                    "custom metric '{}' not available in standard evaluation",
545                    name
546                ),
547            ),
548        }
549    }
550}
551
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Two-gate profile used by several tests; both metrics are expected to be
    /// absent from a freshly constructed `ComprehensiveEvaluation`.
    fn sample_profile() -> GateProfile {
        GateProfile::new(
            "test",
            vec![
                QualityGate::lte("benford_compliance", QualityMetric::BenfordMad, 0.015),
                QualityGate::gte("completeness", QualityMetric::CompletionRate, 0.95),
            ],
        )
    }

    #[test]
    fn test_gate_check_gte() {
        let gate = QualityGate::gte("test", QualityMetric::CompletionRate, 0.95);
        assert!(gate.check(0.96));
        assert!(gate.check(0.95));
        assert!(!gate.check(0.94));
        // NaN never satisfies a comparison.
        assert!(!gate.check(f64::NAN));
    }

    #[test]
    fn test_gate_check_lte() {
        let gate = QualityGate::lte("test", QualityMetric::BenfordMad, 0.015);
        assert!(gate.check(0.01));
        assert!(gate.check(0.015));
        assert!(!gate.check(0.016));
    }

    #[test]
    fn test_gate_check_between() {
        let gate = QualityGate::between("test", QualityMetric::DuplicateRate, 0.0, 0.05);
        assert_eq!(gate.upper_threshold, Some(0.05));
        assert!(gate.check(0.0));
        assert!(gate.check(0.03));
        assert!(gate.check(0.05));
        assert!(!gate.check(0.06));
        assert!(!gate.check(-0.01));
    }

    #[test]
    fn test_gate_check_between_without_upper_uses_threshold() {
        // Built through `new`, a Between gate has no upper threshold and
        // `check` uses `threshold` for both bounds.
        let gate = QualityGate::new(
            "test",
            QualityMetric::DuplicateRate,
            0.5,
            Comparison::Between,
        );
        assert!(gate.upper_threshold.is_none());
        assert!(gate.check(0.5));
        assert!(!gate.check(0.6));
        assert!(!gate.check(0.4));
    }

    #[test]
    fn test_gate_check_eq() {
        let gate = QualityGate::new("test", QualityMetric::BalanceCoherence, 1.0, Comparison::Eq);
        assert!(gate.check(1.0));
        // Within the 1e-9 tolerance.
        assert!(gate.check(1.0 + 1e-12));
        assert!(!gate.check(0.99));
    }

    #[test]
    fn test_evaluate_empty_evaluation() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = sample_profile();
        let result = GateEngine::evaluate(&evaluation, &profile);
        // All metrics unavailable → treated as pass
        assert!(result.passed);
        assert_eq!(result.gates_total, 2);
        assert_eq!(result.gates_passed, 2);
        assert_eq!(result.profile_name, "test");
    }

    #[test]
    fn test_fail_fast_unavailable_metrics_do_not_stop_evaluation() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = GateProfile::new(
            "strict",
            vec![
                // Custom metrics are never available in the standard
                // evaluation, and unavailable metrics count as passed, so
                // neither gate triggers the fail-fast early exit.
                QualityGate::gte(
                    "custom_gate",
                    QualityMetric::Custom("nonexistent".to_string()),
                    0.99,
                ),
                QualityGate::gte(
                    "another",
                    QualityMetric::Custom("also_nonexistent".to_string()),
                    0.99,
                ),
            ],
        )
        .with_fail_strategy(FailStrategy::FailFast);

        let result = GateEngine::evaluate(&evaluation, &profile);
        assert!(result.passed);
        // Both gates were checked: nothing failed, so nothing was skipped.
        assert_eq!(result.results.len(), 2);
        assert!(result.results.iter().all(|r| r.passed));
        assert!(result.results.iter().all(|r| r.actual_value.is_none()));
        assert!(result.results[0].message.contains("nonexistent"));
    }

    #[test]
    fn test_collect_all_checks_every_gate() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = GateProfile::new(
            "test",
            vec![
                QualityGate::lte("mad", QualityMetric::BenfordMad, 0.015),
                QualityGate::gte("completion", QualityMetric::CompletionRate, 0.95),
            ],
        )
        .with_fail_strategy(FailStrategy::CollectAll);

        let result = GateEngine::evaluate(&evaluation, &profile);
        assert_eq!(result.results.len(), 2);
        assert_eq!(result.results[0].gate_name, "mad");
        assert_eq!(result.results[1].gate_name, "completion");
    }

    #[test]
    fn test_evaluate_empty_profile_passes() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = GateProfile::new("empty", Vec::new());
        let result = GateEngine::evaluate(&evaluation, &profile);
        assert!(result.passed);
        assert_eq!(result.gates_total, 0);
        assert_eq!(result.gates_passed, 0);
        assert!(result.results.is_empty());
    }

    #[test]
    fn test_gate_result_summary() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = sample_profile();
        let result = GateEngine::evaluate(&evaluation, &profile);
        assert!(result.summary.contains("test"));
        assert!(result.summary.contains("2/2"));
    }

    #[test]
    fn test_quality_metric_display() {
        assert_eq!(QualityMetric::BenfordMad.to_string(), "benford_mad");
        assert_eq!(
            QualityMetric::BalanceCoherence.to_string(),
            "balance_coherence"
        );
        assert_eq!(
            QualityMetric::S2CChainCompletion.to_string(),
            "s2c_chain_completion"
        );
        assert_eq!(
            QualityMetric::Custom("my_metric".to_string()).to_string(),
            "custom:my_metric"
        );
    }

    #[test]
    fn test_gate_profile_serialization() {
        let profile = sample_profile();
        let json = serde_json::to_string(&profile).expect("serialize");
        let deserialized: GateProfile = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(deserialized.name, "test");
        assert_eq!(deserialized.gates.len(), 2);
        assert_eq!(deserialized.fail_strategy, FailStrategy::CollectAll);
        assert_eq!(deserialized.gates[0].metric, QualityMetric::BenfordMad);
        assert_eq!(deserialized.gates[0].comparison, Comparison::Lte);
    }

    #[test]
    fn test_gate_profile_deserialization_defaults() {
        // `fail_strategy` and `upper_threshold` may be omitted from the input.
        let json = r#"{
            "name": "minimal",
            "gates": [
                {
                    "name": "mad",
                    "metric": "benford_mad",
                    "threshold": 0.015,
                    "comparison": "lte"
                }
            ]
        }"#;
        let profile: GateProfile = serde_json::from_str(json).expect("deserialize");
        assert_eq!(profile.fail_strategy, FailStrategy::CollectAll);
        assert_eq!(profile.gates.len(), 1);
        assert!(profile.gates[0].upper_threshold.is_none());
    }
}