datasynth_eval/gates/
engine.rs

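//! Quality gate evaluation engine.
//!
//! Evaluates generation results against configurable pass/fail criteria.
//! A gate whose metric is absent from the evaluation is treated as not
//! applicable and counts as passed.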
4
5use serde::{Deserialize, Serialize};
6
7use crate::ComprehensiveEvaluation;
8
9/// A quality metric that can be checked by a gate.
10#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
11#[serde(rename_all = "snake_case")]
12pub enum QualityMetric {
13    /// Benford's Law Mean Absolute Deviation.
14    BenfordMad,
15    /// Balance sheet coherence rate (0.0–1.0).
16    BalanceCoherence,
17    /// Document chain integrity rate (0.0–1.0).
18    DocumentChainIntegrity,
19    /// Correlation preservation score (0.0–1.0).
20    CorrelationPreservation,
21    /// Temporal consistency score (0.0–1.0).
22    TemporalConsistency,
23    /// Privacy MIA AUC-ROC score.
24    PrivacyMiaAuc,
25    /// Data completion rate (0.0–1.0).
26    CompletionRate,
27    /// Duplicate rate (0.0–1.0).
28    DuplicateRate,
29    /// Referential integrity rate (0.0–1.0).
30    ReferentialIntegrity,
31    /// Intercompany match rate (0.0–1.0).
32    IcMatchRate,
33    /// S2C chain completion rate.
34    S2CChainCompletion,
35    /// Payroll calculation accuracy.
36    PayrollAccuracy,
37    /// Manufacturing yield rate.
38    ManufacturingYield,
39    /// Bank reconciliation balance accuracy.
40    BankReconciliationBalance,
41    /// Financial reporting tie-back rate.
42    FinancialReportingTieBack,
43    /// AML detectability coverage.
44    AmlDetectability,
45    /// Process mining event coverage.
46    ProcessMiningCoverage,
47    /// Audit evidence coverage.
48    AuditEvidenceCoverage,
49    /// Anomaly separability (AUC-ROC).
50    AnomalySeparability,
51    /// Feature quality score.
52    FeatureQualityScore,
53    /// GNN readiness score.
54    GnnReadinessScore,
55    /// Domain gap score.
56    DomainGapScore,
57    /// Custom metric identified by name.
58    Custom(String),
59}
60
61impl std::fmt::Display for QualityMetric {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        match self {
64            Self::BenfordMad => write!(f, "benford_mad"),
65            Self::BalanceCoherence => write!(f, "balance_coherence"),
66            Self::DocumentChainIntegrity => write!(f, "document_chain_integrity"),
67            Self::CorrelationPreservation => write!(f, "correlation_preservation"),
68            Self::TemporalConsistency => write!(f, "temporal_consistency"),
69            Self::PrivacyMiaAuc => write!(f, "privacy_mia_auc"),
70            Self::CompletionRate => write!(f, "completion_rate"),
71            Self::DuplicateRate => write!(f, "duplicate_rate"),
72            Self::ReferentialIntegrity => write!(f, "referential_integrity"),
73            Self::IcMatchRate => write!(f, "ic_match_rate"),
74            Self::S2CChainCompletion => write!(f, "s2c_chain_completion"),
75            Self::PayrollAccuracy => write!(f, "payroll_accuracy"),
76            Self::ManufacturingYield => write!(f, "manufacturing_yield"),
77            Self::BankReconciliationBalance => write!(f, "bank_reconciliation_balance"),
78            Self::FinancialReportingTieBack => write!(f, "financial_reporting_tie_back"),
79            Self::AmlDetectability => write!(f, "aml_detectability"),
80            Self::ProcessMiningCoverage => write!(f, "process_mining_coverage"),
81            Self::AuditEvidenceCoverage => write!(f, "audit_evidence_coverage"),
82            Self::AnomalySeparability => write!(f, "anomaly_separability"),
83            Self::FeatureQualityScore => write!(f, "feature_quality_score"),
84            Self::GnnReadinessScore => write!(f, "gnn_readiness_score"),
85            Self::DomainGapScore => write!(f, "domain_gap_score"),
86            Self::Custom(name) => write!(f, "custom:{}", name),
87        }
88    }
89}
90
91/// Comparison operator for threshold checks.
92#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
93#[serde(rename_all = "snake_case")]
94pub enum Comparison {
95    /// Greater than or equal to threshold.
96    Gte,
97    /// Less than or equal to threshold.
98    Lte,
99    /// Equal to threshold (with epsilon).
100    Eq,
101    /// Between two thresholds (inclusive). Uses `threshold` as lower and `upper_threshold` as upper.
102    Between,
103}
104
105/// Strategy for handling gate failures.
106#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
107#[serde(rename_all = "snake_case")]
108pub enum FailStrategy {
109    /// Stop checking on first failure.
110    FailFast,
111    /// Check all gates and collect all failures.
112    #[default]
113    CollectAll,
114}
115
116/// A single quality gate with a metric, threshold, and comparison.
117#[derive(Debug, Clone, Serialize, Deserialize)]
118pub struct QualityGate {
119    /// Human-readable name for this gate.
120    pub name: String,
121    /// The metric to check.
122    pub metric: QualityMetric,
123    /// Threshold value for comparison.
124    pub threshold: f64,
125    /// Upper threshold for Between comparison.
126    #[serde(default, skip_serializing_if = "Option::is_none")]
127    pub upper_threshold: Option<f64>,
128    /// How to compare the metric value against the threshold.
129    pub comparison: Comparison,
130}
131
132impl QualityGate {
133    /// Create a new quality gate.
134    pub fn new(
135        name: impl Into<String>,
136        metric: QualityMetric,
137        threshold: f64,
138        comparison: Comparison,
139    ) -> Self {
140        Self {
141            name: name.into(),
142            metric,
143            threshold,
144            upper_threshold: None,
145            comparison,
146        }
147    }
148
149    /// Create a gate that requires metric >= threshold.
150    pub fn gte(name: impl Into<String>, metric: QualityMetric, threshold: f64) -> Self {
151        Self::new(name, metric, threshold, Comparison::Gte)
152    }
153
154    /// Create a gate that requires metric <= threshold.
155    pub fn lte(name: impl Into<String>, metric: QualityMetric, threshold: f64) -> Self {
156        Self::new(name, metric, threshold, Comparison::Lte)
157    }
158
159    /// Create a gate that requires metric between lower and upper (inclusive).
160    pub fn between(name: impl Into<String>, metric: QualityMetric, lower: f64, upper: f64) -> Self {
161        Self {
162            name: name.into(),
163            metric,
164            threshold: lower,
165            upper_threshold: Some(upper),
166            comparison: Comparison::Between,
167        }
168    }
169
170    /// Check if an actual value passes this gate.
171    pub fn check(&self, actual: f64) -> bool {
172        match self.comparison {
173            Comparison::Gte => actual >= self.threshold,
174            Comparison::Lte => actual <= self.threshold,
175            Comparison::Eq => (actual - self.threshold).abs() < 1e-9,
176            Comparison::Between => {
177                let upper = self.upper_threshold.unwrap_or(self.threshold);
178                actual >= self.threshold && actual <= upper
179            }
180        }
181    }
182}
183
184/// A named collection of quality gates.
185#[derive(Debug, Clone, Serialize, Deserialize)]
186pub struct GateProfile {
187    /// Profile name (e.g., "strict", "default", "lenient").
188    pub name: String,
189    /// List of quality gates in this profile.
190    pub gates: Vec<QualityGate>,
191    /// Strategy for handling failures.
192    #[serde(default)]
193    pub fail_strategy: FailStrategy,
194}
195
196impl GateProfile {
197    /// Create a new gate profile.
198    pub fn new(name: impl Into<String>, gates: Vec<QualityGate>) -> Self {
199        Self {
200            name: name.into(),
201            gates,
202            fail_strategy: FailStrategy::default(),
203        }
204    }
205
206    /// Set the fail strategy.
207    pub fn with_fail_strategy(mut self, strategy: FailStrategy) -> Self {
208        self.fail_strategy = strategy;
209        self
210    }
211}
212
213/// Result of checking a single gate.
214#[derive(Debug, Clone, Serialize, Deserialize)]
215pub struct GateCheckResult {
216    /// Gate name.
217    pub gate_name: String,
218    /// Metric checked.
219    pub metric: QualityMetric,
220    /// Whether the gate passed.
221    pub passed: bool,
222    /// Actual metric value.
223    pub actual_value: Option<f64>,
224    /// Expected threshold.
225    pub threshold: f64,
226    /// Comparison used.
227    pub comparison: Comparison,
228    /// Human-readable message.
229    pub message: String,
230}
231
232/// Overall result of evaluating all gates in a profile.
233#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct GateResult {
235    /// Whether all gates passed.
236    pub passed: bool,
237    /// Profile name used.
238    pub profile_name: String,
239    /// Individual gate results.
240    pub results: Vec<GateCheckResult>,
241    /// Summary message.
242    pub summary: String,
243    /// Number of gates that passed.
244    pub gates_passed: usize,
245    /// Total number of gates checked.
246    pub gates_total: usize,
247}
248
/// Engine that evaluates quality gates against a comprehensive evaluation.
///
/// The type has no fields; its functionality is exposed through associated
/// functions. The derives let it be printed, copied and default-constructed
/// like any other public type.
#[derive(Debug, Clone, Copy, Default)]
pub struct GateEngine;
251
252impl GateEngine {
253    /// Evaluate a comprehensive evaluation against a gate profile.
254    pub fn evaluate(evaluation: &ComprehensiveEvaluation, profile: &GateProfile) -> GateResult {
255        let mut results = Vec::new();
256        let mut all_passed = true;
257
258        for gate in &profile.gates {
259            let (actual_value, message) = Self::extract_metric(evaluation, &gate.metric);
260
261            let check_result = match actual_value {
262                Some(value) => {
263                    let passed = gate.check(value);
264                    if !passed {
265                        all_passed = false;
266                    }
267                    GateCheckResult {
268                        gate_name: gate.name.clone(),
269                        metric: gate.metric.clone(),
270                        passed,
271                        actual_value: Some(value),
272                        threshold: gate.threshold,
273                        comparison: gate.comparison.clone(),
274                        message: if passed {
275                            format!(
276                                "{}: {:.4} passes {:?} {:.4}",
277                                gate.name, value, gate.comparison, gate.threshold
278                            )
279                        } else {
280                            format!(
281                                "{}: {:.4} fails {:?} {:.4}",
282                                gate.name, value, gate.comparison, gate.threshold
283                            )
284                        },
285                    }
286                }
287                None => {
288                    // Metric not available - treat as not applicable (pass)
289                    GateCheckResult {
290                        gate_name: gate.name.clone(),
291                        metric: gate.metric.clone(),
292                        passed: true,
293                        actual_value: None,
294                        threshold: gate.threshold,
295                        comparison: gate.comparison.clone(),
296                        message: format!("{}: metric not available ({})", gate.name, message),
297                    }
298                }
299            };
300
301            let failed = !check_result.passed;
302            results.push(check_result);
303
304            if failed && profile.fail_strategy == FailStrategy::FailFast {
305                break;
306            }
307        }
308
309        let gates_passed = results.iter().filter(|r| r.passed).count();
310        let gates_total = results.len();
311
312        let summary = if all_passed {
313            format!(
314                "All {}/{} quality gates passed (profile: {})",
315                gates_passed, gates_total, profile.name
316            )
317        } else {
318            let failed_names: Vec<_> = results
319                .iter()
320                .filter(|r| !r.passed)
321                .map(|r| r.gate_name.as_str())
322                .collect();
323            format!(
324                "{}/{} quality gates passed, {} failed: {} (profile: {})",
325                gates_passed,
326                gates_total,
327                gates_total - gates_passed,
328                failed_names.join(", "),
329                profile.name
330            )
331        };
332
333        GateResult {
334            passed: all_passed,
335            profile_name: profile.name.clone(),
336            results,
337            summary,
338            gates_passed,
339            gates_total,
340        }
341    }
342
343    /// Extract a metric value from a comprehensive evaluation.
344    fn extract_metric(
345        evaluation: &ComprehensiveEvaluation,
346        metric: &QualityMetric,
347    ) -> (Option<f64>, String) {
348        match metric {
349            QualityMetric::BenfordMad => {
350                let mad = evaluation.statistical.benford.as_ref().map(|b| b.mad);
351                (mad, "benford analysis not available".to_string())
352            }
353            QualityMetric::BalanceCoherence => {
354                let rate = evaluation.coherence.balance.as_ref().map(|b| {
355                    if b.periods_evaluated == 0 {
356                        0.0
357                    } else {
358                        (b.periods_evaluated - b.periods_imbalanced) as f64
359                            / b.periods_evaluated as f64
360                    }
361                });
362                (rate, "balance sheet evaluation not available".to_string())
363            }
364            QualityMetric::DocumentChainIntegrity => {
365                let rate = evaluation
366                    .coherence
367                    .document_chain
368                    .as_ref()
369                    .map(|d| d.p2p_completion_rate);
370                (rate, "document chain evaluation not available".to_string())
371            }
372            QualityMetric::CorrelationPreservation => {
373                // Correlation preservation is not yet computed in ComprehensiveEvaluation.
374                // This gate will always be skipped until the metric is wired in.
375                tracing::warn!("CorrelationPreservation metric always returns None — not yet wired into evaluation pipeline");
376                (
377                    None,
378                    "correlation preservation metric not available".to_string(),
379                )
380            }
381            QualityMetric::TemporalConsistency => {
382                let rate = evaluation
383                    .statistical
384                    .temporal
385                    .as_ref()
386                    .map(|t| t.pattern_correlation);
387                (rate, "temporal analysis not available".to_string())
388            }
389            QualityMetric::PrivacyMiaAuc => {
390                let auc = evaluation
391                    .privacy
392                    .as_ref()
393                    .and_then(|p| p.membership_inference.as_ref())
394                    .map(|m| m.auc_roc);
395                (auc, "privacy MIA evaluation not available".to_string())
396            }
397            QualityMetric::CompletionRate => {
398                let rate = evaluation
399                    .quality
400                    .completeness
401                    .as_ref()
402                    .map(|c| c.overall_completeness);
403                (rate, "completeness analysis not available".to_string())
404            }
405            QualityMetric::DuplicateRate => {
406                let rate = evaluation
407                    .quality
408                    .uniqueness
409                    .as_ref()
410                    .map(|u| u.duplicate_rate);
411                (rate, "uniqueness analysis not available".to_string())
412            }
413            QualityMetric::ReferentialIntegrity => {
414                let rate = evaluation
415                    .coherence
416                    .referential
417                    .as_ref()
418                    .map(|r| r.overall_integrity_score);
419                (
420                    rate,
421                    "referential integrity evaluation not available".to_string(),
422                )
423            }
424            QualityMetric::IcMatchRate => {
425                let rate = evaluation
426                    .coherence
427                    .intercompany
428                    .as_ref()
429                    .map(|ic| ic.match_rate);
430                (rate, "IC matching evaluation not available".to_string())
431            }
432            QualityMetric::S2CChainCompletion => {
433                let rate = evaluation
434                    .coherence
435                    .sourcing
436                    .as_ref()
437                    .map(|s| s.rfx_completion_rate);
438                (rate, "sourcing evaluation not available".to_string())
439            }
440            QualityMetric::PayrollAccuracy => {
441                let rate = evaluation
442                    .coherence
443                    .hr_payroll
444                    .as_ref()
445                    .map(|h| h.gross_to_net_accuracy);
446                (rate, "HR/payroll evaluation not available".to_string())
447            }
448            QualityMetric::ManufacturingYield => {
449                let rate = evaluation
450                    .coherence
451                    .manufacturing
452                    .as_ref()
453                    .map(|m| m.yield_rate_consistency);
454                (rate, "manufacturing evaluation not available".to_string())
455            }
456            QualityMetric::BankReconciliationBalance => {
457                let rate = evaluation
458                    .coherence
459                    .bank_reconciliation
460                    .as_ref()
461                    .map(|b| b.balance_accuracy);
462                (
463                    rate,
464                    "bank reconciliation evaluation not available".to_string(),
465                )
466            }
467            QualityMetric::FinancialReportingTieBack => {
468                let rate = evaluation
469                    .coherence
470                    .financial_reporting
471                    .as_ref()
472                    .map(|fr| fr.statement_tb_tie_rate);
473                (
474                    rate,
475                    "financial reporting evaluation not available".to_string(),
476                )
477            }
478            QualityMetric::AmlDetectability => {
479                let rate = evaluation
480                    .banking
481                    .as_ref()
482                    .and_then(|b| b.aml.as_ref())
483                    .map(|a| a.typology_coverage);
484                (
485                    rate,
486                    "AML detectability evaluation not available".to_string(),
487                )
488            }
489            QualityMetric::ProcessMiningCoverage => {
490                let rate = evaluation
491                    .process_mining
492                    .as_ref()
493                    .and_then(|pm| pm.event_sequence.as_ref())
494                    .map(|es| es.timestamp_monotonicity);
495                (rate, "process mining evaluation not available".to_string())
496            }
497            QualityMetric::AuditEvidenceCoverage => {
498                let rate = evaluation
499                    .coherence
500                    .audit
501                    .as_ref()
502                    .map(|a| a.evidence_to_finding_rate);
503                (rate, "audit evaluation not available".to_string())
504            }
505            QualityMetric::AnomalySeparability => {
506                let score = evaluation
507                    .ml_readiness
508                    .anomaly_scoring
509                    .as_ref()
510                    .map(|a| a.anomaly_separability);
511                (
512                    score,
513                    "anomaly scoring evaluation not available".to_string(),
514                )
515            }
516            QualityMetric::FeatureQualityScore => {
517                let score = evaluation
518                    .ml_readiness
519                    .feature_quality
520                    .as_ref()
521                    .map(|f| f.feature_quality_score);
522                (
523                    score,
524                    "feature quality evaluation not available".to_string(),
525                )
526            }
527            QualityMetric::GnnReadinessScore => {
528                let score = evaluation
529                    .ml_readiness
530                    .gnn_readiness
531                    .as_ref()
532                    .map(|g| g.gnn_readiness_score);
533                (score, "GNN readiness evaluation not available".to_string())
534            }
535            QualityMetric::DomainGapScore => {
536                let score = evaluation
537                    .ml_readiness
538                    .domain_gap
539                    .as_ref()
540                    .map(|d| d.domain_gap_score);
541                (score, "domain gap evaluation not available".to_string())
542            }
543            QualityMetric::Custom(name) => {
544                tracing::warn!("Custom metric '{}' always returns None — custom metric evaluation not implemented", name);
545                (
546                    None,
547                    format!(
548                        "custom metric '{}' not available in standard evaluation",
549                        name
550                    ),
551                )
552            }
553        }
554    }
555}
556
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    //! Unit tests for gate checks and for evaluating profiles against an
    //! evaluation created with `ComprehensiveEvaluation::new()`.

    use super::*;

    /// Two-gate profile named "test" shared by several tests.
    fn sample_profile() -> GateProfile {
        GateProfile::new(
            "test",
            vec![
                QualityGate::lte("benford_compliance", QualityMetric::BenfordMad, 0.015),
                QualityGate::gte("completeness", QualityMetric::CompletionRate, 0.95),
            ],
        )
    }

    #[test]
    fn test_gate_check_gte() {
        let gate = QualityGate::gte("test", QualityMetric::CompletionRate, 0.95);
        assert!(gate.check(0.96));
        assert!(gate.check(0.95));
        assert!(!gate.check(0.94));
    }

    #[test]
    fn test_gate_check_lte() {
        let gate = QualityGate::lte("test", QualityMetric::BenfordMad, 0.015);
        assert!(gate.check(0.01));
        assert!(gate.check(0.015));
        assert!(!gate.check(0.016));
    }

    #[test]
    fn test_gate_check_between() {
        let gate = QualityGate::between("test", QualityMetric::DuplicateRate, 0.0, 0.05);
        assert!(gate.check(0.0));
        assert!(gate.check(0.03));
        assert!(gate.check(0.05));
        assert!(!gate.check(0.06));
        // The lower bound is enforced as well as the upper one.
        assert!(!gate.check(-0.01));
    }

    #[test]
    fn test_gate_check_eq() {
        let gate = QualityGate::new("test", QualityMetric::BalanceCoherence, 1.0, Comparison::Eq);
        assert!(gate.check(1.0));
        assert!(!gate.check(0.99));
        // Equality is approximate: tiny differences pass, larger ones do not.
        assert!(gate.check(1.0 + 1e-12));
        assert!(!gate.check(1.0 + 1e-6));
    }

    #[test]
    fn test_evaluate_empty_evaluation() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = sample_profile();
        let result = GateEngine::evaluate(&evaluation, &profile);
        // All metrics unavailable → treated as pass
        assert!(result.passed);
        assert_eq!(result.gates_total, 2);
        assert_eq!(result.gates_passed, 2);
        assert_eq!(result.profile_name, "test");
    }

    #[test]
    fn test_fail_fast_stops_on_first_failure() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = GateProfile::new(
            "strict",
            vec![
                // Custom metrics are never available, and an unavailable metric
                // is treated as a pass, so neither gate fails here.
                QualityGate::gte(
                    "custom_gate",
                    QualityMetric::Custom("nonexistent".to_string()),
                    0.99,
                ),
                QualityGate::gte(
                    "another",
                    QualityMetric::Custom("also_nonexistent".to_string()),
                    0.99,
                ),
            ],
        )
        .with_fail_strategy(FailStrategy::FailFast);

        let result = GateEngine::evaluate(&evaluation, &profile);
        // With no failure, FailFast must not cut the run short: both gates are checked.
        assert!(result.passed);
        assert_eq!(result.results.len(), 2);
        assert_eq!(result.gates_passed, 2);
        assert!(result.results.iter().all(|r| r.actual_value.is_none()));
    }

    #[test]
    fn test_collect_all_reports_all_failures() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = GateProfile::new(
            "test",
            vec![
                QualityGate::lte("mad", QualityMetric::BenfordMad, 0.015),
                QualityGate::gte("completion", QualityMetric::CompletionRate, 0.95),
            ],
        )
        .with_fail_strategy(FailStrategy::CollectAll);

        let result = GateEngine::evaluate(&evaluation, &profile);
        // CollectAll always checks every gate, in profile order.
        assert_eq!(result.results.len(), 2);
        assert_eq!(result.gates_total, 2);
        assert_eq!(result.results[0].gate_name, "mad");
        assert_eq!(result.results[1].gate_name, "completion");
    }

    #[test]
    fn test_gate_result_summary() {
        let evaluation = ComprehensiveEvaluation::new();
        let profile = sample_profile();
        let result = GateEngine::evaluate(&evaluation, &profile);
        assert!(result.summary.contains("test"));
        // Both summary formats end with the profile name.
        assert!(result.summary.contains("(profile: test)"));
    }

    #[test]
    fn test_quality_metric_display() {
        assert_eq!(QualityMetric::BenfordMad.to_string(), "benford_mad");
        assert_eq!(
            QualityMetric::BalanceCoherence.to_string(),
            "balance_coherence"
        );
        assert_eq!(
            QualityMetric::Custom("my_metric".to_string()).to_string(),
            "custom:my_metric"
        );
    }

    #[test]
    fn test_gate_profile_serialization() {
        let profile = sample_profile();
        let json = serde_json::to_string(&profile).expect("serialize");
        // Gates without an upper threshold omit the field entirely.
        assert!(!json.contains("upper_threshold"));

        let deserialized: GateProfile = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(deserialized.name, "test");
        assert_eq!(deserialized.gates.len(), 2);
        assert_eq!(deserialized.fail_strategy, FailStrategy::CollectAll);
        assert_eq!(deserialized.gates[0].metric, QualityMetric::BenfordMad);
        assert_eq!(deserialized.gates[1].comparison, Comparison::Gte);
    }
}
datasynth_eval/gates/engine.rs

datasynth_eval/gates/
engine.rs