Skip to main content

aivcs_core/
gate.rs

1//! Merge gate rules engine.
2//!
3//! Evaluates [`CaseResult`] vectors against [`GateRuleSet`] configurations to
4//! produce a [`GateVerdict`] — the pass/fail decision that blocks or allows a
5//! merge. Supports threshold checks, regression limits, fail-fast, and
6//! tag-based required-pass rules.
7
8use serde::{Deserialize, Serialize};
9
10use crate::domain::eval::EvalThresholds;
11
12// ---------------------------------------------------------------------------
13// Eval result types (input to the gate)
14// ---------------------------------------------------------------------------
15
16/// Result of evaluating a single test case.
17#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
18pub struct CaseResult {
19    /// Identifier for the test case.
20    pub case_id: String,
21    /// Score in 0.0–1.0.
22    pub score: f32,
23    /// Whether this case passed.
24    pub passed: bool,
25    /// Tags inherited from the `EvalTestCase`.
26    pub tags: Vec<String>,
27}
28
29/// Aggregated report from an eval run — the input to the gate engine.
30#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
31pub struct EvalReport {
32    /// Per-case results.
33    pub case_results: Vec<CaseResult>,
34    /// Pass rate (0.0–1.0) from the eval runner.
35    pub pass_rate: f32,
36    /// Optional baseline pass rate for regression detection.
37    pub baseline_pass_rate: Option<f32>,
38}
39
40// ---------------------------------------------------------------------------
41// Gate rules
42// ---------------------------------------------------------------------------
43
44/// A single gate rule that can block a merge.
45#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
46#[serde(tag = "type", rename_all = "snake_case")]
47pub enum GateRule {
48    /// Pass rate must meet or exceed `EvalThresholds::min_pass_rate`.
49    MinPassRate,
50    /// Regression (baseline − current) must not exceed `EvalThresholds::max_regression`.
51    MaxRegression,
52    /// All cases with the given tag must pass.
53    RequireTag { tag: String },
54}
55
56/// A set of gate rules plus the thresholds they reference.
57#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
58pub struct GateRuleSet {
59    pub thresholds: EvalThresholds,
60    pub rules: Vec<GateRule>,
61}
62
63impl GateRuleSet {
64    /// Create a rule set with default thresholds and the standard rules
65    /// (`MinPassRate` + `MaxRegression`).
66    pub fn standard() -> Self {
67        Self {
68            thresholds: EvalThresholds::default(),
69            rules: vec![GateRule::MinPassRate, GateRule::MaxRegression],
70        }
71    }
72
73    /// Add a rule.
74    pub fn with_rule(mut self, rule: GateRule) -> Self {
75        self.rules.push(rule);
76        self
77    }
78
79    /// Override thresholds.
80    pub fn with_thresholds(mut self, thresholds: EvalThresholds) -> Self {
81        self.thresholds = thresholds;
82        self
83    }
84}
85
86// ---------------------------------------------------------------------------
87// Verdict
88// ---------------------------------------------------------------------------
89
90/// A single rule violation.
91#[derive(Debug, Clone, PartialEq)]
92pub struct Violation {
93    /// Which rule was violated.
94    pub rule: GateRule,
95    /// Human-readable explanation.
96    pub reason: String,
97}
98
99/// The outcome of evaluating a gate rule set against an eval report.
100#[derive(Debug, Clone, PartialEq)]
101pub struct GateVerdict {
102    /// Whether the gate passed (no violations).
103    pub passed: bool,
104    /// Violations found (empty when passed).
105    pub violations: Vec<Violation>,
106}
107
108impl GateVerdict {
109    fn pass() -> Self {
110        Self {
111            passed: true,
112            violations: Vec::new(),
113        }
114    }
115
116    fn fail(violations: Vec<Violation>) -> Self {
117        Self {
118            passed: false,
119            violations,
120        }
121    }
122}
123
124// ---------------------------------------------------------------------------
125// Engine
126// ---------------------------------------------------------------------------
127
128/// Evaluate an [`EvalReport`] against a [`GateRuleSet`], returning a [`GateVerdict`].
129///
130/// When `thresholds.fail_fast` is true, evaluation stops at the first violation.
131pub fn evaluate_gate(rule_set: &GateRuleSet, report: &EvalReport) -> GateVerdict {
132    let mut violations = Vec::new();
133    let fail_fast = rule_set.thresholds.fail_fast;
134
135    for rule in &rule_set.rules {
136        if let Some(v) = check_rule(rule, &rule_set.thresholds, report) {
137            violations.push(v);
138            if fail_fast {
139                return GateVerdict::fail(violations);
140            }
141        }
142    }
143
144    if violations.is_empty() {
145        GateVerdict::pass()
146    } else {
147        GateVerdict::fail(violations)
148    }
149}
150
151fn check_rule(
152    rule: &GateRule,
153    thresholds: &EvalThresholds,
154    report: &EvalReport,
155) -> Option<Violation> {
156    match rule {
157        GateRule::MinPassRate => {
158            if report.pass_rate < thresholds.min_pass_rate {
159                Some(Violation {
160                    rule: rule.clone(),
161                    reason: format!(
162                        "pass rate {:.2}% < required {:.2}%",
163                        report.pass_rate * 100.0,
164                        thresholds.min_pass_rate * 100.0,
165                    ),
166                })
167            } else {
168                None
169            }
170        }
171        GateRule::MaxRegression => {
172            if let Some(baseline) = report.baseline_pass_rate {
173                let regression = baseline - report.pass_rate;
174                if regression > thresholds.max_regression {
175                    Some(Violation {
176                        rule: rule.clone(),
177                        reason: format!(
178                            "regression {:.2}% > allowed {:.2}% (baseline {:.2}% → current {:.2}%)",
179                            regression * 100.0,
180                            thresholds.max_regression * 100.0,
181                            baseline * 100.0,
182                            report.pass_rate * 100.0,
183                        ),
184                    })
185                } else {
186                    None
187                }
188            } else {
189                // No baseline → no regression to check
190                None
191            }
192        }
193        GateRule::RequireTag { tag } => {
194            let tagged: Vec<&CaseResult> = report
195                .case_results
196                .iter()
197                .filter(|c| c.tags.contains(tag))
198                .collect();
199
200            let failed: Vec<&str> = tagged
201                .iter()
202                .filter(|c| !c.passed)
203                .map(|c| c.case_id.as_str())
204                .collect();
205
206            if failed.is_empty() {
207                None
208            } else {
209                Some(Violation {
210                    rule: rule.clone(),
211                    reason: format!(
212                        "{} of {} cases tagged '{}' failed: [{}]",
213                        failed.len(),
214                        tagged.len(),
215                        tag,
216                        failed.join(", "),
217                    ),
218                })
219            }
220        }
221    }
222}