Skip to main content

aivcs_core/
gate.rs

1//! Merge gate rules engine.
2//!
3//! Evaluates [`CaseResult`] vectors against [`GateRuleSet`] configurations to
4//! produce a [`GateVerdict`] — the pass/fail decision that blocks or allows a
5//! merge. Supports threshold checks, regression limits, fail-fast, and
6//! tag-based required-pass rules.
7
8use serde::{Deserialize, Serialize};
9
10use crate::domain::eval::EvalThresholds;
11
12// ---------------------------------------------------------------------------
13// Eval result types (input to the gate)
14// ---------------------------------------------------------------------------
15
16/// Result of evaluating a single test case.
17#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
18pub struct CaseResult {
19    /// Identifier for the test case.
20    pub case_id: String,
21    /// Score in 0.0–1.0.
22    pub score: f32,
23    /// Whether this case passed.
24    pub passed: bool,
25    /// Tags inherited from the `EvalTestCase`.
26    pub tags: Vec<String>,
27}
28
29/// Aggregated report from an eval run — the input to the gate engine.
30///
31/// # Invariants
32///
33/// `pass_rate` must be derived from `case_results` by the eval runner (number
34/// of `passed == true` cases divided by total cases) and kept consistent.
35/// Gate rules that operate on overall pass rate (`MinPassRate`, `MaxRegression`)
36/// use `pass_rate`; per-case rules (`RequireTag`) use `case_results`.
37/// Inconsistent values are a bug in the producer and can cause surprising verdicts.
38#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
39pub struct EvalReport {
40    /// Per-case results used by per-case and tag-based rules.
41    pub case_results: Vec<CaseResult>,
42    /// Overall pass rate (0.0–1.0) used by aggregate rules. Must be derived
43    /// from `case_results` and remain consistent with it.
44    pub pass_rate: f32,
45    /// Optional baseline pass rate for regression detection.
46    pub baseline_pass_rate: Option<f32>,
47}
48
49// ---------------------------------------------------------------------------
50// Gate rules
51// ---------------------------------------------------------------------------
52
53/// A single gate rule that can block a merge.
54#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
55#[serde(tag = "type", rename_all = "snake_case")]
56pub enum GateRule {
57    /// Pass rate must meet or exceed `EvalThresholds::min_pass_rate`.
58    MinPassRate,
59    /// Regression (baseline − current) must not exceed `EvalThresholds::max_regression`.
60    MaxRegression,
61    /// All cases with the given tag must pass.
62    RequireTag { tag: String },
63}
64
65/// A set of gate rules plus the thresholds they reference.
66#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
67pub struct GateRuleSet {
68    pub thresholds: EvalThresholds,
69    pub rules: Vec<GateRule>,
70}
71
72impl GateRuleSet {
73    /// Create a rule set with default thresholds and the standard rules
74    /// (`MinPassRate` + `MaxRegression`).
75    pub fn standard() -> Self {
76        Self {
77            thresholds: EvalThresholds::default(),
78            rules: vec![GateRule::MinPassRate, GateRule::MaxRegression],
79        }
80    }
81
82    /// Add a rule.
83    pub fn with_rule(mut self, rule: GateRule) -> Self {
84        self.rules.push(rule);
85        self
86    }
87
88    /// Override thresholds.
89    pub fn with_thresholds(mut self, thresholds: EvalThresholds) -> Self {
90        self.thresholds = thresholds;
91        self
92    }
93}
94
95// ---------------------------------------------------------------------------
96// Verdict
97// ---------------------------------------------------------------------------
98
99/// A single rule violation.
100#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
101pub struct Violation {
102    /// Which rule was violated.
103    pub rule: GateRule,
104    /// Human-readable explanation.
105    pub reason: String,
106}
107
108/// The outcome of evaluating a gate rule set against an eval report.
109#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
110pub struct GateVerdict {
111    /// Violations found (empty when passed).
112    pub violations: Vec<Violation>,
113}
114
115impl GateVerdict {
116    fn pass() -> Self {
117        Self {
118            violations: Vec::new(),
119        }
120    }
121
122    fn fail(violations: Vec<Violation>) -> Self {
123        Self { violations }
124    }
125
126    /// Whether the gate passed (i.e., there are no violations).
127    pub fn passed(&self) -> bool {
128        self.violations.is_empty()
129    }
130}
131
132// ---------------------------------------------------------------------------
133// Engine
134// ---------------------------------------------------------------------------
135
136/// Evaluate an [`EvalReport`] against a [`GateRuleSet`], returning a [`GateVerdict`].
137///
138/// When `thresholds.fail_fast` is true, evaluation stops at the first violation.
139pub fn evaluate_gate(rule_set: &GateRuleSet, report: &EvalReport) -> GateVerdict {
140    let mut violations = Vec::new();
141    let fail_fast = rule_set.thresholds.fail_fast;
142
143    for rule in &rule_set.rules {
144        if let Some(v) = check_rule(rule, &rule_set.thresholds, report) {
145            violations.push(v);
146            if fail_fast {
147                return GateVerdict::fail(violations);
148            }
149        }
150    }
151
152    if violations.is_empty() {
153        GateVerdict::pass()
154    } else {
155        GateVerdict::fail(violations)
156    }
157}
158
159fn check_rule(
160    rule: &GateRule,
161    thresholds: &EvalThresholds,
162    report: &EvalReport,
163) -> Option<Violation> {
164    match rule {
165        GateRule::MinPassRate => {
166            if report.pass_rate < thresholds.min_pass_rate {
167                Some(Violation {
168                    rule: rule.clone(),
169                    reason: format!(
170                        "pass rate {:.2}% < required {:.2}%",
171                        report.pass_rate * 100.0,
172                        thresholds.min_pass_rate * 100.0,
173                    ),
174                })
175            } else {
176                None
177            }
178        }
179        GateRule::MaxRegression => {
180            if let Some(baseline) = report.baseline_pass_rate {
181                let regression = baseline - report.pass_rate;
182                if regression > thresholds.max_regression {
183                    Some(Violation {
184                        rule: rule.clone(),
185                        reason: format!(
186                            "regression {:.2}% > allowed {:.2}% (baseline {:.2}% → current {:.2}%)",
187                            regression * 100.0,
188                            thresholds.max_regression * 100.0,
189                            baseline * 100.0,
190                            report.pass_rate * 100.0,
191                        ),
192                    })
193                } else {
194                    None
195                }
196            } else {
197                // No baseline → no regression to check
198                None
199            }
200        }
201        GateRule::RequireTag { tag } => {
202            let tagged: Vec<&CaseResult> = report
203                .case_results
204                .iter()
205                .filter(|c| c.tags.contains(tag))
206                .collect();
207
208            let failed: Vec<&str> = tagged
209                .iter()
210                .filter(|c| !c.passed)
211                .map(|c| c.case_id.as_str())
212                .collect();
213
214            if failed.is_empty() {
215                None
216            } else {
217                Some(Violation {
218                    rule: rule.clone(),
219                    reason: format!(
220                        "{} of {} cases tagged '{}' failed: [{}]",
221                        failed.len(),
222                        tagged.len(),
223                        tag,
224                        failed.join(", "),
225                    ),
226                })
227            }
228        }
229    }
230}