aivcs_core/gate.rs
1//! Merge gate rules engine.
2//!
//! Evaluates an [`EvalReport`] (aggregate pass rates plus per-case
//! [`CaseResult`]s) against a [`GateRuleSet`] to produce a [`GateVerdict`] —
//! the pass/fail decision that blocks or allows a merge. Supports pass-rate
//! thresholds, regression limits, fail-fast, and tag-based required-pass rules.
7
8use serde::{Deserialize, Serialize};
9
10use crate::domain::eval::EvalThresholds;
11
12// ---------------------------------------------------------------------------
13// Eval result types (input to the gate)
14// ---------------------------------------------------------------------------
15
16/// Result of evaluating a single test case.
17#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
18pub struct CaseResult {
19 /// Identifier for the test case.
20 pub case_id: String,
21 /// Score in 0.0–1.0.
22 pub score: f32,
23 /// Whether this case passed.
24 pub passed: bool,
25 /// Tags inherited from the `EvalTestCase`.
26 pub tags: Vec<String>,
27}
28
29/// Aggregated report from an eval run — the input to the gate engine.
30///
31/// # Invariants
32///
33/// `pass_rate` must be derived from `case_results` by the eval runner (number
34/// of `passed == true` cases divided by total cases) and kept consistent.
35/// Gate rules that operate on overall pass rate (`MinPassRate`, `MaxRegression`)
36/// use `pass_rate`; per-case rules (`RequireTag`) use `case_results`.
37/// Inconsistent values are a bug in the producer and can cause surprising verdicts.
38#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
39pub struct EvalReport {
40 /// Per-case results used by per-case and tag-based rules.
41 pub case_results: Vec<CaseResult>,
42 /// Overall pass rate (0.0–1.0) used by aggregate rules. Must be derived
43 /// from `case_results` and remain consistent with it.
44 pub pass_rate: f32,
45 /// Optional baseline pass rate for regression detection.
46 pub baseline_pass_rate: Option<f32>,
47}
48
49// ---------------------------------------------------------------------------
50// Gate rules
51// ---------------------------------------------------------------------------
52
53/// A single gate rule that can block a merge.
54#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
55#[serde(tag = "type", rename_all = "snake_case")]
56pub enum GateRule {
57 /// Pass rate must meet or exceed `EvalThresholds::min_pass_rate`.
58 MinPassRate,
59 /// Regression (baseline − current) must not exceed `EvalThresholds::max_regression`.
60 MaxRegression,
61 /// All cases with the given tag must pass.
62 RequireTag { tag: String },
63}
64
65/// A set of gate rules plus the thresholds they reference.
66#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
67pub struct GateRuleSet {
68 pub thresholds: EvalThresholds,
69 pub rules: Vec<GateRule>,
70}
71
72impl GateRuleSet {
73 /// Create a rule set with default thresholds and the standard rules
74 /// (`MinPassRate` + `MaxRegression`).
75 pub fn standard() -> Self {
76 Self {
77 thresholds: EvalThresholds::default(),
78 rules: vec![GateRule::MinPassRate, GateRule::MaxRegression],
79 }
80 }
81
82 /// Add a rule.
83 pub fn with_rule(mut self, rule: GateRule) -> Self {
84 self.rules.push(rule);
85 self
86 }
87
88 /// Override thresholds.
89 pub fn with_thresholds(mut self, thresholds: EvalThresholds) -> Self {
90 self.thresholds = thresholds;
91 self
92 }
93}
94
95// ---------------------------------------------------------------------------
96// Verdict
97// ---------------------------------------------------------------------------
98
99/// A single rule violation.
100#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
101pub struct Violation {
102 /// Which rule was violated.
103 pub rule: GateRule,
104 /// Human-readable explanation.
105 pub reason: String,
106}
107
108/// The outcome of evaluating a gate rule set against an eval report.
109#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
110pub struct GateVerdict {
111 /// Violations found (empty when passed).
112 pub violations: Vec<Violation>,
113}
114
115impl GateVerdict {
116 fn pass() -> Self {
117 Self {
118 violations: Vec::new(),
119 }
120 }
121
122 fn fail(violations: Vec<Violation>) -> Self {
123 Self { violations }
124 }
125
126 /// Whether the gate passed (i.e., there are no violations).
127 pub fn passed(&self) -> bool {
128 self.violations.is_empty()
129 }
130}
131
132// ---------------------------------------------------------------------------
133// Engine
134// ---------------------------------------------------------------------------
135
136/// Evaluate an [`EvalReport`] against a [`GateRuleSet`], returning a [`GateVerdict`].
137///
138/// When `thresholds.fail_fast` is true, evaluation stops at the first violation.
139pub fn evaluate_gate(rule_set: &GateRuleSet, report: &EvalReport) -> GateVerdict {
140 let mut violations = Vec::new();
141 let fail_fast = rule_set.thresholds.fail_fast;
142
143 for rule in &rule_set.rules {
144 if let Some(v) = check_rule(rule, &rule_set.thresholds, report) {
145 violations.push(v);
146 if fail_fast {
147 return GateVerdict::fail(violations);
148 }
149 }
150 }
151
152 if violations.is_empty() {
153 GateVerdict::pass()
154 } else {
155 GateVerdict::fail(violations)
156 }
157}
158
159fn check_rule(
160 rule: &GateRule,
161 thresholds: &EvalThresholds,
162 report: &EvalReport,
163) -> Option<Violation> {
164 match rule {
165 GateRule::MinPassRate => {
166 if report.pass_rate < thresholds.min_pass_rate {
167 Some(Violation {
168 rule: rule.clone(),
169 reason: format!(
170 "pass rate {:.2}% < required {:.2}%",
171 report.pass_rate * 100.0,
172 thresholds.min_pass_rate * 100.0,
173 ),
174 })
175 } else {
176 None
177 }
178 }
179 GateRule::MaxRegression => {
180 if let Some(baseline) = report.baseline_pass_rate {
181 let regression = baseline - report.pass_rate;
182 if regression > thresholds.max_regression {
183 Some(Violation {
184 rule: rule.clone(),
185 reason: format!(
186 "regression {:.2}% > allowed {:.2}% (baseline {:.2}% → current {:.2}%)",
187 regression * 100.0,
188 thresholds.max_regression * 100.0,
189 baseline * 100.0,
190 report.pass_rate * 100.0,
191 ),
192 })
193 } else {
194 None
195 }
196 } else {
197 // No baseline → no regression to check
198 None
199 }
200 }
201 GateRule::RequireTag { tag } => {
202 let tagged: Vec<&CaseResult> = report
203 .case_results
204 .iter()
205 .filter(|c| c.tags.contains(tag))
206 .collect();
207
208 let failed: Vec<&str> = tagged
209 .iter()
210 .filter(|c| !c.passed)
211 .map(|c| c.case_id.as_str())
212 .collect();
213
214 if failed.is_empty() {
215 None
216 } else {
217 Some(Violation {
218 rule: rule.clone(),
219 reason: format!(
220 "{} of {} cases tagged '{}' failed: [{}]",
221 failed.len(),
222 tagged.len(),
223 tag,
224 failed.join(", "),
225 ),
226 })
227 }
228 }
229 }
230}