// atomr_agents_eval/regression.rs
use serde::{Deserialize, Serialize};
2
3use crate::suite::EvalRun;
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
6pub struct RegressionResult {
7 pub baseline_pass_rate: f32,
8 pub current_pass_rate: f32,
9 pub delta: f32,
10 pub blocked: bool,
11 pub reason: String,
12}
13
/// Gate that decides whether a drop in pass rate between two evaluation runs
/// is large enough to block.
#[derive(Debug, Clone, PartialEq)]
pub struct RegressionGate {
    /// Largest allowed drop in pass rate. `check` blocks only when
    /// `current - baseline` is strictly less than `-tolerance`.
    pub tolerance: f32,
}
19
20impl RegressionGate {
21 pub fn check(&self, baseline: &EvalRun, current: &EvalRun) -> RegressionResult {
22 let delta = current.pass_rate() - baseline.pass_rate();
23 let blocked = delta < -self.tolerance;
24 let reason = if blocked {
25 format!(
26 "pass_rate dropped from {:.2} to {:.2} (tolerance {:.2})",
27 baseline.pass_rate(),
28 current.pass_rate(),
29 self.tolerance
30 )
31 } else {
32 "ok".into()
33 };
34 RegressionResult {
35 baseline_pass_rate: baseline.pass_rate(),
36 current_pass_rate: current.pass_rate(),
37 delta,
38 blocked,
39 reason,
40 }
41 }
42}
43
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `EvalRun` with the given pass/fail counts, a zero average
    /// score, and no per-case results.
    fn run_with(passed: u32, failed: u32) -> EvalRun {
        EvalRun {
            passed,
            failed,
            avg_score: 0.0,
            results: vec![],
        }
    }

    #[test]
    fn regression_blocks_when_below_tolerance() {
        let gate = RegressionGate { tolerance: 0.05 };
        // 9 passed / 1 failed versus 7 passed / 3 failed; presumably a drop
        // from 0.9 to 0.7, assuming `pass_rate` is passed / total.
        let baseline = run_with(9, 1);
        let current = run_with(7, 3);
        let r = gate.check(&baseline, &current);
        assert!(r.blocked);
    }

    #[test]
    fn regression_allows_within_tolerance() {
        let gate = RegressionGate { tolerance: 0.10 };
        // 10 passed / 0 failed versus 95 passed / 5 failed; presumably a drop
        // of 0.05, which stays inside the 0.10 tolerance.
        let baseline = run_with(10, 0);
        let current = run_with(95, 5);
        let r = gate.check(&baseline, &current);
        assert!(!r.blocked, "delta {} blocked unexpectedly", r.delta);
    }
}