aver/bench/
compare.rs

//! Baseline comparison — diff a current `BenchReport` against a stored
//! one and decide whether the run regressed past the configured tolerance.
//!
//! Gated metrics (any failure flips `regressed=true`):
//! - `p50_ms` / `p95_ms` (per `[tolerance]` budget)
//! - `compiler_visible_allocs` (growth past baseline is a regression;
//!   shrinkage is reported as an improvement and is not gated)
//! - `response_bytes` (exact match when both sides have a number)
//!
//! A `passes_applied` mismatch is reported as a note but not gated —
//! pipeline refactors legitimately change which stages run.
12
13use crate::bench::manifest::Tolerance;
14use crate::bench::report::BenchReport;
15
16#[derive(Debug)]
17pub struct DiffReport {
18    pub scenario: String,
19    pub p50: MetricDiff,
20    pub p95: MetricDiff,
21    /// `true` if any gated metric exceeded its tolerance.
22    pub regressed: bool,
23    /// Non-gated observations worth printing — e.g. response_bytes
24    /// changed, passes_applied set differs.
25    pub notes: Vec<String>,
26}
27
/// A single numeric metric compared against its baseline value.
///
/// `diff` fills this from the `p50_ms` / `p95_ms` report fields, so the
/// two measurements are milliseconds there.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct MetricDiff {
    /// Value recorded in the baseline report.
    pub baseline: f64,
    /// Value measured in the current run.
    pub current: f64,
    /// Relative change of `current` versus `baseline`, in percent.
    /// Positive means the current value is larger (slower).
    pub delta_pct: f64,
    /// Allowed increase, in percent, before the metric counts as regressed.
    pub tolerance_pct: f64,
    /// `true` when `delta_pct` is strictly above `tolerance_pct`.
    pub regressed: bool,
}
36
37impl MetricDiff {
38    fn new(baseline: f64, current: f64, tolerance_pct: f64) -> Self {
39        let delta_pct = if baseline > 0.0 {
40            ((current - baseline) / baseline) * 100.0
41        } else {
42            0.0
43        };
44        // Negative delta = faster than baseline = never a regression.
45        let regressed = delta_pct > tolerance_pct;
46        Self {
47            baseline,
48            current,
49            delta_pct,
50            tolerance_pct,
51            regressed,
52        }
53    }
54}
55
56pub fn diff(current: &BenchReport, baseline: &BenchReport, tolerance: Tolerance) -> DiffReport {
57    let p50 = MetricDiff::new(
58        baseline.iterations.p50_ms,
59        current.iterations.p50_ms,
60        tolerance.wall_time_p50_pct,
61    );
62    let p95 = MetricDiff::new(
63        baseline.iterations.p95_ms,
64        current.iterations.p95_ms,
65        tolerance.wall_time_p95_pct,
66    );
67
68    let mut notes = Vec::new();
69    if current.scenario.target != baseline.scenario.target {
70        notes.push(format!(
71            "target changed: baseline={} current={} — different units, do not trust the diff",
72            baseline.scenario.target, current.scenario.target
73        ));
74    }
75    if current.passes_applied != baseline.passes_applied {
76        notes.push(format!(
77            "passes_applied changed: baseline={:?} current={:?}",
78            baseline.passes_applied, current.passes_applied
79        ));
80    }
81    // `response_bytes` exact-match: any mismatch is reported and gated.
82    // Output bytes change ⇒ semantics changed (different result, different
83    // formatting) ⇒ regression. `None` on either side is a wash (capture
84    // not available), no gate.
85    let mut response_bytes_regressed = false;
86    if let (Some(c), Some(b)) = (current.response_bytes, baseline.response_bytes)
87        && c != b
88    {
89        notes.push(format!(
90            "response_bytes: baseline={} current={} — regression",
91            b, c
92        ));
93        response_bytes_regressed = true;
94    }
95
96    // `compiler_visible_allocs` is exact-match — any change is reported.
97    // Growing past baseline is a regression (a hot fn just learned to
98    // allocate); shrinking is good but worth noting (a fusion fired
99    // that didn't before, codegen got tighter, or the manifest changed).
100    let mut allocs_regressed = false;
101    if let (Some(c), Some(b)) = (
102        current.compiler_visible_allocs,
103        baseline.compiler_visible_allocs,
104    ) && c != b
105    {
106        if c > b {
107            notes.push(format!(
108                "compiler_visible_allocs grew: baseline={} current={} — regression",
109                b, c
110            ));
111            allocs_regressed = true;
112        } else {
113            notes.push(format!(
114                "compiler_visible_allocs shrank: baseline={} current={} — improvement",
115                b, c
116            ));
117        }
118    }
119
120    DiffReport {
121        scenario: current.scenario.name.clone(),
122        p50,
123        p95,
124        regressed: p50.regressed || p95.regressed || allocs_regressed || response_bytes_regressed,
125        notes,
126    }
127}
128
129/// Render a diff in the same compact column shape as `format_human` for
130/// `BenchReport` — single block per scenario, colour-free (callers add
131/// terminal colour at the print site if desired).
132pub fn format_diff(diff: &DiffReport) -> String {
133    use std::fmt::Write;
134
135    fn fmt_ms(ms: f64) -> String {
136        if ms >= 1.0 {
137            format!("{:.2}ms", ms)
138        } else {
139            format!("{:.0}µs", ms * 1000.0)
140        }
141    }
142    fn fmt_metric(label: &str, m: &MetricDiff) -> String {
143        let sign = if m.delta_pct >= 0.0 { "+" } else { "" };
144        let verdict = if m.regressed {
145            format!("REGRESSION (limit +{:.0}%)", m.tolerance_pct)
146        } else {
147            "ok".to_string()
148        };
149        format!(
150            "  {:<6} {} (baseline {}, {}{:.1}%)  {}",
151            label,
152            fmt_ms(m.current),
153            fmt_ms(m.baseline),
154            sign,
155            m.delta_pct,
156            verdict,
157        )
158    }
159
160    let mut out = String::new();
161    writeln!(
162        out,
163        "{}: {}",
164        diff.scenario,
165        if diff.regressed { "REGRESSION" } else { "ok" }
166    )
167    .ok();
168    writeln!(out, "{}", fmt_metric("p50", &diff.p50)).ok();
169    writeln!(out, "{}", fmt_metric("p95", &diff.p95)).ok();
170    for note in &diff.notes {
171        writeln!(out, "  note: {}", note).ok();
172    }
173    out
174}
aver/bench/compare.rs

aver/bench/
compare.rs