Skip to main content

aver/bench/
report.rs

1//! Bench report — the structured JSON shape that `aver bench` emits.
2//!
3//! This is the contract that `aver bench --compare baseline.json` (0.15.2)
4//! and the future CI gate read. Adding fields is fine, removing/renaming
5//! is a breaking change to that contract.
6
7use serde::{Deserialize, Serialize};
8
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchReport {
    /// What was benchmarked: scenario name, entry point, target, and
    /// the configured iteration/warmup counts.
    pub scenario: ScenarioMetadata,
    /// Identifies the build that ran the bench: aver version, build
    /// profile, target backend, plus optional version strings for
    /// per-target runtimes (e.g. wasmtime for `wasm-local`).
    pub backend: BackendInfo,
    /// OS / architecture / process identity. Same JSON shape across
    /// targets; downstream tools join on `host.os + host.arch + backend.name`
    /// to compare like-for-like across runs.
    pub host: HostInfo,
    /// Wall-clock summary (min/max/mean and nearest-rank percentiles),
    /// all in milliseconds.
    pub iterations: IterationStats,
    /// UTF-8 byte count of the last iteration's "result". Semantics
    /// vary by target:
    ///
    /// - `vm`: byte length of `main`'s return value rendered through
    ///   `aver_display` (same path `Console.print` uses). `None` when
    ///   `main` returns `Unit` — those scenarios print for side effect,
    ///   and bench mode silences the console.
    /// - `wasm-local`: total bytes the guest tried to write through
    ///   `fd_write` (sum of iovec lengths) during the last iteration.
    ///   `0` when the guest never called `fd_write` (most scenarios
    ///   that don't print).
    /// - `rust`: actual stdout byte count from the spawned binary's
    ///   subprocess output. `0` when the binary printed nothing.
    ///
    /// VM and wasm-local/rust use different shapes ("rendered return
    /// value" vs "actual stdout"). `aver bench --compare` only ever
    /// matches same-target baselines, so the divergence doesn't break
    /// gating — the field is exact-match within a target.
    pub response_bytes: Option<usize>,
    /// `true` when the run satisfied every `[expected]` constraint in
    /// the manifest. `null` when the manifest has no expectations.
    pub expected_match: Option<bool>,
    /// Pipeline stages that actually fired. Sourced from the pipeline's
    /// `on_after_pass` hook so it reflects what *ran*, not what was
    /// requested.
    pub passes_applied: Vec<String>,
    /// IR-level allocation counter. `null` in 0.15.1 — pending the
    /// `aver compile --explain-allocations` work in 0.15.2.
    pub compiler_visible_allocs: Option<usize>,
}
51
/// Identity of the toolchain/runtime that produced a [`BenchReport`].
/// Downstream comparison tooling keys on `name` (plus host fields) to
/// match like-for-like baselines.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BackendInfo {
    /// Target name as parsed from `--target`
    /// (`vm` / `wasm-local` / `wasm-gc` / `rust`).
    pub name: String,
    /// Version of the `aver` binary that ran the bench (Cargo package
    /// version at compile time of this binary).
    pub aver_version: String,
    /// `"release"` or `"debug"`, derived from the calling binary's
    /// build profile (`debug_assertions` cfg).
    pub build: String,
    /// wasmtime crate version when the report came from a wasm target,
    /// `null` otherwise. (See `BackendInfo::for_target` for exactly
    /// which targets populate this.)
    pub wasmtime_version: Option<String>,
}
67
/// Machine identity captured at bench time. Same JSON shape on every
/// platform so reports from different hosts can be joined/compared.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HostInfo {
    /// `"macos"` / `"linux"` / `"windows"` (from `std::env::consts::OS`).
    pub os: String,
    /// `"aarch64"` / `"x86_64"` / `"x86"` etc. (from `std::env::consts::ARCH`).
    pub arch: String,
    /// Logical CPU count from `std::thread::available_parallelism`.
    pub cpus: usize,
}
77
78impl BackendInfo {
79    pub fn for_target(target: crate::bench::manifest::BenchTarget) -> Self {
80        let build = if cfg!(debug_assertions) {
81            "debug"
82        } else {
83            "release"
84        };
85        let wasmtime_version = match target {
86            crate::bench::manifest::BenchTarget::WasmGc => Some(WASMTIME_VERSION.to_string()),
87            _ => None,
88        };
89        Self {
90            name: target.name().to_string(),
91            aver_version: env!("CARGO_PKG_VERSION").to_string(),
92            build: build.to_string(),
93            wasmtime_version,
94        }
95    }
96}
97
98impl HostInfo {
99    pub fn capture() -> Self {
100        let cpus = std::thread::available_parallelism()
101            .map(|n| n.get())
102            .unwrap_or(1);
103        Self {
104            os: std::env::consts::OS.to_string(),
105            arch: std::env::consts::ARCH.to_string(),
106            cpus,
107        }
108    }
109}
110
/// Wasmtime version string compiled into the bench reports. Bumped
/// alongside the `wasmtime` dependency in `Cargo.toml`; downstream
/// tools that compare bench numbers across runs use it to detect
/// runtime upgrades that might explain a delta.
// NOTE(review): "29" looks like a major version only — confirm this
// matches the exact version pinned in Cargo.toml, or whether the
// full `major.minor.patch` should be recorded here.
const WASMTIME_VERSION: &str = "29";
116
/// Scenario identity and run configuration, echoed into the report so
/// a baseline JSON is self-describing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScenarioMetadata {
    /// Scenario name (presumably from the bench manifest — confirm
    /// against the manifest loader).
    pub name: String,
    /// Entry point that was benchmarked.
    pub entry: String,
    /// Target name the run used (string form of `--target`).
    pub target: String,
    /// Number of measured iterations.
    pub iterations_count: usize,
    /// Number of warmup iterations run before measurement.
    pub warmup_count: usize,
}
125
/// Per-iteration wall-clock stats in milliseconds.
/// Built by [`IterationStats::from_samples`]; percentiles use the
/// nearest-rank method.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IterationStats {
    /// Fastest iteration.
    pub min_ms: f64,
    /// Slowest iteration.
    pub max_ms: f64,
    /// Arithmetic mean over all samples.
    pub mean_ms: f64,
    /// Median (50th percentile, nearest-rank).
    pub p50_ms: f64,
    /// 95th percentile (nearest-rank).
    pub p95_ms: f64,
    /// 99th percentile (nearest-rank).
    pub p99_ms: f64,
}
136
137/// Render `report` as a multi-line human-readable summary (default
138/// `aver bench` output). The shape is deliberately compact — bench
139/// engineers want one glance to read pass list + percentiles, not
140/// a wall of pretty-printed JSON.
141pub fn format_human(report: &BenchReport) -> String {
142    use std::fmt::Write;
143
144    fn fmt_ms(ms: f64) -> String {
145        if ms >= 1.0 {
146            format!("{:.2}ms", ms)
147        } else {
148            format!("{:.0}µs", ms * 1000.0)
149        }
150    }
151
152    let mut out = String::new();
153    let s = &report.scenario;
154    let b = &report.backend;
155    let h = &report.host;
156    let it = &report.iterations;
157    writeln!(out, "{} [{}]", s.name, s.target).ok();
158    writeln!(out, "  entry:        {}", s.entry).ok();
159    let mut backend_line = format!("aver {} ({})", b.aver_version, b.build);
160    if let Some(wt) = &b.wasmtime_version {
161        backend_line.push_str(&format!(", wasmtime {}", wt));
162    }
163    writeln!(out, "  backend:      {}", backend_line).ok();
164    writeln!(out, "  host:         {}/{} ({} cpus)", h.os, h.arch, h.cpus).ok();
165    writeln!(
166        out,
167        "  iterations:   {} (warmup {})",
168        s.iterations_count, s.warmup_count
169    )
170    .ok();
171    writeln!(
172        out,
173        "  passes:       {}",
174        if report.passes_applied.is_empty() {
175            "(none)".to_string()
176        } else {
177            report.passes_applied.join(", ")
178        }
179    )
180    .ok();
181    writeln!(
182        out,
183        "  wall_time:    min={}  p50={}  p95={}  max={}  mean={}",
184        fmt_ms(it.min_ms),
185        fmt_ms(it.p50_ms),
186        fmt_ms(it.p95_ms),
187        fmt_ms(it.max_ms),
188        fmt_ms(it.mean_ms),
189    )
190    .ok();
191    if let Some(bytes) = report.response_bytes {
192        writeln!(out, "  response:     {} bytes", bytes).ok();
193    }
194    if let Some(matched) = report.expected_match {
195        writeln!(
196            out,
197            "  expected:     {}",
198            if matched { "ok" } else { "MISMATCH" }
199        )
200        .ok();
201    }
202    out
203}
204
205impl IterationStats {
206    pub fn from_samples(samples: &[f64]) -> Self {
207        assert!(!samples.is_empty(), "IterationStats requires ≥1 sample");
208        let mut sorted: Vec<f64> = samples.to_vec();
209        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
210        let n = sorted.len();
211        let percentile = |p: f64| -> f64 {
212            // Nearest-rank percentile — small N so the choice between
213            // nearest-rank and linear-interp doesn't matter much; nearest-
214            // rank is dependency-free and reproducible.
215            let idx = ((p / 100.0) * (n as f64)).ceil() as usize;
216            let idx = idx.saturating_sub(1).min(n - 1);
217            sorted[idx]
218        };
219        IterationStats {
220            min_ms: *sorted.first().unwrap(),
221            max_ms: *sorted.last().unwrap(),
222            mean_ms: sorted.iter().sum::<f64>() / (n as f64),
223            p50_ms: percentile(50.0),
224            p95_ms: percentile(95.0),
225            p99_ms: percentile(99.0),
226        }
227    }
228}