aver/bench/report.rs
//! Bench report — the structured JSON shape that `aver bench` emits.
//!
//! This is the contract that `aver bench --compare baseline.json` (0.15.2)
//! and the future CI gate read. Adding fields is fine; removing/renaming
//! is a breaking change to that contract.
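//!
//! An abbreviated sketch of the emitted shape. Every value below is
//! illustrative (including the pass names), not output from a real run:
//!
//! ```json
//! {
//!   "scenario": { "name": "fib", "entry": "main", "target": "vm",
//!                 "iterations_count": 100, "warmup_count": 10 },
//!   "backend": { "name": "vm", "aver_version": "0.15.1",
//!                "build": "release", "wasmtime_version": null },
//!   "host": { "os": "linux", "arch": "x86_64", "cpus": 8 },
//!   "iterations": { "min_ms": 1.02, "max_ms": 1.90, "mean_ms": 1.21,
//!                   "p50_ms": 1.10, "p95_ms": 1.60, "p99_ms": 1.85 },
//!   "response_bytes": 6,
//!   "expected_match": true,
//!   "passes_applied": ["const-fold", "dce"],
//!   "compiler_visible_allocs": null
//! }
//! ```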

use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchReport {
11 pub scenario: ScenarioMetadata,
12 /// Identifies the build that ran the bench: aver version, build
13 /// profile, target backend, plus optional version strings for
14 /// per-target runtimes (e.g. wasmtime for `wasm-local`).
15 pub backend: BackendInfo,
16 /// OS / architecture / process identity. Same JSON shape across
17 /// targets; downstream tools join on `host.os + host.arch + backend.name`
18 /// to compare like-for-like across runs.
19 pub host: HostInfo,
20 pub iterations: IterationStats,
21 /// UTF-8 byte count of the last iteration's "result". Semantics
22 /// vary by target:
23 ///
24 /// - `vm`: byte length of `main`'s return value rendered through
25 /// `aver_display` (same path `Console.print` uses). `None` when
26 /// `main` returns `Unit` — those scenarios print for side effect,
27 /// and bench mode silences the console.
28 /// - `wasm-local`: total bytes the guest tried to write through
29 /// `fd_write` (sum of iovec lengths) during the last iteration.
30 /// `0` when the guest never called `fd_write` (most scenarios
31 /// that don't print).
    /// - `rust`: stdout byte count captured from the spawned binary.
    ///   `0` when the binary printed nothing.
34 ///
    /// `vm` thus measures something different from `wasm-local`/`rust`
    /// (rendered return value vs. actual output bytes). `aver bench
    /// --compare` only ever matches same-target baselines, so the
    /// divergence doesn't break gating — the field is exact-match
    /// within a target.
39 pub response_bytes: Option<usize>,
40 /// `true` when the run satisfied every `[expected]` constraint in
41 /// the manifest. `null` when the manifest has no expectations.
42 pub expected_match: Option<bool>,
43 /// Pipeline stages that actually fired. Sourced from the pipeline's
44 /// `on_after_pass` hook so it reflects what *ran*, not what was
45 /// requested.
46 pub passes_applied: Vec<String>,
47 /// IR-level allocation counter. `null` in 0.15.1 — pending the
48 /// `aver compile --explain-allocations` work in 0.15.2.
49 pub compiler_visible_allocs: Option<usize>,
50}
51
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct BackendInfo {
54 /// Target name as parsed from `--target`
55 /// (`vm` / `wasm-local` / `wasm-gc` / `rust`).
56 pub name: String,
57 /// Version of the `aver` binary that ran the bench (Cargo package
58 /// version at compile time of this binary).
59 pub aver_version: String,
60 /// `"release"` or `"debug"`, derived from the calling binary's
61 /// build profile (`debug_assertions` cfg).
62 pub build: String,
63 /// wasmtime crate version when the report came from `--target=wasm-local`,
64 /// `null` otherwise.
65 pub wasmtime_version: Option<String>,
66}
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct HostInfo {
70 /// `"macos"` / `"linux"` / `"windows"` (from `std::env::consts::OS`).
71 pub os: String,
72 /// `"aarch64"` / `"x86_64"` / `"x86"` etc. (from `std::env::consts::ARCH`).
73 pub arch: String,
74 /// Logical CPU count from `std::thread::available_parallelism`.
75 pub cpus: usize,
76}
77
78impl BackendInfo {
79 pub fn for_target(target: crate::bench::manifest::BenchTarget) -> Self {
80 let build = if cfg!(debug_assertions) {
81 "debug"
82 } else {
83 "release"
84 };
85 let wasmtime_version = match target {
            crate::bench::manifest::BenchTarget::WasmLocal => Some(WASMTIME_VERSION.to_string()),
87 _ => None,
88 };
89 Self {
90 name: target.name().to_string(),
91 aver_version: env!("CARGO_PKG_VERSION").to_string(),
92 build: build.to_string(),
93 wasmtime_version,
94 }
95 }
96}
97
98impl HostInfo {
99 pub fn capture() -> Self {
100 let cpus = std::thread::available_parallelism()
101 .map(|n| n.get())
102 .unwrap_or(1);
103 Self {
104 os: std::env::consts::OS.to_string(),
105 arch: std::env::consts::ARCH.to_string(),
106 cpus,
107 }
108 }
109}
110
111/// Wasmtime version string compiled into the bench reports. Bumped
112/// alongside the `wasmtime` dependency in `Cargo.toml`; downstream
113/// tools that compare bench numbers across runs use it to detect
114/// runtime upgrades that might explain a delta.
115const WASMTIME_VERSION: &str = "29";
116
117#[derive(Debug, Clone, Serialize, Deserialize)]
118pub struct ScenarioMetadata {
119 pub name: String,
120 pub entry: String,
121 pub target: String,
122 pub iterations_count: usize,
123 pub warmup_count: usize,
124}
125
126/// Per-iteration wall-clock stats in milliseconds.
127#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct IterationStats {
129 pub min_ms: f64,
130 pub max_ms: f64,
131 pub mean_ms: f64,
132 pub p50_ms: f64,
133 pub p95_ms: f64,
134 pub p99_ms: f64,
135}
136
137/// Render `report` as a multi-line human-readable summary (default
138/// `aver bench` output). The shape is deliberately compact — bench
139/// engineers want one glance to read pass list + percentiles, not
140/// a wall of pretty-printed JSON.
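///
/// Output is shaped roughly like this (values illustrative):
///
/// ```text
/// fib [vm]
///   entry: main
///   backend: aver 0.15.1 (release)
///   host: linux/x86_64 (8 cpus)
///   iterations: 100 (warmup 10)
///   passes: const-fold, dce
///   wall_time: min=1.02ms p50=1.10ms p95=1.60ms max=1.90ms mean=1.21ms
///   response: 6 bytes
///   expected: ok
/// ```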
141pub fn format_human(report: &BenchReport) -> String {
142 use std::fmt::Write;
143
144 fn fmt_ms(ms: f64) -> String {
145 if ms >= 1.0 {
146 format!("{:.2}ms", ms)
147 } else {
148 format!("{:.0}µs", ms * 1000.0)
149 }
150 }
151
152 let mut out = String::new();
153 let s = &report.scenario;
154 let b = &report.backend;
155 let h = &report.host;
156 let it = &report.iterations;
157 writeln!(out, "{} [{}]", s.name, s.target).ok();
158 writeln!(out, " entry: {}", s.entry).ok();
159 let mut backend_line = format!("aver {} ({})", b.aver_version, b.build);
160 if let Some(wt) = &b.wasmtime_version {
161 backend_line.push_str(&format!(", wasmtime {}", wt));
162 }
163 writeln!(out, " backend: {}", backend_line).ok();
164 writeln!(out, " host: {}/{} ({} cpus)", h.os, h.arch, h.cpus).ok();
165 writeln!(
166 out,
167 " iterations: {} (warmup {})",
168 s.iterations_count, s.warmup_count
169 )
170 .ok();
171 writeln!(
172 out,
173 " passes: {}",
174 if report.passes_applied.is_empty() {
175 "(none)".to_string()
176 } else {
177 report.passes_applied.join(", ")
178 }
179 )
180 .ok();
181 writeln!(
182 out,
183 " wall_time: min={} p50={} p95={} max={} mean={}",
184 fmt_ms(it.min_ms),
185 fmt_ms(it.p50_ms),
186 fmt_ms(it.p95_ms),
187 fmt_ms(it.max_ms),
188 fmt_ms(it.mean_ms),
189 )
190 .ok();
191 if let Some(bytes) = report.response_bytes {
192 writeln!(out, " response: {} bytes", bytes).ok();
193 }
194 if let Some(matched) = report.expected_match {
195 writeln!(
196 out,
197 " expected: {}",
198 if matched { "ok" } else { "MISMATCH" }
199 )
200 .ok();
201 }
202 out
203}
204
205impl IterationStats {
206 pub fn from_samples(samples: &[f64]) -> Self {
207 assert!(!samples.is_empty(), "IterationStats requires ≥1 sample");
208 let mut sorted: Vec<f64> = samples.to_vec();
209 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
210 let n = sorted.len();
211 let percentile = |p: f64| -> f64 {
212 // Nearest-rank percentile — small N so the choice between
213 // nearest-rank and linear-interp doesn't matter much; nearest-
214 // rank is dependency-free and reproducible.
215 let idx = ((p / 100.0) * (n as f64)).ceil() as usize;
216 let idx = idx.saturating_sub(1).min(n - 1);
217 sorted[idx]
218 };
219 IterationStats {
220 min_ms: *sorted.first().unwrap(),
221 max_ms: *sorted.last().unwrap(),
222 mean_ms: sorted.iter().sum::<f64>() / (n as f64),
223 p50_ms: percentile(50.0),
224 p95_ms: percentile(95.0),
225 p99_ms: percentile(99.0),
226 }
227 }
228}