// aver/bench/report.rs
1//! Bench report — the structured JSON shape that `aver bench` emits.
2//!
3//! This is the contract that `aver bench --compare baseline.json` (0.15.2)
4//! and the future CI gate read. Adding fields is fine, removing/renaming
5//! is a breaking change to that contract.
6
7use serde::{Deserialize, Serialize};
8
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchReport {
    /// Identity and iteration plan of the scenario that was run
    /// (name, entry point, target, iteration/warmup counts).
    pub scenario: ScenarioMetadata,
    /// Identifies the build that ran the bench: aver version, build
    /// profile, target backend, plus optional version strings for
    /// per-target runtimes (e.g. wasmtime for `wasm-local`).
    pub backend: BackendInfo,
    /// OS / architecture / process identity. Same JSON shape across
    /// targets; downstream tools join on `host.os + host.arch + backend.name`
    /// to compare like-for-like across runs.
    pub host: HostInfo,
    /// Wall-clock timing summary (min/max/mean and nearest-rank
    /// percentiles) across the measured iterations, in milliseconds.
    pub iterations: IterationStats,
    /// UTF-8 byte count of the last iteration's "result". Semantics
    /// vary by target:
    ///
    /// - `vm`: byte length of `main`'s return value rendered through
    ///   `aver_display` (same path `Console.print` uses). `None` when
    ///   `main` returns `Unit` — those scenarios print for side effect,
    ///   and bench mode silences the console.
    /// - `wasm-local`: total bytes the guest tried to write through
    ///   `fd_write` (sum of iovec lengths) during the last iteration.
    ///   `0` when the guest never called `fd_write` (most scenarios
    ///   that don't print).
    /// - `rust`: actual stdout byte count from the spawned binary's
    ///   subprocess output. `0` when the binary printed nothing.
    ///
    /// VM and wasm-local/rust use different shapes ("rendered return
    /// value" vs "actual stdout"). `aver bench --compare` only ever
    /// matches same-target baselines, so the divergence doesn't break
    /// gating — the field is exact-match within a target.
    pub response_bytes: Option<usize>,
    /// `true` when the run satisfied every `[expected]` constraint in
    /// the manifest. `null` when the manifest has no expectations.
    pub expected_match: Option<bool>,
    /// Pipeline stages that actually fired. Sourced from the pipeline's
    /// `on_after_pass` hook so it reflects what *ran*, not what was
    /// requested.
    pub passes_applied: Vec<String>,
    /// IR-level allocation counter. `null` in 0.15.1 — pending the
    /// `aver compile --explain-allocations` work in 0.15.2.
    pub compiler_visible_allocs: Option<usize>,
}
51
/// Identity of the `aver` build (and, where relevant, the embedded
/// runtime) that produced a [`BenchReport`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BackendInfo {
    /// Target name as parsed from `--target` (`vm` / `wasm-local` / `rust`).
    pub name: String,
    /// Version of the `aver` binary that ran the bench (Cargo package
    /// version at compile time of this binary).
    pub aver_version: String,
    /// `"release"` or `"debug"`, derived from the calling binary's
    /// build profile (`debug_assertions` cfg).
    pub build: String,
    /// wasmtime crate version when the report came from `--target=wasm-local`,
    /// `null` otherwise.
    pub wasmtime_version: Option<String>,
}
66
/// Machine identity captured when the bench ran; filled in by
/// [`HostInfo::capture`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HostInfo {
    /// `"macos"` / `"linux"` / `"windows"` (from `std::env::consts::OS`).
    pub os: String,
    /// `"aarch64"` / `"x86_64"` / `"x86"` etc. (from `std::env::consts::ARCH`).
    pub arch: String,
    /// Logical CPU count from `std::thread::available_parallelism`.
    pub cpus: usize,
}
76
77impl BackendInfo {
78 pub fn for_target(target: crate::bench::manifest::BenchTarget) -> Self {
79 let build = if cfg!(debug_assertions) {
80 "debug"
81 } else {
82 "release"
83 };
84 let wasmtime_version = match target {
85 crate::bench::manifest::BenchTarget::WasmLocal => Some(WASMTIME_VERSION.to_string()),
86 _ => None,
87 };
88 Self {
89 name: target.name().to_string(),
90 aver_version: env!("CARGO_PKG_VERSION").to_string(),
91 build: build.to_string(),
92 wasmtime_version,
93 }
94 }
95}
96
97impl HostInfo {
98 pub fn capture() -> Self {
99 let cpus = std::thread::available_parallelism()
100 .map(|n| n.get())
101 .unwrap_or(1);
102 Self {
103 os: std::env::consts::OS.to_string(),
104 arch: std::env::consts::ARCH.to_string(),
105 cpus,
106 }
107 }
108}
109
/// Wasmtime version string compiled into the bench reports. Bumped
/// alongside the `wasmtime` dependency in `Cargo.toml`; downstream
/// tools that compare bench numbers across runs use it to detect
/// runtime upgrades that might explain a delta.
///
/// NOTE(review): synced by hand — nothing in this file checks it
/// against the actual `wasmtime` crate version at build time, and it
/// looks like a major version only; confirm against `Cargo.toml`.
const WASMTIME_VERSION: &str = "29";
115
/// Identity and iteration plan of the bench scenario this report
/// describes, echoed into the report so a baseline JSON is
/// self-describing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScenarioMetadata {
    /// Scenario name (first token of the human summary header line).
    pub name: String,
    /// Entry point label — presumably the function the bench invokes;
    /// TODO confirm against the manifest loader.
    pub entry: String,
    /// Target the scenario ran on. NOTE(review): presumably mirrors
    /// `backend.name` (`vm` / `wasm-local` / `rust`) — confirm.
    pub target: String,
    /// Number of measured iterations.
    pub iterations_count: usize,
    /// Number of warmup iterations. NOTE(review): presumably excluded
    /// from `IterationStats` — confirm in the bench runner.
    pub warmup_count: usize,
}
124
/// Per-iteration wall-clock stats in milliseconds.
///
/// Populated by [`IterationStats::from_samples`]; percentiles use the
/// nearest-rank method.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IterationStats {
    /// Fastest iteration.
    pub min_ms: f64,
    /// Slowest iteration.
    pub max_ms: f64,
    /// Arithmetic mean over all iterations.
    pub mean_ms: f64,
    /// Median (50th percentile, nearest-rank).
    pub p50_ms: f64,
    /// 95th percentile (nearest-rank).
    pub p95_ms: f64,
    /// 99th percentile (nearest-rank).
    pub p99_ms: f64,
}
135
136/// Render `report` as a multi-line human-readable summary (default
137/// `aver bench` output). The shape is deliberately compact — bench
138/// engineers want one glance to read pass list + percentiles, not
139/// a wall of pretty-printed JSON.
140pub fn format_human(report: &BenchReport) -> String {
141 use std::fmt::Write;
142
143 fn fmt_ms(ms: f64) -> String {
144 if ms >= 1.0 {
145 format!("{:.2}ms", ms)
146 } else {
147 format!("{:.0}µs", ms * 1000.0)
148 }
149 }
150
151 let mut out = String::new();
152 let s = &report.scenario;
153 let b = &report.backend;
154 let h = &report.host;
155 let it = &report.iterations;
156 writeln!(out, "{} [{}]", s.name, s.target).ok();
157 writeln!(out, " entry: {}", s.entry).ok();
158 let mut backend_line = format!("aver {} ({})", b.aver_version, b.build);
159 if let Some(wt) = &b.wasmtime_version {
160 backend_line.push_str(&format!(", wasmtime {}", wt));
161 }
162 writeln!(out, " backend: {}", backend_line).ok();
163 writeln!(out, " host: {}/{} ({} cpus)", h.os, h.arch, h.cpus).ok();
164 writeln!(
165 out,
166 " iterations: {} (warmup {})",
167 s.iterations_count, s.warmup_count
168 )
169 .ok();
170 writeln!(
171 out,
172 " passes: {}",
173 if report.passes_applied.is_empty() {
174 "(none)".to_string()
175 } else {
176 report.passes_applied.join(", ")
177 }
178 )
179 .ok();
180 writeln!(
181 out,
182 " wall_time: min={} p50={} p95={} max={} mean={}",
183 fmt_ms(it.min_ms),
184 fmt_ms(it.p50_ms),
185 fmt_ms(it.p95_ms),
186 fmt_ms(it.max_ms),
187 fmt_ms(it.mean_ms),
188 )
189 .ok();
190 if let Some(bytes) = report.response_bytes {
191 writeln!(out, " response: {} bytes", bytes).ok();
192 }
193 if let Some(matched) = report.expected_match {
194 writeln!(
195 out,
196 " expected: {}",
197 if matched { "ok" } else { "MISMATCH" }
198 )
199 .ok();
200 }
201 out
202}
203
204impl IterationStats {
205 pub fn from_samples(samples: &[f64]) -> Self {
206 assert!(!samples.is_empty(), "IterationStats requires ≥1 sample");
207 let mut sorted: Vec<f64> = samples.to_vec();
208 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
209 let n = sorted.len();
210 let percentile = |p: f64| -> f64 {
211 // Nearest-rank percentile — small N so the choice between
212 // nearest-rank and linear-interp doesn't matter much; nearest-
213 // rank is dependency-free and reproducible.
214 let idx = ((p / 100.0) * (n as f64)).ceil() as usize;
215 let idx = idx.saturating_sub(1).min(n - 1);
216 sorted[idx]
217 };
218 IterationStats {
219 min_ms: *sorted.first().unwrap(),
220 max_ms: *sorted.last().unwrap(),
221 mean_ms: sorted.iter().sum::<f64>() / (n as f64),
222 p50_ms: percentile(50.0),
223 p95_ms: percentile(95.0),
224 p99_ms: percentile(99.0),
225 }
226 }
227}