Skip to main content

aver/bench/
report.rs

//! Bench report — the structured JSON shape that `aver bench` emits.
//!
//! This is the contract that `aver bench --compare baseline.json` (0.15.2)
//! and the future CI gate read. Adding fields is fine; removing/renaming
//! is a breaking change to that contract.

7use serde::{Deserialize, Serialize};
8
/// Top-level JSON document emitted by `aver bench`; see the module
/// docs for the compatibility contract on fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchReport {
    /// What was benchmarked: scenario name, entry point, target, and
    /// the configured iteration/warmup counts.
    pub scenario: ScenarioMetadata,
    /// Identifies the build that ran the bench: aver version, build
    /// profile, target backend, plus optional version strings for
    /// per-target runtimes (e.g. wasmtime for `wasm-local`).
    pub backend: BackendInfo,
    /// OS / architecture / process identity. Same JSON shape across
    /// targets; downstream tools join on `host.os + host.arch + backend.name`
    /// to compare like-for-like across runs.
    pub host: HostInfo,
    /// Wall-clock summary (min/max/mean plus p50/p95/p99, all in ms)
    /// over the measured iterations.
    pub iterations: IterationStats,
    /// UTF-8 byte count of the last iteration's "result". Semantics
    /// vary by target:
    ///
    /// - `vm`: byte length of `main`'s return value rendered through
    ///   `aver_display` (same path `Console.print` uses). `None` when
    ///   `main` returns `Unit` — those scenarios print for side effect,
    ///   and bench mode silences the console.
    /// - `wasm-local`: total bytes the guest tried to write through
    ///   `fd_write` (sum of iovec lengths) during the last iteration.
    ///   `0` when the guest never called `fd_write` (most scenarios
    ///   that don't print).
    /// - `rust`: actual stdout byte count from the spawned binary's
    ///   subprocess output. `0` when the binary printed nothing.
    ///
    /// VM and wasm-local/rust use different shapes ("rendered return
    /// value" vs "actual stdout"). `aver bench --compare` only ever
    /// matches same-target baselines, so the divergence doesn't break
    /// gating — the field is exact-match within a target.
    pub response_bytes: Option<usize>,
    /// `true` when the run satisfied every `[expected]` constraint in
    /// the manifest. `null` when the manifest has no expectations.
    pub expected_match: Option<bool>,
    /// Pipeline stages that actually fired. Sourced from the pipeline's
    /// `on_after_pass` hook so it reflects what *ran*, not what was
    /// requested.
    pub passes_applied: Vec<String>,
    /// IR-level allocation counter. `null` in 0.15.1 — pending the
    /// `aver compile --explain-allocations` work in 0.15.2.
    pub compiler_visible_allocs: Option<usize>,
}
51
/// Identity of the build/runtime that produced a report; serialized
/// under the `backend` key of [`BenchReport`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BackendInfo {
    /// Target name as parsed from `--target` (`vm` / `wasm-local` / `rust`).
    pub name: String,
    /// Version of the `aver` binary that ran the bench (Cargo package
    /// version at compile time of this binary).
    pub aver_version: String,
    /// `"release"` or `"debug"`, derived from the calling binary's
    /// build profile (`debug_assertions` cfg).
    pub build: String,
    /// wasmtime crate version when the report came from `--target=wasm-local`,
    /// `null` otherwise.
    pub wasmtime_version: Option<String>,
}
66
/// Machine identity captured at run time; downstream tools join on
/// `host.os + host.arch + backend.name` (see [`BenchReport`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HostInfo {
    /// `"macos"` / `"linux"` / `"windows"` (from `std::env::consts::OS`).
    pub os: String,
    /// `"aarch64"` / `"x86_64"` / `"x86"` etc. (from `std::env::consts::ARCH`).
    pub arch: String,
    /// Logical CPU count from `std::thread::available_parallelism`.
    pub cpus: usize,
}
76
77impl BackendInfo {
78    pub fn for_target(target: crate::bench::manifest::BenchTarget) -> Self {
79        let build = if cfg!(debug_assertions) {
80            "debug"
81        } else {
82            "release"
83        };
84        let wasmtime_version = match target {
85            crate::bench::manifest::BenchTarget::WasmLocal => Some(WASMTIME_VERSION.to_string()),
86            _ => None,
87        };
88        Self {
89            name: target.name().to_string(),
90            aver_version: env!("CARGO_PKG_VERSION").to_string(),
91            build: build.to_string(),
92            wasmtime_version,
93        }
94    }
95}
96
97impl HostInfo {
98    pub fn capture() -> Self {
99        let cpus = std::thread::available_parallelism()
100            .map(|n| n.get())
101            .unwrap_or(1);
102        Self {
103            os: std::env::consts::OS.to_string(),
104            arch: std::env::consts::ARCH.to_string(),
105            cpus,
106        }
107    }
108}
109
/// Wasmtime version string compiled into the bench reports. Bumped
/// alongside the `wasmtime` dependency in `Cargo.toml`; downstream
/// tools that compare bench numbers across runs use it to detect
/// runtime upgrades that might explain a delta.
const WASMTIME_VERSION: &str = "29";
115
/// What was run: mirrors the bench manifest's scenario configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScenarioMetadata {
    /// Scenario name (printed as the headline of the human summary).
    pub name: String,
    /// Entry point executed for each iteration.
    // NOTE(review): exact format (path vs function name) not visible
    // here — confirm against the bench manifest parser.
    pub entry: String,
    /// Target string the scenario ran under; printed next to `name`.
    // NOTE(review): presumably the same value as `backend.name` — verify.
    pub target: String,
    /// Number of measured iterations.
    pub iterations_count: usize,
    /// Number of warmup iterations run before measurement.
    pub warmup_count: usize,
}
124
/// Per-iteration wall-clock stats in milliseconds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IterationStats {
    /// Fastest measured iteration.
    pub min_ms: f64,
    /// Slowest measured iteration.
    pub max_ms: f64,
    /// Arithmetic mean over all measured iterations.
    pub mean_ms: f64,
    /// Median (50th percentile, nearest-rank).
    pub p50_ms: f64,
    /// 95th percentile (nearest-rank).
    pub p95_ms: f64,
    /// 99th percentile (nearest-rank).
    pub p99_ms: f64,
}
135
136/// Render `report` as a multi-line human-readable summary (default
137/// `aver bench` output). The shape is deliberately compact — bench
138/// engineers want one glance to read pass list + percentiles, not
139/// a wall of pretty-printed JSON.
140pub fn format_human(report: &BenchReport) -> String {
141    use std::fmt::Write;
142
143    fn fmt_ms(ms: f64) -> String {
144        if ms >= 1.0 {
145            format!("{:.2}ms", ms)
146        } else {
147            format!("{:.0}µs", ms * 1000.0)
148        }
149    }
150
151    let mut out = String::new();
152    let s = &report.scenario;
153    let b = &report.backend;
154    let h = &report.host;
155    let it = &report.iterations;
156    writeln!(out, "{} [{}]", s.name, s.target).ok();
157    writeln!(out, "  entry:        {}", s.entry).ok();
158    let mut backend_line = format!("aver {} ({})", b.aver_version, b.build);
159    if let Some(wt) = &b.wasmtime_version {
160        backend_line.push_str(&format!(", wasmtime {}", wt));
161    }
162    writeln!(out, "  backend:      {}", backend_line).ok();
163    writeln!(out, "  host:         {}/{} ({} cpus)", h.os, h.arch, h.cpus).ok();
164    writeln!(
165        out,
166        "  iterations:   {} (warmup {})",
167        s.iterations_count, s.warmup_count
168    )
169    .ok();
170    writeln!(
171        out,
172        "  passes:       {}",
173        if report.passes_applied.is_empty() {
174            "(none)".to_string()
175        } else {
176            report.passes_applied.join(", ")
177        }
178    )
179    .ok();
180    writeln!(
181        out,
182        "  wall_time:    min={}  p50={}  p95={}  max={}  mean={}",
183        fmt_ms(it.min_ms),
184        fmt_ms(it.p50_ms),
185        fmt_ms(it.p95_ms),
186        fmt_ms(it.max_ms),
187        fmt_ms(it.mean_ms),
188    )
189    .ok();
190    if let Some(bytes) = report.response_bytes {
191        writeln!(out, "  response:     {} bytes", bytes).ok();
192    }
193    if let Some(matched) = report.expected_match {
194        writeln!(
195            out,
196            "  expected:     {}",
197            if matched { "ok" } else { "MISMATCH" }
198        )
199        .ok();
200    }
201    out
202}
203
204impl IterationStats {
205    pub fn from_samples(samples: &[f64]) -> Self {
206        assert!(!samples.is_empty(), "IterationStats requires ≥1 sample");
207        let mut sorted: Vec<f64> = samples.to_vec();
208        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
209        let n = sorted.len();
210        let percentile = |p: f64| -> f64 {
211            // Nearest-rank percentile — small N so the choice between
212            // nearest-rank and linear-interp doesn't matter much; nearest-
213            // rank is dependency-free and reproducible.
214            let idx = ((p / 100.0) * (n as f64)).ceil() as usize;
215            let idx = idx.saturating_sub(1).min(n - 1);
216            sorted[idx]
217        };
218        IterationStats {
219            min_ms: *sorted.first().unwrap(),
220            max_ms: *sorted.last().unwrap(),
221            mean_ms: sorted.iter().sum::<f64>() / (n as f64),
222            p50_ms: percentile(50.0),
223            p95_ms: percentile(95.0),
224            p99_ms: percentile(99.0),
225        }
226    }
227}