zenbench/lib.rs

// Without precise-timing or alloc-profiling, no unsafe is permitted anywhere.
// With either feature, unsafe is denied (errors) but the timing/alloc modules
// can override with #[allow(unsafe_code)] for TSC reads, asm fences, and GlobalAlloc.
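//
// A hypothetical sketch (not the actual timing module) of what such an
// override looks like inside a feature-gated module:
//
//     #[allow(unsafe_code)]
//     #[cfg(target_arch = "x86_64")]
//     fn read_tsc() -> u64 {
//         // SAFETY: RDTSC reads a counter register; no memory is touched.
//         unsafe { core::arch::x86_64::_rdtsc() }
//     }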
#![cfg_attr(
    not(any(feature = "precise-timing", feature = "alloc-profiling")),
    forbid(unsafe_code)
)]
#![cfg_attr(
    any(feature = "precise-timing", feature = "alloc-profiling"),
    deny(unsafe_code)
)]
#![doc = include_str!("../README.md")]

#[cfg(feature = "alloc-profiling")]
mod alloc;
pub mod baseline;
mod bench;
pub mod calibration;
#[cfg(feature = "charts")]
pub mod charts;
mod checks;
mod ci;
#[cfg(feature = "criterion-compat")]
pub mod criterion_compat;
pub mod daemon;
mod engine;
mod format;
mod gate;
mod html;
pub mod mcp;
pub mod platform;
pub mod quickchart;
mod report;
mod results;
mod stats;
#[cfg(feature = "precise-timing")]
mod timing;
#[cfg(feature = "wasm")]
pub mod wasm;

pub use bench::{BenchGroup, Bencher, GroupConfig, Suite, Throughput};

/// Post-run processing: format output, save baseline, compare against baseline.
///
/// Shared between `main!` and `criterion_main!` macros. Not intended for
/// direct use — call via the macros instead.
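///
/// Recognized flags (the values shown are illustrative):
///
/// ```text
/// cargo bench -- --format=json --save-baseline=main
/// cargo bench -- --baseline=main --max-regression=3.0 --update-on-pass
/// ```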
#[doc(hidden)]
pub fn postprocess_result(result: &SuiteResult) {
    let args: Vec<String> = std::env::args().collect();
    let format = args
        .iter()
        .find_map(|a| a.strip_prefix("--format=").map(String::from))
        .or_else(|| std::env::var("ZENBENCH_FORMAT").ok());
    let save_baseline: Option<String> = args
        .iter()
        .find_map(|a| a.strip_prefix("--save-baseline=").map(String::from));
    let baseline_name: Option<String> = args
        .iter()
        .find_map(|a| a.strip_prefix("--baseline=").map(String::from));
    let max_regression: f64 = args
        .iter()
        .find_map(|a| {
            a.strip_prefix("--max-regression=")
                .and_then(|v| v.parse().ok())
        })
        .unwrap_or(5.0);
    let update_on_pass = args.iter().any(|a| a == "--update-on-pass");

    // Output in requested format (to stdout)
    match format.as_deref() {
        Some("llm") => print!("{}", result.to_llm()),
        Some("csv") => print!("{}", result.to_csv()),
        Some("markdown" | "md") => print!("{}", result.to_markdown()),
        Some("html") => print!("{}", result.to_html()),
        Some("json") => {
            if let Ok(json) = serde_json::to_string_pretty(result) {
                println!("{json}");
            }
        }
        _ => {} // default: terminal report already printed to stderr
    }

    // Save as named baseline
    if let Some(ref name) = save_baseline {
        match baseline::save_baseline(result, name) {
            Ok(path) => eprintln!("[zenbench] baseline '{name}' saved to {}", path.display()),
            Err(e) => {
                eprintln!("[zenbench] error saving baseline '{name}': {e}");
                std::process::exit(2);
            }
        }
    }

    // Compare against named baseline
    if let Some(ref name) = baseline_name {
        match baseline::load_baseline(name) {
            Ok(saved) => {
                let comparison = baseline::compare_against_baseline(&saved, result, max_regression);
                baseline::print_comparison_report(&comparison);

                if comparison.regressions > 0 {
                    eprintln!(
                        "\n[zenbench] FAIL: {} regression(s) exceed {max_regression}% threshold",
                        comparison.regressions,
                    );
                    std::process::exit(1);
                } else {
                    eprintln!(
                        "\n[zenbench] PASS: no regressions exceed {max_regression}% threshold"
                    );
                    // --update-on-pass: overwrite baseline with current results
                    if update_on_pass {
                        match baseline::save_baseline(result, name) {
                            Ok(path) => eprintln!(
                                "[zenbench] baseline '{name}' updated (--update-on-pass) → {}",
                                path.display()
                            ),
                            Err(e) => {
                                eprintln!("[zenbench] warning: failed to update baseline: {e}");
                            }
                        }
                    }
                }
            }
            Err(e) => {
                eprintln!("[zenbench] {e}");
                std::process::exit(2);
            }
        }
    }

    // Save results if in fire-and-forget mode
    if let Some(path) = daemon::result_path_from_env() {
        if let Err(e) = result.save(&path) {
            eprintln!("[zenbench] error saving results: {e}");
        }
    }
}

#[cfg(feature = "alloc-profiling")]
pub use alloc::{AllocProfiler, AllocStats};

/// Create an Engine from a Suite (used by criterion_compat macros).
#[doc(hidden)]
pub fn engine_new(suite: Suite) -> engine::Engine {
    engine::Engine::new(suite)
}

pub use format::format_ns;
pub use gate::GateConfig;
pub use platform::Testbed;
pub use results::{BenchmarkResult, ComparisonResult, RunId, SuiteResult};
pub use stats::{MeanCi, PairedAnalysis, Summary};

// `Aggregation`, `run_passes`, and `aggregate_results` are defined later in
// this file and are public directly at the crate root.

/// Thin wrapper around `std::hint::black_box`, exposed for convenience.
///
/// Prevents the compiler from optimizing away benchmark code.
/// Always use this on benchmark return values and inputs.
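///
/// ```
/// // Keep both an input and a result opaque to the optimizer.
/// let n = zenbench::black_box(20u64);
/// let r = zenbench::black_box(n.wrapping_mul(3));
/// assert_eq!(r, 60);
/// ```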
#[inline(always)]
pub fn black_box<T>(x: T) -> T {
    std::hint::black_box(x)
}

/// Prelude for convenient imports.
///
/// ```
/// use zenbench::prelude::*;
/// ```
pub mod prelude {
    pub use crate::bench::{BenchGroup, Bencher, GroupConfig, Suite, Throughput};
    pub use crate::black_box;
    pub use crate::gate::GateConfig;
    pub use crate::results::SuiteResult;
    pub use crate::stats::{MeanCi, PairedAnalysis, Summary};
}

/// Run a benchmark suite with default configuration.
///
/// # Example
/// ```no_run
/// zenbench::run(|suite| {
///     suite.compare("sorting", |group| {
///         let data: Vec<i32> = (0..1000).rev().collect();
///         group.bench("std_sort", move |b| {
///             let d = data.clone();
///             b.with_input(move || d.clone())
///                 .run(|mut v| { v.sort(); v })
///         });
///     });
/// });
/// ```
pub fn run<F: FnOnce(&mut Suite)>(f: F) -> SuiteResult {
    let mut suite = Suite::new();
    f(&mut suite);
    let engine = engine::Engine::new(suite);
    engine.run()
}

/// How to combine N `SuiteResult`s into one.
///
/// Used by both [`run_passes`] (in-process) and [`run_processes`]
/// (cross-OS-process). There is intentionally no default — callers
/// must pick one. The correct choice depends on what you're trying to
/// measure, and every policy answers a different question.
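///
/// A sketch of picking a policy explicitly (`my_benches` is a
/// placeholder suite-defining function):
///
/// ```rust,ignore
/// use zenbench::{run_passes, Aggregation};
///
/// let result = run_passes(5, Aggregation::Median, my_benches);
/// ```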
#[derive(Clone, Copy, Debug)]
pub enum Aggregation {
    /// Keep the run with the lowest mean per benchmark. The reported
    /// `summary` is that run's full within-run summary — `mean`,
    /// `median`, `mad`, `min`, `max`, `n`, all preserved.
    ///
    /// Use on shared/busy hosts, dev machines, and CI runners: noise
    /// from co-tenants, OS interrupts, and thermal throttling only
    /// makes a run slower, so the fastest run is the closest honest
    /// estimate of the hardware's quiet capability.
    ///
    /// Caveat: best-of-N is a **biased** estimator and does not
    /// converge as N grows — the expected minimum drifts downward.
    /// Don't interpret "best of 3" as the same underlying quantity
    /// that "best of 100" would give you. It's useful for comparing
    /// versions of the same code on the same host with the same N,
    /// not as a statistical estimate of the true mean.
    ///
    /// Inter-run spread is reported as a separate footer line so you
    /// can still tell when runs disagreed badly.
    Best,
    /// Replace each bench `summary` with a fresh `Summary` built from
    /// the distribution of per-run means. The resulting `mean` is the
    /// mean of run means, `mad` is the inter-run spread.
    ///
    /// Use on quiet hosts when you want expected-case performance.
    /// Converges via the CLT and is unbiased. Pays for every
    /// contaminated run by dragging the average up — that's the point.
    Mean,
    /// Median of run means. Robust to one or two contaminated runs
    /// without the downward bias of Best. A reasonable middle ground
    /// when you can't characterize the host's noise and don't want to
    /// argue about which policy is correct.
    Median,
}

/// Run a benchmark suite in N sequential "passes" inside the current
/// OS process and combine the per-pass results under the chosen
/// [`Aggregation`] policy.
///
/// A "pass" is one complete suite execution (warmup, calibration,
/// sample collection, within-pass statistics) with a fresh `Suite`
/// and `Engine`. All N passes share the **same OS process** — no
/// `fork` / `exec` — which limits what they can reset.
///
/// # What `run_passes` resets between passes
///
/// These are genuinely re-done for each pass, so passes are useful
/// against noise sources that depend on them:
///
///   * **Calibration** (iterations-per-sample estimation) — redone
///     from scratch.
///   * **Warmup** — re-run for the hot loop.
///   * **Heap addresses of benchmark test data** — the bench-defining
///     closure re-allocates its inputs each pass, so the data lands
///     at a different heap address, shuffling TLB entries and L1/L2
///     set assignments for the data.
///   * **Data-dependent branch-predictor history** — because the
///     data moved, branches keyed by data address re-train.
///
/// # What `run_passes` does **not** reset
///
/// These are constant across all samples in one OS process and
/// therefore **constant across passes**. `run_passes` cannot attack
/// them — they are exactly why you might run `cargo bench` twice from
/// the shell and get different numbers:
///
///   * **CPU frequency / turbo / thermal state.**
///   * **ASLR layout for the binary's code pages** (set at `execve`).
///   * **Kernel page cache** for the binary mappings.
///   * **Kernel scheduler state** (affinity, NUMA node, cpuset).
///   * **Branch predictor tables for the hot code addresses** —
///     same code pages, same training history.
///   * **Background contention** on co-tenant cores.
///
/// For these, you need genuinely separate OS processes. Use
/// [`run_processes`] / `--best-of-processes=N`:
///
/// ```text
/// cargo bench -- --best-of-processes=3
/// ```
///
/// # When to use passes vs processes
///
/// * Your benchmark allocates large per-iteration test data, and you
///   suspect lucky heap alignment is skewing results → **passes**.
/// * Your benchmark's outer loop calibrates differently each run
///   (e.g. data-dependent iteration count estimation) → **passes**.
/// * Your measurements bounce between `cargo bench` invocations but
///   individual runs report `mad ±0.1%` → **processes**
///   (this is the between-OS-process variance signature).
/// * You want both → `--best-of-processes=3 --best-of-passes=2`.
///   Each of 3 OS processes does 2 in-process passes. Total = 6 runs.
///
/// # Why a policy is required
///
/// Rounds reduce **within-pass** variance (timer noise, single-sample
/// interrupts). Passes reduce a **subset of between-OS-process**
/// variance (the parts listed above). Different noise sources want
/// different aggregation rules, and the answer depends on what you're
/// trying to measure:
///
///   * "What's the lowest this code can achieve?" → [`Best`]
///   * "What will my users see on average?" → [`Mean`]
///   * "I don't know and don't want to bias the estimate" → [`Median`]
///
/// Every option answers a different question, and zenbench refuses to
/// pick for you. No default.
///
/// # CLI flags
///
/// The `main!` macro parses these from `cargo bench`:
///
/// ```text
/// --best-of-passes=N          -> run_passes(N, Best,   ...)
/// --mean-of-passes=N          -> run_passes(N, Mean,   ...)
/// --median-of-passes=N        -> run_passes(N, Median, ...)
/// ```
///
/// No flag → a single `run()` call.
///
/// [`Best`]: Aggregation::Best
/// [`Mean`]: Aggregation::Mean
/// [`Median`]: Aggregation::Median
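///
/// Calling it directly rather than via the flags (a sketch; the
/// benchmark body is a placeholder):
///
/// ```rust,ignore
/// let result = zenbench::run_passes(3, zenbench::Aggregation::Best, |suite| {
///     suite.bench_fn("parse", || zenbench::black_box(42));
/// });
/// ```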
pub fn run_passes<F: FnMut(&mut Suite)>(
    passes: usize,
    policy: Aggregation,
    mut f: F,
) -> SuiteResult {
    if passes <= 1 {
        let mut suite = Suite::new();
        f(&mut suite);
        let engine = engine::Engine::new(suite);
        return engine.run();
    }

    // Run each pass silently, then print one aggregated report at the end.
    let mut all_results: Vec<SuiteResult> = Vec::with_capacity(passes);
    for i in 0..passes {
        eprintln!("[zenbench] pass {}/{}", i + 1, passes);
        let mut suite = Suite::new();
        f(&mut suite);
        let engine = engine::Engine::new(suite).quiet(true);
        all_results.push(engine.run());
    }

    let aggregated = aggregate_results(all_results, policy);

    // Final aggregated report (header + groups + footer).
    report::print_header(
        &aggregated.run_id,
        aggregated.git_hash.as_deref(),
        aggregated.ci_environment.as_deref(),
    );
    let banner = match policy {
        Aggregation::Best => {
            format!("[zenbench] best of {passes} passes (min pass mean; within-run mad preserved)")
        }
        Aggregation::Mean => {
            format!("[zenbench] mean of {passes} passes (mad = inter-pass spread)")
        }
        Aggregation::Median => {
            format!("[zenbench] median of {passes} passes (mad = inter-pass spread)")
        }
    };
    eprintln!("{banner}");
    for cmp in &aggregated.comparisons {
        report::print_group(cmp, aggregated.timer_resolution_ns);
    }
    report::print_footer(
        aggregated.total_time,
        aggregated.gate_waits,
        aggregated.gate_wait_time,
        aggregated.unreliable,
    );

    aggregated
}

/// Combine N `SuiteResult`s into one under the caller-chosen policy.
///
/// Source-agnostic: the inputs can be sequential passes produced by
/// [`run_passes`] (inside one OS process) or separate OS processes
/// collected by [`run_processes`]. The policy treats each `SuiteResult`
/// as one observation regardless of how it was produced.
///
/// See [`Aggregation`] for the policy definitions.
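///
/// ```rust,ignore
/// // Sketch: r1, r2, r3 are placeholder `SuiteResult`s collected elsewhere.
/// let combined = zenbench::aggregate_results(vec![r1, r2, r3], zenbench::Aggregation::Mean);
/// ```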
pub fn aggregate_results(runs: Vec<SuiteResult>, policy: Aggregation) -> SuiteResult {
    use std::collections::HashMap;

    if runs.is_empty() {
        return SuiteResult::default();
    }
    if runs.len() == 1 {
        return runs.into_iter().next().unwrap();
    }

    // Collect per-(group, bench) the full list of per-run means.
    // Mean and Median rebuild the summary from this distribution;
    // Best only consults it to skip (group, bench) keys with no
    // observations — the winning run itself is found separately below.
    let mut means: HashMap<(String, String), Vec<f64>> = HashMap::new();
    for result in &runs {
        for cmp in &result.comparisons {
            for bench in &cmp.benchmarks {
                let key = (cmp.group_name.clone(), bench.name.clone());
                means.entry(key).or_default().push(bench.summary.mean);
            }
        }
    }

    // For the Best policy: find the run index with the lowest mean
    // per (group, bench).
    let winners: HashMap<(String, String), usize> =
        runs.iter()
            .enumerate()
            .fold(HashMap::new(), |mut acc, (ri, result)| {
                for cmp in &result.comparisons {
                    for bench in &cmp.benchmarks {
                        let key = (cmp.group_name.clone(), bench.name.clone());
                        let this_mean = bench.summary.mean;
                        acc.entry(key)
                            .and_modify(|best: &mut usize| {
                                let prev_mean =
                                    run_mean(&runs, *best, &cmp.group_name, &bench.name);
                                if this_mean < prev_mean {
                                    *best = ri;
                                }
                            })
                            .or_insert(ri);
                    }
                }
                acc
            });

    // Template = first run. Overwrite each bench's summary according
    // to the chosen policy.
    let mut out = runs[0].clone();
    for cmp in out.comparisons.iter_mut() {
        for bench in cmp.benchmarks.iter_mut() {
            let key = (cmp.group_name.clone(), bench.name.clone());
            let samples = match means.get(&key) {
                Some(s) => s,
                None => continue,
            };
            match policy {
                Aggregation::Best => {
                    // Copy the winning run's full summary verbatim,
                    // including its within-run mad. Inter-run spread
                    // is reported separately in the banner/footer so
                    // this field continues to answer "how jittery was
                    // this specific run?".
                    if let Some(&best_ri) = winners.get(&key) {
                        if let Some(best_bench) = find_bench(&runs[best_ri], &key.0, &key.1) {
                            bench.summary = best_bench.summary.clone();
                        }
                    }
                }
                Aggregation::Mean => {
                    // Rebuild the summary from the distribution of per-run
                    // means. The new `mad` is inter-run spread by
                    // construction (Summary::from_slice computes it from
                    // the input slice).
                    bench.summary = crate::stats::Summary::from_slice(samples);
                }
                Aggregation::Median => {
                    // Same rebuild, but replace the Welford mean with the
                    // median so it matches the advertised policy name.
                    let fresh = crate::stats::Summary::from_slice(samples);
                    bench.summary = fresh.clone();
                    bench.summary.mean = fresh.median;
                }
            }
            // Under every policy, the single-run bootstrap CI no longer
            // describes the distribution the reported mean came from.
            bench.mean_ci = None;
        }
    }
    out
}

fn find_bench<'a>(
    result: &'a SuiteResult,
    group: &str,
    bench_name: &str,
) -> Option<&'a results::BenchmarkResult> {
    result
        .comparisons
        .iter()
        .find(|c| c.group_name == group)?
        .benchmarks
        .iter()
        .find(|b| b.name == bench_name)
}

fn run_mean(results: &[SuiteResult], idx: usize, group: &str, bench_name: &str) -> f64 {
    find_bench(&results[idx], group, bench_name)
        .map(|b| b.summary.mean)
        .unwrap_or(f64::INFINITY)
}

/// Run a benchmark suite with custom gate configuration.
pub fn run_gated<F: FnOnce(&mut Suite)>(gate: GateConfig, f: F) -> SuiteResult {
    let mut suite = Suite::new();
    f(&mut suite);
    let engine = engine::Engine::with_gate(suite, gate);
    engine.run()
}

/// Run a benchmark suite and save results to a JSON file.
///
/// If the `ZENBENCH_RESULT_PATH` env var is set (fire-and-forget mode),
/// results are saved there. Otherwise, results are saved to a timestamped
/// file in the current directory.
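///
/// ```rust,ignore
/// let result = zenbench::run_and_save(|suite| {
///     // Placeholder benchmark; results land in ZENBENCH_RESULT_PATH
///     // or ./zenbench-<run_id>.json.
///     suite.bench_fn("noop", || zenbench::black_box(0));
/// });
/// ```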
pub fn run_and_save<F: FnOnce(&mut Suite)>(f: F) -> SuiteResult {
    let result = run(f);

    let path = daemon::result_path_from_env().unwrap_or_else(|| {
        let name = format!("zenbench-{}.json", result.run_id);
        std::path::PathBuf::from(name)
    });

    if let Err(e) = result.save(&path) {
        eprintln!("[zenbench] error saving results to {}: {e}", path.display());
    } else {
        eprintln!("[zenbench] results saved to {}", path.display());
    }

    result
}

/// Parse the `--{best,mean,median}-of-passes=N` flags. Returns the
/// requested pass count + aggregation policy, or `None` for a
/// single-run invocation. Errors and exits on conflicting flags or
/// invalid values.
///
/// Exposed so both `main!` arms can share one implementation.
#[doc(hidden)]
pub fn parse_pass_args() -> Option<(usize, Aggregation)> {
    let mut found: Option<(usize, Aggregation, &'static str)> = None;
    // Prefix → policy → canonical flag name.
    let flags: &[(&str, Aggregation, &str)] = &[
        ("--best-of-passes=", Aggregation::Best, "--best-of-passes"),
        ("--mean-of-passes=", Aggregation::Mean, "--mean-of-passes"),
        (
            "--median-of-passes=",
            Aggregation::Median,
            "--median-of-passes",
        ),
    ];

    for arg in std::env::args() {
        let parsed: Option<(usize, Aggregation, &'static str)> =
            flags.iter().find_map(|(prefix, policy, name)| {
                arg.strip_prefix(prefix)
                    .and_then(|v| v.parse().ok())
                    .map(|n: usize| (n, *policy, *name))
            });
        if let Some((n, p, name)) = parsed {
            if let Some((_, _, prev_name)) = found {
                eprintln!("[zenbench] error: {prev_name} and {name} are mutually exclusive");
                std::process::exit(2);
            }
            if n == 0 {
                eprintln!("[zenbench] error: {name}=0 is not meaningful");
                std::process::exit(2);
            }
            found = Some((n, p, name));
        }
    }
    found.map(|(n, p, _)| (n, p))
}

/// Parse `--{best,mean,median}-of-processes=N` flags for cross-OS-process
/// aggregation. Returns `None` if `ZENBENCH_SUBPROCESS=1` is set (recursion
/// guard) or if no process flag is present.
#[doc(hidden)]
pub fn parse_process_args() -> Option<(usize, Aggregation)> {
    // Recursion guard: children spawned by run_processes set this.
    if std::env::var("ZENBENCH_SUBPROCESS").as_deref() == Ok("1") {
        return None;
    }
    let mut found: Option<(usize, Aggregation, &'static str)> = None;
    let flags: &[(&str, Aggregation, &str)] = &[
        (
            "--best-of-processes=",
            Aggregation::Best,
            "--best-of-processes",
        ),
        (
            "--mean-of-processes=",
            Aggregation::Mean,
            "--mean-of-processes",
        ),
        (
            "--median-of-processes=",
            Aggregation::Median,
            "--median-of-processes",
        ),
    ];

    for arg in std::env::args() {
        let parsed: Option<(usize, Aggregation, &'static str)> =
            flags.iter().find_map(|(prefix, policy, name)| {
                arg.strip_prefix(prefix)
                    .and_then(|v| v.parse().ok())
                    .map(|n: usize| (n, *policy, *name))
            });
        if let Some((n, p, name)) = parsed {
            if let Some((_, _, prev_name)) = found {
                eprintln!("[zenbench] error: {prev_name} and {name} are mutually exclusive");
                std::process::exit(2);
            }
            if n == 0 {
                eprintln!("[zenbench] error: {name}=0 is not meaningful");
                std::process::exit(2);
            }
            found = Some((n, p, name));
        }
    }
    found.map(|(n, p, _)| (n, p))
}

/// Re-exec the current benchmark binary N times in separate OS processes
/// and aggregate the results.
///
/// Each child gets a fresh ASLR layout, CPU frequency state, scheduler
/// affinity, and page cache — noise sources that in-process `run_passes`
/// cannot reset. The child writes its `SuiteResult` to a temp JSON file
/// via `ZENBENCH_RESULT_PATH`; the parent reads it back and aggregates.
///
/// # CLI flags
///
/// The `main!` macro parses these from `cargo bench`:
///
/// ```text
/// --best-of-processes=N          -> run_processes(N, Best)
/// --mean-of-processes=N          -> run_processes(N, Mean)
/// --median-of-processes=N        -> run_processes(N, Median)
/// ```
///
/// Composable with passes: `--best-of-processes=3 --best-of-passes=2`
/// runs 3 OS processes, each doing 2 in-process passes. Total = 6 runs.
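///
/// Direct call (the `main!` macro normally invokes this for you):
///
/// ```rust,ignore
/// let result = zenbench::run_processes(3, zenbench::Aggregation::Best);
/// ```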
pub fn run_processes(processes: usize, policy: Aggregation) -> SuiteResult {
    let exe = std::env::current_exe().unwrap_or_else(|e| {
        eprintln!("[zenbench] error: cannot determine current executable: {e}");
        std::process::exit(2);
    });

    // Build child argv: strip process flags and post-processing flags
    // (parent handles --format, --save-baseline, --baseline, --update-on-pass
    // on the aggregated result; children just measure and save JSON).
    let child_args: Vec<String> = std::env::args()
        .skip(1) // skip argv[0]
        .filter(|a| {
            !a.starts_with("--best-of-processes=")
                && !a.starts_with("--mean-of-processes=")
                && !a.starts_with("--median-of-processes=")
                && !a.starts_with("--format=")
                && !a.starts_with("--save-baseline=")
                && !a.starts_with("--baseline=")
                && !a.starts_with("--max-regression=")
                && a != "--update-on-pass"
        })
        .collect();

    // Unique run ID for temp files.
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_nanos();
    let pid = std::process::id();
    let run_id = format!("{now:x}-{pid:x}");

    // Launcher PID chain for the benchmark-process gate (issue #5).
    let launcher_pids = match std::env::var("ZENBENCH_LAUNCHER_PIDS") {
        Ok(existing) => format!("{existing},{pid}"),
        Err(_) => pid.to_string(),
    };

    let temp_dir = std::env::temp_dir();
    let temp_paths: Vec<std::path::PathBuf> = (0..processes)
        .map(|i| temp_dir.join(format!("zenbench-proc-{run_id}-{i}.json")))
        .collect();

    let mut results: Vec<SuiteResult> = Vec::with_capacity(processes);
    for (i, path) in temp_paths.iter().enumerate() {
        eprintln!("[zenbench] process {}/{processes}", i + 1);
        let status = std::process::Command::new(&exe)
            .args(&child_args)
            .env("ZENBENCH_SUBPROCESS", "1")
            .env("ZENBENCH_RESULT_PATH", path)
            .env("ZENBENCH_LAUNCHER_PIDS", &launcher_pids)
            .stdin(std::process::Stdio::null())
            .stdout(std::process::Stdio::null())
            .stderr(std::process::Stdio::inherit())
            .status();

        match status {
            Ok(s) if s.success() => {}
            Ok(s) => {
                eprintln!("[zenbench] process {} exited with {s}", i + 1);
                cleanup_temp(&temp_paths);
                std::process::exit(1);
            }
            Err(e) => {
                eprintln!("[zenbench] failed to spawn process {}: {e}", i + 1);
                cleanup_temp(&temp_paths);
                std::process::exit(1);
            }
        }

        match SuiteResult::load(path) {
            Ok(r) => results.push(r),
            Err(e) => {
                eprintln!("[zenbench] process {} produced no results: {e}", i + 1);
                cleanup_temp(&temp_paths);
                std::process::exit(1);
            }
        }
    }

    cleanup_temp(&temp_paths);

    let aggregated = aggregate_results(results, policy);

    // Print the aggregated report.
    report::print_header(
        &aggregated.run_id,
        aggregated.git_hash.as_deref(),
        aggregated.ci_environment.as_deref(),
    );
    let policy_name = match policy {
        Aggregation::Best => "best",
        Aggregation::Mean => "mean",
        Aggregation::Median => "median",
    };
    eprintln!("[zenbench] {policy_name} of {processes} processes (cross-OS-process isolation)");
    for cmp in &aggregated.comparisons {
        report::print_group(cmp, aggregated.timer_resolution_ns);
    }
    report::print_footer(
        aggregated.total_time,
        aggregated.gate_waits,
        aggregated.gate_wait_time,
        aggregated.unreliable,
    );

    aggregated
}

fn cleanup_temp(paths: &[std::path::PathBuf]) {
    for p in paths {
        let _ = std::fs::remove_file(p);
    }
}

/// Macro for defining benchmark binaries with `cargo bench`.
///
/// Use this in a `benches/*.rs` file with `harness = false` in `Cargo.toml`.
///
/// # Examples
///
/// **Function list** (composable — recommended):
/// ```rust,ignore
/// use zenbench::prelude::*;
///
/// fn bench_sort(suite: &mut Suite) {
///     suite.group("sort", |g| {
///         g.throughput(Throughput::Elements(1000));
///         g.bench("std_sort", |b| {
///             b.with_input(|| (0..1000).rev().collect::<Vec<i32>>())
///                 .run(|mut v| { v.sort(); v })
///         });
///         g.bench("sort_unstable", |b| {
///             b.with_input(|| (0..1000).rev().collect::<Vec<i32>>())
///                 .run(|mut v| { v.sort_unstable(); v })
///         });
///     });
/// }
///
/// fn bench_fib(suite: &mut Suite) {
///     suite.bench_fn("fibonacci", || black_box(fib(20)));
/// }
///
/// zenbench::main!(bench_sort, bench_fib);
/// ```
///
/// **Closure** (quick single-file):
/// ```rust,ignore
/// zenbench::main!(|suite| {
///     suite.group("sort", |g| {
///         g.bench("std", |b| b.iter(|| data.sort()));
///         g.bench("unstable", |b| b.iter(|| data.sort_unstable()));
///     });
/// });
/// ```
///
/// In `Cargo.toml`:
/// ```toml
/// [[bench]]
/// name = "my_bench"
/// harness = false
/// ```
#[macro_export]
macro_rules! main {
    // Form 1: function list — composable, like criterion
    ($($func:path),+ $(,)?) => {
        fn main() {
            // Self-trampoline: re-exec in separate OS processes if requested.
            if let Some((n, policy)) = $crate::parse_process_args() {
                let result = $crate::run_processes(n, policy);
                $crate::postprocess_result(&result);
                return;
            }

            let group_filter: Option<String> = std::env::args()
                .find_map(|a| a.strip_prefix("--group=").map(String::from));
            let passes = $crate::parse_pass_args();

            let closure = |suite: &mut $crate::Suite| {
                if let Some(ref filter) = group_filter {
                    suite.set_group_filter(filter.clone());
                }
                $( $func(suite); )+
            };

            let result = match passes {
                Some((n, policy)) => $crate::run_passes(n, policy, closure),
                None => $crate::run(closure),
            };

            $crate::postprocess_result(&result);
        }
    };
    // Form 2: closure — quick single-file benchmarks
    (|$suite:ident| $body:block) => {
        fn main() {
            // Self-trampoline: re-exec in separate OS processes if requested.
            if let Some((n, policy)) = $crate::parse_process_args() {
                let result = $crate::run_processes(n, policy);
                $crate::postprocess_result(&result);
                return;
            }

            let group_filter: Option<String> = std::env::args()
                .find_map(|a| a.strip_prefix("--group=").map(String::from));
            let passes = $crate::parse_pass_args();

            let closure = |$suite: &mut $crate::Suite| {
                if let Some(ref filter) = group_filter {
                    $suite.set_group_filter(filter.clone());
                }
                $body
            };

            let result = match passes {
                Some((n, policy)) => $crate::run_passes(n, policy, closure),
                None => $crate::run(closure),
            };

            $crate::postprocess_result(&result);
        }
    };
}