Skip to main content

zenbench/
lib.rs

1// Without precise-timing or alloc-profiling, no unsafe is permitted anywhere.
2// With either feature, unsafe is denied (errors) but the timing/alloc modules
3// can override with #[allow(unsafe_code)] for TSC reads, asm fences, and GlobalAlloc.
4#![cfg_attr(
5    not(any(feature = "precise-timing", feature = "alloc-profiling")),
6    forbid(unsafe_code)
7)]
8#![cfg_attr(
9    any(feature = "precise-timing", feature = "alloc-profiling"),
10    deny(unsafe_code)
11)]
12#![doc = include_str!("../README.md")]
13
14#[cfg(feature = "alloc-profiling")]
15mod alloc;
16pub mod baseline;
17mod bench;
18pub mod calibration;
19#[cfg(feature = "charts")]
20pub mod charts;
21mod checks;
22mod ci;
23#[cfg(feature = "criterion-compat")]
24pub mod criterion_compat;
25pub mod daemon;
26mod engine;
27pub mod exclusive;
28mod format;
29mod gate;
30mod html;
31pub mod mcp;
32pub mod platform;
33pub mod quickchart;
34mod report;
35mod results;
36mod stats;
37#[cfg(feature = "precise-timing")]
38mod timing;
39#[cfg(feature = "wasm")]
40pub mod wasm;
41
42pub use bench::{BenchGroup, Bencher, GroupConfig, Suite, Throughput};
43
44/// Post-run processing: format output, save baseline, compare against baseline.
45///
46/// Shared between `main!` and `criterion_main!` macros. Not intended for
47/// direct use — call via the macros instead.
48#[doc(hidden)]
49pub fn postprocess_result(result: &SuiteResult) {
50    let args: Vec<String> = std::env::args().collect();
51    let format = args
52        .iter()
53        .find_map(|a| a.strip_prefix("--format=").map(String::from))
54        .or_else(|| std::env::var("ZENBENCH_FORMAT").ok());
55    let save_baseline: Option<String> = args
56        .iter()
57        .find_map(|a| a.strip_prefix("--save-baseline=").map(String::from));
58    let baseline_name: Option<String> = args
59        .iter()
60        .find_map(|a| a.strip_prefix("--baseline=").map(String::from));
61    let max_regression: f64 = args
62        .iter()
63        .find_map(|a| {
64            a.strip_prefix("--max-regression=")
65                .and_then(|v| v.parse().ok())
66        })
67        .unwrap_or(5.0);
68    let update_on_pass = args.iter().any(|a| a == "--update-on-pass");
69
70    // Output in requested format (to stdout)
71    match format.as_deref() {
72        Some("llm") => print!("{}", result.to_llm()),
73        Some("csv") => print!("{}", result.to_csv()),
74        Some("markdown" | "md") => print!("{}", result.to_markdown()),
75        Some("html") => print!("{}", result.to_html()),
76        Some("json") => {
77            if let Ok(json) = serde_json::to_string_pretty(result) {
78                println!("{json}");
79            }
80        }
81        _ => {} // default: terminal report already printed to stderr
82    }
83
84    // Save as named baseline
85    if let Some(ref name) = save_baseline {
86        match baseline::save_baseline(result, name) {
87            Ok(path) => eprintln!("[zenbench] baseline '{name}' saved to {}", path.display()),
88            Err(e) => {
89                eprintln!("[zenbench] error saving baseline '{name}': {e}");
90                std::process::exit(2);
91            }
92        }
93    }
94
95    // Compare against named baseline
96    if let Some(ref name) = baseline_name {
97        match baseline::load_baseline(name) {
98            Ok(saved) => {
99                let comparison = baseline::compare_against_baseline(&saved, result, max_regression);
100                baseline::print_comparison_report(&comparison);
101
102                if comparison.regressions > 0 {
103                    eprintln!(
104                        "\n[zenbench] FAIL: {} regression(s) exceed {max_regression}% threshold",
105                        comparison.regressions,
106                    );
107                    std::process::exit(1);
108                } else {
109                    eprintln!(
110                        "\n[zenbench] PASS: no regressions exceed {max_regression}% threshold"
111                    );
112                    // --update-on-pass: overwrite baseline with current results
113                    if update_on_pass {
114                        match baseline::save_baseline(result, name) {
115                            Ok(path) => eprintln!(
116                                "[zenbench] baseline '{name}' updated (--update-on-pass) → {}",
117                                path.display()
118                            ),
119                            Err(e) => {
120                                eprintln!("[zenbench] warning: failed to update baseline: {e}");
121                            }
122                        }
123                    }
124                }
125            }
126            Err(e) => {
127                eprintln!("[zenbench] {e}");
128                std::process::exit(2);
129            }
130        }
131    }
132
133    // Save results if in fire-and-forget mode
134    if let Some(path) = daemon::result_path_from_env() {
135        if let Err(e) = result.save(&path) {
136            eprintln!("[zenbench] error saving results: {e}");
137        }
138    }
139}
140#[cfg(feature = "alloc-profiling")]
141pub use alloc::{AllocProfiler, AllocStats};
142
143/// Create an Engine from a Suite (used by criterion_compat macros).
144#[doc(hidden)]
145pub fn engine_new(suite: Suite) -> engine::Engine {
146    engine::Engine::new(suite)
147}
148pub use format::format_ns;
149pub use gate::GateConfig;
150pub use platform::Testbed;
151pub use results::{BenchmarkResult, ComparisonResult, RunId, SuiteResult};
152pub use stats::{MeanCi, PairedAnalysis, Summary};
153
// `Aggregation`, `run_passes`, and `aggregate_results` are defined further
// down in this file, directly in the crate root, so they need no `pub use`.
156
/// Thin wrapper around [`std::hint::black_box`], provided for convenience.
///
/// Returns `x` unchanged while asking the compiler not to optimize the value
/// away. Use it on benchmark inputs and return values.
#[inline(always)]
pub fn black_box<T>(x: T) -> T {
    use std::hint::black_box as opaque;

    opaque(x)
}
165
166/// Prelude for convenient imports.
167///
168/// ```
169/// use zenbench::prelude::*;
170/// ```
171pub mod prelude {
172    pub use crate::bench::{BenchGroup, Bencher, GroupConfig, Suite, Throughput};
173    pub use crate::black_box;
174    pub use crate::gate::GateConfig;
175    pub use crate::results::SuiteResult;
176    pub use crate::stats::{MeanCi, PairedAnalysis, Summary};
177}
178
179/// Run a benchmark suite with default configuration.
180///
181/// # Example
182/// ```no_run
183/// zenbench::run(|suite| {
184///     suite.compare("sorting", |group| {
185///         let data: Vec<i32> = (0..1000).rev().collect();
186///         group.bench("std_sort", move |b| {
187///             let d = data.clone();
188///             b.with_input(move || d.clone())
189///                 .run(|mut v| { v.sort(); v })
190///         });
191///     });
192/// });
193/// ```
194pub fn run<F: FnOnce(&mut Suite)>(f: F) -> SuiteResult {
195    let mut suite = Suite::new();
196    f(&mut suite);
197    let engine = engine::Engine::new(suite);
198    engine.run()
199}
200
/// How to combine N `SuiteResult`s into one.
///
/// Used by both [`run_passes`] (in-process) and [`run_processes`]
/// (cross-OS-process). There is intentionally no default — callers
/// must pick one. The correct choice depends on what you're trying to
/// measure, and every policy answers a different question.
///
/// The type is a plain `Copy` tag; it also implements `PartialEq`, `Eq`
/// and `Hash` so policies can be compared and used as map/set keys.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Aggregation {
    /// Keep the run with the lowest mean per benchmark. The reported
    /// `summary` is that run's full within-run summary — `mean`,
    /// `median`, `mad`, `min`, `max`, `n`, all preserved.
    ///
    /// Use on shared/busy hosts, dev machines, and CI runners: noise
    /// from co-tenants, OS interrupts, and thermal throttling only
    /// makes a run slower, so the fastest run is the closest honest
    /// estimate of the hardware's quiet capability.
    ///
    /// Caveat: best-of-N is a **biased** estimator and does not
    /// converge as N grows — the expected min drifts downward. Don't
    /// interpret "best of 3" as the same underlying quantity that
    /// "best of 100" would give you. It's useful for comparing
    /// versions of the same code on the same host with the same N,
    /// not as a statistical measurement of true mean.
    //
    // NOTE(review): earlier docs promised that inter-run spread is
    // reported as a separate footer line for `Best`. Nothing in this
    // file computes or prints such a line — confirm whether the report
    // module does, or drop the promise.
    Best,
    /// Replace each bench `summary` with a fresh `Summary` built from
    /// the distribution of per-run means. The resulting `mean` is the
    /// mean of run means, `mad` is the inter-run spread.
    ///
    /// Use on quiet hosts when you want expected-case performance.
    /// Converges via CLT and is unbiased. Pays for every contaminated
    /// run by dragging the average up — that's the point.
    Mean,
    /// Median of run means. Robust to one or two contaminated runs
    /// without the downward bias of Best. A reasonable middle ground
    /// when you can't characterize the host's noise and don't want to
    /// argue about which policy is correct.
    Median,
}
242
243/// Run a benchmark suite in N sequential "passes" inside the current
244/// OS process and combine the per-pass results under the chosen
245/// [`Aggregation`] policy.
246///
247/// A "pass" is one complete suite execution (warmup, calibration,
248/// sample collection, within-pass statistics) with a fresh `Suite`
249/// and `Engine`. All N passes share the **same OS process** — no
250/// `fork` / `exec` — which limits what they can reset.
251///
252/// # What `run_passes` resets between passes
253///
254/// These are genuinely re-done for each pass, so passes are useful
255/// against noise sources that depend on them:
256///
257///   * **Calibration** (iterations-per-sample estimation) — redone
258///     from scratch.
259///   * **Warmup** — re-run for the hot loop.
260///   * **Heap addresses of benchmark test data** — the bench-defining
261///     closure re-allocates its inputs each pass, so the data lands
262///     at a different heap address, shuffling TLB entries and L1/L2
263///     set assignments for the data.
264///   * **Data-dependent branch-predictor history** — because the
265///     data moved, branches keyed by data address re-train.
266///
267/// # What `run_passes` does **not** reset
268///
269/// These are constant across all samples in one OS process and
270/// therefore **constant across passes**. `run_passes` cannot attack
271/// them — they are exactly why you might run `cargo bench` twice from
272/// the shell and get different numbers:
273///
274///   * **CPU frequency / turbo / thermal state.**
275///   * **ASLR layout for the binary's code pages** (set at `execve`).
276///   * **Kernel page cache** for the binary mappings.
277///   * **Kernel scheduler state** (affinity, NUMA node, cpuset).
278///   * **Branch predictor tables for the hot code addresses** —
279///     same code pages, same training history.
280///   * **Background contention** on co-tenant cores.
281///
282/// For these, you need actually separate OS processes. Use
283/// [`run_processes`] / `--best-of-processes=N`:
284///
285/// ```text
286/// cargo bench -- --best-of-processes=3
287/// ```
288///
289/// # When to use passes vs processes
290///
291/// * Your benchmark allocates large per-iteration test data, and you
292///   suspect lucky heap alignment is skewing results → **passes**.
293/// * Your benchmark's outer loop calibrates differently each run
294///   (e.g. data-dependent iteration count estimation) → **passes**.
295/// * Your measurements bounce between cargo bench invocations but
296///   individual runs report `mad ±0.1%` → **processes**
297///   (this is the between-OS-process variance signature).
298/// * You want both → `--best-of-processes=3 --best-of-passes=2`.
299///   Each of 3 OS processes does 2 in-process passes. Total = 6 runs.
300///
301/// # Why a policy is required
302///
303/// Rounds reduce **within-pass** variance (timer noise, single-sample
304/// interrupts). Passes reduce a **subset of between-OS-process**
305/// variance (the parts listed above). Different noise sources want
306/// different aggregation rules, and the answer depends on what you're
307/// trying to measure:
308///
309///   * "What's the lowest this code can achieve?" → [`Best`]
310///   * "What will my users see on average?" → [`Mean`]
311///   * "I don't know and don't want to bias the estimate" → [`Median`]
312///
313/// Every option answers a different question, and zenbench refuses to
314/// pick for you. No default.
315///
316/// # CLI flags
317///
318/// The `main!` macro parses these from `cargo bench`:
319///
320/// ```text
321/// --best-of-passes=N          -> run_passes(N, Best,   ...)
322/// --mean-of-passes=N          -> run_passes(N, Mean,   ...)
323/// --median-of-passes=N        -> run_passes(N, Median, ...)
324/// ```
325///
326/// No flag → single `run()` call, exactly like before.
327///
328/// [`Best`]: Aggregation::Best
329/// [`Mean`]: Aggregation::Mean
330/// [`Median`]: Aggregation::Median
331pub fn run_passes<F: FnMut(&mut Suite)>(
332    passes: usize,
333    policy: Aggregation,
334    mut f: F,
335) -> SuiteResult {
336    if passes <= 1 {
337        let mut suite = Suite::new();
338        f(&mut suite);
339        let engine = engine::Engine::new(suite);
340        return engine.run();
341    }
342
343    // Run each pass silently, then print one aggregated report at the end.
344    let mut all_results: Vec<SuiteResult> = Vec::with_capacity(passes);
345    for i in 0..passes {
346        eprintln!("[zenbench] pass {}/{}", i + 1, passes);
347        let mut suite = Suite::new();
348        f(&mut suite);
349        let engine = engine::Engine::new(suite).quiet(true);
350        all_results.push(engine.run());
351    }
352
353    let aggregated = aggregate_results(all_results, policy);
354
355    // Final aggregated report (header + groups + footer).
356    report::print_header(
357        &aggregated.run_id,
358        aggregated.git_hash.as_deref(),
359        aggregated.ci_environment.as_deref(),
360    );
361    let banner = match policy {
362        Aggregation::Best => {
363            format!("[zenbench] best of {passes} passes (min pass mean; within-run mad preserved)")
364        }
365        Aggregation::Mean => {
366            format!("[zenbench] mean of {passes} passes (mad = inter-pass spread)")
367        }
368        Aggregation::Median => {
369            format!("[zenbench] median of {passes} passes (mad = inter-pass spread)")
370        }
371    };
372    eprintln!("{banner}");
373    for cmp in &aggregated.comparisons {
374        report::print_group(cmp, aggregated.timer_resolution_ns);
375    }
376    report::print_footer(
377        aggregated.total_time,
378        aggregated.gate_waits,
379        aggregated.gate_wait_time,
380        aggregated.unreliable,
381    );
382
383    aggregated
384}
385
386/// Combine N `SuiteResult`s into one under the caller-chosen policy.
387///
388/// Source-agnostic: the inputs can be sequential passes produced by
389/// [`run_passes`] (inside one OS process) or separate OS processes
390/// collected by `run_processes`. The policy treats each `SuiteResult`
391/// as one observation regardless of how it was produced.
392///
393/// See [`Aggregation`] for the policy definitions.
394pub fn aggregate_results(runs: Vec<SuiteResult>, policy: Aggregation) -> SuiteResult {
395    use std::collections::HashMap;
396
397    if runs.is_empty() {
398        return SuiteResult::default();
399    }
400    if runs.len() == 1 {
401        return runs.into_iter().next().unwrap();
402    }
403
404    // Collect per-(group, bench) the full list of per-run means.
405    // Used by all policies: Best needs to find the winner; Mean and
406    // Median rebuild the summary from this distribution; Best reports
407    // inter-run spread as a footer line.
408    let mut means: HashMap<(String, String), Vec<f64>> = HashMap::new();
409    for result in &runs {
410        for cmp in &result.comparisons {
411            for bench in &cmp.benchmarks {
412                let key = (cmp.group_name.clone(), bench.name.clone());
413                means.entry(key).or_default().push(bench.summary.mean);
414            }
415        }
416    }
417
418    // For Best policy: find the run index with the lowest mean
419    // per (group, bench).
420    let winners: HashMap<(String, String), usize> =
421        runs.iter()
422            .enumerate()
423            .fold(HashMap::new(), |mut acc, (ri, result)| {
424                for cmp in &result.comparisons {
425                    for bench in &cmp.benchmarks {
426                        let key = (cmp.group_name.clone(), bench.name.clone());
427                        let this_mean = bench.summary.mean;
428                        acc.entry(key)
429                            .and_modify(|best: &mut usize| {
430                                let prev_mean =
431                                    run_mean(&runs, *best, &cmp.group_name, &bench.name);
432                                if this_mean < prev_mean {
433                                    *best = ri;
434                                }
435                            })
436                            .or_insert(ri);
437                    }
438                }
439                acc
440            });
441
442    // Template = first run. Overwrite each bench's summary according
443    // to the chosen policy.
444    let mut out = runs[0].clone();
445    for cmp in out.comparisons.iter_mut() {
446        for bench in cmp.benchmarks.iter_mut() {
447            let key = (cmp.group_name.clone(), bench.name.clone());
448            let samples = match means.get(&key) {
449                Some(s) => s,
450                None => continue,
451            };
452            match policy {
453                Aggregation::Best => {
454                    // Copy the winning run's full summary verbatim,
455                    // including its within-run mad. Inter-run spread
456                    // is reported separately in the banner/footer so
457                    // this field continues to answer "how jittery was
458                    // this specific run?".
459                    if let Some(&best_ri) = winners.get(&key) {
460                        if let Some(best_bench) = find_bench(&runs[best_ri], &key.0, &key.1) {
461                            bench.summary = best_bench.summary.clone();
462                        }
463                    }
464                }
465                Aggregation::Mean => {
466                    // Rebuild summary from the distribution of per-run
467                    // means. The new `mad` is inter-run spread by
468                    // construction (Summary::from_slice computes it from
469                    // the input slice).
470                    bench.summary = crate::stats::Summary::from_slice(samples);
471                }
472                Aggregation::Median => {
473                    // Same rebuild, but replace the Welford-mean with the
474                    // median so it matches the advertised policy name.
475                    let fresh = crate::stats::Summary::from_slice(samples);
476                    bench.summary = fresh.clone();
477                    bench.summary.mean = fresh.median;
478                }
479            }
480            // The single-run bootstrap CI no longer describes the
481            // distribution the reported mean came from in any policy.
482            bench.mean_ci = None;
483        }
484    }
485    out
486}
487
488fn find_bench<'a>(
489    result: &'a SuiteResult,
490    group: &str,
491    bench_name: &str,
492) -> Option<&'a results::BenchmarkResult> {
493    result
494        .comparisons
495        .iter()
496        .find(|c| c.group_name == group)?
497        .benchmarks
498        .iter()
499        .find(|b| b.name == bench_name)
500}
501
502fn run_mean(results: &[SuiteResult], idx: usize, group: &str, bench_name: &str) -> f64 {
503    find_bench(&results[idx], group, bench_name)
504        .map(|b| b.summary.mean)
505        .unwrap_or(f64::INFINITY)
506}
507
508/// Run a benchmark suite with custom gate configuration.
509pub fn run_gated<F: FnOnce(&mut Suite)>(gate: GateConfig, f: F) -> SuiteResult {
510    let mut suite = Suite::new();
511    f(&mut suite);
512    let engine = engine::Engine::with_gate(suite, gate);
513    engine.run()
514}
515
516/// Run a benchmark suite and save results to a JSON file.
517///
518/// If the `ZENBENCH_RESULT_PATH` env var is set (fire-and-forget mode),
519/// results are saved there. Otherwise, results are saved to a timestamped
520/// file in the current directory.
521pub fn run_and_save<F: FnOnce(&mut Suite)>(f: F) -> SuiteResult {
522    let result = run(f);
523
524    let path = daemon::result_path_from_env().unwrap_or_else(|| {
525        let name = format!("zenbench-{}.json", result.run_id);
526        std::path::PathBuf::from(name)
527    });
528
529    if let Err(e) = result.save(&path) {
530        eprintln!("[zenbench] error saving results to {}: {e}", path.display());
531    } else {
532        eprintln!("[zenbench] results saved to {}", path.display());
533    }
534
535    result
536}
537
538/// Macro for defining benchmark binaries with `cargo bench`.
539///
540/// Use this in a `benches/*.rs` file with `harness = false` in `Cargo.toml`.
541///
542/// # Examples
543///
544/// **Function list** (composable — recommended):
545/// ```rust,ignore
546/// use zenbench::prelude::*;
547///
548/// fn bench_sort(suite: &mut Suite) {
549///     suite.group("sort", |g| {
550///         g.throughput(Throughput::Elements(1000));
551///         g.bench("std_sort", |b| {
552///             b.with_input(|| (0..1000).rev().collect::<Vec<i32>>())
553///                 .run(|mut v| { v.sort(); v })
554///         });
555///         g.bench("sort_unstable", |b| {
556///             b.with_input(|| (0..1000).rev().collect::<Vec<i32>>())
557///                 .run(|mut v| { v.sort_unstable(); v })
558///         });
559///     });
560/// }
561///
562/// fn bench_fib(suite: &mut Suite) {
563///     suite.bench_fn("fibonacci", || black_box(fib(20)));
564/// }
565///
566/// zenbench::main!(bench_sort, bench_fib);
567/// ```
568///
569/// **Closure** (quick single-file):
570/// ```rust,ignore
571/// zenbench::main!(|suite| {
572///     suite.group("sort", |g| {
573///         g.bench("std", |b| b.iter(|| data.sort()));
574///         g.bench("unstable", |b| b.iter(|| data.sort_unstable()));
575///     });
576/// });
577/// ```
578///
579/// In `Cargo.toml`:
580/// ```toml
581/// [[bench]]
582/// name = "my_bench"
583/// harness = false
584/// ```
585///
586/// Parse the `--{best,mean,median}-of-passes=N` flags. Returns the
587/// requested pass count + aggregation policy, or `None` for a
588/// single-run invocation. Errors and exits on conflicting flags or
589/// invalid values.
590///
591/// Exposed so both `main!` arms can share one implementation.
592#[doc(hidden)]
593pub fn parse_pass_args() -> Option<(usize, Aggregation)> {
594    let mut found: Option<(usize, Aggregation, &'static str)> = None;
595    // Prefix → policy → canonical flag name.
596    let flags: &[(&str, Aggregation, &str)] = &[
597        ("--best-of-passes=", Aggregation::Best, "--best-of-passes"),
598        ("--mean-of-passes=", Aggregation::Mean, "--mean-of-passes"),
599        (
600            "--median-of-passes=",
601            Aggregation::Median,
602            "--median-of-passes",
603        ),
604    ];
605
606    for arg in std::env::args() {
607        let parsed: Option<(usize, Aggregation, &'static str)> =
608            flags.iter().find_map(|(prefix, policy, name)| {
609                arg.strip_prefix(prefix)
610                    .and_then(|v| v.parse().ok())
611                    .map(|n: usize| (n, *policy, *name))
612            });
613        if let Some((n, p, name)) = parsed {
614            if let Some((_, _, prev_name)) = found {
615                eprintln!("[zenbench] error: {prev_name} and {name} are mutually exclusive");
616                std::process::exit(2);
617            }
618            if n == 0 {
619                eprintln!("[zenbench] error: {name}=0 is not meaningful");
620                std::process::exit(2);
621            }
622            found = Some((n, p, name));
623        }
624    }
625    found.map(|(n, p, _)| (n, p))
626}
627
628/// Parse `--{best,mean,median}-of-processes=N` flags for cross-OS-process
629/// aggregation. Returns `None` if `ZENBENCH_SUBPROCESS=1` is set (recursion
630/// guard) or if no process flag is present.
631#[doc(hidden)]
632pub fn parse_process_args() -> Option<(usize, Aggregation)> {
633    // Recursion guard: children spawned by run_processes set this.
634    if std::env::var("ZENBENCH_SUBPROCESS").as_deref() == Ok("1") {
635        return None;
636    }
637    let mut found: Option<(usize, Aggregation, &'static str)> = None;
638    let flags: &[(&str, Aggregation, &str)] = &[
639        (
640            "--best-of-processes=",
641            Aggregation::Best,
642            "--best-of-processes",
643        ),
644        (
645            "--mean-of-processes=",
646            Aggregation::Mean,
647            "--mean-of-processes",
648        ),
649        (
650            "--median-of-processes=",
651            Aggregation::Median,
652            "--median-of-processes",
653        ),
654    ];
655
656    for arg in std::env::args() {
657        let parsed: Option<(usize, Aggregation, &'static str)> =
658            flags.iter().find_map(|(prefix, policy, name)| {
659                arg.strip_prefix(prefix)
660                    .and_then(|v| v.parse().ok())
661                    .map(|n: usize| (n, *policy, *name))
662            });
663        if let Some((n, p, name)) = parsed {
664            if let Some((_, _, prev_name)) = found {
665                eprintln!("[zenbench] error: {prev_name} and {name} are mutually exclusive");
666                std::process::exit(2);
667            }
668            if n == 0 {
669                eprintln!("[zenbench] error: {name}=0 is not meaningful");
670                std::process::exit(2);
671            }
672            found = Some((n, p, name));
673        }
674    }
675    found.map(|(n, p, _)| (n, p))
676}
677
678/// Re-exec the current benchmark binary N times in separate OS processes
679/// and aggregate the results.
680///
681/// Each child gets a fresh ASLR layout, CPU frequency state, scheduler
682/// affinity, and page cache — noise sources that in-process `run_passes`
683/// cannot reset. The child writes its `SuiteResult` to a temp JSON file
684/// via `ZENBENCH_RESULT_PATH`; the parent reads it back and aggregates.
685///
686/// # CLI flags
687///
688/// The `main!` macro parses these from `cargo bench`:
689///
690/// ```text
691/// --best-of-processes=N          -> run_processes(N, Best)
692/// --mean-of-processes=N          -> run_processes(N, Mean)
693/// --median-of-processes=N        -> run_processes(N, Median)
694/// ```
695///
696/// Composable with passes: `--best-of-processes=3 --best-of-passes=2`
697/// runs 3 OS processes, each doing 2 in-process passes. Total = 6 runs.
698pub fn run_processes(processes: usize, policy: Aggregation) -> SuiteResult {
699    let exe = std::env::current_exe().unwrap_or_else(|e| {
700        eprintln!("[zenbench] error: cannot determine current executable: {e}");
701        std::process::exit(2);
702    });
703
704    // Build child argv: strip process flags and post-processing flags
705    // (parent handles --format, --save-baseline, --baseline, --update-on-pass
706    // on the aggregated result; children just measure and save JSON).
707    let child_args: Vec<String> = std::env::args()
708        .skip(1) // skip argv[0]
709        .filter(|a| {
710            !a.starts_with("--best-of-processes=")
711                && !a.starts_with("--mean-of-processes=")
712                && !a.starts_with("--median-of-processes=")
713                && !a.starts_with("--format=")
714                && !a.starts_with("--save-baseline=")
715                && !a.starts_with("--baseline=")
716                && !a.starts_with("--max-regression=")
717                && a != "--update-on-pass"
718        })
719        .collect();
720
721    // Unique run ID for temp files.
722    let now = std::time::SystemTime::now()
723        .duration_since(std::time::UNIX_EPOCH)
724        .unwrap_or_default()
725        .as_nanos();
726    let pid = std::process::id();
727    let run_id = format!("{now:x}-{pid:x}");
728
729    // Launcher PID chain for the benchmark-process gate (issue #5).
730    let launcher_pids = match std::env::var("ZENBENCH_LAUNCHER_PIDS") {
731        Ok(existing) => format!("{existing},{pid}"),
732        Err(_) => pid.to_string(),
733    };
734
735    let temp_dir = std::env::temp_dir();
736    let temp_paths: Vec<std::path::PathBuf> = (0..processes)
737        .map(|i| temp_dir.join(format!("zenbench-proc-{run_id}-{i}.json")))
738        .collect();
739
740    let mut results: Vec<SuiteResult> = Vec::with_capacity(processes);
741    for (i, path) in temp_paths.iter().enumerate() {
742        eprintln!("[zenbench] process {}/{processes}", i + 1);
743        let status = std::process::Command::new(&exe)
744            .args(&child_args)
745            .env("ZENBENCH_SUBPROCESS", "1")
746            .env("ZENBENCH_RESULT_PATH", path)
747            .env("ZENBENCH_LAUNCHER_PIDS", &launcher_pids)
748            .stdin(std::process::Stdio::null())
749            .stdout(std::process::Stdio::null())
750            .stderr(std::process::Stdio::inherit())
751            .status();
752
753        match status {
754            Ok(s) if s.success() => {}
755            Ok(s) => {
756                eprintln!("[zenbench] process {} exited with {s}", i + 1);
757                cleanup_temp(&temp_paths);
758                std::process::exit(1);
759            }
760            Err(e) => {
761                eprintln!("[zenbench] failed to spawn process {}: {e}", i + 1);
762                cleanup_temp(&temp_paths);
763                std::process::exit(1);
764            }
765        }
766
767        match SuiteResult::load(path) {
768            Ok(r) => results.push(r),
769            Err(e) => {
770                eprintln!("[zenbench] process {} produced no results: {e}", i + 1);
771                cleanup_temp(&temp_paths);
772                std::process::exit(1);
773            }
774        }
775    }
776
777    cleanup_temp(&temp_paths);
778
779    let aggregated = aggregate_results(results, policy);
780
781    // Print the aggregated report.
782    report::print_header(
783        &aggregated.run_id,
784        aggregated.git_hash.as_deref(),
785        aggregated.ci_environment.as_deref(),
786    );
787    let policy_name = match policy {
788        Aggregation::Best => "best",
789        Aggregation::Mean => "mean",
790        Aggregation::Median => "median",
791    };
792    eprintln!("[zenbench] {policy_name} of {processes} processes (cross-OS-process isolation)");
793    for cmp in &aggregated.comparisons {
794        report::print_group(cmp, aggregated.timer_resolution_ns);
795    }
796    report::print_footer(
797        aggregated.total_time,
798        aggregated.gate_waits,
799        aggregated.gate_wait_time,
800        aggregated.unreliable,
801    );
802
803    aggregated
804}
805
/// Best-effort removal of every file in `paths`.
///
/// Errors are deliberately ignored: a path may never have been created
/// (the child failed early) or may already be gone.
fn cleanup_temp(paths: &[std::path::PathBuf]) {
    paths.iter().for_each(|path| {
        let _ = std::fs::remove_file(path);
    });
}
811
812#[macro_export]
813macro_rules! main {
814    // Form 1: function list — composable, like criterion
815    ($($func:path),+ $(,)?) => {
816        fn main() {
817            // Self-trampoline: re-exec in separate OS processes if requested.
818            if let Some((n, policy)) = $crate::parse_process_args() {
819                let result = $crate::run_processes(n, policy);
820                $crate::postprocess_result(&result);
821                return;
822            }
823
824            let group_filter: Option<String> = std::env::args()
825                .find_map(|a| a.strip_prefix("--group=").map(String::from));
826            let passes = $crate::parse_pass_args();
827
828            let closure = |suite: &mut $crate::Suite| {
829                if let Some(ref filter) = group_filter {
830                    suite.set_group_filter(filter.clone());
831                }
832                $( $func(suite); )+
833            };
834
835            let result = match passes {
836                Some((n, policy)) => $crate::run_passes(n, policy, closure),
837                None => $crate::run(closure),
838            };
839
840            $crate::postprocess_result(&result);
841        }
842    };
843    // Form 2: closure — quick single-file benchmarks
844    (|$suite:ident| $body:block) => {
845        fn main() {
846            // Self-trampoline: re-exec in separate OS processes if requested.
847            if let Some((n, policy)) = $crate::parse_process_args() {
848                let result = $crate::run_processes(n, policy);
849                $crate::postprocess_result(&result);
850                return;
851            }
852
853            let group_filter: Option<String> = std::env::args()
854                .find_map(|a| a.strip_prefix("--group=").map(String::from));
855            let passes = $crate::parse_pass_args();
856
857            let closure = |$suite: &mut $crate::Suite| {
858                if let Some(ref filter) = group_filter {
859                    $suite.set_group_filter(filter.clone());
860                }
861                $body
862            };
863
864            let result = match passes {
865                Some((n, policy)) => $crate::run_passes(n, policy, closure),
866                None => $crate::run(closure),
867            };
868
869            $crate::postprocess_result(&result);
870        }
871    };
872}