// nornir 0.4.33 - Docs.rs

//! Bench API — the contract child workspaces (znippy, holger, ...)
//! implement so a `nornir release` can drive their benches.
//!
//! Design contract (intentionally tiny):
//!
//! 1. Each repo adds `nornir = { path = "../nornir" }` (or git/version)
//!    under `[dev-dependencies]` ONLY — nothing about nornir leaks into
//!    the published crate.
//! 2. Each repo provides one `nornir-bench.rs` example of literally:
//!    ```ignore
//!    fn main() -> anyhow::Result<()> {
//!        nornir::bench::api::run_main_json()
//!    }
//!    ```
//!    Examples can use dev-deps; bins cannot. That's why this is an
//!    example, not a bin. `run_bench_example` accepts three locations:
//!    `<repo>/examples/nornir-bench.rs` (single package),
//!    `<repo>/xtask/examples/nornir-bench.rs` (workspace, `-p xtask`), or
//!    `<repo>/bench/examples/nornir-bench.rs` (detached `bench/` crate with
//!    its own `[workspace]`; cargo runs inside `bench/` — keeps the harness's
//!    dev-deps out of the published crate without an xtask member).
//! 3. Each bench is a tiny struct implementing [`Bencher`] and
//!    registered via [`register_bench!`]. `inventory` collects them at
//!    link time — no manual registry list to maintain.
//! 4. `cargo run --release --example nornir-bench` prints a
//!    [`crate::bench::BenchRun`] as JSON to stdout. Nornir's release
//!    pipeline subprocess-spawns that, parses stdout, writes the run
//!    into Urðr tagged with the release_id + per-repo git SHA + dep
//!    graph snapshot id.
//!
//! Why on stdout and not over a socket/pipe protocol? Same reason as
//! `cargo metadata`: stdout-JSON is the simplest possible contract
//! between two unrelated build trees. Nornir is the orchestrator;
//! children stay passive and `cargo run`-able in isolation.

use std::sync::atomic::{AtomicBool, Ordering};

use anyhow::Result;
use chrono::Utc;

use super::{BenchResult, BenchRun};

/// Process-global marker for the **single-core pass** (see
/// [`single_core_pass_requested`]).
///
/// It starts out `false`; `run_single_core_pass` stores `true` right before it
/// re-runs the registry and stores `false` again right after. It is a global
/// rather than a parameter because `Bencher::run` takes no arguments — a
/// bencher reads it through [`pinned_to_single_core`] to cap its own
/// parallelism for the `_st` row.
static PINNED_SINGLE_CORE: AtomicBool = AtomicBool::new(false);

/// True iff the runner is currently executing the single-core (`_st`) pass.
///
/// A [`Bencher`] whose `run()` fans work across cores should consult this and
/// cap itself to one worker (e.g. `ThreadPoolBuilder::num_threads(1)`, a serial
/// loop, or honour `RAYON_NUM_THREADS=1` which the runner also sets) so the
/// `_st` row is a genuine single-core figure — the speedup *denominator*.
/// Benchers that don't consult it still get a `_st`-suffixed row carrying
/// `cores=1`; it just won't differ from the parallel one.
///
/// # Returns
/// The current value of the process-global flag: `true` only between the two
/// stores performed by the single-core pass, `false` otherwise.
pub fn pinned_to_single_core() -> bool {
    // A relaxed load is enough here: the flag is a standalone boolean and no
    // other memory is published through it.
    PINNED_SINGLE_CORE.load(Ordering::Relaxed)
}

/// Reports whether the environment asks for an extra single-core (`_st`) pass
/// on top of the normal multi-core pass.
///
/// Two variables are consulted, and either one is sufficient:
///
/// - `NORNIR_BENCH_PIN` — the canonical switch. Its value *names the CPU* to
///   pin to, so **every non-empty value enables the pass**, `0` included
///   (CPU 0 is a perfectly valid target).
/// - `NORNIR_BENCH_ST` — a plain boolean toggle for callers that do not care
///   which CPU is used: unset, empty, or `0` means off; anything else is on.
///
/// A variable that is unset (or whose value is not valid Unicode, which
/// `std::env::var` reports as an error) counts as "off".
///
/// Using env rather than a CLI flag keeps this automatable: one
/// `nornir bench run` / release run with the variable set produces **both**
/// the multi-core and the `_st` rows, and the child repo's
/// `examples/nornir-bench.rs` one-liner stays untouched.
pub fn single_core_pass_requested() -> bool {
    // CPU selector: presence of any text at all is a request.
    let pin_requested = matches!(std::env::var("NORNIR_BENCH_PIN"), Ok(cpu) if !cpu.is_empty());
    // Boolean toggle: "" and "0" both read as "no".
    let st_requested = matches!(
        std::env::var("NORNIR_BENCH_ST"),
        Ok(toggle) if !(toggle.is_empty() || toggle == "0")
    );
    pin_requested || st_requested
}

/// A single bench. `id` is logged into Urðr verbatim; convention is
/// `<repo>.<scenario>` (e.g. `"holger.artifact_throughput"`). Do not end the
/// id in `_st` yourself: the single-core pass appends `_st` to the recorded
/// names, so a hand-written suffix would come out as `_st_st`.
///
/// `run` returns the raw [`BenchResult`]; the runner stamps the run
/// envelope (date, version, machine, cores) automatically.
///
/// The `Sync` bound is required because registrations hand out
/// `&'static dyn Bencher` references.
pub trait Bencher: Sync {
    /// Stable identifier of this bench.
    ///
    /// The runner uses it as the tie-breaker when sorting registrations with
    /// equal `order`, in the progress lines, and as the `name` of the
    /// per-bench `TestOutcome`. Note that it is distinct from the `name`
    /// carried inside the returned [`BenchResult`].
    fn id(&self) -> &'static str;

    /// Execute the bench once and return its metrics.
    ///
    /// # Errors
    /// Any error is recorded by the runner as a failed `TestOutcome` (with the
    /// error chain as its message) instead of aborting the run, unless
    /// stop-on-error is enabled, in which case the remaining benches of that
    /// pass are skipped.
    fn run(&self) -> Result<BenchResult>;
}

/// Registers a `Bencher` so the runner discovers it at link time via
/// `inventory` — no manual registry list to maintain.
///
/// `$expr` is any expression that evaluates to a type implementing
/// `bench::api::Bencher`. It is evaluated, boxed and leaked into a
/// `&'static dyn Bencher` each time the registration's `make` function is
/// called; `run_main_json` calls it exactly once per registration. Use a
/// zero-sized unit struct + `register_bench!(Foo)` for the common case, or any
/// constructor expression for stateful ones.
///
/// Order defaults to `0`. When stages of a pipeline must run in a
/// deterministic sequence (output of stage N feeds stage N+1) use
/// [`register_bench_ordered!`] with ascending integers; the runner
/// sorts by `order` before executing, breaking ties on `id()`.
///
/// The macro is `#[macro_export]`ed, so it lives at the crate root:
/// invoke it as `nornir::register_bench!`, not through `nornir::bench`.
///
/// Usage:
/// ```ignore
/// struct ArtifactThroughput;
/// impl nornir::bench::api::Bencher for ArtifactThroughput { /* … */ }
/// nornir::register_bench!(ArtifactThroughput);
///
/// // or with a constructor:
/// nornir::register_bench!(ArtifactThroughput::with_size(1 << 20));
/// ```
#[macro_export]
macro_rules! register_bench {
    ($expr:expr) => {
        // Single source of truth for the registration shape: an unordered
        // bench is simply an ordered one pinned at the default order 0.
        $crate::register_bench_ordered!(0, $expr);
    };
}

/// Like [`register_bench!`] but pins an explicit execution order.
/// Lower `$order` runs first; equal orders fall back to `id()`
/// lexicographic order. Use for strict pipelines (e.g.
/// `bz2 → pbf` must finish before `pbf → geoparquet` starts).
///
/// `$order` becomes the `order: i32` field of the submitted
/// `BencherRegistration`; `$expr` is the bencher (or a constructor expression
/// producing one). The expression is not evaluated at registration time — it
/// runs inside the `make` function each time that function is called, and the
/// resulting box is leaked to obtain a `&'static dyn Bencher`.
///
/// Usage:
/// ```ignore
/// nornir::register_bench_ordered!(0, Stage1BzToPbf);
/// nornir::register_bench_ordered!(1, Stage2PbfToGeo);
/// ```
#[macro_export]
macro_rules! register_bench_ordered {
    ($order:expr, $expr:expr) => {
        // Reached through `$crate` so the caller needs no `inventory` import
        // of its own for the macro path to resolve.
        $crate::bench::api::inventory_submit! {
            $crate::bench::api::BencherRegistration {
                order: $order,
                // Non-capturing closure: coerces to the `fn()` pointer the
                // `make` field expects.
                make: || {
                    let b: ::std::boxed::Box<dyn $crate::bench::api::Bencher> =
                        ::std::boxed::Box::new($expr);
                    // Deliberate leak: the registry hands out `'static`
                    // references to the bencher.
                    ::std::boxed::Box::leak(b)
                },
            }
        }
    };
}

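// Re-export `inventory::submit!` under this module so the registration macros
// can reach it as `$crate::bench::api::inventory_submit!`; users then only
// ever write `nornir::register_bench!` / `nornir::register_bench_ordered!`.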
pub use inventory::submit as inventory_submit;

/// Inventory entry. The function pointer dodges issues with const
/// `&dyn Trait` references — each registration just returns its
/// `'static` reference on demand.
///
/// `order` is a stable sort key applied before execution. Lower runs
/// first; ties broken on `Bencher::id()`. Default 0 (use
/// [`register_bench_ordered!`] to pin pipeline stages).
///
/// Instances are normally created by the registration macros rather than by
/// hand.
pub struct BencherRegistration {
    /// Execution rank: the runner sorts ascending on this value, then on the
    /// bencher's `id()`.
    pub order: i32,
    /// Produces the bencher. As written by the registration macros, every call
    /// evaluates the registered expression again and leaks a fresh box, so
    /// call it once and keep the returned reference.
    pub make: fn() -> &'static dyn Bencher,
}

// Declares `BencherRegistration` as an `inventory` collection so submitted
// entries can be iterated with `inventory::iter::<BencherRegistration>`.
inventory::collect!(BencherRegistration);

/// Entry point for each repo's `examples/nornir-bench.rs` — the whole example
/// is a single call to this function.
///
/// Steps, in order:
/// 1. Gather every registered [`BencherRegistration`], resolve each to its
///    bencher, and sort by `(order, id)` so pipeline stages execute in a
///    deterministic sequence.
/// 2. Run the benches one after another (never in parallel — a bench machine
///    is expected to be quiescent).
/// 3. If [`single_core_pass_requested`] says so, run them a second time pinned
///    to one core and append those `_st` rows.
/// 4. Wrap everything in a [`BenchRun`] envelope and print it as a single JSON
///    line on stdout.
///
/// A bench that returns an error becomes a failed [`super::TestOutcome`]
/// instead of killing the run. The exception is stop-on-error mode
/// (`NORNIR_BENCH_STOP_ON_ERROR` set to anything other than empty or `0`):
/// there the first failure ends the pass it occurred in, which suits pipelines
/// where stage N+1 consumes stage N's artifact and could not succeed anyway.
///
/// # Errors
/// Only serialising the finished [`BenchRun`] can fail; individual bench
/// failures are reported inside the JSON, not through the return value.
pub fn run_main_json() -> Result<()> {
    // Resolve `make` up front: the bencher's id() is part of the sort key.
    let mut resolved: Vec<(i32, &'static dyn Bencher)> =
        inventory::iter::<BencherRegistration>()
            .map(|reg| (reg.order, (reg.make)()))
            .collect();
    resolved.sort_by_key(|(order, bencher)| (*order, bencher.id()));

    let stop_on_error = matches!(
        std::env::var("NORNIR_BENCH_STOP_ON_ERROR"),
        Ok(flag) if !flag.is_empty() && flag != "0"
    );

    // Must be read before the single-core pass: that pass may pin affinity to
    // one CPU, after which `available_parallelism()` would report 1 and the
    // envelope would misstate the machine.
    let machine_cores = num_cpus_best_effort() as u32;

    // Pass 1: the normal multi-core run.
    let (mut results, mut tests) = run_pass(&resolved, stop_on_error);

    // Pass 2 (optional): the same benches pinned to one core. Their rows come
    // back suffixed `_st` with `cores=1`, so a single invocation yields both
    // the parallel headline and its single-core denominator.
    if single_core_pass_requested() {
        let (single_results, single_tests) = run_single_core_pass(&resolved, stop_on_error);
        results.extend(single_results);
        tests.extend(single_tests);
    }

    let stamped_at = Utc::now();

    // The binary running the benches is frequently a helper crate (a
    // workspace's `xtask`, say), whose own `CARGO_PKG_VERSION` is not the
    // version being measured. A non-empty `NORNIR_BENCH_VERSION` therefore
    // wins; otherwise fall back to the compile-time package version.
    let version = match std::env::var("NORNIR_BENCH_VERSION") {
        Ok(subject) if !subject.is_empty() => subject,
        _ => env!("CARGO_PKG_VERSION").to_string(),
    };

    let run = BenchRun {
        date: stamped_at.format("%Y-%m-%d").to_string(),
        timestamp: Some(stamped_at.to_rfc3339()),
        version,
        machine: std::env::var("NORNIR_MACHINE").unwrap_or_default(),
        cores: machine_cores,
        results,
        tests,
    };
    let json = serde_json::to_string(&run)?;
    println!("{}", json);
    Ok(())
}

/// Number of CPUs the process may currently use, as reported by
/// `std::thread::available_parallelism`; falls back to `1` when the platform
/// cannot answer.
fn num_cpus_best_effort() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    }
}

/// Builds the short human-readable blurb shown after `→` on a bench's
/// progress "done" line.
///
/// Selection rules, first match wins:
/// 1. The first numeric metric (in the map's iteration order) whose
///    lower-cased key looks like a throughput: `*ops_sec` / `ops_per_sec`
///    renders as `"N ops/s"`; `*_mbs` / `*mbps` / `mb_per_sec` renders as
///    `"N MB/s"`.
/// 2. Otherwise the first numeric metric of any name, rendered `key=value`.
/// 3. Otherwise the empty string — the done line then shows only the elapsed
///    time.
///
/// Non-numeric metric values are ignored throughout.
fn summarize_metrics(r: &BenchResult) -> String {
    // The headline figure an operator wants to see live is throughput.
    let throughput = r.metrics.iter().find_map(|(key, value)| {
        let n = value.as_f64()?;
        let key = key.to_ascii_lowercase();
        if key.ends_with("ops_sec") || key == "ops_per_sec" {
            Some(format!("{} ops/s", trim_num(n)))
        } else if key.ends_with("_mbs") || key.ends_with("mbps") || key == "mb_per_sec" {
            Some(format!("{} MB/s", trim_num(n)))
        } else {
            None
        }
    });
    if let Some(blurb) = throughput {
        return blurb;
    }

    // No throughput key: settle for whichever numeric metric comes first.
    r.metrics
        .iter()
        .find_map(|(key, value)| value.as_f64().map(|n| format!("{key}={}", trim_num(n))))
        .unwrap_or_default()
}

/// Formats a metric value compactly for the progress line.
///
/// Whole numbers with magnitude below `1e15` are printed as plain integers (no
/// trailing `.0`); everything else — fractional values, very large values,
/// NaN and the infinities — is printed with two decimals via `{:.2}`.
fn trim_num(n: f64) -> String {
    let is_whole = n.fract() == 0.0;
    let is_small_enough = n.abs() < 1e15;
    match (is_whole, is_small_enough) {
        // Safe to go through i64: the magnitude bound keeps the cast exact.
        (true, true) => (n as i64).to_string(),
        _ => format!("{n:.2}"),
    }
}

/// Executes every resolved bencher once, in slice order, on the calling
/// thread.
///
/// Returns two lists: the [`BenchResult`] of each bench that succeeded, and one
/// [`super::TestOutcome`] for every bench that was attempted (failed ones are
/// marked `passed: false` and carry the error chain as their message). When
/// `stop_on_error` is set, the first failure ends the pass and later benches
/// are not attempted at all. Both the normal and the single-core pass go
/// through this function, so they behave identically.
fn run_pass(
    resolved: &[(i32, &'static dyn Bencher)],
    stop_on_error: bool,
) -> (Vec<BenchResult>, Vec<super::TestOutcome>) {
    use super::progress;

    // Read once per pass: the single-core pass raises this flag before calling
    // in, and the progress lines use it to tell the two passes apart.
    let single_core = pinned_to_single_core();

    let mut results: Vec<BenchResult> = Vec::new();
    let mut tests: Vec<super::TestOutcome> = Vec::new();

    for &(_, bencher) in resolved {
        let id = bencher.id().to_string();

        // Announce the bench before it starts so an observer can see which one
        // is currently running.
        progress::emit(&progress::starting_line(&id, single_core));

        // Telemetry is scoped to exactly this bench: the sampler is started
        // immediately before `run()` and stopped immediately after it returns.
        let sampler = super::telemetry::Sampler::start();
        let started = std::time::Instant::now();
        let outcome = bencher.run();
        let elapsed = started.elapsed();
        let telem = sampler.stop();

        let duration_ms = Some(elapsed.as_secs_f64() * 1000.0);

        // `None` means the bench passed; `Some(msg)` carries the failure text.
        let failure = match outcome {
            Ok(mut result) => {
                // Merge the sampler's figures into the metric map so they
                // travel with the result through the stdout JSON.
                super::telemetry::inject_into_metrics(&mut result.metrics, &telem);
                let blurb = summarize_metrics(&result);
                progress::emit(&progress::done_line_with_telem(
                    &id,
                    single_core,
                    &blurb,
                    elapsed,
                    telem.cores_busy_max,
                    telem.n_cores,
                ));
                results.push(result);
                None
            }
            Err(err) => {
                let msg = format!("{err:#}");
                progress::emit(&progress::failed_line(&id, single_core, &msg, elapsed));
                Some(msg)
            }
        };

        let passed = failure.is_none();
        tests.push(super::TestOutcome {
            name: id,
            passed,
            duration_ms,
            message: failure,
        });

        if stop_on_error && !passed {
            break;
        }
    }

    (results, tests)
}

/// Re-runs the registry constrained to a single core and relabels the rows as
/// single-core variants.
///
/// The constraint is best-effort and applied in layers, so it has an effect
/// whether or not a bencher manages its own thread pool:
/// 1. `RAYON_NUM_THREADS=1` is exported for benchers that size a rayon pool
///    from the environment.
/// 2. `pin_to_cpu_best_effort` is asked to set OS affinity to one CPU. The CPU
///    is the value of `NORNIR_BENCH_PIN` when that parses as a number below
///    64; in every other case it is CPU 0. Setting `NORNIR_BENCH_NO_AFFINITY`
///    makes that helper skip the syscall (tests, or when an outer `taskset`
///    already pins).
/// 3. The process-global flag behind [`pinned_to_single_core`] is raised for
///    the duration of the pass, so a cooperative `Bencher::run()` can cap its
///    own fan-out. It is lowered again before this function returns.
///
/// Afterwards every [`BenchResult`] name and every [`super::TestOutcome`] name
/// gets a `_st` suffix, and each result's metrics gain `cores = 1`, so the two
/// passes cannot collide once merged into one [`BenchRun`].
///
/// Neither the environment variable nor the affinity is restored afterwards.
fn run_single_core_pass(
    resolved: &[(i32, &'static dyn Bencher)],
    stop_on_error: bool,
) -> (Vec<BenchResult>, Vec<super::TestOutcome>) {
    // SAFETY/order: the variable has to be in place before any bencher builds
    // (or rebuilds) a pool during this pass.
    unsafe { std::env::set_var("RAYON_NUM_THREADS", "1") };

    // Target CPU: a numeric `NORNIR_BENCH_PIN` below 64 (the affinity mask is
    // a single u64); anything else falls back to CPU 0.
    let cpu = match std::env::var("NORNIR_BENCH_PIN") {
        Ok(raw) => match raw.trim().parse::<usize>() {
            Ok(n) if n < 64 => n,
            _ => 0,
        },
        Err(_) => 0,
    };
    pin_to_cpu_best_effort(cpu);

    // Raise the cooperative flag only around the pass itself.
    PINNED_SINGLE_CORE.store(true, Ordering::Relaxed);
    let (mut results, mut tests) = run_pass(resolved, stop_on_error);
    PINNED_SINGLE_CORE.store(false, Ordering::Relaxed);

    // Relabel in place so the rows are distinguishable from the normal pass.
    for result in &mut results {
        result.name.push_str("_st");
        result.metrics.insert("cores".into(), serde_json::json!(1));
    }
    for outcome in &mut tests {
        outcome.name.push_str("_st");
    }

    (results, tests)
}

/// Best-effort pin of the current process to a single CPU (Linux only).
///
/// Uses a raw `sched_setaffinity(2)` syscall so no `libc`/`core_affinity`
/// dependency is pulled in. Affinity is a *per-thread* attribute and
/// `pid == 0` addresses only the calling thread, so two steps are needed to
/// cover the whole process:
///
/// 1. pin the calling thread — every thread spawned from it afterwards
///    inherits the mask;
/// 2. pin every thread that already exists, enumerated from
///    `/proc/self/task`. Without this, worker threads kept alive from an
///    earlier pass (e.g. an already-initialised global thread pool) would keep
///    running on all cores and the "single-core" figure would not be one.
///
/// Any failure (non-Linux, sandbox, EPERM, cpu out of range, unreadable
/// `/proc`) is silently ignored — the cooperative flag + `RAYON_NUM_THREADS`
/// still constrain well-behaved benchers. Threads created concurrently with
/// the enumeration may be missed; that is accepted as part of "best effort".
///
/// `cpu` is the zero-based CPU index; values of 64 and above are ignored
/// because the mask is a single `u64`. Setting `NORNIR_BENCH_NO_AFFINITY`
/// (to any value) disables the function entirely.
fn pin_to_cpu_best_effort(cpu: usize) {
    // Escape hatch: skip the real affinity syscall (CI/tests that must not
    // leave the process pinned, or a wrapper like `taskset` already pinning).
    if std::env::var_os("NORNIR_BENCH_NO_AFFINITY").is_some() {
        return;
    }
    #[cfg(target_os = "linux")]
    {
        if cpu >= 64 {
            return; // single-u64 mask covers CPUs 0..63
        }
        // cpu_set_t is a bit array; bit `cpu` selects that CPU. The kernel only
        // reads `cpusetsize` bytes, so one u64 (8 bytes) expresses an affinity
        // of a single CPU in 0..63 on every Linux we target.
        let mask: u64 = 1u64 << cpu;
        let pin_task = |tid: usize| {
            // SAFETY: the syscall number is `SCHED_SETAFFINITY`, and the third
            // argument points at `mask`, a live `u64` that outlives the call
            // and whose size matches the `cpusetsize` passed alongside it. The
            // kernel only reads from that buffer. The result is deliberately
            // discarded (best-effort).
            unsafe {
                libc_syscall::syscall3(
                    libc_syscall::SCHED_SETAFFINITY,
                    tid,
                    std::mem::size_of::<u64>(),
                    (&mask as *const u64) as usize,
                );
            }
        };

        // Step 1: the calling thread (pid 0). Done first so that anything it
        // spawns from here on inherits the single-CPU mask.
        pin_task(0);

        // Step 2: threads that were already running. Each directory entry of
        // `/proc/self/task` is named after a thread id of this process;
        // entries that cannot be read or parsed are skipped.
        if let Ok(tasks) = std::fs::read_dir("/proc/self/task") {
            tasks
                .flatten()
                .filter_map(|entry| entry.file_name().to_str().and_then(|s| s.parse::<usize>().ok()))
                .for_each(pin_task);
        }
    }
    #[cfg(not(target_os = "linux"))]
    {
        let _ = cpu;
    }
}

/// Minimal raw-syscall shim for the one call we need (`sched_setaffinity`) so
/// the single-core pass needs no `libc` dependency. Linux x86-64 / aarch64.
///
/// On any other Linux architecture the module still compiles, but `syscall3`
/// is a stub that performs no syscall at all and just reports failure.
#[cfg(target_os = "linux")]
mod libc_syscall {
    /// Syscall-number type.
    pub type Nr = i64;

    // sched_setaffinity syscall number: 203 on x86-64, 122 on aarch64.
    #[cfg(target_arch = "x86_64")]
    pub const SCHED_SETAFFINITY: Nr = 203;
    #[cfg(target_arch = "aarch64")]
    pub const SCHED_SETAFFINITY: Nr = 122;
    // Fallback for other arches: a clearly-invalid placeholder. It is never
    // handed to the kernel — the matching `syscall3` below is a stub that
    // returns -1 — so callers fall back to cooperative pinning only.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    pub const SCHED_SETAFFINITY: Nr = -1;

    /// `syscall(nr, a, b, c)` — return value ignored (best-effort pin).
    ///
    /// Issues the raw three-argument syscall `nr` and returns whatever the
    /// kernel left in the result register.
    ///
    /// # Safety
    /// Caller must pass arguments valid for the given syscall number. Here it
    /// is only ever invoked with `SCHED_SETAFFINITY` and a live `&u64` mask.
    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    pub unsafe fn syscall3(nr: Nr, a: usize, b: usize, c: usize) -> isize {
        let ret: isize;
        // x86-64: number in rax, arguments in rdi/rsi/rdx, result back in rax.
        // rcx and r11 are declared as clobbered by the `syscall` instruction.
        #[cfg(target_arch = "x86_64")]
        unsafe {
            core::arch::asm!(
                "syscall",
                inlateout("rax") nr as isize => ret,
                in("rdi") a,
                in("rsi") b,
                in("rdx") c,
                lateout("rcx") _,
                lateout("r11") _,
                options(nostack, preserves_flags),
            );
        }
        // aarch64: number in x8, arguments in x0/x1/x2, result back in x0.
        #[cfg(target_arch = "aarch64")]
        unsafe {
            core::arch::asm!(
                "svc 0",
                in("x8") nr as isize,
                inlateout("x0") a => ret,
                in("x1") b,
                in("x2") c,
                options(nostack, preserves_flags),
            );
        }
        ret
    }

    /// Stub for architectures without an inline-asm implementation above:
    /// performs no syscall and always returns `-1`.
    ///
    /// # Safety
    /// Has no preconditions; it is `unsafe` only so that its signature matches
    /// the real implementation.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    pub unsafe fn syscall3(_nr: Nr, _a: usize, _b: usize, _c: usize) -> isize {
        -1
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard, PoisonError};

    // Serialises the env/global-state-mutating tests below; cargo runs tests
    // in parallel and `NORNIR_BENCH_*` / `PINNED_SINGLE_CORE` are process-wide.
    static ENV_GUARD: Mutex<()> = Mutex::new(());

    /// Takes [`ENV_GUARD`], recovering from poisoning.
    ///
    /// The mutex protects no data (`()`), it only orders the tests. If one
    /// env test fails while holding it, the others must still be able to run
    /// and report their own result instead of dying on a `PoisonError`.
    fn env_lock() -> MutexGuard<'static, ()> {
        ENV_GUARD.lock().unwrap_or_else(PoisonError::into_inner)
    }

    /// Trivial bencher registered through the macro, used to prove that
    /// link-time registration works.
    struct Demo;
    impl Bencher for Demo {
        fn id(&self) -> &'static str { "demo.always_42" }
        fn run(&self) -> Result<BenchResult> {
            let mut metrics = serde_json::Map::new();
            metrics.insert("answer".into(), serde_json::json!(42));
            Ok(BenchResult { name: "demo".into(), metrics })
        }
    }
    crate::register_bench!(Demo);

    #[test]
    fn registry_includes_demo() {
        let ids: Vec<&'static str> =
            inventory::iter::<BencherRegistration>().map(|r| (r.make)().id()).collect();
        assert!(ids.contains(&"demo.always_42"), "registry missing demo: {ids:?}");
    }

    /// A bencher that reports whether it observed the single-core pin flag, so
    /// we can assert the runner sets it during the `_st` pass and clears it
    /// after. `saw_pin` is what `pinned_to_single_core()` returned inside
    /// `run()`. Deliberately *not* registered: it is passed to the passes
    /// directly.
    struct PinAware;
    impl Bencher for PinAware {
        fn id(&self) -> &'static str { "demo.pin_aware" }
        fn run(&self) -> Result<BenchResult> {
            let mut metrics = serde_json::Map::new();
            metrics.insert("saw_pin".into(), serde_json::json!(pinned_to_single_core()));
            metrics.insert("ops_sec".into(), serde_json::json!(100));
            Ok(BenchResult { name: "pin_aware".into(), metrics })
        }
    }

    #[test]
    fn single_core_pass_relabels_and_sets_flag() {
        let _g = env_lock();
        // Don't actually pin the test binary to one core.
        unsafe { std::env::set_var("NORNIR_BENCH_NO_AFFINITY", "1") };
        let b: &'static dyn Bencher = &PinAware;
        let resolved: Vec<(i32, &'static dyn Bencher)> = vec![(0, b)];

        // Normal pass: flag is clear, no `_st` suffix.
        let (normal, _) = run_pass(&resolved, false);
        assert_eq!(normal.len(), 1);
        assert_eq!(normal[0].name, "pin_aware");
        assert_eq!(normal[0].metrics["saw_pin"], serde_json::json!(false));

        // Single-core pass: flag is set during run, rows are `_st` + cores=1.
        let (st, st_tests) = run_single_core_pass(&resolved, false);
        assert_eq!(st.len(), 1);
        assert_eq!(st[0].name, "pin_aware_st");
        assert_eq!(st[0].metrics["saw_pin"], serde_json::json!(true));
        assert_eq!(st[0].metrics["cores"], serde_json::json!(1));
        assert_eq!(st_tests[0].name, "demo.pin_aware_st");
        assert!(st_tests[0].passed);

        // Flag is cleared after the pass so a later normal pass is unaffected.
        assert!(!pinned_to_single_core());
        unsafe { std::env::remove_var("NORNIR_BENCH_NO_AFFINITY") };
    }

    #[test]
    fn pass_request_env_parsing() {
        let _g = env_lock();
        // No env set → not requested.
        unsafe {
            std::env::remove_var("NORNIR_BENCH_PIN");
            std::env::remove_var("NORNIR_BENCH_ST");
        }
        assert!(!single_core_pass_requested());

        // ST: `0` / empty are off; any other value is on.
        unsafe { std::env::set_var("NORNIR_BENCH_ST", "0") };
        assert!(!single_core_pass_requested());
        unsafe { std::env::set_var("NORNIR_BENCH_ST", "") };
        assert!(!single_core_pass_requested());
        unsafe { std::env::set_var("NORNIR_BENCH_ST", "1") };
        assert!(single_core_pass_requested());
        unsafe {
            std::env::remove_var("NORNIR_BENCH_ST");
            std::env::set_var("NORNIR_BENCH_PIN", "0");
        }
        // PIN names a CPU; "0" is a valid CPU id and DOES request the pass
        // (the canonical `NORNIR_BENCH_PIN=0` from the design doc).
        assert!(single_core_pass_requested());
        // An empty PIN names no CPU and therefore requests nothing.
        unsafe { std::env::set_var("NORNIR_BENCH_PIN", "") };
        assert!(!single_core_pass_requested());
        unsafe { std::env::remove_var("NORNIR_BENCH_PIN") };
        assert!(!single_core_pass_requested());
    }
}