nornir 0.5.0

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
//! Benchmark harness — project-agnostic.
//!
//! Provides a uniform run/result envelope plus an append-only JSONL
//! history. Repo-specific metrics live in [`BenchResult::metrics`] as
//! a free-form JSON object, so one envelope serves any project.

pub mod api;
pub mod assets;
pub mod history;
pub mod legacy;
pub mod progress;
pub mod telemetry;

use serde::{Deserialize, Serialize};

/// A single benchmark result: a benchmark name plus its measured metrics.
///
/// Repo-specific metrics live in `metrics` so both holger
/// (`holger_ops_sec` / `nexus_ops_sec`) and znippy
/// (`compress_mbs` / `decompress_mbs` / `files`) can share the same
/// run envelope.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchResult {
    /// Benchmark identifier; [`BenchRun::find`] matches on this value.
    pub name: String,
    /// Free-form metric key → JSON value map. Because of `#[serde(flatten)]`
    /// the keys sit directly next to `name` in the serialized object rather
    /// than under a nested `metrics` key.
    // NOTE(review): a metric key literally called `name` would presumably
    // clash with the field above once flattened — confirm writers avoid it.
    #[serde(flatten)]
    pub metrics: serde_json::Map<String, serde_json::Value>,
}

/// Pass/fail outcome of a single test that ran alongside the bench.
///
/// `tests` lives next to `results` on a [`BenchRun`] so the no-regression
/// gate can fail a release both for scalar drops *and* for any test
/// flipping red. Optional fields (`duration_ms`, `message`) carry the
/// extra detail when the runner provides it (criterion / `cargo test`
/// JSON output etc.).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestOutcome {
    /// Test identifier; this is what [`BenchRun::failed_tests`] reports.
    pub name: String,
    /// `true` when the test passed, `false` when it failed.
    pub passed: bool,
    /// How long the test took (milliseconds, going by the field name).
    /// A missing key deserializes to `None`; `None` is left out on output.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub duration_ms: Option<f64>,
    /// Optional free-text detail from the runner (e.g. a failure message).
    /// A missing key deserializes to `None`; `None` is left out on output.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
}

/// One full bench run — one line in `bench_history.jsonl`.
///
/// Backward-compat fields:
/// - `version`, `machine`, `cores` are `#[serde(default)]` so legacy
///   entries (holger 2026-05-27 lines lacking machine/version/cores;
///   znippy lines lacking machine) deserialize cleanly. Going forward
///   all newly written runs must populate them — `history::append`
///   rejects empty `machine`.
/// - `date` (YYYY-MM-DD string) is retained for legacy compatibility.
///   New writers should also populate `timestamp` (RFC 3339, UTC) which
///   becomes the canonical time column in the Iceberg `bench_runs`
///   table once the warehouse phase lands.
/// - `tests` defaults to empty so existing JSONL parses; new runs are
///   expected to populate it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchRun {
    /// Calendar date of the run as a `YYYY-MM-DD` string. Required on input.
    pub date: String,
    /// RFC 3339 (UTC) instant of the run. Missing on input → `None`;
    /// `None` is omitted when serializing.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub timestamp: Option<String>,
    /// Version the run was taken against; empty string when a legacy
    /// entry lacks the key.
    #[serde(default)]
    pub version: String,
    /// Name of the machine the run executed on; empty string when a
    /// legacy entry lacks the key.
    #[serde(default)]
    pub machine: String,
    /// Core count of that machine; `0` when a legacy entry lacks the key.
    #[serde(default)]
    pub cores: u32,
    /// Benchmark results of this run. Required on input.
    pub results: Vec<BenchResult>,
    /// Outcomes of tests that ran alongside the bench. Missing on input →
    /// empty; an empty vec is omitted when serializing.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tests: Vec<TestOutcome>,
}

impl BenchRun {
    /// Look up a result by benchmark name.
    ///
    /// Returns the first entry of `results` whose `name` equals `name`, or
    /// `None` when no result carries that name.
    pub fn find(&self, name: &str) -> Option<&BenchResult> {
        for candidate in &self.results {
            if candidate.name == name {
                return Some(candidate);
            }
        }
        None
    }

    /// Whether the run is free of failed tests.
    ///
    /// A run that recorded no tests at all counts as passing (there is
    /// nothing that failed), so an empty `tests` vec yields `true`.
    pub fn all_tests_passed(&self) -> bool {
        for outcome in &self.tests {
            if !outcome.passed {
                return false;
            }
        }
        true
    }

    /// Names of every failed test, in the order they appear in `tests`.
    pub fn failed_tests(&self) -> Vec<&str> {
        let mut names = Vec::new();
        for outcome in &self.tests {
            if !outcome.passed {
                names.push(outcome.name.as_str());
            }
        }
        names
    }
}

/// Which way is "better" for a metric — used by the `benches` doc renderer
/// to **bold the winning cell** in a comparison row (e.g. ours vs a legacy
/// tool). `High` = bigger is better (throughput), `Low` = smaller is better
/// (latency/time), `Neutral` = not a competition (counts, ratios) → never
/// bolded.
///
/// With `rename_all = "lowercase"` the variants (de)serialize as `"high"`,
/// `"low"` and `"neutral"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MetricDirection {
    /// Bigger is better (e.g. throughput).
    High,
    /// Smaller is better (e.g. latency or elapsed time).
    Low,
    /// Not a competition (counts, ratios) — never bolded.
    Neutral,
}

/// Work out which direction counts as "better" for `metric`.
///
/// Lookup order:
/// 1. an explicit entry in `overrides` (the `benches` renderer fills this
///    from a `best=metric:low` marker arg) always wins;
/// 2. otherwise the direction of the unit implied by the metric name (the
///    `_mbs` / `_ms` / … convention — see [`unit_of`]);
/// 3. otherwise [`MetricDirection::Neutral`].
pub fn direction_of(
    overrides: &std::collections::HashMap<String, MetricDirection>,
    metric: &str,
) -> MetricDirection {
    match overrides.get(metric) {
        Some(explicit) => *explicit,
        None => match unit_of(metric) {
            Some(unit) => unit.direction,
            None => MetricDirection::Neutral,
        },
    }
}

/// A recognised metric unit and its natural direction. Two metric cells are
/// "comparable" (compete in a row, eligible for bolding) iff they share the
/// same `unit.name`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Unit {
    /// Canonical unit label (e.g. `"mbs"`, `"ms"`, `"secs"`) as produced by
    /// [`unit_of`]; equality of this label is what makes two cells comparable.
    pub name: &'static str,
    /// Which way is "better" for values measured in this unit.
    pub direction: MetricDirection,
}

/// Recognise the trailing unit of a metric key. Covers both the
/// `<corpus>_<suffix>` convention (`_mbs`/`_ms`/`_pct`) and the longhand
/// holger-style keys (`mb_per_sec`/`ops_per_sec`/`seconds`). Returns `None`
/// for plain counts (`bytes`, `ops`, `files`) — those never bold.
pub fn unit_of(metric: &str) -> Option<Unit> {
    use MetricDirection::*;
    let m = metric.to_ascii_lowercase();
    // (suffix, unit-name, direction) — order matters: longest/most-specific first.
    const TABLE: &[(&str, &str, MetricDirection)] = &[
        ("mb_per_sec", "mbs", High),
        ("mbps", "mbs", High),
        ("_mbs", "mbs", High),
        ("gb_per_sec", "gbs", High),
        ("_gbs", "gbs", High),
        ("ops_per_sec", "ops_sec", High),
        ("ops_sec", "ops_sec", High),
        ("_ops_sec", "ops_sec", High),
        // generic throughput (rows_per_sec, commits_per_sec, …) — more is better.
        ("_per_sec", "per_sec", High),
        ("seconds", "secs", Low),
        ("_secs", "secs", Low),
        // bare seconds columns (build_s, query_s, total_s) — less is better.
        ("_s", "secs", Low),
        ("_ms", "ms", Low),
        ("_us", "us", Low),
        ("_ns", "ns", Low),
        ("_pct", "pct", Neutral),
        ("_x", "x", Neutral),
        ("speedup", "x", Neutral),
    ];
    for (suffix, name, dir) in TABLE {
        if m == *suffix || m.ends_with(suffix) {
            return Some(Unit { name, direction: *dir });
        }
    }
    None
}

#[cfg(test)]
mod direction_tests {
    use super::*;
    use std::collections::HashMap;

    /// Suffix recognition: unit label and direction are derived from the
    /// metric key, and plain counts yield no unit at all.
    #[test]
    fn unit_and_direction_from_name() {
        let label = |key: &str| unit_of(key).map(|u| u.name);
        let better = |key: &str| unit_of(key).map(|u| u.direction);

        assert_eq!(label("ljar_mbs"), Some("mbs"));
        assert_eq!(better("unzip_mbs"), Some(MetricDirection::High));
        assert_eq!(label("mb_per_sec"), Some("mbs"));
        assert_eq!(label("holger_ops_sec"), Some("ops_sec"));
        assert_eq!(better("decode_ms"), Some(MetricDirection::Low));
        assert_eq!(better("seconds"), Some(MetricDirection::Low));
        assert_eq!(better("speedup_x"), Some(MetricDirection::Neutral));

        // plain counts have no unit → never bolded
        for count_key in &["bytes", "files"] {
            assert!(unit_of(count_key).is_none());
        }
    }

    /// An explicit override beats the suffix convention; everything else
    /// falls back to the suffix, and unknown keys are neutral.
    #[test]
    fn explicit_override_wins() {
        let overrides: HashMap<String, MetricDirection> =
            std::iter::once(("weird_metric".to_string(), MetricDirection::Low)).collect();

        let expectations = [
            // listed in the override map
            ("weird_metric", MetricDirection::Low),
            // unlisted → suffix fallback
            ("ljar_mbs", MetricDirection::High),
            ("count", MetricDirection::Neutral),
        ];
        for &(metric, expected) in &expectations {
            assert_eq!(direction_of(&overrides, metric), expected);
        }
    }
}

/// `bench history-show` rendered as a uniform [`crate::cli_outcome::CommandOutcome`]
/// — the shared DATA FACE for the CLI (`run_bench`) and, by parity, the viz Bench
/// tab.
///
/// `runs` arrive from the canonical source (warehouse `query_bench_runs` in fat
/// mode, the `Bench.History` RPC in thin mode); this fn only SHAPES + renders
/// them, it does not fetch (CommandOutcome is presentation, never a transport).
///
/// - `repo`: repository name, used in the header, the failure message and the
///   `repo` key of the JSON payload.
/// - `runs`: the runs to show; sorted newest first by `timestamp` when present,
///   else `date` (plain string comparison).
/// - `limit`: keep at most this many runs after sorting; `0` means no cap.
///
/// `ok ⟺ ≥1 run` (RAGNARÖK: an empty history is RED, never silently green).
/// On success `data` is `{ "repo": …, "runs": [...] }` and `human` holds a
/// header line followed by one summary block per run (version/machine/cores/
/// date, numeric metrics sorted by `result.metric` label, test tally and any
/// failed tests). AUT9 — see `.nornir/cli-command-contract.md`.
pub fn history_outcome(
    repo: &str,
    mut runs: Vec<BenchRun>,
    limit: usize,
) -> crate::cli_outcome::CommandOutcome {
    use crate::cli_outcome::CommandOutcome;
    // Newest first (timestamp if present, else date), then optional cap.
    runs.sort_by(|x, y| {
        let kx = x.timestamp.as_deref().unwrap_or(&x.date);
        let ky = y.timestamp.as_deref().unwrap_or(&y.date);
        // Reversed operands → descending order.
        ky.cmp(kx)
    });
    if limit > 0 {
        runs.truncate(limit);
    }
    if runs.is_empty() {
        return CommandOutcome::fail(
            "bench history-show",
            format!("no bench runs recorded for `{repo}`"),
        );
    }
    // The elaborate per-run summary the CLI used to print inline (now shared).
    // Repo name and run count need a separator, otherwise the header reads
    // like `holger2 run(s)`.
    let mut human = format!("{repo} — {} run(s)", runs.len());
    for r in &runs {
        let date = if r.date.is_empty() { "-" } else { r.date.as_str() };
        let machine = if r.machine.is_empty() { "-" } else { r.machine.as_str() };
        human.push_str(&format!("\n\nv{} · {} · {} cores · {}", r.version, machine, r.cores, date));
        // Only metrics representable as a number are listed; other JSON
        // values (strings, objects, …) are skipped.
        let mut scalars: Vec<String> = Vec::new();
        for res in &r.results {
            for (k, v) in &res.metrics {
                if let Some(f) = v.as_f64() {
                    scalars.push(format!("{}.{k}={f:.2}", res.name));
                }
            }
        }
        scalars.sort();
        if scalars.is_empty() {
            human.push_str("\n  metrics: (none)");
        } else {
            human.push_str(&format!("\n  metrics: {}", scalars.join("  ")));
        }
        if r.tests.is_empty() {
            human.push_str("\n  tests:   (none recorded)");
        } else {
            let passed = r.tests.iter().filter(|t| t.passed).count();
            let failed = r.tests.len() - passed;
            human.push_str(&format!("\n  tests:   {passed} passed, {failed} failed"));
            // Failed tests are listed indented beneath the tally so they
            // read as part of this run's block; the message is appended
            // only when there is one (no trailing space otherwise).
            for t in r.tests.iter().filter(|t| !t.passed) {
                human.push_str(&format!("\n    {}", t.name));
                if let Some(msg) = t.message.as_deref().filter(|m| !m.is_empty()) {
                    human.push_str(&format!(" {msg}"));
                }
            }
        }
    }
    let data = serde_json::json!({ "repo": repo, "runs": runs });
    CommandOutcome::ok("bench history-show", data, human)
}

#[cfg(test)]
mod history_outcome_tests {
    use super::*;

    /// Fixture: a run on `machine` dated `date`, timestamped at midnight UTC
    /// of that date, with no results and no tests.
    fn sample_run(date: &str, machine: &str) -> BenchRun {
        let midnight = format!("{date}T00:00:00Z");
        BenchRun {
            date: date.to_owned(),
            timestamp: Some(midnight),
            version: String::from("0.1.0"),
            machine: machine.to_owned(),
            cores: 8,
            results: vec![],
            tests: vec![],
        }
    }

    /// No runs at all must produce a failing outcome with an explanation.
    #[test]
    fn empty_history_is_red_not_silently_green() {
        let outcome = history_outcome("holger", vec![], 0);

        assert_eq!(outcome.command, "bench history-show");
        assert!(!outcome.is_sannr(), "empty bench history must be RED (RAGNARÖK)");
        assert!(outcome.human.contains("no bench runs"));
    }

    /// With runs present the outcome is OK, ordered newest first, and the
    /// `limit` cap is applied after sorting.
    #[test]
    fn real_runs_are_sannr_newest_first_and_limited() {
        let older = sample_run("2026-06-01", "ryzen");
        let newer = sample_run("2026-06-03", "epyc");

        let outcome = history_outcome("holger", vec![older, newer], 1);
        assert!(outcome.is_sannr(), "a real run is a true (sannr) outcome");

        let listed = outcome.data["runs"].as_array().unwrap();
        assert_eq!(listed.len(), 1, "limit=1 truncates");
        assert_eq!(listed[0]["machine"], serde_json::json!("epyc"), "newest (2026-06-03) first");
        assert_eq!(outcome.data["repo"], serde_json::json!("holger"));
    }
}