nornir 0.2.0

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
Documentation
//! Loader for the consumer's `nornir.toml`.
//!
//! Discovery rule (used by the CLI when no explicit path is given):
//! walk up from `cwd` looking for `workspace_holger/release/nornir.toml`.
//! The discovered file's *great-grandparent* (three levels up from the
//! file itself) is the **workspace root** — the dir containing
//! `workspace_holger/`, `holger/`, `znippy/`, etc. All relative paths
//! inside `nornir.toml` (including `[guard].forbidden`) are interpreted
//! against that root.

use std::collections::BTreeMap;
use std::path::{Path, PathBuf};

use anyhow::{anyhow, Context, Result};
use serde::{Deserialize, Serialize};

/// In-memory form of a consumer's `nornir.toml`.
///
/// Every section carries `#[serde(default)]`, so a file that omits a
/// section still deserializes and the field takes its `Default` value.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct Nornir {
    /// The `[guard]` section.
    #[serde(default)]
    pub guard: Guard,
    /// The `[storage]` section.
    #[serde(default)]
    pub storage: Storage,
    /// The `[repo.<key>]` tables, ordered by key.
    ///
    /// NOTE(review): the key is presumably the repo's directory name as
    /// consumed by [`Nornir::repo_dir`] — confirm against callers.
    #[serde(default)]
    pub repo: BTreeMap<String, Repo>,
}

/// The `[guard]` section of `nornir.toml`.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct Guard {
    /// Paths listed under `[guard].forbidden`. Relative entries are
    /// interpreted against the workspace root (see the module docs).
    /// Empty when the key is absent.
    #[serde(default)]
    pub forbidden: Vec<String>,
}

/// The `[storage]` section of `nornir.toml`.
///
/// All three fields are plain strings that default to `""` when omitted;
/// the empty string is the "use the default" marker for `kind` and
/// `local_path`.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct Storage {
    /// `"local"` (default) or `"remote"`. Empty string also = local.
    #[serde(default)]
    pub kind: String,
    /// Workspace-root-relative dir holding `warehouse/` and `cache/`.
    /// Defaults to `workspace_holger/.nornir` when empty.
    #[serde(default)]
    pub local_path: String,
    /// Flight endpoint URL when `kind = "remote"`.
    #[serde(default)]
    pub remote_url: String,
}

/// One entry of the `repo` map in `nornir.toml`.
///
/// Every field is optional in the file and falls back to its `Default`
/// (empty string / empty list / default sub-table).
///
/// NOTE(review): the consumers of `remote`, `history`, `readme` and
/// `publish_order` are outside this file; the notes on those fields are
/// inferred from their names and need confirming.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct Repo {
    /// Presumably the repo's remote URL — TODO confirm.
    #[serde(default)] pub remote: String,
    /// Presumably a path to the repo's history/changelog file — TODO confirm.
    #[serde(default)] pub history: String,
    /// Presumably a path to the repo's README — TODO confirm.
    #[serde(default)] pub readme: String,
    /// A list of lists of names; presumably ordered publish stages, each
    /// holding the crates released together — TODO confirm.
    #[serde(default)] pub publish_order: Vec<Vec<String>>,
    /// Release-gate switches for this repo.
    #[serde(default)] pub gates: Gates,
    /// Bench contract for this repo (see [`BenchSpec`]).
    #[serde(default)] pub bench: BenchSpec,
}

/// Release-gate switches for one repo (the `gates` field of [`Repo`]).
///
/// Everything defaults to off / `0.0` / empty when omitted from the file.
///
/// NOTE(review): the checks behind these flags live outside this file.
/// Apart from `guard_intact`, the per-field notes are inferred from the
/// field names and need confirming against the gate implementation.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct Gates {
    /// Presumably rejects `path`-based dependency patches — TODO confirm.
    #[serde(default)] pub no_path_patches: bool,
    /// Meaning not evident from this file — TODO document.
    #[serde(default)] pub nexus_floor: bool,
    /// Presumably enables the bench regression gate — TODO confirm.
    #[serde(default)] pub no_regression: bool,
    /// Presumably the tolerated regression, in percent — TODO confirm.
    #[serde(default)] pub max_regression_pct: f64,
    /// Presumably names of integration round-trip checks — TODO confirm.
    #[serde(default)] pub integration_roundtrip: Vec<String>,
    /// Presumably requires generated docs to be up to date — TODO confirm.
    #[serde(default)] pub docs_fresh: bool,
    /// Fail the release if any `[guard].forbidden` path drifted from the
    /// recorded manifest (perm re-grant or out-of-band content change).
    #[serde(default)] pub guard_intact: bool,
}

/// Per-repo bench contract.
///
/// **Corpus shape (decided 2026-05-31):**
///
/// 1. A *corpus* is identified by a flat string name (e.g. `text_500mb`,
///    `rust_crate_mt32`) listed in [`Self::required_results`]. The list
///    is **the schema** — every release-time `BenchRun` must produce at
///    least one [`crate::bench::BenchResult`] with each of these names.
///    Missing names → release gate fails *before* regression checks run.
/// 2. Result name format: `<corpus>[_<variant>]`, snake_case, ASCII.
///    Variants are free-form and meaningful only to the repo (e.g.
///    `_st` / `_mt32` for single- vs 32-thread variants in holger).
/// 3. Per-result `metrics` is a free-form JSON map — but cross-repo
///    rollups (Time-Travel viz, regression gate) only auto-discover
///    numeric metrics. Recommended (not enforced) suffix convention:
///      - throughput  → `*_mbs` (MiB/s)
///      - latency     → `*_ms`  or `*_us`
///      - ratios      → `*_pct`
///      - counts      → plain noun (`files`, `chunks`)
/// 4. Stability requirement: once a name lands in `required_results`,
///    it MUST keep producing comparable numbers across releases. Renames
///    are schema breaks and require a workspace.toml update.
///
/// **Environmental requirements:**
///
/// - [`Self::network_required`] — set `true` if the corpus pulls real
///   artifacts at test-time (e.g. ljar's maven_artifacts suite hits
///   Maven Central). When true the upcoming funnel `network_probe`
///   node must pass before `cargo_test` / `cargo_bench` run for this
///   repo; in offline mode the pipeline records `status=skipped_offline`
///   for these stages rather than failing them.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct BenchSpec {
    /// Result names a bench run must produce; checked by
    /// [`BenchSpec::missing_in`] and [`BenchSpec::validate`].
    /// Empty (no requirements) when the key is absent.
    #[serde(default)]
    pub required_results: Vec<String>,
    /// `true` when the corpus needs network access at test time (see the
    /// struct-level docs). Defaults to `false`.
    #[serde(default)]
    pub network_required: bool,
}

impl BenchSpec {
    /// Report which required result names `run` failed to produce.
    ///
    /// A name counts as produced when at least one entry of
    /// `run.results` has exactly that `name`. The returned strings borrow
    /// from `self.required_results`, are sorted ascending, and each
    /// missing name appears once even if the config lists it repeatedly.
    /// An empty vector means the run satisfies the contract.
    pub fn missing_in<'a>(&'a self, run: &crate::bench::BenchRun) -> Vec<&'a str> {
        // Build the lookup set once so each required name is an O(1) probe.
        let produced: std::collections::HashSet<&str> =
            run.results.iter().map(|r| r.name.as_str()).collect();
        let mut missing: Vec<&'a str> = self
            .required_results
            .iter()
            .map(String::as_str)
            .filter(|name| !produced.contains(name))
            .collect();
        // Equal strings are interchangeable, so the cheaper unstable sort
        // yields the same result as a stable one.
        missing.sort_unstable();
        // A name repeated in the config must not be reported (or counted
        // in `validate`'s message) more than once.
        missing.dedup();
        missing
    }

    /// Fail with a clear error if any required result is missing.
    ///
    /// # Errors
    ///
    /// Returns an error naming every distinct missing result (sorted,
    /// comma-separated) together with how many there are.
    pub fn validate(&self, run: &crate::bench::BenchRun) -> anyhow::Result<()> {
        let missing = self.missing_in(run);
        if missing.is_empty() {
            return Ok(());
        }
        anyhow::bail!(
            "bench corpus is missing {} required result(s): [{}]",
            missing.len(),
            missing.join(", ")
        );
    }
}

/// A loaded config plus the paths it was discovered through.
///
/// Produced by [`discover`] and [`load_explicit`]. Derives `Debug` and
/// `Clone` like the other config types in this module, so a
/// `Result<Loaded>` can be inspected with `{:?}` / `unwrap_err()`.
#[derive(Debug, Clone)]
pub struct Loaded {
    /// Parsed contents of the config file.
    pub nornir: Nornir,
    /// Path of the `nornir.toml` that was read.
    pub config_path: PathBuf,
    /// Directory against which relative paths in the config are resolved.
    pub workspace_root: PathBuf,
}

impl Loaded {
    /// Iceberg warehouse directory for this workspace.
    ///
    /// - `[storage].local_path` set: `<workspace_root>/<local_path>/warehouse`
    /// - `[storage].local_path` empty:
    ///   `<workspace_root>/workspace_holger/.nornir/warehouse`
    pub fn warehouse_root(&self) -> PathBuf {
        let configured = self.nornir.storage.local_path.as_str();
        if !configured.is_empty() {
            // Explicit override: nest `warehouse/` under the configured dir.
            return self.workspace_root.join(configured).join("warehouse");
        }
        self.workspace_root.join("workspace_holger/.nornir/warehouse")
    }
}

impl Nornir {
    /// Read the file at `path` and deserialize it as TOML.
    ///
    /// # Errors
    ///
    /// Fails with context `read <path>` when the file cannot be read as
    /// UTF-8 text, or `parse <path>` when the text does not deserialize
    /// into a [`Nornir`].
    pub fn load(path: &Path) -> Result<Self> {
        let raw = std::fs::read_to_string(path)
            .with_context(|| format!("read {}", path.display()))?;
        let parsed: Self =
            toml::from_str(&raw).with_context(|| format!("parse {}", path.display()))?;
        Ok(parsed)
    }

    /// Filesystem location of repo `name` under `workspace_root`
    /// (convention: `<workspace_root>/<repo_name>/`). Purely lexical —
    /// nothing is checked on disk.
    pub fn repo_dir(workspace_root: &Path, name: &str) -> PathBuf {
        let mut dir = workspace_root.to_path_buf();
        dir.push(name);
        dir
    }
}

/// Discover `nornir.toml` by walking up from `start` looking for
/// `workspace_holger/release/nornir.toml`. Returns the loaded config
/// plus the resolved workspace root (parent of `workspace_holger`).
///
/// `start` is canonicalized first. If that fails (for example because
/// the path does not exist), it is made absolute against the current
/// directory instead, so the upward walk still reaches the real
/// ancestors; only if that also fails is `start` used verbatim.
///
/// # Errors
///
/// Fails when no ancestor of `start` contains the config file, or when
/// the first one found cannot be read or parsed.
pub fn discover(start: &Path) -> Result<Loaded> {
    let origin = start
        .canonicalize()
        // A bare relative path would run out of ancestors at "" and never
        // look above the current directory, so anchor it to the cwd.
        // (`join` leaves an already-absolute `start` untouched.)
        .or_else(|_| std::env::current_dir().map(|cwd| cwd.join(start)))
        .unwrap_or_else(|_| start.to_path_buf());

    // `ancestors` yields `origin` itself first, then each parent in turn.
    for dir in origin.ancestors() {
        let candidate = dir.join("workspace_holger/release/nornir.toml");
        if candidate.exists() {
            let nornir = Nornir::load(&candidate)?;
            return Ok(Loaded {
                nornir,
                config_path: candidate,
                workspace_root: dir.to_path_buf(),
            });
        }
    }

    Err(anyhow!(
        "could not find workspace_holger/release/nornir.toml from {}",
        start.display()
    ))
}

/// Load from an explicit config path; workspace root = the directory
/// three levels above the file (so
/// `…/workspace_holger/release/nornir.toml` → `…/`).
///
/// The root is taken lexically from `config_path` when it has enough
/// components. A shorter relative path (e.g. `release/nornir.toml`) is
/// canonicalized and the lookup retried, so it resolves as long as the
/// directories exist on disk. `config_path` is stored as given.
///
/// # Errors
///
/// Fails when the file cannot be read or parsed, or when no directory
/// three levels above it can be determined.
pub fn load_explicit(config_path: &Path) -> Result<Loaded> {
    let nornir = Nornir::load(config_path)?;

    // `ancestors()` starts at the path itself, so index 3 is
    // file → release/ → workspace_holger/ → workspace root.
    let workspace_root = match config_path.ancestors().nth(3) {
        Some(root) => root.to_path_buf(),
        // Too few lexical components: fall back to the on-disk location.
        None => config_path
            .canonicalize()
            .ok()
            .and_then(|abs| abs.ancestors().nth(3).map(Path::to_path_buf))
            .ok_or_else(|| {
                anyhow!("config path lacks grandparent dirs: {}", config_path.display())
            })?,
    };

    Ok(Loaded {
        nornir,
        config_path: config_path.to_path_buf(),
        workspace_root,
    })
}