nornir 0.4.46

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
//! Warehouse — append-only columnar store backing nornir's bench/symbol
//! history.
//!
//! The canonical backend is [`iceberg::IcebergWarehouse`] (Apache Iceberg via
//! `iceberg-rust` over the single-file **skade-katalog** redb catalogue).
//! Every derived fact is an Iceberg row keyed by git SHA. A `RemoteWarehouse`
//! (Arrow Flight to `nornir-server`, Phase 5) will reuse the same [`Warehouse`]
//! trait surface; the trait is the durable abstraction.

pub mod iceberg;
pub mod iceberg_schema;
pub mod dep_graph;
pub mod funnel;
pub mod release_events;
/// Iceberg writer/reader for the `clone_events` stream (PLAN #6) — per-member
/// clone/fetch/republish populate outcomes, readable by a thin viz/CLI client.
pub mod clone_events;
/// Iceberg writer/reader for the `airgap_events` DAG (EPIC AIRGAP / AIRGAP8) —
/// the durable twin of the lean `nornir-airgap` JSONL event sink.
pub mod airgap_events;
pub mod test_results;
pub mod test_inventory;
pub mod surface_coverage;
pub mod coverage;
pub mod viz_actions;
pub mod agent_model_runs;
pub mod codegen_judge;
pub mod blob_store;
/// Static extractor for the fn → warehouse-table access fact (AUT7 / EPIC ARCH
/// n-002): the syn pass + the accessor → table map.
pub mod access_scan;
/// Iceberg writer/reader for the `warehouse_access_edges` fact table.
pub mod warehouse_access;
/// Generative-LLM abstraction (EPIC #39): ONE `Generator` trait, three real
/// backends (candle / mistralrs / onnx) behind the `generator(spec)` factory,
/// plus `mock` and an off-by-default `ollama` client. Feeds the bake-off.
pub mod generator;

use std::path::Path;

use anyhow::Result;
use arrow::array::RecordBatch;
use uuid::Uuid;

use crate::bench::BenchRun;
use crate::config::Storage;

// Re-export skade's backend-neutral scan predicate so trait callers can build
// pushdown filters (`ScanFilter::eq("repo", repo)`, `is_in`, `range`) without
// naming any engine type. This is the durable predicate surface a future
// `SkadeWarehouse` / DuckDB backend implements identically; the Iceberg backend
// lowers it to an `iceberg::expr::Predicate` inside skade.
pub use skade::{Scalar, ScanFilter};

/// The durable storage seam for nornir's warehouse (bench/test/release/symbol
/// history, etc.). **Sync** by design — every method blocks internally on the
/// backend's own runtime (`IcebergWarehouse` owns a tokio `Runtime` and calls
/// `block_on`), so the ~150 sync call sites never have to become async.
///
/// Two layers:
///
///  1. A **generic Arrow core** ([`append_arrow`](Warehouse::append_arrow),
///     [`scan_arrow`](Warehouse::scan_arrow),
///     [`scan_filtered`](Warehouse::scan_filtered),
///     [`scan_limited`](Warehouse::scan_limited)) plus table/catalog lifecycle
///     ([`ensure_table`](Warehouse::ensure_table),
///     [`ensure_columns`](Warehouse::ensure_columns),
///     [`table_names`](Warehouse::table_names)). This is what the sibling
///     modules (`dep_graph`, `release_events`, `test_results`, …) need: build a
///     `RecordBatch` from `iceberg_schema::*` and append / scan it by table
///     name, without reaching for the raw catalog. Backends only have to make
///     these honest and every typed method composes over them.
///
///  2. **Named per-table convenience methods**
///     ([`append_bench_run`](Warehouse::append_bench_run) /
///     [`query_bench_runs`](Warehouse::query_bench_runs)) kept as thin wrappers
///     so existing callers are untouched. (P1 keeps just the two that were
///     already on the trait; the rest stay as inherent `IcebergWarehouse`
///     methods until the next phase migrates consumers onto the trait.)
///
/// [`IcebergWarehouse`](iceberg::IcebergWarehouse) is the **default** (and
/// today the only) implementation: it delegates each method to its existing
/// inherent body, which already wraps skade (`skade::append` / `read_*` /
/// `ingest_parallel`). A future `SkadeWarehouse` (reusing a `skade::Warehouse`
/// + `Table` handle) or a DuckDB backend plugs in behind this same trait — see
/// `.nornir/warehouse-trait-skade-migration.md` for the P2/P3/P4 plan.
///
/// `iceberg_schema.rs` remains the per-table shape source of truth; the trait
/// is table-name + `RecordBatch` based and stays schema-agnostic.
pub trait Warehouse: Send + Sync {
    // ── generic Arrow core ──────────────────────────────────────────────────

    /// Append one Arrow [`RecordBatch`] to `table`, evolving the table's stored
    /// schema forward (add-column) if `batch` carries columns the table lacks.
    /// The batch must match the table's `iceberg_schema::*` shape.
    fn append_arrow(&self, table: &str, batch: RecordBatch) -> Result<()>;

    /// Full-snapshot scan of `table` → Arrow, column order preserved (so
    /// positional downcasts stay valid).
    fn scan_arrow(&self, table: &str) -> Result<Vec<RecordBatch>>;

    /// Pushdown scan: prune to rows matching `filter` (file / row-group
    /// granularity — keep a residual per-row guard for exactness) and project
    /// `columns` (empty = all columns, order preserved). Returns an empty result
    /// when the table does not exist yet.
    fn scan_filtered(
        &self,
        table: &str,
        filter: &ScanFilter,
        columns: &[&str],
    ) -> Result<Vec<RecordBatch>>;

    /// Limit / early-break scan: stop once `max_rows` rows are in hand instead
    /// of materializing the whole table (`max_rows == 0` → full scan). Returns
    /// *some* `max_rows` rows, not a deterministic top-N (no ordering guarantee).
    fn scan_limited(&self, table: &str, max_rows: usize) -> Result<Vec<RecordBatch>>;

    // ── table / catalog lifecycle ───────────────────────────────────────────

    /// Create `table` if missing, partitioned by `partition_cols` (identity
    /// transform; empty = unpartitioned). No-op when the table already exists.
    fn ensure_table(
        &self,
        table: &str,
        schema: ::iceberg::spec::Schema,
        partition_cols: &[&str],
    ) -> Result<()>;

    /// Evolve `table`'s stored schema to `canonical` (Iceberg add-column
    /// migration) if it is missing any of `canonical`'s top-level columns.
    /// No-op when the table already carries every canonical column.
    fn ensure_columns(&self, table: &str, canonical: &::iceberg::spec::Schema) -> Result<()>;

    /// Every table name in the warehouse namespace, sorted (catalog metadata
    /// only, no data read).
    fn table_names(&self) -> Result<Vec<String>>;

    // ── named per-table convenience wrappers (thin) ─────────────────────────

    /// Append one [`BenchRun`] recorded for `repo`.
    ///
    /// NOTE(review): the returned [`Uuid`] is presumably the identifier the
    /// backend assigned to the stored run — confirm against the
    /// `IcebergWarehouse` implementation, which is not visible from this file.
    fn append_bench_run(&self, repo: &str, run: &BenchRun) -> Result<Uuid>;

    /// Read back the bench runs selected by `filter` (see [`BenchFilter`]).
    ///
    /// NOTE(review): result ordering and how `filter.limit` is applied are
    /// decided by the backend and are not established here — confirm before
    /// relying on either.
    fn query_bench_runs(&self, filter: &BenchFilter) -> Result<Vec<BenchRun>>;
}

/// Selection criteria handed to [`Warehouse::query_bench_runs`].
///
/// Every field is optional and the [`Default`] value leaves all of them
/// `None`. NOTE(review): an unset field presumably means "no restriction on
/// this dimension" — the interpretation lives in the backend, confirm there.
#[derive(Debug, Default, Clone)]
pub struct BenchFilter {
    /// Repository to select runs for, if any.
    pub repo: Option<String>,
    /// Machine to select runs for, if any.
    pub machine: Option<String>,
    /// Optional cap on the number of runs requested.
    pub limit: Option<usize>,
}

impl BenchFilter {
    /// Build a filter with only `repo` set; `machine` and `limit` stay `None`.
    pub fn for_repo(repo: impl Into<String>) -> Self {
        let repo_name: String = repo.into();
        Self {
            repo: Some(repo_name),
            ..Self::default()
        }
    }
}

/// Open the warehouse selected by `storage` for read/write use.
///
/// The result is a boxed [`Warehouse`] trait object so callers never name a
/// concrete backend in their own signatures. The local kinds (`""`, `"local"`,
/// `"iceberg"`) all map to the Iceberg backend rooted at the directory derived
/// from `storage` and `workspace_root`; `"remote"` is the only distinct kind.
///
/// # Errors
///
/// Fails when `storage.kind` is `"remote"` (not implemented yet) or any
/// unrecognized value, and propagates whatever error the Iceberg backend
/// reports while opening.
pub fn open(storage: &Storage, workspace_root: &Path) -> Result<Box<dyn Warehouse>> {
    // Reject unsupported kinds up front; only the local kinds fall through.
    match storage.kind.as_str() {
        "remote" => anyhow::bail!("remote warehouse not yet implemented (Phase 5)"),
        "" | "local" | "iceberg" => {}
        other => anyhow::bail!("unknown storage.kind: {other}"),
    }

    let root = warehouse_root(storage, workspace_root);
    let backend = iceberg::IcebergWarehouse::open(&root)?;
    Ok(Box::new(backend))
}

/// Open the warehouse selected by `storage` for **read-only** use.
///
/// Unlike [`open`], this tolerates a live `nornir-server` that already holds
/// the exclusive redb lock on `catalog.redb`: when the catalog is locked, a
/// copied-aside read-only snapshot is opened (logged with a WARNING) instead
/// of hard-failing. That keeps dev-side `nornir` CLI reads (the `docs_fresh`
/// gate, docs render, etc.) from being wedged by the running server. Callers
/// that mutate must go through [`open`] — the snapshot is read-only by
/// construction.
///
/// # Errors
///
/// Fails when `storage.kind` is `"remote"` (not implemented yet) or any
/// unrecognized value, and propagates whatever error the Iceberg backend
/// reports while opening.
pub fn open_read_only(storage: &Storage, workspace_root: &Path) -> Result<Box<dyn Warehouse>> {
    // Reject unsupported kinds up front; only the local kinds fall through.
    match storage.kind.as_str() {
        "remote" => anyhow::bail!("remote warehouse not yet implemented (Phase 5)"),
        "" | "local" | "iceberg" => {}
        other => anyhow::bail!("unknown storage.kind: {other}"),
    }

    let root = warehouse_root(storage, workspace_root);
    let backend = iceberg::IcebergWarehouse::open_read_only(&root)?;
    Ok(Box::new(backend))
}

/// Work out which directory holds the warehouse for a local storage config.
///
/// Resolution order (keep in step with `config::Loaded::warehouse_root` and
/// the server's own resolution in `bin/nornir-server.rs`):
///   1. a non-empty `[storage].local_path` wins and yields
///      `<workspace_root>/<local_path>/warehouse` — the repo-/workspace-local
///      home, and the recommended default for a CLI;
///   2. with no `local_path`, fall back to the home-derived
///      `<home>/.nornir/warehouse` default (`config::warehouse_default_root`).
///      The live server defaults to that same location, which is a collision
///      risk — hence reads route through [`open_read_only`].
fn warehouse_root(storage: &Storage, workspace_root: &Path) -> std::path::PathBuf {
    let configured = &storage.local_path;
    if !configured.is_empty() {
        // Explicit workspace-relative location.
        return workspace_root.join(configured).join("warehouse");
    }
    // Nothing configured: use the shared home-derived default.
    crate::config::warehouse_default_root()
}