nornir 0.1.0

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
Documentation
//! Iceberg spec::Schema definitions mirroring `super::schema::*` Arrow
//! schemas. Field IDs are explicit and stable — append new fields with
//! a fresh, unused ID; never reuse, never renumber.

use std::sync::Arc;

use anyhow::Result;
use iceberg::spec::{NestedField, PrimitiveType, Schema, Type};

/// Iceberg schema for `bench_runs` (schema id 0).
///
/// All seven columns are required: `ts_micros` is a `Timestamptz`,
/// `cores` is an `Int`, and every other column is a `String`.
///
/// # Errors
///
/// Propagates whatever error the schema builder's `build()` returns.
pub fn bench_runs() -> Result<Schema> {
    // (field id, column name, primitive type). IDs are stable: append
    // new columns with a fresh ID, never renumber.
    let columns = vec![
        (1, "run_id", PrimitiveType::String),
        (2, "repo", PrimitiveType::String),
        (3, "ts_micros", PrimitiveType::Timestamptz),
        (4, "date", PrimitiveType::String),
        (5, "version", PrimitiveType::String),
        (6, "machine", PrimitiveType::String),
        (7, "cores", PrimitiveType::Int),
    ];

    let fields: Vec<_> = columns
        .into_iter()
        .map(|(id, name, prim)| Arc::new(NestedField::required(id, name, Type::Primitive(prim))))
        .collect();

    let schema = Schema::builder()
        .with_schema_id(0)
        .with_fields(fields)
        .build()?;
    Ok(schema)
}

pub fn bench_results() -> Result<Schema> {
    Ok(Schema::builder()
        .with_schema_id(0)
        .with_fields(vec![
            Arc::new(NestedField::required(1, "run_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(2, "result_name", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(3, "metric_name", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(4, "metric_value", Type::Primitive(PrimitiveType::Double))),
        ])
        .build()?)
}

pub fn test_outcomes() -> Result<Schema> {
    Ok(Schema::builder()
        .with_schema_id(0)
        .with_fields(vec![
            Arc::new(NestedField::required(1, "run_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(2, "test_name", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(3, "passed", Type::Primitive(PrimitiveType::Boolean))),
            Arc::new(NestedField::optional(4, "duration_ms", Type::Primitive(PrimitiveType::Double))),
            Arc::new(NestedField::optional(5, "message", Type::Primitive(PrimitiveType::String))),
        ])
        .build()?)
}

/// Iceberg schema for `dep_graph_edges` (schema id 0).
///
/// Six required columns: `ts_micros` is a `Timestamptz`, all others
/// (`snapshot_id`, `workspace_name`, `from_repo`, `to_repo`,
/// `via_crate`) are `String`s.
///
/// # Errors
///
/// Propagates whatever error the schema builder's `build()` returns.
pub fn dep_graph_edges() -> Result<Schema> {
    // (field id, column name, primitive type); all columns are required.
    let columns = vec![
        (1, "snapshot_id", PrimitiveType::String),
        (2, "workspace_name", PrimitiveType::String),
        (3, "ts_micros", PrimitiveType::Timestamptz),
        (4, "from_repo", PrimitiveType::String),
        (5, "to_repo", PrimitiveType::String),
        (6, "via_crate", PrimitiveType::String),
    ];

    let fields: Vec<_> = columns
        .into_iter()
        .map(|(id, name, prim)| Arc::new(NestedField::required(id, name, Type::Primitive(prim))))
        .collect();

    let schema = Schema::builder()
        .with_schema_id(0)
        .with_fields(fields)
        .build()?;
    Ok(schema)
}

/// One row per (release, repo). A release's full provenance — which
/// git SHAs of which repos went into it, against which dep-graph
/// snapshot, in which order, and with what gate outcome.
///
/// `gate_status` is per-repo so a partial release that only got
/// through `znippy` before tripping a gate on `holger` is fully
/// reconstructable.
///
/// `published_versions_csv` is `crate@version,crate@version` for the
/// crates this repo actually shipped (empty for dry-runs / failed
/// gates).
pub fn release_lineage() -> Result<Schema> {
    Ok(Schema::builder()
        .with_schema_id(0)
        .with_fields(vec![
            Arc::new(NestedField::required(1, "release_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(2, "workspace_name", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(3, "ts_micros", Type::Primitive(PrimitiveType::Timestamptz))),
            Arc::new(NestedField::required(4, "dep_graph_snapshot_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(5, "build_order_idx", Type::Primitive(PrimitiveType::Int))),
            Arc::new(NestedField::required(6, "repo", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(7, "git_sha", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(8, "git_branch", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(9, "git_dirty", Type::Primitive(PrimitiveType::Boolean))),
            Arc::new(NestedField::required(10, "gate_status", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(11, "tests_passed", Type::Primitive(PrimitiveType::Int))),
            Arc::new(NestedField::required(12, "tests_failed", Type::Primitive(PrimitiveType::Int))),
            Arc::new(NestedField::required(13, "dry_run", Type::Primitive(PrimitiveType::Boolean))),
            Arc::new(NestedField::required(14, "published_versions_csv", Type::Primitive(PrimitiveType::String))),
            // Time-machine pins: each release can attach immutable
            // derived artifacts captured at the same `git_sha`. Nullable
            // because (a) the table predates the time machine and
            // (b) not every artifact kind exists for every repo.
            Arc::new(NestedField::optional(15, "tantivy_snapshot_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(16, "dwarf_snapshot_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(17, "gimli_snapshot_id", Type::Primitive(PrimitiveType::String))),
        ])
        .build()?)
}

/// Funnel events: every mutation to the idea-intake DAG lands here as
/// a new row. Each Iceberg append produces a new snapshot, so the
/// table at snapshot T is the funnel state as of time T.
///
/// Schema is **wide** — one optional column per variant-specific field —
/// rather than a JSON blob, so SQL readers (DuckDB / PyIceberg) can
/// query the DAG directly without re-parsing.
///
/// Nested types in use:
///   * `refs`, `targets`, `produced_test_runs` → `list<string>`
///   * `produced_commits` → `list<struct{repo, sha}>`
///
/// `params_json` stays a string because `NodeAdded.params` is an
/// open-ended `serde_json::Map` with no fixed value type. If we ever
/// constrain it, promote to a typed map<string, *>.
pub fn funnel_events() -> Result<Schema> {
    use iceberg::spec::{ListType, StructType};

    // produced_commits element struct {repo, sha}
    let commit_struct = Type::Struct(StructType::new(vec![
        Arc::new(NestedField::required(101, "repo", Type::Primitive(PrimitiveType::String))),
        Arc::new(NestedField::required(102, "sha", Type::Primitive(PrimitiveType::String))),
    ]));

    let list_string = |elem_id: i32| {
        Type::List(ListType::new(Arc::new(NestedField::required(
            elem_id,
            "element",
            Type::Primitive(PrimitiveType::String),
        ))))
    };

    Ok(Schema::builder()
        .with_schema_id(0)
        .with_fields(vec![
            // Envelope
            Arc::new(NestedField::required(1, "event_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(2, "ts_micros", Type::Primitive(PrimitiveType::Timestamptz))),
            Arc::new(NestedField::required(3, "kind", Type::Primitive(PrimitiveType::String))),

            // Common id refs (any subset depending on kind)
            Arc::new(NestedField::optional(4, "idea_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(5, "plan_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(6, "node_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(7, "run_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(8, "from_node", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(9, "to_node", Type::Primitive(PrimitiveType::String))),

            // IdeaSubmitted
            Arc::new(NestedField::optional(10, "source", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(11, "text", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(12, "refs", list_string(112))),

            // IdeaTriaged / NodeStatusChanged / PlanStatusChanged / RunRecorded
            Arc::new(NestedField::optional(13, "decision", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(14, "node_status", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(15, "plan_status", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(16, "why", Type::Primitive(PrimitiveType::String))),

            // PlanCreated
            Arc::new(NestedField::optional(17, "summary", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(18, "planner", Type::Primitive(PrimitiveType::String))),

            // NodeAdded
            Arc::new(NestedField::optional(19, "node_kind", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(20, "targets", list_string(120))),
            Arc::new(NestedField::optional(21, "prompt_excerpt", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(22, "params_json", Type::Primitive(PrimitiveType::String))),

            // RunRecorded
            Arc::new(NestedField::optional(23, "ran_by", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(24, "outcome", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(25, "log_ref", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::optional(26, "produced_commits",
                Type::List(ListType::new(Arc::new(NestedField::required(126, "element", commit_struct)))))),
            Arc::new(NestedField::optional(27, "produced_test_runs", list_string(127))),
        ])
        .build()?)
}

// ─── Generic artifact blob tables (time machine) ────────────────────
//
// Uniform two-table shape used by every immutable derived artifact
// (Tantivy index, DWARF debuginfo, gimli unwind tables, …):
//
//   <artifact>_snapshots — metadata, one row per (repo, git_sha) capture
//   <artifact>_blobs     — raw bytes, one row per file inside the capture
//
// Restore = SELECT blobs WHERE snapshot_id=X → write each (filename,
// bytes) row to a tmpdir → open with the artifact's native loader.
// No re-tokenize, no rebuild — Tantivy / gimli mmap the bytes back as
// if they had been written by the original producer.
//
// Field IDs are scoped per table, not per artifact kind: each table
// is its own schema, so IDs don't collide across tables and future
// renames / merges stay safe. Every artifact kind therefore re-uses
// the same low ID ranges:
//   tantivy_index_snapshots : 1–9
//   tantivy_index_blobs     : 1–5
//   (future artifacts re-use these same ranges.)

/// Snapshot metadata table for any artifact kind.
pub fn artifact_snapshots() -> Result<Schema> {
    Ok(Schema::builder()
        .with_schema_id(0)
        .with_fields(vec![
            Arc::new(NestedField::required(1, "snapshot_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(2, "workspace", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(3, "repo", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(4, "git_sha", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(5, "branch", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(6, "ts_micros", Type::Primitive(PrimitiveType::Timestamptz))),
            Arc::new(NestedField::required(7, "schema_hash", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(8, "blob_count", Type::Primitive(PrimitiveType::Int))),
            Arc::new(NestedField::required(9, "total_bytes", Type::Primitive(PrimitiveType::Long))),
        ])
        .build()?)
}

/// Blob payload table for any artifact kind.
pub fn artifact_blobs() -> Result<Schema> {
    Ok(Schema::builder()
        .with_schema_id(0)
        .with_fields(vec![
            Arc::new(NestedField::required(1, "snapshot_id", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(2, "filename", Type::Primitive(PrimitiveType::String))),
            Arc::new(NestedField::required(3, "bytes", Type::Primitive(PrimitiveType::Binary))),
            Arc::new(NestedField::required(4, "byte_len", Type::Primitive(PrimitiveType::Int))),
            Arc::new(NestedField::required(5, "sha256", Type::Primitive(PrimitiveType::String))),
        ])
        .build()?)
}