nornir 0.4.45

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
//! Change detection — the Urðr ↔ Verðandi diff.
//!
//! **Urðr** (the past) is what nornir last *recorded* shipped: the git
//! SHA per repo in the most recent release-lineage row. **Verðandi**
//! (the present) is the live working tree: each repo's current `HEAD`
//! SHA, read via `crate::gitio::head_sha` (`gix`, with a `git` fallback).
//!
//! `detect` diffs the two to find which repos moved, then expands that
//! through the dependency graph into the **blast radius** — the set the
//! release/bench pipeline must re-run, in build order. This is the data
//! the dependency-Mímir MCP tool `changed_since_last_release` serves so
//! a small local model can ask "what do I need to rebuild?" instead of
//! reasoning over the whole graph itself.

use std::collections::BTreeMap;

use anyhow::{Context, Result};
use serde::Serialize;

use crate::warehouse::dep_graph::WorkspaceGraph;
use crate::warehouse::iceberg::IcebergWarehouse;

/// The result of an Urðr↔Verðandi diff, as returned by [`detect`].
///
/// All three fields hold repo names. The type derives `Serialize`, so the
/// field names below are also the keys of its serialized form.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ChangeSet {
    /// Repos whose current `HEAD` differs from the last recorded SHA,
    /// or that have never been recorded (brand-new). Produced by
    /// [`diff_changed`], so it is sorted by repo name.
    pub changed: Vec<String>,
    /// `changed` ∪ everything that transitively depends on them, in
    /// build order (dependencies first). The re-run / invalidation set.
    pub affected: Vec<String>,
    /// Full workspace build order, for context. Empty when the graph
    /// yields no build order (`detect` falls back to the default value).
    pub build_order: Vec<String>,
}

/// Diff the last recorded release SHAs (Urðr) against the live working
/// trees (Verðandi) and expand the moved repos into their blast radius.
///
/// * `wh` — warehouse the release history is queried from.
/// * `graph` — workspace graph; its `facts` supply each member's checkout
///   `root`, and it computes the affected set and the build order.
/// * `workspace_name` — workspace whose release history is consulted.
///
/// Members without a usable checkout (an empty `root`, or no `.git` entry
/// under it) are skipped with a note on stderr and are never reported as
/// changed, because their live SHA cannot be read.
///
/// # Errors
///
/// Fails if the release history cannot be read, or if reading `HEAD` fails
/// for a member that *does* have a checkout on disk.
pub async fn detect(
    wh: &IcebergWarehouse,
    graph: &WorkspaceGraph,
    workspace_name: &str,
) -> Result<ChangeSet> {
    // Urðr: SHA per repo from the most recent recorded release.
    // NOTE(review): `Some(1)` is presumably a row limit — confirm against
    // `query_release_history`.
    let history = crate::release::pipeline::query_release_history(wh, workspace_name, Some(1))
        .await
        .context("read release history (Urðr)")?;
    // No recorded release at all → empty map, so every live repo counts as
    // changed in `diff_changed`.
    let recorded: BTreeMap<String, String> = history
        .last()
        .map(|r| {
            r.repos
                .iter()
                .map(|rr| (rr.repo.clone(), rr.git.sha.clone()))
                .collect()
        })
        .unwrap_or_default();

    // Verðandi: current HEAD SHA per repo, read via `crate::gitio::head_sha`.
    //
    // A member whose `root` has no `.git` checkout on disk (e.g. an SSH-only
    // member the monitor hasn't materialized yet, or an empty/unset root) is
    // *skipped* rather than fatal — one absent checkout must not dead-end the
    // whole Urðr↔Verðandi diff. It simply isn't reported as changed (we can't
    // read its live SHA), exactly as a vanished repo is treated in
    // `diff_changed`. Members WITH a checkout still hard-error on a real read
    // failure (corrupt repo), surfacing the problem instead of hiding it.
    let mut current: BTreeMap<String, String> = BTreeMap::new();
    for (name, facts) in &graph.facts {
        // The empty root has to be rejected explicitly: joining `.git` onto an
        // empty path yields the *relative* path `.git`, which resolves against
        // the process's current directory and therefore "exists" whenever
        // nornir itself is run from inside a git checkout.
        let has_checkout =
            !facts.root.as_os_str().is_empty() && facts.root.join(".git").exists();
        if !has_checkout {
            eprintln!(
                "nornir-change: skipping `{name}` — no checkout at {} (Verðandi)",
                facts.root.display()
            );
            continue;
        }
        let sha = crate::gitio::head_sha(&facts.root)
            .with_context(|| format!("read HEAD of `{name}` (Verðandi)"))?;
        current.insert(name.clone(), sha);
    }

    let changed = diff_changed(&recorded, &current);
    let affected = graph.affected_by_change(&changed);
    // Best-effort: the build order is context only, so a graph that cannot
    // produce one yields an empty list instead of failing the whole diff.
    let build_order = graph.build_order().unwrap_or_default();
    Ok(ChangeSet { changed, affected, build_order })
}

/// Pure core of the diff: the names of repos that need attention.
///
/// A repo from `current` is returned when `recorded` has no entry for it
/// (never released) or holds a different SHA (it moved). Output follows
/// `current`'s `BTreeMap` key order, so it is sorted and deterministic.
///
/// Repos present only in `recorded` have vanished from the working tree and
/// are deliberately left out — there is nothing left to rebuild.
pub fn diff_changed(
    recorded: &BTreeMap<String, String>,
    current: &BTreeMap<String, String>,
) -> Vec<String> {
    let mut moved = Vec::new();
    for (repo, live_sha) in current {
        match recorded.get(repo) {
            // Recorded at exactly this SHA: nothing happened to this repo.
            Some(shipped_sha) if shipped_sha == live_sha => {}
            // Recorded at another SHA, or never recorded at all.
            _ => moved.push(repo.clone()),
        }
    }
    moved
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned repo → SHA map from borrowed `(repo, sha)` pairs.
    fn shas(entries: &[(&str, &str)]) -> BTreeMap<String, String> {
        let mut table = BTreeMap::new();
        for &(repo, sha) in entries {
            table.insert(repo.to_owned(), sha.to_owned());
        }
        table
    }

    /// Owned copies of `repos`, in the shape `diff_changed` returns.
    fn names(repos: &[&str]) -> Vec<String> {
        repos.iter().map(|repo| (*repo).to_owned()).collect()
    }

    /// Same SHAs on both sides: nothing to report.
    #[test]
    fn unchanged_repo_is_not_reported() {
        let shipped = shas(&[("a", "sha1"), ("b", "sha1")]);
        let live = shas(&[("a", "sha1"), ("b", "sha1")]);
        let moved = diff_changed(&shipped, &live);
        assert!(moved.is_empty());
    }

    /// A repo whose SHA differs from the recorded one is reported.
    #[test]
    fn moved_sha_is_reported() {
        let shipped = shas(&[("a", "sha1"), ("b", "sha1")]);
        let live = shas(&[("a", "sha2"), ("b", "sha1")]);
        assert_eq!(diff_changed(&shipped, &live), names(&["a"]));
    }

    /// A repo absent from the recorded map is treated as changed.
    #[test]
    fn never_recorded_repo_counts_as_changed() {
        let shipped = shas(&[("a", "sha1")]);
        let live = shas(&[("a", "sha1"), ("b", "sha9")]);
        assert_eq!(diff_changed(&shipped, &live), names(&["b"]));
    }

    /// With no recorded release, every live repo is reported, sorted.
    #[test]
    fn empty_history_means_everything_changed() {
        let shipped: BTreeMap<String, String> = BTreeMap::new();
        let live = shas(&[("a", "s"), ("b", "s")]);
        assert_eq!(diff_changed(&shipped, &live), names(&["a", "b"]));
    }

    /// A repo that only exists in the recorded map is ignored.
    #[test]
    fn removed_repo_is_not_reported() {
        let shipped = shas(&[("a", "sha1"), ("gone", "sha1")]);
        let live = shas(&[("a", "sha1")]);
        let moved = diff_changed(&shipped, &live);
        assert!(moved.is_empty());
    }
}

/// End-to-end inject-assert for the **monitored multi-repo** path: the
/// dependency-Mímir graph rebuilt from the warehouse's `dep_graph_edges`
/// (no `nornir-workspace.toml`, no `cargo metadata`) must populate each
/// member's real on-disk checkout `root` so live-HEAD change-detection and
/// build-order work — the `nordisk`-workspace bug where `RepoFacts.root`
/// was empty (dead-ending `read HEAD of \`facett\``) and `build_order`
/// returned `[]`.
#[cfg(test)]
mod warehouse_graph_e2e {
    use crate::release::pipeline::{
        persist_lineage, RepoGitState, RepoReleaseRecord,
    };
    use crate::warehouse::dep_graph::{record_dep_graph, CrossRepoEdge, WorkspaceGraph};
    use crate::warehouse::iceberg::IcebergWarehouse;
    use std::collections::BTreeMap;
    use std::path::{Path, PathBuf};

    /// Materialize a real git checkout at `<git_root>/<name>` with one file
    /// (`Cargo.toml` containing `body`) and one commit. Returns the checkout
    /// path together with the SHA reported by `commit_all`.
    fn make_repo(git_root: &Path, name: &str, body: &str) -> (PathBuf, String) {
        let root = git_root.join(name);
        crate::gitio::init(&root).expect("git init member");
        std::fs::write(root.join("Cargo.toml"), body).expect("write file");
        let sha = crate::gitio::commit_all(&root, "initial").expect("commit member");
        (root, sha)
    }

    /// Build a `CrossRepoEdge` from `from` to `to` whose `via` collection
    /// holds the single entry `via`.
    fn edge(from: &str, to: &str, via: &str) -> CrossRepoEdge {
        CrossRepoEdge {
            from: from.to_string(),
            to: to.to_string(),
            via: [via.to_string()].into_iter().collect(),
        }
    }

    /// Build the warehouse-snapshot graph exactly as the server does for a
    /// monitored workspace, then assert (a) facts roots are the real checkout
    /// dirs (non-empty, exist), (b) `head_sha(&root)` succeeds for each,
    /// (c) `build_order()` returns ALL members topologically (deps before
    /// dependents), and (d) `detect` (the `changed_since_last_release` core)
    /// runs end-to-end with NO "read HEAD" error and reports the moved repo +
    /// its blast radius.
    #[test]
    fn warehouse_graph_populates_real_roots_and_build_order_and_change() {
        // Two tmp dirs: one for the warehouse, one for the member checkouts —
        // the latter stands in for `<registry_root>/<ws>/git`.
        let wh_dir = tempfile::tempdir().expect("wh tmp");
        let git_root_td = tempfile::tempdir().expect("git tmp");
        let git_root = git_root_td.path().to_path_buf();
        let ws = "nordisk";

        // Linear chain: app → lib → util (consumer → producer edges). The three
        // are real git checkouts under git_root. Only `util`'s path is kept,
        // because it is the one repo that gets a second commit further down.
        let (_app_root, app_sha) = make_repo(&git_root, "app", "[package]\nname=\"app\"\n");
        let (_lib_root, lib_sha) = make_repo(&git_root, "lib", "[package]\nname=\"lib\"\n");
        let (util_root, util_sha) = make_repo(&git_root, "util", "[package]\nname=\"util\"\n");
        let members = vec!["app".to_string(), "lib".to_string(), "util".to_string()];

        let wh = IcebergWarehouse::open(wh_dir.path()).expect("open warehouse");

        // Record the cross-repo edges into `dep_graph_edges` (what the monitor
        // persists on republish): app→lib, lib→util. The facts map is left
        // empty here; only the edges are being recorded.
        let snapshot_graph = WorkspaceGraph::from_query_parts(
            BTreeMap::new(),
            vec![edge("app", "lib", "lib_c"), edge("lib", "util", "util_c")],
        );
        wh.block_on(record_dep_graph(&wh, ws, &snapshot_graph))
            .expect("record dep graph");

        // ── Rebuild the graph from the warehouse, threading the real git_root. ──
        let g = crate::mimir::build_graph_from_warehouse(&wh, ws, &members, &git_root)
            .expect("build_graph_from_warehouse")
            .expect("graph present (edges + members)");

        // (a) Every member's `root` is the real on-disk checkout dir.
        for m in &members {
            let f = g.facts.get(m).unwrap_or_else(|| panic!("facts for `{m}`"));
            assert_eq!(f.root, git_root.join(m), "`{m}` root must be the real checkout");
            assert!(!f.root.as_os_str().is_empty(), "`{m}` root must be non-empty");
            assert!(f.root.exists(), "`{m}` checkout dir must exist on disk");
            assert!(f.root.join(".git").exists(), "`{m}` must be a git repo");
        }

        // (b) head_sha succeeds against each populated root (the original bug:
        //     empty root → gix + git both fail → "read HEAD of `facett`").
        for (m, want) in [("app", &app_sha), ("lib", &lib_sha), ("util", &util_sha)] {
            let got = crate::gitio::head_sha(&g.facts[m].root)
                .unwrap_or_else(|e| panic!("head_sha for `{m}`: {e:#}"));
            assert_eq!(&got, want, "head_sha for `{m}` must match the committed SHA");
        }

        // (c) build_order returns ALL members, dependencies first. Membership
        //     is compared as sets; relative order is checked separately below.
        let order = g.build_order().expect("build_order from warehouse edges");
        assert_eq!(
            order.iter().cloned().collect::<std::collections::BTreeSet<_>>(),
            members.iter().cloned().collect(),
            "build_order must contain every member (was [] before the fix)"
        );
        let pos = |n: &str| order.iter().position(|x| x == n).unwrap();
        assert!(pos("util") < pos("lib"), "util (dep) before lib");
        assert!(pos("lib") < pos("app"), "lib (dep) before app");

        // (d) Seed Urðr (a recorded release at the current SHAs), then MOVE
        //     `util`'s HEAD and assert `detect` runs end-to-end and reports the
        //     moved repo + its transitive dependents — no "read HEAD" error.
        let records: Vec<RepoReleaseRecord> = [
            ("util", &util_sha, 0usize),
            ("lib", &lib_sha, 1),
            ("app", &app_sha, 2),
        ]
        .into_iter()
        .map(|(repo, sha, idx)| RepoReleaseRecord {
            repo: repo.to_string(),
            build_order_idx: idx,
            git: RepoGitState { sha: (*sha).clone(), branch: "main".into(), dirty: false },
            gate_status: "succeeded".into(),
            tests_passed: 0,
            tests_failed: 0,
            published_versions: vec![],
            tantivy_snapshot_id: None,
            dwarf_snapshot_id: None,
        })
        .collect();
        wh.block_on(persist_lineage(&wh, uuid::Uuid::new_v4(), ws, &uuid::Uuid::nil(), &records, true))
            .expect("seed release lineage (Urðr)");

        // Nothing moved yet → no changes, but detect still runs cleanly.
        let cs0 = wh.block_on(super::detect(&wh, &g, ws)).expect("detect (no change)");
        assert!(cs0.changed.is_empty(), "no repo moved → empty changed set: {:?}", cs0.changed);
        assert_eq!(
            cs0.build_order.iter().cloned().collect::<std::collections::BTreeSet<_>>(),
            members.iter().cloned().collect(),
            "detect must carry the full build order through the warehouse graph"
        );

        // Move `util` (new commit → new HEAD). The same graph `g` is reused:
        // `detect` reads each member's HEAD from its `root` at call time, so
        // the move is visible without rebuilding the graph.
        std::fs::write(util_root.join("NEW.txt"), b"moved\n").expect("add file");
        let util_sha2 = crate::gitio::commit_all(&util_root, "move util").expect("recommit util");
        assert_ne!(util_sha2, util_sha, "util HEAD must have moved");

        let cs = wh.block_on(super::detect(&wh, &g, ws)).expect("detect (util moved)");
        assert_eq!(cs.changed, vec!["util".to_string()], "only util moved");
        // Blast radius of util = util ∪ {lib, app}, in build order.
        assert_eq!(
            cs.affected.iter().cloned().collect::<std::collections::BTreeSet<_>>(),
            members.iter().cloned().collect(),
            "a change to the shared leaf rebuilds every member"
        );
        let apos = |n: &str| cs.affected.iter().position(|x| x == n).unwrap();
        assert!(apos("util") < apos("lib") && apos("lib") < apos("app"), "affected in build order");
    }
}