nornir 0.4.22

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
Documentation
//! Persist DWARF-derived facts to the warehouse (Introspect Phase 2a).
//!
//! DWARF *symbols* extracted from a built binary by
//! [`super::artifact::extract_symbols`] are serialized and snapshotted into the
//! uniform `dwarf_snapshots` + `dwarf_blobs` artifact tables, keyed by the
//! source git SHA — the same byte-for-byte blob mechanism the Tantivy index
//! uses ([`crate::index::snapshot`]). This flips those tables from schema-only
//! to populated, so the DWARF knowledge map is historized and time-travelable
//! like every other warehouse projection.
//!
//! Inline-call edges (Phase 2b) are stored in the same blob, in the `calls`
//! field of [`DwarfFacts`]; rustdoc-JSON `api` facts are a later phase.

use std::path::Path;

use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};

use super::artifact::{self, Symbol};
use super::callgraph_dwarf::{self, CallEdge};
use crate::index::snapshot::{restore_dir_from_iceberg, snapshot_dir_to_iceberg, SnapshotRef};
use crate::warehouse::iceberg::{IcebergWarehouse, TABLE_DWARF_BLOBS, TABLE_DWARF_SNAPSHOTS};

/// File name for the facts blob inside a DWARF snapshot dir.
///
/// Despite the constant's name, the file holds the whole serialized
/// [`DwarfFacts`] value (symbols and call edges), not symbols alone: it is
/// written by [`snapshot_facts`] and read back by [`load_dwarf`].
const SYMBOLS_FILE: &str = "dwarf.json";

/// DWARF facts captured for one `(repo, git SHA)` from a built binary: the
/// function symbols (Phase 2a) and the inline-call edges (Phase 2b).
///
/// This is the value that [`snapshot_facts`] serializes to JSON and that
/// [`load_dwarf`] deserializes after a restore.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DwarfFacts {
    /// Symbols of the binary; [`snapshot_dwarf`] fills this from
    /// [`artifact::extract_symbols`].
    pub symbols: Vec<Symbol>,
    /// Inline-call edges; [`snapshot_dwarf`] fills this from
    /// [`callgraph_dwarf::extract_callgraph`].
    ///
    /// Marked `#[serde(default)]` so older symbols-only blobs still load: a
    /// blob without a `calls` key deserializes to an empty list.
    #[serde(default)]
    pub calls: Vec<CallEdge>,
}

impl DwarfFacts {
    /// Query the symbol list by name: delegates to [`artifact::lookup`], which
    /// matches `pattern` as a substring of the demangled name.
    pub fn lookup(&self, pattern: &str) -> Vec<&Symbol> {
        let symbols = &self.symbols;
        artifact::lookup(symbols, pattern)
    }

    /// Query the symbol list by source file: delegates to
    /// [`artifact::defined_in`], which keeps symbols whose file ends with `suffix`.
    pub fn defined_in(&self, suffix: &str) -> Vec<&Symbol> {
        let symbols = &self.symbols;
        artifact::defined_in(symbols, suffix)
    }

    /// Names of the functions that call `name`.
    ///
    /// Only the stored inline-call edges are consulted; the call graph is
    /// rebuilt from `self.calls` on every invocation.
    pub fn callers_of(&self, name: &str) -> Vec<String> {
        let edges = &self.calls;
        callgraph_dwarf::Callgraph::from_edges(edges).callers_of(name)
    }

    /// Names of the functions that `name` calls.
    ///
    /// Only the stored inline-call edges are consulted; the call graph is
    /// rebuilt from `self.calls` on every invocation.
    pub fn callees_of(&self, name: &str) -> Vec<String> {
        let edges = &self.calls;
        callgraph_dwarf::Callgraph::from_edges(edges).callees_of(name)
    }

    /// Shortest inline-call chain leading from `from` to `to`, or `None` when
    /// the underlying graph reports no such path.
    pub fn call_path(&self, from: &str, to: &str) -> Option<Vec<String>> {
        let edges = &self.calls;
        callgraph_dwarf::Callgraph::from_edges(edges).path_between(from, to)
    }
}

/// Persist `facts` for `git_sha` into the `dwarf_*` warehouse tables.
///
/// The facts are written as pretty-printed JSON to `cache_dir/dwarf.json`
/// (creating `cache_dir` if needed), and the directory is then handed to
/// [`snapshot_dir_to_iceberg`] together with the DWARF snapshot/blob table
/// names.
///
/// Idempotence per `(repo, git_sha, schema_hash)` is delegated to that helper,
/// which is expected to return the existing snapshot on a content match.
///
/// # Errors
///
/// Fails if the cache directory cannot be created, the facts cannot be
/// serialized, the blob file cannot be written, or the snapshot helper fails.
pub fn snapshot_facts(
    wh: &IcebergWarehouse,
    workspace: &str,
    repo: &str,
    git_sha: &str,
    branch: &str,
    facts: &DwarfFacts,
    cache_dir: &Path,
) -> Result<SnapshotRef> {
    // The snapshot helper operates on a directory, so stage the blob on disk first.
    std::fs::create_dir_all(cache_dir)
        .with_context(|| format!("create dwarf cache dir {}", cache_dir.display()))?;

    let payload = serde_json::to_vec_pretty(facts).context("serialize dwarf facts")?;
    let blob_path = cache_dir.join(SYMBOLS_FILE);
    std::fs::write(&blob_path, &payload)
        .with_context(|| format!("write {}", blob_path.display()))?;

    snapshot_dir_to_iceberg(
        wh,
        TABLE_DWARF_SNAPSHOTS,
        TABLE_DWARF_BLOBS,
        workspace,
        repo,
        git_sha,
        branch,
        cache_dir,
    )
}

/// Extract DWARF facts from `binary_path` and snapshot them — a convenience
/// wrapper over [`snapshot_facts`].
///
/// Both the symbols ([`artifact::extract_symbols`]) and the call edges
/// ([`callgraph_dwarf::extract_callgraph`]) are extracted, in that order.
/// `workspace_root` is forwarded to both extractors; it is expected to scope
/// the kept facts to sources that live inside the workspace.
///
/// # Errors
///
/// Propagates the first failure from either extractor or from
/// [`snapshot_facts`].
pub fn snapshot_dwarf(
    wh: &IcebergWarehouse,
    workspace: &str,
    repo: &str,
    git_sha: &str,
    branch: &str,
    binary_path: &Path,
    workspace_root: &Path,
    cache_dir: &Path,
) -> Result<SnapshotRef> {
    // Field initializers run in source order: symbols first, then call edges.
    let facts = DwarfFacts {
        symbols: artifact::extract_symbols(binary_path, workspace_root)?,
        calls: callgraph_dwarf::extract_callgraph(binary_path, workspace_root)?,
    };
    snapshot_facts(wh, workspace, repo, git_sha, branch, &facts, cache_dir)
}

/// Restore the DWARF facts for `repo` (latest, or the snapshot pinned to `sha`)
/// from the warehouse into `into`, then deserialize them.
pub fn load_dwarf(
    wh: &IcebergWarehouse,
    repo: &str,
    sha: Option<&str>,
    into: &Path,
) -> Result<DwarfFacts> {
    restore_dir_from_iceberg(wh, TABLE_DWARF_SNAPSHOTS, TABLE_DWARF_BLOBS, repo, sha, into)?;
    let path = into.join(SYMBOLS_FILE);
    let bytes = std::fs::read(&path).with_context(|| format!("read restored {}", path.display()))?;
    serde_json::from_slice(&bytes).context("deserialize dwarf facts")
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::introspect::callgraph_dwarf::CallKind;

    /// Builds an inline call edge `caller` → `callee`.
    fn edge(caller: &str, callee: &str) -> CallEdge {
        CallEdge {
            caller: caller.to_owned(),
            callee: callee.to_owned(),
            kind: CallKind::Inline,
        }
    }

    /// Builds a synthetic symbol; the mangled name is just `_ZN` + `name`.
    fn sym(name: &str, file: &str, krate: &str, line: u32) -> Symbol {
        let name_mangled = format!("_ZN{name}");
        Symbol {
            name: name.to_owned(),
            name_demangled: name.to_owned(),
            name_mangled,
            file: file.to_owned(),
            line: Some(line),
            size_bytes: Some(42),
            krate: krate.to_owned(),
        }
    }

    /// Synthetic facts are snapshotted into the `dwarf_*` tables and loaded
    /// back, exercising persistence and read-back without a real binary.
    #[test]
    fn dwarf_facts_round_trip_through_warehouse() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let wh = IcebergWarehouse::open(&tmp.path().join(".nornir/warehouse"))
            .expect("open warehouse");

        let symbols = vec![
            sym("foo::Bar::new", "holger/src/bar.rs", "holger", 10),
            sym("foo::baz", "holger/src/lib.rs", "holger", 20),
        ];
        let calls = vec![edge("foo::Bar::new", "foo::baz")];
        let facts = DwarfFacts { symbols, calls };

        let sha = "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef";
        let cache_dir = tmp.path().join(".nornir/cache/dwarf/holger");
        let snap = snapshot_facts(&wh, "ws_t", "holger", sha, "main", &facts, &cache_dir)
            .expect("snapshot dwarf facts");
        assert!(snap.blob_count > 0, "expected at least one blob");
        assert_eq!(snap.git_sha, sha);

        let restore_dir = tmp.path().join("restore/holger");
        let loaded = load_dwarf(&wh, "holger", None, &restore_dir).expect("load dwarf facts");
        assert_eq!(loaded.symbols.len(), 2);
        assert_eq!(loaded.lookup("Bar").len(), 1, "lookup by substring");
        assert_eq!(loaded.defined_in("lib.rs").len(), 1, "defined_in by suffix");
        // The call edges must survive the round trip as well.
        assert_eq!(loaded.callees_of("foo::Bar::new"), vec!["foo::baz"], "callees");
        assert_eq!(loaded.callers_of("foo::baz"), vec!["foo::Bar::new"], "callers");
    }

    /// Loading with an explicit SHA yields that snapshot rather than the latest.
    #[test]
    fn dwarf_load_pins_to_sha() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let wh = IcebergWarehouse::open(&tmp.path().join(".nornir/warehouse"))
            .expect("open warehouse");
        let cache_dir = tmp.path().join(".nornir/cache/dwarf/r");

        // First snapshot: a single symbol, no call edges.
        let sha1 = "1111111111111111111111111111111111111111";
        let first = DwarfFacts {
            symbols: vec![sym("a::one", "r/src/a.rs", "r", 1)],
            calls: Vec::new(),
        };
        snapshot_facts(&wh, "ws", "r", sha1, "main", &first, &cache_dir).expect("snap1");

        // Second snapshot: same repo and cache dir, one more symbol.
        let sha2 = "2222222222222222222222222222222222222222";
        let second = DwarfFacts {
            symbols: vec![
                sym("a::one", "r/src/a.rs", "r", 1),
                sym("a::two", "r/src/a.rs", "r", 2),
            ],
            calls: Vec::new(),
        };
        snapshot_facts(&wh, "ws", "r", sha2, "main", &second, &cache_dir).expect("snap2");

        let restore_root = tmp.path().join("restore");
        let pinned =
            load_dwarf(&wh, "r", Some(sha1), &restore_root.join("s1")).expect("load sha1");
        assert_eq!(pinned.symbols.len(), 1, "sha1 snapshot has one symbol");
        let latest =
            load_dwarf(&wh, "r", None, &restore_root.join("latest")).expect("load latest");
        assert_eq!(latest.symbols.len(), 2, "latest snapshot has two symbols");
    }
}