// nornir 0.5.1 - Docs.rs

//! Read-back queries over the persisted knowledge map (`symbol_facts`,
//! `call_edges`) in iceberg — the counterpart to [`super::scan_repo`]'s
//! writer. Lets an agent answer callers/callees/defined-in/symbol-lookup
//! over the **pure-Rust (syn) facts**, with no compiled binary required
//! (unlike DWARF `introspect`).
//!
//! Spike scope: always reads the *latest* snapshot for `repo` (max ts).
//! A narrow discovery scan picks that snapshot's id, then `snapshot_id`
//! pushdown reads decode only that snapshot's data file(s) instead of
//! scanning the whole table.

use anyhow::{anyhow, Result};
use arrow::array::{Array, Int32Array, RecordBatch, StringArray};
use std::collections::{HashMap, HashSet};
use std::sync::OnceLock;
use znippy_zoomies::stree32::STree32;

use super::symbols::{CallEdgeRow, SymbolRow};
use crate::warehouse::iceberg::{
    discover_latest_snapshot, read_snapshot_batches, IcebergWarehouse, TABLE_CALL_EDGES,
    TABLE_SYMBOL_FACTS,
};

/// The bare identifier of a `::`-separated path: everything after the final
/// `::`, or the whole of `s` when it contains no separator.
fn last_seg(s: &str) -> &str {
    s.rsplit_once("::").map_or(s, |(_, tail)| tail)
}

/// 32-bit **FNV-1a** hash of `s`'s UTF-8 bytes — deterministic and free of
/// dependencies. [`StreeIndex`] maps each inverted-index key to one of these
/// so the sorted-key [`STree32`] can route it.
///
/// A digest equal to `u32::MAX` is reported as `u32::MAX - 1` instead, because
/// (per the original notes) `u32::MAX` is the stree's unused-slot sentinel.
/// The remap can only introduce one more hash collision, and collisions are
/// settled by the exact-string check in [`StreeIndex::resolve`].
#[inline]
fn fnv1a32(s: &str) -> u32 {
    const OFFSET_BASIS: u32 = 0x811c_9dc5;
    const PRIME: u32 = 0x0100_0193;

    let digest = s
        .bytes()
        .fold(OFFSET_BASIS, |acc, byte| (acc ^ u32::from(byte)).wrapping_mul(PRIME));

    match digest {
        u32::MAX => u32::MAX - 1,
        other => other,
    }
}

/// **R2.1 "Ragnar" stree accelerator** — a sorted-key, batched-lookup index
/// that sits *behind* the [`CallIndex`] HashMap path. It reuses the `stree32`
/// static search tree from the sibling `znippy-zoomies` crate (described in
/// the original notes as Ragnar Groot Koerkamp's cache-line Eytzinger S+tree
/// with SIMD + software prefetch) rather than re-implementing it.
///
/// Built ONCE per loaded snapshot from an inverted-index keyset. A query name
/// is hashed to a `u32` with [`fnv1a32`]; a whole batch of hashes is routed
/// through the tree with `batch_stream`, then each hit is **cross-checked
/// against the exact key string** in its hash bucket. A hash collision can
/// therefore never return the wrong posting list: results match
/// `HashMap::get(name)` — the tree changes only *how* a key is located, never
/// *what* is returned. The HashMap stays as the single-lookup path and the
/// fallback when no tree could be built.
///
/// Layout is CSR-style: `sorted_hashes[i]` owns the entries
/// `bucket_off[i]..bucket_off[i + 1]` of the two parallel `entry_*` vectors,
/// so `bucket_off.len() == sorted_hashes.len() + 1`.
struct StreeIndex {
    /// Unique, ascending key-hashes; the [`STree32`] is built over exactly this.
    /// `find_exact`/`batch_stream` results are used as indices INTO this slice
    /// (== bucket id) by [`StreeIndex::resolve`].
    sorted_hashes: Vec<u32>,
    /// CSR bucket bounds into `entry_keys`/`entry_post`: the keys sharing the hash
    /// `sorted_hashes[i]` are the entries `bucket_off[i]..bucket_off[i + 1]`.
    /// Always starts with `0` and holds one more element than `sorted_hashes`.
    bucket_off: Vec<u32>,
    /// Key strings grouped by hash bucket (sorted within a bucket) — the
    /// material for the collision cross-check.
    entry_keys: Vec<String>,
    /// Posting lists parallel to `entry_keys`: each is a clone of the source
    /// map's value for that key, so the stree path yields the identical edge
    /// indices in the identical order as the HashMap path.
    entry_post: Vec<Vec<usize>>,
    /// The static search tree over `sorted_hashes`. `None` for an empty keyset
    /// (no tree is built over zero keys) → callers fall back to the HashMap.
    tree: Option<STree32>,
}

impl std::fmt::Debug for StreeIndex {
    /// Compact summary: number of keys, number of hash buckets, and whether a
    /// tree was built — the bulky vectors themselves are not dumped.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let key_count = self.entry_keys.len();
        let bucket_count = self.sorted_hashes.len();
        let has_tree = self.tree.is_some();

        let mut summary = f.debug_struct("StreeIndex");
        summary.field("keys", &key_count);
        summary.field("buckets", &bucket_count);
        summary.field("built", &has_tree);
        summary.finish()
    }
}

impl StreeIndex {
    /// Build the stree over the keys of an inverted-index map.
    ///
    /// Keys are grouped by their [`fnv1a32`] hash into buckets, the buckets are
    /// laid out in ascending hash order, and within a bucket the keys are sorted
    /// so the layout is reproducible regardless of `HashMap` iteration order
    /// (lookups cross-check by exact string, so in-bucket order never affects
    /// results). Each key's posting list is cloned alongside it.
    ///
    /// An empty `keys` map produces an index with `tree: None`.
    fn build(keys: &HashMap<String, Vec<usize>>) -> Self {
        // Carry the posting-list reference with the key so the map is walked
        // once and never probed again per key.
        let mut by_hash: HashMap<u32, Vec<(&String, &Vec<usize>)>> = HashMap::new();
        for (key, postings) in keys {
            by_hash.entry(fnv1a32(key)).or_default().push((key, postings));
        }

        // Ascending unique hashes: each map entry is one bucket.
        let mut buckets: Vec<(u32, Vec<(&String, &Vec<usize>)>)> = by_hash.into_iter().collect();
        buckets.sort_unstable_by_key(|(hash, _)| *hash);

        let mut sorted_hashes = Vec::with_capacity(buckets.len());
        let mut bucket_off = Vec::with_capacity(buckets.len() + 1);
        let mut entry_keys = Vec::with_capacity(keys.len());
        let mut entry_post = Vec::with_capacity(keys.len());
        bucket_off.push(0u32);
        for (hash, mut bucket) in buckets {
            sorted_hashes.push(hash);
            // Keys are unique map keys, so ordering by key alone is total.
            bucket.sort_unstable_by(|a, b| a.0.cmp(b.0));
            for (key, postings) in bucket {
                entry_keys.push(key.clone());
                entry_post.push(postings.clone());
            }
            // NOTE(review): offsets are stored as `u32`; this assumes the total
            // key count fits in 32 bits.
            bucket_off.push(entry_keys.len() as u32);
        }

        let tree = (!sorted_hashes.is_empty()).then(|| STree32::new(&sorted_hashes));
        StreeIndex { sorted_hashes, bucket_off, entry_keys, entry_post, tree }
    }

    /// Resolve a routed bucket index + the original `name` to its posting list,
    /// scanning that bucket's keys for the EXACT key string. Returns `None` when
    /// the bucket holds only colliding keys that differ from `name`.
    ///
    /// `bucket` must be a valid index into `sorted_hashes` (indexing panics
    /// otherwise).
    #[inline]
    fn resolve(&self, bucket: usize, name: &str) -> Option<&Vec<usize>> {
        let lo = self.bucket_off[bucket] as usize;
        let hi = self.bucket_off[bucket + 1] as usize;
        self.entry_keys[lo..hi]
            .iter()
            .position(|key| key.as_str() == name)
            .map(|at| &self.entry_post[lo + at])
    }

    /// Single-key lookup through the tree (`find_exact` + string cross-check).
    /// Used by the cross-check tests as the scalar-stree oracle; the production
    /// single-lookup path stays on the HashMap. Returns `None` when no tree was
    /// built or the key is absent.
    #[cfg(test)]
    #[inline]
    fn get(&self, name: &str) -> Option<&Vec<usize>> {
        let tree = self.tree.as_ref()?;
        let bucket = tree.find_exact(fnv1a32(name))?;
        self.resolve(bucket, name)
    }

    /// BATCHED lookup. Hashes the whole `names` batch, routes every hash through
    /// the tree in one `batch_stream` pass, and cross-checks each hit by string.
    /// Returns one posting-list reference (or `None`) per input name,
    /// positionally. When no tree was built (empty keyset) every slot is `None`.
    fn get_batch(&self, names: &[&str]) -> Vec<Option<&Vec<usize>>> {
        let Some(tree) = self.tree.as_ref() else {
            return vec![None; names.len()];
        };
        let hashes: Vec<u32> = names.iter().map(|n| fnv1a32(n)).collect();
        let routed = tree.batch_stream::<16>(&hashes);
        names
            .iter()
            .zip(routed)
            .map(|(name, hit)| hit.and_then(|bucket| self.resolve(bucket, name)))
            .collect()
    }
}

/// **R0.3 "Ragnar" inverted index** over a [`KnowledgeView`]'s `calls`, built
/// ONCE per loaded snapshot and reused across every `callers_of` / `callees_of`
/// / `call_path` call (memoized behind a [`OnceLock`]). Replaces the previous
/// O(N) linear `Vec` scans (`ends_with("::name")` + per-row allocs) and the
/// per-call adjacency-map rebuild with O(hits) posting-list lookups.
///
/// **R2.1** adds a sorted-key [`StreeIndex`] (Ragnar's `stree32`) behind the two
/// posting maps, built lazily on the first BATCHED query and memoized in its
/// own [`OnceLock`].
///
/// **Correctness contract:** every lookup returns EXACTLY the same edge set,
/// in the same order, as the old linear scan (see the `*_matches_linear_scan`
/// tests). The posting lists hold edge indices in ascending `calls` order, so
/// iterating a list yields the matching edges in their original `calls` order.
///
/// `Default` yields an entirely empty index; `CallIndex::build` starts from it.
#[derive(Debug, Default)]
struct CallIndex {
    /// `name` → indices of edges whose `callee_ident` matches `callers_of(name)`
    /// (i.e. `callee == name` OR `callee.ends_with("::name")`). Keyed by every
    /// segment-suffix of each `callee_ident` so a single `.get(name)` is exact.
    callee_keys: HashMap<String, Vec<usize>>,
    /// `name` → indices of edges whose `caller_path` matches `callees_of(name)`;
    /// keyed by every segment-suffix of each `caller_path`, like `callee_keys`.
    caller_keys: HashMap<String, Vec<usize>>,
    /// Memoized BFS adjacency at identifier granularity: `last_seg(caller)` →
    /// `last_seg(callee)` in edge order (so `call_path` no longer rebuilds it).
    /// Repeated edges are kept as repeated entries.
    adj: HashMap<String, Vec<String>>,
    /// All nodes (last segments) seen as a caller or callee.
    nodes: HashSet<String>,
    /// R2.1 stree over `callee_keys` (the `callers_of` keyset), built on first
    /// batched query and memoized. Empty/`None`-tree ⇒ batch falls back.
    callee_stree: OnceLock<StreeIndex>,
    /// R2.1 stree over `caller_keys` (the `callees_of` keyset), built on first
    /// batched query and memoized in the same way.
    caller_stree: OnceLock<StreeIndex>,
}

impl CallIndex {
    /// Stree over the `callee_keys` map (serves `callers_of` batches). Built on
    /// the first call and cached for the lifetime of this index.
    fn callee_stree(&self) -> &StreeIndex {
        let build_from_callees = || StreeIndex::build(&self.callee_keys);
        self.callee_stree.get_or_init(build_from_callees)
    }

    /// Stree over the `caller_keys` map (serves `callees_of` batches). Built on
    /// the first call and cached for the lifetime of this index.
    fn caller_stree(&self) -> &StreeIndex {
        let build_from_callers = || StreeIndex::build(&self.caller_keys);
        self.caller_stree.get_or_init(build_from_callers)
    }
}

impl CallIndex {
    /// Record edge index `i` under `path` itself and under every suffix of
    /// `path` that starts right after a `::` separator.
    ///
    /// A query `name` matches `path` iff `path == name` OR
    /// `path.ends_with("::name")`, so those suffixes are exactly the names that
    /// match. They all differ in length, hence `i` is pushed at most once per
    /// posting list — the same once-per-edge semantics as a linear scan.
    fn index_suffixes(keys: &mut HashMap<String, Vec<usize>>, path: &str, i: usize) {
        // `match_indices` yields non-overlapping separators left to right, the
        // same positions a repeated `find("::")` + skip-two walk visits.
        let tails = path
            .match_indices("::")
            .map(|(at, sep)| &path[at + sep.len()..]);
        for suffix in std::iter::once(path).chain(tails) {
            keys.entry(suffix.to_owned()).or_default().push(i);
        }
    }

    /// Build the whole index in one pass over `calls`, visiting edges in order
    /// so every posting list and adjacency list keeps `calls` order.
    fn build(calls: &[CallEdgeRow]) -> Self {
        let mut index = CallIndex::default();
        for (edge_no, edge) in calls.iter().enumerate() {
            Self::index_suffixes(&mut index.callee_keys, &edge.callee_ident, edge_no);
            Self::index_suffixes(&mut index.caller_keys, &edge.caller_path, edge_no);

            // Identifier-granularity graph: nodes are last path segments.
            let caller = last_seg(&edge.caller_path).to_string();
            let callee = last_seg(&edge.callee_ident).to_string();
            index.nodes.insert(caller.clone());
            index.nodes.insert(callee.clone());
            index.adj.entry(caller).or_default().push(callee);
        }
        index
    }
}

/// Latest persisted symbols + calls for `repo`, plus a lazily-built lookup
/// index over the calls. Construct with [`KnowledgeView::new`]; the private
/// `index` field prevents struct-literal construction outside this module.
pub struct KnowledgeView {
    /// Symbol rows of the view, in the order they were loaded.
    pub symbols: Vec<SymbolRow>,
    /// Call-edge rows of the view; posting lists in the index refer to
    /// positions in this vector.
    pub calls: Vec<CallEdgeRow>,
    /// Lazily-built, memoized inverted index over `calls` (R0.3). Private: it is
    /// pure derived state, rebuilt from `calls` on first query and never serialized.
    /// NOTE(review): once built it is not refreshed, so mutating `calls`
    /// afterwards would leave it stale — confirm callers treat `calls` as frozen.
    index: OnceLock<CallIndex>,
}

impl KnowledgeView {
    /// Wrap `symbols` + `calls` in a view. No indexing work happens here: the
    /// inverted index cell starts unset and is filled by the first call-graph
    /// query (see [`CallIndex`]).
    pub fn new(symbols: Vec<SymbolRow>, calls: Vec<CallEdgeRow>) -> Self {
        Self { index: OnceLock::new(), symbols, calls }
    }

    /// Shared access to the inverted index, building it from `calls` the first
    /// time it is requested and returning the cached copy afterwards.
    fn index(&self) -> &CallIndex {
        let edges = self.calls.as_slice();
        self.index.get_or_init(|| CallIndex::build(edges))
    }
}

/// Read-amplification audit for [`load_latest_audited`] — how many rows the query
/// actually decoded, split by phase. `data_rows` is the count that matters: the
/// FULL-WIDTH rows materialized into `SymbolRow`/`CallEdgeRow`. The old full-scan
/// path decoded every snapshot's rows here; the snapshot-pushdown path decodes
/// only the latest snapshot's. Surfaced for the FAIL-ON-BUG/PASS-ON-FIX test.
///
/// The `Default` value (both counters zero) is what [`load_latest_audited`]
/// reports when no snapshot exists for the repo.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub(crate) struct ScanAudit {
    /// Rows read by the narrow (3-column) snapshot-discovery scan, as reported
    /// by `discover_latest_snapshot`.
    pub discovery_rows: usize,
    /// FULL-WIDTH rows decoded into the view (`symbol_facts` + `call_edges`);
    /// only rows that pass the `snapshot_id` guard are counted.
    pub data_rows: usize,
}

/// Fetch the column called `name` from `batch` and downcast it to the concrete
/// array type `T`.
///
/// # Errors
/// Fails when `batch` has no column of that name, or when the column is not a
/// `T`.
fn col<'a, T: 'static>(batch: &'a RecordBatch, name: &str) -> Result<&'a T> {
    let Some(column) = batch.column_by_name(name) else {
        return Err(anyhow!("missing column `{name}`"));
    };
    match column.as_any().downcast_ref::<T>() {
        Some(typed) => Ok(typed),
        None => Err(anyhow!("column `{name}` has unexpected type")),
    }
}

/// Load the latest-snapshot symbols + calls for `repo` from iceberg.
///
/// Thin wrapper over [`load_latest_audited`] that discards the scan audit.
///
/// # Errors
/// Propagates any error from [`load_latest_audited`].
pub fn load_latest(wh: &IcebergWarehouse, repo: &str) -> Result<KnowledgeView> {
    let (view, _audit) = load_latest_audited(wh, repo)?;
    Ok(view)
}

/// [`load_latest`] plus a [`ScanAudit`] of how many rows were decoded.
///
/// Two-phase, byte-identical to the old full-scan path (same max-`ts_micros`
/// first-seen-wins snapshot pick, same residual `snapshot_id == snap` row guard,
/// same row order), but with the wide decode pruned to one snapshot:
///   1. `discover_latest_snapshot` — narrow scan of `symbol_facts` picks the
///      latest `snapshot_id` for `repo`.
///   2. `read_snapshot_batches` — `snapshot_id`-pushdown reads of `symbol_facts`
///      and `call_edges`, decoding ONLY that snapshot's data file(s).
///
/// `call_edges` reuses the `symbol_facts` snapshot id (the two are written under
/// the same scan stamp), exactly as the old path applied `symbol_facts`'s `snap`
/// to the calls scan.
///
/// When discovery finds no snapshot for `repo`, returns an empty view and a
/// default (all-zero) audit.
///
/// # Errors
/// Propagates failures from snapshot discovery and the batch reads, and fails
/// when an expected column is missing or has an unexpected Arrow type.
pub(crate) fn load_latest_audited(
    wh: &IcebergWarehouse,
    repo: &str,
) -> Result<(KnowledgeView, ScanAudit)> {
    wh.block_on(async {
        // ── phase 1: discover the latest snapshot (narrow scan) ───
        // `None` means nothing is persisted for `repo`: return an empty view.
        let Some((snap, discovery_rows)) =
            discover_latest_snapshot(wh, TABLE_SYMBOL_FACTS, repo).await?
        else {
            return Ok((KnowledgeView::new(vec![], vec![]), ScanAudit::default()));
        };
        // Counts full-width rows from BOTH tables that pass the snapshot guard.
        let mut data_rows = 0usize;

        // ── phase 2a: symbols (latest snapshot only) ──────────────
        let s_batches = read_snapshot_batches(wh, TABLE_SYMBOL_FACTS, &snap).await?;
        let mut symbols = Vec::new();
        for b in &s_batches {
            // Resolve every column once per batch, before the row loop.
            let snaps = col::<StringArray>(b, "snapshot_id")?;
            let crate_name = col::<StringArray>(b, "crate_name")?;
            let module_path = col::<StringArray>(b, "module_path")?;
            let item_kind = col::<StringArray>(b, "item_kind")?;
            let item_name = col::<StringArray>(b, "item_name")?;
            let visibility = col::<StringArray>(b, "visibility")?;
            let file = col::<StringArray>(b, "file")?;
            let line = col::<Int32Array>(b, "line")?;
            let doc_lines = col::<Int32Array>(b, "doc_lines")?;
            let signature = col::<StringArray>(b, "signature")?;
            for i in 0..b.num_rows() {
                // Residual guard: pushdown prunes whole files, so a non-`snap`
                // row can only appear if a future writer ever co-locates two
                // snapshots in one file — keep the guard to stay byte-identical.
                if snaps.value(i) != snap {
                    continue;
                }
                data_rows += 1;
                let sig = signature.value(i);
                symbols.push(SymbolRow {
                    crate_name: crate_name.value(i).to_string(),
                    module_path: module_path.value(i).to_string(),
                    item_kind: item_kind.value(i).to_string(),
                    item_name: item_name.value(i).to_string(),
                    visibility: visibility.value(i).to_string(),
                    file: file.value(i).to_string(),
                    // Negative stored values are clamped to 0 before the cast.
                    line: line.value(i).max(0) as u32,
                    doc_lines: doc_lines.value(i).max(0) as u32,
                    // An empty stored signature is read back as "no signature".
                    signature: if sig.is_empty() { None } else { Some(sig.to_string()) },
                });
            }
        }

        // ── phase 2b: calls (same snapshot id as the symbols) ─────
        let c_batches = read_snapshot_batches(wh, TABLE_CALL_EDGES, &snap).await?;
        let mut calls = Vec::new();
        for b in &c_batches {
            let snaps = col::<StringArray>(b, "snapshot_id")?;
            let crate_name = col::<StringArray>(b, "crate_name")?;
            let caller = col::<StringArray>(b, "caller_path")?;
            let callee = col::<StringArray>(b, "callee_ident")?;
            let kind = col::<StringArray>(b, "call_kind")?;
            let file = col::<StringArray>(b, "file")?;
            let line = col::<Int32Array>(b, "line")?;
            for i in 0..b.num_rows() {
                // Same residual snapshot guard as for the symbol rows.
                if snaps.value(i) != snap {
                    continue;
                }
                data_rows += 1;
                calls.push(CallEdgeRow {
                    crate_name: crate_name.value(i).to_string(),
                    caller_path: caller.value(i).to_string(),
                    callee_ident: callee.value(i).to_string(),
                    call_kind: kind.value(i).to_string(),
                    file: file.value(i).to_string(),
                    // Negative stored values are clamped to 0 before the cast.
                    line: line.value(i).max(0) as u32,
                });
            }
        }

        Ok((KnowledgeView::new(symbols, calls), ScanAudit { discovery_rows, data_rows }))
    })
}

/// Load the latest-snapshot RESOLVED (SCIP) knowledge map for `repo` as a
/// [`KnowledgeView`]: `calls` come from [`super::scip::scip_call_edges`] (edges
/// built by *containment* from the SCIP occurrences) and `symbols` are the
/// definition occurrences converted to [`SymbolRow`]s.
///
/// This is the FULL-WIRING preference source: every reference carries its
/// globally-unique resolved `symbol`, so edges do not collide across
/// name-sharing functions and *do* span cross-crate (bin→lib) calls that the
/// name-based syn `call_edges` miss.
///
/// Returns `Ok(None)` when the repo has no persisted SCIP rows, letting the
/// caller fall back to the syn [`load_latest`]. Gated on the `scip` feature.
///
/// # Errors
/// Propagates any error from the warehouse's SCIP load.
#[cfg(feature = "scip")]
pub fn load_latest_scip(
    wh: &IcebergWarehouse,
    repo: &str,
) -> Result<Option<KnowledgeView>> {
    let scan = wh.load_latest_scip(repo)?;
    let view = if scan.rows.is_empty() {
        None
    } else {
        let edges = super::scip::scip_call_edges(&scan);
        let definitions = scan
            .rows
            .iter()
            .filter(|row| row.is_definition)
            .map(scip_symbol_row)
            .collect();
        Some(KnowledgeView::new(definitions, edges))
    };
    Ok(view)
}

/// Convert one resolved DEFINITION occurrence into a [`SymbolRow`].
///
/// The resolved moniker carries no crate/module split, so the moniker itself is
/// stored as `module_path` for traceability and `item_kind` is the SCIP kind.
/// `item_name` is the display name, falling back to the moniker when the
/// display name is empty. Crate name, visibility, doc-line count and signature
/// are left empty/zero/`None`.
#[cfg(feature = "scip")]
fn scip_symbol_row(r: &super::scip::ScipRow) -> SymbolRow {
    let shown_name = if r.display_name.is_empty() { &r.symbol } else { &r.display_name };
    SymbolRow {
        item_name: shown_name.clone(),
        item_kind: r.kind.clone(),
        module_path: r.symbol.clone(),
        file: r.file.clone(),
        line: r.start_line,
        crate_name: String::new(),
        visibility: String::new(),
        doc_lines: 0,
        signature: None,
    }
}

/// **CROSS-BINARY PREFERENCE HELPER (S6b)** — load + merge the call-graph
/// [`KnowledgeView`] for a whole set of workspace `members`, resolving calls that
/// CROSS the binary/crate boundary by joining on SCIP monikers.
///
/// The per-member [`load_preferred`] builds resolved edges from a SINGLE scan, so
/// a call from member A (a binary) to a function DEFINED in member B (a lib) is
/// dropped: B's definition is in B's index, not A's, so A's scan cannot name the
/// callee and that rail silently falls back to syn. This helper fixes that:
///
///  1. Load every member's latest resolved SCIP scan.
///  2. Build ONE global moniker → (kind, name) table across ALL of them
///     ([`super::scip::global_symbol_table`]).
///  3. Build each member's edges with [`super::scip::scip_call_edges_with`] so a
///     reference whose def lives in ANOTHER member resolves via the moniker.
///  4. Members with NO resolved rows fall back to their syn [`load_latest`] view,
///     merged in, so a partially-indexed workspace still draws every rail.
///
/// Returns `(merged_view, source)` with the same tag convention as
/// [`load_preferred`]: `"resolved/scip"` when ≥1 member contributed resolved
/// rows, else `"syn"` when only name-based facts were found, else `""` (no data).
pub fn load_preferred_merged(
    wh: &IcebergWarehouse,
    members: &[String],
) -> Result<(KnowledgeView, &'static str)> {
    let mut symbols: Vec<SymbolRow> = Vec::new();
    let mut calls: Vec<CallEdgeRow> = Vec::new();
    #[allow(unused_mut)]
    let mut any_resolved = false;
    #[allow(unused_mut)]
    let mut resolved_members: std::collections::HashSet<String> = std::collections::HashSet::new();

    #[cfg(feature = "scip")]
    {
        // 1. Load every member's resolved scan (remember which had rows).
        let mut scans = Vec::new();
        for m in members {
            let scan = wh.load_latest_scip(m)?;
            if !scan.rows.is_empty() {
                resolved_members.insert(m.clone());
                scans.push(scan);
            }
        }
        if !scans.is_empty() {
            any_resolved = true;
            // 2. ONE global moniker table across ALL resolved members.
            let refs: Vec<&super::scip::ScipScan> = scans.iter().collect();
            let globals = super::scip::global_symbol_table(&refs);
            // 3. Per-member edges, joined on the global monikers.
            for scan in &scans {
                calls.extend(super::scip::scip_call_edges_with(scan, &globals));
                symbols.extend(scan.rows.iter().filter(|r| r.is_definition).map(scip_symbol_row));
            }
        }
    }

    // 4. Members without resolved rows → their syn view, merged in.
    let mut any_syn = false;
    for m in members {
        if resolved_members.contains(m) {
            continue;
        }
        let view = load_latest(wh, m)?;
        if !view.symbols.is_empty() || !view.calls.is_empty() {
            any_syn = true;
            symbols.extend(view.symbols);
            calls.extend(view.calls);
        }
    }

    let source = if any_resolved {
        "resolved/scip"
    } else if any_syn {
        "syn"
    } else {
        ""
    };
    Ok((KnowledgeView::new(symbols, calls), source))
}

/// **THE PREFERENCE HELPER** — load the call-graph [`KnowledgeView`] for `repo`,
/// PREFERRING the RESOLVED SCIP map ([`load_latest_scip`]) when the warehouse has
/// SCIP rows for the repo, and otherwise using the syn [`load_latest`].
///
/// Returns `(view, source)`, where `source` is a stable tag — `"resolved/scip"`
/// or `"syn"` — that callers can log or surface so an operator can see WHICH
/// index answered. This is the single chokepoint for the role-agnostic
/// call-graph consumers (`callers_of` / `callees_of` / `call_path`, the metro
/// feed), so the syn-vs-scip choice is made in exactly one place.
///
/// On the default (no-`scip`) build the resolved branch is compiled out and the
/// result is always the syn view tagged `"syn"`.
///
/// # Errors
/// Propagates any error from the SCIP or syn load.
pub fn load_preferred(wh: &IcebergWarehouse, repo: &str) -> Result<(KnowledgeView, &'static str)> {
    #[cfg(feature = "scip")]
    {
        let resolved = load_latest_scip(wh, repo)?;
        if let Some(view) = resolved {
            return Ok((view, "resolved/scip"));
        }
    }
    let view = load_latest(wh, repo)?;
    Ok((view, "syn"))
}

impl KnowledgeView {
    /// Up to `limit` symbols whose `item_name` contains `pattern`, compared
    /// case-insensitively (both sides lowercased), in `symbols` order.
    pub fn symbol_lookup(&self, pattern: &str, limit: usize) -> Vec<&SymbolRow> {
        let needle = pattern.to_lowercase();
        let mut found = Vec::new();
        for sym in self
            .symbols
            .iter()
            .filter(|sym| sym.item_name.to_lowercase().contains(needle.as_str()))
            .take(limit)
        {
            found.push(sym);
        }
        found
    }

    /// Symbols whose `file` path ends with `suffix`, in `symbols` order.
    pub fn defined_in(&self, suffix: &str) -> Vec<&SymbolRow> {
        let mut found = Vec::new();
        for sym in &self.symbols {
            if sym.file.ends_with(suffix) {
                found.push(sym);
            }
        }
        found
    }

    /// Call edges that *invoke* `name`: the `callee_ident` is either exactly
    /// `name` (bare method calls like `.new()`) or a path whose trailing
    /// `::`-separated part is `name` (`Arc::new` and `Foo::new` both match
    /// `new`). The separator is mandatory, so `new` does not match `renew`.
    ///
    /// One posting-list lookup in the memoized [`CallIndex`]; the edges come
    /// back in their original `calls` order, as the earlier linear
    /// `callee_ident == name || ends_with("::name")` scan produced them.
    pub fn callers_of(&self, name: &str) -> Vec<&CallEdgeRow> {
        self.index()
            .callee_keys
            .get(name)
            .map_or_else(Vec::new, |hits| hits.iter().map(|&i| &self.calls[i]).collect())
    }

    /// Call edges *from* a caller whose `caller_path` is `name` or ends with
    /// `::name`.
    ///
    /// One posting-list lookup in the memoized [`CallIndex`]; the edges come
    /// back in their original `calls` order, as the earlier linear
    /// `caller_path == name || ends_with("::name")` scan produced them.
    pub fn callees_of(&self, name: &str) -> Vec<&CallEdgeRow> {
        self.index()
            .caller_keys
            .get(name)
            .map_or_else(Vec::new, |hits| hits.iter().map(|&i| &self.calls[i]).collect())
    }

    /// **R2.1 BATCHED `callers_of`** — resolve MANY callee names in one call
    /// through the "Ragnar" sorted-key stree (`stree32`): the names are hashed,
    /// routed through the tree as one batch, then cross-checked by string.
    /// Element `k` of the result holds the matching edges for `names[k]`.
    ///
    /// **Identical results** to calling [`callers_of`](Self::callers_of) per
    /// name: the stree's posting lists are clones of the `callee_keys` entries,
    /// mapped to the same `&CallEdgeRow`s in the same order. When no tree was
    /// built (empty keyset) each name goes through the HashMap path instead.
    pub fn callers_of_batch(&self, names: &[&str]) -> Vec<Vec<&CallEdgeRow>> {
        let stree = self.index().callee_stree();
        if stree.tree.is_some() {
            stree
                .get_batch(names)
                .into_iter()
                .map(|hit| {
                    hit.map_or_else(Vec::new, |post| {
                        post.iter().map(|&i| &self.calls[i]).collect()
                    })
                })
                .collect()
        } else {
            names.iter().map(|n| self.callers_of(n)).collect()
        }
    }

    /// **R2.1 BATCHED `callees_of`** — the [`callers_of_batch`](Self::callers_of_batch)
    /// counterpart over the `caller_path` keyset. Identical results to
    /// [`callees_of`](Self::callees_of) called per name; when no tree was built
    /// each name goes through the HashMap path instead.
    pub fn callees_of_batch(&self, names: &[&str]) -> Vec<Vec<&CallEdgeRow>> {
        let stree = self.index().caller_stree();
        if stree.tree.is_some() {
            stree
                .get_batch(names)
                .into_iter()
                .map(|hit| {
                    hit.map_or_else(Vec::new, |post| {
                        post.iter().map(|&i| &self.calls[i]).collect()
                    })
                })
                .collect()
        } else {
            names.iter().map(|n| self.callees_of(n)).collect()
        }
    }

    /// Shortest call chain from `from` to `to` over the persisted call edges
    /// (BFS following caller → callee), at **identifier granularity**: both
    /// endpoints are reduced to their last path segment, so `build`/`new`
    /// match `Index::build`/`Arc::new`. Returns the identifiers from `from` to
    /// `to` inclusive, or `None` when `to` is unreachable or `from` is unknown.
    /// When both reduce to the same identifier the result is that single node,
    /// provided it appears in the graph.
    ///
    /// Approximate by construction: the syn facts record callees as
    /// identifiers (`Arc::new` is stored path-qualified, a bare `.new()` is
    /// not), never as fully-resolved defining paths, so distinct functions
    /// that share a name collapse to one node. Use it to surface *a* plausible
    /// call chain (like `dep_path` for repos), not a guaranteed-unique one.
    pub fn call_path(&self, from: &str, to: &str) -> Option<Vec<String>> {
        use std::collections::VecDeque;

        let start = last_seg(from);
        let goal = last_seg(to);

        // Adjacency + node set are memoized in `CallIndex`; per-node callee
        // lists keep edge order, which fixes the BFS visiting order.
        let graph = self.index();

        if start == goal {
            return graph.nodes.contains(start).then(|| vec![start.to_string()]);
        }
        if !graph.nodes.contains(start) {
            return None;
        }

        // The search only borrows names; owned strings are produced solely
        // for the returned path.
        let mut came_from: HashMap<&str, &str> = HashMap::new();
        let mut visited: HashSet<&str> = HashSet::new();
        let mut frontier: VecDeque<&str> = VecDeque::new();
        visited.insert(start);
        frontier.push_back(start);

        while let Some(current) = frontier.pop_front() {
            let Some(callees) = graph.adj.get(current) else { continue };
            for next in callees.iter().map(String::as_str) {
                if !visited.insert(next) {
                    continue;
                }
                came_from.insert(next, current);
                if next == goal {
                    // Walk the parent links back to `start`, then flip.
                    let mut path = vec![goal.to_string()];
                    let mut node = goal;
                    while let Some(&prev) = came_from.get(node) {
                        path.push(prev.to_string());
                        node = prev;
                    }
                    path.reverse();
                    return Some(path);
                }
                frontier.push_back(next);
            }
        }
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::knowledge::symbols::CallEdgeRow;

    fn edge(callee: &str) -> CallEdgeRow {
        CallEdgeRow {
            crate_name: "demo".into(),
            caller_path: "demo::f".into(),
            callee_ident: callee.into(),
            call_kind: "call".into(),
            file: "src/lib.rs".into(),
            line: 1,
        }
    }

    /// Test edge `caller` → `callee` with fixed fixture values for the crate
    /// name, call kind, file and line.
    fn edge_from(caller: &str, callee: &str) -> CallEdgeRow {
        let crate_name = String::from("demo");
        let call_kind = String::from("call");
        let file = String::from("src/lib.rs");
        CallEdgeRow {
            crate_name,
            caller_path: caller.to_owned(),
            callee_ident: callee.to_owned(),
            call_kind,
            file,
            line: 1,
        }
    }

    /// R0.2 FAIL-ON-BUG / PASS-ON-FIX. Build one warehouse fixture with THREE
    /// `demo` snapshots (growing history) plus a second repo, then run the OLD
    /// full-history scan algorithm and the NEW snapshot-pushdown path (exercised
    /// here through `load_latest_audited`) against it and assert:
    ///   (a) the two results are byte-identical (serialized), and
    ///   (b) the NEW path decodes strictly FEWER full-width rows than OLD
    ///       (`audit.data_rows < old_rows`), confirmed independently by
    ///       `plan_stats`: a `snapshot_id` predicate prunes to one snapshot's
    ///       data file, where the `repo` predicate plans the whole history.
    ///
    /// Validated as DATA (serialized views + row counts), never by eyeballing.
    /// If the snapshot-pushdown wiring is reverted, the NEW path reads the whole
    /// history → `data_rows == old_rows` → assertion (b) FAILS.
    #[test]
    fn load_latest_snapshot_pushdown_is_byte_identical_and_scans_fewer_rows() {
        use arrow::array::TimestampMicrosecondArray;
        use chrono::Duration;
        use iceberg::Catalog;

        use crate::knowledge::symbols::{SymbolRow, SymbolScan};
        use crate::warehouse::iceberg::IcebergWarehouse;

        // Minimal `pub fn {name}()` symbol row in crate `demo`; only the name varies.
        fn sym(name: &str) -> SymbolRow {
            SymbolRow {
                crate_name: "demo".into(),
                module_path: format!("demo::{name}"),
                item_kind: "fn".into(),
                item_name: name.into(),
                visibility: "pub".into(),
                file: "src/lib.rs".into(),
                line: 1,
                doc_lines: 0,
                signature: Some(format!("fn {name}()")),
            }
        }
        // Call edge whose caller path is `demo::{caller}`; the callee is stored as given.
        fn call(caller: &str, callee: &str) -> CallEdgeRow {
            CallEdgeRow {
                crate_name: "demo".into(),
                caller_path: format!("demo::{caller}"),
                callee_ident: callee.into(),
                call_kind: "call".into(),
                file: "src/lib.rs".into(),
                line: 2,
            }
        }

        let dir = tempfile::tempdir().unwrap();
        let wh = IcebergWarehouse::open(dir.path()).unwrap();
        let base = chrono::Utc::now();
        // Appends one scan for `repo`, timestamped `base + secs` seconds, under a
        // freshly generated snapshot id.
        let append = |repo: &str, secs: i64, symbols: Vec<SymbolRow>, calls: Vec<CallEdgeRow>| {
            let scan = SymbolScan {
                snapshot_id: uuid::Uuid::new_v4(),
                ts: base + Duration::seconds(secs),
                repo: repo.to_string(),
                symbols,
                calls,
                features: vec![],
                tests: vec![],
            };
            wh.append_symbol_scan(&scan).unwrap();
        };

        // demo: three snapshots of growing history; the LATEST (secs=2) is the
        // only one load_latest must surface.
        append("demo", 0, vec![sym("a1")], vec![call("a1", "new")]);
        append("demo", 1, vec![sym("a2"), sym("b2")], vec![call("a2", "push")]);
        append(
            "demo",
            2,
            vec![sym("a3"), sym("b3"), sym("c3")],
            vec![call("a3", "read"), call("b3", "write")],
        );
        // A second repo, appended LAST (max ts table-wide) — proves the latest
        // pick is per-repo and the snapshot prune cannot leak it.
        append("other", 9, vec![sym("o1")], vec![call("o1", "leak")]);

        // ── OLD reference: the pre-R0.2 full-history scan + RAM 2-pass. ──────
        let (old_view, old_rows, old_snap) = wh
            .block_on(async {
                let st = wh.catalog().load_table(&wh.table_ident(TABLE_SYMBOL_FACTS)).await?;
                let s_batches =
                    skade::read_filtered(&st, &skade::ScanFilter::eq("repo", "demo"), &[]).await?;
                // Pass 1: among the repo-filtered symbol rows, remember the
                // snapshot id carrying the largest `ts_micros`.
                let mut latest: Option<(String, i64)> = None;
                for b in &s_batches {
                    let snaps = col::<StringArray>(b, "snapshot_id")?;
                    let ts = col::<TimestampMicrosecondArray>(b, "ts_micros")?;
                    for i in 0..b.num_rows() {
                        let t = ts.value(i);
                        if latest.as_ref().map(|(_, lt)| t > *lt).unwrap_or(true) {
                            latest = Some((snaps.value(i).to_string(), t));
                        }
                    }
                }
                let (snap, _) = latest.unwrap();
                // Pass 2: decode every row, keeping only those of `snap`. `rows`
                // is bumped BEFORE the snapshot filter, so it measures the
                // full-history cost of this algorithm.
                let mut rows = 0usize;
                let mut symbols = Vec::new();
                for b in &s_batches {
                    rows += b.num_rows();
                    let snaps = col::<StringArray>(b, "snapshot_id")?;
                    let ck = col::<StringArray>(b, "crate_name")?;
                    let mp = col::<StringArray>(b, "module_path")?;
                    let ik = col::<StringArray>(b, "item_kind")?;
                    let it = col::<StringArray>(b, "item_name")?;
                    let vis = col::<StringArray>(b, "visibility")?;
                    let file = col::<StringArray>(b, "file")?;
                    let line = col::<Int32Array>(b, "line")?;
                    let doc = col::<Int32Array>(b, "doc_lines")?;
                    let sig = col::<StringArray>(b, "signature")?;
                    for i in 0..b.num_rows() {
                        if snaps.value(i) != snap {
                            continue;
                        }
                        let s = sig.value(i);
                        symbols.push(SymbolRow {
                            crate_name: ck.value(i).to_string(),
                            module_path: mp.value(i).to_string(),
                            item_kind: ik.value(i).to_string(),
                            item_name: it.value(i).to_string(),
                            visibility: vis.value(i).to_string(),
                            file: file.value(i).to_string(),
                            // Clamp negatives to 0 before the unsigned cast.
                            line: line.value(i).max(0) as u32,
                            doc_lines: doc.value(i).max(0) as u32,
                            // An empty stored signature is read back as `None`.
                            signature: if s.is_empty() { None } else { Some(s.to_string()) },
                        });
                    }
                }
                // Same filter-then-keep pass over the call-edge table, reusing
                // the snapshot id picked from the symbol table above.
                let ct = wh.catalog().load_table(&wh.table_ident(TABLE_CALL_EDGES)).await?;
                let c_batches =
                    skade::read_filtered(&ct, &skade::ScanFilter::eq("repo", "demo"), &[]).await?;
                let mut calls = Vec::new();
                for b in &c_batches {
                    rows += b.num_rows();
                    let snaps = col::<StringArray>(b, "snapshot_id")?;
                    let ck = col::<StringArray>(b, "crate_name")?;
                    let caller = col::<StringArray>(b, "caller_path")?;
                    let callee = col::<StringArray>(b, "callee_ident")?;
                    let kind = col::<StringArray>(b, "call_kind")?;
                    let file = col::<StringArray>(b, "file")?;
                    let line = col::<Int32Array>(b, "line")?;
                    for i in 0..b.num_rows() {
                        if snaps.value(i) != snap {
                            continue;
                        }
                        calls.push(CallEdgeRow {
                            crate_name: ck.value(i).to_string(),
                            caller_path: caller.value(i).to_string(),
                            callee_ident: callee.value(i).to_string(),
                            call_kind: kind.value(i).to_string(),
                            file: file.value(i).to_string(),
                            line: line.value(i).max(0) as u32,
                        });
                    }
                }
                anyhow::Ok((KnowledgeView::new(symbols, calls), rows, snap))
            })
            .unwrap();

        // ── NEW production path (snapshot pushdown). ────────────────────────
        let (new_view, audit) = load_latest_audited(&wh, "demo").unwrap();

        // (a) byte-identical results (serialize — the rows derive Serialize).
        let ser = |v: &KnowledgeView| {
            (serde_json::to_value(&v.symbols).unwrap(), serde_json::to_value(&v.calls).unwrap())
        };
        assert_eq!(ser(&new_view), ser(&old_view), "NEW must equal OLD byte-for-byte");
        // sanity: it really is the latest snapshot (3 syms / 2 calls), not history.
        assert_eq!(new_view.symbols.len(), 3);
        assert_eq!(new_view.calls.len(), 2);
        assert_eq!(
            new_view.symbols.iter().map(|s| s.item_name.as_str()).collect::<Vec<_>>(),
            vec!["a3", "b3", "c3"],
        );

        // (b) fewer rows scanned. OLD decoded all 3 snapshots' full-width rows
        // (6 symbols + 4 calls); NEW decodes only the latest (3 + 2).
        assert_eq!(old_rows, 10, "old full-history scan touches every snapshot");
        assert_eq!(audit.data_rows, 5, "new decodes only the latest snapshot");
        assert!(
            audit.data_rows < old_rows,
            "snapshot pushdown must decode fewer full-width rows: new={} old={}",
            audit.data_rows,
            old_rows,
        );

        // Independent plan-time proof of the file prune (no data read): the
        // snapshot predicate plans one snapshot's rows; the repo predicate plans
        // the whole history.
        let (repo_rows, snap_rows) = wh
            .block_on(async {
                let st = wh.catalog().load_table(&wh.table_ident(TABLE_SYMBOL_FACTS)).await?;
                let repo_plan =
                    skade::plan_stats(&st, Some(&skade::ScanFilter::eq("repo", "demo"))).await?;
                let snap_plan = skade::plan_stats(
                    &st,
                    Some(&skade::ScanFilter::eq("snapshot_id", old_snap.clone())),
                )
                .await?;
                anyhow::Ok((repo_plan.rows_planned, snap_plan.rows_planned))
            })
            .unwrap();
        assert_eq!(repo_rows, 6, "repo predicate plans the whole demo history");
        assert_eq!(snap_rows, 3, "snapshot predicate prunes to one snapshot's file");
        assert!(snap_rows < repo_rows);
    }

    /// `callers_of(name)` hits a callee that IS `name` or ends in `::name`, and
    /// nothing else — `renew` shares a suffix with `new` but lacks the `::`
    /// boundary, so it must stay out.
    #[test]
    fn callers_of_matches_last_segment_and_bare() {
        /// Callee idents of every edge `callers_of(query)` returns, in order.
        fn hit_idents<'v>(view: &'v KnowledgeView, query: &str) -> Vec<&'v str> {
            view.callers_of(query)
                .into_iter()
                .map(|row| row.callee_ident.as_str())
                .collect()
        }

        // bare · path-qualified · path-qualified · decoy (no `::` boundary) · unrelated
        let callees = ["new", "Arc::new", "Foo::new", "renew", "Foo::make"];
        let calls: Vec<CallEdgeRow> = callees.iter().map(|&callee| edge(callee)).collect();
        let view = KnowledgeView::new(vec![], calls);

        let hits = hit_idents(&view, "new");
        assert!(hits.contains(&"new"));
        assert!(hits.contains(&"Arc::new"));
        assert!(hits.contains(&"Foo::new"));
        assert!(!hits.contains(&"renew"), "{hits:?}");
        assert!(!hits.contains(&"Foo::make"));
        assert_eq!(hits.len(), 3);

        // Querying with the qualified spelling selects exactly that edge.
        let exact = hit_idents(&view, "Arc::new");
        assert_eq!(exact, vec!["Arc::new"]);
    }

    /// `call_path` walks the call edges breadth-first, identifying nodes by
    /// their last `::` segment.
    #[test]
    fn call_path_bfs_over_call_edges() {
        // Edges: a::run -> step, b::step -> Repo::commit, plus a detour
        // a::run -> noop. By last segment that is run -> step -> commit.
        let edges = [("a::run", "step"), ("b::step", "Repo::commit"), ("a::run", "noop")];
        let calls: Vec<CallEdgeRow> =
            edges.iter().map(|&(caller, callee)| edge_from(caller, callee)).collect();
        let view = KnowledgeView::new(vec![], calls);
        let chain = vec!["run", "step", "commit"];

        // Bare identifiers on both ends.
        let bare = view.call_path("run", "commit").expect("path exists");
        assert_eq!(bare, chain);

        // Qualified endpoints collapse to the same last-segment nodes.
        let qualified = view.call_path("a::run", "Repo::commit").expect("path exists");
        assert_eq!(qualified, chain);

        // A known node reaches itself with a one-element path.
        assert_eq!(view.call_path("step", "step"), Some(vec![String::from("step")]));

        // No path against the edge direction, and none from an unknown source.
        assert!(view.call_path("commit", "run").is_none());
        assert!(view.call_path("ghost", "run").is_none());
    }

    /// FULL-WIRING (S6b) warehouse round-trip: persist a RESOLVED SCIP scan
    /// whose `outer` body spans a call to `inner`, read it back via
    /// `load_latest_scip`, and assert the materialised [`KnowledgeView`] answers
    /// `callers_of("inner")` / `call_path("outer","inner")` over edges that came
    /// from containment of the resolved moniker — NOT name-based syn facts.
    #[cfg(feature = "scip")]
    #[test]
    fn load_latest_scip_builds_resolved_view() {
        use crate::knowledge::scip::{ingest_index, ScipScan};
        use crate::warehouse::iceberg::IcebergWarehouse;
        use scip::types::{symbol_information, Document, Index, Occurrence, SymbolInformation, SymbolRole};

        // One-document SCIP index, hand-built so the test controls every range.
        // NOTE(review): ranges look like [start_line, start_col, end_line, end_col]
        // per the SCIP convention — confirm against the `scip` crate.
        let mut idx = Index::new();
        let mut doc = Document::new();
        doc.relative_path = "src/lib.rs".into();

        // outer(): body [10,20] (0-based), calls inner.
        let mut outer_si = SymbolInformation::new();
        outer_si.symbol = "rust-analyzer cargo demo 0.1.0 outer().".into();
        outer_si.display_name = "outer".into();
        outer_si.kind = symbol_information::Kind::Function.into();
        doc.symbols.push(outer_si.clone());
        let mut outer_def = Occurrence::new();
        outer_def.range = vec![10, 3, 10, 8];
        outer_def.enclosing_range = vec![10, 0, 20, 1];
        outer_def.symbol = outer_si.symbol.clone();
        outer_def.symbol_roles = SymbolRole::Definition as i32;
        doc.occurrences.push(outer_def);

        // inner(): def + a call from inside outer at line 13.
        let mut inner_si = SymbolInformation::new();
        inner_si.symbol = "rust-analyzer cargo demo 0.1.0 inner().".into();
        inner_si.display_name = "inner".into();
        inner_si.kind = symbol_information::Kind::Function.into();
        doc.symbols.push(inner_si.clone());
        let mut inner_def = Occurrence::new();
        inner_def.range = vec![30, 3, 30, 8];
        inner_def.enclosing_range = vec![30, 0, 34, 1];
        inner_def.symbol = inner_si.symbol.clone();
        inner_def.symbol_roles = SymbolRole::Definition as i32;
        doc.occurrences.push(inner_def);
        // The reference occurrence: no `symbol_roles` and no `enclosing_range`
        // are set on it; line 13 falls inside outer's enclosing range [10,20].
        let mut ref_inner = Occurrence::new();
        ref_inner.range = vec![13, 8, 13, 13];
        ref_inner.symbol = inner_si.symbol.clone();
        doc.occurrences.push(ref_inner);

        idx.documents.push(doc);
        let scan: ScipScan = ingest_index(idx, "demo", "deadbeefsha", uuid::Uuid::new_v4(), chrono::Utc::now());

        // Persist into a throwaway warehouse rooted in a temp directory.
        let dir = tempfile::tempdir().unwrap();
        let wh = IcebergWarehouse::open(dir.path()).unwrap();
        wh.append_scip_scan(&scan).unwrap();

        // Resolved view materialised from the warehouse.
        let view = load_latest_scip(&wh, "demo").unwrap().expect("scip rows present");
        // The enclosing-range edge build survives the warehouse trip.
        let callers: Vec<&str> = view.callers_of("inner").iter().map(|c| c.caller_path.as_str()).collect();
        assert_eq!(callers, vec!["outer"], "resolved caller via containment");
        assert_eq!(view.call_path("outer", "inner"), Some(vec!["outer".to_string(), "inner".to_string()]));

        // A repo with no SCIP rows → None (so the CLI falls back to syn).
        assert!(load_latest_scip(&wh, "other").unwrap().is_none());
    }

    /// PROOF (S6b cross-binary moniker join, warehouse round-trip). Two SEPARATE
    /// repos — a binary `demo_bin` whose `main` calls a function `helper`
    /// DEFINED IN a different repo `demo_lib`. Each repo's persisted SCIP scan
    /// holds only its OWN occurrences (the bin has the *reference* with the
    /// cross-crate moniker; the lib has the *definition*).
    ///
    /// * RED: per-member `load_preferred(demo_bin)` cannot name the callee (its
    ///   def is in the other repo) → no resolved `main → helper` edge.
    /// * GREEN: `load_preferred_merged([demo_bin, demo_lib])` joins on the global
    ///   moniker → the cross-binary edge `main → helper` resolves.
    #[cfg(feature = "scip")]
    #[test]
    fn load_preferred_merged_resolves_cross_binary() {
        use crate::knowledge::scip::{ingest_index, ScipScan};
        use crate::warehouse::iceberg::IcebergWarehouse;
        use scip::types::{symbol_information, Document, Index, Occurrence, SymbolInformation, SymbolRole};

        // ── demo_bin: main() calls the lib's helper (reference only) ──────────
        let mut bidx = Index::new();
        let mut bdoc = Document::new();
        bdoc.relative_path = "src/main.rs".into();
        let mut main_si = SymbolInformation::new();
        main_si.symbol = "rust-analyzer cargo demo_bin 0.1.0 main().".into();
        main_si.display_name = "main".into();
        main_si.kind = symbol_information::Kind::Function.into();
        bdoc.symbols.push(main_si.clone());
        let mut main_def = Occurrence::new();
        main_def.range = vec![10, 3, 10, 7];
        main_def.enclosing_range = vec![10, 0, 20, 1];
        main_def.symbol = main_si.symbol.clone();
        main_def.symbol_roles = SymbolRole::Definition as i32;
        bdoc.occurrences.push(main_def);
        // Reference at line 13, inside main's enclosing range [10,20]. Its symbol
        // string names the `demo_lib` package, and this document carries no
        // `SymbolInformation` for it.
        let mut ref_helper = Occurrence::new();
        ref_helper.range = vec![13, 8, 13, 14];
        ref_helper.symbol = "rust-analyzer cargo demo_lib 0.1.0 helper().".into();
        bdoc.occurrences.push(ref_helper);
        bidx.documents.push(bdoc);
        let bin: ScipScan = ingest_index(bidx, "demo_bin", "binsha", uuid::Uuid::new_v4(), chrono::Utc::now());

        // ── demo_lib: the helper() DEFINITION (same global moniker) ───────────
        let mut lidx = Index::new();
        let mut ldoc = Document::new();
        ldoc.relative_path = "src/lib.rs".into();
        let mut helper_si = SymbolInformation::new();
        // Byte-for-byte the same symbol string the bin's reference uses above.
        helper_si.symbol = "rust-analyzer cargo demo_lib 0.1.0 helper().".into();
        helper_si.display_name = "helper".into();
        helper_si.kind = symbol_information::Kind::Function.into();
        ldoc.symbols.push(helper_si.clone());
        let mut helper_def = Occurrence::new();
        helper_def.range = vec![5, 7, 5, 13];
        helper_def.enclosing_range = vec![5, 0, 9, 1];
        helper_def.symbol = helper_si.symbol.clone();
        helper_def.symbol_roles = SymbolRole::Definition as i32;
        ldoc.occurrences.push(helper_def);
        lidx.documents.push(ldoc);
        let lib: ScipScan = ingest_index(lidx, "demo_lib", "libsha", uuid::Uuid::new_v4(), chrono::Utc::now());

        // Both scans land in the same throwaway warehouse, under separate repos.
        let dir = tempfile::tempdir().unwrap();
        let wh = IcebergWarehouse::open(dir.path()).unwrap();
        wh.append_scip_scan(&bin).unwrap();
        wh.append_scip_scan(&lib).unwrap();

        // RED: the bin in ISOLATION cannot resolve the cross-binary callee.
        let (solo, _src) = load_preferred(&wh, "demo_bin").unwrap();
        assert!(
            solo.callers_of("helper").is_empty(),
            "single-member view must not resolve the cross-binary call: {:?}",
            solo.calls
        );

        // GREEN: the merged join resolves `main → helper` across the boundary.
        let members = vec!["demo_bin".to_string(), "demo_lib".to_string()];
        let (merged, source) = load_preferred_merged(&wh, &members).unwrap();
        assert_eq!(source, "resolved/scip");
        let callers: Vec<&str> =
            merged.callers_of("helper").iter().map(|c| c.caller_path.as_str()).collect();
        assert_eq!(callers, vec!["main"], "cross-binary edge resolved via moniker join");
        assert_eq!(
            merged.call_path("main", "helper"),
            Some(vec!["main".to_string(), "helper".to_string()]),
        );
    }

    // ════════════════════════════════════════════════════════════════════
    // R0.3 — inverted index + memoized adjacency. The index MUST be a drop-in
    // replacement for the old linear `Vec` scans: identical result sets, in
    // identical order, with O(hits) (not O(N)) lookup cost.
    // ════════════════════════════════════════════════════════════════════

    /// Reference O(N) `callers_of`: scans every edge and keeps, in slice order,
    /// those whose callee is exactly `name` or ends with `::name`. Matches the
    /// pre-R0.3 linear scan result-for-result; the index must agree with it
    /// EXACTLY.
    fn linear_callers_of<'a>(calls: &'a [CallEdgeRow], name: &str) -> Vec<&'a CallEdgeRow> {
        let mut hits = Vec::new();
        for row in calls {
            let callee = row.callee_ident.as_str();
            // "ends with `::name`" == "ends with `name`, and what precedes it
            // ends with `::`".
            let qualified = callee
                .strip_suffix(name)
                .is_some_and(|head| head.ends_with("::"));
            if callee == name || qualified {
                hits.push(row);
            }
        }
        hits
    }

    /// Reference O(N) `callees_of` — the oracle for the index. Scans every edge
    /// and keeps, in slice order, those whose caller path is exactly `name` or
    /// ends with `::name`.
    fn linear_callees_of<'a>(calls: &'a [CallEdgeRow], name: &str) -> Vec<&'a CallEdgeRow> {
        // Built once: the suffix does not depend on the row, so formatting it
        // inside the filter closure would allocate a fresh String per edge.
        let suffix = format!("::{name}");
        calls
            .iter()
            .filter(|c| c.caller_path == name || c.caller_path.ends_with(&suffix))
            .collect()
    }

    /// Reference `call_path`: rebuilds the last-segment adjacency from `calls` on
    /// every invocation, then runs a breadth-first search from `from` to `to`.
    /// The memoized-adjacency BFS must return EXACTLY what this returns.
    ///
    /// Returns `None` when `from` is not a node of the graph or `to` is
    /// unreachable; a known node reaches itself with a one-element path.
    fn linear_call_path(calls: &[CallEdgeRow], from: &str, to: &str) -> Option<Vec<String>> {
        use std::collections::{BTreeMap, BTreeSet, VecDeque};

        /// Last `::`-separated segment (kept local so the oracle is self-contained).
        fn tail(path: &str) -> &str {
            path.rsplit("::").next().unwrap_or(path)
        }

        let (src, dst) = (tail(from), tail(to));

        // caller -> callees in edge order, plus the set of every node seen.
        let mut adjacency: BTreeMap<&str, Vec<&str>> = BTreeMap::new();
        let mut known: BTreeSet<&str> = BTreeSet::new();
        for edge in calls {
            let caller = tail(&edge.caller_path);
            let callee = tail(&edge.callee_ident);
            adjacency.entry(caller).or_default().push(callee);
            known.insert(caller);
            known.insert(callee);
        }

        // An unknown source has no path to anything, itself included.
        if !known.contains(src) {
            return None;
        }
        if src == dst {
            return Some(vec![src.to_string()]);
        }

        let mut came_from: BTreeMap<&str, &str> = BTreeMap::new();
        let mut visited: BTreeSet<&str> = BTreeSet::from([src]);
        let mut frontier: VecDeque<&str> = VecDeque::from([src]);
        while let Some(current) = frontier.pop_front() {
            for &next in adjacency.get(current).into_iter().flatten() {
                if !visited.insert(next) {
                    continue;
                }
                came_from.insert(next, current);
                if next == dst {
                    // Walk the predecessor chain back to the source (which has
                    // no predecessor), then flip it into source-first order.
                    let mut path: Vec<String> =
                        std::iter::successors(Some(dst), |&node| came_from.get(node).copied())
                            .map(str::to_string)
                            .collect();
                    path.reverse();
                    return Some(path);
                }
                frontier.push_back(next);
            }
        }
        None
    }

    /// Call-edge fixture built to stress every match shape: bare callees,
    /// single- and multi-segment qualified callees, a `renew` decoy that must
    /// NOT match `new`, the same callee repeated at non-adjacent positions
    /// (order sensitivity), and callers of varying path depth. The interleaved
    /// edge order is intentional.
    fn rich_fixture() -> Vec<CallEdgeRow> {
        // (caller_path, callee_ident); a pair's index here is its index in the
        // returned Vec.
        let table: &[(&str, &str)] = &[
            ("a::run", "step"),          // 0
            ("mod::a::run", "Arc::new"), // 1
            ("b::step", "Repo::commit"), // 2
            ("x::y::run", "new"),        // 3  bare callee
            ("run", "Foo::new"),         // 4  bare caller
            ("c::renew", "renew"),       // 5  decoy: not `new`/`run`
            ("p::q::step", "a::b::new"), // 6  multi-segment callee
            ("k::Arc::new", "Arc::new"), // 7  repeated callee, later
            ("z::run", "noop"),          // 8
            ("b::step", "x::y::new"),    // 9  another `*::new`
        ];
        table.iter().map(|&(caller, callee)| edge_from(caller, callee)).collect()
    }

    /// Converts borrowed rows into their raw addresses. Two results compare
    /// equal only when they reference the same elements of the same backing
    /// slice in the same order, so an assertion on them checks membership AND
    /// ordering at once.
    fn ptrs(rows: Vec<&CallEdgeRow>) -> Vec<*const CallEdgeRow> {
        let mut addrs = Vec::with_capacity(rows.len());
        for row in rows {
            addrs.push(row as *const CallEdgeRow);
        }
        addrs
    }

    /// Slice-taking counterpart of [`ptrs`]: maps a `&[&CallEdgeRow]` (one
    /// per-name entry of a BATCHED result) to raw addresses while leaving the
    /// batch's own Vec in place.
    fn ptrs_v(rows: &[&CallEdgeRow]) -> Vec<*const CallEdgeRow> {
        let mut addrs = Vec::with_capacity(rows.len());
        addrs.extend(rows.iter().map(|row| *row as *const CallEdgeRow));
        addrs
    }

    /// FAIL-ON-BUG / PASS-ON-FIX: for every probe name the indexed
    /// `callers_of` / `callees_of` must hand back the very same edges, in the
    /// very same order, as the linear-scan oracles; `call_path` over the
    /// memoized adjacency must equal the oracle that rebuilds it per call.
    #[test]
    fn indexed_lookups_match_linear_scan_exactly() {
        let view = KnowledgeView::new(vec![], rich_fixture());
        let edges: &[CallEdgeRow] = &view.calls;

        // Probes covering bare, qualified, partial-suffix, absent and empty names.
        let probes: &[&str] = &[
            "new", "run", "step", "Arc::new", "commit", "Repo::commit", "renew", "noop",
            "a::b::new", "b::new", "y::new", "x::y::new", "Foo::new", "ghost", "",
        ];
        for &name in probes {
            let indexed_callers = ptrs(view.callers_of(name));
            let scanned_callers = ptrs(linear_callers_of(edges, name));
            assert_eq!(
                indexed_callers, scanned_callers,
                "callers_of({name:?}) must equal the linear scan (set + order)",
            );

            let indexed_callees = ptrs(view.callees_of(name));
            let scanned_callees = ptrs(linear_callees_of(edges, name));
            assert_eq!(
                indexed_callees, scanned_callees,
                "callees_of({name:?}) must equal the linear scan (set + order)",
            );
        }

        // Endpoint pairs: reachable, self, unknown-source and unreachable cases.
        let routes: &[(&str, &str)] = &[
            ("run", "new"),
            ("a::run", "Arc::new"),
            ("run", "commit"),
            ("step", "commit"),
            ("run", "run"),
            ("step", "step"),
            ("ghost", "new"),
            ("commit", "run"),
            ("run", "noop"),
            ("step", "new"),
            ("run", "renew"),
        ];
        for &(from, to) in routes {
            let memoized = view.call_path(from, to);
            let rebuilt = linear_call_path(edges, from, to);
            assert_eq!(
                memoized, rebuilt,
                "call_path({from:?}, {to:?}) must equal the linear rebuild",
            );
        }
    }

    /// COMPLEXITY assertion as DATA: the posting list a lookup dereferences has
    /// the length of the HIT count and does not grow with N. Two fixtures that
    /// differ 200× in noise edges must expose the same 3-entry posting list for
    /// `target` — the operational meaning of O(hits) rather than O(N).
    #[test]
    fn callers_of_lookup_is_o_hits_not_o_n() {
        /// `noise` unrelated edges followed by exactly three edges that invoke
        /// `target` (bare, single-qualified, multi-qualified).
        fn build(noise: usize) -> KnowledgeView {
            let hits = [("m::one", "target"), ("m::two", "Wrap::target"), ("m::three", "a::b::target")];
            let calls: Vec<CallEdgeRow> = (0..noise)
                .map(|i| edge_from(&format!("noise::c{i}"), &format!("noise::g{i}")))
                .chain(hits.iter().map(|&(caller, callee)| edge_from(caller, callee)))
                .collect();
            KnowledgeView::new(vec![], calls)
        }

        /// Length of the callee posting list stored under `key` (0 when absent).
        fn posting_len(view: &KnowledgeView, key: &str) -> usize {
            view.index().callee_keys.get(key).map_or(0, Vec::len)
        }

        let small = build(1_000);
        let big = build(200_000);

        // The answer does not depend on N.
        assert_eq!(small.callers_of("target").len(), 3);
        assert_eq!(big.callers_of("target").len(), 3);

        // Neither does the work: the posting list equals the hit count for BOTH.
        let small_work = posting_len(&small, "target");
        let big_work = posting_len(&big, "target");
        assert_eq!(small_work, 3, "posting list holds only the 3 hits at N=1_003");
        assert_eq!(big_work, 3, "posting list STILL holds only the 3 hits at N=200_003");
        assert_eq!(big_work, small_work, "lookup work independent of N ⇒ O(hits), not O(N)");
        assert!(big.calls.len() >= 200_000, "the large fixture really is large");

        // A miss has no posting list at all, so it touches ZERO edges.
        assert!(big.index().callee_keys.get("absent_symbol").is_none());
        assert!(big.callers_of("absent_symbol").is_empty());
    }

    /// The call index is materialised lazily by the first call-graph query and
    /// the SAME instance then serves every later query (built once, memoized).
    #[test]
    fn index_is_built_once_and_reused() {
        let view = KnowledgeView::new(vec![], rich_fixture());

        // Nothing exists before the first query.
        assert!(view.index.get().is_none(), "index is not built eagerly");

        // Address of the currently materialised index; panics with `why` if absent.
        let index_addr = |why: &str| view.index.get().expect(why) as *const CallIndex;

        let _ = view.callers_of("new");
        let first = index_addr("index materialised on first query");

        // Other query kinds must not rebuild or replace it.
        let _ = view.callees_of("run");
        let _ = view.call_path("run", "commit");
        let again = index_addr("still present");

        assert_eq!(first, again, "same CallIndex reused — built once, not per call");
    }

    // ════════════════════════════════════════════════════════════════════
    // R2.1 — the "Ragnar" stree32 sorted-key, SIMD + software-prefetch,
    // BATCHED-lookup index wired BEHIND the R0.3 inverted index. It MUST be a
    // drop-in accelerator: identical result sets, in identical order, to BOTH
    // the inverted-index HashMap path AND the original linear scan — over a
    // gnarly small fixture and a large one with real tree depth.
    // ════════════════════════════════════════════════════════════════════

    /// FAIL-ON-BUG / PASS-ON-FIX: the R2.1 stree32 lookups equal the inverted
    /// index AND the linear scan oracle, EXACTLY (same edges, same order).
    ///
    /// Three independent cross-checks, all validated as DATA (`&CallEdgeRow`
    /// pointer identity within the backing slice — catches any divergence in
    /// membership OR order), never by eyeballing:
    ///   1. single-key stree `get` == the `CallIndex` HashMap `.get` (the
    ///      cross-check / fallback oracle the task names);
    ///   2. BATCHED `callers_of_batch` / `callees_of_batch` (the actual Ragnar
    ///      pipelined SIMD+prefetch path) == per-name `callers_of`/`callees_of`
    ///      == `linear_callers_of`/`linear_callees_of` (the original scan);
    ///   3. a STRUCTURAL/complexity stand-in for the (skipped, low-power) perf
    ///      bench: the tree is really built, its keys are strictly-ascending and
    ///      unique (one bucket per distinct key-hash), entry count == HashMap key
    ///      count, and the large fixture builds a deep multi-level tree.
    ///
    /// If the stree wiring regresses (bad hash, missing string cross-check,
    /// dropped fallback, wrong posting list), the batched results diverge from
    /// the oracle and this test FAILS; on a correct wiring it PASSES.
    #[test]
    fn stree_batched_lookups_match_inverted_index_and_linear_scan() {
        // Number of synthetic "noise" edges in the large fixture. Query slots at
        // or beyond this index are the hand-written tail (qualified hits + misses).
        const NOISE_EDGES: usize = 50_000;

        // ── Small fixture: every match shape, a `renew` decoy, a miss and "" ──
        let names = [
            "new", "run", "step", "Arc::new", "commit", "Repo::commit", "renew", "noop",
            "a::b::new", "b::new", "y::new", "x::y::new", "Foo::new", "ghost", "",
        ];
        let view = KnowledgeView::new(vec![], rich_fixture());

        // (1) A single-key stree lookup must agree with the HashMap it mirrors,
        // for both directions of the inverted index.
        let idx = view.index();
        let (callee_stree, caller_stree) = (idx.callee_stree(), idx.caller_stree());
        assert!(callee_stree.tree.is_some(), "the callee stree is actually built");
        assert!(caller_stree.tree.is_some(), "the caller stree is actually built");
        for &name in names.iter() {
            assert_eq!(
                callee_stree.get(name),
                idx.callee_keys.get(name),
                "stree get({name:?}) must equal the callee_keys HashMap (cross-check)",
            );
            assert_eq!(
                caller_stree.get(name),
                idx.caller_keys.get(name),
                "stree get({name:?}) must equal the caller_keys HashMap (cross-check)",
            );
        }

        // (2) Batched results, per-name results and the linear scan must all
        // agree. The length checks come first so the zip below cannot silently
        // drop a trailing query.
        let batch_callers = view.callers_of_batch(&names);
        let batch_callees = view.callees_of_batch(&names);
        assert_eq!(batch_callers.len(), names.len());
        assert_eq!(batch_callees.len(), names.len());
        let per_name = names
            .iter()
            .zip(batch_callers.iter().zip(batch_callees.iter()));
        for (name, (callers, callees)) in per_name {
            assert_eq!(
                ptrs_v(callers),
                ptrs(view.callers_of(name)),
                "callers_of_batch[{name:?}] must equal per-name callers_of",
            );
            assert_eq!(
                ptrs_v(callers),
                ptrs(linear_callers_of(&view.calls, name)),
                "callers_of_batch[{name:?}] must equal the linear scan oracle",
            );
            assert_eq!(
                ptrs_v(callees),
                ptrs(view.callees_of(name)),
                "callees_of_batch[{name:?}] must equal per-name callees_of",
            );
            assert_eq!(
                ptrs_v(callees),
                ptrs(linear_callees_of(&view.calls, name)),
                "callees_of_batch[{name:?}] must equal the linear scan oracle",
            );
        }

        // (3) Structural checks on the callee stree: as many entries as the
        // HashMap has keys, no more hash buckets than entries, and bucket hashes
        // strictly increasing (hence unique).
        assert_eq!(
            callee_stree.entry_keys.len(),
            idx.callee_keys.len(),
            "every inverted-index key is present in the stree",
        );
        assert!(
            callee_stree.sorted_hashes.len() <= callee_stree.entry_keys.len(),
            "never more buckets than keys",
        );
        assert!(
            callee_stree
                .sorted_hashes
                .windows(2)
                .all(|pair| pair[0] < pair[1]),
            "stree keys are strictly ascending + unique (valid S-tree input)",
        );

        // ── Large fixture: NOISE_EDGES distinct callee keys, plus exactly three
        // edges that invoke `target` (bare, single- and multi-qualified). ──
        let mut calls = Vec::with_capacity(NOISE_EDGES + 3);
        calls.extend((0..NOISE_EDGES).map(|i| {
            edge_from(&format!("noise{i}::caller"), &format!("noise{i}::callee{i}"))
        }));
        calls.extend([
            edge_from("m::one", "target"),
            edge_from("m::two", "Wrap::target"),
            edge_from("m::three", "a::b::target"),
        ]);
        let big = KnowledgeView::new(vec![], calls);

        // One big query batch: every noise callee, then the qualified hits and
        // two names with no dedicated edge in this fixture.
        let mut qnames: Vec<String> = (0..NOISE_EDGES).map(|i| format!("callee{i}")).collect();
        qnames.extend(
            ["target", "Wrap::target", "a::b::target", "absent_symbol", "new"]
                .iter()
                .map(|s| s.to_string()),
        );
        let qrefs: Vec<&str> = qnames.iter().map(|q| q.as_str()).collect();
        let big_batch = big.callers_of_batch(&qrefs);
        assert_eq!(big_batch.len(), qrefs.len());
        for (i, (name, hits)) in qrefs.iter().zip(big_batch.iter()).enumerate() {
            let got = ptrs_v(hits);
            // Every slot is compared with the per-name path.
            assert_eq!(
                got,
                ptrs(big.callers_of(name)),
                "large-fixture callers_of_batch[{name:?}] diverged from per-name callers_of",
            );
            // Only the hand-written tail and every 500th noise slot are compared
            // with the linear scan, to keep the quadratic oracle affordable.
            if i >= NOISE_EDGES || i % 500 == 0 {
                assert_eq!(
                    got,
                    ptrs(linear_callers_of(&big.calls, name)),
                    "large-fixture callers_of_batch[{name:?}] diverged from the linear scan",
                );
            }
        }

        // Locate a query's slot in the batch by its exact name.
        let slot_of = |wanted: &str| qrefs.iter().position(|&n| n == wanted).unwrap();
        assert_eq!(
            big_batch[slot_of("target")].len(),
            3,
            "all 3 `target` callers via the batched stree"
        );
        assert!(
            big_batch[slot_of("absent_symbol")].is_empty(),
            "an absent key resolves to no edges"
        );

        // Structural stand-in for a benchmark: the large fixture must produce
        // many distinct hash buckets.
        let big_buckets = big.index().callee_stree().sorted_hashes.len();
        assert!(
            big_buckets > 10_000,
            "large fixture builds a deep multi-level stree (the no-bench scale stand-in)",
        );
    }
}