nornir 0.4.53

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
//! RESOLVED knowledge map — ingest of a `rust-analyzer scip` index (task #24).
//!
//! `rust-analyzer scip .` resolves *everything* the syntactic `syn` scan in
//! [`super::symbols`] cannot: trait-method dispatch, macro-generated items,
//! type-directed method resolution, re-exports and generic-impl expansion. It
//! emits a portable **SCIP** (`index.scip`) protobuf — a list of `Document`s,
//! each with `Occurrence`s (one per def/ref token, with a globally-unique
//! RESOLVED `symbol` moniker + a role bitset) and `SymbolInformation` (the
//! definitions, with kind + display name).
//!
//! This module reads that index (via the `scip` crate's protobuf types) into
//! flat [`ScipRow`]s and persists them to the `scip_occurrences` warehouse
//! table **SHA-keyed** — so the resolved map is HISTORIZED (which r-a never is),
//! giving find-usages that are both IDE-precise *and* time-travelable.
//!
//! Gated behind the `scip` cargo feature so default builds pay nothing.

use std::path::Path;

use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use scip::types::{symbol_information, Index, SymbolRole};
use uuid::Uuid;

/// One ingested SCIP occurrence — a single resolved def/ref token, flattened
/// into a warehouse-friendly row.
#[derive(Debug, Clone, serde::Serialize, PartialEq, Eq)]
pub struct ScipRow {
    /// The RESOLVED SCIP symbol moniker (globally unique). For a local symbol
    /// it is `local N`; for a global it encodes scheme + package + descriptors.
    pub symbol: String,
    /// Decoded role label(s) — `definition`, `import`, `write`, `read`,
    /// `generated`, `test` — joined by `+` when several bits are set, or the
    /// single label `reference` when none of those bits is set.
    pub role: String,
    /// `true` when the `Definition` role bit is set (the defining occurrence).
    pub is_definition: bool,
    /// Short display name from the matching `SymbolInformation`; `""` when no
    /// matching `SymbolInformation` was found.
    pub display_name: String,
    /// SCIP `SymbolInformation.Kind` (`Function`, `Method`, `Struct`, `Trait`,
    /// …) taken from the matching `SymbolInformation`; `""` when none matches
    /// (e.g. refs to externals).
    pub kind: String,
    /// Source path copied from the SCIP document's `relative_path`
    /// (forward-slashed per the SCIP spec).
    pub file: String,
    /// 1-based line of the occurrence start (SCIP ranges are 0-based).
    pub start_line: u32,
    /// 1-based column of the occurrence start (SCIP ranges are 0-based).
    pub start_col: u32,
}

/// A whole ingested SCIP index, ready to persist: the flattened occurrence
/// rows plus the caller-supplied tags that identify the scan.
#[derive(Debug, Default)]
pub struct ScipScan {
    /// Caller-supplied identifier of the snapshot this scan belongs to.
    pub snapshot_id: Uuid,
    /// Caller-supplied timestamp for the scan.
    // NOTE(review): presumably the snapshot time — confirm against the caller.
    pub ts: DateTime<Utc>,
    /// Repository tag the rows are attributed to.
    pub repo: String,
    /// Git commit SHA tag the rows are attributed to.
    pub git_sha: String,
    /// One row per ingested occurrence, in document order then occurrence order.
    pub rows: Vec<ScipRow>,
}

/// Translate a SCIP `symbol_roles` bitmask into a warehouse label plus a
/// "this occurrence defines the symbol" flag.
///
/// Labels for the set bits are emitted in a fixed order — `definition`,
/// `import`, `write`, `read`, `generated`, `test` — and joined with `+`. A
/// mask with none of those bits set yields the single label `reference`.
/// The returned `bool` is `true` exactly when the `Definition` bit is set.
fn decode_roles(bits: i32) -> (String, bool) {
    // Table order fixes the order of the labels in the joined string.
    let role_table = [
        (SymbolRole::Definition as i32, "definition"),
        (SymbolRole::Import as i32, "import"),
        (SymbolRole::WriteAccess as i32, "write"),
        (SymbolRole::ReadAccess as i32, "read"),
        (SymbolRole::Generated as i32, "generated"),
        (SymbolRole::Test as i32, "test"),
    ];

    let names: Vec<&str> = role_table
        .iter()
        .filter(|&&(mask, _)| bits & mask != 0)
        .map(|&(_, name)| name)
        .collect();

    let defines = bits & SymbolRole::Definition as i32 != 0;
    let label = if names.is_empty() {
        // No known role bit ⇒ an ordinary usage of the symbol.
        String::from("reference")
    } else {
        names.join("+")
    };
    (label, defines)
}

/// SCIP `SymbolInformation.Kind` enum → a short string for the warehouse.
fn kind_label(kind: symbol_information::Kind) -> &'static str {
    use symbol_information::Kind::*;
    match kind {
        Function => "Function",
        Method => "Method",
        StaticMethod => "StaticMethod",
        Struct => "Struct",
        Trait => "Trait",
        TraitMethod => "TraitMethod",
        Enum => "Enum",
        EnumMember => "EnumMember",
        Field => "Field",
        Module => "Module",
        Macro => "Macro",
        TypeAlias => "TypeAlias",
        Constant => "Constant",
        Variable => "Variable",
        Parameter => "Parameter",
        TypeParameter => "TypeParameter",
        AssociatedType => "AssociatedType",
        SelfParameter => "SelfParameter",
        _ => "Other",
    }
}

/// Read a SCIP index file (`index.scip`) from disk and flatten it into rows
/// tagged with `repo` and `git_sha`.
///
/// This is the BUILD-FREE half of the pipeline: rust-analyzer is never run
/// here, only the protobuf that a separate `rust-analyzer scip` invocation
/// left behind is read — which keeps the parse path unit-testable against a
/// hand-made index.
///
/// # Errors
///
/// Fails when the file cannot be read (the error context names the path) or
/// when its bytes do not decode as a SCIP `Index` message.
pub fn ingest_index_file(
    index_path: &Path,
    repo: &str,
    git_sha: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> Result<ScipScan> {
    std::fs::read(index_path)
        .with_context(|| format!("reading SCIP index {}", index_path.display()))
        .and_then(|raw| protobuf_parse(&raw))
        .map(|decoded| ingest_index(decoded, repo, git_sha, snapshot_id, ts))
}

/// Decode the protobuf `Index` message from raw bytes.
fn protobuf_parse(bytes: &[u8]) -> Result<Index> {
    use protobuf::Message;
    Index::parse_from_bytes(bytes).context("parsing SCIP protobuf Index")
}

/// Map an in-memory [`Index`] to rows. Split out from file reading so the parse
/// → rows mapping is testable against a programmatically-built index.
///
/// Every occurrence of every document becomes one [`ScipRow`], in document
/// order then occurrence order. Positions are converted from SCIP's 0-based
/// values to 1-based; negative values are clamped to 0 first. Occurrences
/// whose `range` has fewer than two elements carry no usable start position
/// and are skipped.
///
/// `kind` / `display_name` come from `SymbolInformation`, looked up in two
/// steps:
///
/// 1. the occurrence's own document (the only place a `local N` symbol can be
///    described, since local ids are only unique within one document);
/// 2. for global symbols, any other document of the same index — so a
///    reference in one file to a symbol defined in another file still gets
///    its kind and display name. When several documents describe the same
///    global symbol, the first document in index order wins.
///
/// Symbols described nowhere in the index's documents (externals) keep
/// `kind == ""` and `display_name == ""`.
///
/// `repo`, `git_sha`, `snapshot_id` and `ts` are copied into the returned
/// [`ScipScan`] unchanged.
pub fn ingest_index(
    index: Index,
    repo: &str,
    git_sha: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> ScipScan {
    use std::collections::HashMap;

    // Prefix the SCIP symbol grammar reserves for document-scoped symbols.
    const LOCAL_PREFIX: &str = "local ";

    // One symbol → (kind, display_name) lookup per document, built from that
    // document's `SymbolInformation` (the definitions live here). A later
    // entry for the same symbol within a document replaces an earlier one.
    let per_doc: Vec<HashMap<&str, (&'static str, &str)>> = index
        .documents
        .iter()
        .map(|doc| {
            doc.symbols
                .iter()
                .map(|si| {
                    let kind = si.kind.enum_value().map(kind_label).unwrap_or("Other");
                    (si.symbol.as_str(), (kind, si.display_name.as_str()))
                })
                .collect()
        })
        .collect();

    // Index-wide fallback for GLOBAL symbols only: `local N` ids are reused
    // across documents, so sharing them would attach the wrong definition.
    let mut index_wide: HashMap<&str, (&'static str, &str)> = HashMap::new();
    for doc_info in &per_doc {
        for (&symbol, &entry) in doc_info {
            if !symbol.starts_with(LOCAL_PREFIX) {
                index_wide.entry(symbol).or_insert(entry);
            }
        }
    }

    let occurrence_total: usize = index.documents.iter().map(|d| d.occurrences.len()).sum();
    let mut rows = Vec::with_capacity(occurrence_total);

    for (doc, doc_info) in index.documents.iter().zip(&per_doc) {
        for occ in &doc.occurrences {
            // SCIP range is [startLine, startCol, endLine, endCol] or a
            // 3-element [startLine, startCol, endCol] when single-line; only
            // the start position is kept.
            let (line, col) = match occ.range.as_slice() {
                [l, c, ..] => (*l, *c),
                _ => continue,
            };
            let (role, is_definition) = decode_roles(occ.symbol_roles);

            let symbol = occ.symbol.as_str();
            let (kind, display_name) = doc_info
                .get(symbol)
                .or_else(|| index_wide.get(symbol))
                .map(|&(k, d)| (k.to_string(), d.to_string()))
                .unwrap_or_default();

            rows.push(ScipRow {
                symbol: occ.symbol.clone(),
                role,
                is_definition,
                display_name,
                kind,
                file: doc.relative_path.clone(),
                // SCIP positions are 0-based; present 1-based like syn rows.
                start_line: (line.max(0) as u32).saturating_add(1),
                start_col: (col.max(0) as u32).saturating_add(1),
            });
        }
    }

    ScipScan {
        snapshot_id,
        ts,
        repo: repo.to_string(),
        git_sha: git_sha.to_string(),
        rows,
    }
}

/// Lookup helpers over the rows of an ingested SCIP scan — the RESOLVED
/// counterpart to [`super::query::KnowledgeView`]. A reference row carries the
/// *same* resolved `symbol` moniker as its definition row, so find-usages is a
/// plain string-equality match on the moniker (no last-segment name heuristic,
/// no cross-name collisions).
impl ScipScan {
    /// Definition rows whose display name or symbol moniker contains `pattern`,
    /// compared case-insensitively. Rows are returned in scan order; an empty
    /// `pattern` matches every definition row.
    pub fn definitions_matching<'a>(&'a self, pattern: &str) -> Vec<&'a ScipRow> {
        let needle = pattern.to_lowercase();
        let mut hits = Vec::new();
        for row in &self.rows {
            if !row.is_definition {
                continue;
            }
            let name_hit = row.display_name.to_lowercase().contains(&needle);
            if name_hit || row.symbol.to_lowercase().contains(&needle) {
                hits.push(row);
            }
        }
        hits
    }

    /// The distinct symbol monikers *defined* by rows matching `pattern`,
    /// sorted ascending. Turns a human name into the precise key expected by
    /// [`Self::usages_of`].
    pub fn resolve_symbols(&self, pattern: &str) -> Vec<String> {
        // An ordered set gives both the de-duplication and the sort order.
        let unique: std::collections::BTreeSet<&str> = self
            .definitions_matching(pattern)
            .into_iter()
            .map(|row| row.symbol.as_str())
            .collect();
        unique.into_iter().map(str::to_owned).collect()
    }

    /// Non-definition rows whose moniker equals `symbol` exactly — IDE-grade
    /// find-usages, with the definition itself left out.
    pub fn usages_of<'a>(&'a self, symbol: &str) -> Vec<&'a ScipRow> {
        let mut usages = Vec::new();
        for row in &self.rows {
            if !row.is_definition && row.symbol == symbol {
                usages.push(row);
            }
        }
        usages
    }

    /// Every row (definitions and references alike) whose moniker equals
    /// `symbol` exactly.
    pub fn occurrences_of<'a>(&'a self, symbol: &str) -> Vec<&'a ScipRow> {
        let mut matches = Vec::new();
        for row in &self.rows {
            if row.symbol == symbol {
                matches.push(row);
            }
        }
        matches
    }
}

// Unit tests for the build-free path: a hand-made SCIP index is mapped to rows
// and queried, and round-tripped through its protobuf encoding.
#[cfg(test)]
mod tests {
    use super::*;
    use scip::types::{Document, Occurrence, SymbolInformation};

    /// Build a tiny hand-made SCIP index: one document describing a single
    /// trait method `Greet#name()`, with one definition occurrence, two
    /// resolved reference occurrences of it, and one occurrence of an
    /// unrelated `Config#name.` field that shares the bare ident `name` —
    /// the canonical case syn's name-match mis-attributes (it cannot tell
    /// which `name` is meant).
    fn sample_index() -> Index {
        let mut idx = Index::new();
        let mut doc = Document::new();
        doc.relative_path = "src/lib.rs".into();

        // SymbolInformation: the trait method definition.
        let mut si = SymbolInformation::new();
        si.symbol = "rust-analyzer cargo demo 0.1.0 Greet#name().".into();
        si.display_name = "name".into();
        si.kind = symbol_information::Kind::TraitMethod.into();
        doc.symbols.push(si.clone());

        // A definition occurrence (role = Definition) for Greet::name.
        let mut def = Occurrence::new();
        def.range = vec![10, 4, 10, 8];
        def.symbol = si.symbol.clone();
        def.symbol_roles = SymbolRole::Definition as i32;
        doc.occurrences.push(def);

        // Two RESOLVED reference occurrences of the SAME trait method — what an
        // IDE find-usages returns. syn would only see the bare ident `name`.
        for line in [20, 30] {
            let mut r = Occurrence::new();
            r.range = vec![line, 8, line, 12];
            r.symbol = si.symbol.clone();
            r.symbol_roles = 0; // plain reference
            doc.occurrences.push(r);
        }

        // A DIFFERENT, unrelated `name` (a struct field) at the same bare ident
        // — syn's name match would wrongly fold this into the same bucket.
        // It has no `SymbolInformation` in the document, so its row gets an
        // empty kind and display name.
        let mut other = Occurrence::new();
        other.range = vec![40, 8, 40, 12];
        other.symbol = "rust-analyzer cargo demo 0.1.0 Config#name.".into();
        other.symbol_roles = SymbolRole::ReadAccess as i32;
        doc.occurrences.push(other);

        idx.documents.push(doc);
        idx
    }

    /// End-to-end check of `ingest_index` plus the `ScipScan` queries on the
    /// sample index: row count, name → moniker resolution, exact find-usages,
    /// and the decoded kind/role/1-based position of the definition row.
    #[test]
    fn ingest_maps_roles_and_resolves_exact_symbol() {
        let scan = ingest_index(
            sample_index(),
            "demo",
            "deadbeef",
            Uuid::nil(),
            Utc::now(),
        );

        // 4 occurrences total (1 def + 2 refs of Greet::name + 1 Config.name).
        assert_eq!(scan.rows.len(), 4);

        // Resolve the human name "name" to its DEFINING moniker — there is
        // exactly one trait-method definition (Config#name. has no def occ here).
        let syms = scan.resolve_symbols("name");
        assert_eq!(syms, vec!["rust-analyzer cargo demo 0.1.0 Greet#name().".to_string()]);

        // find-usages on the resolved symbol returns EXACTLY the two true call
        // sites — and crucially NOT the unrelated `Config#name.` field access,
        // which a syn last-segment name match ("name") would wrongly include.
        let usages = scan.usages_of(&syms[0]);
        assert_eq!(usages.len(), 2);
        let lines: Vec<u32> = usages.iter().map(|r| r.start_line).collect();
        assert_eq!(lines, vec![21, 31]); // 0-based 20/30 → 1-based.

        // The definition occurrence carries the resolved kind + 1-based pos.
        let def: Vec<&ScipRow> = scan.rows.iter().filter(|r| r.is_definition).collect();
        assert_eq!(def.len(), 1);
        assert_eq!(def[0].kind, "TraitMethod");
        assert_eq!(def[0].role, "definition");
        assert_eq!(def[0].start_line, 11);
    }

    /// The sample index survives encode → `protobuf_parse` with its document
    /// and occurrence counts intact.
    #[test]
    fn protobuf_roundtrip_through_bytes() {
        use protobuf::Message;
        let idx = sample_index();
        let bytes = idx.write_to_bytes().expect("encode");
        let back = protobuf_parse(&bytes).expect("decode");
        assert_eq!(back.documents.len(), 1);
        assert_eq!(back.documents[0].occurrences.len(), 4);
    }
}