// nornir 0.4.54
//
// Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
//! RESOLVED knowledge map — rust-analyzer IN-PROCESS (task #24 relaunch).
//!
//! The [`super::scip`] sibling parses a SCIP index that a *separately invoked*
//! `rust-analyzer scip` subprocess produced. This module honors the relaunch's
//! HARD CONSTRAINT — **no shelling** — by linking rust-analyzer **as a library**:
//!
//! - [`ra_ap_load_cargo::load_workspace_at`] loads the cargo workspace into a
//!   `RootDatabase` (the same salsa DB the IDE runs on),
//! - [`ra_ap_ide::Analysis`] (the IDE query surface) pulls fully-RESOLVED
//!   definitions ([`Analysis::file_structure`]) and references
//!   ([`Analysis::find_all_refs`]) directly from memory — trait-method dispatch,
//!   macro-generated items and type-directed resolution included.
//!
//! No `index.scip` round-trip, no child process. The extracted rows reuse the
//! [`super::scip::ScipRow`]/[`super::scip::ScipScan`] data model and the same
//! `scip_occurrences` warehouse table, so the persistence + query halves are
//! shared with the SCIP-file path. SHA-keyed (historized), like that path.
//!
//! ## Resolved-symbol identity
//!
//! ra-ap's IDE API does not hand us a SCIP moniker string through this surface.
//! Instead we synthesize a **def-site key** — `file#Lline:Ccol` of the
//! definition's navigation range — and key every reference to the def-site that
//! [`Analysis::find_all_refs`] attributed it to. That makes find-usages an exact
//! identity match on the *resolved definition*, not a name match: two unrelated
//! `name`s resolve to two distinct def-site keys, never folded. (A future pass
//! can swap this for a real SCIP moniker via `ra_ap_ide::moniker`.)
//!
//! Gated behind the `ra-ingest` cargo feature; default builds never link `ra_ap_*`.

use std::path::Path;

use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use rayon::prelude::*;
use uuid::Uuid;

use ra_ap_ide::{Analysis, AnalysisHost, FileId};
use ra_ap_load_cargo::{load_workspace_at, LoadCargoConfig, ProcMacroServerChoice};
use ra_ap_project_model::CargoConfig;
use ra_ap_vfs::{Vfs, VfsPath};

use super::scip::{ScipRow, ScipScan};

/// A definition discovered in one file, with the position to query refs at.
///
/// One `DefSite` is built per `file_structure` node. It carries both the
/// rust-analyzer handle needed to run `find_all_refs` (`file_id` + `offset`)
/// and the already-rendered warehouse values (`file`, `line`, `col`) from which
/// `def_and_refs` builds the `file#Lline:Ccol` symbol key.
struct DefSite {
    /// rust-analyzer's id of the file that contains the definition.
    file_id: FileId,
    /// Byte offset of the definition's *name* (navigation range start).
    offset: ra_ap_ide::TextSize,
    /// The structure node's label; copied into each row's `display_name`.
    label: String,
    /// `Debug` rendering of the structure node's kind.
    kind: String,
    /// Workspace-relative file path of the definition.
    file: String,
    /// Line of `offset`: the line-index value plus one.
    line: u32,
    /// Column of `offset`: the line-index value plus one.
    col: u32,
}

/// Run rust-analyzer IN-PROCESS over the cargo workspace at `root`, extract the
/// RESOLVED defs + references, and return a [`ScipScan`] ready to persist.
///
/// `repo`/`git_sha` tag the snapshot (the SHA-key for historization). The heavy
/// work — loading the workspace into the salsa DB — happens once; the per-symbol
/// `find_all_refs` fan-out then runs in parallel via rayon.
///
/// `snapshot_id` and `ts` are copied into the returned scan unchanged.
///
/// # Errors
///
/// Fails only when `load_workspace_at` fails; the error is wrapped with the
/// workspace path as context. Per-file and per-definition query failures are
/// NOT errors: the affected file or reference lookup is skipped and the scan
/// continues (best-effort).
///
/// NOTE(review): `root` is used verbatim as a string prefix when relativizing
/// vfs paths. The vfs presumably stores absolute paths, so a relative `root`
/// would not match them — confirm that callers pass an absolute path.
pub fn ingest_in_process(
    root: &Path,
    repo: &str,
    git_sha: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> Result<ScipScan> {
    let cargo_config = CargoConfig::default();
    let load_config = LoadCargoConfig {
        load_out_dirs_from_check: true,
        with_proc_macro_server: ProcMacroServerChoice::Sysroot,
        prefill_caches: false,
        // Intent: bound the in-process worker pools so the salsa DB load shares
        // cores with our own rayon ref fan-out rather than oversubscribing.
        // NOTE(review): what `0` means for these two fields (disabled vs.
        // "pick a default") is not visible here — confirm against the
        // `LoadCargoConfig` documentation of the pinned ra_ap version.
        num_worker_threads: 0,
        proc_macro_processes: 0,
    };

    // Load the workspace into a RootDatabase + Vfs — the in-process equivalent
    // of what `rust-analyzer scip` does before it emits, minus the subprocess.
    // Progress reports are discarded by the no-op callback.
    let (db, vfs, _proc_macro) = load_workspace_at(
        root,
        &cargo_config,
        &load_config,
        &|_progress| {},
    )
    .with_context(|| format!("loading cargo workspace at {} into rust-analyzer", root.display()))?;

    let host = AnalysisHost::with_database(db);
    // Snapshot used for the sequential definition pass below; the parallel
    // reference pass takes its own snapshots.
    let analysis = host.analysis();

    // The workspace files we care about: real on-disk `.rs` files under `root`.
    // Which vfs entries count as in-workspace is decided entirely by
    // `vfs_rel_path`; this function only adds the `.rs` suffix filter. The root
    // is forward-slashed here so it compares against the forward-slashed paths
    // `vfs_rel_path` works on.
    let root_str = root.to_string_lossy().replace('\\', "/");
    let files: Vec<(FileId, String)> = vfs
        .iter()
        .filter_map(|(file_id, path)| vfs_rel_path(path, &root_str).map(|p| (file_id, p)))
        .filter(|(_, p)| p.ends_with(".rs"))
        .collect();

    // 1. DEFINITIONS — `file_structure` gives every item's name position, fully
    //    resolved by ra-ap (it ran the IDE engine to build the DB). Collect the
    //    def-sites per file (sequential: the DB query layer is cheap here).
    //    A file whose structure or line index query fails is skipped whole.
    let mut defs: Vec<DefSite> = Vec::new();
    for (file_id, file) in &files {
        let struct_cfg = ra_ap_ide::FileStructureConfig { exclude_locals: true };
        let Ok(nodes) = analysis.file_structure(&struct_cfg, *file_id) else { continue };
        let Ok(line_index) = analysis.file_line_index(*file_id) else { continue };
        for node in nodes {
            // The start of the navigation range is both the position later
            // handed to `find_all_refs` and the source of the def-site key.
            let off = node.navigation_range.start();
            let lc = line_index.line_col(off);
            defs.push(DefSite {
                file_id: *file_id,
                offset: off,
                label: node.label,
                kind: format!("{:?}", node.kind),
                file: file.clone(),
                // Stored with +1 applied (presumably converting rust-analyzer's
                // zero-based line/column to 1-based — confirm).
                line: lc.line + 1,
                col: lc.col + 1,
            });
        }
    }

    // 2. REFERENCES — fan out `find_all_refs` over the def-sites IN PARALLEL.
    //
    //    SPIKE FINDING: rust-analyzer's salsa `RootDatabase` (and thus
    //    `AnalysisHost` / `Analysis`) is `Send + !Sync` — its `ZalsaLocal` query
    //    stack is a `RefCell`. So we can NOT share one `Analysis` (or `&host`)
    //    across rayon workers (`map_init`/`map_with` need `Sync`/`Clone`, both
    //    absent). The salsa-correct parallel pattern: create ONE OWNED `Analysis`
    //    snapshot PER CHUNK on the main thread (each is cheap + `Send`), then move
    //    it into its rayon task. Snapshots share the immutable DB underneath, so
    //    this is real parallelism over the resolved query engine, not N reloads.
    //
    //    Chunk sizing: ceil(defs / threads) yields at most one chunk per rayon
    //    thread. The trailing `.max(1)` keeps the size non-zero when `defs` is
    //    empty, because `slice::chunks` panics on a chunk size of 0.
    let n_threads = rayon::current_num_threads().max(1);
    let chunk_len = defs.len().div_ceil(n_threads).max(1);
    // (chunk_of_defs, its_own_analysis) pairs — the analyses are made here,
    // single-threaded, then each moves into a worker.
    let work: Vec<(&[DefSite], Analysis)> = defs
        .chunks(chunk_len)
        .map(|chunk| (chunk, host.analysis()))
        .collect();
    let vfs_ref = &vfs;
    let root_ref = root_str.as_str();
    let rows: Vec<ScipRow> = work
        .into_par_iter()
        .flat_map_iter(move |(chunk, analysis)| {
            // Rows are collected into a Vec before being handed back: the lazy
            // inner iterator borrows `analysis`, which is owned by this closure
            // call and would not outlive it.
            chunk
                .iter()
                .flat_map(|def| def_and_refs(&analysis, vfs_ref, root_ref, def))
                .collect::<Vec<_>>()
                .into_iter()
        })
        .collect();

    Ok(ScipScan {
        snapshot_id,
        ts,
        repo: repo.to_string(),
        git_sha: git_sha.to_string(),
        rows,
    })
}

/// Build the warehouse rows for a single def-site: one `definition` row,
/// followed by one row per reference rust-analyzer resolves to it.
///
/// Every row shares the same `symbol` — the `file#Lline:Ccol` key of the
/// def-site — and the same `display_name` (the def-site's label). Only the
/// definition row carries a `kind`; reference rows leave it empty.
///
/// Best-effort: when the reference query fails or yields nothing, the result is
/// the definition row alone. A reference file is skipped when `vfs_rel_path`
/// returns `None` for it or when its line index cannot be obtained.
fn def_and_refs(
    analysis: &Analysis,
    vfs: &Vfs,
    root_str: &str,
    def: &DefSite,
) -> Vec<ScipRow> {
    // The def-site key: the identity every reference row is attributed to.
    let symbol = format!("{}#L{}:C{}", def.file, def.line, def.col);

    // Seed the output with the definition row itself.
    let mut rows = vec![ScipRow {
        symbol: symbol.clone(),
        role: "definition".to_string(),
        is_definition: true,
        display_name: def.label.clone(),
        kind: def.kind.clone(),
        file: def.file.clone(),
        start_line: def.line,
        start_col: def.col,
    }];

    // Whole-workspace search (`search_scope: None`), imports and tests
    // included; fixture handling stays at its default since this runs on a real
    // cargo workspace rather than rust-analyzer's test harness.
    let refs_cfg = ra_ap_ide::FindAllRefsConfig {
        search_scope: None,
        ra_fixture: ra_ap_ide::RaFixtureConfig::default(),
        exclude_imports: false,
        exclude_tests: false,
    };
    let query_pos = ra_ap_ide::FilePosition { file_id: def.file_id, offset: def.offset };
    let results = match analysis.find_all_refs(query_pos, &refs_cfg) {
        Ok(Some(results)) => results,
        _ => return rows,
    };

    // Flatten every search result into its per-file reference lists.
    for (file_id, ranges) in results.into_iter().flat_map(|res| res.references) {
        let Some(path) = vfs_rel_path(vfs.file_path(file_id), root_str) else { continue };
        let Ok(line_index) = analysis.file_line_index(file_id) else { continue };
        rows.extend(ranges.into_iter().map(|(range, category)| {
            let lc = line_index.line_col(range.start());
            ScipRow {
                symbol: symbol.clone(),
                role: ref_category_label(category),
                is_definition: false,
                display_name: def.label.clone(),
                kind: String::new(),
                file: path.clone(),
                start_line: lc.line + 1,
                start_col: lc.col + 1,
            }
        }));
    }
    rows
}

/// Render ra-ap's `ReferenceCategory` bitflags as the warehouse role label.
///
/// Each set flag contributes its label, always in the order `write`, `read`,
/// `import`, `test`, joined with `+` (e.g. `write+test`). When none of those
/// flags is set the label is plain `reference`.
fn ref_category_label(cat: ra_ap_ide::ReferenceCategory) -> String {
    use ra_ap_ide::ReferenceCategory as C;

    // Flag → label, listed in output order.
    let table = [
        (C::WRITE, "write"),
        (C::READ, "read"),
        (C::IMPORT, "import"),
        (C::TEST, "test"),
    ];

    let mut parts: Vec<&str> = Vec::with_capacity(table.len());
    for (flag, label) in table {
        if cat.contains(flag) {
            parts.push(label);
        }
    }

    if parts.is_empty() {
        "reference".to_string()
    } else {
        parts.join("+")
    }
}

/// Workspace-relative, forward-slashed path for a vfs entry under `root_str`,
/// or `None` for virtual / out-of-workspace files.
///
/// `root_str` is the workspace root, already forward-slashed; a trailing `/`
/// is tolerated. It is compared as a plain string prefix, so it has to be
/// written the same way the vfs writes its paths. NOTE(review): the vfs
/// presumably holds absolute paths, in which case a relative root matches
/// nothing — confirm that callers pass an absolute root.
///
/// The root must match on a whole path component. With root `/ws/app`:
///
/// - `/ws/app/src/lib.rs` yields `Some("src/lib.rs")`,
/// - `/ws/app2/src/lib.rs` yields `None` (sibling directory sharing a prefix),
/// - `/home/u/.cargo/registry/src/x/lib.rs` yields `None` (outside the root).
///
/// The returned string is an owned copy; the vfs path is also copied once to
/// normalize backslashes before matching.
fn vfs_rel_path(path: &VfsPath, root_str: &str) -> Option<String> {
    // Virtual (non-filesystem) entries have no on-disk path.
    let p = path.as_path()?;
    let abs = p.to_string().replace('\\', "/");

    // Out-of-workspace entries are rejected here instead of being passed
    // through with their full path.
    let root = root_str.trim_end_matches('/');
    let rest = abs.strip_prefix(root)?;

    // Require a component boundary right after the root so that a sibling
    // directory whose name merely starts with the root's name is not accepted.
    if !(rest.is_empty() || rest.starts_with('/')) {
        return None;
    }
    Some(rest.trim_start_matches('/').to_string())
}