use std::path::Path;
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use scip::types::{symbol_information, Index, SymbolRole};
use uuid::Uuid;
/// One SCIP occurrence flattened into a row: which symbol it refers to, the
/// decoded role, and where in the document it occurs.
#[derive(Debug, Clone, serde::Serialize, PartialEq, Eq)]
pub struct ScipRow {
/// Symbol string of the occurrence, copied verbatim from the index.
pub symbol: String,
/// Role labels joined with `+` (e.g. `definition`, `read`); `reference`
/// when none of the recognised role bits is set.
pub role: String,
/// True when the occurrence carries the `Definition` role bit.
pub is_definition: bool,
/// Display name taken from the document's symbol information; empty when
/// the document has no information for this symbol.
pub display_name: String,
/// Kind label such as `Function` or `Struct`; empty when the document has
/// no information for this symbol.
pub kind: String,
/// The `relative_path` of the document that contains the occurrence.
pub file: String,
/// 1-based start line (the index stores it 0-based).
pub start_line: u32,
/// 1-based start column (the index stores it 0-based).
pub start_col: u32,
/// 1-based first line of the enclosing range. Filled only for definitions
/// of callable kinds that carry an enclosing range; `0` otherwise.
pub enc_start_line: u32,
/// 1-based last line of the enclosing range; `0` when `enc_start_line` is `0`.
pub enc_end_line: u32,
}
/// The result of ingesting one SCIP index: the caller-supplied snapshot
/// metadata plus every occurrence flattened into a [`ScipRow`].
#[derive(Debug, Default)]
pub struct ScipScan {
/// Snapshot identifier passed in by the caller at ingest time.
pub snapshot_id: Uuid,
/// Timestamp passed in by the caller at ingest time.
pub ts: DateTime<Utc>,
/// Repository name passed in by the caller.
pub repo: String,
/// Git commit hash passed in by the caller.
pub git_sha: String,
/// One row per ingested occurrence, in document order and then occurrence order.
pub rows: Vec<ScipRow>,
}
/// Decodes a SCIP `symbol_roles` bitmask.
///
/// Returns the role labels joined with `+` together with a flag that is true
/// when the `Definition` bit is set. Labels always appear in the order
/// `definition`, `import`, `write`, `read`, `generated`, `test`; a mask with
/// none of those bits set is labelled `reference`.
fn decode_roles(bits: i32) -> (String, bool) {
    // Bit → label pairs, listed in the order the labels must be emitted.
    let table = [
        (SymbolRole::Definition as i32, "definition"),
        (SymbolRole::Import as i32, "import"),
        (SymbolRole::WriteAccess as i32, "write"),
        (SymbolRole::ReadAccess as i32, "read"),
        (SymbolRole::Generated as i32, "generated"),
        (SymbolRole::Test as i32, "test"),
    ];
    let names: Vec<&str> = table
        .iter()
        .filter(|(mask, _)| bits & mask != 0)
        .map(|(_, name)| *name)
        .collect();
    let joined = if names.is_empty() {
        "reference".to_string()
    } else {
        names.join("+")
    };
    let defines = bits & SymbolRole::Definition as i32 != 0;
    (joined, defines)
}
fn kind_label(kind: symbol_information::Kind) -> &'static str {
use symbol_information::Kind::*;
match kind {
Function => "Function",
Method => "Method",
StaticMethod => "StaticMethod",
Struct => "Struct",
Trait => "Trait",
TraitMethod => "TraitMethod",
Enum => "Enum",
EnumMember => "EnumMember",
Field => "Field",
Module => "Module",
Macro => "Macro",
TypeAlias => "TypeAlias",
Constant => "Constant",
Variable => "Variable",
Parameter => "Parameter",
TypeParameter => "TypeParameter",
AssociatedType => "AssociatedType",
SelfParameter => "SelfParameter",
_ => "Other",
}
}
/// Reads a serialized SCIP index from `index_path` and ingests it.
///
/// `repo`, `git_sha`, `snapshot_id` and `ts` are passed through to
/// [`ingest_index`] unchanged.
///
/// # Errors
///
/// Fails when the file cannot be read (the error context names the path) or
/// when its contents do not parse as a SCIP protobuf `Index`.
pub fn ingest_index_file(
    index_path: &Path,
    repo: &str,
    git_sha: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> Result<ScipScan> {
    let raw = std::fs::read(index_path)
        .with_context(|| format!("reading SCIP index {}", index_path.display()))?;
    protobuf_parse(&raw).map(|index| ingest_index(index, repo, git_sha, snapshot_id, ts))
}
/// Generates a SCIP index for `repo_root` by running
/// `rust-analyzer scip <repo_root> --output <file>` and ingests the result.
///
/// The index is written to `nornir-scip-<snapshot_id>.scip` in the system
/// temporary directory. Once the command has run, that file is removed on a
/// best-effort basis whether or not the command or the ingest succeeded.
///
/// # Errors
///
/// Fails when `rust-analyzer` cannot be spawned, when it exits with a
/// non-success status, or when the generated index cannot be read or parsed.
pub fn ingest_via_rust_analyzer(
    repo_root: &Path,
    repo: &str,
    git_sha: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> Result<ScipScan> {
    let out = std::env::temp_dir().join(format!("nornir-scip-{snapshot_id}.scip"));

    let mut cmd = std::process::Command::new("rust-analyzer");
    cmd.arg("scip").arg(repo_root).arg("--output").arg(&out);
    let status = cmd
        .status()
        .context("spawning rust-analyzer (install: `rustup component add rust-analyzer`)")?;

    let outcome = if status.success() {
        ingest_index_file(&out, repo, git_sha, snapshot_id, ts)
    } else {
        Err(anyhow::anyhow!("rust-analyzer scip failed ({status})"))
    };

    // Single cleanup point; a failed removal is deliberately ignored.
    let _ = std::fs::remove_file(&out);
    outcome
}
fn protobuf_parse(bytes: &[u8]) -> Result<Index> {
use protobuf::Message;
Index::parse_from_bytes(bytes).context("parsing SCIP protobuf Index")
}
/// Flattens a decoded SCIP [`Index`] into a [`ScipScan`] with one [`ScipRow`]
/// per occurrence.
///
/// For each document, its symbol information is indexed by symbol string so
/// that every occurrence in that document can pick up a kind label and a
/// display name; occurrences of symbols the document has no information for
/// get empty strings for both. Occurrences whose `range` has fewer than two
/// elements are skipped.
///
/// Lines and columns are converted from 0-based to 1-based (negative values
/// are clamped to 0 first). The enclosing line span is recorded only for
/// definitions whose kind is `Function`, `Method`, `StaticMethod` or
/// `TraitMethod`; every other row gets `(0, 0)`.
///
/// `repo`, `git_sha`, `snapshot_id` and `ts` are copied into the result.
pub fn ingest_index(
    index: Index,
    repo: &str,
    git_sha: &str,
    snapshot_id: Uuid,
    ts: DateTime<Utc>,
) -> ScipScan {
    use std::collections::HashMap;

    /// Clamps a 0-based SCIP coordinate at zero and shifts it to 1-based.
    fn one_based(v: i32) -> u32 {
        (v.max(0) as u32).saturating_add(1)
    }

    let mut rows = Vec::new();
    for doc in &index.documents {
        // symbol -> (kind label, display name); a later entry for the same
        // symbol replaces an earlier one.
        let meta: HashMap<&str, (&'static str, &str)> = doc
            .symbols
            .iter()
            .map(|si| {
                let label = si.kind.enum_value().map(kind_label).unwrap_or("Other");
                (si.symbol.as_str(), (label, si.display_name.as_str()))
            })
            .collect();

        for occ in &doc.occurrences {
            // Both the 3- and the 4-element range forms start with line, column.
            let [line, col, ..] = occ.range.as_slice() else {
                continue;
            };

            let (role, is_definition) = decode_roles(occ.symbol_roles);
            let (kind, display_name) = match meta.get(occ.symbol.as_str()) {
                Some(&(label, shown)) => (label.to_string(), shown.to_string()),
                None => (String::new(), String::new()),
            };

            let callable_def = is_definition
                && matches!(
                    kind.as_str(),
                    "Function" | "Method" | "StaticMethod" | "TraitMethod"
                );
            let (enc_start_line, enc_end_line) = if callable_def {
                match occ.enclosing_range.as_slice() {
                    // [start line, start col, end line, end col]
                    [first, _, last, _, ..] => (one_based(*first), one_based(*last)),
                    // [line, start col, end col]: starts and ends on one line.
                    [only, _, _] => {
                        let l = one_based(*only);
                        (l, l)
                    }
                    _ => (0, 0),
                }
            } else {
                (0, 0)
            };

            rows.push(ScipRow {
                symbol: occ.symbol.clone(),
                role,
                is_definition,
                display_name,
                kind,
                file: doc.relative_path.clone(),
                start_line: one_based(*line),
                start_col: one_based(*col),
                enc_start_line,
                enc_end_line,
            });
        }
    }

    ScipScan {
        snapshot_id,
        ts,
        repo: repo.to_owned(),
        git_sha: git_sha.to_owned(),
        rows,
    }
}
impl ScipScan {
    /// Returns the definition rows whose display name or symbol contains
    /// `pattern`, compared after lowercasing both sides.
    ///
    /// An empty pattern matches every definition row.
    pub fn definitions_matching<'a>(&'a self, pattern: &str) -> Vec<&'a ScipRow> {
        let needle = pattern.to_lowercase();
        let mut hits = Vec::new();
        for row in &self.rows {
            if !row.is_definition {
                continue;
            }
            let matched = row.display_name.to_lowercase().contains(&needle)
                || row.symbol.to_lowercase().contains(&needle);
            if matched {
                hits.push(row);
            }
        }
        hits
    }

    /// Returns the distinct symbols of the definitions matching `pattern`,
    /// sorted ascending.
    pub fn resolve_symbols(&self, pattern: &str) -> Vec<String> {
        // An ordered set gives the same result as sort + dedup.
        let unique: std::collections::BTreeSet<String> = self
            .definitions_matching(pattern)
            .into_iter()
            .map(|row| row.symbol.clone())
            .collect();
        unique.into_iter().collect()
    }

    /// Returns every non-definition row whose symbol equals `symbol` exactly.
    pub fn usages_of<'a>(&'a self, symbol: &str) -> Vec<&'a ScipRow> {
        let mut uses = Vec::new();
        for row in &self.rows {
            if !row.is_definition && row.symbol == symbol {
                uses.push(row);
            }
        }
        uses
    }

    /// Returns every row, definitions included, whose symbol equals `symbol`
    /// exactly.
    pub fn occurrences_of<'a>(&'a self, symbol: &str) -> Vec<&'a ScipRow> {
        let mut all = Vec::new();
        for row in &self.rows {
            if row.symbol == symbol {
                all.push(row);
            }
        }
        all
    }
}
/// Derives call edges from a single scan, with no cross-scan symbol table.
///
/// Equivalent to [`scip_call_edges_with`] called with an empty `globals` map.
#[cfg(feature = "scip")]
pub fn scip_call_edges(scan: &ScipScan) -> Vec<super::symbols::CallEdgeRow> {
    let no_globals = std::collections::HashMap::new();
    scip_call_edges_with(scan, &no_globals)
}
#[cfg(feature = "scip")]
/// Reports whether a kind label names something that can be called:
/// `Function`, `Method`, `StaticMethod` or `TraitMethod` (exact, case-sensitive).
fn is_callable_kind(kind: &str) -> bool {
    ["Function", "Method", "StaticMethod", "TraitMethod"].contains(&kind)
}
#[cfg(feature = "scip")]
/// Builds a symbol table of callable definitions across several scans.
///
/// The result maps each symbol string to `(kind label, name)`, where the name
/// is the row's display name or, when that is empty, the symbol itself. Only
/// definition rows of a callable kind contribute. When a symbol is defined
/// more than once, the first definition encountered (scans in slice order,
/// rows in row order) wins.
pub fn global_symbol_table(
    scans: &[&ScipScan],
) -> std::collections::HashMap<String, (String, String)> {
    let mut table: std::collections::HashMap<String, (String, String)> =
        std::collections::HashMap::new();
    let callable_defs = scans
        .iter()
        .flat_map(|scan| scan.rows.iter())
        .filter(|row| row.is_definition && is_callable_kind(&row.kind));
    for row in callable_defs {
        // Keep the first definition seen for a symbol.
        if table.contains_key(&row.symbol) {
            continue;
        }
        let shown = if row.display_name.is_empty() {
            &row.symbol
        } else {
            &row.display_name
        };
        table.insert(row.symbol.clone(), (row.kind.clone(), shown.clone()));
    }
    table
}
/// Derives call edges from `scan`, resolving callees against the scan's own
/// callable definitions first and against `globals` second.
///
/// For every non-definition row:
///
/// 1. The callee name comes from the scan's callable definitions, else from
///    `globals`, else from the row's own display name. A row with none of
///    these is dropped.
/// 2. The caller is the callable definition in the same file whose enclosing
///    line span contains the row's line; when several contain it, the one with
///    the smallest span wins (the first such definition on a tie). Rows with
///    no containing definition are dropped.
/// 3. A row that refers to the caller's own symbol on the caller's first
///    enclosing line is dropped.
///
/// Every emitted edge has an empty `crate_name` and `call_kind` set to `call`.
#[cfg(feature = "scip")]
pub fn scip_call_edges_with(
    scan: &ScipScan,
    globals: &std::collections::HashMap<String, (String, String)>,
) -> Vec<super::symbols::CallEdgeRow> {
    use super::symbols::CallEdgeRow;
    use std::collections::HashMap;

    /// A callable definition and the 1-based line span that encloses it.
    struct Span<'a> {
        first: u32,
        last: u32,
        label: &'a str,
        symbol: &'a str,
    }

    /// Display name of a row, or its symbol when the display name is empty.
    fn label_of(row: &ScipRow) -> &str {
        if row.display_name.is_empty() {
            row.symbol.as_str()
        } else {
            row.display_name.as_str()
        }
    }

    // One pass over the callable definitions fills both lookups.
    let mut spans_by_file: HashMap<&str, Vec<Span<'_>>> = HashMap::new();
    let mut local_names: HashMap<&str, &str> = HashMap::new();
    let callable_defs = scan
        .rows
        .iter()
        .filter(|row| row.is_definition && is_callable_kind(&row.kind));
    for row in callable_defs {
        let label = label_of(row);
        // First definition of a symbol decides its name.
        local_names.entry(row.symbol.as_str()).or_insert(label);
        // Only definitions with a usable enclosing span can act as callers.
        if row.enc_start_line != 0 && row.enc_end_line >= row.enc_start_line {
            spans_by_file.entry(row.file.as_str()).or_default().push(Span {
                first: row.enc_start_line,
                last: row.enc_end_line,
                label,
                symbol: row.symbol.as_str(),
            });
        }
    }

    let mut edges = Vec::new();
    for row in scan.rows.iter().filter(|row| !row.is_definition) {
        let symbol = row.symbol.as_str();
        let known = local_names
            .get(symbol)
            .copied()
            .or_else(|| globals.get(symbol).map(|(_, name)| name.as_str()));
        // NOTE(review): the display-name fallback also admits references to
        // symbols that are not callable definitions — confirm this is intended.
        let callee = match known {
            Some(name) => name,
            None if !row.display_name.is_empty() => row.display_name.as_str(),
            None => continue,
        };

        let Some(spans) = spans_by_file.get(row.file.as_str()) else {
            continue;
        };
        // `min_by_key` returns the first of several equal minima, which keeps
        // the earliest definition when spans tie.
        let innermost = spans
            .iter()
            .filter(|s| s.first <= row.start_line && row.start_line <= s.last)
            .min_by_key(|s| s.last - s.first);
        let Some(caller) = innermost else {
            continue;
        };
        if caller.symbol == row.symbol && row.start_line == caller.first {
            continue;
        }

        edges.push(CallEdgeRow {
            crate_name: String::new(),
            caller_path: caller.label.to_string(),
            callee_ident: callee.to_string(),
            call_kind: "call".to_string(),
            file: row.file.clone(),
            line: row.start_line,
        });
    }
    edges
}
#[cfg(test)]
mod tests {
    use super::*;
    use scip::types::{Document, Occurrence, SymbolInformation};

    /// One document, `src/lib.rs`, holding a `TraitMethod` definition of
    /// `Greet#name()`, two plain references to it, and one read access to the
    /// unrelated symbol `Config#name.` that has no symbol information.
    fn sample_index() -> Index {
        let mut idx = Index::new();
        let mut doc = Document::new();
        doc.relative_path = "src/lib.rs".into();

        let mut si = SymbolInformation::new();
        si.symbol = "rust-analyzer cargo demo 0.1.0 Greet#name().".into();
        si.display_name = "name".into();
        si.kind = symbol_information::Kind::TraitMethod.into();
        doc.symbols.push(si.clone());

        let mut def = Occurrence::new();
        def.range = vec![10, 4, 10, 8];
        def.symbol = si.symbol.clone();
        def.symbol_roles = SymbolRole::Definition as i32;
        doc.occurrences.push(def);

        for line in [20, 30] {
            let mut r = Occurrence::new();
            r.range = vec![line, 8, line, 12];
            r.symbol = si.symbol.clone();
            // No role bits: decoded as a plain reference.
            r.symbol_roles = 0;
            doc.occurrences.push(r);
        }

        let mut other = Occurrence::new();
        other.range = vec![40, 8, 40, 12];
        other.symbol = "rust-analyzer cargo demo 0.1.0 Config#name.".into();
        other.symbol_roles = SymbolRole::ReadAccess as i32;
        doc.occurrences.push(other);

        idx.documents.push(doc);
        idx
    }

    #[test]
    fn ingest_maps_roles_and_resolves_exact_symbol() {
        let scan = ingest_index(
            sample_index(),
            "demo",
            "deadbeef",
            Uuid::nil(),
            Utc::now(),
        );
        assert_eq!(scan.rows.len(), 4);

        // Only the definition matches; the `Config#name.` row is a read access.
        let syms = scan.resolve_symbols("name");
        assert_eq!(syms, vec!["rust-analyzer cargo demo 0.1.0 Greet#name().".to_string()]);

        let usages = scan.usages_of(&syms[0]);
        assert_eq!(usages.len(), 2);
        let lines: Vec<u32> = usages.iter().map(|r| r.start_line).collect();
        assert_eq!(lines, vec![21, 31]);

        let def: Vec<&ScipRow> = scan.rows.iter().filter(|r| r.is_definition).collect();
        assert_eq!(def.len(), 1);
        assert_eq!(def[0].kind, "TraitMethod");
        assert_eq!(def[0].role, "definition");
        assert_eq!(def[0].start_line, 11);
    }

    /// Scan of a binary whose `main` (enclosing lines 10..=20, 0-based)
    /// references `helper()` from another package; `helper` has no symbol
    /// information in this index.
    #[cfg(feature = "scip")]
    fn bin_scan() -> ScipScan {
        let mut idx = Index::new();
        let mut doc = Document::new();
        doc.relative_path = "src/main.rs".into();

        let mut main_si = SymbolInformation::new();
        main_si.symbol = "rust-analyzer cargo demo_bin 0.1.0 main().".into();
        main_si.display_name = "main".into();
        main_si.kind = symbol_information::Kind::Function.into();
        doc.symbols.push(main_si.clone());

        let mut main_def = Occurrence::new();
        main_def.range = vec![10, 3, 10, 7];
        main_def.enclosing_range = vec![10, 0, 20, 1];
        main_def.symbol = main_si.symbol.clone();
        main_def.symbol_roles = SymbolRole::Definition as i32;
        doc.occurrences.push(main_def);

        let mut ref_helper = Occurrence::new();
        ref_helper.range = vec![13, 8, 13, 14];
        ref_helper.symbol = "rust-analyzer cargo demo_lib 0.1.0 helper().".into();
        ref_helper.symbol_roles = 0;
        doc.occurrences.push(ref_helper);

        idx.documents.push(doc);
        ingest_index(idx, "demo_bin", "binsha", Uuid::nil(), Utc::now())
    }

    /// Scan of the library that defines `helper()`.
    #[cfg(feature = "scip")]
    fn lib_scan() -> ScipScan {
        let mut idx = Index::new();
        let mut doc = Document::new();
        doc.relative_path = "src/lib.rs".into();

        let mut helper_si = SymbolInformation::new();
        helper_si.symbol = "rust-analyzer cargo demo_lib 0.1.0 helper().".into();
        helper_si.display_name = "helper".into();
        helper_si.kind = symbol_information::Kind::Function.into();
        doc.symbols.push(helper_si.clone());

        let mut helper_def = Occurrence::new();
        helper_def.range = vec![5, 7, 5, 13];
        helper_def.enclosing_range = vec![5, 0, 9, 1];
        helper_def.symbol = helper_si.symbol.clone();
        helper_def.symbol_roles = SymbolRole::Definition as i32;
        doc.occurrences.push(helper_def);

        idx.documents.push(doc);
        ingest_index(idx, "demo_lib", "libsha", Uuid::nil(), Utc::now())
    }

    #[cfg(feature = "scip")]
    #[test]
    fn cross_binary_call_resolves_via_moniker_join() {
        let bin = bin_scan();
        let lib = lib_scan();

        // Without the global table the binary scan knows nothing about `helper`.
        let solo = scip_call_edges(&bin);
        assert!(
            !solo.iter().any(|e| e.callee_ident == "helper"),
            "single-scan build must NOT resolve the cross-binary call (it falls \
             back to syn); got {solo:?}"
        );

        let globals = global_symbol_table(&[&bin, &lib]);
        let joined = scip_call_edges_with(&bin, &globals);
        let helper_edges: Vec<_> =
            joined.iter().filter(|e| e.callee_ident == "helper").collect();
        assert_eq!(
            helper_edges.len(),
            1,
            "cross-binary call must resolve via moniker join; got {joined:?}"
        );
        assert_eq!(helper_edges[0].caller_path, "main", "caller by containment");
        assert_eq!(helper_edges[0].line, 14, "ref at 0-based 13 → 1-based 14");
    }

    #[test]
    fn protobuf_roundtrip_through_bytes() {
        use protobuf::Message;
        let idx = sample_index();
        let bytes = idx.write_to_bytes().expect("encode");
        let back = protobuf_parse(&bytes).expect("decode");
        assert_eq!(back.documents.len(), 1);
        assert_eq!(back.documents[0].occurrences.len(), 4);
    }

    /// One document with four callable definitions (`outer`, `inner`,
    /// `Greet#name()`, `Config#name()`) and two references placed inside
    /// `outer`'s enclosing range: one to `inner`, one to `Greet#name()`.
    ///
    /// Gated like its only consumer, which needs `scip_call_edges`.
    #[cfg(feature = "scip")]
    fn wiring_index() -> Index {
        let mut idx = Index::new();
        let mut doc = Document::new();
        doc.relative_path = "src/lib.rs".into();

        let mut outer_si = SymbolInformation::new();
        outer_si.symbol = "rust-analyzer cargo demo 0.1.0 outer().".into();
        outer_si.display_name = "outer".into();
        outer_si.kind = symbol_information::Kind::Function.into();
        doc.symbols.push(outer_si.clone());
        let mut outer_def = Occurrence::new();
        outer_def.range = vec![10, 3, 10, 8];
        outer_def.enclosing_range = vec![10, 0, 20, 1];
        outer_def.symbol = outer_si.symbol.clone();
        outer_def.symbol_roles = SymbolRole::Definition as i32;
        doc.occurrences.push(outer_def);

        let mut inner_si = SymbolInformation::new();
        inner_si.symbol = "rust-analyzer cargo demo 0.1.0 inner().".into();
        inner_si.display_name = "inner".into();
        inner_si.kind = symbol_information::Kind::Function.into();
        doc.symbols.push(inner_si.clone());
        let mut inner_def = Occurrence::new();
        inner_def.range = vec![30, 3, 30, 8];
        inner_def.enclosing_range = vec![30, 0, 34, 1];
        inner_def.symbol = inner_si.symbol.clone();
        inner_def.symbol_roles = SymbolRole::Definition as i32;
        doc.occurrences.push(inner_def);

        let mut greet_si = SymbolInformation::new();
        greet_si.symbol = "rust-analyzer cargo demo 0.1.0 Greet#name().".into();
        greet_si.display_name = "name".into();
        greet_si.kind = symbol_information::Kind::Method.into();
        doc.symbols.push(greet_si.clone());
        let mut greet_def = Occurrence::new();
        greet_def.range = vec![40, 7, 40, 11];
        greet_def.enclosing_range = vec![40, 0, 44, 1];
        greet_def.symbol = greet_si.symbol.clone();
        greet_def.symbol_roles = SymbolRole::Definition as i32;
        doc.occurrences.push(greet_def);

        // Same display name as `Greet#name()` but a different symbol.
        let mut cfg_si = SymbolInformation::new();
        cfg_si.symbol = "rust-analyzer cargo demo 0.1.0 Config#name().".into();
        cfg_si.display_name = "name".into();
        cfg_si.kind = symbol_information::Kind::Method.into();
        doc.symbols.push(cfg_si.clone());
        let mut cfg_def = Occurrence::new();
        cfg_def.range = vec![50, 7, 50, 11];
        cfg_def.enclosing_range = vec![50, 0, 54, 1];
        cfg_def.symbol = cfg_si.symbol.clone();
        cfg_def.symbol_roles = SymbolRole::Definition as i32;
        doc.occurrences.push(cfg_def);

        let mut ref_inner = Occurrence::new();
        ref_inner.range = vec![12, 8, 12, 13];
        ref_inner.symbol = inner_si.symbol.clone();
        ref_inner.symbol_roles = 0;
        doc.occurrences.push(ref_inner);

        let mut ref_greet = Occurrence::new();
        ref_greet.range = vec![15, 8, 15, 12];
        ref_greet.symbol = greet_si.symbol.clone();
        ref_greet.symbol_roles = 0;
        doc.occurrences.push(ref_greet);

        idx.documents.push(doc);
        idx
    }

    // `scip_call_edges` only exists under the `scip` feature, so this test is
    // gated the same way as the other call-edge test above.
    #[cfg(feature = "scip")]
    #[test]
    fn scip_call_edges_resolve_caller_by_containment() {
        let scan = ingest_index(wiring_index(), "demo", "deadbeef", Uuid::nil(), Utc::now());
        let outer_def = scan
            .rows
            .iter()
            .find(|r| r.is_definition && r.display_name == "outer")
            .expect("outer def present");
        assert_eq!((outer_def.enc_start_line, outer_def.enc_end_line), (11, 21));

        let edges = scip_call_edges(&scan);
        assert_eq!(edges.len(), 2, "{edges:?}");
        for e in &edges {
            assert_eq!(e.caller_path, "outer", "caller resolved by containment");
        }
        assert!(
            edges.iter().any(|e| e.caller_path == "outer" && e.callee_ident == "inner"),
            "expected resolved edge outer → inner: {edges:?}"
        );

        let name_edges: Vec<_> = edges.iter().filter(|e| e.callee_ident == "name").collect();
        assert_eq!(name_edges.len(), 1, "exactly one resolved `name` call: {edges:?}");
        assert_eq!(name_edges[0].caller_path, "outer");
        assert_eq!(name_edges[0].line, 16, "ref at 0-based line 15 → 1-based 16");
        assert!(
            !edges.iter().any(|e| e.caller_path == "Config" || e.caller_path == "name"),
            "no edge mis-attributed to a name-collision peer: {edges:?}"
        );
    }
}