use std::collections::{HashMap, HashSet};
use std::iter::once;
use crate::graph::types::{CodeGraph, Confidence, Edge, FileFacts, Provenance, RefRole, Symbol};
use crate::symbol::{Descriptor, SymbolId};
use super::{Resolver, enclosing_symbol_index, normalize_from_path};
/// Stateless resolver whose [`Resolver`] implementation emits
/// `Provenance::External` call edges for calls that are backed by an import
/// and whose name is not defined by any of the input files.
///
/// The type carries no data, so it is freely copyable and can be used as a
/// bare value (`ExternalResolver.resolve(..)`).
#[derive(Clone, Copy, Debug, Default)]
pub struct ExternalResolver;
impl Resolver for ExternalResolver {
    /// Builds a graph containing every input symbol plus one edge per
    /// import-backed call to a name that none of the input files define.
    ///
    /// A `Call` reference produces an edge only when all of these hold, checked
    /// in this order:
    ///
    /// 1. `enclosing_symbol_index` finds a symbol of the same file for the
    ///    call's byte offset; that symbol becomes the edge's `from`.
    /// 2. No symbol in *any* input file has the called name as its leaf name.
    /// 3. The same file contains an `Import` reference with that name and a
    ///    non-empty `from_path`. When several imports share a name, the first
    ///    one in reference order is used.
    ///
    /// The edge target is created with `SymbolId::global` for the file's
    /// language: one `Namespace` descriptor per segment returned by
    /// `normalize_from_path`, followed by a `Term` descriptor for the called
    /// name. Edges are tagged `Confidence::NameOnly` / `Provenance::External`
    /// and are emitted in file order, then reference order.
    fn resolve(&self, files: &[FileFacts]) -> CodeGraph {
        // Flatten all symbols, keeping the order in which the files list them.
        let mut symbols: Vec<Symbol> = Vec::new();
        for facts in files {
            symbols.extend(facts.symbols.iter().cloned());
        }

        // One pass builds both lookup tables:
        //   * positions (into `symbols`) grouped by the symbol's file, and
        //   * the set of leaf names defined anywhere in the input.
        let mut indices_by_file: HashMap<&str, Vec<usize>> = HashMap::new();
        let mut defined_leaves: HashSet<&str> = HashSet::new();
        for (position, symbol) in symbols.iter().enumerate() {
            indices_by_file
                .entry(symbol.file.as_str())
                .or_default()
                .push(position);
            if let Some(leaf) = symbol.id.leaf_name() {
                defined_leaves.insert(leaf);
            }
        }

        let mut edges: Vec<Edge> = Vec::new();
        for facts in files {
            let local_symbols = indices_by_file.get(facts.file.as_str());
            let lang = facts.lang.as_str();

            // Imported name -> module path, for imports that carry a non-empty
            // `from_path`. `or_insert` keeps the first path seen for a name.
            let mut imported_from: HashMap<&str, &str> = HashMap::new();
            let usable_imports = facts
                .references
                .iter()
                .filter(|reference| reference.role == RefRole::Import)
                .filter_map(|reference| {
                    reference
                        .from_path
                        .as_deref()
                        .filter(|path| !path.is_empty())
                        .map(|path| (reference.name.as_str(), path))
                });
            for (name, path) in usable_imports {
                imported_from.entry(name).or_insert(path);
            }

            let calls = facts
                .references
                .iter()
                .filter(|reference| reference.role == RefRole::Call);
            for call in calls {
                // A call with no enclosing symbol has no `from` endpoint.
                let caller = match local_symbols
                    .and_then(|idxs| enclosing_symbol_index(&symbols, idxs, call.occ.byte))
                {
                    Some(position) => position,
                    None => continue,
                };

                let callee = call.name.as_str();
                // A name defined in the input is not treated as external, even
                // when the file also imports it.
                if defined_leaves.contains(callee) {
                    continue;
                }
                let module_path = match imported_from.get(callee) {
                    Some(path) => *path,
                    None => continue,
                };

                let namespaces = normalize_from_path(module_path)
                    .into_iter()
                    .map(|segment| Descriptor::Namespace(segment.to_owned()));
                let descriptors: Vec<Descriptor> = namespaces
                    .chain(once(Descriptor::Term(call.name.clone())))
                    .collect();

                edges.push(Edge {
                    from: symbols[caller].id.clone(),
                    to: SymbolId::global(lang, descriptors),
                    role: RefRole::Call,
                    confidence: Confidence::NameOnly,
                    provenance: Provenance::External,
                    occ: call.occ.clone(),
                });
            }
        }

        CodeGraph { symbols, edges }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::extract::{Extractor, PythonExtractor, RustExtractor};
    use crate::graph::types::{Occurrence, RefRole, Reference};

    // Python snippet that imports `get` from `requests` and calls it in `run`.
    const PY_IMPORTED_CALL: &str = "from requests import get\n\ndef run():\n get()\n";
    // Path used for the Python fixtures.
    const PY_FILE: &str = "src/client.py";
    // Path used for the Rust caller fixtures and their synthetic references.
    const MAIN_RS: &str = "src/main.rs";

    /// Builds a `Reference` with the given name, role, import path and
    /// occurrence; every other optional field is left as `None`.
    fn bare_reference(
        name: &str,
        role: RefRole,
        from_path: Option<&str>,
        occ: Occurrence,
    ) -> Reference {
        Reference {
            name: name.to_owned(),
            occ,
            role,
            source_module: None,
            from_path: from_path.map(str::to_owned),
            qualifier: None,
            scope: None,
            type_ref_ctx: None,
        }
    }

    /// Edges of `graph` that are both `Provenance::External` and `RefRole::Call`.
    fn external_call_edges(graph: &CodeGraph) -> Vec<&Edge> {
        graph
            .edges
            .iter()
            .filter(|e| e.provenance == Provenance::External && e.role == RefRole::Call)
            .collect()
    }

    /// Renders edges as `from → to (confidence/provenance)` for failure messages.
    fn describe_with_tags(edges: &[&Edge]) -> Vec<String> {
        edges
            .iter()
            .map(|e| {
                format!(
                    "{} → {} ({:?}/{:?})",
                    e.from.to_scip_string(),
                    e.to.to_scip_string(),
                    e.confidence,
                    e.provenance
                )
            })
            .collect()
    }

    /// A Python call to an imported name yields exactly one external edge with
    /// the expected target suffix, confidence, provenance and caller.
    #[test]
    fn python_import_backed_call_emits_external_edge() {
        let file = PythonExtractor
            .extract(PY_IMPORTED_CALL, PY_FILE)
            .unwrap();
        let graph = ExternalResolver.resolve(&[file]);
        let ext_edges = external_call_edges(&graph);

        assert_eq!(
            ext_edges.len(),
            1,
            "expected exactly one External Call edge, got {}: {:?}",
            ext_edges.len(),
            describe_with_tags(&ext_edges)
        );

        let edge = ext_edges[0];
        let target = edge.to.to_scip_string();
        assert!(
            target.ends_with("requests/get."),
            "external target must end with `requests/get.`, got: {}",
            target
        );
        assert_eq!(
            edge.confidence,
            Confidence::NameOnly,
            "external edge must be NameOnly, got {:?}",
            edge.confidence
        );
        assert_eq!(
            edge.provenance,
            Provenance::External,
            "provenance must be External, got {:?}",
            edge.provenance
        );

        // Deliberately loose substring check: only requires that the caller id
        // mentions `run`, without pinning the exact descriptor suffix.
        let source = edge.from.to_scip_string();
        assert!(
            source.contains("run"),
            "edge `from` must be the `run` symbol, got: {}",
            source
        );
    }

    /// A call to a name that is neither imported nor defined produces no
    /// external edge of any role.
    #[test]
    fn non_import_backed_call_emits_nothing() {
        let file = PythonExtractor
            .extract("def run():\n mystery()\n", PY_FILE)
            .unwrap();
        let graph = ExternalResolver.resolve(&[file]);
        let ext_edges: Vec<&Edge> = graph
            .edges
            .iter()
            .filter(|e| e.provenance == Provenance::External)
            .collect();

        assert!(
            ext_edges.is_empty(),
            "non-import-backed call must not produce an External edge; got {:?}",
            describe_with_tags(&ext_edges)
        );
    }

    /// Even when the caller imports `helper`, the fact that another input file
    /// defines `helper` must suppress the external edge.
    #[test]
    fn internally_defined_name_not_shadowed_by_external_edge() {
        let lib = RustExtractor
            .extract("pub fn helper() -> u32 { 1 }", "src/util.rs")
            .unwrap();
        let mut caller = RustExtractor
            .extract("pub fn run() -> u32 { 0 }", MAIN_RS)
            .unwrap();

        // Synthetic `use util::helper`-style import at the start of the file.
        caller.references.push(bare_reference(
            "helper",
            RefRole::Import,
            Some("util"),
            Occurrence {
                file: MAIN_RS.to_owned(),
                line: 1,
                col: 0,
                byte: 0,
            },
        ));
        // Synthetic call to `helper` at byte 22 of the caller source.
        caller.references.push(bare_reference(
            "helper",
            RefRole::Call,
            None,
            Occurrence {
                file: MAIN_RS.to_owned(),
                line: 1,
                col: 22,
                byte: 22,
            },
        ));

        let graph = ExternalResolver.resolve(&[lib, caller]);
        let helper_targets: Vec<String> = graph
            .edges
            .iter()
            .filter(|e| e.provenance == Provenance::External)
            .map(|e| e.to.to_scip_string())
            .filter(|target| target.contains("helper"))
            .collect();

        assert!(
            helper_targets.is_empty(),
            "internally-defined `helper` must not produce an External edge; got {:?}",
            helper_targets
        );
    }

    /// A Rust call backed by a synthetic `serde_json` import yields one
    /// external edge targeting `serde_json/from_str.`.
    #[test]
    fn rust_use_import_call_emits_external_edge() {
        let mut file = RustExtractor
            .extract("pub fn run() {}", MAIN_RS)
            .unwrap();

        file.references.push(bare_reference(
            "from_str",
            RefRole::Import,
            Some("serde_json"),
            Occurrence {
                file: MAIN_RS.to_owned(),
                line: 1,
                col: 0,
                byte: 0,
            },
        ));

        // Place the call at the first byte of `run`'s span so the resolver can
        // attribute it to `run`.
        let run_span_start = file
            .symbols
            .iter()
            .find(|s| s.name == "run")
            .expect("run symbol")
            .span
            .start;
        file.references.push(bare_reference(
            "from_str",
            RefRole::Call,
            None,
            Occurrence {
                file: MAIN_RS.to_owned(),
                line: 1,
                col: 10,
                byte: run_span_start,
            },
        ));

        let graph = ExternalResolver.resolve(&[file]);
        let ext_edges = external_call_edges(&graph);

        assert_eq!(
            ext_edges.len(),
            1,
            "expected exactly one External Call edge for `from_str`, got {}: {:?}",
            ext_edges.len(),
            ext_edges
                .iter()
                .map(|e| format!("{} → {}", e.from.to_scip_string(), e.to.to_scip_string()))
                .collect::<Vec<_>>()
        );

        let target = ext_edges[0].to.to_scip_string();
        assert!(
            target.ends_with("serde_json/from_str."),
            "external target must end with `serde_json/from_str.`, got: {}",
            target
        );
    }

    /// Resolving the same input twice yields the same edges in the same order.
    #[test]
    fn deterministic_on_repeated_resolution() {
        let file = PythonExtractor
            .extract(PY_IMPORTED_CALL, PY_FILE)
            .unwrap();
        let input = [file];

        // Renders every edge of a graph as `from → to (role)`.
        let render = |graph: &CodeGraph| -> Vec<String> {
            graph
                .edges
                .iter()
                .map(|e| {
                    format!(
                        "{} → {} ({:?})",
                        e.from.to_scip_string(),
                        e.to.to_scip_string(),
                        e.role
                    )
                })
                .collect()
        };

        let g1 = ExternalResolver.resolve(&input);
        let g2 = ExternalResolver.resolve(&input);
        let scips1 = render(&g1);
        let scips2 = render(&g2);

        assert_eq!(
            scips1, scips2,
            "repeated resolution must yield identical edges"
        );
    }
}