use std::collections::{HashMap, HashSet};
use rayon::prelude::*;
use crate::db::Database;
use crate::types::*;
/// Names that are too generic to be matched across files by name alone.
///
/// When a reference's name (or, for a `::`-qualified reference, its last
/// segment) appears in this list, the exact-name matchers only accept a
/// candidate that lives in the same file as the reference, and only when
/// exactly one such candidate exists.
///
/// Every entry must be unique; the list is scanned linearly.
const CROSS_FILE_BLOCKLIST: &[&str] = &[
    // Ubiquitous type and variant names.
    "Result",
    "Option",
    "String",
    "Vec",
    "Box",
    "Arc",
    "Rc",
    "Ok",
    "Err",
    "Some",
    "None",
    // Formatting, cloning and conversion.
    "fmt",
    "format",
    "display",
    "to_string",
    "clone",
    "clone_from",
    "default",
    "from",
    "into",
    "try_from",
    "try_into",
    // Construction and parsing.
    "new",
    "build",
    "builder",
    "parse",
    "from_str",
    // Comparison and hashing.
    "eq",
    "ne",
    "cmp",
    "partial_cmp",
    "hash",
    // Iteration, dropping and borrowing.
    "next",
    "iter",
    "into_iter",
    "drop",
    "deref",
    "deref_mut",
    "as_ref",
    "as_mut",
    "borrow",
    "borrow_mut",
    // Stream-like operations.
    "read",
    "write",
    "flush",
    "close",
    // Collection-like operations.
    "len",
    "is_empty",
    "contains",
    "push",
    "pop",
    "insert",
    "remove",
    "get",
    // Option/Result-like combinators.
    "unwrap",
    "expect",
    "map",
    "and_then",
    "or_else",
    "unwrap_or",
    // Assertions.
    "assert",
    "assert_eq",
    "assert_ne",
    "debug_assert",
    // Generic lifecycle and status names.
    "run",
    "start",
    "stop",
    "init",
    "setup",
    "status",
    "modified",
    "output",
    "exists",
    "join",
    "to_owned",
    // Iterator-adaptor style names.
    "collect",
    "filter",
    "find",
    "take",
    "skip",
    "count",
    "sum",
    "max",
    "min",
    "sort",
    "extend",
    "chain",
    "zip",
    "enumerate",
    "flatten",
    // Filesystem, process and synchronisation style names.
    "open",
    "create",
    "metadata",
    "canonicalize",
    "spawn",
    "wait",
    "send",
    "recv",
    "lock",
    "try_lock",
];
/// Maps a file path to a language identifier based on its file extension.
///
/// Returns `"unknown"` when the path has no extension or the extension is
/// not one of the recognised ones.
///
/// The extension is taken from the final path component only, so a bare
/// file name such as `go` or `sh` (no dot) is *not* mistaken for its own
/// extension, and dots in directory names are ignored.
fn lang_from_path(path: &str) -> &'static str {
    let extension = std::path::Path::new(path)
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or("");
    match extension {
        "rs" => "rust",
        "go" => "go",
        "py" | "pyi" => "python",
        "js" | "jsx" | "mjs" | "cjs" => "javascript",
        "ts" | "tsx" | "mts" | "cts" => "typescript",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "swift" => "swift",
        "c" | "h" => "c",
        "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "hh" => "cpp",
        "cs" => "csharp",
        "rb" => "ruby",
        "php" => "php",
        "scala" | "sc" => "scala",
        "dart" => "dart",
        "lua" => "lua",
        "pl" | "pm" => "perl",
        "sh" | "bash" => "bash",
        "nix" => "nix",
        "zig" => "zig",
        "proto" => "proto",
        _ => "unknown",
    }
}
/// Scores how close two `/`-separated paths are.
///
/// Counts the leading path segments the two paths have in common and awards
/// 5 points per shared segment, capped at 40.
fn path_proximity(a: &str, b: &str) -> i64 {
    let mut common_segments: i64 = 0;
    for (left, right) in a.split('/').zip(b.split('/')) {
        if left != right {
            break;
        }
        common_segments += 1;
    }
    let bonus = common_segments * 5;
    if bonus > 40 {
        40
    } else {
        bonus
    }
}
/// Resolves [`UnresolvedRef`]s to concrete nodes using in-memory indexes
/// built once from a slice of nodes (see `from_nodes`).
pub struct ReferenceResolver<'a> {
// The database handle is stored but not read by any method in this impl,
// hence the lint allowance.
#[allow(dead_code)]
db: &'a Database,
/// Non-`Use` nodes grouped by their `name`.
name_cache: HashMap<String, Vec<Node>>,
/// Non-`Use` nodes grouped by their `qualified_name`.
qualified_name_cache: HashMap<String, Vec<Node>>,
/// Maps every non-empty `::`-delimited trailing part of a qualified name
/// to the sorted, de-duplicated list of full qualified names ending in it.
suffix_cache: HashMap<String, Vec<String>>,
/// Union of the keys of the three caches above; used by `resolve_all` to
/// pre-filter references before attempting resolution.
known_names: HashSet<String>,
/// For each file path, the last `::` segment of every `Use` node's name in
/// that file (glob imports, i.e. `*`, are excluded).
import_index: HashMap<String, HashSet<String>>,
}
impl<'a> ReferenceResolver<'a> {
/// Builds a resolver by indexing `all_nodes`.
///
/// * `Use` nodes only feed the per-file import index: the last `::` segment
///   of the node's name is recorded under the node's file path, unless that
///   segment is `*`.
/// * Every other node is indexed by `name`, by `qualified_name`, and by each
///   non-empty trailing part of its qualified name that follows a `::`.
///
/// The set of known names is the union of the keys of those three indexes.
pub fn from_nodes(db: &'a Database, all_nodes: &[Node]) -> Self {
    let mut name_cache: HashMap<String, Vec<Node>> = HashMap::new();
    let mut qualified_name_cache: HashMap<String, Vec<Node>> = HashMap::new();
    let mut suffix_cache: HashMap<String, Vec<String>> = HashMap::new();
    let mut import_index: HashMap<String, HashSet<String>> = HashMap::new();

    for node in all_nodes {
        if node.kind == NodeKind::Use {
            let imported = node.name.rsplit("::").next().unwrap_or(&node.name);
            if imported != "*" {
                import_index
                    .entry(node.file_path.clone())
                    .or_default()
                    .insert(imported.to_string());
            }
            continue;
        }

        name_cache
            .entry(node.name.clone())
            .or_default()
            .push(node.clone());

        let qualified = &node.qualified_name;
        qualified_name_cache
            .entry(qualified.clone())
            .or_default()
            .push(node.clone());

        // Register everything after each `::` separator as a suffix.
        for (at, separator) in qualified.match_indices("::") {
            let tail = &qualified[at + separator.len()..];
            if tail.is_empty() {
                continue;
            }
            suffix_cache
                .entry(tail.to_string())
                .or_default()
                .push(qualified.clone());
        }
    }

    // Several nodes may share a qualified name; keep each full name once,
    // in a deterministic order.
    for full_names in suffix_cache.values_mut() {
        full_names.sort_unstable();
        full_names.dedup();
    }

    let known_names: HashSet<String> = name_cache
        .keys()
        .chain(qualified_name_cache.keys())
        .chain(suffix_cache.keys())
        .cloned()
        .collect();

    Self {
        db,
        name_cache,
        qualified_name_cache,
        suffix_cache,
        known_names,
        import_index,
    }
}
/// Attempts to resolve a single reference.
///
/// * `Uses` references whose name starts with one of the skipped prefixes
///   below, or contains `::*`, are never resolved.
/// * A name containing `::` is first matched as a qualified name; failing
///   that, its last `::` segment is matched as a simple name.
/// * Any other name is matched by exact name.
///
/// Returns `None` when no strategy produces a target.
pub fn resolve_one(&self, uref: &UnresolvedRef) -> Option<ResolvedRef> {
    // Presumably paths into the standard library and external crates; the
    // code only establishes that `Uses` references to them are skipped.
    const SKIPPED_USE_PREFIXES: &[&str] = &[
        "std::",
        "core::",
        "alloc::",
        "serde",
        "tokio::",
        "rayon::",
        "clap::",
        "glob::",
        "libsql::",
        "sha2::",
        "tree_sitter::",
        "serde_json::",
        "toml::",
        "tempfile::",
        "dirs::",
        "bincode::",
    ];

    let name = uref.reference_name.as_str();

    if uref.reference_kind == EdgeKind::Uses {
        let skipped = SKIPPED_USE_PREFIXES
            .iter()
            .any(|prefix| name.starts_with(*prefix));
        if skipped || name.contains("::*") {
            return None;
        }
    }

    if !name.contains("::") {
        return self.try_exact_name_match(uref);
    }

    self.try_qualified_match(uref).or_else(|| {
        let simple_name = name.rsplit("::").next().unwrap_or(name);
        self.try_exact_name_match_simple(uref, simple_name)
    })
}
/// Returns `true` when `name` could possibly be resolved by `resolve_one`.
///
/// A name qualifies when it is a key of one of the indexes (simple name,
/// qualified name or qualified-name suffix). Because `resolve_one` falls
/// back to the last `::` segment of a qualified reference, a qualified
/// name also qualifies when that last segment is indexed; otherwise the
/// pre-filter in `resolve_all` would discard references that the fallback
/// is able to resolve.
fn is_known_name(&self, name: &str) -> bool {
    if self.known_names.contains(name) {
        return true;
    }
    match name.rsplit_once("::") {
        Some((_, simple_name)) => self.known_names.contains(simple_name),
        None => false,
    }
}
pub fn resolve_all(&self, refs: &[UnresolvedRef]) -> ResolutionResult {
let total = refs.len();
let (candidates, hopeless): (Vec<_>, Vec<_>) = refs
.iter()
.partition(|uref| self.is_known_name(&uref.reference_name));
let results: Vec<_> = candidates
.par_iter()
.map(|uref| (*uref, self.resolve_one(uref)))
.collect();
let mut resolved = Vec::new();
let mut unresolved: Vec<UnresolvedRef> = hopeless.into_iter().cloned().collect();
for (uref, res) in results {
match res {
Some(r) if r.confidence >= 0.6 => resolved.push(r),
Some(_) => unresolved.push(uref.clone()), None => unresolved.push(uref.clone()),
}
}
let resolved_count = resolved.len();
ResolutionResult {
resolved,
unresolved,
total,
resolved_count,
}
}
/// Converts resolved references into edges.
///
/// Each edge runs from the node that made the reference to the resolved
/// target, carries the reference's kind, and records the reference's line.
pub fn create_edges(&self, resolved: &[ResolvedRef]) -> Vec<Edge> {
    let mut edges = Vec::with_capacity(resolved.len());
    for entry in resolved {
        let origin = &entry.original;
        edges.push(Edge {
            source: origin.from_node_id.clone(),
            target: entry.target_node_id.clone(),
            kind: origin.reference_kind,
            line: Some(origin.line),
        });
    }
    edges
}
/// Matches a reference against qualified names.
///
/// The reference name is first looked up as a full qualified name. If that
/// yields nothing, it is looked up as a suffix of a qualified name, and the
/// full names ending in it are tried in their stored order. In both cases
/// the first node registered under the matching qualified name wins and the
/// result carries a confidence of `0.95`.
fn try_qualified_match(&self, uref: &UnresolvedRef) -> Option<ResolvedRef> {
    let wanted = &uref.reference_name;

    let direct_hit = self
        .qualified_name_cache
        .get(wanted)
        .and_then(|nodes| nodes.first());

    let target = match direct_hit {
        Some(node) => node,
        None => self
            .suffix_cache
            .get(wanted)?
            .iter()
            .filter_map(|full_name| self.qualified_name_cache.get(full_name))
            .find_map(|nodes| nodes.first())?,
    };

    Some(ResolvedRef {
        original: uref.clone(),
        target_node_id: target.id.clone(),
        confidence: 0.95,
        resolved_by: "qualified-match".to_string(),
    })
}
/// Resolves an unqualified reference by its exact name.
///
/// Results that are not same-file blocklist hits are labelled
/// `"exact-match"`.
fn try_exact_name_match(&self, uref: &UnresolvedRef) -> Option<ResolvedRef> {
    self.resolve_by_name(uref, &uref.reference_name, "exact-match")
}

/// Resolves a qualified reference by `simple_name`, the last `::` segment
/// of its name.
///
/// Results that are not same-file blocklist hits are labelled
/// `"simple-name-match"`.
fn try_exact_name_match_simple(
    &self,
    uref: &UnresolvedRef,
    simple_name: &str,
) -> Option<ResolvedRef> {
    self.resolve_by_name(uref, simple_name, "simple-name-match")
}

/// Shared name-based matching used by both exact-name entry points.
///
/// * Blocklisted names resolve only when exactly one candidate lives in the
///   same file as the reference (confidence `0.9`, labelled
///   `"same-file-blocklist"`).
/// * A name with a single candidate resolves to it with confidence `0.9`,
///   or `0.5` when both files have a recognised language and the languages
///   differ.
/// * Otherwise the best-scoring candidate is chosen with confidence `0.7`.
///
/// `resolved_by` is the label applied in the last two cases.
fn resolve_by_name(
    &self,
    uref: &UnresolvedRef,
    name: &str,
    resolved_by: &str,
) -> Option<ResolvedRef> {
    let candidates = self.name_cache.get(name)?;

    if CROSS_FILE_BLOCKLIST.contains(&name) {
        let mut same_file = candidates
            .iter()
            .filter(|node| node.file_path == uref.file_path);
        let only = same_file.next()?;
        // More than one same-file candidate is ambiguous: give up.
        if same_file.next().is_some() {
            return None;
        }
        return Some(ResolvedRef {
            original: uref.clone(),
            target_node_id: only.id.clone(),
            confidence: 0.9,
            resolved_by: "same-file-blocklist".to_string(),
        });
    }

    if let [only] = candidates.as_slice() {
        let ref_lang = lang_from_path(&uref.file_path);
        let candidate_lang = lang_from_path(&only.file_path);
        let cross_language =
            ref_lang != "unknown" && candidate_lang != "unknown" && ref_lang != candidate_lang;
        return Some(ResolvedRef {
            original: uref.clone(),
            target_node_id: only.id.clone(),
            confidence: if cross_language { 0.5 } else { 0.9 },
            resolved_by: resolved_by.to_string(),
        });
    }

    let best = Self::find_best_match(uref, candidates, &self.import_index)?;
    Some(ResolvedRef {
        original: uref.clone(),
        target_node_id: best.id.clone(),
        confidence: 0.7,
        resolved_by: resolved_by.to_string(),
    })
}
/// Picks the highest-scoring candidate for a reference.
///
/// Score components per candidate:
/// * same file as the reference: `+100`, plus up to `+20` that shrinks by
///   one for every ten lines between the candidate's start line and the
///   reference (never negative); otherwise the path-proximity bonus;
/// * both languages recognised: `+50` when equal, `-80` when different;
/// * `Pub` visibility: `+10`;
/// * a `Calls` reference to a function-like node: `+25`;
/// * the candidate's name is in the referencing file's import set: `+30`.
///
/// Ties keep the earliest candidate. Returns `None` for an empty slice.
fn find_best_match(
    uref: &UnresolvedRef,
    candidates: &[Node],
    import_index: &HashMap<String, HashSet<String>>,
) -> Option<Node> {
    let ref_lang = lang_from_path(&uref.file_path);
    let file_imports = import_index.get(&uref.file_path);
    let is_call = uref.reference_kind == EdgeKind::Calls;

    let score_of = |node: &Node| -> i64 {
        let location: i64 = if node.file_path == uref.file_path {
            let distance = node.start_line.abs_diff(uref.line);
            let nearness = 20_i64.saturating_sub(i64::from(distance) / 10);
            100 + nearness.max(0)
        } else {
            path_proximity(&uref.file_path, &node.file_path)
        };

        let language: i64 = match (ref_lang, lang_from_path(&node.file_path)) {
            ("unknown", _) | (_, "unknown") => 0,
            (ours, theirs) if ours == theirs => 50,
            _ => -80,
        };

        let visibility: i64 = if node.visibility == Visibility::Pub {
            10
        } else {
            0
        };

        let callable: i64 = if is_call
            && matches!(
                node.kind,
                NodeKind::Function
                    | NodeKind::Method
                    | NodeKind::StructMethod
                    | NodeKind::Constructor
                    | NodeKind::AbstractMethod
            ) {
            25
        } else {
            0
        };

        let imported: i64 = match file_imports {
            Some(names) if names.contains(&node.name) => 30,
            _ => 0,
        };

        location + language + visibility + callable + imported
    };

    let mut winner: Option<(&Node, i64)> = None;
    for node in candidates {
        let score = score_of(node);
        // Strict comparison so that the first of several equal scores wins.
        let improves = match winner {
            Some((_, top)) => score > top,
            None => true,
        };
        if improves {
            winner = Some((node, score));
        }
    }
    winner.map(|(node, _)| node.clone())
}
}