use crate::index::symbol::{Reference, ReferenceKind, Symbol};
use codemem_core::RelationshipType;
use std::collections::{HashMap, HashSet};
/// A reference that the resolver matched to a known symbol, expressed as a
/// directed edge from the referencing side to the resolved target.
#[derive(Debug, Clone)]
pub struct ResolvedEdge {
/// Qualified name of the referencing side, copied from the reference's
/// `source_qualified_name`.
pub source_qualified_name: String,
/// Qualified name of the symbol the reference was resolved to.
pub target_qualified_name: String,
/// Relationship derived from the reference kind (calls, imports, inherits,
/// implements, or depends-on).
pub relationship: RelationshipType,
/// File path carried by the reference.
pub file_path: String,
/// Line number carried by the reference.
pub line: usize,
/// Confidence assigned by the resolver: `1.0` for an exact qualified-name
/// match, lower values for heuristic matches.
pub resolution_confidence: f64,
}
/// A reference for which no matching symbol was found in the resolver's index.
#[derive(Debug, Clone)]
pub struct UnresolvedRef {
/// Qualified name of the referencing side, copied from the reference's
/// `source_qualified_name`.
pub source_node: String,
/// Target name exactly as it appeared on the reference.
pub target_name: String,
/// Package guess derived from the target name. Only import references can
/// carry a hint; it is `None` for every other kind and for targets that look
/// local/relative.
pub package_hint: Option<String>,
/// String form of the reference kind (its `to_string()` output).
pub ref_kind: String,
/// File path carried by the reference.
pub file_path: String,
/// Line number carried by the reference.
pub line: usize,
}
/// Outcome of resolving a batch of references: every input reference ends up
/// either as an edge or as an unresolved entry.
#[derive(Debug)]
pub struct ResolveResult {
/// References that were matched to a known symbol.
pub edges: Vec<ResolvedEdge>,
/// References that could not be matched to any known symbol.
pub unresolved: Vec<UnresolvedRef>,
}
/// Resolves references to known symbols using indexes built from registered
/// symbols and import references.
pub struct ReferenceResolver {
// Qualified name -> symbol.
symbol_index: HashMap<String, Symbol>,
// Simple (unqualified) name -> qualified names of the symbols carrying it.
name_index: HashMap<String, Vec<String>>,
// File path -> target names of the import references recorded for that file.
file_imports: HashMap<String, HashSet<String>>,
}
impl ReferenceResolver {
pub fn new() -> Self {
Self {
symbol_index: HashMap::new(),
name_index: HashMap::new(),
file_imports: HashMap::new(),
}
}
pub fn add_symbols(&mut self, symbols: &[Symbol]) {
for sym in symbols {
self.symbol_index
.insert(sym.qualified_name.clone(), sym.clone());
self.name_index
.entry(sym.name.clone())
.or_default()
.push(sym.qualified_name.clone());
}
}
pub fn add_imports(&mut self, references: &[Reference]) {
for r in references {
if r.kind == ReferenceKind::Import {
self.file_imports
.entry(r.file_path.clone())
.or_default()
.insert(r.target_name.clone());
}
}
}
pub fn resolve_with_confidence(&self, reference: &Reference) -> Option<(&Symbol, f64)> {
if let Some(sym) = self.symbol_index.get(&reference.target_name) {
return Some((sym, 1.0));
}
if reference.target_name.starts_with("crate::") {
let stripped = &reference.target_name["crate::".len()..];
if let Some(sym) = self.symbol_index.get(stripped) {
return Some((sym, 0.95));
}
for (qn, sym) in &self.symbol_index {
if qn.ends_with(stripped) {
let prefix_len = qn.len() - stripped.len();
if prefix_len == 0 || qn[..prefix_len].ends_with("::") {
return Some((sym, 0.85));
}
}
}
}
if reference.target_name.contains("::") {
let with_crate = format!("crate::{}", reference.target_name);
if let Some(sym) = self.symbol_index.get(&with_crate) {
return Some((sym, 0.9));
}
for (qn, sym) in &self.symbol_index {
if qn.ends_with(&reference.target_name) {
let prefix_len = qn.len() - reference.target_name.len();
if prefix_len == 0 || qn[..prefix_len].ends_with("::") {
return Some((sym, 0.8));
}
}
}
}
let simple_name = reference
.target_name
.rsplit("::")
.next()
.unwrap_or(&reference.target_name);
if let Some(candidates) = self.name_index.get(simple_name) {
if candidates.len() == 1 {
let confidence = if simple_name == reference.target_name {
0.9 } else {
0.7 };
return self
.symbol_index
.get(&candidates[0])
.map(|s| (s, confidence));
}
let file_imports = self.file_imports.get(&reference.file_path);
let mut best: Option<(&Symbol, f64)> = None;
for qn in candidates {
if let Some(sym) = self.symbol_index.get(qn) {
let mut score: f64 = 0.0;
if let Some(imports) = file_imports {
if imports.contains(&sym.qualified_name)
|| imports.iter().any(|imp| imp.ends_with(&sym.name))
{
score += 0.4;
}
}
if sym.file_path == reference.file_path {
score += 0.3;
}
if sym.name == reference.target_name {
score += 0.2;
}
let ref_module = extract_module_path(&reference.file_path);
let sym_module = extract_module_path(&sym.file_path);
if ref_module == sym_module {
score += 0.1;
}
if best.is_none() || score > best.unwrap().1 {
best = Some((sym, score));
}
}
}
if let Some((sym, score)) = best {
let confidence = 0.3 + (score.min(1.0) * 0.5);
return Some((sym, confidence));
}
}
None
}
fn resolve_edge(&self, r: &Reference) -> Option<ResolvedEdge> {
let (target, confidence) = self.resolve_with_confidence(r)?;
let relationship = match r.kind {
ReferenceKind::Call | ReferenceKind::Callback => RelationshipType::Calls,
ReferenceKind::Import => RelationshipType::Imports,
ReferenceKind::Inherits => RelationshipType::Inherits,
ReferenceKind::Implements => RelationshipType::Implements,
ReferenceKind::TypeUsage => RelationshipType::DependsOn,
};
let confidence = if r.kind == ReferenceKind::Callback {
confidence.min(0.6)
} else {
confidence
};
Some(ResolvedEdge {
source_qualified_name: r.source_qualified_name.clone(),
target_qualified_name: target.qualified_name.clone(),
relationship,
file_path: r.file_path.clone(),
line: r.line,
resolution_confidence: confidence,
})
}
pub fn resolve_all(&self, references: &[Reference]) -> Vec<ResolvedEdge> {
references
.iter()
.filter_map(|r| self.resolve_edge(r))
.collect()
}
pub fn resolve_all_with_unresolved(&self, references: &[Reference]) -> ResolveResult {
let mut edges = Vec::new();
let mut unresolved = Vec::new();
for r in references {
if let Some(edge) = self.resolve_edge(r) {
edges.push(edge);
} else {
let package_hint = extract_package_hint(&r.target_name, r.kind);
unresolved.push(UnresolvedRef {
source_node: r.source_qualified_name.clone(),
target_name: r.target_name.clone(),
package_hint,
ref_kind: r.kind.to_string(),
file_path: r.file_path.clone(),
line: r.line,
});
}
}
ResolveResult { edges, unresolved }
}
}
/// Guesses the external package an unresolved import belongs to.
///
/// Returns `None` when `kind` is not an import, when the target is blank, when
/// it is local (`.`-relative, an absolute `/` path, or a `crate::` / `super::`
/// / `self::` path), or when its root module is in the Python standard-library
/// list.
///
/// Otherwise the hint is, in order of precedence:
///
/// * `@scope/name` for `@`-scoped targets (the whole target if it has no `/`);
/// * the whole target when it contains `/` and its first segment looks like a
///   Go module host, else the first `/`-separated segment;
/// * the first `::`-separated segment (a leading `::` is ignored);
/// * the first `.`-separated segment, or the bare name itself.
pub(crate) fn extract_package_hint(target_name: &str, kind: ReferenceKind) -> Option<String> {
    if kind != ReferenceKind::Import {
        return None;
    }

    // A blank target carries no package information; never emit an empty hint.
    if target_name.trim().is_empty() {
        return None;
    }

    // Relative imports, absolute filesystem paths and crate-local paths are
    // not external packages.
    let is_local = target_name.starts_with('.')
        || target_name.starts_with('/')
        || ["crate::", "super::", "self::"]
            .iter()
            .any(|prefix| target_name.starts_with(*prefix));
    if is_local {
        return None;
    }

    // Scoped packages keep both the scope and the package name.
    if target_name.starts_with('@') {
        let mut parts = target_name.splitn(3, '/');
        return match (parts.next(), parts.next()) {
            (Some(scope), Some(package)) => Some(format!("{}/{}", scope, package)),
            _ => Some(target_name.to_string()),
        };
    }

    // Slash-separated paths. The first segment is non-empty here because
    // targets starting with '/' were rejected above.
    if let Some((first_segment, _)) = target_name.split_once('/') {
        if is_go_module_domain(first_segment) {
            return Some(target_name.to_string());
        }
        return Some(first_segment.to_string());
    }

    // A leading `::` marks a global path; the package is the segment after it.
    let name = target_name.strip_prefix("::").unwrap_or(target_name);

    if let Some((first, _)) = name.split_once("::") {
        return (!first.is_empty()).then(|| first.to_string());
    }

    // Dotted or bare module name. The stdlib filter is applied to the root
    // module so that `os.path` is treated the same way as `os`.
    let root = name.split_once('.').map_or(name, |(first, _)| first);
    if root.is_empty() || is_python_stdlib(root) {
        return None;
    }
    Some(root.to_string())
}
/// Reports whether `segment` (the first `/`-separated part of an import path)
/// looks like the host of a Go module path.
///
/// A segment qualifies when it is one of the well-known hosts below, or when
/// the text after its last `.` is one of a small set of top-level domains.
fn is_go_module_domain(segment: &str) -> bool {
    const KNOWN_HOSTS: [&str; 12] = [
        "github.com",
        "gitlab.com",
        "bitbucket.org",
        "golang.org",
        "google.golang.org",
        "gopkg.in",
        "go.uber.org",
        "go.etcd.io",
        "k8s.io",
        "sigs.k8s.io",
        "honnef.co",
        "mvdan.cc",
    ];
    const HOST_TLDS: [&str; 8] = ["com", "org", "io", "net", "dev", "in", "cc", "co"];

    if KNOWN_HOSTS.iter().any(|host| *host == segment) {
        return true;
    }

    // Generic fallback: judge by whatever follows the final dot, if any.
    match segment.rsplit_once('.') {
        Some((_, tld)) => HOST_TLDS.iter().any(|known| *known == tld),
        None => false,
    }
}
/// Reports whether `name` exactly matches (case-sensitively) one of the
/// top-level Python standard-library module names listed below.
fn is_python_stdlib(name: &str) -> bool {
    const STDLIB_MODULES: &[&str] = &[
        "os",
        "sys",
        "re",
        "io",
        "json",
        "math",
        "time",
        "datetime",
        "collections",
        "itertools",
        "functools",
        "typing",
        "logging",
        "pathlib",
        "subprocess",
        "threading",
        "multiprocessing",
        "unittest",
        "copy",
        "abc",
        "enum",
        "dataclasses",
        "contextlib",
        "argparse",
        "hashlib",
        "hmac",
        "secrets",
        "socket",
        "http",
        "email",
        "html",
        "xml",
        "csv",
        "sqlite3",
        "pickle",
        "shelve",
        "marshal",
        "struct",
        "codecs",
        "string",
        "textwrap",
        "difflib",
        "pprint",
        "warnings",
        "traceback",
        "inspect",
        "dis",
        "ast",
        "token",
        "keyword",
        "linecache",
        "shutil",
        "tempfile",
        "glob",
        "fnmatch",
        "stat",
        "fileinput",
        "configparser",
        "signal",
        "errno",
        "ctypes",
        "types",
        "weakref",
        "array",
        "bisect",
        "heapq",
        "queue",
        "random",
        "statistics",
        "decimal",
        "fractions",
        "operator",
        "uuid",
        "base64",
        "binascii",
        "zlib",
        "gzip",
        "zipfile",
        "tarfile",
        "pdb",
        "profile",
        "cProfile",
        "timeit",
        "platform",
        "sysconfig",
        "builtins",
        "asyncio",
        "concurrent",
    ];

    STDLIB_MODULES.iter().any(|module| *module == name)
}
/// Returns everything before the last `/` in `file_path` (its directory part),
/// or an empty string when the path contains no `/`.
fn extract_module_path(file_path: &str) -> &str {
    match file_path.rfind('/') {
        Some(last_slash) => &file_path[..last_slash],
        None => "",
    }
}
impl Default for ReferenceResolver {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
#[path = "tests/resolver_tests.rs"]
mod tests;