1use crate::index::symbol::{Reference, ReferenceKind, Symbol};
7use codemem_core::RelationshipType;
8use std::collections::{HashMap, HashSet};
9
10#[derive(Debug, Clone)]
12pub struct ResolvedEdge {
13 pub source_qualified_name: String,
15 pub target_qualified_name: String,
17 pub relationship: RelationshipType,
19 pub file_path: String,
21 pub line: usize,
23 pub resolution_confidence: f64,
25}
26
/// A reference for which no matching symbol was found.
#[derive(Clone, Debug)]
pub struct UnresolvedRef {
    /// Qualified name of the symbol the reference originates from.
    pub source_node: String,
    /// The target name exactly as written in the reference.
    pub target_name: String,
    /// Best guess at the external package the target belongs to, if any.
    pub package_hint: Option<String>,
    /// Textual form of the reference kind.
    pub ref_kind: String,
    /// Path of the file that contains the reference.
    pub file_path: String,
    /// Line of the reference, as reported on the reference itself.
    pub line: usize,
}
44
45#[derive(Debug)]
47pub struct ResolveResult {
48 pub edges: Vec<ResolvedEdge>,
49 pub unresolved: Vec<UnresolvedRef>,
50}
51
52pub struct ReferenceResolver {
54 symbol_index: HashMap<String, Symbol>,
56 name_index: HashMap<String, Vec<String>>,
58 file_imports: HashMap<String, HashSet<String>>,
60}
61
62impl ReferenceResolver {
63 pub fn new() -> Self {
65 Self {
66 symbol_index: HashMap::new(),
67 name_index: HashMap::new(),
68 file_imports: HashMap::new(),
69 }
70 }
71
72 pub fn add_symbols(&mut self, symbols: &[Symbol]) {
74 for sym in symbols {
75 self.symbol_index
76 .insert(sym.qualified_name.clone(), sym.clone());
77
78 self.name_index
79 .entry(sym.name.clone())
80 .or_default()
81 .push(sym.qualified_name.clone());
82 }
83 }
84
85 pub fn add_imports(&mut self, references: &[Reference]) {
87 for r in references {
88 if r.kind == ReferenceKind::Import {
89 self.file_imports
90 .entry(r.file_path.clone())
91 .or_default()
92 .insert(r.target_name.clone());
93 }
94 }
95 }
96
97 pub fn resolve_with_confidence(&self, reference: &Reference) -> Option<(&Symbol, f64)> {
105 if let Some(sym) = self.symbol_index.get(&reference.target_name) {
107 return Some((sym, 1.0));
108 }
109
110 if reference.target_name.starts_with("crate::") {
112 let stripped = &reference.target_name["crate::".len()..];
113 if let Some(sym) = self.symbol_index.get(stripped) {
114 return Some((sym, 0.95));
115 }
116 for (qn, sym) in &self.symbol_index {
118 if qn.ends_with(stripped) {
119 let prefix_len = qn.len() - stripped.len();
120 if prefix_len == 0 || qn[..prefix_len].ends_with("::") {
121 return Some((sym, 0.85));
122 }
123 }
124 }
125 }
126
127 if reference.target_name.contains("::") {
129 let with_crate = format!("crate::{}", reference.target_name);
130 if let Some(sym) = self.symbol_index.get(&with_crate) {
131 return Some((sym, 0.9));
132 }
133 for (qn, sym) in &self.symbol_index {
135 if qn.ends_with(&reference.target_name) {
136 let prefix_len = qn.len() - reference.target_name.len();
137 if prefix_len == 0 || qn[..prefix_len].ends_with("::") {
138 return Some((sym, 0.8));
139 }
140 }
141 }
142 }
143
144 let simple_name = reference
146 .target_name
147 .rsplit("::")
148 .next()
149 .unwrap_or(&reference.target_name);
150
151 if let Some(candidates) = self.name_index.get(simple_name) {
152 if candidates.len() == 1 {
153 let confidence = if simple_name == reference.target_name {
155 0.9 } else {
157 0.7 };
159 return self
160 .symbol_index
161 .get(&candidates[0])
162 .map(|s| (s, confidence));
163 }
164
165 let file_imports = self.file_imports.get(&reference.file_path);
167 let mut best: Option<(&Symbol, f64)> = None;
168
169 for qn in candidates {
170 if let Some(sym) = self.symbol_index.get(qn) {
171 let mut score: f64 = 0.0;
172
173 if let Some(imports) = file_imports {
175 if imports.contains(&sym.qualified_name)
176 || imports.iter().any(|imp| imp.ends_with(&sym.name))
177 {
178 score += 0.4;
179 }
180 }
181
182 if sym.file_path == reference.file_path {
184 score += 0.3;
185 }
186
187 if sym.name == reference.target_name {
189 score += 0.2;
190 }
191
192 let ref_module = extract_module_path(&reference.file_path);
194 let sym_module = extract_module_path(&sym.file_path);
195 if ref_module == sym_module {
196 score += 0.1;
197 }
198
199 if best.is_none() || score > best.unwrap().1 {
200 best = Some((sym, score));
201 }
202 }
203 }
204
205 if let Some((sym, score)) = best {
206 let confidence = 0.3 + (score.min(1.0) * 0.5);
208 return Some((sym, confidence));
209 }
210 }
211
212 None
213 }
214
215 pub fn resolve_all(&self, references: &[Reference]) -> Vec<ResolvedEdge> {
219 references
220 .iter()
221 .filter_map(|r| {
222 let (target, confidence) = self.resolve_with_confidence(r)?;
223 let relationship = match r.kind {
224 ReferenceKind::Call => RelationshipType::Calls,
225 ReferenceKind::Import => RelationshipType::Imports,
226 ReferenceKind::Inherits => RelationshipType::Inherits,
227 ReferenceKind::Implements => RelationshipType::Implements,
228 ReferenceKind::TypeUsage => RelationshipType::DependsOn,
229 };
230
231 Some(ResolvedEdge {
232 source_qualified_name: r.source_qualified_name.clone(),
233 target_qualified_name: target.qualified_name.clone(),
234 relationship,
235 file_path: r.file_path.clone(),
236 line: r.line,
237 resolution_confidence: confidence,
238 })
239 })
240 .collect()
241 }
242
243 pub fn resolve_all_with_unresolved(&self, references: &[Reference]) -> ResolveResult {
248 let mut edges = Vec::new();
249 let mut unresolved = Vec::new();
250
251 for r in references {
252 match self.resolve_with_confidence(r) {
253 Some((target, confidence)) => {
254 let relationship = match r.kind {
255 ReferenceKind::Call => RelationshipType::Calls,
256 ReferenceKind::Import => RelationshipType::Imports,
257 ReferenceKind::Inherits => RelationshipType::Inherits,
258 ReferenceKind::Implements => RelationshipType::Implements,
259 ReferenceKind::TypeUsage => RelationshipType::DependsOn,
260 };
261 edges.push(ResolvedEdge {
262 source_qualified_name: r.source_qualified_name.clone(),
263 target_qualified_name: target.qualified_name.clone(),
264 relationship,
265 file_path: r.file_path.clone(),
266 line: r.line,
267 resolution_confidence: confidence,
268 });
269 }
270 None => {
271 let package_hint = extract_package_hint(&r.target_name, r.kind);
272 unresolved.push(UnresolvedRef {
273 source_node: r.source_qualified_name.clone(),
274 target_name: r.target_name.clone(),
275 package_hint,
276 ref_kind: r.kind.to_string(),
277 file_path: r.file_path.clone(),
278 line: r.line,
279 });
280 }
281 }
282 }
283
284 ResolveResult { edges, unresolved }
285 }
286}
287
288pub(crate) fn extract_package_hint(target_name: &str, kind: ReferenceKind) -> Option<String> {
300 if kind != ReferenceKind::Import {
302 return None;
303 }
304
305 if target_name.starts_with('.')
307 || target_name.starts_with("crate::")
308 || target_name.starts_with("super::")
309 || target_name.starts_with("self::")
310 {
311 return None;
312 }
313
314 if target_name.starts_with('@') {
316 let parts: Vec<&str> = target_name.splitn(3, '/').collect();
318 if parts.len() >= 2 {
319 return Some(format!("{}/{}", parts[0], parts[1]));
320 }
321 return Some(target_name.to_string());
322 }
323
324 if target_name.contains('/') {
329 let first_segment = target_name.split('/').next().unwrap_or("");
330 if is_go_module_domain(first_segment) {
331 return Some(target_name.to_string());
333 }
334 if !first_segment.is_empty() {
336 return Some(first_segment.to_string());
337 }
338 }
339
340 if target_name.contains("::") {
342 let first = target_name.split("::").next()?;
343 return Some(first.to_string());
344 }
345
346 if target_name.contains('.') {
348 let first = target_name.split('.').next()?;
349 return Some(first.to_string());
350 }
351
352 if is_python_stdlib(target_name) {
356 return None;
357 }
358 Some(target_name.to_string())
359}
360
/// Reports whether `segment` looks like the host part of a Go module path.
///
/// True for a fixed list of well-known hosts, and for any segment containing a
/// `.` whose text after the last `.` is one of a small set of top-level domains.
fn is_go_module_domain(segment: &str) -> bool {
    const KNOWN_HOSTS: &[&str] = &[
        "github.com",
        "gitlab.com",
        "bitbucket.org",
        "golang.org",
        "google.golang.org",
        "gopkg.in",
        "go.uber.org",
        "go.etcd.io",
        "k8s.io",
        "sigs.k8s.io",
        "honnef.co",
        "mvdan.cc",
    ];
    const DOMAIN_TLDS: &[&str] = &["com", "org", "io", "net", "dev", "in", "cc", "co"];

    if KNOWN_HOSTS.contains(&segment) {
        return true;
    }

    match segment.rsplit_once('.') {
        Some((_, tld)) => DOMAIN_TLDS.contains(&tld),
        None => false,
    }
}
386
/// Reports whether `name` is exactly one of the hard-coded Python
/// standard-library module names below (case-sensitive, top-level names only).
fn is_python_stdlib(name: &str) -> bool {
    const STDLIB_MODULES: &[&str] = &[
        "os",
        "sys",
        "re",
        "io",
        "json",
        "math",
        "time",
        "datetime",
        "collections",
        "itertools",
        "functools",
        "typing",
        "logging",
        "pathlib",
        "subprocess",
        "threading",
        "multiprocessing",
        "unittest",
        "copy",
        "abc",
        "enum",
        "dataclasses",
        "contextlib",
        "argparse",
        "hashlib",
        "hmac",
        "secrets",
        "socket",
        "http",
        "email",
        "html",
        "xml",
        "csv",
        "sqlite3",
        "pickle",
        "shelve",
        "marshal",
        "struct",
        "codecs",
        "string",
        "textwrap",
        "difflib",
        "pprint",
        "warnings",
        "traceback",
        "inspect",
        "dis",
        "ast",
        "token",
        "keyword",
        "linecache",
        "shutil",
        "tempfile",
        "glob",
        "fnmatch",
        "stat",
        "fileinput",
        "configparser",
        "signal",
        "errno",
        "ctypes",
        "types",
        "weakref",
        "array",
        "bisect",
        "heapq",
        "queue",
        "random",
        "statistics",
        "decimal",
        "fractions",
        "operator",
        "uuid",
        "base64",
        "binascii",
        "zlib",
        "gzip",
        "zipfile",
        "tarfile",
        "pdb",
        "profile",
        "cProfile",
        "timeit",
        "platform",
        "sysconfig",
        "builtins",
        "asyncio",
        "concurrent",
    ];

    STDLIB_MODULES.contains(&name)
}
481
/// Returns everything before the last `/` in `file_path`, or `""` when the
/// path contains no `/`.
fn extract_module_path(file_path: &str) -> &str {
    match file_path.rfind('/') {
        Some(last_slash) => &file_path[..last_slash],
        None => "",
    }
}
487
488impl Default for ReferenceResolver {
489 fn default() -> Self {
490 Self::new()
491 }
492}
493
494#[cfg(test)]
495#[path = "tests/resolver_tests.rs"]
496mod tests;