codemem_engine/index/
resolver.rs1use crate::index::symbol::{Reference, ReferenceKind, Symbol};
7use codemem_core::RelationshipType;
8use std::collections::{HashMap, HashSet};
9
10#[derive(Debug, Clone)]
12pub struct ResolvedEdge {
13 pub source_qualified_name: String,
15 pub target_qualified_name: String,
17 pub relationship: RelationshipType,
19 pub file_path: String,
21 pub line: usize,
23 pub resolution_confidence: f64,
25}
26
/// A reference for which no matching symbol was found in the index.
///
/// Instances are produced by `ReferenceResolver::resolve_all_with_unresolved`
/// so callers can retry resolution later with more information.
#[derive(Debug, Clone)]
pub struct UnresolvedRef {
    /// Qualified name of the symbol that contains the reference.
    pub source_node: String,

    /// Target name exactly as written in the reference.
    pub target_name: String,

    /// Best-guess external package the target belongs to, when one could be
    /// derived from the target name (only attempted for imports).
    pub package_hint: Option<String>,

    /// String form of the reference kind.
    pub ref_kind: String,

    /// Path of the file in which the reference appears.
    pub file_path: String,

    /// Line of the reference, copied verbatim from the source `Reference`.
    pub line: usize,
}
44
45#[derive(Debug)]
47pub struct ResolveResult {
48 pub edges: Vec<ResolvedEdge>,
49 pub unresolved: Vec<UnresolvedRef>,
50}
51
52pub struct ReferenceResolver {
54 symbol_index: HashMap<String, Symbol>,
56 name_index: HashMap<String, Vec<String>>,
58 file_imports: HashMap<String, HashSet<String>>,
60}
61
62impl ReferenceResolver {
63 pub fn new() -> Self {
65 Self {
66 symbol_index: HashMap::new(),
67 name_index: HashMap::new(),
68 file_imports: HashMap::new(),
69 }
70 }
71
72 pub fn add_symbols(&mut self, symbols: &[Symbol]) {
74 for sym in symbols {
75 self.symbol_index
76 .insert(sym.qualified_name.clone(), sym.clone());
77
78 self.name_index
79 .entry(sym.name.clone())
80 .or_default()
81 .push(sym.qualified_name.clone());
82 }
83 }
84
85 pub fn add_imports(&mut self, references: &[Reference]) {
87 for r in references {
88 if r.kind == ReferenceKind::Import {
89 self.file_imports
90 .entry(r.file_path.clone())
91 .or_default()
92 .insert(r.target_name.clone());
93 }
94 }
95 }
96
97 pub fn resolve_with_confidence(&self, reference: &Reference) -> Option<(&Symbol, f64)> {
105 if let Some(sym) = self.symbol_index.get(&reference.target_name) {
107 return Some((sym, 1.0));
108 }
109
110 if reference.target_name.starts_with("crate::") {
112 let stripped = &reference.target_name["crate::".len()..];
113 if let Some(sym) = self.symbol_index.get(stripped) {
114 return Some((sym, 0.95));
115 }
116 for (qn, sym) in &self.symbol_index {
118 if qn.ends_with(stripped) {
119 let prefix_len = qn.len() - stripped.len();
120 if prefix_len == 0 || qn[..prefix_len].ends_with("::") {
121 return Some((sym, 0.85));
122 }
123 }
124 }
125 }
126
127 if reference.target_name.contains("::") {
129 let with_crate = format!("crate::{}", reference.target_name);
130 if let Some(sym) = self.symbol_index.get(&with_crate) {
131 return Some((sym, 0.9));
132 }
133 for (qn, sym) in &self.symbol_index {
135 if qn.ends_with(&reference.target_name) {
136 let prefix_len = qn.len() - reference.target_name.len();
137 if prefix_len == 0 || qn[..prefix_len].ends_with("::") {
138 return Some((sym, 0.8));
139 }
140 }
141 }
142 }
143
144 let simple_name = reference
146 .target_name
147 .rsplit("::")
148 .next()
149 .unwrap_or(&reference.target_name);
150
151 if let Some(candidates) = self.name_index.get(simple_name) {
152 if candidates.len() == 1 {
153 let confidence = if simple_name == reference.target_name {
155 0.9 } else {
157 0.7 };
159 return self
160 .symbol_index
161 .get(&candidates[0])
162 .map(|s| (s, confidence));
163 }
164
165 let file_imports = self.file_imports.get(&reference.file_path);
167 let mut best: Option<(&Symbol, f64)> = None;
168
169 for qn in candidates {
170 if let Some(sym) = self.symbol_index.get(qn) {
171 let mut score: f64 = 0.0;
172
173 if let Some(imports) = file_imports {
175 if imports.contains(&sym.qualified_name)
176 || imports.iter().any(|imp| imp.ends_with(&sym.name))
177 {
178 score += 0.4;
179 }
180 }
181
182 if sym.file_path == reference.file_path {
184 score += 0.3;
185 }
186
187 if sym.name == reference.target_name {
189 score += 0.2;
190 }
191
192 let ref_module = extract_module_path(&reference.file_path);
194 let sym_module = extract_module_path(&sym.file_path);
195 if ref_module == sym_module {
196 score += 0.1;
197 }
198
199 if best.is_none() || score > best.unwrap().1 {
200 best = Some((sym, score));
201 }
202 }
203 }
204
205 if let Some((sym, score)) = best {
206 let confidence = 0.3 + (score.min(1.0) * 0.5);
208 return Some((sym, confidence));
209 }
210 }
211
212 None
213 }
214
215 fn resolve_edge(&self, r: &Reference) -> Option<ResolvedEdge> {
218 let (target, confidence) = self.resolve_with_confidence(r)?;
219 let relationship = match r.kind {
220 ReferenceKind::Call | ReferenceKind::Callback => RelationshipType::Calls,
221 ReferenceKind::Import => RelationshipType::Imports,
222 ReferenceKind::Inherits => RelationshipType::Inherits,
223 ReferenceKind::Implements => RelationshipType::Implements,
224 ReferenceKind::TypeUsage => RelationshipType::DependsOn,
225 };
226 let confidence = if r.kind == ReferenceKind::Callback {
228 confidence.min(0.6)
229 } else {
230 confidence
231 };
232 Some(ResolvedEdge {
233 source_qualified_name: r.source_qualified_name.clone(),
234 target_qualified_name: target.qualified_name.clone(),
235 relationship,
236 file_path: r.file_path.clone(),
237 line: r.line,
238 resolution_confidence: confidence,
239 })
240 }
241
242 pub fn resolve_all(&self, references: &[Reference]) -> Vec<ResolvedEdge> {
246 references
247 .iter()
248 .filter_map(|r| self.resolve_edge(r))
249 .collect()
250 }
251
252 pub fn resolve_all_with_unresolved(&self, references: &[Reference]) -> ResolveResult {
257 let mut edges = Vec::new();
258 let mut unresolved = Vec::new();
259
260 for r in references {
261 if let Some(edge) = self.resolve_edge(r) {
262 edges.push(edge);
263 } else {
264 let package_hint = extract_package_hint(&r.target_name, r.kind);
265 unresolved.push(UnresolvedRef {
266 source_node: r.source_qualified_name.clone(),
267 target_name: r.target_name.clone(),
268 package_hint,
269 ref_kind: r.kind.to_string(),
270 file_path: r.file_path.clone(),
271 line: r.line,
272 });
273 }
274 }
275
276 ResolveResult { edges, unresolved }
277 }
278}
279
280pub(crate) fn extract_package_hint(target_name: &str, kind: ReferenceKind) -> Option<String> {
292 if kind != ReferenceKind::Import {
294 return None;
295 }
296
297 if target_name.starts_with('.')
299 || target_name.starts_with("crate::")
300 || target_name.starts_with("super::")
301 || target_name.starts_with("self::")
302 {
303 return None;
304 }
305
306 if target_name.starts_with('@') {
308 let parts: Vec<&str> = target_name.splitn(3, '/').collect();
310 if parts.len() >= 2 {
311 return Some(format!("{}/{}", parts[0], parts[1]));
312 }
313 return Some(target_name.to_string());
314 }
315
316 if target_name.contains('/') {
321 let first_segment = target_name.split('/').next().unwrap_or("");
322 if is_go_module_domain(first_segment) {
323 return Some(target_name.to_string());
325 }
326 if !first_segment.is_empty() {
328 return Some(first_segment.to_string());
329 }
330 }
331
332 if target_name.contains("::") {
334 let first = target_name.split("::").next()?;
335 return Some(first.to_string());
336 }
337
338 if target_name.contains('.') {
340 let first = target_name.split('.').next()?;
341 return Some(first.to_string());
342 }
343
344 if is_python_stdlib(target_name) {
348 return None;
349 }
350 Some(target_name.to_string())
351}
352
/// Reports whether `segment` (the first path segment of an import) looks
/// like the host part of a Go module path.
///
/// A segment qualifies when it is one of a fixed set of well-known hosts, or
/// when it contains a `.` and the text after the last `.` is one of a small
/// set of top-level domains.
fn is_go_module_domain(segment: &str) -> bool {
    const KNOWN_HOSTS: &[&str] = &[
        "github.com",
        "gitlab.com",
        "bitbucket.org",
        "golang.org",
        "google.golang.org",
        "gopkg.in",
        "go.uber.org",
        "go.etcd.io",
        "k8s.io",
        "sigs.k8s.io",
        "honnef.co",
        "mvdan.cc",
    ];
    const DOMAIN_TLDS: &[&str] = &["com", "org", "io", "net", "dev", "in", "cc", "co"];

    if KNOWN_HOSTS.contains(&segment) {
        return true;
    }

    match segment.rsplit_once('.') {
        Some((_, tld)) => DOMAIN_TLDS.contains(&tld),
        None => false,
    }
}
378
/// Reports whether `name` is a top-level module of the Python standard
/// library, so that a bare `import name` is not mistaken for an external
/// package.
///
/// The comparison is an exact, case-sensitive match against a hard-coded
/// list. The list is intentionally limited to commonly imported modules; the
/// second group below holds modules whose names are distinctive to Python,
/// to limit collisions with package names from other ecosystems.
fn is_python_stdlib(name: &str) -> bool {
    matches!(
        name,
        "os" | "sys"
            | "re"
            | "io"
            | "json"
            | "math"
            | "time"
            | "datetime"
            | "collections"
            | "itertools"
            | "functools"
            | "typing"
            | "logging"
            | "pathlib"
            | "subprocess"
            | "threading"
            | "multiprocessing"
            | "unittest"
            | "copy"
            | "abc"
            | "enum"
            | "dataclasses"
            | "contextlib"
            | "argparse"
            | "hashlib"
            | "hmac"
            | "secrets"
            | "socket"
            | "http"
            | "email"
            | "html"
            | "xml"
            | "csv"
            | "sqlite3"
            | "pickle"
            | "shelve"
            | "marshal"
            | "struct"
            | "codecs"
            | "string"
            | "textwrap"
            | "difflib"
            | "pprint"
            | "warnings"
            | "traceback"
            | "inspect"
            | "dis"
            | "ast"
            | "token"
            | "keyword"
            | "linecache"
            | "shutil"
            | "tempfile"
            | "glob"
            | "fnmatch"
            | "stat"
            | "fileinput"
            | "configparser"
            | "signal"
            | "errno"
            | "ctypes"
            | "types"
            | "weakref"
            | "array"
            | "bisect"
            | "heapq"
            | "queue"
            | "random"
            | "statistics"
            | "decimal"
            | "fractions"
            | "operator"
            | "uuid"
            | "base64"
            | "binascii"
            | "zlib"
            | "gzip"
            | "zipfile"
            | "tarfile"
            | "pdb"
            | "profile"
            | "cProfile"
            | "timeit"
            | "platform"
            | "sysconfig"
            | "builtins"
            | "asyncio"
            | "concurrent"
            // Additional standard-library modules that were previously
            // reported as external packages.
            | "__future__"
            | "urllib"
            | "importlib"
            | "pkgutil"
            | "runpy"
            | "zipimport"
            | "zipapp"
            | "ssl"
            | "smtplib"
            | "ftplib"
            | "imaplib"
            | "poplib"
            | "socketserver"
            | "selectors"
            | "xmlrpc"
            | "wsgiref"
            | "ipaddress"
            | "mimetypes"
            | "getpass"
            | "getopt"
            | "optparse"
            | "shlex"
            | "tokenize"
            | "py_compile"
            | "compileall"
            | "pickletools"
            | "copyreg"
            | "codeop"
            | "reprlib"
            | "filecmp"
            | "contextvars"
            | "unicodedata"
            | "doctest"
            | "pydoc"
            | "tracemalloc"
            | "faulthandler"
            | "atexit"
            | "gc"
            | "mmap"
            | "cmath"
            | "bz2"
            | "lzma"
            | "plistlib"
            | "graphlib"
            | "tomllib"
            | "zoneinfo"
            | "tkinter"
            | "venv"
            | "ensurepip"
    )
}
473
/// Returns the directory portion of `file_path`: everything before the last
/// `/`, or the empty string when the path contains no `/`.
fn extract_module_path(file_path: &str) -> &str {
    match file_path.rfind('/') {
        Some(last_slash) => &file_path[..last_slash],
        None => "",
    }
}
479
480impl Default for ReferenceResolver {
481 fn default() -> Self {
482 Self::new()
483 }
484}
485
486#[cfg(test)]
487#[path = "tests/resolver_tests.rs"]
488mod tests;