Skip to main content

agentic_codebase/semantic/
resolver.rs

//! Cross-file symbol resolution.
//!
//! Builds a symbol table from raw code units and resolves references:
//! local names, imported symbols, and external library references.
5
6use std::collections::{HashMap, HashSet};
7
8use crate::parse::{RawCodeUnit, RawReference, ReferenceKind};
9use crate::types::{AcbResult, CodeUnitType, Language};
10
/// Index of known code units used for name resolution.
///
/// Every map stores the `temp_id` of the unit(s) it refers to.
#[derive(Debug)]
pub struct SymbolTable {
    /// Qualified name → `temp_id` of the unit registered under that name.
    symbol_map: HashMap<String, u64>,
    /// Simple (unqualified) name → all `temp_id`s sharing it
    /// (overloads, shadowing, same name in different files).
    name_map: HashMap<String, Vec<u64>>,
    /// File path → `temp_id`s of the units in that file.
    file_map: HashMap<String, Vec<u64>>,
    /// `temp_id` → qualified name of that unit.
    id_to_qname: HashMap<u64, String>,
    /// Import target name → `temp_id`s of the units that import it.
    import_targets: HashMap<String, Vec<u64>>,
}
25
26impl SymbolTable {
27    /// Create an empty symbol table.
28    pub fn new() -> Self {
29        Self {
30            symbol_map: HashMap::new(),
31            name_map: HashMap::new(),
32            file_map: HashMap::new(),
33            id_to_qname: HashMap::new(),
34            import_targets: HashMap::new(),
35        }
36    }
37
38    /// Build symbol table from raw units.
39    pub fn build(units: &[RawCodeUnit]) -> AcbResult<Self> {
40        let mut table = Self::new();
41
42        for unit in units {
43            // Register by qualified name
44            table
45                .symbol_map
46                .insert(unit.qualified_name.clone(), unit.temp_id);
47            table
48                .id_to_qname
49                .insert(unit.temp_id, unit.qualified_name.clone());
50
51            // Register by simple name
52            table
53                .name_map
54                .entry(unit.name.clone())
55                .or_default()
56                .push(unit.temp_id);
57
58            // Register by file
59            let file_key = unit.file_path.to_string_lossy().to_string();
60            table
61                .file_map
62                .entry(file_key)
63                .or_default()
64                .push(unit.temp_id);
65
66            // Track import targets
67            if unit.unit_type == CodeUnitType::Import {
68                for ref_info in &unit.references {
69                    if ref_info.kind == ReferenceKind::Import {
70                        table
71                            .import_targets
72                            .entry(ref_info.name.clone())
73                            .or_default()
74                            .push(unit.temp_id);
75                    }
76                }
77            }
78        }
79
80        Ok(table)
81    }
82
83    /// Look up a unit by qualified name.
84    pub fn lookup_qualified(&self, qname: &str) -> Option<u64> {
85        self.symbol_map.get(qname).copied()
86    }
87
88    /// Look up units by simple name.
89    pub fn lookup_name(&self, name: &str) -> &[u64] {
90        self.name_map.get(name).map(|v| v.as_slice()).unwrap_or(&[])
91    }
92
93    /// Look up units in the same file.
94    pub fn units_in_file(&self, file_path: &str) -> &[u64] {
95        self.file_map
96            .get(file_path)
97            .map(|v| v.as_slice())
98            .unwrap_or(&[])
99    }
100
101    /// Get the qualified name for a temp_id.
102    pub fn qname_for_id(&self, id: u64) -> Option<&str> {
103        self.id_to_qname.get(&id).map(|s| s.as_str())
104    }
105
106    /// Get all symbol entries.
107    pub fn all_symbols(&self) -> &HashMap<String, u64> {
108        &self.symbol_map
109    }
110
111    /// Number of symbols.
112    pub fn len(&self) -> usize {
113        self.symbol_map.len()
114    }
115
116    /// Check if empty.
117    pub fn is_empty(&self) -> bool {
118        self.symbol_map.is_empty()
119    }
120}
121
122impl Default for SymbolTable {
123    fn default() -> Self {
124        Self::new()
125    }
126}
127
128/// Resolves references from raw units to concrete targets.
129pub struct Resolver {
130    /// Known external libraries.
131    external_libs: HashMap<String, ExternalLibrary>,
132}
133
134/// An external library with known symbols.
135#[derive(Debug)]
136pub struct ExternalLibrary {
137    /// Library name.
138    pub name: String,
139    /// Language.
140    pub language: Language,
141    /// Known exported symbols.
142    pub known_symbols: HashSet<String>,
143    /// Is this a standard library?
144    pub is_stdlib: bool,
145}
146
147impl Resolver {
148    /// Create a new resolver with standard library knowledge.
149    pub fn new() -> Self {
150        let mut resolver = Self {
151            external_libs: HashMap::new(),
152        };
153        resolver.register_python_stdlib();
154        resolver.register_rust_stdlib();
155        resolver.register_node_builtins();
156        resolver.register_go_stdlib();
157        resolver
158    }
159
160    /// Resolve all references in the raw units.
161    pub fn resolve_all(
162        &self,
163        units: &[RawCodeUnit],
164        symbol_table: &SymbolTable,
165    ) -> AcbResult<Vec<ResolvedUnit>> {
166        let mut resolved = Vec::with_capacity(units.len());
167
168        for unit in units {
169            let resolved_refs = self.resolve_unit_references(unit, units, symbol_table)?;
170            resolved.push(ResolvedUnit {
171                unit: unit.clone(),
172                resolved_refs,
173            });
174        }
175
176        Ok(resolved)
177    }
178
179    fn resolve_unit_references(
180        &self,
181        unit: &RawCodeUnit,
182        all_units: &[RawCodeUnit],
183        symbol_table: &SymbolTable,
184    ) -> AcbResult<Vec<ResolvedReference>> {
185        let mut resolved = Vec::new();
186
187        for raw_ref in &unit.references {
188            let resolution = self.resolve_reference(raw_ref, unit, all_units, symbol_table);
189            resolved.push(ResolvedReference {
190                raw: raw_ref.clone(),
191                resolution,
192            });
193        }
194
195        Ok(resolved)
196    }
197
198    fn resolve_reference(
199        &self,
200        raw_ref: &RawReference,
201        unit: &RawCodeUnit,
202        all_units: &[RawCodeUnit],
203        symbol_table: &SymbolTable,
204    ) -> Resolution {
205        // Strategy 1: Try exact qualified name match
206        if let Some(target_id) = symbol_table.lookup_qualified(&raw_ref.name) {
207            if target_id != unit.temp_id {
208                return Resolution::Local(target_id);
209            }
210        }
211
212        // Strategy 2: Try local resolution (same file, then by simple name)
213        if let Some(local_id) = self.resolve_local(&raw_ref.name, unit, all_units, symbol_table) {
214            return Resolution::Local(local_id);
215        }
216
217        // Strategy 3: Try imported symbol resolution
218        if let Some(imported) = self.resolve_imported(&raw_ref.name, unit, all_units, symbol_table)
219        {
220            return Resolution::Imported(imported);
221        }
222
223        // Strategy 4: Try external library match
224        if let Some(external) = self.resolve_external(&raw_ref.name, unit.language) {
225            return Resolution::External(external);
226        }
227
228        Resolution::Unresolved
229    }
230
231    fn resolve_local(
232        &self,
233        name: &str,
234        unit: &RawCodeUnit,
235        _all_units: &[RawCodeUnit],
236        symbol_table: &SymbolTable,
237    ) -> Option<u64> {
238        let file_key = unit.file_path.to_string_lossy().to_string();
239        let file_units = symbol_table.units_in_file(&file_key);
240
241        // Look for a matching name in the same file
242        for &id in file_units {
243            if id == unit.temp_id {
244                continue;
245            }
246            if let Some(qname) = symbol_table.qname_for_id(id) {
247                // Match on the simple name part of the qname
248                let simple = qname.rsplit('.').next().unwrap_or(qname);
249                let simple2 = qname.rsplit("::").next().unwrap_or(qname);
250                if simple == name || simple2 == name || qname == name {
251                    return Some(id);
252                }
253            }
254        }
255
256        // Also look globally by simple name
257        let candidates = symbol_table.lookup_name(name);
258        candidates.iter().find(|&&cid| cid != unit.temp_id).copied()
259    }
260
261    fn resolve_imported(
262        &self,
263        name: &str,
264        unit: &RawCodeUnit,
265        all_units: &[RawCodeUnit],
266        symbol_table: &SymbolTable,
267    ) -> Option<ImportedSymbol> {
268        // Check if any import in the same file matches this name
269        let file_key = unit.file_path.to_string_lossy().to_string();
270        let file_unit_ids = symbol_table.units_in_file(&file_key);
271
272        for &fid in file_unit_ids {
273            // Find the unit for this ID
274            if let Some(file_unit) = all_units.iter().find(|u| u.temp_id == fid) {
275                if file_unit.unit_type == CodeUnitType::Import {
276                    // Check if this import's name matches the reference
277                    let import_name = &file_unit.name;
278                    if import_name.contains(name)
279                        || name.contains(import_name.rsplit('/').next().unwrap_or(import_name))
280                    {
281                        return Some(ImportedSymbol {
282                            unit_id: fid,
283                            import_path: import_name.clone(),
284                        });
285                    }
286                }
287            }
288        }
289
290        None
291    }
292
293    fn resolve_external(&self, name: &str, language: Language) -> Option<ExternalSymbol> {
294        for lib in self.external_libs.values() {
295            if lib.language == language && lib.known_symbols.contains(name) {
296                return Some(ExternalSymbol {
297                    library: lib.name.clone(),
298                    symbol: name.to_string(),
299                    is_stdlib: lib.is_stdlib,
300                });
301            }
302        }
303        None
304    }
305
306    fn register_python_stdlib(&mut self) {
307        let symbols: HashSet<String> = [
308            "print",
309            "len",
310            "range",
311            "int",
312            "str",
313            "float",
314            "bool",
315            "list",
316            "dict",
317            "set",
318            "tuple",
319            "type",
320            "isinstance",
321            "issubclass",
322            "hasattr",
323            "getattr",
324            "setattr",
325            "delattr",
326            "super",
327            "object",
328            "open",
329            "input",
330            "sorted",
331            "reversed",
332            "enumerate",
333            "zip",
334            "map",
335            "filter",
336            "any",
337            "all",
338            "min",
339            "max",
340            "sum",
341            "abs",
342            "round",
343            "format",
344            "repr",
345            "id",
346            "hash",
347            "iter",
348            "next",
349            "Exception",
350            "ValueError",
351            "TypeError",
352            "KeyError",
353            "IndexError",
354            "AttributeError",
355            "RuntimeError",
356            "StopIteration",
357            "OSError",
358        ]
359        .iter()
360        .map(|s| s.to_string())
361        .collect();
362
363        self.external_libs.insert(
364            "python_stdlib".to_string(),
365            ExternalLibrary {
366                name: "python_stdlib".to_string(),
367                language: Language::Python,
368                known_symbols: symbols,
369                is_stdlib: true,
370            },
371        );
372    }
373
374    fn register_rust_stdlib(&mut self) {
375        let symbols: HashSet<String> = [
376            "println",
377            "eprintln",
378            "format",
379            "vec",
380            "String",
381            "Vec",
382            "HashMap",
383            "HashSet",
384            "BTreeMap",
385            "BTreeSet",
386            "Option",
387            "Result",
388            "Ok",
389            "Err",
390            "Some",
391            "None",
392            "Box",
393            "Rc",
394            "Arc",
395            "RefCell",
396            "Mutex",
397            "RwLock",
398            "Clone",
399            "Debug",
400            "Display",
401            "Default",
402            "Iterator",
403            "IntoIterator",
404            "From",
405            "Into",
406            "TryFrom",
407            "TryInto",
408            "AsRef",
409            "AsMut",
410            "Drop",
411            "Fn",
412            "FnMut",
413            "FnOnce",
414            "Send",
415            "Sync",
416            "Sized",
417            "Unpin",
418        ]
419        .iter()
420        .map(|s| s.to_string())
421        .collect();
422
423        self.external_libs.insert(
424            "rust_stdlib".to_string(),
425            ExternalLibrary {
426                name: "rust_stdlib".to_string(),
427                language: Language::Rust,
428                known_symbols: symbols,
429                is_stdlib: true,
430            },
431        );
432    }
433
434    fn register_node_builtins(&mut self) {
435        let symbols: HashSet<String> = [
436            "console",
437            "setTimeout",
438            "setInterval",
439            "clearTimeout",
440            "clearInterval",
441            "Promise",
442            "fetch",
443            "JSON",
444            "Math",
445            "Date",
446            "RegExp",
447            "Error",
448            "TypeError",
449            "RangeError",
450            "Array",
451            "Object",
452            "Map",
453            "Set",
454            "WeakMap",
455            "WeakSet",
456            "Symbol",
457            "Proxy",
458            "Reflect",
459            "require",
460            "module",
461            "exports",
462            "process",
463            "Buffer",
464            "__dirname",
465            "__filename",
466        ]
467        .iter()
468        .map(|s| s.to_string())
469        .collect();
470
471        self.external_libs.insert(
472            "node_builtins".to_string(),
473            ExternalLibrary {
474                name: "node_builtins".to_string(),
475                language: Language::JavaScript,
476                known_symbols: symbols.clone(),
477                is_stdlib: true,
478            },
479        );
480
481        self.external_libs.insert(
482            "ts_builtins".to_string(),
483            ExternalLibrary {
484                name: "ts_builtins".to_string(),
485                language: Language::TypeScript,
486                known_symbols: symbols,
487                is_stdlib: true,
488            },
489        );
490    }
491
492    fn register_go_stdlib(&mut self) {
493        let symbols: HashSet<String> = [
494            "fmt", "os", "io", "strings", "strconv", "errors", "context", "sync", "time", "net",
495            "http", "json", "log", "testing", "reflect", "sort", "math", "crypto", "path",
496            "filepath", "bytes", "bufio", "regexp",
497        ]
498        .iter()
499        .map(|s| s.to_string())
500        .collect();
501
502        self.external_libs.insert(
503            "go_stdlib".to_string(),
504            ExternalLibrary {
505                name: "go_stdlib".to_string(),
506                language: Language::Go,
507                known_symbols: symbols,
508                is_stdlib: true,
509            },
510        );
511    }
512}
513
514impl Default for Resolver {
515    fn default() -> Self {
516        Self::new()
517    }
518}
519
520/// A raw unit with its resolved references.
521#[derive(Debug, Clone)]
522pub struct ResolvedUnit {
523    /// The original raw code unit.
524    pub unit: RawCodeUnit,
525    /// Resolved references.
526    pub resolved_refs: Vec<ResolvedReference>,
527}
528
529/// A resolved reference.
530#[derive(Debug, Clone)]
531pub struct ResolvedReference {
532    /// The original raw reference.
533    pub raw: RawReference,
534    /// Resolution result.
535    pub resolution: Resolution,
536}
537
538/// Result of resolving a reference.
539#[derive(Debug, Clone)]
540pub enum Resolution {
541    /// Resolved to a local unit by temp_id.
542    Local(u64),
543    /// Resolved to an imported unit.
544    Imported(ImportedSymbol),
545    /// Resolved to an external library.
546    External(ExternalSymbol),
547    /// Could not resolve.
548    Unresolved,
549}
550
/// A symbol resolved through an import.
///
/// Compares and hashes by value, so resolution results can be asserted on
/// and de-duplicated.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ImportedSymbol {
    /// The `temp_id` of the import unit that brought the symbol in.
    pub unit_id: u64,
    /// The import path string (the import unit's name).
    pub import_path: String,
}
559
/// A symbol from an external library.
///
/// Compares and hashes by value, so resolution results can be asserted on
/// and de-duplicated.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ExternalSymbol {
    /// Name of the library that exports the symbol.
    pub library: String,
    /// The symbol name as referenced.
    pub symbol: String,
    /// Whether the library is a standard library.
    pub is_stdlib: bool,
}