cadi_core/atomizer/
resolver.rs

//! Symbol Resolver
//!
//! Resolves imports to content-addressed chunk IDs.
//! This is the core of the "Smart Atomizer": when we see
//!
//! ```rust,ignore
//! use crate::utils::helper;
//! ```
//!
//! we resolve `helper` to its chunk ID, e.g. `chunk:sha256:abc123...`.
11
12use std::collections::HashMap;
13use std::path::{Path, PathBuf};
14
15use serde::{Deserialize, Serialize};
16
17use crate::error::CadiResult;
18use crate::graph::GraphStore;
19
/// Maps import statements to content-addressed chunk IDs for one project.
///
/// A resolver is bound to a single source language, chosen at construction,
/// and memoizes successful lookups so that a symbol imported repeatedly from
/// the same target file is only looked up once.
#[derive(Debug)]
pub struct SymbolResolver {
    /// Memoized resolutions: `(resolved target file, symbol name)` -> chunk ID.
    cache: HashMap<(PathBuf, String), String>,

    /// Project root used as the base for path resolution.
    project_root: PathBuf,

    /// Language identifier (e.g. `"rust"`, `"typescript"`) that selects the
    /// import-extraction and path-resolution strategy.
    language: String,
}
31
32/// A raw import statement
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct RawImport {
35    /// The import path (e.g., "./utils", "lodash", "crate::utils")
36    pub source: String,
37    
38    /// Symbols being imported
39    pub symbols: Vec<RawSymbol>,
40    
41    /// Is this a default import?
42    pub is_default: bool,
43    
44    /// Is this a namespace import (import * as)?
45    pub is_namespace: bool,
46    
47    /// Line number where this import appears
48    pub line: usize,
49}
50
51/// A raw symbol in an import
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct RawSymbol {
54    /// Original name in the source module
55    pub name: String,
56    
57    /// Alias if renamed (import { x as y })
58    pub alias: Option<String>,
59}
60
61/// A resolved import with chunk references
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct ResolvedImport {
64    /// Original import path
65    pub source_path: String,
66    
67    /// Imported symbols with their resolutions
68    pub symbols: Vec<ImportedSymbol>,
69    
70    /// Line number
71    pub line: usize,
72}
73
74/// A symbol resolved to its chunk
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct ImportedSymbol {
77    /// Symbol name
78    pub name: String,
79    
80    /// Alias if renamed
81    pub alias: Option<String>,
82    
83    /// Chunk ID where this symbol is defined
84    pub chunk_id: String,
85    
86    /// Hash of the chunk
87    pub chunk_hash: String,
88    
89    /// Type of the symbol (function, type, etc.)
90    pub symbol_type: Option<String>,
91}
92
93impl SymbolResolver {
94    /// Create a new resolver for a project
95    pub fn new(project_root: impl Into<PathBuf>, language: impl Into<String>) -> Self {
96        Self {
97            cache: HashMap::new(),
98            project_root: project_root.into(),
99            language: language.into(),
100        }
101    }
102
103    /// Clear the resolution cache
104    pub fn clear_cache(&mut self) {
105        self.cache.clear();
106    }
107
108    /// Extract raw imports from source code
109    pub fn extract_imports(&self, source: &str) -> Vec<RawImport> {
110        match self.language.as_str() {
111            "rust" => self.extract_rust_imports(source),
112            "typescript" | "javascript" => self.extract_ts_imports(source),
113            "python" => self.extract_python_imports(source),
114            "c" | "cpp" => self.extract_c_imports(source),
115            "csharp" => self.extract_csharp_imports(source),
116            "css" => self.extract_css_imports(source),
117            "glsl" => self.extract_glsl_imports(source),
118            _ => Vec::new(),
119        }
120    }
121
122    /// Resolve imports using a graph store
123    pub fn resolve_imports(
124        &mut self,
125        current_file: &Path,
126        imports: Vec<RawImport>,
127        graph: &GraphStore,
128    ) -> CadiResult<Vec<ResolvedImport>> {
129        let mut resolved = Vec::new();
130
131        for import in imports {
132            let target_path = self.resolve_path(current_file, &import.source)?;
133            let mut resolved_symbols = Vec::new();
134
135            for sym in import.symbols {
136                // Check cache first
137                let cache_key = (target_path.clone(), sym.name.clone());
138                
139                if let Some(chunk_id) = self.cache.get(&cache_key) {
140                    resolved_symbols.push(ImportedSymbol {
141                        name: sym.name,
142                        alias: sym.alias,
143                        chunk_id: chunk_id.clone(),
144                        chunk_hash: extract_hash(chunk_id),
145                        symbol_type: None,
146                    });
147                    continue;
148                }
149
150                // Try to find in graph
151                if let Ok(Some(chunk_id)) = graph.find_symbol(&sym.name) {
152                    self.cache.insert(cache_key, chunk_id.clone());
153                    resolved_symbols.push(ImportedSymbol {
154                        name: sym.name,
155                        alias: sym.alias,
156                        chunk_id: chunk_id.clone(),
157                        chunk_hash: extract_hash(&chunk_id),
158                        symbol_type: None,
159                    });
160                } else {
161                    // Symbol not found in graph - mark as unresolved
162                    resolved_symbols.push(ImportedSymbol {
163                        name: sym.name,
164                        alias: sym.alias,
165                        chunk_id: "unresolved".to_string(),
166                        chunk_hash: String::new(),
167                        symbol_type: None,
168                    });
169                }
170            }
171
172            resolved.push(ResolvedImport {
173                source_path: import.source,
174                symbols: resolved_symbols,
175                line: import.line,
176            });
177        }
178
179        Ok(resolved)
180    }
181
182    /// Create link references for use in atomized code
183    /// 
184    /// Transforms `import { X } from './y'` into `link:sha256:abc123`
185    pub fn create_link_references(&self, imports: &[ResolvedImport]) -> HashMap<String, String> {
186        let mut links = HashMap::new();
187
188        for import in imports {
189            for sym in &import.symbols {
190                if !sym.chunk_id.is_empty() && sym.chunk_id != "unresolved" {
191                    let key = sym.alias.as_ref().unwrap_or(&sym.name).clone();
192                    links.insert(key, format!("link:{}", sym.chunk_id));
193                }
194            }
195        }
196
197        links
198    }
199
200    // ========================================================================
201    // Language-specific import extraction
202    // ========================================================================
203
204    fn extract_rust_imports(&self, source: &str) -> Vec<RawImport> {
205        let mut imports = Vec::new();
206
207        // Match use statements
208        let use_regex = regex::Regex::new(
209            r"(?m)^use\s+([\w:]+)(?:::\{([^}]+)\})?;"
210        ).unwrap();
211
212        for (line_idx, line) in source.lines().enumerate() {
213            if let Some(cap) = use_regex.captures(line) {
214                let path = cap.get(1).map(|m| m.as_str()).unwrap_or("");
215                
216                let symbols = if let Some(group) = cap.get(2) {
217                    // use foo::{a, b, c}
218                    group.as_str()
219                        .split(',')
220                        .map(|s| {
221                            let s = s.trim();
222                            if s.contains(" as ") {
223                                let parts: Vec<&str> = s.split(" as ").collect();
224                                RawSymbol {
225                                    name: parts[0].trim().to_string(),
226                                    alias: Some(parts[1].trim().to_string()),
227                                }
228                            } else {
229                                RawSymbol {
230                                    name: s.to_string(),
231                                    alias: None,
232                                }
233                            }
234                        })
235                        .collect()
236                } else {
237                    // use foo::bar; (single import)
238                    let name = path.split("::").last().unwrap_or("").to_string();
239                    if name.is_empty() {
240                        continue;
241                    }
242                    vec![RawSymbol { name, alias: None }]
243                };
244
245                imports.push(RawImport {
246                    source: path.to_string(),
247                    symbols,
248                    is_default: false,
249                    is_namespace: path.ends_with("::*"),
250                    line: line_idx + 1,
251                });
252            }
253        }
254
255        imports
256    }
257
258    fn extract_ts_imports(&self, source: &str) -> Vec<RawImport> {
259        let mut imports = Vec::new();
260
261        // Named imports: import { a, b } from 'path'
262        let named_regex = regex::Regex::new(
263            r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#
264        ).unwrap();
265
266        // Default imports: import X from 'path'
267        let default_regex = regex::Regex::new(
268            r#"import\s+(\w+)\s+from\s*['"]([^'"]+)['"]"#
269        ).unwrap();
270
271        // Namespace imports: import * as X from 'path'
272        let namespace_regex = regex::Regex::new(
273            r#"import\s*\*\s*as\s+(\w+)\s+from\s*['"]([^'"]+)['"]"#
274        ).unwrap();
275
276        for (line_idx, line) in source.lines().enumerate() {
277            // Named imports
278            if let Some(cap) = named_regex.captures(line) {
279                let symbols_str = cap.get(1).map(|m| m.as_str()).unwrap_or("");
280                let path = cap.get(2).map(|m| m.as_str()).unwrap_or("");
281
282                let symbols: Vec<RawSymbol> = symbols_str
283                    .split(',')
284                    .map(|s| {
285                        let s = s.trim();
286                        if s.contains(" as ") {
287                            let parts: Vec<&str> = s.split(" as ").collect();
288                            RawSymbol {
289                                name: parts[0].trim().to_string(),
290                                alias: Some(parts[1].trim().to_string()),
291                            }
292                        } else {
293                            RawSymbol {
294                                name: s.to_string(),
295                                alias: None,
296                            }
297                        }
298                    })
299                    .filter(|s| !s.name.is_empty())
300                    .collect();
301
302                imports.push(RawImport {
303                    source: path.to_string(),
304                    symbols,
305                    is_default: false,
306                    is_namespace: false,
307                    line: line_idx + 1,
308                });
309            }
310            // Default imports
311            else if let Some(cap) = default_regex.captures(line) {
312                let name = cap.get(1).map(|m| m.as_str()).unwrap_or("");
313                let path = cap.get(2).map(|m| m.as_str()).unwrap_or("");
314
315                imports.push(RawImport {
316                    source: path.to_string(),
317                    symbols: vec![RawSymbol {
318                        name: name.to_string(),
319                        alias: None,
320                    }],
321                    is_default: true,
322                    is_namespace: false,
323                    line: line_idx + 1,
324                });
325            }
326            // Namespace imports
327            else if let Some(cap) = namespace_regex.captures(line) {
328                let name = cap.get(1).map(|m| m.as_str()).unwrap_or("");
329                let path = cap.get(2).map(|m| m.as_str()).unwrap_or("");
330
331                imports.push(RawImport {
332                    source: path.to_string(),
333                    symbols: vec![RawSymbol {
334                        name: name.to_string(),
335                        alias: None,
336                    }],
337                    is_default: false,
338                    is_namespace: true,
339                    line: line_idx + 1,
340                });
341            }
342        }
343
344        imports
345    }
346
347    fn extract_python_imports(&self, source: &str) -> Vec<RawImport> {
348        let mut imports = Vec::new();
349
350        // from x import a, b, c
351        let from_regex = regex::Regex::new(
352            r"from\s+([\w.]+)\s+import\s+(.+)"
353        ).unwrap();
354
355        // import x, y, z
356        let import_regex = regex::Regex::new(
357            r"^import\s+([\w., ]+)"
358        ).unwrap();
359
360        for (line_idx, line) in source.lines().enumerate() {
361            let trimmed = line.trim();
362            
363            if let Some(cap) = from_regex.captures(trimmed) {
364                let path = cap.get(1).map(|m| m.as_str()).unwrap_or("");
365                let symbols_str = cap.get(2).map(|m| m.as_str()).unwrap_or("");
366
367                let symbols: Vec<RawSymbol> = symbols_str
368                    .split(',')
369                    .map(|s| {
370                        let s = s.trim();
371                        if s.contains(" as ") {
372                            let parts: Vec<&str> = s.split(" as ").collect();
373                            RawSymbol {
374                                name: parts[0].trim().to_string(),
375                                alias: Some(parts[1].trim().to_string()),
376                            }
377                        } else {
378                            RawSymbol {
379                                name: s.to_string(),
380                                alias: None,
381                            }
382                        }
383                    })
384                    .filter(|s| !s.name.is_empty())
385                    .collect();
386
387                imports.push(RawImport {
388                    source: path.to_string(),
389                    symbols,
390                    is_default: false,
391                    is_namespace: false,
392                    line: line_idx + 1,
393                });
394            } else if let Some(cap) = import_regex.captures(trimmed) {
395                let modules = cap.get(1).map(|m| m.as_str()).unwrap_or("");
396                
397                for module in modules.split(',') {
398                    let module = module.trim();
399                    let (name, alias) = if module.contains(" as ") {
400                        let parts: Vec<&str> = module.split(" as ").collect();
401                        (parts[0].trim().to_string(), Some(parts[1].trim().to_string()))
402                    } else {
403                        (module.to_string(), None)
404                    };
405
406                    imports.push(RawImport {
407                        source: name.clone(),
408                        symbols: vec![RawSymbol { name, alias }],
409                        is_default: false,
410                        is_namespace: true,
411                        line: line_idx + 1,
412                    });
413                }
414            }
415        }
416
417        imports
418    }
419
420    fn extract_c_imports(&self, source: &str) -> Vec<RawImport> {
421        let mut imports = Vec::new();
422        let include_regex = regex::Regex::new(r#"#include\s+["<]([^">]+)[">]"#).unwrap();
423
424        for (line_idx, line) in source.lines().enumerate() {
425            if let Some(cap) = include_regex.captures(line) {
426                let path = cap.get(1).map(|m| m.as_str()).unwrap_or("");
427                imports.push(RawImport {
428                    source: path.to_string(),
429                    symbols: Vec::new(), // C includes don't have explicit symbols
430                    is_default: false,
431                    is_namespace: true,
432                    line: line_idx + 1,
433                });
434            }
435        }
436        imports
437    }
438
439    fn extract_csharp_imports(&self, source: &str) -> Vec<RawImport> {
440        let mut imports = Vec::new();
441        let using_regex = regex::Regex::new(r#"using\s+([\w.]+);"#).unwrap();
442
443        for (line_idx, line) in source.lines().enumerate() {
444            if let Some(cap) = using_regex.captures(line) {
445                let path = cap.get(1).map(|m| m.as_str()).unwrap_or("");
446                imports.push(RawImport {
447                    source: path.to_string(),
448                    symbols: Vec::new(),
449                    is_default: false,
450                    is_namespace: true,
451                    line: line_idx + 1,
452                });
453            }
454        }
455        imports
456    }
457
458    fn extract_css_imports(&self, source: &str) -> Vec<RawImport> {
459        let mut imports = Vec::new();
460        let import_regex = regex::Regex::new(r#"@import\s+['"]([^'"]+)['"]"#).unwrap();
461
462        for (line_idx, line) in source.lines().enumerate() {
463            if let Some(cap) = import_regex.captures(line) {
464                let path = cap.get(1).map(|m| m.as_str()).unwrap_or("");
465                imports.push(RawImport {
466                    source: path.to_string(),
467                    symbols: Vec::new(),
468                    is_default: false,
469                    is_namespace: true,
470                    line: line_idx + 1,
471                });
472            }
473        }
474        imports
475    }
476
477    fn extract_glsl_imports(&self, source: &str) -> Vec<RawImport> {
478        // GLSL often uses #include if extended, or we can look for vendor-specific includes
479        self.extract_c_imports(source)
480    }
481
482    // ========================================================================
483    // Path resolution
484    // ========================================================================
485
486    fn resolve_path(&self, current_file: &Path, import_path: &str) -> CadiResult<PathBuf> {
487        match self.language.as_str() {
488            "rust" => self.resolve_rust_path(current_file, import_path),
489            "typescript" | "javascript" => self.resolve_ts_path(current_file, import_path),
490            "python" => self.resolve_python_path(current_file, import_path),
491            "c" | "cpp" | "glsl" => self.resolve_c_path(current_file, import_path),
492            "csharp" => self.resolve_csharp_path(current_file, import_path),
493            "css" => self.resolve_css_path(current_file, import_path),
494            _ => Ok(PathBuf::from(import_path)),
495        }
496    }
497
498    fn resolve_rust_path(&self, current_file: &Path, import_path: &str) -> CadiResult<PathBuf> {
499        // Handle crate::, super::, self::
500        if import_path.starts_with("crate::") {
501            let relative = import_path.strip_prefix("crate::").unwrap();
502            let parts: Vec<&str> = relative.split("::").collect();
503            
504            let mut path = self.project_root.join("src");
505            for part in &parts[..parts.len().saturating_sub(1)] {
506                path = path.join(part);
507            }
508            path = path.with_extension("rs");
509            
510            Ok(path)
511        } else if import_path.starts_with("super::") {
512            let relative = import_path.strip_prefix("super::").unwrap();
513            let parent = current_file.parent().and_then(|p| p.parent()).unwrap_or(&self.project_root);
514            Ok(parent.join(relative.replace("::", "/")).with_extension("rs"))
515        } else {
516            // External crate - just return the path as-is
517            Ok(PathBuf::from(import_path.replace("::", "/")))
518        }
519    }
520
521    fn resolve_ts_path(&self, current_file: &Path, import_path: &str) -> CadiResult<PathBuf> {
522        if import_path.starts_with('.') {
523            // Relative import
524            let parent = current_file.parent().unwrap_or(&self.project_root);
525            let resolved = parent.join(import_path);
526            
527            // Try various extensions
528            for ext in &["ts", "tsx", "js", "jsx", "index.ts", "index.js"] {
529                let with_ext = resolved.with_extension(ext);
530                if with_ext.exists() {
531                    return Ok(with_ext);
532                }
533            }
534            
535            Ok(resolved)
536        } else if import_path.starts_with('@') || import_path.starts_with("~") {
537            // Aliased import - would need tsconfig.json to resolve
538            Ok(PathBuf::from(import_path))
539        } else {
540            // node_modules import
541            Ok(self.project_root.join("node_modules").join(import_path))
542        }
543    }
544
545    fn resolve_python_path(&self, current_file: &Path, import_path: &str) -> CadiResult<PathBuf> {
546        if import_path.starts_with('.') {
547            // Relative import
548            let dots = import_path.chars().take_while(|c| *c == '.').count();
549            let rest = &import_path[dots..];
550            
551            let mut base = current_file.to_path_buf();
552            for _ in 0..=dots {
553                base = base.parent().unwrap_or(&self.project_root).to_path_buf();
554            }
555            
556            Ok(base.join(rest.replace('.', "/")).with_extension("py"))
557        } else {
558            // Absolute import
559            Ok(self.project_root.join(import_path.replace('.', "/")).with_extension("py"))
560        }
561    }
562
563    fn resolve_c_path(&self, current_file: &Path, import_path: &str) -> CadiResult<PathBuf> {
564        let parent = current_file.parent().unwrap_or(&self.project_root);
565        let resolved = parent.join(import_path);
566        
567        if resolved.exists() {
568            Ok(resolved)
569        } else {
570            // Check common include directories if not found relatively
571            for dir in &["include", "src"] {
572                let candidate = self.project_root.join(dir).join(import_path);
573                if candidate.exists() {
574                    return Ok(candidate);
575                }
576            }
577            Ok(resolved)
578        }
579    }
580
581    fn resolve_csharp_path(&self, _current_file: &Path, import_path: &str) -> CadiResult<PathBuf> {
582        // C# namespaces don't map directly to paths always, but we'll try
583        Ok(self.project_root.join(import_path.replace('.', "/")).with_extension("cs"))
584    }
585
586    fn resolve_css_path(&self, current_file: &Path, import_path: &str) -> CadiResult<PathBuf> {
587        let parent = current_file.parent().unwrap_or(&self.project_root);
588        Ok(parent.join(import_path))
589    }
590}
591
/// Returns the text after the last `:` of a chunk ID, or the whole input
/// when it contains no `:`.
fn extract_hash(chunk_id: &str) -> String {
    let tail = match chunk_id.rfind(':') {
        Some(pos) => &chunk_id[pos + 1..],
        None => chunk_id,
    };
    tail.to_owned()
}
600
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs import extraction for `language` over `source` with a resolver
    /// rooted at `/project`.
    fn imports_for(language: &str, source: &str) -> Vec<RawImport> {
        SymbolResolver::new("/project", language).extract_imports(source)
    }

    /// Collects the `source` field of every import, in order.
    fn sources(imports: &[RawImport]) -> Vec<&str> {
        imports.iter().map(|import| import.source.as_str()).collect()
    }

    #[test]
    fn test_rust_import_extraction() {
        let imports = imports_for(
            "rust",
            r#"
use std::collections::HashMap;
use crate::utils::{helper, Logger as Log};
use super::parent_module;
"#,
        );

        assert_eq!(imports.len(), 3);
        assert_eq!(imports[0].source, "std::collections::HashMap");

        let grouped = &imports[1].symbols;
        assert_eq!(grouped.len(), 2);
        assert_eq!(grouped[1].alias.as_deref(), Some("Log"));
    }

    #[test]
    fn test_ts_import_extraction() {
        let imports = imports_for(
            "typescript",
            r#"
import { foo, bar as baz } from './utils';
import React from 'react';
import * as lodash from 'lodash';
"#,
        );

        assert_eq!(imports.len(), 3);
        assert!(!imports[0].is_default);
        assert!(imports[1].is_default);
        assert!(imports[2].is_namespace);
    }

    #[test]
    fn test_python_import_extraction() {
        let imports = imports_for(
            "python",
            r#"
from os import path, getcwd as cwd
import json, yaml
from .utils import helper
"#,
        );

        // from os, import json, import yaml, from .utils
        assert_eq!(imports.len(), 4);
    }

    #[test]
    fn test_c_import_extraction() {
        let imports = imports_for(
            "c",
            r#"
#include <stdio.h>
#include "my_header.h"
"#,
        );

        assert_eq!(imports.len(), 2);
        assert_eq!(sources(&imports), ["stdio.h", "my_header.h"]);
    }

    #[test]
    fn test_csharp_import_extraction() {
        let imports = imports_for(
            "csharp",
            r#"
using System;
using System.Collections.Generic;
"#,
        );

        assert_eq!(imports.len(), 2);
        assert_eq!(sources(&imports), ["System", "System.Collections.Generic"]);
    }

    #[test]
    fn test_css_import_extraction() {
        let imports = imports_for(
            "css",
            r#"
@import "base.css";
@import 'themes/dark.css';
"#,
        );

        assert_eq!(imports.len(), 2);
        assert_eq!(sources(&imports), ["base.css", "themes/dark.css"]);
    }
}