Skip to main content

mati_core/analysis/resolvers/
mod.rs

1//! Trait-based import resolution system.
2//!
3//! Each supported language implements [`LanguageResolver`] to map import
4//! statements into repo-relative file paths. The [`ResolverRegistry`] provides
5//! dispatch by language, and [`FileIndex`] provides O(1) file existence checks
6//! plus helper queries needed by language-specific resolvers.
7//!
8//! # Architecture
9//!
10//! ```text
11//! build_edges()
12//!   → ResolverRegistry::resolve(import, file, language, &file_index)
13//!       → dispatches to LanguageResolver::resolve(import, file, &file_index)
14//!           → returns Option<String> (repo-relative target path)
15//! ```
16//!
//! Adding a new language resolver:
//! 1. Create `src/analysis/resolvers/<lang>.rs` and declare it in this file with `pub mod <lang>;`
//! 2. Implement `LanguageResolver` for your struct
//! 3. Register it in `ResolverRegistry::new()`
21
22pub mod c;
23pub mod cpp;
24pub mod elixir;
25pub mod go;
26pub mod haskell;
27pub mod java;
28pub mod python;
29pub mod ruby;
30pub mod rust;
31pub mod scala;
32pub mod typescript;
33
34use std::collections::HashMap;
35use std::collections::HashSet;
36use std::path::PathBuf;
37
38use crate::analysis::parser::import::ImportKind;
39use crate::analysis::parser::ImportStatement;
40use crate::analysis::walker::Language;
41
42// ── LanguageResolver trait ──────────────────────────────────────────────────
43
44/// Trait for language-specific import resolution.
45///
46/// Each implementation maps an `ImportStatement` from a source file to a
47/// repo-relative file path, using the `FileIndex` for existence checks.
48pub trait LanguageResolver: Send + Sync {
49    /// Resolve an import statement from `importing_file` into a repo-relative
50    /// file path that exists in `file_index`. Return `None` if resolution fails
51    /// or the import is external.
52    fn resolve(
53        &self,
54        import: &ImportStatement,
55        importing_file: &str,
56        file_index: &FileIndex,
57    ) -> Option<String>;
58
59    /// The language(s) this resolver handles.
60    fn language(&self) -> Language;
61
62    /// A short human-readable name for debugging and logging.
63    fn name(&self) -> &'static str;
64}
65
66// ── FileIndex ───────────────────────────────────────────────────────────────
67
/// Set of every known repo-relative file path, plus per-language lookup hints.
///
/// Existence checks go through a `HashSet<String>`, so [`FileIndex::contains`]
/// is a single hash lookup. The scanning helpers (`files_with_prefix`,
/// `files_with_stem`) visit every entry; that is acceptable at Layer 0 scale
/// and should only be optimized if benchmarks show a hot spot.
pub struct FileIndex {
    /// All indexed repo-relative paths.
    files: HashSet<String>,
    /// Directory that `read_file` joins relative paths onto. `None` means
    /// file contents cannot be read through this index.
    root: Option<PathBuf>,
    /// Rust crate root prefixes used for workspace resolution (e.g. `"src/"`,
    /// `"crates/regex/src/"`). Kept longest-first so that a workspace member
    /// path wins over the repo-root `src/`.
    crate_roots: Vec<String>,
    /// Workspace member crate name (snake_case, as written in `use`
    /// statements) mapped to its crate root path (e.g. `"crates/regex/src/"`).
    /// Populated from each member's `[package].name` in Cargo.toml.
    /// Empty for non-Rust or single-crate projects.
    workspace_members: HashMap<String, String>,
    /// Scala source root prefixes discovered from file paths during init,
    /// e.g. `["zio-json/shared/src/main/scala/", "zio-json/jvm/src/test/scala/"]`.
    /// The Scala resolver uses them to locate files in multi-project sbt layouts.
    scala_source_roots: Vec<String>,
    /// Ruby/Rails Zeitwerk autoload roots discovered from file paths,
    /// e.g. `["app/models/", "app/controllers/", "core/app/models/"]`.
    /// The Ruby resolver uses them to map constant names to file paths.
    ruby_autoload_roots: Vec<String>,
    /// Ruby `lib/` roots for `require` resolution in monorepo layouts,
    /// e.g. `["lib/", "core/lib/", "api/lib/"]`.
    ruby_lib_roots: Vec<String>,
}

impl FileIndex {
    /// Builds an index from repo-relative paths.
    ///
    /// The result has no root directory and every language-specific list or
    /// map starts out empty.
    pub fn new(paths: impl IntoIterator<Item = String>) -> Self {
        let files: HashSet<String> = paths.into_iter().collect();
        Self {
            files,
            root: None,
            crate_roots: Vec::new(),
            workspace_members: HashMap::new(),
            scala_source_roots: Vec::new(),
            ruby_autoload_roots: Vec::new(),
            ruby_lib_roots: Vec::new(),
        }
    }

    /// Builds an index like [`FileIndex::new`], additionally remembering the
    /// repo `root` so that [`FileIndex::read_file`] can read file contents.
    pub fn new_with_root(root: PathBuf, paths: impl IntoIterator<Item = String>) -> Self {
        Self {
            root: Some(root),
            ..Self::new(paths)
        }
    }

    /// Stores the Rust crate root prefixes used for workspace resolution.
    ///
    /// The prefixes are ordered longest-first before being stored, so a
    /// workspace member path is matched ahead of the repo-root `src/`.
    /// Prefixes of equal length keep their incoming relative order.
    pub fn set_crate_roots(&mut self, mut roots: Vec<String>) {
        // Stable sort, descending by byte length.
        roots.sort_by(|a, b| b.len().cmp(&a.len()));
        self.crate_roots = roots;
    }

    /// Finds the first stored crate root that `file_path` starts with.
    ///
    /// For a single-crate project this is `Some("src/")`; for a workspace
    /// file at `crates/foo/src/bar.rs` it is `Some("crates/foo/src/")`.
    /// Returns `None` when no stored root is a prefix of `file_path`.
    pub fn crate_root_for(&self, file_path: &str) -> Option<&str> {
        self.crate_roots
            .iter()
            .map(String::as_str)
            .find(|candidate| file_path.starts_with(*candidate))
    }

    /// Replaces the workspace member mapping (snake_case crate name → crate
    /// root path).
    pub fn set_workspace_members(&mut self, members: HashMap<String, String>) {
        self.workspace_members = members;
    }

    /// Looks up the crate root path registered for `crate_name`.
    ///
    /// The name to pass is the first segment of an import path, e.g.
    /// `"grep_regex"` for `"grep_regex::matcher::Foo"`.
    pub fn workspace_member_root(&self, crate_name: &str) -> Option<&str> {
        let root = self.workspace_members.get(crate_name)?;
        Some(root.as_str())
    }

    /// True when at least one workspace member mapping is stored.
    pub fn has_workspace_members(&self) -> bool {
        !self.workspace_members.is_empty()
    }

    /// Replaces the Scala source root prefixes used for multi-project sbt
    /// resolution.
    pub fn set_scala_source_roots(&mut self, roots: Vec<String>) {
        self.scala_source_roots = roots;
    }

    /// The stored Scala source root prefixes.
    pub fn scala_source_roots(&self) -> &[String] {
        self.scala_source_roots.as_slice()
    }

    /// Replaces the Ruby/Rails Zeitwerk autoload root directories.
    pub fn set_ruby_autoload_roots(&mut self, roots: Vec<String>) {
        self.ruby_autoload_roots = roots;
    }

    /// The stored Ruby autoload root directories.
    pub fn ruby_autoload_roots(&self) -> &[String] {
        self.ruby_autoload_roots.as_slice()
    }

    /// Replaces the Ruby `lib/` root directories used for monorepo `require`
    /// resolution.
    pub fn set_ruby_lib_roots(&mut self, roots: Vec<String>) {
        self.ruby_lib_roots = roots;
    }

    /// The stored Ruby `lib/` root directories.
    pub fn ruby_lib_roots(&self) -> &[String] {
        self.ruby_lib_roots.as_slice()
    }

    /// Reads the file at `rel_path`, joined onto the index root, as a string.
    ///
    /// Returns `None` when the index has no root or when reading fails for
    /// any reason. Resolvers that need file contents (like Go for `go.mod`)
    /// must tolerate `None` gracefully.
    pub fn read_file(&self, rel_path: &str) -> Option<String> {
        self.root
            .as_deref()
            .and_then(|base| std::fs::read_to_string(base.join(rel_path)).ok())
    }

    /// True when `path` is one of the indexed repo-relative paths.
    pub fn contains(&self, path: &str) -> bool {
        self.files.contains(path)
    }

    /// Collects every indexed path that starts with `prefix`.
    ///
    /// The entries are borrowed from the index, so no path string is cloned.
    /// Every indexed path is visited, and the order of the result is
    /// unspecified.
    pub fn files_with_prefix(&self, prefix: &str) -> Vec<&String> {
        let mut matches = Vec::new();
        for path in &self.files {
            if path.starts_with(prefix) {
                matches.push(path);
            }
        }
        matches
    }

    /// Collects every indexed path whose file stem (file name without its
    /// extension) equals `stem`.
    ///
    /// Useful for Go package resolution, where `foo.go` matches package `foo`.
    /// Every indexed path is visited, and the order of the result is
    /// unspecified.
    pub fn files_with_stem(&self, stem: &str) -> Vec<&String> {
        self.files
            .iter()
            .filter(|path| {
                std::path::Path::new(path.as_str())
                    .file_stem()
                    .and_then(|os| os.to_str())
                    .is_some_and(|found| found == stem)
            })
            .collect()
    }
}
226
227// ── ResolverRegistry ────────────────────────────────────────────────────────
228
229/// Dispatch registry that maps languages to their resolvers.
230///
231/// Constructed once per `build_edges` call. Languages without a registered
232/// resolver simply return `None` for all imports (no edges created).
233pub struct ResolverRegistry {
234    resolvers: HashMap<Language, Box<dyn LanguageResolver>>,
235}
236
237impl ResolverRegistry {
238    /// Create a registry with all currently implemented resolvers.
239    pub fn new() -> Self {
240        let mut resolvers: HashMap<Language, Box<dyn LanguageResolver>> = HashMap::new();
241        resolvers.insert(Language::Rust, Box::new(rust::RustResolver));
242        resolvers.insert(Language::Python, Box::new(python::PythonResolver));
243        resolvers.insert(
244            Language::TypeScript,
245            Box::new(typescript::TypeScriptResolver),
246        );
247        resolvers.insert(
248            Language::JavaScript,
249            Box::new(typescript::TypeScriptResolver),
250        );
251        resolvers.insert(Language::Go, Box::new(go::GoResolver));
252        resolvers.insert(Language::Java, Box::new(java::JavaResolver));
253        resolvers.insert(Language::C, Box::new(c::CResolver));
254        resolvers.insert(Language::Cpp, Box::new(cpp::CppResolver));
255        resolvers.insert(Language::Ruby, Box::new(ruby::RubyResolver));
256        resolvers.insert(Language::Scala, Box::new(scala::ScalaResolver));
257        resolvers.insert(Language::Elixir, Box::new(elixir::ElixirResolver));
258        resolvers.insert(Language::Haskell, Box::new(haskell::HaskellResolver));
259        Self { resolvers }
260    }
261
262    /// Resolve an import statement for the given language.
263    ///
264    /// Returns `None` if:
265    /// - The import is classified as `External` (skipped without resolution)
266    /// - No resolver is registered for the language
267    /// - The resolver cannot find a matching file
268    pub fn resolve(
269        &self,
270        import: &ImportStatement,
271        importing_file: &str,
272        language: Language,
273        file_index: &FileIndex,
274    ) -> Option<String> {
275        // External imports are never resolved — the parser already classified them.
276        if import.kind == ImportKind::External {
277            return None;
278        }
279        self.resolvers
280            .get(&language)?
281            .resolve(import, importing_file, file_index)
282    }
283}
284
285impl Default for ResolverRegistry {
286    fn default() -> Self {
287        Self::new()
288    }
289}
290
291// ── Shared helpers ─────────────────────────────────────────────────────────
292
/// Convert a PascalCase / CamelCase string to snake_case.
///
/// An underscore is inserted before an uppercase letter when the previous
/// character is lowercase or an ASCII digit, or when the previous character is
/// uppercase and the next one is lowercase (the end of an acronym). Every
/// uppercase letter is then lowercased; all other characters are copied as-is.
///
/// - `UserNotification` → `user_notification`
/// - `HTTPServer` → `http_server`
/// - `JSONParser` → `json_parser`
/// - `API` → `api`
/// - `FooID` → `foo_id`
/// - `V2Parser` → `v2_parser`
/// - `XMLParserV2` → `xml_parser_v2`
///
/// Case detection and lowercasing are both Unicode-aware, so a non-ASCII
/// capital is lowered as well: `FooÉcole` → `foo_école`.
///
/// Used by the Ruby and Elixir resolvers for Zeitwerk / Mix path conventions.
pub(crate) fn camel_to_snake(s: &str) -> String {
    let mut result = String::with_capacity(s.len() + 4);
    // `prev` is the character emitted on the previous iteration (None at the
    // start); `rest` lets us peek one character ahead without collecting.
    let mut prev: Option<char> = None;
    let mut rest = s.chars().peekable();

    while let Some(c) = rest.next() {
        if c.is_uppercase() {
            if let Some(before) = prev {
                let next_is_lower = rest.peek().is_some_and(|n| n.is_lowercase());
                // Insert underscore at word boundaries:
                // - lowercase/digit → uppercase: userN... → user_n...
                // - uppercase → uppercase+lowercase (acronym end): HTTPS... → http_s...
                if before.is_lowercase()
                    || before.is_ascii_digit()
                    || (before.is_uppercase() && next_is_lower)
                {
                    result.push('_');
                }
            }
            // `to_lowercase` matches the Unicode-aware `is_uppercase` test
            // above; `to_ascii_lowercase` would leave non-ASCII capitals as-is.
            result.extend(c.to_lowercase());
        } else {
            result.push(c);
        }
        prev = Some(c);
    }

    result
}
332
333// ── Tests ───────────────────────────────────────────────────────────────────
334
#[cfg(test)]
mod tests {
    use super::*;

    /// `contains` reports exactly the paths the index was built from.
    #[test]
    fn file_index_contains() {
        let idx = FileIndex::new(vec!["src/main.rs".into(), "src/lib.rs".into()]);
        assert!(idx.contains("src/main.rs"));
        assert!(!idx.contains("src/foo.rs"));
    }

    /// `files_with_prefix` returns only the paths under the given directory.
    #[test]
    fn file_index_prefix() {
        let idx = FileIndex::new(vec![
            "src/store/db.rs".into(),
            "src/store/mod.rs".into(),
            "src/main.rs".into(),
        ]);
        let results = idx.files_with_prefix("src/store/");
        assert_eq!(results.len(), 2);
    }

    /// `files_with_stem` matches on the file name without its extension,
    /// regardless of directory or extension.
    #[test]
    fn file_index_stem() {
        let idx = FileIndex::new(vec![
            "src/utils.rs".into(),
            "lib/utils.py".into(),
            "src/main.rs".into(),
        ]);
        let results = idx.files_with_stem("utils");
        assert_eq!(results.len(), 2);
    }

    /// External imports short-circuit to `None` before any resolver runs.
    #[test]
    fn registry_skips_external() {
        let registry = ResolverRegistry::new();
        let idx = FileIndex::new(vec!["src/main.rs".into()]);
        let import = ImportStatement::new("react", ImportKind::External, 1);
        assert_eq!(
            registry.resolve(&import, "src/app.ts", Language::TypeScript, &idx),
            None
        );
    }

    /// A non-external import that the language's resolver cannot match to an
    /// indexed file yields `None`.
    ///
    /// `Language::Go` has a resolver registered in `ResolverRegistry::new()`,
    /// so this exercises the "resolver finds nothing" path rather than the
    /// "no resolver registered" path.
    #[test]
    fn registry_returns_none_for_unresolvable_import() {
        let registry = ResolverRegistry::new();
        let idx = FileIndex::new(vec!["main.go".into()]);
        let import = ImportStatement::new("fmt", ImportKind::Normal, 1);
        assert_eq!(
            registry.resolve(&import, "main.go", Language::Go, &idx),
            None
        );
    }

    // ── camel_to_snake ─────────────────────────────────────────────────────

    #[test]
    fn camel_to_snake_simple_word() {
        assert_eq!(camel_to_snake("User"), "user");
        assert_eq!(camel_to_snake("Router"), "router");
    }

    #[test]
    fn camel_to_snake_multi_word() {
        assert_eq!(camel_to_snake("UserNotification"), "user_notification");
        assert_eq!(camel_to_snake("MyApp"), "my_app");
        assert_eq!(
            camel_to_snake("ApplicationController"),
            "application_controller"
        );
    }

    #[test]
    fn camel_to_snake_acronyms() {
        assert_eq!(camel_to_snake("HTTPServer"), "http_server");
        assert_eq!(camel_to_snake("JSONParser"), "json_parser");
        assert_eq!(camel_to_snake("XMLParser"), "xml_parser");
        assert_eq!(camel_to_snake("API"), "api");
        assert_eq!(camel_to_snake("HTTP"), "http");
    }

    #[test]
    fn camel_to_snake_trailing_acronym() {
        assert_eq!(camel_to_snake("FooID"), "foo_id");
        assert_eq!(camel_to_snake("UserAPI"), "user_api");
    }

    #[test]
    fn camel_to_snake_digit_boundaries() {
        assert_eq!(camel_to_snake("V2Parser"), "v2_parser");
        assert_eq!(camel_to_snake("XMLParserV2"), "xml_parser_v2");
    }

    #[test]
    fn camel_to_snake_already_lowercase() {
        assert_eq!(camel_to_snake("already_snake"), "already_snake");
    }

    #[test]
    fn camel_to_snake_empty() {
        assert_eq!(camel_to_snake(""), "");
    }

    #[test]
    fn camel_to_snake_single_char() {
        assert_eq!(camel_to_snake("A"), "a");
        assert_eq!(camel_to_snake("x"), "x");
    }
}