mati 0.1.2

An enforcement layer for codebase knowledge: confirmed gotchas gate what AI agents read and edit at the hook level. Not a passive memory store.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
//! Trait-based import resolution system.
//!
//! Each supported language implements [`LanguageResolver`] to map import
//! statements into repo-relative file paths. The [`ResolverRegistry`] provides
//! dispatch by language, and [`FileIndex`] provides O(1) file existence checks
//! plus helper queries needed by language-specific resolvers.
//!
//! # Architecture
//!
//! ```text
//! build_edges()
//!   → ResolverRegistry::resolve(import, file, language, &file_index)
//!       → dispatches to LanguageResolver::resolve(import, file, &file_index)
//!           → returns Option<String> (repo-relative target path)
//! ```
//!
//! Adding a new language resolver:
//! 1. Create `src/analysis/resolvers/<lang>.rs`
//! 2. Declare it with `pub mod <lang>;` alongside the other `pub mod` lines below
//! 3. Implement `LanguageResolver` for your struct
//! 4. Register it in `ResolverRegistry::new()`

pub mod c;
pub mod cpp;
pub mod elixir;
pub mod go;
pub mod haskell;
pub mod java;
pub mod python;
pub mod ruby;
pub mod rust;
pub mod scala;
pub mod typescript;

use std::collections::HashMap;
use std::collections::HashSet;
use std::path::PathBuf;

use crate::analysis::parser::import::ImportKind;
use crate::analysis::parser::ImportStatement;
use crate::analysis::walker::Language;

// ── LanguageResolver trait ──────────────────────────────────────────────────

/// Trait for language-specific import resolution.
///
/// Each implementation maps an `ImportStatement` from a source file to a
/// repo-relative file path, using the `FileIndex` for existence checks.
///
/// Implementors must be `Send + Sync`; the registry stores them as
/// `Box<dyn LanguageResolver>` trait objects.
pub trait LanguageResolver: Send + Sync {
    /// Resolve an import statement from `importing_file` into a repo-relative
    /// file path that exists in `file_index`. Return `None` if resolution fails
    /// or the import is external.
    ///
    /// * `import` — the parsed import statement to resolve.
    /// * `importing_file` — path of the file containing the import
    ///   (repo-relative, like the paths held by `file_index`).
    /// * `file_index` — index of known files plus per-language root hints.
    fn resolve(
        &self,
        import: &ImportStatement,
        importing_file: &str,
        file_index: &FileIndex,
    ) -> Option<String>;

    /// The language this resolver handles.
    ///
    /// Only a single `Language` is reported here, even though
    /// `ResolverRegistry::new()` may register one resolver type under
    /// several languages.
    fn language(&self) -> Language;

    /// A short human-readable name for debugging and logging.
    fn name(&self) -> &'static str;
}

// ── FileIndex ───────────────────────────────────────────────────────────────

/// Index of all known repo-relative file paths for O(1) existence checks.
///
/// Wraps a `HashSet<String>` with helper methods that language resolvers
/// commonly need. Linear-scan helpers (`files_with_prefix`, `files_with_stem`)
/// are acceptable at Layer 0 scale — optimize if benchmarks show a hot spot.
///
/// The linear-scan helpers return their matches sorted lexicographically so
/// that resolvers which pick "the first match" behave reproducibly; a
/// `HashSet` on its own iterates in an arbitrary order.
pub struct FileIndex {
    /// Every known repo-relative file path.
    files: HashSet<String>,
    /// Repository root on disk; `None` disables [`FileIndex::read_file`].
    root: Option<PathBuf>,
    /// Crate root prefixes for Rust workspace resolution (e.g. `"src/"`,
    /// `"crates/regex/src/"`). Sorted longest-first so workspace member
    /// paths take precedence over the repo-root `src/`.
    crate_roots: Vec<String>,
    /// Map from workspace member crate name (snake_case, as used in `use`
    /// statements) to its crate root path (e.g. `"crates/regex/src/"`).
    /// Populated from each member's `[package].name` in Cargo.toml.
    /// Empty for non-Rust or single-crate projects.
    workspace_members: HashMap<String, String>,
    /// Scala source root prefixes discovered from file paths during init.
    /// E.g. `["zio-json/shared/src/main/scala/", "zio-json/jvm/src/test/scala/"]`.
    /// Used by the Scala resolver to find files in multi-project sbt layouts.
    scala_source_roots: Vec<String>,
    /// Ruby/Rails Zeitwerk autoload root directories discovered from file paths.
    /// E.g. `["app/models/", "app/controllers/", "core/app/models/"]`.
    /// Used by the Ruby resolver to map constant names to file paths.
    ruby_autoload_roots: Vec<String>,
    /// Ruby lib/ root directories for `require` resolution in monorepo layouts.
    /// E.g. `["lib/", "core/lib/", "api/lib/"]`.
    ruby_lib_roots: Vec<String>,
}

impl FileIndex {
    /// Create a new FileIndex from an iterator of repo-relative paths.
    ///
    /// No repo root is configured, so [`FileIndex::read_file`] always returns
    /// `None`. All language-specific root lists start out empty.
    pub fn new(paths: impl IntoIterator<Item = String>) -> Self {
        Self {
            files: paths.into_iter().collect(),
            root: None,
            crate_roots: Vec::new(),
            workspace_members: HashMap::new(),
            scala_source_roots: Vec::new(),
            ruby_autoload_roots: Vec::new(),
            ruby_lib_roots: Vec::new(),
        }
    }

    /// Create a FileIndex with a repo root for file content access.
    ///
    /// Identical to [`FileIndex::new`] except that `root` is remembered so
    /// [`FileIndex::read_file`] can read files below it.
    pub fn new_with_root(root: PathBuf, paths: impl IntoIterator<Item = String>) -> Self {
        Self {
            root: Some(root),
            ..Self::new(paths)
        }
    }

    /// Set Rust crate root prefixes for workspace resolution.
    /// Roots are sorted longest-first so workspace member paths take
    /// precedence over the repo-root `src/`.
    pub fn set_crate_roots(&mut self, mut roots: Vec<String>) {
        // Stable sort: roots of equal length keep the caller's relative order.
        roots.sort_by_key(|root| std::cmp::Reverse(root.len()));
        self.crate_roots = roots;
    }

    /// Returns the crate root prefix for the given file, if any.
    /// For single-crate projects: `Some("src/")`.
    /// For workspace files at `crates/foo/src/bar.rs`: `Some("crates/foo/src/")`.
    pub fn crate_root_for(&self, file_path: &str) -> Option<&str> {
        // `crate_roots` is longest-first, so the first hit is the most
        // specific root that prefixes `file_path`.
        self.crate_roots
            .iter()
            .map(String::as_str)
            .find(|root| file_path.starts_with(root))
    }

    /// Set workspace member mapping (snake_case crate name → crate root path).
    pub fn set_workspace_members(&mut self, members: HashMap<String, String>) {
        self.workspace_members = members;
    }

    /// Returns the crate root path for a workspace member crate name.
    /// The first segment of an import path (e.g. `"grep_regex"` in
    /// `"grep_regex::matcher::Foo"`) is the name to look up.
    pub fn workspace_member_root(&self, crate_name: &str) -> Option<&str> {
        self.workspace_members.get(crate_name).map(String::as_str)
    }

    /// True if workspace member mappings are configured.
    pub fn has_workspace_members(&self) -> bool {
        !self.workspace_members.is_empty()
    }

    /// Set Scala source root prefixes for multi-project sbt resolution.
    pub fn set_scala_source_roots(&mut self, roots: Vec<String>) {
        self.scala_source_roots = roots;
    }

    /// Returns the discovered Scala source root prefixes.
    pub fn scala_source_roots(&self) -> &[String] {
        &self.scala_source_roots
    }

    /// Set Ruby/Rails Zeitwerk autoload root directories.
    pub fn set_ruby_autoload_roots(&mut self, roots: Vec<String>) {
        self.ruby_autoload_roots = roots;
    }

    /// Returns the discovered Ruby autoload root directories.
    pub fn ruby_autoload_roots(&self) -> &[String] {
        &self.ruby_autoload_roots
    }

    /// Set Ruby lib/ root directories for monorepo require resolution.
    pub fn set_ruby_lib_roots(&mut self, roots: Vec<String>) {
        self.ruby_lib_roots = roots;
    }

    /// Returns the discovered Ruby lib/ root directories.
    pub fn ruby_lib_roots(&self) -> &[String] {
        &self.ruby_lib_roots
    }

    /// Read a file relative to the index root. Returns None if no root is
    /// configured or the file can't be read. Resolvers that need content
    /// access (like Go for go.mod) must tolerate None gracefully.
    pub fn read_file(&self, rel_path: &str) -> Option<String> {
        let root = self.root.as_ref()?;
        // Any I/O or UTF-8 error is deliberately collapsed into `None`.
        std::fs::read_to_string(root.join(rel_path)).ok()
    }

    /// Check if a repo-relative path exists in the index.
    pub fn contains(&self, path: &str) -> bool {
        self.files.contains(path)
    }

    /// Find all files whose path starts with the given prefix.
    ///
    /// Returns references into the index (no path strings are cloned),
    /// sorted lexicographically so the result is the same on every run.
    pub fn files_with_prefix(&self, prefix: &str) -> Vec<&String> {
        let mut matches: Vec<&String> = self
            .files
            .iter()
            .filter(|path| path.starts_with(prefix))
            .collect();
        // HashSet iteration order is arbitrary; sort for reproducible output.
        matches.sort_unstable();
        matches
    }

    /// Find all files whose stem (filename without extension) matches.
    /// Useful for Go package resolution where `foo.go` matches package `foo`.
    ///
    /// Returns references into the index, sorted lexicographically so the
    /// result is the same on every run.
    pub fn files_with_stem(&self, stem: &str) -> Vec<&String> {
        let mut matches: Vec<&String> = self
            .files
            .iter()
            .filter(|path| {
                std::path::Path::new(path.as_str())
                    .file_stem()
                    .and_then(|s| s.to_str())
                    == Some(stem)
            })
            .collect();
        // HashSet iteration order is arbitrary; sort for reproducible output.
        matches.sort_unstable();
        matches
    }
}

// ── ResolverRegistry ────────────────────────────────────────────────────────

/// Language-keyed dispatch table of import resolvers.
///
/// Built once per `build_edges` call. A language with no entry in the table
/// yields `None` for every import, so no edges are created for it.
pub struct ResolverRegistry {
    resolvers: HashMap<Language, Box<dyn LanguageResolver>>,
}

impl ResolverRegistry {
    /// Build a registry pre-populated with every resolver implemented so far.
    pub fn new() -> Self {
        let mut table: HashMap<Language, Box<dyn LanguageResolver>> = HashMap::new();

        // Small local helper so each registration reads as a single line.
        let mut register = |language: Language, resolver: Box<dyn LanguageResolver>| {
            table.insert(language, resolver);
        };

        register(Language::Rust, Box::new(rust::RustResolver));
        register(Language::Python, Box::new(python::PythonResolver));
        // TypeScript and JavaScript are both served by the TypeScript resolver.
        register(Language::TypeScript, Box::new(typescript::TypeScriptResolver));
        register(Language::JavaScript, Box::new(typescript::TypeScriptResolver));
        register(Language::Go, Box::new(go::GoResolver));
        register(Language::Java, Box::new(java::JavaResolver));
        register(Language::C, Box::new(c::CResolver));
        register(Language::Cpp, Box::new(cpp::CppResolver));
        register(Language::Ruby, Box::new(ruby::RubyResolver));
        register(Language::Scala, Box::new(scala::ScalaResolver));
        register(Language::Elixir, Box::new(elixir::ElixirResolver));
        register(Language::Haskell, Box::new(haskell::HaskellResolver));

        Self { resolvers: table }
    }

    /// Resolve `import` (found in `importing_file`) using the resolver
    /// registered for `language`.
    ///
    /// Yields `None` when any of the following holds:
    /// - the import's kind is `External` (short-circuited, no resolver is consulted)
    /// - `language` has no registered resolver
    /// - the resolver finds no matching file
    pub fn resolve(
        &self,
        import: &ImportStatement,
        importing_file: &str,
        language: Language,
        file_index: &FileIndex,
    ) -> Option<String> {
        // The parser has already classified external imports; skip them outright.
        let is_external = import.kind == ImportKind::External;
        if is_external {
            return None;
        }

        self.resolvers
            .get(&language)
            .and_then(|resolver| resolver.resolve(import, importing_file, file_index))
    }
}

impl Default for ResolverRegistry {
    /// Same as [`ResolverRegistry::new`].
    fn default() -> Self {
        Self::new()
    }
}

// ── Shared helpers ─────────────────────────────────────────────────────────

/// Convert a PascalCase / CamelCase string to snake_case.
///
/// Handles acronyms and digit boundaries correctly:
/// - `UserNotification` → `user_notification`
/// - `HTTPServer` → `http_server`
/// - `JSONParser` → `json_parser`
/// - `API` → `api`
/// - `FooID` → `foo_id`
/// - `V2Parser` → `v2_parser`
/// - `XMLParserV2` → `xml_parser_v2`
///
/// Uppercase detection and lowercasing are both Unicode-aware, so non-ASCII
/// capitals are lowered as well (`UserÉtat` → `user_état`). Characters that
/// are not uppercase are copied through unchanged.
///
/// Used by the Ruby and Elixir resolvers for Zeitwerk / Mix path conventions.
pub(crate) fn camel_to_snake(s: &str) -> String {
    // A few spare bytes for the underscores that get inserted.
    let mut result = String::with_capacity(s.len() + 4);
    let mut prev: Option<char> = None;
    let mut chars = s.chars().peekable();

    while let Some(c) = chars.next() {
        if c.is_uppercase() {
            // Never emit a leading underscore: only consider a boundary when
            // there is a preceding character.
            if let Some(p) = prev {
                let next_is_lower = chars.peek().is_some_and(|n| n.is_lowercase());
                // Insert underscore at word boundaries:
                // - lowercase/digit → uppercase: userN... → user_n...
                // - uppercase → uppercase+lowercase (acronym end): HTTP_S... → http_s...
                if p.is_lowercase() || p.is_ascii_digit() || (p.is_uppercase() && next_is_lower) {
                    result.push('_');
                }
            }
            // `to_lowercase` may expand to several chars and, unlike
            // `to_ascii_lowercase`, also lowers non-ASCII capitals.
            result.extend(c.to_lowercase());
        } else {
            result.push(c);
        }
        prev = Some(c);
    }

    result
}

// ── Tests ───────────────────────────────────────────────────────────────────

// Unit tests for `FileIndex`, `ResolverRegistry` dispatch, and `camel_to_snake`.
#[cfg(test)]
mod tests {
    use super::*;

    /// `contains` reports exact membership of indexed paths.
    #[test]
    fn file_index_contains() {
        let idx = FileIndex::new(vec!["src/main.rs".into(), "src/lib.rs".into()]);
        assert!(idx.contains("src/main.rs"));
        assert!(!idx.contains("src/foo.rs"));
    }

    /// `files_with_prefix` returns only the paths under the given prefix.
    #[test]
    fn file_index_prefix() {
        let idx = FileIndex::new(vec![
            "src/store/db.rs".into(),
            "src/store/mod.rs".into(),
            "src/main.rs".into(),
        ]);
        let results = idx.files_with_prefix("src/store/");
        assert_eq!(results.len(), 2);
    }

    /// `files_with_stem` matches on the file stem regardless of directory or
    /// extension.
    #[test]
    fn file_index_stem() {
        let idx = FileIndex::new(vec![
            "src/utils.rs".into(),
            "lib/utils.py".into(),
            "src/main.rs".into(),
        ]);
        let results = idx.files_with_stem("utils");
        assert_eq!(results.len(), 2);
    }

    /// Imports classified as `External` resolve to `None`.
    #[test]
    fn registry_skips_external() {
        let registry = ResolverRegistry::new();
        let idx = FileIndex::new(vec!["src/main.rs".into()]);
        let import = ImportStatement::new("react", ImportKind::External, 1);
        assert_eq!(
            registry.resolve(&import, "src/app.ts", Language::TypeScript, &idx),
            None
        );
    }

    /// Asserts that a `fmt` import in a Go file resolves to `None`.
    ///
    /// NOTE(review): `Language::Go` *is* registered in
    /// `ResolverRegistry::new()`, so despite its name this test goes through
    /// the Go resolver rather than the "no resolver registered" path —
    /// consider renaming it or using a language without a resolver.
    #[test]
    fn registry_returns_none_for_unregistered_language() {
        let registry = ResolverRegistry::new();
        let idx = FileIndex::new(vec!["main.go".into()]);
        let import = ImportStatement::new("fmt", ImportKind::Normal, 1);
        assert_eq!(
            registry.resolve(&import, "main.go", Language::Go, &idx),
            None
        );
    }

    // ── camel_to_snake ─────────────────────────────────────────────────────

    /// A single capitalised word is just lowercased.
    #[test]
    fn camel_to_snake_simple_word() {
        assert_eq!(camel_to_snake("User"), "user");
        assert_eq!(camel_to_snake("Router"), "router");
    }

    /// Each lowercase → uppercase transition becomes an underscore.
    #[test]
    fn camel_to_snake_multi_word() {
        assert_eq!(camel_to_snake("UserNotification"), "user_notification");
        assert_eq!(camel_to_snake("MyApp"), "my_app");
        assert_eq!(
            camel_to_snake("ApplicationController"),
            "application_controller"
        );
    }

    /// Leading or standalone acronyms stay together as one word.
    #[test]
    fn camel_to_snake_acronyms() {
        assert_eq!(camel_to_snake("HTTPServer"), "http_server");
        assert_eq!(camel_to_snake("JSONParser"), "json_parser");
        assert_eq!(camel_to_snake("XMLParser"), "xml_parser");
        assert_eq!(camel_to_snake("API"), "api");
        assert_eq!(camel_to_snake("HTTP"), "http");
    }

    /// An acronym at the end is split from the preceding word only once.
    #[test]
    fn camel_to_snake_trailing_acronym() {
        assert_eq!(camel_to_snake("FooID"), "foo_id");
        assert_eq!(camel_to_snake("UserAPI"), "user_api");
    }

    /// A digit followed by an uppercase letter starts a new word.
    #[test]
    fn camel_to_snake_digit_boundaries() {
        assert_eq!(camel_to_snake("V2Parser"), "v2_parser");
        assert_eq!(camel_to_snake("XMLParserV2"), "xml_parser_v2");
    }

    /// Input that is already snake_case passes through unchanged.
    #[test]
    fn camel_to_snake_already_lowercase() {
        assert_eq!(camel_to_snake("already_snake"), "already_snake");
    }

    /// The empty string maps to the empty string.
    #[test]
    fn camel_to_snake_empty() {
        assert_eq!(camel_to_snake(""), "");
    }

    /// Single characters: uppercase is lowered, lowercase is kept.
    #[test]
    fn camel_to_snake_single_char() {
        assert_eq!(camel_to_snake("A"), "a");
        assert_eq!(camel_to_snake("x"), "x");
    }
}