Skip to main content

mati_core/analysis/
mod.rs

1// Layer 0 — static analysis engine (M-06)
2// Parallel file walker (ignore + rayon), tree-sitter parsing,
3// git2 history mining, dependency parsing (Cargo.toml, package.json, go.mod)
4// Target: <200ms on a 250-file Rust project
5
6use std::collections::HashSet;
7
8use crate::store::record::FileRecord;
9
10pub mod blast_radius;
11pub mod claude_md;
12pub mod clusters;
13pub mod deps;
14pub mod edges;
15pub mod enrich_signals;
16pub mod git;
17pub mod onboarding;
18pub mod parser;
19pub mod propagation;
20pub mod reparse;
21pub mod resolvers;
22pub mod walker;
23
24pub use claude_md::{import_claude_md, ClaudeMdImport};
25pub use deps::{
26    dep_display_name_from_key, dep_record_key, parse_dep_key, parse_dependencies, DepEcosystem,
27    DepEntry, DepSignals, DepVersion, ManifestKind,
28};
29pub use edges::{build_edges, build_edges_with_root, Layer0Edges};
30pub use git::{mine_git_history, GitSignals};
31pub use parser::{hash_and_parse_parallel, parse_file, parse_files_parallel, StaticFileAnalysis};
32pub use walker::{Language, WalkedFile, Walker};
33
34pub(crate) fn public_api_symbols(analysis: &StaticFileAnalysis) -> Vec<String> {
35    let mut seen = HashSet::new();
36    let mut symbols =
37        Vec::with_capacity(analysis.entry_points.len() + analysis.exported_types.len());
38
39    for symbol in analysis
40        .entry_points
41        .iter()
42        .chain(analysis.exported_types.iter())
43    {
44        if seen.insert(symbol.as_str()) {
45            symbols.push(symbol.clone());
46        }
47    }
48
49    symbols
50}
51
52/// Build one `FileRecord` from the parsed Layer 0 signals for a file.
53///
54/// If the parser extracted a module-level doc comment (`analysis.module_doc`),
55/// it is used as the initial `purpose`. Records with a purpose are auto-promoted
56/// to `additionalContext` quality in the caller (`init.rs`).
57pub fn build_file_record(
58    file: &WalkedFile,
59    analysis: &StaticFileAnalysis,
60    git: Option<&GitSignals>,
61    hotspot_files: Option<&HashSet<String>>,
62    last_modified_session: u64,
63) -> FileRecord {
64    let path = file.rel_path.clone();
65    let (change_frequency, last_author, is_hotspot) = match git {
66        Some(signals) => (
67            signals.change_frequency.get(&path).copied().unwrap_or(0),
68            signals.last_authors.get(&path).cloned(),
69            hotspot_files
70                .map(|hotspots| hotspots.contains(&path))
71                .unwrap_or(false),
72        ),
73        None => (0, None, false),
74    };
75
76    let token_cost_estimate = (file.size_bytes / 4).min(u32::MAX as u64) as u32;
77    let public_api = public_api_symbols(analysis);
78
79    let mut fr = FileRecord::layer0_stub(
80        path,
81        public_api,
82        analysis.imports.iter().map(|i| i.path.clone()).collect(),
83        analysis.todos.clone(),
84        analysis.unsafe_count,
85        analysis.unwrap_count,
86        change_frequency,
87        last_author,
88        is_hotspot,
89        token_cost_estimate,
90        last_modified_session,
91    );
92
93    // Propagate author-written doc comment to purpose — gives immediate
94    // additionalContext value after `mati init` with no LLM calls.
95    if let Some(doc) = &analysis.module_doc {
96        fr.purpose = doc.clone();
97    }
98
99    fr.content_hash = analysis.content_hash.clone();
100    fr.line_count = analysis.line_count;
101
102    fr
103}
104
105/// Build layer-0 file records for a batch of parsed files.
106pub fn build_file_records(
107    files: &[WalkedFile],
108    analyses: &[StaticFileAnalysis],
109    git: Option<&GitSignals>,
110    last_modified_session: u64,
111) -> Vec<FileRecord> {
112    assert_eq!(
113        files.len(),
114        analyses.len(),
115        "build_file_records expects one analysis per walked file"
116    );
117
118    let hotspot_files = git.map(|signals| {
119        signals
120            .hotspot_files
121            .iter()
122            .cloned()
123            .collect::<HashSet<_>>()
124    });
125    let hotspot_files = hotspot_files.as_ref();
126
127    files
128        .iter()
129        .zip(analyses)
130        .map(|(file, analysis)| {
131            build_file_record(file, analysis, git, hotspot_files, last_modified_session)
132        })
133        .collect()
134}
135
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::parser::{ImportKind, ImportStatement};
    use crate::store::record::TodoComment;

    /// Rust `WalkedFile` fixture located under `/repo` with a zero mtime.
    fn rust_file(rel_path: &str, size_bytes: u64) -> WalkedFile {
        WalkedFile {
            abs_path: std::path::PathBuf::from(format!("/repo/{}", rel_path)),
            rel_path: rel_path.to_string(),
            language: Language::Rust,
            size_bytes,
            mtime_secs: 0,
        }
    }

    /// Rust analysis fixture with every signal empty or zero; individual
    /// tests override just the fields they care about via struct update.
    fn empty_analysis(path: &str) -> StaticFileAnalysis {
        StaticFileAnalysis {
            path: path.to_string(),
            language: Language::Rust,
            entry_points: vec![],
            exported_types: vec![],
            imports: vec![],
            todos: vec![],
            unsafe_count: 0,
            unwrap_count: 0,
            panic_count: 0,
            branch_count: 0,
            module_doc: None,
            content_hash: None,
            line_count: 0,
        }
    }

    #[test]
    fn build_file_record_uses_layer0_defaults_and_git_signals() {
        let analysis = StaticFileAnalysis {
            entry_points: vec!["run".to_string()],
            imports: vec![ImportStatement::new("crate::utils", ImportKind::Normal, 1)],
            todos: vec![TodoComment {
                text: "TODO: tighten docs".to_string(),
                line: 12,
                kind: crate::store::record::TodoKind::Todo,
            }],
            unsafe_count: 1,
            unwrap_count: 2,
            branch_count: 3,
            ..empty_analysis("src/lib.rs")
        };

        let mut git = GitSignals::empty();
        git.change_frequency.insert("src/lib.rs".to_string(), 9);
        git.last_authors
            .insert("src/lib.rs".to_string(), "ioni".to_string());
        git.hotspot_files.push("src/lib.rs".to_string());

        let file = rust_file("src/lib.rs", 400);
        let hotspots: HashSet<String> = git.hotspot_files.iter().cloned().collect();

        let record = build_file_record(&file, &analysis, Some(&git), Some(&hotspots), 1234);

        // Signals taken from the static analysis.
        assert_eq!(record.path, "src/lib.rs");
        assert!(record.purpose.is_empty());
        assert_eq!(record.entry_points, vec!["run".to_string()]);
        assert_eq!(record.imports, vec!["crate::utils".to_string()]);
        assert_eq!(record.todos.len(), 1);
        assert_eq!(record.unsafe_count, 1);
        assert_eq!(record.unwrap_count, 2);

        // Signals taken from git history.
        assert_eq!(record.change_frequency, 9);
        assert_eq!(record.last_author.as_deref(), Some("ioni"));
        assert!(record.is_hotspot);

        // 400 bytes / 4 bytes-per-token.
        assert_eq!(record.token_cost_estimate, 100);
        assert_eq!(record.last_modified_session, 1234);
    }

    #[test]
    fn module_doc_propagates_to_purpose() {
        let analysis = StaticFileAnalysis {
            module_doc: Some("Handles JWT authentication.".to_string()),
            ..empty_analysis("src/auth.rs")
        };
        let file = rust_file("src/auth.rs", 100);

        let record = build_file_record(&file, &analysis, None, None, 0);

        assert_eq!(record.purpose, "Handles JWT authentication.");
    }

    #[test]
    fn exported_types_are_folded_into_stored_api_surface() {
        // "Widget" appears twice on purpose: the duplicate must be dropped.
        let analysis = StaticFileAnalysis {
            entry_points: vec!["build".to_string()],
            exported_types: vec!["Widget".to_string(), "Widget".to_string()],
            ..empty_analysis("src/models.rs")
        };
        let file = rust_file("src/models.rs", 100);

        let record = build_file_record(&file, &analysis, None, None, 0);

        assert_eq!(
            record.entry_points,
            vec!["build".to_string(), "Widget".to_string()]
        );
    }

    #[test]
    fn build_file_records_is_stable_for_missing_git_signals() {
        let files = vec![rust_file("src/main.rs", 8)];
        let analyses = vec![empty_analysis("src/main.rs")];

        let records = build_file_records(&files, &analyses, None, 55);

        assert_eq!(records.len(), 1);
        let only = &records[0];
        assert_eq!(only.path, "src/main.rs");
        assert_eq!(only.change_frequency, 0);
        assert!(only.last_author.is_none());
        assert!(!only.is_hotspot);
        // 8 bytes / 4 bytes-per-token.
        assert_eq!(only.token_cost_estimate, 2);
    }
}