Skip to main content

mati_core/analysis/
mod.rs

// Layer 0 — static analysis engine (M-06).
// Combines a parallel file walker (ignore + rayon), tree-sitter parsing,
// git2 history mining, and dependency parsing (Cargo.toml, package.json, go.mod).
// Performance target: under 200 ms on a 250-file Rust project.
5
6use std::collections::HashSet;
7
8use crate::store::record::FileRecord;
9
10pub mod blast_radius;
11pub mod claude_md;
12pub mod clusters;
13pub mod deps;
14pub mod edges;
15pub mod enrich_signals;
16pub mod git;
17pub mod parser;
18pub mod propagation;
19pub mod reparse;
20pub mod resolvers;
21pub mod walker;
22
23pub use claude_md::{import_claude_md, ClaudeMdImport};
24pub use deps::{
25    dep_display_name_from_key, dep_record_key, parse_dep_key, parse_dependencies, DepEcosystem,
26    DepEntry, DepSignals, DepVersion, ManifestKind,
27};
28pub use edges::{build_edges, build_edges_with_root, Layer0Edges};
29pub use git::{mine_git_history, GitSignals};
30pub use parser::{hash_and_parse_parallel, parse_file, parse_files_parallel, StaticFileAnalysis};
31pub use walker::{Language, WalkedFile, Walker};
32
33pub(crate) fn public_api_symbols(analysis: &StaticFileAnalysis) -> Vec<String> {
34    let mut seen = HashSet::new();
35    let mut symbols =
36        Vec::with_capacity(analysis.entry_points.len() + analysis.exported_types.len());
37
38    for symbol in analysis
39        .entry_points
40        .iter()
41        .chain(analysis.exported_types.iter())
42    {
43        if seen.insert(symbol.as_str()) {
44            symbols.push(symbol.clone());
45        }
46    }
47
48    symbols
49}
50
51/// Build one `FileRecord` from the parsed Layer 0 signals for a file.
52///
53/// If the parser extracted a module-level doc comment (`analysis.module_doc`),
54/// it is used as the initial `purpose`. Records with a purpose are auto-promoted
55/// to `additionalContext` quality in the caller (`init.rs`).
56pub fn build_file_record(
57    file: &WalkedFile,
58    analysis: &StaticFileAnalysis,
59    git: Option<&GitSignals>,
60    hotspot_files: Option<&HashSet<String>>,
61    last_modified_session: u64,
62) -> FileRecord {
63    let path = file.rel_path.clone();
64    let (change_frequency, last_author, is_hotspot) = match git {
65        Some(signals) => (
66            signals.change_frequency.get(&path).copied().unwrap_or(0),
67            signals.last_authors.get(&path).cloned(),
68            hotspot_files
69                .map(|hotspots| hotspots.contains(&path))
70                .unwrap_or(false),
71        ),
72        None => (0, None, false),
73    };
74
75    let token_cost_estimate = (file.size_bytes / 4).min(u32::MAX as u64) as u32;
76    let public_api = public_api_symbols(analysis);
77
78    let mut fr = FileRecord::layer0_stub(
79        path,
80        public_api,
81        analysis.imports.iter().map(|i| i.path.clone()).collect(),
82        analysis.todos.clone(),
83        analysis.unsafe_count,
84        analysis.unwrap_count,
85        change_frequency,
86        last_author,
87        is_hotspot,
88        token_cost_estimate,
89        last_modified_session,
90    );
91
92    // Propagate author-written doc comment to purpose — gives immediate
93    // additionalContext value after `mati init` with no LLM calls.
94    if let Some(doc) = &analysis.module_doc {
95        fr.purpose = doc.clone();
96    }
97
98    fr.content_hash = analysis.content_hash.clone();
99    fr.line_count = analysis.line_count;
100
101    fr
102}
103
104/// Build layer-0 file records for a batch of parsed files.
105pub fn build_file_records(
106    files: &[WalkedFile],
107    analyses: &[StaticFileAnalysis],
108    git: Option<&GitSignals>,
109    last_modified_session: u64,
110) -> Vec<FileRecord> {
111    assert_eq!(
112        files.len(),
113        analyses.len(),
114        "build_file_records expects one analysis per walked file"
115    );
116
117    let hotspot_files = git.map(|signals| {
118        signals
119            .hotspot_files
120            .iter()
121            .cloned()
122            .collect::<HashSet<_>>()
123    });
124    let hotspot_files = hotspot_files.as_ref();
125
126    files
127        .iter()
128        .zip(analyses)
129        .map(|(file, analysis)| {
130            build_file_record(file, analysis, git, hotspot_files, last_modified_session)
131        })
132        .collect()
133}
134
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::parser::{ImportKind, ImportStatement};
    use crate::store::record::{TodoComment, TodoKind};

    /// A Rust-language analysis for `path` with every signal empty or zero.
    /// Tests override only the fields they care about via struct update syntax.
    fn blank_analysis(path: &str) -> StaticFileAnalysis {
        StaticFileAnalysis {
            path: path.to_string(),
            language: Language::Rust,
            entry_points: Vec::new(),
            exported_types: Vec::new(),
            imports: Vec::new(),
            todos: Vec::new(),
            unsafe_count: 0,
            unwrap_count: 0,
            panic_count: 0,
            branch_count: 0,
            module_doc: None,
            content_hash: None,
            line_count: 0,
        }
    }

    /// A walked Rust file rooted at `/repo` with the given relative path and size.
    fn rust_file(rel_path: &str, size_bytes: u64) -> WalkedFile {
        WalkedFile {
            abs_path: std::path::PathBuf::from(format!("/repo/{}", rel_path)),
            rel_path: rel_path.to_string(),
            language: Language::Rust,
            size_bytes,
            mtime_secs: 0,
        }
    }

    #[test]
    fn build_file_record_uses_layer0_defaults_and_git_signals() {
        let analysis = StaticFileAnalysis {
            entry_points: vec!["run".to_string()],
            imports: vec![ImportStatement::new("crate::utils", ImportKind::Normal, 1)],
            todos: vec![TodoComment {
                text: "TODO: tighten docs".to_string(),
                line: 12,
                kind: TodoKind::Todo,
            }],
            unsafe_count: 1,
            unwrap_count: 2,
            branch_count: 3,
            ..blank_analysis("src/lib.rs")
        };

        let mut git = GitSignals::empty();
        git.change_frequency.insert("src/lib.rs".to_string(), 9);
        git.last_authors
            .insert("src/lib.rs".to_string(), "ioni".to_string());
        git.hotspot_files.push("src/lib.rs".to_string());

        let file = rust_file("src/lib.rs", 400);
        let hotspots: HashSet<String> = git.hotspot_files.iter().cloned().collect();

        let record = build_file_record(&file, &analysis, Some(&git), Some(&hotspots), 1234);

        assert_eq!(record.path, "src/lib.rs");
        assert!(record.purpose.is_empty());
        assert_eq!(record.entry_points, vec!["run".to_string()]);
        assert_eq!(record.imports, vec!["crate::utils".to_string()]);
        assert_eq!(record.todos.len(), 1);
        assert_eq!(record.unsafe_count, 1);
        assert_eq!(record.unwrap_count, 2);
        assert_eq!(record.change_frequency, 9);
        assert_eq!(record.last_author.as_deref(), Some("ioni"));
        assert!(record.is_hotspot);
        // 400 bytes / 4 bytes-per-token.
        assert_eq!(record.token_cost_estimate, 100);
        assert_eq!(record.last_modified_session, 1234);
    }

    #[test]
    fn module_doc_propagates_to_purpose() {
        let analysis = StaticFileAnalysis {
            module_doc: Some("Handles JWT authentication.".to_string()),
            ..blank_analysis("src/auth.rs")
        };
        let file = rust_file("src/auth.rs", 100);

        let record = build_file_record(&file, &analysis, None, None, 0);

        assert_eq!(record.purpose, "Handles JWT authentication.");
    }

    #[test]
    fn exported_types_are_folded_into_stored_api_surface() {
        let analysis = StaticFileAnalysis {
            entry_points: vec!["build".to_string()],
            // The duplicate checks that repeated names collapse to one entry.
            exported_types: vec!["Widget".to_string(), "Widget".to_string()],
            ..blank_analysis("src/models.rs")
        };
        let file = rust_file("src/models.rs", 100);

        let record = build_file_record(&file, &analysis, None, None, 0);

        assert_eq!(
            record.entry_points,
            vec!["build".to_string(), "Widget".to_string()]
        );
    }

    #[test]
    fn build_file_records_is_stable_for_missing_git_signals() {
        let files = vec![rust_file("src/main.rs", 8)];
        let analyses = vec![blank_analysis("src/main.rs")];

        let records = build_file_records(&files, &analyses, None, 55);

        assert_eq!(records.len(), 1);
        let only = &records[0];
        assert_eq!(only.path, "src/main.rs");
        assert_eq!(only.change_frequency, 0);
        assert!(only.last_author.is_none());
        assert!(!only.is_hotspot);
        // 8 bytes / 4 bytes-per-token.
        assert_eq!(only.token_cost_estimate, 2);
    }
}