1use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::time::Instant;
10
11use crate::types::{AcbError, AcbResult, Language};
12
13use super::cpp::CppParser;
14use super::go::GoParser;
15use super::python::PythonParser;
16use super::rust::RustParser;
17use super::treesitter::parse_with_language;
18use super::typescript::TypeScriptParser;
19use super::{LanguageParser, ParseFileError, RawCodeUnit, Severity};
20
/// Options controlling which files a directory parse considers.
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// Languages to include. An empty list means no language filter is applied.
    pub languages: Vec<Language>,
    /// Exclusion patterns; a file matching any of them is skipped during collection.
    pub exclude: Vec<String>,
    /// When `false`, files that the language parser identifies as test files are skipped.
    pub include_tests: bool,
    /// Maximum file size in bytes; larger files are skipped.
    pub max_file_size: usize,
}
33
34impl Default for ParseOptions {
35 fn default() -> Self {
36 Self {
37 languages: vec![],
38 exclude: vec![
39 "**/node_modules/**".into(),
40 "**/target/**".into(),
41 "**/.git/**".into(),
42 "**/__pycache__/**".into(),
43 "**/venv/**".into(),
44 "**/.venv/**".into(),
45 "**/dist/**".into(),
46 "**/build/**".into(),
47 ],
48 include_tests: true,
49 max_file_size: 10 * 1024 * 1024, }
51 }
52}
53
/// Outcome of parsing a directory tree.
#[derive(Debug)]
pub struct ParseResult {
    /// Code units extracted from every successfully parsed file.
    pub units: Vec<RawCodeUnit>,
    /// One entry per file that could not be read or parsed.
    pub errors: Vec<ParseFileError>,
    /// Aggregate counters and timing for the run.
    pub stats: ParseStats,
}
64
/// Aggregate statistics for one directory parse.
#[derive(Debug, Clone)]
pub struct ParseStats {
    /// Number of files parsed successfully.
    pub files_parsed: usize,
    /// Number of files skipped; the sum of the skip counters in `coverage`.
    pub files_skipped: usize,
    /// Number of files that failed to be read or parsed.
    pub files_errored: usize,
    /// Total line count across successfully parsed files.
    pub total_lines: usize,
    /// Elapsed time of the directory parse, in milliseconds.
    pub parse_time_ms: u64,
    /// Number of successfully parsed files per language.
    pub by_language: HashMap<Language, usize>,
    /// Detailed breakdown of seen, skipped and failed files.
    pub coverage: ParseCoverageStats,
}
83
/// Per-reason accounting of what happened to the files encountered in a run.
#[derive(Debug, Clone, Default)]
pub struct ParseCoverageStats {
    /// Regular files yielded by the directory walk.
    pub files_seen: usize,
    /// Files that passed all collection-time filters and were handed to parsing.
    pub files_candidate: usize,
    /// Files skipped because their language could not be determined from the path.
    pub skipped_unknown_language: usize,
    /// Files skipped because their language is not in the requested language list.
    pub skipped_language_filter: usize,
    /// Files skipped because they matched an exclusion pattern.
    pub skipped_excluded_pattern: usize,
    /// Files skipped because they exceeded the maximum file size.
    pub skipped_too_large: usize,
    /// Files skipped because they were identified as test files while tests were excluded.
    pub skipped_test_file: usize,
    /// Files that could not be read.
    pub read_errors: usize,
    /// Files that were read but failed to parse.
    pub parse_errors: usize,
}
106
107impl ParseCoverageStats {
108 pub fn total_skipped(&self) -> usize {
110 self.skipped_unknown_language
111 + self.skipped_language_filter
112 + self.skipped_excluded_pattern
113 + self.skipped_too_large
114 + self.skipped_test_file
115 }
116}
117
/// Result of the file-collection pass that precedes parsing.
struct CollectFilesResult {
    /// Paths that passed every collection-time filter.
    files: Vec<PathBuf>,
    /// Counters recorded while walking and filtering.
    coverage: ParseCoverageStats,
}
122
/// Dispatches files to the language-specific parser registered for their language.
pub struct Parser {
    /// Registered language parsers, keyed by the language they handle.
    parsers: HashMap<Language, Box<dyn LanguageParser>>,
}
128
129impl Parser {
130 pub fn new() -> Self {
132 let mut parsers: HashMap<Language, Box<dyn LanguageParser>> = HashMap::new();
133 parsers.insert(Language::Python, Box::new(PythonParser::new()));
134 parsers.insert(Language::Rust, Box::new(RustParser::new()));
135 parsers.insert(Language::TypeScript, Box::new(TypeScriptParser::new()));
136 parsers.insert(Language::JavaScript, Box::new(TypeScriptParser::new()));
137 parsers.insert(Language::Go, Box::new(GoParser::new()));
138 parsers.insert(Language::Cpp, Box::new(CppParser::new()));
139 Self { parsers }
140 }
141
142 pub fn parse_file(&self, path: &Path, content: &str) -> AcbResult<Vec<RawCodeUnit>> {
144 let lang = Language::from_path(path);
145 if lang == Language::Unknown {
146 return Err(AcbError::ParseError {
147 path: path.to_path_buf(),
148 message: "Unknown language".into(),
149 });
150 }
151
152 let parser = self
153 .parsers
154 .get(&lang)
155 .ok_or_else(|| AcbError::ParseError {
156 path: path.to_path_buf(),
157 message: format!("No parser for language: {}", lang),
158 })?;
159
160 let ts_lang = if matches!(
162 path.extension().and_then(|e| e.to_str()),
163 Some("tsx") | Some("jsx")
164 ) {
165 tree_sitter_typescript::language_tsx()
166 } else {
167 lang.tree_sitter_language()
168 .ok_or_else(|| AcbError::ParseError {
169 path: path.to_path_buf(),
170 message: format!("No tree-sitter grammar for: {}", lang),
171 })?
172 };
173
174 let tree = parse_with_language(content, ts_lang)?;
175 parser.extract_units(&tree, content, path)
176 }
177
178 pub fn parse_directory(&self, root: &Path, options: &ParseOptions) -> AcbResult<ParseResult> {
180 let start = Instant::now();
181
182 let collected = self.collect_files(root, options)?;
183 let files = collected.files;
184
185 let mut all_units = Vec::new();
186 let mut all_errors = Vec::new();
187 let mut files_parsed = 0usize;
188 let mut files_errored = 0usize;
189 let mut total_lines = 0usize;
190 let mut by_language: HashMap<Language, usize> = HashMap::new();
191 let mut coverage = collected.coverage;
192
193 for file_path in &files {
194 let content = match std::fs::read_to_string(file_path) {
195 Ok(c) => c,
196 Err(e) => {
197 all_errors.push(ParseFileError {
198 path: file_path.clone(),
199 span: None,
200 message: format!("Could not read file: {}", e),
201 severity: Severity::Error,
202 });
203 files_errored += 1;
204 coverage.read_errors += 1;
205 continue;
206 }
207 };
208
209 if content.len() > options.max_file_size {
211 coverage.skipped_too_large += 1;
212 continue;
213 }
214
215 let lang = Language::from_path(file_path);
216 if lang == Language::Unknown {
217 coverage.skipped_unknown_language += 1;
218 continue;
219 }
220
221 if !options.include_tests {
223 if let Some(parser) = self.parsers.get(&lang) {
224 if parser.is_test_file(file_path, &content) {
225 coverage.skipped_test_file += 1;
226 continue;
227 }
228 }
229 }
230
231 match self.parse_file(file_path, &content) {
232 Ok(units) => {
233 total_lines += content.lines().count();
234 *by_language.entry(lang).or_insert(0) += 1;
235 all_units.extend(units);
236 files_parsed += 1;
237 }
238 Err(e) => {
239 all_errors.push(ParseFileError {
240 path: file_path.clone(),
241 span: None,
242 message: format!("{}", e),
243 severity: Severity::Error,
244 });
245 files_errored += 1;
246 coverage.parse_errors += 1;
247 }
248 }
249 }
250
251 let elapsed = start.elapsed();
252 let files_skipped = coverage.total_skipped();
253
254 Ok(ParseResult {
255 units: all_units,
256 errors: all_errors,
257 stats: ParseStats {
258 files_parsed,
259 files_skipped,
260 files_errored,
261 total_lines,
262 parse_time_ms: elapsed.as_millis() as u64,
263 by_language,
264 coverage,
265 },
266 })
267 }
268
269 pub fn should_parse(&self, path: &Path) -> bool {
271 let lang = Language::from_path(path);
272 lang != Language::Unknown && self.parsers.contains_key(&lang)
273 }
274
275 fn collect_files(&self, root: &Path, options: &ParseOptions) -> AcbResult<CollectFilesResult> {
277 use ignore::WalkBuilder;
278
279 let mut files = Vec::new();
280 let mut coverage = ParseCoverageStats::default();
281
282 let walker = WalkBuilder::new(root).hidden(true).git_ignore(true).build();
283
284 for entry in walker {
285 let entry = match entry {
286 Ok(e) => e,
287 Err(_) => continue,
288 };
289 let path = entry.path();
290
291 if !path.is_file() {
292 continue;
293 }
294 coverage.files_seen += 1;
295
296 let lang = Language::from_path(path);
297 if lang == Language::Unknown {
298 coverage.skipped_unknown_language += 1;
299 continue;
300 }
301
302 if !options.languages.is_empty() && !options.languages.contains(&lang) {
304 coverage.skipped_language_filter += 1;
305 continue;
306 }
307
308 if self.is_excluded(path, &options.exclude) {
310 coverage.skipped_excluded_pattern += 1;
311 continue;
312 }
313
314 files.push(path.to_path_buf());
315 }
316 coverage.files_candidate = files.len();
317
318 Ok(CollectFilesResult { files, coverage })
319 }
320
321 fn is_excluded(&self, path: &Path, excludes: &[String]) -> bool {
323 let path_str = path.to_string_lossy();
324 for pattern in excludes {
325 let pattern_str = pattern.replace("**", "");
327 let pattern_str = pattern_str.trim_matches('/');
328 if !pattern_str.is_empty() && path_str.contains(pattern_str) {
329 return true;
330 }
331 }
332 false
333 }
334}
335
336impl Default for Parser {
337 fn default() -> Self {
338 Self::new()
339 }
340}