1use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::time::Instant;
10
11use crate::types::{AcbError, AcbResult, Language};
12
13use super::cpp::CppParser;
14use super::csharp::CSharpParser;
15use super::go::GoParser;
16use super::java::JavaParser;
17use super::python::PythonParser;
18use super::rust::RustParser;
19use super::treesitter::parse_with_language;
20use super::typescript::TypeScriptParser;
21use super::{LanguageParser, ParseFileError, RawCodeUnit, Severity};
22
23#[derive(Debug, Clone)]
25pub struct ParseOptions {
26 pub languages: Vec<Language>,
28 pub exclude: Vec<String>,
30 pub include_tests: bool,
32 pub max_file_size: usize,
34}
35
36impl Default for ParseOptions {
37 fn default() -> Self {
38 Self {
39 languages: vec![],
40 exclude: vec![
41 "**/node_modules/**".into(),
42 "**/target/**".into(),
43 "**/.git/**".into(),
44 "**/__pycache__/**".into(),
45 "**/venv/**".into(),
46 "**/.venv/**".into(),
47 "**/dist/**".into(),
48 "**/build/**".into(),
49 ],
50 include_tests: true,
51 max_file_size: 10 * 1024 * 1024, }
53 }
54}
55
56#[derive(Debug)]
58pub struct ParseResult {
59 pub units: Vec<RawCodeUnit>,
61 pub errors: Vec<ParseFileError>,
63 pub stats: ParseStats,
65}
66
67#[derive(Debug, Clone)]
69pub struct ParseStats {
70 pub files_parsed: usize,
72 pub files_skipped: usize,
74 pub files_errored: usize,
76 pub total_lines: usize,
78 pub parse_time_ms: u64,
80 pub by_language: HashMap<Language, usize>,
82 pub coverage: ParseCoverageStats,
84}
85
/// Detailed accounting of what happened to each file encountered during a
/// directory parse.
#[derive(Debug, Clone, Default)]
pub struct ParseCoverageStats {
    /// Regular files encountered while walking the tree.
    pub files_seen: usize,
    /// Files that passed the language and exclusion filters and were handed
    /// to the parsing stage.
    pub files_candidate: usize,
    /// Files skipped because their language could not be determined.
    pub skipped_unknown_language: usize,
    /// Files skipped because their language was not in the requested set.
    pub skipped_language_filter: usize,
    /// Files skipped because they matched an exclusion pattern.
    pub skipped_excluded_pattern: usize,
    /// Files skipped because they exceeded the size limit.
    pub skipped_too_large: usize,
    /// Files skipped because they were identified as test files.
    pub skipped_test_file: usize,
    /// Files that could not be read.
    pub read_errors: usize,
    /// Files that were read but failed to parse.
    pub parse_errors: usize,
    /// Lower-cased file extension -> number of files with that extension whose
    /// language was not recognized.
    pub unsupported_extensions: HashMap<String, usize>,
}

impl ParseCoverageStats {
    /// Returns the total number of skipped files across all skip categories.
    ///
    /// Read and parse errors are failures, not skips, and are not included.
    #[must_use]
    pub fn total_skipped(&self) -> usize {
        self.skipped_unknown_language
            + self.skipped_language_filter
            + self.skipped_excluded_pattern
            + self.skipped_too_large
            + self.skipped_test_file
    }
}
121
122struct CollectFilesResult {
123 files: Vec<PathBuf>,
124 coverage: ParseCoverageStats,
125}
126
127pub struct Parser {
129 parsers: HashMap<Language, Box<dyn LanguageParser>>,
131}
132
133impl Parser {
134 pub fn new() -> Self {
136 let mut parsers: HashMap<Language, Box<dyn LanguageParser>> = HashMap::new();
137 parsers.insert(Language::Python, Box::new(PythonParser::new()));
138 parsers.insert(Language::Rust, Box::new(RustParser::new()));
139 parsers.insert(Language::TypeScript, Box::new(TypeScriptParser::new()));
140 parsers.insert(Language::JavaScript, Box::new(TypeScriptParser::new()));
141 parsers.insert(Language::Go, Box::new(GoParser::new()));
142 parsers.insert(Language::Cpp, Box::new(CppParser::new()));
143 parsers.insert(Language::Java, Box::new(JavaParser::new()));
144 parsers.insert(Language::CSharp, Box::new(CSharpParser::new()));
145 Self { parsers }
146 }
147
148 pub fn parse_file(&self, path: &Path, content: &str) -> AcbResult<Vec<RawCodeUnit>> {
150 let lang = Language::from_path(path);
151 if lang == Language::Unknown {
152 return Err(AcbError::ParseError {
153 path: path.to_path_buf(),
154 message: "Unknown language".into(),
155 });
156 }
157
158 let parser = self
159 .parsers
160 .get(&lang)
161 .ok_or_else(|| AcbError::ParseError {
162 path: path.to_path_buf(),
163 message: format!("No parser for language: {}", lang),
164 })?;
165
166 let ts_lang = if matches!(
168 path.extension().and_then(|e| e.to_str()),
169 Some("tsx") | Some("jsx")
170 ) {
171 tree_sitter_typescript::language_tsx()
172 } else {
173 lang.tree_sitter_language()
174 .ok_or_else(|| AcbError::ParseError {
175 path: path.to_path_buf(),
176 message: format!("No tree-sitter grammar for: {}", lang),
177 })?
178 };
179
180 let tree = parse_with_language(content, ts_lang)?;
181 parser.extract_units(&tree, content, path)
182 }
183
184 pub fn parse_directory(&self, root: &Path, options: &ParseOptions) -> AcbResult<ParseResult> {
186 let start = Instant::now();
187
188 let collected = self.collect_files(root, options)?;
189 let files = collected.files;
190
191 let mut all_units = Vec::new();
192 let mut all_errors = Vec::new();
193 let mut files_parsed = 0usize;
194 let mut files_errored = 0usize;
195 let mut total_lines = 0usize;
196 let mut by_language: HashMap<Language, usize> = HashMap::new();
197 let mut coverage = collected.coverage;
198
199 for file_path in &files {
200 let content = match std::fs::read_to_string(file_path) {
201 Ok(c) => c,
202 Err(e) => {
203 all_errors.push(ParseFileError {
204 path: file_path.clone(),
205 span: None,
206 message: format!("Could not read file: {}", e),
207 severity: Severity::Error,
208 });
209 files_errored += 1;
210 coverage.read_errors += 1;
211 continue;
212 }
213 };
214
215 if content.len() > options.max_file_size {
217 coverage.skipped_too_large += 1;
218 continue;
219 }
220
221 let lang = Language::from_path(file_path);
222 if lang == Language::Unknown {
223 coverage.skipped_unknown_language += 1;
224 continue;
225 }
226
227 if !options.include_tests {
229 if let Some(parser) = self.parsers.get(&lang) {
230 if parser.is_test_file(file_path, &content) {
231 coverage.skipped_test_file += 1;
232 continue;
233 }
234 }
235 }
236
237 match self.parse_file(file_path, &content) {
238 Ok(units) => {
239 total_lines += content.lines().count();
240 *by_language.entry(lang).or_insert(0) += 1;
241 all_units.extend(units);
242 files_parsed += 1;
243 }
244 Err(e) => {
245 all_errors.push(ParseFileError {
246 path: file_path.clone(),
247 span: None,
248 message: format!("{}", e),
249 severity: Severity::Error,
250 });
251 files_errored += 1;
252 coverage.parse_errors += 1;
253 }
254 }
255 }
256
257 let elapsed = start.elapsed();
258 let files_skipped = coverage.total_skipped();
259
260 Ok(ParseResult {
261 units: all_units,
262 errors: all_errors,
263 stats: ParseStats {
264 files_parsed,
265 files_skipped,
266 files_errored,
267 total_lines,
268 parse_time_ms: elapsed.as_millis() as u64,
269 by_language,
270 coverage,
271 },
272 })
273 }
274
275 pub fn should_parse(&self, path: &Path) -> bool {
277 let lang = Language::from_path(path);
278 lang != Language::Unknown && self.parsers.contains_key(&lang)
279 }
280
281 fn collect_files(&self, root: &Path, options: &ParseOptions) -> AcbResult<CollectFilesResult> {
283 use ignore::WalkBuilder;
284
285 let mut files = Vec::new();
286 let mut coverage = ParseCoverageStats::default();
287
288 let walker = WalkBuilder::new(root).hidden(true).git_ignore(true).build();
289
290 for entry in walker {
291 let entry = match entry {
292 Ok(e) => e,
293 Err(_) => continue,
294 };
295 let path = entry.path();
296
297 if !path.is_file() {
298 continue;
299 }
300 coverage.files_seen += 1;
301
302 let lang = Language::from_path(path);
303 if lang == Language::Unknown {
304 coverage.skipped_unknown_language += 1;
305 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
306 *coverage
307 .unsupported_extensions
308 .entry(ext.to_lowercase())
309 .or_insert(0) += 1;
310 }
311 continue;
312 }
313
314 if !options.languages.is_empty() && !options.languages.contains(&lang) {
316 coverage.skipped_language_filter += 1;
317 continue;
318 }
319
320 if self.is_excluded(path, &options.exclude) {
322 coverage.skipped_excluded_pattern += 1;
323 continue;
324 }
325
326 files.push(path.to_path_buf());
327 }
328 coverage.files_candidate = files.len();
329
330 Ok(CollectFilesResult { files, coverage })
331 }
332
333 fn is_excluded(&self, path: &Path, excludes: &[String]) -> bool {
335 let path_str = path.to_string_lossy();
336 for pattern in excludes {
337 let pattern_str = pattern.replace("**", "");
339 let pattern_str = pattern_str.trim_matches('/');
340 if !pattern_str.is_empty() && path_str.contains(pattern_str) {
341 return true;
342 }
343 }
344 false
345 }
346}
347
348impl Default for Parser {
349 fn default() -> Self {
350 Self::new()
351 }
352}