1use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::time::Instant;
10
11use crate::types::{AcbError, AcbResult, Language};
12
13use super::go::GoParser;
14use super::python::PythonParser;
15use super::rust::RustParser;
16use super::treesitter::parse_with_language;
17use super::typescript::TypeScriptParser;
18use super::{LanguageParser, ParseFileError, RawCodeUnit, Severity};
19
20#[derive(Debug, Clone)]
22pub struct ParseOptions {
23 pub languages: Vec<Language>,
25 pub exclude: Vec<String>,
27 pub include_tests: bool,
29 pub max_file_size: usize,
31}
32
33impl Default for ParseOptions {
34 fn default() -> Self {
35 Self {
36 languages: vec![],
37 exclude: vec![
38 "**/node_modules/**".into(),
39 "**/target/**".into(),
40 "**/.git/**".into(),
41 "**/__pycache__/**".into(),
42 "**/venv/**".into(),
43 "**/.venv/**".into(),
44 "**/dist/**".into(),
45 "**/build/**".into(),
46 ],
47 include_tests: true,
48 max_file_size: 10 * 1024 * 1024, }
50 }
51}
52
53#[derive(Debug)]
55pub struct ParseResult {
56 pub units: Vec<RawCodeUnit>,
58 pub errors: Vec<ParseFileError>,
60 pub stats: ParseStats,
62}
63
64#[derive(Debug, Clone)]
66pub struct ParseStats {
67 pub files_parsed: usize,
69 pub files_skipped: usize,
71 pub files_errored: usize,
73 pub total_lines: usize,
75 pub parse_time_ms: u64,
77 pub by_language: HashMap<Language, usize>,
79 pub coverage: ParseCoverageStats,
81}
82
/// Detailed accounting of which files a directory parse saw, skipped, or
/// failed on.
#[derive(Debug, Clone, Default)]
pub struct ParseCoverageStats {
    /// Regular files encountered while walking the directory tree.
    pub files_seen: usize,
    /// Files that passed every collection filter and were queued for parsing.
    pub files_candidate: usize,
    /// Files skipped because their language could not be determined.
    pub skipped_unknown_language: usize,
    /// Files skipped because their language was not in the requested set.
    pub skipped_language_filter: usize,
    /// Files skipped because they matched an exclude pattern.
    pub skipped_excluded_pattern: usize,
    /// Files skipped because they exceeded the size limit.
    pub skipped_too_large: usize,
    /// Files skipped because they were identified as test files.
    pub skipped_test_file: usize,
    /// Files that could not be read.
    pub read_errors: usize,
    /// Files that were read but failed to parse.
    pub parse_errors: usize,
}

impl ParseCoverageStats {
    /// Returns the total number of skipped files across all skip reasons.
    ///
    /// Read and parse errors are not counted as skips.
    pub fn total_skipped(&self) -> usize {
        let skip_counters = [
            self.skipped_unknown_language,
            self.skipped_language_filter,
            self.skipped_excluded_pattern,
            self.skipped_too_large,
            self.skipped_test_file,
        ];
        skip_counters.iter().sum()
    }
}
116
117struct CollectFilesResult {
118 files: Vec<PathBuf>,
119 coverage: ParseCoverageStats,
120}
121
122pub struct Parser {
124 parsers: HashMap<Language, Box<dyn LanguageParser>>,
126}
127
128impl Parser {
129 pub fn new() -> Self {
131 let mut parsers: HashMap<Language, Box<dyn LanguageParser>> = HashMap::new();
132 parsers.insert(Language::Python, Box::new(PythonParser::new()));
133 parsers.insert(Language::Rust, Box::new(RustParser::new()));
134 parsers.insert(Language::TypeScript, Box::new(TypeScriptParser::new()));
135 parsers.insert(Language::JavaScript, Box::new(TypeScriptParser::new()));
136 parsers.insert(Language::Go, Box::new(GoParser::new()));
137 Self { parsers }
138 }
139
140 pub fn parse_file(&self, path: &Path, content: &str) -> AcbResult<Vec<RawCodeUnit>> {
142 let lang = Language::from_path(path);
143 if lang == Language::Unknown {
144 return Err(AcbError::ParseError {
145 path: path.to_path_buf(),
146 message: "Unknown language".into(),
147 });
148 }
149
150 let parser = self
151 .parsers
152 .get(&lang)
153 .ok_or_else(|| AcbError::ParseError {
154 path: path.to_path_buf(),
155 message: format!("No parser for language: {}", lang),
156 })?;
157
158 let ts_lang = if matches!(
160 path.extension().and_then(|e| e.to_str()),
161 Some("tsx") | Some("jsx")
162 ) {
163 tree_sitter_typescript::language_tsx()
164 } else {
165 lang.tree_sitter_language()
166 .ok_or_else(|| AcbError::ParseError {
167 path: path.to_path_buf(),
168 message: format!("No tree-sitter grammar for: {}", lang),
169 })?
170 };
171
172 let tree = parse_with_language(content, ts_lang)?;
173 parser.extract_units(&tree, content, path)
174 }
175
176 pub fn parse_directory(&self, root: &Path, options: &ParseOptions) -> AcbResult<ParseResult> {
178 let start = Instant::now();
179
180 let collected = self.collect_files(root, options)?;
181 let files = collected.files;
182
183 let mut all_units = Vec::new();
184 let mut all_errors = Vec::new();
185 let mut files_parsed = 0usize;
186 let mut files_errored = 0usize;
187 let mut total_lines = 0usize;
188 let mut by_language: HashMap<Language, usize> = HashMap::new();
189 let mut coverage = collected.coverage;
190
191 for file_path in &files {
192 let content = match std::fs::read_to_string(file_path) {
193 Ok(c) => c,
194 Err(e) => {
195 all_errors.push(ParseFileError {
196 path: file_path.clone(),
197 span: None,
198 message: format!("Could not read file: {}", e),
199 severity: Severity::Error,
200 });
201 files_errored += 1;
202 coverage.read_errors += 1;
203 continue;
204 }
205 };
206
207 if content.len() > options.max_file_size {
209 coverage.skipped_too_large += 1;
210 continue;
211 }
212
213 let lang = Language::from_path(file_path);
214 if lang == Language::Unknown {
215 coverage.skipped_unknown_language += 1;
216 continue;
217 }
218
219 if !options.include_tests {
221 if let Some(parser) = self.parsers.get(&lang) {
222 if parser.is_test_file(file_path, &content) {
223 coverage.skipped_test_file += 1;
224 continue;
225 }
226 }
227 }
228
229 match self.parse_file(file_path, &content) {
230 Ok(units) => {
231 total_lines += content.lines().count();
232 *by_language.entry(lang).or_insert(0) += 1;
233 all_units.extend(units);
234 files_parsed += 1;
235 }
236 Err(e) => {
237 all_errors.push(ParseFileError {
238 path: file_path.clone(),
239 span: None,
240 message: format!("{}", e),
241 severity: Severity::Error,
242 });
243 files_errored += 1;
244 coverage.parse_errors += 1;
245 }
246 }
247 }
248
249 let elapsed = start.elapsed();
250 let files_skipped = coverage.total_skipped();
251
252 Ok(ParseResult {
253 units: all_units,
254 errors: all_errors,
255 stats: ParseStats {
256 files_parsed,
257 files_skipped,
258 files_errored,
259 total_lines,
260 parse_time_ms: elapsed.as_millis() as u64,
261 by_language,
262 coverage,
263 },
264 })
265 }
266
267 pub fn should_parse(&self, path: &Path) -> bool {
269 let lang = Language::from_path(path);
270 lang != Language::Unknown && self.parsers.contains_key(&lang)
271 }
272
273 fn collect_files(&self, root: &Path, options: &ParseOptions) -> AcbResult<CollectFilesResult> {
275 use ignore::WalkBuilder;
276
277 let mut files = Vec::new();
278 let mut coverage = ParseCoverageStats::default();
279
280 let walker = WalkBuilder::new(root).hidden(true).git_ignore(true).build();
281
282 for entry in walker {
283 let entry = match entry {
284 Ok(e) => e,
285 Err(_) => continue,
286 };
287 let path = entry.path();
288
289 if !path.is_file() {
290 continue;
291 }
292 coverage.files_seen += 1;
293
294 let lang = Language::from_path(path);
295 if lang == Language::Unknown {
296 coverage.skipped_unknown_language += 1;
297 continue;
298 }
299
300 if !options.languages.is_empty() && !options.languages.contains(&lang) {
302 coverage.skipped_language_filter += 1;
303 continue;
304 }
305
306 if self.is_excluded(path, &options.exclude) {
308 coverage.skipped_excluded_pattern += 1;
309 continue;
310 }
311
312 files.push(path.to_path_buf());
313 }
314 coverage.files_candidate = files.len();
315
316 Ok(CollectFilesResult { files, coverage })
317 }
318
319 fn is_excluded(&self, path: &Path, excludes: &[String]) -> bool {
321 let path_str = path.to_string_lossy();
322 for pattern in excludes {
323 let pattern_str = pattern.replace("**", "");
325 let pattern_str = pattern_str.trim_matches('/');
326 if !pattern_str.is_empty() && path_str.contains(pattern_str) {
327 return true;
328 }
329 }
330 false
331 }
332}
333
334impl Default for Parser {
335 fn default() -> Self {
336 Self::new()
337 }
338}