1use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::time::Instant;
10
11use crate::types::{AcbError, AcbResult, Language};
12
13use super::go::GoParser;
14use super::python::PythonParser;
15use super::rust::RustParser;
16use super::treesitter::parse_with_language;
17use super::typescript::TypeScriptParser;
18use super::{LanguageParser, ParseFileError, RawCodeUnit, Severity};
19
/// Options that control which files a directory parse visits.
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// Languages to include. An empty list places no restriction on language.
    pub languages: Vec<Language>,
    /// Exclusion patterns; a file whose path matches any of them is not parsed.
    pub exclude: Vec<String>,
    /// When `false`, files that the language parser reports as test files are skipped.
    pub include_tests: bool,
    /// Files whose content is longer than this many bytes are skipped.
    pub max_file_size: usize,
}
32
33impl Default for ParseOptions {
34 fn default() -> Self {
35 Self {
36 languages: vec![],
37 exclude: vec![
38 "**/node_modules/**".into(),
39 "**/target/**".into(),
40 "**/.git/**".into(),
41 "**/__pycache__/**".into(),
42 "**/venv/**".into(),
43 "**/.venv/**".into(),
44 "**/dist/**".into(),
45 "**/build/**".into(),
46 ],
47 include_tests: true,
48 max_file_size: 10 * 1024 * 1024, }
50 }
51}
52
/// Outcome of parsing a directory tree.
#[derive(Debug)]
pub struct ParseResult {
    /// Code units extracted from every file that parsed successfully.
    pub units: Vec<RawCodeUnit>,
    /// One entry per file that could not be read or parsed.
    pub errors: Vec<ParseFileError>,
    /// Aggregate counters and timing for the run.
    pub stats: ParseStats,
}
63
/// Aggregate counters describing a directory parse.
#[derive(Debug, Clone)]
pub struct ParseStats {
    /// Number of files that were parsed successfully.
    pub files_parsed: usize,
    /// Number of files skipped (too large, unknown language, or an excluded test file).
    pub files_skipped: usize,
    /// Number of files that could not be read or failed to parse.
    pub files_errored: usize,
    /// Total line count across the successfully parsed files.
    pub total_lines: usize,
    /// Wall-clock duration of the whole run, file collection included, in milliseconds.
    pub parse_time_ms: u64,
    /// Number of successfully parsed files per language.
    pub by_language: HashMap<Language, usize>,
}
80
/// Multi-language parser that hands each file to the backend registered for
/// the language detected from its path.
pub struct Parser {
    /// Registered language backends, keyed by the language they handle.
    parsers: HashMap<Language, Box<dyn LanguageParser>>,
}
86
87impl Parser {
88 pub fn new() -> Self {
90 let mut parsers: HashMap<Language, Box<dyn LanguageParser>> = HashMap::new();
91 parsers.insert(Language::Python, Box::new(PythonParser::new()));
92 parsers.insert(Language::Rust, Box::new(RustParser::new()));
93 parsers.insert(Language::TypeScript, Box::new(TypeScriptParser::new()));
94 parsers.insert(Language::JavaScript, Box::new(TypeScriptParser::new()));
95 parsers.insert(Language::Go, Box::new(GoParser::new()));
96 Self { parsers }
97 }
98
99 pub fn parse_file(&self, path: &Path, content: &str) -> AcbResult<Vec<RawCodeUnit>> {
101 let lang = Language::from_path(path);
102 if lang == Language::Unknown {
103 return Err(AcbError::ParseError {
104 path: path.to_path_buf(),
105 message: "Unknown language".into(),
106 });
107 }
108
109 let parser = self
110 .parsers
111 .get(&lang)
112 .ok_or_else(|| AcbError::ParseError {
113 path: path.to_path_buf(),
114 message: format!("No parser for language: {}", lang),
115 })?;
116
117 let ts_lang = if matches!(
119 path.extension().and_then(|e| e.to_str()),
120 Some("tsx") | Some("jsx")
121 ) {
122 tree_sitter_typescript::language_tsx()
123 } else {
124 lang.tree_sitter_language()
125 .ok_or_else(|| AcbError::ParseError {
126 path: path.to_path_buf(),
127 message: format!("No tree-sitter grammar for: {}", lang),
128 })?
129 };
130
131 let tree = parse_with_language(content, ts_lang)?;
132 parser.extract_units(&tree, content, path)
133 }
134
135 pub fn parse_directory(&self, root: &Path, options: &ParseOptions) -> AcbResult<ParseResult> {
137 let start = Instant::now();
138
139 let files = self.collect_files(root, options)?;
140
141 let mut all_units = Vec::new();
142 let mut all_errors = Vec::new();
143 let mut files_parsed = 0usize;
144 let mut files_skipped = 0usize;
145 let mut files_errored = 0usize;
146 let mut total_lines = 0usize;
147 let mut by_language: HashMap<Language, usize> = HashMap::new();
148
149 for file_path in &files {
150 let content = match std::fs::read_to_string(file_path) {
151 Ok(c) => c,
152 Err(e) => {
153 all_errors.push(ParseFileError {
154 path: file_path.clone(),
155 span: None,
156 message: format!("Could not read file: {}", e),
157 severity: Severity::Error,
158 });
159 files_errored += 1;
160 continue;
161 }
162 };
163
164 if content.len() > options.max_file_size {
166 files_skipped += 1;
167 continue;
168 }
169
170 let lang = Language::from_path(file_path);
171 if lang == Language::Unknown {
172 files_skipped += 1;
173 continue;
174 }
175
176 if !options.include_tests {
178 if let Some(parser) = self.parsers.get(&lang) {
179 if parser.is_test_file(file_path, &content) {
180 files_skipped += 1;
181 continue;
182 }
183 }
184 }
185
186 match self.parse_file(file_path, &content) {
187 Ok(units) => {
188 total_lines += content.lines().count();
189 *by_language.entry(lang).or_insert(0) += 1;
190 all_units.extend(units);
191 files_parsed += 1;
192 }
193 Err(e) => {
194 all_errors.push(ParseFileError {
195 path: file_path.clone(),
196 span: None,
197 message: format!("{}", e),
198 severity: Severity::Error,
199 });
200 files_errored += 1;
201 }
202 }
203 }
204
205 let elapsed = start.elapsed();
206
207 Ok(ParseResult {
208 units: all_units,
209 errors: all_errors,
210 stats: ParseStats {
211 files_parsed,
212 files_skipped,
213 files_errored,
214 total_lines,
215 parse_time_ms: elapsed.as_millis() as u64,
216 by_language,
217 },
218 })
219 }
220
221 pub fn should_parse(&self, path: &Path) -> bool {
223 let lang = Language::from_path(path);
224 lang != Language::Unknown && self.parsers.contains_key(&lang)
225 }
226
227 fn collect_files(&self, root: &Path, options: &ParseOptions) -> AcbResult<Vec<PathBuf>> {
229 use ignore::WalkBuilder;
230
231 let mut files = Vec::new();
232
233 let walker = WalkBuilder::new(root).hidden(true).git_ignore(true).build();
234
235 for entry in walker {
236 let entry = match entry {
237 Ok(e) => e,
238 Err(_) => continue,
239 };
240 let path = entry.path();
241
242 if !path.is_file() {
243 continue;
244 }
245
246 let lang = Language::from_path(path);
247 if lang == Language::Unknown {
248 continue;
249 }
250
251 if !options.languages.is_empty() && !options.languages.contains(&lang) {
253 continue;
254 }
255
256 if self.is_excluded(path, &options.exclude) {
258 continue;
259 }
260
261 files.push(path.to_path_buf());
262 }
263
264 Ok(files)
265 }
266
267 fn is_excluded(&self, path: &Path, excludes: &[String]) -> bool {
269 let path_str = path.to_string_lossy();
270 for pattern in excludes {
271 let pattern_str = pattern.replace("**", "");
273 let pattern_str = pattern_str.trim_matches('/');
274 if !pattern_str.is_empty() && path_str.contains(pattern_str) {
275 return true;
276 }
277 }
278 false
279 }
280}
281
282impl Default for Parser {
283 fn default() -> Self {
284 Self::new()
285 }
286}