1pub mod languages;
7pub mod walker;
8
9use anyhow::{Context, Result};
10use rayon::prelude::*;
11use rma_common::{Language, RmaConfig, RmaError, SourceLocation};
12use serde::{Deserialize, Serialize};
13use std::path::{Path, PathBuf};
14use std::sync::Arc;
15use tracing::{debug, info, instrument, warn};
16use tree_sitter::{Node, Parser, Tree};
17
18#[derive(Debug)]
20pub struct ParsedFile {
21 pub path: PathBuf,
22 pub language: Language,
23 pub content: String,
24 pub tree: Tree,
25 pub parse_errors: Vec<ParseError>,
26}
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct ParseError {
31 pub location: SourceLocation,
32 pub message: String,
33}
34
35#[derive(Debug, Clone, Default, Serialize, Deserialize)]
37pub struct ParseStats {
38 pub files_parsed: usize,
39 pub files_failed: usize,
40 pub files_skipped: usize,
41 pub total_bytes: usize,
42 pub parse_errors: usize,
43}
44
45pub struct ParserEngine {
47 config: Arc<RmaConfig>,
48}
49
50impl ParserEngine {
51 pub fn new(config: RmaConfig) -> Self {
53 Self {
54 config: Arc::new(config),
55 }
56 }
57
58 #[instrument(skip(self, content), fields(path = %path.display()))]
60 pub fn parse_file(&self, path: &Path, content: &str) -> Result<ParsedFile> {
61 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
62
63 let language = Language::from_extension(ext);
64
65 if language == Language::Unknown {
66 return Err(RmaError::UnsupportedLanguage(ext.to_string()).into());
67 }
68
69 let mut parser = Parser::new();
70 let ts_language = languages::get_language(language)?;
71 parser.set_language(&ts_language)?;
72
73 let tree = parser
74 .parse(content, None)
75 .context("Failed to parse file")?;
76
77 let parse_errors = collect_parse_errors(&tree, path, content);
78
79 debug!(
80 "Parsed {} ({}) - {} errors",
81 path.display(),
82 language,
83 parse_errors.len()
84 );
85
86 Ok(ParsedFile {
87 path: path.to_path_buf(),
88 language,
89 content: content.to_string(),
90 tree,
91 parse_errors,
92 })
93 }
94
95 #[instrument(skip(self))]
97 pub fn parse_directory(&self, root: &Path) -> Result<(Vec<ParsedFile>, ParseStats)> {
98 info!("Starting parallel parse of {}", root.display());
99
100 let files = walker::collect_files(root, &self.config)?;
101 info!("Found {} files to parse", files.len());
102
103 let results: Vec<_> = files
104 .par_iter()
105 .filter_map(|path| match std::fs::read_to_string(path) {
106 Ok(content) => {
107 if content.len() > self.config.max_file_size {
108 warn!("Skipping large file: {}", path.display());
109 return None;
110 }
111 Some((path.clone(), content))
112 }
113 Err(e) => {
114 warn!("Failed to read {}: {}", path.display(), e);
115 None
116 }
117 })
118 .map(|(path, content)| {
119 let result = self.parse_file(&path, &content);
120 (path, result, content.len())
121 })
122 .collect();
123
124 let mut parsed_files = Vec::new();
125 let mut stats = ParseStats::default();
126
127 for (path, result, bytes) in results {
128 match result {
129 Ok(parsed) => {
130 stats.parse_errors += parsed.parse_errors.len();
131 stats.total_bytes += bytes;
132 stats.files_parsed += 1;
133 parsed_files.push(parsed);
134 }
135 Err(e) => {
136 if e.downcast_ref::<RmaError>()
137 .map(|e| matches!(e, RmaError::UnsupportedLanguage(_)))
138 .unwrap_or(false)
139 {
140 stats.files_skipped += 1;
141 } else {
142 debug!("Failed to parse {}: {}", path.display(), e);
143 stats.files_failed += 1;
144 }
145 }
146 }
147 }
148
149 info!(
150 "Parsing complete: {} parsed, {} failed, {} skipped",
151 stats.files_parsed, stats.files_failed, stats.files_skipped
152 );
153
154 Ok((parsed_files, stats))
155 }
156}
157
158fn collect_parse_errors(tree: &Tree, path: &Path, content: &str) -> Vec<ParseError> {
160 let mut errors = Vec::new();
161 let mut cursor = tree.walk();
162
163 collect_errors_recursive(&mut cursor, path, content, &mut errors);
164
165 errors
166}
167
168fn collect_errors_recursive(
169 cursor: &mut tree_sitter::TreeCursor,
170 path: &Path,
171 _content: &str,
172 errors: &mut Vec<ParseError>,
173) {
174 let node = cursor.node();
175
176 if node.is_error() || node.is_missing() {
177 let start = node.start_position();
178 let end = node.end_position();
179
180 errors.push(ParseError {
181 location: SourceLocation::new(
182 path.to_path_buf(),
183 start.row + 1,
184 start.column + 1,
185 end.row + 1,
186 end.column + 1,
187 ),
188 message: if node.is_missing() {
189 format!("Missing {}", node.kind())
190 } else {
191 "Syntax error".to_string()
192 },
193 });
194 }
195
196 if cursor.goto_first_child() {
197 loop {
198 collect_errors_recursive(cursor, path, _content, errors);
199 if !cursor.goto_next_sibling() {
200 break;
201 }
202 }
203 cursor.goto_parent();
204 }
205}
206
207pub trait AstVisitor {
209 fn visit_node(&mut self, node: Node, content: &str);
210}
211
212pub fn traverse_ast<V: AstVisitor>(tree: &Tree, content: &str, visitor: &mut V) {
214 let mut cursor = tree.walk();
215 traverse_recursive(&mut cursor, content, visitor);
216}
217
218fn traverse_recursive<V: AstVisitor>(
219 cursor: &mut tree_sitter::TreeCursor,
220 content: &str,
221 visitor: &mut V,
222) {
223 let node = cursor.node();
224 visitor.visit_node(node, content);
225
226 if cursor.goto_first_child() {
227 loop {
228 traverse_recursive(cursor, content, visitor);
229 if !cursor.goto_next_sibling() {
230 break;
231 }
232 }
233 cursor.goto_parent();
234 }
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` as though it lived at `file_name`, using an engine
    /// built from the default configuration.
    fn parse_with_default_engine(file_name: &str, source: &str) -> Result<ParsedFile> {
        ParserEngine::new(RmaConfig::default()).parse_file(Path::new(file_name), source)
    }

    #[test]
    fn test_parse_rust_file() {
        let source = r#"
fn main() {
    println!("Hello, world!");
}
"#;
        let outcome = parse_with_default_engine("test.rs", source);
        assert!(outcome.is_ok());

        let parsed = outcome.unwrap();
        assert_eq!(parsed.language, Language::Rust);
        assert!(parsed.parse_errors.is_empty());
    }

    #[test]
    fn test_parse_python_file() {
        let source = r#"
def hello():
    print("Hello, world!")

if __name__ == "__main__":
    hello()
"#;
        let outcome = parse_with_default_engine("test.py", source);
        assert!(outcome.is_ok());

        let parsed = outcome.unwrap();
        assert_eq!(parsed.language, Language::Python);
    }

    #[test]
    fn test_parse_javascript_file() {
        let source = r#"
function hello() {
    console.log("Hello, world!");
}
hello();
"#;
        let outcome = parse_with_default_engine("test.js", source);
        assert!(outcome.is_ok());

        let parsed = outcome.unwrap();
        assert_eq!(parsed.language, Language::JavaScript);
    }

    #[test]
    fn test_unsupported_language() {
        // `.xyz` maps to no known language, so parsing must be rejected.
        let outcome = parse_with_default_engine("test.xyz", "content");
        assert!(outcome.is_err());
    }
}