1use crate::ast::{Language, Node};
4use crate::error::{Error, Result};
5use dashmap::DashMap;
6use std::path::{Path, PathBuf};
7use std::sync::Arc;
8use tree_sitter::Tree;
9
10#[derive(Debug, Clone)]
12pub struct ParseContext {
13 pub repo_id: String,
15 pub file_path: PathBuf,
17 pub old_tree: Option<Tree>,
19 pub content: String,
21}
22
23impl ParseContext {
24 pub fn new(repo_id: String, file_path: PathBuf, content: String) -> Self {
26 Self {
27 repo_id,
28 file_path,
29 content,
30 old_tree: None,
31 }
32 }
33
34 pub fn with_old_tree(mut self, tree: Tree) -> Self {
36 self.old_tree = Some(tree);
37 self
38 }
39}
40
41pub trait LanguageParser: Send + Sync {
43 fn language(&self) -> Language;
45
46 fn parse(&self, context: &ParseContext) -> Result<ParseResult>;
48}
49
50#[derive(Debug)]
52pub struct ParseResult {
53 pub tree: Tree,
55 pub nodes: Vec<Node>,
57 pub edges: Vec<crate::ast::Edge>,
59}
60
61pub struct LanguageRegistry {
63 parsers: DashMap<Language, Arc<dyn LanguageParser>>,
64}
65
66impl LanguageRegistry {
67 pub fn new() -> Self {
69 Self {
70 parsers: DashMap::new(),
71 }
72 }
73
74 pub fn register(&self, parser: Arc<dyn LanguageParser>) {
76 let lang = parser.language();
77 self.parsers.insert(lang, parser);
78 }
79
80 pub fn get(&self, language: Language) -> Option<Arc<dyn LanguageParser>> {
82 self.parsers.get(&language).map(|p| Arc::clone(&*p))
83 }
84
85 pub fn get_by_extension(&self, ext: &str) -> Option<Arc<dyn LanguageParser>> {
87 let lang = Language::from_extension(ext);
88 self.get(lang)
89 }
90}
91
92impl Default for LanguageRegistry {
93 fn default() -> Self {
94 Self::new()
95 }
96}
97
98pub struct ParserEngine {
100 registry: Arc<LanguageRegistry>,
102 tree_cache: DashMap<PathBuf, Tree>,
104}
105
106impl ParserEngine {
107 pub fn new(registry: Arc<LanguageRegistry>) -> Self {
109 Self {
110 registry,
111 tree_cache: DashMap::new(),
112 }
113 }
114
115 pub fn parse_file(&self, context: ParseContext) -> Result<ParseResult> {
117 let ext = context
119 .file_path
120 .extension()
121 .and_then(|s| s.to_str())
122 .ok_or_else(|| Error::parse(&context.file_path, "No file extension"))?;
123
124 let parser = self
126 .registry
127 .get_by_extension(ext)
128 .ok_or_else(|| Error::UnsupportedLanguage(ext.to_string()))?;
129
130 let result = parser.parse(&context)?;
132
133 self.tree_cache
135 .insert(context.file_path.clone(), result.tree.clone());
136
137 Ok(result)
138 }
139
140 pub fn parse_incremental(&self, mut context: ParseContext) -> Result<ParseResult> {
142 if context.old_tree.is_none() {
144 if let Some(old_tree) = self.tree_cache.get(&context.file_path) {
145 context.old_tree = Some(old_tree.clone());
146 }
147 }
148
149 self.parse_file(context)
150 }
151
152 pub fn clear_cache(&self) {
154 self.tree_cache.clear();
155 }
156
157 pub fn remove_from_cache(&self, path: &Path) {
159 self.tree_cache.remove(path);
160 }
161}
162
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::{Edge, EdgeKind, NodeKind, Span};
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Test double that reports a configurable language and counts how often
    /// `parse` is called.
    struct MockParser {
        language: Language,
        parse_count: Arc<AtomicUsize>,
    }

    impl MockParser {
        fn new(language: Language) -> Self {
            Self {
                language,
                parse_count: Arc::new(AtomicUsize::new(0)),
            }
        }

        /// Number of `parse` calls made so far.
        fn parse_count(&self) -> usize {
            self.parse_count.load(Ordering::SeqCst)
        }
    }

    impl LanguageParser for MockParser {
        fn language(&self) -> Language {
            self.language
        }

        /// Always emits a module node; additionally emits one function node
        /// and one module -> function edge when the content contains the word
        /// "function". The tree is always built with the JavaScript grammar,
        /// whatever `language` is set to.
        fn parse(&self, context: &ParseContext) -> Result<ParseResult> {
            self.parse_count.fetch_add(1, Ordering::SeqCst);

            let mut parser = tree_sitter::Parser::new();
            parser
                .set_language(&tree_sitter_javascript::LANGUAGE.into())
                .unwrap();
            let tree = parser.parse(&context.content, None).unwrap();

            let mut nodes = Vec::new();
            let mut edges = Vec::new();

            let module_span = Span::new(0, context.content.len(), 1, 1, 1, 1);
            let module_node = crate::ast::Node::new(
                &context.repo_id,
                NodeKind::Module,
                context.file_path.to_string_lossy().to_string(),
                self.language,
                context.file_path.clone(),
                module_span,
            );
            nodes.push(module_node.clone());

            if context.content.contains("function") {
                let func_span = Span::new(0, 8, 1, 1, 1, 9);
                let func_node = crate::ast::Node::new(
                    &context.repo_id,
                    NodeKind::Function,
                    "testFunction".to_string(),
                    self.language,
                    context.file_path.clone(),
                    func_span,
                );
                nodes.push(func_node.clone());

                edges.push(Edge::new(module_node.id, func_node.id, EdgeKind::Calls));
            }

            Ok(ParseResult { tree, nodes, edges })
        }
    }

    #[test]
    fn test_language_registry() {
        let registry = LanguageRegistry::new();
        assert!(registry.get(Language::JavaScript).is_none());

        let parser = Arc::new(MockParser::new(Language::JavaScript));
        registry.register(parser.clone());

        assert!(registry.get(Language::JavaScript).is_some());

        assert!(registry.get_by_extension("js").is_some());
        assert!(registry.get_by_extension("ts").is_none());
    }

    #[test]
    fn test_parse_context() {
        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.js"),
            "console.log('hello');".to_string(),
        );

        assert_eq!(context.repo_id, "test_repo");
        assert_eq!(context.file_path, PathBuf::from("test.js"));
        assert!(context.old_tree.is_none());
    }

    #[test]
    fn test_parser_engine_basic() {
        let registry = Arc::new(LanguageRegistry::new());
        let parser = Arc::new(MockParser::new(Language::JavaScript));
        registry.register(parser.clone());

        let engine = ParserEngine::new(registry);
        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.js"),
            "function hello() {}".to_string(),
        );

        let result = engine.parse_file(context).unwrap();
        assert_eq!(result.nodes.len(), 2);
        assert_eq!(result.edges.len(), 1);
        assert_eq!(parser.parse_count(), 1);
    }

    #[test]
    fn test_parser_engine_unsupported_language() {
        let registry = Arc::new(LanguageRegistry::new());
        let engine = ParserEngine::new(registry);

        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.unknown"),
            "some content".to_string(),
        );

        let result = engine.parse_file(context);
        assert!(result.is_err());
        match result.unwrap_err() {
            Error::UnsupportedLanguage(ext) => assert_eq!(ext, "unknown"),
            _ => panic!("Expected UnsupportedLanguage error"),
        }
    }

    #[test]
    fn test_parser_engine_no_extension() {
        let registry = Arc::new(LanguageRegistry::new());
        let engine = ParserEngine::new(registry);

        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("README"),
            "some content".to_string(),
        );

        let result = engine.parse_file(context);
        assert!(result.is_err());
        match result.unwrap_err() {
            Error::Parse { file, message } => {
                assert_eq!(file, PathBuf::from("README"));
                assert!(message.contains("No file extension"));
            }
            _ => panic!("Expected Parse error"),
        }
    }

    #[test]
    fn test_parser_engine_caching() {
        let registry = Arc::new(LanguageRegistry::new());
        let parser = Arc::new(MockParser::new(Language::JavaScript));
        registry.register(parser.clone());

        let engine = ParserEngine::new(registry);
        let file_path = PathBuf::from("test.js");

        let context1 = ParseContext::new(
            "test_repo".to_string(),
            file_path.clone(),
            "function one() {}".to_string(),
        );
        let _result1 = engine.parse_file(context1).unwrap();

        // The first parse must have left a tree behind for the incremental
        // parse to pick up.
        assert!(engine.tree_cache.contains_key(&file_path));

        let context2 = ParseContext::new(
            "test_repo".to_string(),
            file_path.clone(),
            "function two() {}".to_string(),
        );
        let result2 = engine.parse_incremental(context2).unwrap();

        assert_eq!(result2.nodes.len(), 2);
        // An incremental parse still goes through the parser.
        assert_eq!(parser.parse_count(), 2);
    }

    #[test]
    fn test_parser_engine_cache_management() {
        let registry = Arc::new(LanguageRegistry::new());
        registry.register(Arc::new(MockParser::new(Language::JavaScript)));

        let engine = ParserEngine::new(registry);
        let file_path = PathBuf::from("test.js");

        let context = ParseContext::new(
            "test_repo".to_string(),
            file_path.clone(),
            "function test() {}".to_string(),
        );
        let _result = engine.parse_file(context).unwrap();
        assert!(engine.tree_cache.contains_key(&file_path));

        engine.remove_from_cache(&file_path);
        assert!(!engine.tree_cache.contains_key(&file_path));

        // Repopulate the cache so that `clear_cache` has something to drop.
        let context = ParseContext::new(
            "test_repo".to_string(),
            file_path.clone(),
            "function test() {}".to_string(),
        );
        let _result = engine.parse_file(context).unwrap();
        assert!(!engine.tree_cache.is_empty());

        engine.clear_cache();
        assert!(engine.tree_cache.is_empty());
    }

    #[test]
    fn test_parse_result_validation() {
        let registry = Arc::new(LanguageRegistry::new());
        registry.register(Arc::new(MockParser::new(Language::JavaScript)));

        let engine = ParserEngine::new(registry);
        // The content contains "function" so the mock emits an edge and the
        // endpoint check below actually runs.
        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.js"),
            "function x() { return 42; }".to_string(),
        );

        let result = engine.parse_file(context).unwrap();

        assert!(!result.nodes.is_empty());
        for node in &result.nodes {
            assert!(!node.name.is_empty());
            assert_eq!(node.lang, Language::JavaScript);
        }

        assert!(!result.edges.is_empty());
        for edge in &result.edges {
            let source_exists = result.nodes.iter().any(|n| n.id == edge.source);
            let target_exists = result.nodes.iter().any(|n| n.id == edge.target);
            // Both endpoints must refer to nodes in the result; one dangling
            // endpoint is already an invalid edge.
            assert!(source_exists && target_exists);
        }
    }

    #[test]
    fn test_thread_safety() {
        use std::thread;

        let registry = Arc::new(LanguageRegistry::new());
        registry.register(Arc::new(MockParser::new(Language::JavaScript)));
        registry.register(Arc::new(MockParser::new(Language::Python)));

        let engine = Arc::new(ParserEngine::new(registry));

        let mut handles = vec![];

        for i in 0..10 {
            let engine_clone = Arc::clone(&engine);
            let handle = thread::spawn(move || {
                let ext = if i % 2 == 0 { "js" } else { "py" };
                let context = ParseContext::new(
                    "test_repo".to_string(),
                    PathBuf::from(format!("test{}.{}", i, ext)),
                    format!("function test{}() {{}}", i),
                );
                engine_clone.parse_file(context).unwrap()
            });
            handles.push(handle);
        }

        for handle in handles {
            let result = handle.join().unwrap();
            assert!(!result.nodes.is_empty());
        }

        // Each thread parsed a distinct path, so each left its own cache entry.
        assert_eq!(engine.tree_cache.len(), 10);
    }
}