1use crate::ast::{Language, Node};
4use crate::error::{Error, Result};
5use dashmap::DashMap;
6use std::path::{Path, PathBuf};
7use std::sync::Arc;
8use tree_sitter::Tree;
9
10#[derive(Debug, Clone)]
12pub struct ParseContext {
13 pub repo_id: String,
15 pub file_path: PathBuf,
17 pub old_tree: Option<Tree>,
19 pub content: String,
21}
22
23impl ParseContext {
24 pub fn new(repo_id: String, file_path: PathBuf, content: String) -> Self {
26 Self {
27 repo_id,
28 file_path,
29 content,
30 old_tree: None,
31 }
32 }
33
34 pub fn with_old_tree(mut self, tree: Tree) -> Self {
36 self.old_tree = Some(tree);
37 self
38 }
39}
40
41pub trait LanguageParser: Send + Sync {
43 fn language(&self) -> Language;
45
46 fn parse(&self, context: &ParseContext) -> Result<ParseResult>;
48}
49
50#[derive(Debug)]
52pub struct ParseResult {
53 pub tree: Tree,
55 pub nodes: Vec<Node>,
57 pub edges: Vec<crate::ast::Edge>,
59}
60
61pub struct LanguageRegistry {
63 parsers: DashMap<Language, Arc<dyn LanguageParser>>,
64}
65
66impl LanguageRegistry {
67 pub fn new() -> Self {
69 Self {
70 parsers: DashMap::new(),
71 }
72 }
73
74 pub fn register(&self, parser: Arc<dyn LanguageParser>) {
76 let lang = parser.language();
77 self.parsers.insert(lang, parser);
78 }
79
80 pub fn get(&self, language: Language) -> Option<Arc<dyn LanguageParser>> {
82 self.parsers.get(&language).map(|p| Arc::clone(&*p))
83 }
84
85 pub fn get_by_extension(&self, ext: &str) -> Option<Arc<dyn LanguageParser>> {
87 let lang = Language::from_extension(ext);
88 self.get(lang)
89 }
90}
91
92impl Default for LanguageRegistry {
93 fn default() -> Self {
94 Self::new()
95 }
96}
97
98pub struct ParserEngine {
100 registry: Arc<LanguageRegistry>,
102 tree_cache: DashMap<PathBuf, Tree>,
104}
105
106impl ParserEngine {
107 pub fn new(registry: Arc<LanguageRegistry>) -> Self {
109 Self {
110 registry,
111 tree_cache: DashMap::new(),
112 }
113 }
114
115 pub fn parse_file(&self, context: ParseContext) -> Result<ParseResult> {
117 let ext = context
119 .file_path
120 .extension()
121 .and_then(|s| s.to_str())
122 .ok_or_else(|| Error::parse(&context.file_path, "No file extension"))?;
123
124 let parser = self
126 .registry
127 .get_by_extension(ext)
128 .ok_or_else(|| Error::unsupported_language(ext.to_string()))?;
129
130 let result = parser.parse(&context)?;
132
133 self.tree_cache
135 .insert(context.file_path.clone(), result.tree.clone());
136
137 Ok(result)
138 }
139
140 pub fn parse_incremental(&self, mut context: ParseContext) -> Result<ParseResult> {
142 if context.old_tree.is_none() {
144 if let Some(old_tree) = self.tree_cache.get(&context.file_path) {
145 context.old_tree = Some(old_tree.clone());
146 }
147 }
148
149 self.parse_file(context)
150 }
151
152 pub fn clear_cache(&self) {
154 self.tree_cache.clear();
155 }
156
157 pub fn remove_from_cache(&self, path: &Path) {
159 self.tree_cache.remove(path);
160 }
161}
162
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::{Edge, EdgeKind, NodeKind, Span};
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Test double that reports a configurable language, counts how often
    /// `parse` is called, and always parses with the JavaScript grammar.
    struct MockParser {
        language: Language,
        parse_count: Arc<AtomicUsize>,
    }

    impl MockParser {
        fn new(language: Language) -> Self {
            Self {
                language,
                parse_count: Arc::new(AtomicUsize::new(0)),
            }
        }

        /// Number of `parse` calls made so far.
        fn parse_count(&self) -> usize {
            self.parse_count.load(Ordering::SeqCst)
        }
    }

    impl LanguageParser for MockParser {
        fn language(&self) -> Language {
            self.language
        }

        /// Produces one module node for every input, plus one function node
        /// and one edge when the content contains the word "function".
        /// `context.old_tree` is ignored.
        fn parse(&self, context: &ParseContext) -> Result<ParseResult> {
            self.parse_count.fetch_add(1, Ordering::SeqCst);

            let mut parser = tree_sitter::Parser::new();
            parser
                .set_language(&tree_sitter_javascript::LANGUAGE.into())
                .unwrap();
            let tree = parser.parse(&context.content, None).unwrap();

            let mut nodes = Vec::new();
            let mut edges = Vec::new();

            let module_span = Span::new(0, context.content.len(), 1, 1, 1, 1);
            let module_node = crate::ast::Node::new(
                &context.repo_id,
                NodeKind::Module,
                context.file_path.to_string_lossy().to_string(),
                self.language,
                context.file_path.clone(),
                module_span,
            );
            nodes.push(module_node.clone());

            if context.content.contains("function") {
                let func_span = Span::new(0, 8, 1, 1, 1, 9);
                let func_node = crate::ast::Node::new(
                    &context.repo_id,
                    NodeKind::Function,
                    "testFunction".to_string(),
                    self.language,
                    context.file_path.clone(),
                    func_span,
                );
                nodes.push(func_node.clone());

                edges.push(Edge::new(module_node.id, func_node.id, EdgeKind::Calls));
            }

            Ok(ParseResult { tree, nodes, edges })
        }
    }

    #[test]
    fn test_language_registry() {
        let registry = LanguageRegistry::new();
        assert!(registry.get(Language::JavaScript).is_none());

        let parser = Arc::new(MockParser::new(Language::JavaScript));
        registry.register(parser.clone());

        assert!(
            registry.get(Language::JavaScript).is_some(),
            "JavaScript parser should be registered"
        );
        let js_parser = registry.get(Language::JavaScript).unwrap();
        assert_eq!(
            js_parser.language(),
            Language::JavaScript,
            "Parser should return correct language"
        );
        assert_eq!(
            js_parser.language(),
            parser.language(),
            "Should return parser with same language"
        );

        assert!(
            registry.get_by_extension("js").is_some(),
            "Should find parser by .js extension"
        );
        let js_parser_by_ext = registry.get_by_extension("js").unwrap();
        assert_eq!(
            js_parser_by_ext.language(),
            Language::JavaScript,
            "Extension lookup should return JavaScript parser"
        );
        assert!(
            registry.get_by_extension("ts").is_none(),
            "Should not find parser for unregistered .ts extension"
        );
    }

    #[test]
    fn test_parse_context() {
        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.js"),
            "console.log('hello');".to_string(),
        );

        assert_eq!(context.repo_id, "test_repo");
        assert_eq!(context.file_path, PathBuf::from("test.js"));
        assert!(context.old_tree.is_none(), "Should be none");
    }

    #[test]
    fn test_parser_engine_basic() {
        let registry = Arc::new(LanguageRegistry::new());
        let parser = Arc::new(MockParser::new(Language::JavaScript));
        registry.register(parser.clone());

        let engine = ParserEngine::new(registry);
        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.js"),
            "function hello() {}".to_string(),
        );

        let result = engine.parse_file(context).unwrap();
        assert_eq!(result.nodes.len(), 2, "Should have 2 items");
        assert_eq!(result.edges.len(), 1, "Should have 1 items");
        assert_eq!(parser.parse_count(), 1);
    }

    #[test]
    fn test_parser_engine_unsupported_language() {
        let registry = Arc::new(LanguageRegistry::new());
        let engine = ParserEngine::new(registry);

        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.unknown"),
            "some content".to_string(),
        );

        let result = engine.parse_file(context);
        assert!(result.is_err());
        match result.unwrap_err() {
            Error::Validation { field, message, .. } => {
                assert_eq!(field, "language");
                assert!(message.contains("unknown"));
            }
            _ => panic!("Expected Validation error for unsupported language"),
        }
    }

    #[test]
    fn test_parser_engine_no_extension() {
        let registry = Arc::new(LanguageRegistry::new());
        let engine = ParserEngine::new(registry);

        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("README"),
            "some content".to_string(),
        );

        let result = engine.parse_file(context);
        assert!(result.is_err());
        match result.unwrap_err() {
            Error::Parse { file, message, .. } => {
                assert_eq!(file, PathBuf::from("README"));
                assert!(message.contains("No file extension"));
            }
            _ => panic!("Expected Parse error"),
        }
    }

    #[test]
    fn test_parser_engine_caching() {
        let registry = Arc::new(LanguageRegistry::new());
        let parser = Arc::new(MockParser::new(Language::JavaScript));
        registry.register(parser.clone());

        let engine = ParserEngine::new(registry);
        let file_path = PathBuf::from("test.js");

        // First parse populates the cache for `file_path`.
        let context1 = ParseContext::new(
            "test_repo".to_string(),
            file_path.clone(),
            "function one() {}".to_string(),
        );
        let _result1 = engine.parse_file(context1).unwrap();

        // Second parse goes through the incremental entry point.
        let context2 = ParseContext::new(
            "test_repo".to_string(),
            file_path.clone(),
            "function two() {}".to_string(),
        );
        let result2 = engine.parse_incremental(context2).unwrap();

        assert_eq!(result2.nodes.len(), 2, "Should have 2 items");
        // The parser is still invoked once per request.
        assert_eq!(parser.parse_count(), 2);
    }

    #[test]
    fn test_parser_engine_cache_management() {
        let registry = Arc::new(LanguageRegistry::new());
        registry.register(Arc::new(MockParser::new(Language::JavaScript)));

        let engine = ParserEngine::new(registry);
        let file_path = PathBuf::from("test.js");

        let context = ParseContext::new(
            "test_repo".to_string(),
            file_path.clone(),
            "function test() {}".to_string(),
        );
        let _result = engine.parse_file(context).unwrap();
        assert!(
            engine.tree_cache.contains_key(&file_path),
            "Parsing should cache the tree under the file path"
        );

        engine.remove_from_cache(&file_path);
        assert!(
            !engine.tree_cache.contains_key(&file_path),
            "Removed path should no longer be cached"
        );

        // Re-populate the cache so that `clear_cache` has an entry to drop.
        let context = ParseContext::new(
            "test_repo".to_string(),
            file_path.clone(),
            "function test() {}".to_string(),
        );
        let _reparsed = engine.parse_file(context).unwrap();
        assert!(
            !engine.tree_cache.is_empty(),
            "Cache should be populated again before clearing"
        );

        engine.clear_cache();
        assert!(
            engine.tree_cache.is_empty(),
            "Cache should be empty after clear_cache"
        );
    }

    #[test]
    fn test_parse_result_validation() {
        let registry = Arc::new(LanguageRegistry::new());
        registry.register(Arc::new(MockParser::new(Language::JavaScript)));

        let engine = ParserEngine::new(registry);
        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.js"),
            "const x = 42;".to_string(),
        );

        let result = engine.parse_file(context).unwrap();

        assert!(!result.nodes.is_empty(), "Should not be empty");
        for node in &result.nodes {
            assert!(!node.name.is_empty(), "Should not be empty");
            assert_eq!(node.lang, Language::JavaScript);
        }

        // Every edge must have at least one endpoint among the returned nodes.
        for edge in &result.edges {
            let source_exists = result.nodes.iter().any(|n| n.id == edge.source);
            let target_exists = result.nodes.iter().any(|n| n.id == edge.target);
            assert!(source_exists || target_exists);
        }
    }

    #[test]
    fn test_thread_safety() {
        use std::thread;

        let registry = Arc::new(LanguageRegistry::new());
        registry.register(Arc::new(MockParser::new(Language::JavaScript)));
        registry.register(Arc::new(MockParser::new(Language::Python)));

        let engine = Arc::new(ParserEngine::new(registry));

        let mut handles = vec![];

        // Ten threads share one engine, alternating between .js and .py files.
        for i in 0..10 {
            let engine_clone = Arc::clone(&engine);
            let handle = thread::spawn(move || {
                let ext = if i % 2 == 0 { "js" } else { "py" };
                let context = ParseContext::new(
                    "test_repo".to_string(),
                    PathBuf::from(format!("test{i}.{ext}")),
                    format!("function test{i}() {{}}"),
                );
                engine_clone.parse_file(context).unwrap()
            });
            handles.push(handle);
        }

        for handle in handles {
            let result = handle.join().unwrap();
            assert!(!result.nodes.is_empty(), "Should not be empty");
        }
    }
}