1use crate::ast::{Language, Node};
4use crate::error::{Error, Result};
5use dashmap::DashMap;
6use std::path::{Path, PathBuf};
7use std::sync::Arc;
8use tree_sitter::Tree;
9
10#[derive(Debug, Clone)]
12pub struct ParseContext {
13 pub repo_id: String,
15 pub file_path: PathBuf,
17 pub old_tree: Option<Tree>,
19 pub content: String,
21}
22
23impl ParseContext {
24 pub fn new(repo_id: String, file_path: PathBuf, content: String) -> Self {
26 Self {
27 repo_id,
28 file_path,
29 content,
30 old_tree: None,
31 }
32 }
33
34 pub fn with_old_tree(mut self, tree: Tree) -> Self {
36 self.old_tree = Some(tree);
37 self
38 }
39}
40
41pub trait LanguageParser: Send + Sync {
43 fn language(&self) -> Language;
45
46 fn parse(&self, context: &ParseContext) -> Result<ParseResult>;
48}
49
50#[derive(Debug)]
52pub struct ParseResult {
53 pub tree: Tree,
55 pub nodes: Vec<Node>,
57 pub edges: Vec<crate::ast::Edge>,
59}
60
61pub struct LanguageRegistry {
63 parsers: DashMap<Language, Arc<dyn LanguageParser>>,
64}
65
66impl LanguageRegistry {
67 pub fn new() -> Self {
69 Self {
70 parsers: DashMap::new(),
71 }
72 }
73
74 pub fn register(&self, parser: Arc<dyn LanguageParser>) {
76 let lang = parser.language();
77 self.parsers.insert(lang, parser);
78 }
79
80 pub fn get(&self, language: Language) -> Option<Arc<dyn LanguageParser>> {
82 self.parsers.get(&language).map(|p| Arc::clone(&*p))
83 }
84
85 pub fn get_by_extension(&self, ext: &str) -> Option<Arc<dyn LanguageParser>> {
87 let lang = Language::from_extension(ext);
88 self.get(lang)
89 }
90}
91
92impl Default for LanguageRegistry {
93 fn default() -> Self {
94 Self::new()
95 }
96}
97
98pub struct ParserEngine {
100 registry: Arc<LanguageRegistry>,
102 tree_cache: DashMap<PathBuf, Tree>,
104}
105
106impl ParserEngine {
107 pub fn new(registry: Arc<LanguageRegistry>) -> Self {
109 Self {
110 registry,
111 tree_cache: DashMap::new(),
112 }
113 }
114
115 pub fn parse_file(&self, context: ParseContext) -> Result<ParseResult> {
117 let ext = context
119 .file_path
120 .extension()
121 .and_then(|s| s.to_str())
122 .ok_or_else(|| Error::parse(&context.file_path, "No file extension"))?;
123
124 let parser = self
126 .registry
127 .get_by_extension(ext)
128 .ok_or_else(|| Error::unsupported_language(ext.to_string()))?;
129
130 let result = parser.parse(&context)?;
132
133 self.tree_cache
135 .insert(context.file_path.clone(), result.tree.clone());
136
137 Ok(result)
138 }
139
140 pub fn parse_incremental(&self, mut context: ParseContext) -> Result<ParseResult> {
142 if context.old_tree.is_none() {
144 if let Some(old_tree) = self.tree_cache.get(&context.file_path) {
145 context.old_tree = Some(old_tree.clone());
146 }
147 }
148
149 self.parse_file(context)
150 }
151
152 pub fn clear_cache(&self) {
154 self.tree_cache.clear();
155 }
156
157 pub fn remove_from_cache(&self, path: &Path) {
159 self.tree_cache.remove(path);
160 }
161}
162
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::{Edge, EdgeKind, NodeKind, Span};
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Test double that reports a configurable language, always parses with
    /// the JavaScript grammar, and records how it was invoked.
    struct MockParser {
        language: Language,
        /// Total number of `parse` calls.
        parse_count: AtomicUsize,
        /// Number of `parse` calls whose context carried an `old_tree`.
        incremental_count: AtomicUsize,
    }

    impl MockParser {
        fn new(language: Language) -> Self {
            Self {
                language,
                parse_count: AtomicUsize::new(0),
                incremental_count: AtomicUsize::new(0),
            }
        }

        fn parse_count(&self) -> usize {
            self.parse_count.load(Ordering::SeqCst)
        }

        fn incremental_count(&self) -> usize {
            self.incremental_count.load(Ordering::SeqCst)
        }
    }

    impl LanguageParser for MockParser {
        fn language(&self) -> Language {
            self.language
        }

        fn parse(&self, context: &ParseContext) -> Result<ParseResult> {
            self.parse_count.fetch_add(1, Ordering::SeqCst);
            if context.old_tree.is_some() {
                self.incremental_count.fetch_add(1, Ordering::SeqCst);
            }

            let mut parser = tree_sitter::Parser::new();
            parser
                .set_language(&tree_sitter_javascript::LANGUAGE.into())
                .unwrap();
            // The old tree is only counted above; the mock always parses from
            // scratch.
            let tree = parser.parse(&context.content, None).unwrap();

            let mut nodes = Vec::new();
            let mut edges = Vec::new();

            // Every parse yields a module node spanning the whole content.
            let module_span = Span::new(0, context.content.len(), 1, 1, 1, 1);
            let module_node = crate::ast::Node::new(
                &context.repo_id,
                NodeKind::Module,
                context.file_path.to_string_lossy().to_string(),
                self.language,
                context.file_path.clone(),
                module_span,
            );
            nodes.push(module_node.clone());

            // Content mentioning "function" additionally yields one function
            // node and one edge from the module to it.
            if context.content.contains("function") {
                let func_span = Span::new(0, 8, 1, 1, 1, 9);
                let func_node = crate::ast::Node::new(
                    &context.repo_id,
                    NodeKind::Function,
                    "testFunction".to_string(),
                    self.language,
                    context.file_path.clone(),
                    func_span,
                );
                nodes.push(func_node.clone());

                edges.push(Edge::new(module_node.id, func_node.id, EdgeKind::Calls));
            }

            Ok(ParseResult { tree, nodes, edges })
        }
    }

    #[test]
    fn test_language_registry() {
        let registry = LanguageRegistry::new();
        assert!(registry.get(Language::JavaScript).is_none());

        let parser = Arc::new(MockParser::new(Language::JavaScript));
        registry.register(parser.clone());

        assert!(registry.get(Language::JavaScript).is_some());

        assert!(registry.get_by_extension("js").is_some());
        assert!(registry.get_by_extension("ts").is_none());
    }

    #[test]
    fn test_parse_context() {
        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.js"),
            "console.log('hello');".to_string(),
        );

        assert_eq!(context.repo_id, "test_repo");
        assert_eq!(context.file_path, PathBuf::from("test.js"));
        assert_eq!(context.content, "console.log('hello');");
        assert!(context.old_tree.is_none());
    }

    #[test]
    fn test_parser_engine_basic() {
        let registry = Arc::new(LanguageRegistry::new());
        let parser = Arc::new(MockParser::new(Language::JavaScript));
        registry.register(parser.clone());

        let engine = ParserEngine::new(registry);
        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.js"),
            "function hello() {}".to_string(),
        );

        let result = engine.parse_file(context).unwrap();
        assert_eq!(result.nodes.len(), 2);
        assert_eq!(result.edges.len(), 1);
        assert_eq!(parser.parse_count(), 1);
    }

    #[test]
    fn test_parser_engine_unsupported_language() {
        let registry = Arc::new(LanguageRegistry::new());
        let engine = ParserEngine::new(registry);

        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.unknown"),
            "some content".to_string(),
        );

        let result = engine.parse_file(context);
        assert!(result.is_err());
        match result.unwrap_err() {
            Error::Validation { field, message, .. } => {
                assert_eq!(field, "language");
                assert!(message.contains("unknown"));
            }
            _ => panic!("Expected Validation error for unsupported language"),
        }
    }

    #[test]
    fn test_parser_engine_no_extension() {
        let registry = Arc::new(LanguageRegistry::new());
        let engine = ParserEngine::new(registry);

        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("README"),
            "some content".to_string(),
        );

        let result = engine.parse_file(context);
        assert!(result.is_err());
        match result.unwrap_err() {
            Error::Parse { file, message, .. } => {
                assert_eq!(file, PathBuf::from("README"));
                assert!(message.contains("No file extension"));
            }
            _ => panic!("Expected Parse error"),
        }
    }

    #[test]
    fn test_parser_engine_caching() {
        let registry = Arc::new(LanguageRegistry::new());
        let parser = Arc::new(MockParser::new(Language::JavaScript));
        registry.register(parser.clone());

        let engine = ParserEngine::new(registry);
        let file_path = PathBuf::from("test.js");

        // First parse: nothing cached yet, so no old tree reaches the parser.
        let context1 = ParseContext::new(
            "test_repo".to_string(),
            file_path.clone(),
            "function one() {}".to_string(),
        );
        let _result1 = engine.parse_file(context1).unwrap();
        assert_eq!(parser.incremental_count(), 0);

        // Second parse goes through `parse_incremental`, which must hand the
        // cached tree to the parser.
        let context2 = ParseContext::new(
            "test_repo".to_string(),
            file_path.clone(),
            "function two() {}".to_string(),
        );
        let result2 = engine.parse_incremental(context2).unwrap();

        assert_eq!(result2.nodes.len(), 2);
        assert_eq!(parser.parse_count(), 2);
        assert_eq!(parser.incremental_count(), 1);
    }

    #[test]
    fn test_parser_engine_cache_management() {
        let registry = Arc::new(LanguageRegistry::new());
        registry.register(Arc::new(MockParser::new(Language::JavaScript)));

        let engine = ParserEngine::new(registry);
        let file_path = PathBuf::from("test.js");

        let context = ParseContext::new(
            "test_repo".to_string(),
            file_path.clone(),
            "function test() {}".to_string(),
        );

        // A successful parse populates the cache.
        let _result = engine.parse_file(context.clone()).unwrap();
        assert!(engine.tree_cache.contains_key(&file_path));

        // Removing one path evicts exactly that entry.
        engine.remove_from_cache(&file_path);
        assert!(!engine.tree_cache.contains_key(&file_path));

        // Repopulate, then clear everything.
        let _result = engine.parse_file(context).unwrap();
        assert!(!engine.tree_cache.is_empty());

        engine.clear_cache();
        assert!(engine.tree_cache.is_empty());
    }

    #[test]
    fn test_parse_result_validation() {
        let registry = Arc::new(LanguageRegistry::new());
        registry.register(Arc::new(MockParser::new(Language::JavaScript)));

        let engine = ParserEngine::new(registry);
        // Content containing "function" makes the mock emit an edge, so the
        // edge checks below are not vacuous.
        let context = ParseContext::new(
            "test_repo".to_string(),
            PathBuf::from("test.js"),
            "function answer() { return 42; }".to_string(),
        );

        let result = engine.parse_file(context).unwrap();

        assert!(!result.nodes.is_empty());
        for node in &result.nodes {
            assert!(!node.name.is_empty());
            assert_eq!(node.lang, Language::JavaScript);
        }

        // The mock only links nodes it returned itself, so both endpoints of
        // every edge must be present in the result.
        assert!(!result.edges.is_empty());
        for edge in &result.edges {
            let source_exists = result.nodes.iter().any(|n| n.id == edge.source);
            let target_exists = result.nodes.iter().any(|n| n.id == edge.target);
            assert!(source_exists && target_exists);
        }
    }

    #[test]
    fn test_thread_safety() {
        use std::thread;

        let registry = Arc::new(LanguageRegistry::new());
        registry.register(Arc::new(MockParser::new(Language::JavaScript)));
        registry.register(Arc::new(MockParser::new(Language::Python)));

        let engine = Arc::new(ParserEngine::new(registry));

        let mut handles = vec![];

        // Ten threads parse distinct files, alternating between the two
        // registered languages, through one shared engine.
        for i in 0..10 {
            let engine_clone = Arc::clone(&engine);
            let handle = thread::spawn(move || {
                let ext = if i % 2 == 0 { "js" } else { "py" };
                let context = ParseContext::new(
                    "test_repo".to_string(),
                    PathBuf::from(format!("test{}.{}", i, ext)),
                    format!("function test{}() {{}}", i),
                );
                engine_clone.parse_file(context).unwrap()
            });
            handles.push(handle);
        }

        for handle in handles {
            let result = handle.join().unwrap();
            assert!(!result.nodes.is_empty());
        }
    }
}