codeprism_core/parser/
mod.rs

1//! Parser engine for incremental parsing
2
3use crate::ast::{Language, Node};
4use crate::error::{Error, Result};
5use dashmap::DashMap;
6use std::path::{Path, PathBuf};
7use std::sync::Arc;
8use tree_sitter::Tree;
9
10/// Parser context for incremental parsing
11#[derive(Debug, Clone)]
12pub struct ParseContext {
13    /// Repository ID
14    pub repo_id: String,
15    /// File path being parsed
16    pub file_path: PathBuf,
17    /// Previous tree for incremental parsing
18    pub old_tree: Option<Tree>,
19    /// File content
20    pub content: String,
21}
22
23impl ParseContext {
24    /// Create a new parse context
25    pub fn new(repo_id: String, file_path: PathBuf, content: String) -> Self {
26        Self {
27            repo_id,
28            file_path,
29            content,
30            old_tree: None,
31        }
32    }
33
34    /// Set the old tree for incremental parsing
35    pub fn with_old_tree(mut self, tree: Tree) -> Self {
36        self.old_tree = Some(tree);
37        self
38    }
39}
40
/// Language parser trait
///
/// Implementors must be `Send + Sync`; [`LanguageRegistry`] stores them as
/// `Arc<dyn LanguageParser>` keyed by the value of [`LanguageParser::language`].
pub trait LanguageParser: Send + Sync {
    /// Get the language this parser handles
    fn language(&self) -> Language;

    /// Parse a file and extract nodes and edges
    ///
    /// `context` supplies the repository ID, file path, file content and an
    /// optional previous tree. On success the tree, nodes and edges are
    /// returned together as a [`ParseResult`].
    fn parse(&self, context: &ParseContext) -> Result<ParseResult>;
}
49
/// Result of parsing a file
///
/// Produced by [`LanguageParser::parse`]. [`ParserEngine::parse_file`] stores
/// a clone of `tree` in its cache and returns the whole value to the caller.
#[derive(Debug)]
pub struct ParseResult {
    /// The parsed tree
    pub tree: Tree,
    /// Extracted nodes
    pub nodes: Vec<Node>,
    /// Extracted edges
    pub edges: Vec<crate::ast::Edge>,
}
60
61/// Registry for language parsers
62pub struct LanguageRegistry {
63    parsers: DashMap<Language, Arc<dyn LanguageParser>>,
64}
65
66impl LanguageRegistry {
67    /// Create a new language registry
68    pub fn new() -> Self {
69        Self {
70            parsers: DashMap::new(),
71        }
72    }
73
74    /// Register a language parser
75    pub fn register(&self, parser: Arc<dyn LanguageParser>) {
76        let lang = parser.language();
77        self.parsers.insert(lang, parser);
78    }
79
80    /// Get a parser for a language
81    pub fn get(&self, language: Language) -> Option<Arc<dyn LanguageParser>> {
82        self.parsers.get(&language).map(|p| Arc::clone(&*p))
83    }
84
85    /// Get a parser for a file extension
86    pub fn get_by_extension(&self, ext: &str) -> Option<Arc<dyn LanguageParser>> {
87        let lang = Language::from_extension(ext);
88        self.get(lang)
89    }
90}
91
92impl Default for LanguageRegistry {
93    fn default() -> Self {
94        Self::new()
95    }
96}
97
98/// Main parser engine
99pub struct ParserEngine {
100    /// Language registry
101    registry: Arc<LanguageRegistry>,
102    /// Cache of parsed trees
103    tree_cache: DashMap<PathBuf, Tree>,
104}
105
106impl ParserEngine {
107    /// Create a new parser engine
108    pub fn new(registry: Arc<LanguageRegistry>) -> Self {
109        Self {
110            registry,
111            tree_cache: DashMap::new(),
112        }
113    }
114
115    /// Parse a file
116    pub fn parse_file(&self, context: ParseContext) -> Result<ParseResult> {
117        // Detect language from file extension
118        let ext = context
119            .file_path
120            .extension()
121            .and_then(|s| s.to_str())
122            .ok_or_else(|| Error::parse(&context.file_path, "No file extension"))?;
123
124        // Get the appropriate parser
125        let parser = self
126            .registry
127            .get_by_extension(ext)
128            .ok_or_else(|| Error::unsupported_language(ext.to_string()))?;
129
130        // Parse the file
131        let result = parser.parse(&context)?;
132
133        // Cache the tree
134        self.tree_cache
135            .insert(context.file_path.clone(), result.tree.clone());
136
137        Ok(result)
138    }
139
140    /// Parse a file incrementally
141    pub fn parse_incremental(&self, mut context: ParseContext) -> Result<ParseResult> {
142        // Try to get the old tree from cache
143        if context.old_tree.is_none() {
144            if let Some(old_tree) = self.tree_cache.get(&context.file_path) {
145                context.old_tree = Some(old_tree.clone());
146            }
147        }
148
149        self.parse_file(context)
150    }
151
152    /// Clear the tree cache
153    pub fn clear_cache(&self) {
154        self.tree_cache.clear();
155    }
156
157    /// Remove a specific file from the cache
158    pub fn remove_from_cache(&self, path: &Path) {
159        self.tree_cache.remove(path);
160    }
161}
162
163#[cfg(test)]
164mod tests {
165    use super::*;
166    use crate::ast::{Edge, EdgeKind, NodeKind, Span};
167    use std::sync::atomic::{AtomicUsize, Ordering};
168
169    // Mock parser for testing
170    struct MockParser {
171        language: Language,
172        parse_count: Arc<AtomicUsize>,
173    }
174
175    impl MockParser {
176        fn new(language: Language) -> Self {
177            Self {
178                language,
179                parse_count: Arc::new(AtomicUsize::new(0)),
180            }
181        }
182
183        fn parse_count(&self) -> usize {
184            self.parse_count.load(Ordering::SeqCst)
185        }
186    }
187
188    impl LanguageParser for MockParser {
189        fn language(&self) -> Language {
190            self.language
191        }
192
193        fn parse(&self, context: &ParseContext) -> Result<ParseResult> {
194            self.parse_count.fetch_add(1, Ordering::SeqCst);
195
196            // Create a real tree using tree-sitter
197            let mut parser = tree_sitter::Parser::new();
198            parser
199                .set_language(&tree_sitter_javascript::LANGUAGE.into())
200                .unwrap();
201            let tree = parser.parse(&context.content, None).unwrap();
202
203            // Create mock nodes based on content
204            let mut nodes = Vec::new();
205            let mut edges = Vec::new();
206
207            // Simple mock: create a module node and a function node if "function" is in content
208            let module_span = Span::new(0, context.content.len(), 1, 1, 1, 1);
209            let module_node = crate::ast::Node::new(
210                &context.repo_id,
211                NodeKind::Module,
212                context.file_path.to_string_lossy().to_string(),
213                self.language,
214                context.file_path.clone(),
215                module_span,
216            );
217            nodes.push(module_node.clone());
218
219            if context.content.contains("function") {
220                let func_span = Span::new(0, 8, 1, 1, 1, 9);
221                let func_node = crate::ast::Node::new(
222                    &context.repo_id,
223                    NodeKind::Function,
224                    "testFunction".to_string(),
225                    self.language,
226                    context.file_path.clone(),
227                    func_span,
228                );
229                nodes.push(func_node.clone());
230
231                // Add an edge from module to function
232                edges.push(Edge::new(module_node.id, func_node.id, EdgeKind::Calls));
233            }
234
235            Ok(ParseResult { tree, nodes, edges })
236        }
237    }
238
239    #[test]
240    fn test_language_registry() {
241        let registry = LanguageRegistry::new();
242        assert!(registry.get(Language::JavaScript).is_none());
243
244        // Register a mock parser
245        let parser = Arc::new(MockParser::new(Language::JavaScript));
246        registry.register(parser.clone());
247
248        // Test direct language lookup with functionality validation
249        assert!(
250            registry.get(Language::JavaScript).is_some(),
251            "JavaScript parser should be registered"
252        );
253        let js_parser = registry.get(Language::JavaScript).unwrap();
254        assert_eq!(
255            js_parser.language(),
256            Language::JavaScript,
257            "Parser should return correct language"
258        );
259        // Verify we get the same language (ptr_eq doesn't work with trait objects)
260        assert_eq!(
261            js_parser.language(),
262            parser.language(),
263            "Should return parser with same language"
264        );
265
266        // Test extension lookup with functionality validation
267        assert!(
268            registry.get_by_extension("js").is_some(),
269            "Should find parser by .js extension"
270        );
271        let js_parser_by_ext = registry.get_by_extension("js").unwrap();
272        assert_eq!(
273            js_parser_by_ext.language(),
274            Language::JavaScript,
275            "Extension lookup should return JavaScript parser"
276        );
277        assert!(
278            registry.get_by_extension("ts").is_none(),
279            "Should not find parser for unregistered .ts extension"
280        );
281    }
282
283    #[test]
284    fn test_parse_context() {
285        let context = ParseContext::new(
286            "test_repo".to_string(),
287            PathBuf::from("test.js"),
288            "console.log('hello');".to_string(),
289        );
290
291        assert_eq!(context.repo_id, "test_repo");
292        assert_eq!(context.file_path, PathBuf::from("test.js"));
293        assert!(context.old_tree.is_none(), "Should be none");
294    }
295
296    #[test]
297    fn test_parser_engine_basic() {
298        let registry = Arc::new(LanguageRegistry::new());
299        let parser = Arc::new(MockParser::new(Language::JavaScript));
300        registry.register(parser.clone());
301
302        let engine = ParserEngine::new(registry);
303        let context = ParseContext::new(
304            "test_repo".to_string(),
305            PathBuf::from("test.js"),
306            "function hello() {}".to_string(),
307        );
308
309        let result = engine.parse_file(context).unwrap();
310        assert_eq!(result.nodes.len(), 2, "Should have 2 items"); // Module + Function
311        assert_eq!(result.edges.len(), 1, "Should have 1 items"); // Module -> Function
312        assert_eq!(parser.parse_count(), 1);
313    }
314
315    #[test]
316    fn test_parser_engine_unsupported_language() {
317        let registry = Arc::new(LanguageRegistry::new());
318        let engine = ParserEngine::new(registry);
319
320        let context = ParseContext::new(
321            "test_repo".to_string(),
322            PathBuf::from("test.unknown"),
323            "some content".to_string(),
324        );
325
326        let result = engine.parse_file(context);
327        assert!(result.is_err());
328        match result.unwrap_err() {
329            Error::Validation { field, message, .. } => {
330                assert_eq!(field, "language");
331                assert!(message.contains("unknown"));
332            }
333            _ => panic!("Expected Validation error for unsupported language"),
334        }
335    }
336
337    #[test]
338    fn test_parser_engine_no_extension() {
339        let registry = Arc::new(LanguageRegistry::new());
340        let engine = ParserEngine::new(registry);
341
342        let context = ParseContext::new(
343            "test_repo".to_string(),
344            PathBuf::from("README"),
345            "some content".to_string(),
346        );
347
348        let result = engine.parse_file(context);
349        assert!(result.is_err());
350        match result.unwrap_err() {
351            Error::Parse { file, message, .. } => {
352                assert_eq!(file, PathBuf::from("README"));
353                assert!(message.contains("No file extension"));
354            }
355            _ => panic!("Expected Parse error"),
356        }
357    }
358
359    #[test]
360    fn test_parser_engine_caching() {
361        let registry = Arc::new(LanguageRegistry::new());
362        let parser = Arc::new(MockParser::new(Language::JavaScript));
363        registry.register(parser.clone());
364
365        let engine = ParserEngine::new(registry);
366        let file_path = PathBuf::from("test.js");
367
368        // First parse
369        let context1 = ParseContext::new(
370            "test_repo".to_string(),
371            file_path.clone(),
372            "function one() {}".to_string(),
373        );
374        let _result1 = engine.parse_file(context1).unwrap();
375
376        // Second parse - should use cached tree for incremental
377        let context2 = ParseContext::new(
378            "test_repo".to_string(),
379            file_path.clone(),
380            "function two() {}".to_string(),
381        );
382        let result2 = engine.parse_incremental(context2).unwrap();
383
384        assert_eq!(result2.nodes.len(), 2, "Should have 2 items");
385        assert_eq!(parser.parse_count(), 2); // Both parses executed
386    }
387
388    #[test]
389    fn test_parser_engine_cache_management() {
390        let registry = Arc::new(LanguageRegistry::new());
391        registry.register(Arc::new(MockParser::new(Language::JavaScript)));
392
393        let engine = ParserEngine::new(registry);
394        let file_path = PathBuf::from("test.js");
395
396        // Parse a file
397        let context = ParseContext::new(
398            "test_repo".to_string(),
399            file_path.clone(),
400            "function test() {}".to_string(),
401        );
402        let _result = engine.parse_file(context).unwrap();
403
404        // Remove from cache
405        engine.remove_from_cache(&file_path);
406
407        // Clear entire cache
408        engine.clear_cache();
409
410        // Test passes if no panic
411    }
412
413    #[test]
414    fn test_parse_result_validation() {
415        let registry = Arc::new(LanguageRegistry::new());
416        registry.register(Arc::new(MockParser::new(Language::JavaScript)));
417
418        let engine = ParserEngine::new(registry);
419        let context = ParseContext::new(
420            "test_repo".to_string(),
421            PathBuf::from("test.js"),
422            "const x = 42;".to_string(),
423        );
424
425        let result = engine.parse_file(context).unwrap();
426
427        // Validate nodes
428        assert!(!result.nodes.is_empty(), "Should not be empty");
429        for node in &result.nodes {
430            assert!(!node.name.is_empty(), "Should not be empty");
431            assert_eq!(node.lang, Language::JavaScript);
432        }
433
434        // Validate edges
435        for edge in &result.edges {
436            // Ensure edge endpoints exist in nodes
437            let source_exists = result.nodes.iter().any(|n| n.id == edge.source);
438            let target_exists = result.nodes.iter().any(|n| n.id == edge.target);
439            assert!(source_exists || target_exists); // At least one should exist in our mock
440        }
441    }
442
443    #[test]
444    fn test_thread_safety() {
445        use std::thread;
446
447        let registry = Arc::new(LanguageRegistry::new());
448        registry.register(Arc::new(MockParser::new(Language::JavaScript)));
449        registry.register(Arc::new(MockParser::new(Language::Python)));
450
451        let engine = Arc::new(ParserEngine::new(registry));
452
453        let mut handles = vec![];
454
455        // Spawn multiple threads parsing different files
456        for i in 0..10 {
457            let engine_clone = Arc::clone(&engine);
458            let handle = thread::spawn(move || {
459                let ext = if i % 2 == 0 { "js" } else { "py" };
460                let context = ParseContext::new(
461                    "test_repo".to_string(),
462                    PathBuf::from(format!("test{i}.{ext}")),
463                    format!("function test{i}() {{}}"),
464                );
465                engine_clone.parse_file(context).unwrap()
466            });
467            handles.push(handle);
468        }
469
470        // Wait for all threads and verify results
471        for handle in handles {
472            let result = handle.join().unwrap();
473            assert!(!result.nodes.is_empty(), "Should not be empty");
474        }
475    }
476}