codeprism_core/parser/
mod.rs

1//! Parser engine for incremental parsing
2
3use crate::ast::{Language, Node};
4use crate::error::{Error, Result};
5use dashmap::DashMap;
6use std::path::{Path, PathBuf};
7use std::sync::Arc;
8use tree_sitter::Tree;
9
10/// Parser context for incremental parsing
11#[derive(Debug, Clone)]
12pub struct ParseContext {
13    /// Repository ID
14    pub repo_id: String,
15    /// File path being parsed
16    pub file_path: PathBuf,
17    /// Previous tree for incremental parsing
18    pub old_tree: Option<Tree>,
19    /// File content
20    pub content: String,
21}
22
23impl ParseContext {
24    /// Create a new parse context
25    pub fn new(repo_id: String, file_path: PathBuf, content: String) -> Self {
26        Self {
27            repo_id,
28            file_path,
29            content,
30            old_tree: None,
31        }
32    }
33
34    /// Set the old tree for incremental parsing
35    pub fn with_old_tree(mut self, tree: Tree) -> Self {
36        self.old_tree = Some(tree);
37        self
38    }
39}
40
41/// Language parser trait
42pub trait LanguageParser: Send + Sync {
43    /// Get the language this parser handles
44    fn language(&self) -> Language;
45
46    /// Parse a file and extract nodes and edges
47    fn parse(&self, context: &ParseContext) -> Result<ParseResult>;
48}
49
50/// Result of parsing a file
51#[derive(Debug)]
52pub struct ParseResult {
53    /// The parsed tree
54    pub tree: Tree,
55    /// Extracted nodes
56    pub nodes: Vec<Node>,
57    /// Extracted edges
58    pub edges: Vec<crate::ast::Edge>,
59}
60
61/// Registry for language parsers
62pub struct LanguageRegistry {
63    parsers: DashMap<Language, Arc<dyn LanguageParser>>,
64}
65
66impl LanguageRegistry {
67    /// Create a new language registry
68    pub fn new() -> Self {
69        Self {
70            parsers: DashMap::new(),
71        }
72    }
73
74    /// Register a language parser
75    pub fn register(&self, parser: Arc<dyn LanguageParser>) {
76        let lang = parser.language();
77        self.parsers.insert(lang, parser);
78    }
79
80    /// Get a parser for a language
81    pub fn get(&self, language: Language) -> Option<Arc<dyn LanguageParser>> {
82        self.parsers.get(&language).map(|p| Arc::clone(&*p))
83    }
84
85    /// Get a parser for a file extension
86    pub fn get_by_extension(&self, ext: &str) -> Option<Arc<dyn LanguageParser>> {
87        let lang = Language::from_extension(ext);
88        self.get(lang)
89    }
90}
91
92impl Default for LanguageRegistry {
93    fn default() -> Self {
94        Self::new()
95    }
96}
97
98/// Main parser engine
99pub struct ParserEngine {
100    /// Language registry
101    registry: Arc<LanguageRegistry>,
102    /// Cache of parsed trees
103    tree_cache: DashMap<PathBuf, Tree>,
104}
105
106impl ParserEngine {
107    /// Create a new parser engine
108    pub fn new(registry: Arc<LanguageRegistry>) -> Self {
109        Self {
110            registry,
111            tree_cache: DashMap::new(),
112        }
113    }
114
115    /// Parse a file
116    pub fn parse_file(&self, context: ParseContext) -> Result<ParseResult> {
117        // Detect language from file extension
118        let ext = context
119            .file_path
120            .extension()
121            .and_then(|s| s.to_str())
122            .ok_or_else(|| Error::parse(&context.file_path, "No file extension"))?;
123
124        // Get the appropriate parser
125        let parser = self
126            .registry
127            .get_by_extension(ext)
128            .ok_or_else(|| Error::unsupported_language(ext.to_string()))?;
129
130        // Parse the file
131        let result = parser.parse(&context)?;
132
133        // Cache the tree
134        self.tree_cache
135            .insert(context.file_path.clone(), result.tree.clone());
136
137        Ok(result)
138    }
139
140    /// Parse a file incrementally
141    pub fn parse_incremental(&self, mut context: ParseContext) -> Result<ParseResult> {
142        // Try to get the old tree from cache
143        if context.old_tree.is_none() {
144            if let Some(old_tree) = self.tree_cache.get(&context.file_path) {
145                context.old_tree = Some(old_tree.clone());
146            }
147        }
148
149        self.parse_file(context)
150    }
151
152    /// Clear the tree cache
153    pub fn clear_cache(&self) {
154        self.tree_cache.clear();
155    }
156
157    /// Remove a specific file from the cache
158    pub fn remove_from_cache(&self, path: &Path) {
159        self.tree_cache.remove(path);
160    }
161}
162
163#[cfg(test)]
164mod tests {
165    use super::*;
166    use crate::ast::{Edge, EdgeKind, NodeKind, Span};
167    use std::sync::atomic::{AtomicUsize, Ordering};
168
169    // Mock parser for testing
170    struct MockParser {
171        language: Language,
172        parse_count: Arc<AtomicUsize>,
173    }
174
175    impl MockParser {
176        fn new(language: Language) -> Self {
177            Self {
178                language,
179                parse_count: Arc::new(AtomicUsize::new(0)),
180            }
181        }
182
183        fn parse_count(&self) -> usize {
184            self.parse_count.load(Ordering::SeqCst)
185        }
186    }
187
188    impl LanguageParser for MockParser {
189        fn language(&self) -> Language {
190            self.language
191        }
192
193        fn parse(&self, context: &ParseContext) -> Result<ParseResult> {
194            self.parse_count.fetch_add(1, Ordering::SeqCst);
195
196            // Create a real tree using tree-sitter
197            let mut parser = tree_sitter::Parser::new();
198            parser
199                .set_language(&tree_sitter_javascript::LANGUAGE.into())
200                .unwrap();
201            let tree = parser.parse(&context.content, None).unwrap();
202
203            // Create mock nodes based on content
204            let mut nodes = Vec::new();
205            let mut edges = Vec::new();
206
207            // Simple mock: create a module node and a function node if "function" is in content
208            let module_span = Span::new(0, context.content.len(), 1, 1, 1, 1);
209            let module_node = crate::ast::Node::new(
210                &context.repo_id,
211                NodeKind::Module,
212                context.file_path.to_string_lossy().to_string(),
213                self.language,
214                context.file_path.clone(),
215                module_span,
216            );
217            nodes.push(module_node.clone());
218
219            if context.content.contains("function") {
220                let func_span = Span::new(0, 8, 1, 1, 1, 9);
221                let func_node = crate::ast::Node::new(
222                    &context.repo_id,
223                    NodeKind::Function,
224                    "testFunction".to_string(),
225                    self.language,
226                    context.file_path.clone(),
227                    func_span,
228                );
229                nodes.push(func_node.clone());
230
231                // Add an edge from module to function
232                edges.push(Edge::new(module_node.id, func_node.id, EdgeKind::Calls));
233            }
234
235            Ok(ParseResult { tree, nodes, edges })
236        }
237    }
238
239    #[test]
240    fn test_language_registry() {
241        let registry = LanguageRegistry::new();
242        assert!(registry.get(Language::JavaScript).is_none());
243
244        // Register a mock parser
245        let parser = Arc::new(MockParser::new(Language::JavaScript));
246        registry.register(parser.clone());
247
248        // Test direct language lookup
249        assert!(registry.get(Language::JavaScript).is_some());
250
251        // Test extension lookup
252        assert!(registry.get_by_extension("js").is_some());
253        assert!(registry.get_by_extension("ts").is_none()); // Not registered
254    }
255
256    #[test]
257    fn test_parse_context() {
258        let context = ParseContext::new(
259            "test_repo".to_string(),
260            PathBuf::from("test.js"),
261            "console.log('hello');".to_string(),
262        );
263
264        assert_eq!(context.repo_id, "test_repo");
265        assert_eq!(context.file_path, PathBuf::from("test.js"));
266        assert!(context.old_tree.is_none());
267    }
268
269    #[test]
270    fn test_parser_engine_basic() {
271        let registry = Arc::new(LanguageRegistry::new());
272        let parser = Arc::new(MockParser::new(Language::JavaScript));
273        registry.register(parser.clone());
274
275        let engine = ParserEngine::new(registry);
276        let context = ParseContext::new(
277            "test_repo".to_string(),
278            PathBuf::from("test.js"),
279            "function hello() {}".to_string(),
280        );
281
282        let result = engine.parse_file(context).unwrap();
283        assert_eq!(result.nodes.len(), 2); // Module + Function
284        assert_eq!(result.edges.len(), 1); // Module -> Function
285        assert_eq!(parser.parse_count(), 1);
286    }
287
288    #[test]
289    fn test_parser_engine_unsupported_language() {
290        let registry = Arc::new(LanguageRegistry::new());
291        let engine = ParserEngine::new(registry);
292
293        let context = ParseContext::new(
294            "test_repo".to_string(),
295            PathBuf::from("test.unknown"),
296            "some content".to_string(),
297        );
298
299        let result = engine.parse_file(context);
300        assert!(result.is_err());
301        match result.unwrap_err() {
302            Error::Validation { field, message, .. } => {
303                assert_eq!(field, "language");
304                assert!(message.contains("unknown"));
305            }
306            _ => panic!("Expected Validation error for unsupported language"),
307        }
308    }
309
310    #[test]
311    fn test_parser_engine_no_extension() {
312        let registry = Arc::new(LanguageRegistry::new());
313        let engine = ParserEngine::new(registry);
314
315        let context = ParseContext::new(
316            "test_repo".to_string(),
317            PathBuf::from("README"),
318            "some content".to_string(),
319        );
320
321        let result = engine.parse_file(context);
322        assert!(result.is_err());
323        match result.unwrap_err() {
324            Error::Parse { file, message, .. } => {
325                assert_eq!(file, PathBuf::from("README"));
326                assert!(message.contains("No file extension"));
327            }
328            _ => panic!("Expected Parse error"),
329        }
330    }
331
332    #[test]
333    fn test_parser_engine_caching() {
334        let registry = Arc::new(LanguageRegistry::new());
335        let parser = Arc::new(MockParser::new(Language::JavaScript));
336        registry.register(parser.clone());
337
338        let engine = ParserEngine::new(registry);
339        let file_path = PathBuf::from("test.js");
340
341        // First parse
342        let context1 = ParseContext::new(
343            "test_repo".to_string(),
344            file_path.clone(),
345            "function one() {}".to_string(),
346        );
347        let _result1 = engine.parse_file(context1).unwrap();
348
349        // Second parse - should use cached tree for incremental
350        let context2 = ParseContext::new(
351            "test_repo".to_string(),
352            file_path.clone(),
353            "function two() {}".to_string(),
354        );
355        let result2 = engine.parse_incremental(context2).unwrap();
356
357        assert_eq!(result2.nodes.len(), 2);
358        assert_eq!(parser.parse_count(), 2); // Both parses executed
359    }
360
361    #[test]
362    fn test_parser_engine_cache_management() {
363        let registry = Arc::new(LanguageRegistry::new());
364        registry.register(Arc::new(MockParser::new(Language::JavaScript)));
365
366        let engine = ParserEngine::new(registry);
367        let file_path = PathBuf::from("test.js");
368
369        // Parse a file
370        let context = ParseContext::new(
371            "test_repo".to_string(),
372            file_path.clone(),
373            "function test() {}".to_string(),
374        );
375        let _result = engine.parse_file(context).unwrap();
376
377        // Remove from cache
378        engine.remove_from_cache(&file_path);
379
380        // Clear entire cache
381        engine.clear_cache();
382
383        // Test passes if no panic
384    }
385
386    #[test]
387    fn test_parse_result_validation() {
388        let registry = Arc::new(LanguageRegistry::new());
389        registry.register(Arc::new(MockParser::new(Language::JavaScript)));
390
391        let engine = ParserEngine::new(registry);
392        let context = ParseContext::new(
393            "test_repo".to_string(),
394            PathBuf::from("test.js"),
395            "const x = 42;".to_string(),
396        );
397
398        let result = engine.parse_file(context).unwrap();
399
400        // Validate nodes
401        assert!(!result.nodes.is_empty());
402        for node in &result.nodes {
403            assert!(!node.name.is_empty());
404            assert_eq!(node.lang, Language::JavaScript);
405        }
406
407        // Validate edges
408        for edge in &result.edges {
409            // Ensure edge endpoints exist in nodes
410            let source_exists = result.nodes.iter().any(|n| n.id == edge.source);
411            let target_exists = result.nodes.iter().any(|n| n.id == edge.target);
412            assert!(source_exists || target_exists); // At least one should exist in our mock
413        }
414    }
415
416    #[test]
417    fn test_thread_safety() {
418        use std::thread;
419
420        let registry = Arc::new(LanguageRegistry::new());
421        registry.register(Arc::new(MockParser::new(Language::JavaScript)));
422        registry.register(Arc::new(MockParser::new(Language::Python)));
423
424        let engine = Arc::new(ParserEngine::new(registry));
425
426        let mut handles = vec![];
427
428        // Spawn multiple threads parsing different files
429        for i in 0..10 {
430            let engine_clone = Arc::clone(&engine);
431            let handle = thread::spawn(move || {
432                let ext = if i % 2 == 0 { "js" } else { "py" };
433                let context = ParseContext::new(
434                    "test_repo".to_string(),
435                    PathBuf::from(format!("test{}.{}", i, ext)),
436                    format!("function test{}() {{}}", i),
437                );
438                engine_clone.parse_file(context).unwrap()
439            });
440            handles.push(handle);
441        }
442
443        // Wait for all threads and verify results
444        for handle in handles {
445            let result = handle.join().unwrap();
446            assert!(!result.nodes.is_empty());
447        }
448    }
449}