codeprism_lang_rust/
parser.rs

1//! Rust parser implementation
2
3use crate::ast_mapper::AstMapper;
4use crate::error::{Error, Result};
5use crate::types::{Edge, Language, Node};
6use std::path::{Path, PathBuf};
7use tree_sitter::{Parser, Tree};
8
9/// Parse context for Rust files
10#[derive(Debug, Clone)]
11pub struct ParseContext {
12    /// Repository ID
13    pub repo_id: String,
14    /// File path being parsed
15    pub file_path: PathBuf,
16    /// Previous tree for incremental parsing
17    pub old_tree: Option<Tree>,
18    /// File content
19    pub content: String,
20}
21
22/// Parse result containing nodes and edges
23#[derive(Debug)]
24pub struct ParseResult {
25    /// The parsed tree
26    pub tree: Tree,
27    /// Extracted nodes
28    pub nodes: Vec<Node>,
29    /// Extracted edges
30    pub edges: Vec<Edge>,
31}
32
33/// Rust parser
34pub struct RustParser {
35    /// Tree-sitter parser for Rust
36    parser: Parser,
37}
38
39impl RustParser {
40    /// Create a new Rust parser
41    pub fn new() -> Self {
42        let mut parser = Parser::new();
43        parser
44            .set_language(&tree_sitter_rust::LANGUAGE.into())
45            .expect("Failed to load Rust grammar");
46
47        Self { parser }
48    }
49
50    /// Get the language for a file based on its extension
51    pub fn detect_language(path: &Path) -> Language {
52        // All Rust files are Rust language
53        match path.extension().and_then(|s| s.to_str()) {
54            Some("rs") => Language::Rust,
55            _ => Language::Rust, // Default to Rust
56        }
57    }
58
59    /// Parse a Rust file
60    pub fn parse(&mut self, context: &ParseContext) -> Result<ParseResult> {
61        let language = Self::detect_language(&context.file_path);
62
63        // Parse the file
64        let tree = self
65            .parser
66            .parse(&context.content, context.old_tree.as_ref())
67            .ok_or_else(|| Error::parse(&context.file_path, "Failed to parse file"))?;
68
69        // Extract nodes and edges
70        let mapper = AstMapper::new(
71            &context.repo_id,
72            context.file_path.clone(),
73            language,
74            &context.content,
75        );
76
77        let (nodes, edges) = mapper.extract(&tree)?;
78
79        Ok(ParseResult { tree, nodes, edges })
80    }
81}
82
83impl Default for RustParser {
84    fn default() -> Self {
85        Self::new()
86    }
87}
88
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::NodeKind;

    /// Builds a parse context for `source` using the repository id and file
    /// name shared by every test in this module.
    fn context_for(source: &str, old_tree: Option<Tree>) -> ParseContext {
        ParseContext {
            repo_id: "test_repo".to_string(),
            file_path: PathBuf::from("test.rs"),
            old_tree,
            content: source.to_string(),
        }
    }

    /// Parses `source` from scratch with a fresh parser, panicking on failure.
    fn parse_source(source: &str) -> ParseResult {
        RustParser::new()
            .parse(&context_for(source, None))
            .expect("test source should parse")
    }

    /// Collects the nodes of `result` whose kind satisfies `is_kind`.
    fn nodes_matching<'a>(
        result: &'a ParseResult,
        is_kind: impl Fn(&NodeKind) -> bool,
    ) -> Vec<&'a Node> {
        result
            .nodes
            .iter()
            .filter(|node| is_kind(&node.kind))
            .collect()
    }

    /// Reports whether `result` contains at least one node whose kind
    /// satisfies `is_kind`.
    fn has_node(result: &ParseResult, is_kind: impl Fn(&NodeKind) -> bool) -> bool {
        result.nodes.iter().any(|node| is_kind(&node.kind))
    }

    #[test]
    fn test_detect_language() {
        assert_eq!(
            RustParser::detect_language(Path::new("test.rs")),
            Language::Rust
        );
        // Paths without a `.rs` extension also default to Rust.
        assert_eq!(
            RustParser::detect_language(Path::new("Makefile")),
            Language::Rust
        );
    }

    #[test]
    fn test_parse_simple_rust() {
        let result = parse_source("fn hello() -> &'static str {\n    \"world\"\n}");
        assert!(!result.nodes.is_empty());

        // Should have at least a module node and a function node
        assert!(has_node(&result, |k| matches!(k, NodeKind::Module)));
        assert!(has_node(&result, |k| matches!(k, NodeKind::Function)));
    }

    #[test]
    fn test_parse_struct() {
        let result = parse_source("struct Point { x: i32, y: i32 }");
        assert!(!result.nodes.is_empty());

        // Should have module and struct nodes
        assert!(has_node(&result, |k| matches!(k, NodeKind::Module)));
        assert!(has_node(&result, |k| matches!(k, NodeKind::Struct)));
    }

    #[test]
    fn test_parse_trait_and_impl() {
        let result = parse_source(
            "trait Display { fn fmt(&self); }\nimpl Display for String { fn fmt(&self) {} }",
        );
        assert!(!result.nodes.is_empty());

        // Should have trait and impl nodes
        assert!(has_node(&result, |k| matches!(k, NodeKind::Trait)));
        assert!(has_node(&result, |k| matches!(k, NodeKind::Impl)));
    }

    #[test]
    fn test_parse_use_statements() {
        let result =
            parse_source("use std::collections::HashMap;\nuse serde::{Serialize, Deserialize};");

        // Should have at least one use node
        let use_nodes = nodes_matching(&result, |k| matches!(k, NodeKind::Use));
        assert!(!use_nodes.is_empty());
    }

    #[test]
    fn test_parse_enum() {
        let result = parse_source("enum Color { Red, Green, Blue }");

        // Should have enum node
        assert!(has_node(&result, |k| matches!(k, NodeKind::Enum)));
    }

    #[test]
    fn test_incremental_parsing() {
        const ORIGINAL: &str = "fn foo() -> i32 {\n    1\n}";
        const EDITED: &str = "fn foo() -> i32 {\n    2\n}";

        let mut parser = RustParser::new();

        // First parse
        let first = parser.parse(&context_for(ORIGINAL, None)).unwrap();
        let ParseResult {
            tree: mut old_tree,
            nodes: first_nodes,
            ..
        } = first;

        // tree-sitter requires the previous tree to be edited so that it
        // mirrors the text change before it is reused. The edit here replaces
        // the single byte `1` with `2`, so old and new end positions coincide.
        let start_byte = ORIGINAL.find('1').expect("literal `1` is present");
        let row = ORIGINAL[..start_byte].matches('\n').count();
        let line_start = ORIGINAL[..start_byte].rfind('\n').map_or(0, |i| i + 1);
        let column = start_byte - line_start;
        old_tree.edit(&tree_sitter::InputEdit {
            start_byte,
            old_end_byte: start_byte + 1,
            new_end_byte: start_byte + 1,
            start_position: tree_sitter::Point { row, column },
            old_end_position: tree_sitter::Point {
                row,
                column: column + 1,
            },
            new_end_position: tree_sitter::Point {
                row,
                column: column + 1,
            },
        });

        // Second parse with small edit
        let second = parser
            .parse(&context_for(EDITED, Some(old_tree)))
            .unwrap();

        // Both should have the same structure
        assert_eq!(first_nodes.len(), second.nodes.len());

        // Function should still be found
        let func1 = first_nodes
            .iter()
            .find(|n| matches!(n.kind, NodeKind::Function))
            .unwrap();
        let func2 = second
            .nodes
            .iter()
            .find(|n| matches!(n.kind, NodeKind::Function))
            .unwrap();

        assert_eq!(func1.name, "foo");
        assert_eq!(func2.name, "foo");
    }

    #[test]
    fn test_ownership_patterns() {
        let result = parse_source(
            "fn process_data(data: Vec<String>, buffer: &mut [u8], reference: &str) -> &str { reference }",
        );

        // Should have function and parameter nodes
        let func_nodes = nodes_matching(&result, |k| matches!(k, NodeKind::Function));
        assert_eq!(func_nodes.len(), 1);

        // Should have parameters with ownership information
        let param_nodes = nodes_matching(&result, |k| matches!(k, NodeKind::Parameter));
        assert!(param_nodes.len() >= 3);

        // Check that at least one parameter has ownership metadata
        let has_ownership_metadata = param_nodes.iter().any(|node| {
            node.metadata
                .as_object()
                .is_some_and(|metadata| metadata.contains_key("ownership"))
        });
        assert!(has_ownership_metadata);
    }

    #[test]
    fn test_lifetime_annotations() {
        let result = parse_source(
            "fn longest<'a>(x: &'a str, y: &'a str) -> &'a str { if x.len() > y.len() { x } else { y } }",
        );

        // Should have at least one lifetime node
        let lifetime_nodes = nodes_matching(&result, |k| matches!(k, NodeKind::Lifetime));
        assert!(!lifetime_nodes.is_empty());

        // Check for 'a lifetime
        let has_a_lifetime = lifetime_nodes.iter().any(|node| node.name.contains("'a"));
        assert!(has_a_lifetime);
    }

    #[test]
    fn test_trait_bounds_and_impl() {
        let result = parse_source(
            "trait Clone { fn clone(&self) -> Self; }\nstruct Point { x: i32, y: i32 }\nimpl Clone for Point { fn clone(&self) -> Self { *self } }",
        );

        // Should have trait and impl nodes
        let trait_nodes = nodes_matching(&result, |k| matches!(k, NodeKind::Trait));
        assert_eq!(trait_nodes.len(), 1);

        let impl_nodes = nodes_matching(&result, |k| matches!(k, NodeKind::Impl));
        assert_eq!(impl_nodes.len(), 1);

        // Check impl metadata
        let impl_node = impl_nodes[0];
        assert!(impl_node.metadata.as_object().is_some_and(|metadata| {
            metadata.get("impl_type").and_then(|v| v.as_str()) == Some("trait_impl")
        }));
    }

    #[test]
    fn test_derive_attributes() {
        let result =
            parse_source("#[derive(Debug, Clone, PartialEq)]\nstruct Point { x: i32, y: i32 }");

        // Should have attribute nodes
        let attr_nodes = nodes_matching(&result, |k| matches!(k, NodeKind::Attribute));
        assert!(!attr_nodes.is_empty());

        // Check for derive attribute with traits
        let has_derive_attr = attr_nodes.iter().any(|node| {
            node.name.contains("derive")
                && node.metadata.as_object().is_some_and(|metadata| {
                    metadata.get("attribute_type").and_then(|v| v.as_str()) == Some("derive")
                })
        });
        assert!(has_derive_attr);
    }

    #[test]
    fn test_macro_invocations() {
        let result = parse_source("fn main() { println!(\"Hello, world!\"); vec![1, 2, 3]; }");

        // Should have call nodes for macro invocations:
        // at least the println! and vec! macro calls
        let call_nodes = nodes_matching(&result, |k| matches!(k, NodeKind::Call));
        assert!(call_nodes.len() >= 2);

        // Check for macro call names
        let has_println = call_nodes.iter().any(|node| node.name.contains("println!"));
        let has_vec = call_nodes.iter().any(|node| node.name.contains("vec!"));

        assert!(has_println);
        assert!(has_vec);
    }
}