codeprism_lang_python/
parser.rs

1//! Python parser implementation
2
3use crate::ast_mapper::AstMapper;
4use crate::error::{Error, Result};
5use crate::types::{Edge, Language, Node};
6use std::path::{Path, PathBuf};
7use tree_sitter::{Parser, Tree};
8
9/// Parse context for Python files
10#[derive(Debug, Clone)]
11pub struct ParseContext {
12    /// Repository ID
13    pub repo_id: String,
14    /// File path being parsed
15    pub file_path: PathBuf,
16    /// Previous tree for incremental parsing
17    pub old_tree: Option<Tree>,
18    /// File content
19    pub content: String,
20}
21
22/// Parse result containing nodes and edges
23#[derive(Debug)]
24pub struct ParseResult {
25    /// The parsed tree
26    pub tree: Tree,
27    /// Extracted nodes
28    pub nodes: Vec<Node>,
29    /// Extracted edges
30    pub edges: Vec<Edge>,
31}
32
33/// Python parser
34pub struct PythonParser {
35    /// Tree-sitter parser for Python
36    parser: Parser,
37}
38
39impl PythonParser {
40    /// Create a new Python parser
41    pub fn new() -> Self {
42        let mut parser = Parser::new();
43        parser
44            .set_language(&tree_sitter_python::LANGUAGE.into())
45            .expect("Failed to load Python grammar");
46
47        Self { parser }
48    }
49
50    /// Get the language for a file based on its extension
51    pub fn detect_language(path: &Path) -> Language {
52        // All Python files are Python language
53        match path.extension().and_then(|s| s.to_str()) {
54            Some("py") | Some("pyw") => Language::Python,
55            _ => Language::Python, // Default to Python
56        }
57    }
58
59    /// Parse a Python file
60    pub fn parse(&mut self, context: &ParseContext) -> Result<ParseResult> {
61        let language = Self::detect_language(&context.file_path);
62
63        // Parse the file
64        let tree = self
65            .parser
66            .parse(&context.content, context.old_tree.as_ref())
67            .ok_or_else(|| Error::parse(&context.file_path, "Failed to parse file"))?;
68
69        // Extract nodes and edges
70        let mapper = AstMapper::new(
71            &context.repo_id,
72            context.file_path.clone(),
73            language,
74            &context.content,
75        );
76
77        let (nodes, edges) = mapper.extract(&tree)?;
78
79        Ok(ParseResult { tree, nodes, edges })
80    }
81}
82
83impl Default for PythonParser {
84    fn default() -> Self {
85        Self::new()
86    }
87}
88
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::NodeKind;

    /// Build a context for `test.py` in `test_repo` with the given source and
    /// optional previous tree.
    fn context_for(source: &str, old_tree: Option<Tree>) -> ParseContext {
        ParseContext {
            repo_id: "test_repo".to_string(),
            file_path: PathBuf::from("test.py"),
            old_tree,
            content: source.to_string(),
        }
    }

    /// Parse `source` from scratch with a fresh parser.
    fn parse_source(source: &str) -> ParseResult {
        PythonParser::new()
            .parse(&context_for(source, None))
            .expect("test source should parse")
    }

    #[test]
    fn test_detect_language() {
        assert_eq!(
            PythonParser::detect_language(Path::new("test.py")),
            Language::Python
        );
        assert_eq!(
            PythonParser::detect_language(Path::new("test.pyw")),
            Language::Python
        );
        // Unknown or missing extensions fall back to Python as well.
        assert_eq!(
            PythonParser::detect_language(Path::new("test.txt")),
            Language::Python
        );
        assert_eq!(
            PythonParser::detect_language(Path::new("Makefile")),
            Language::Python
        );
    }

    #[test]
    fn test_parse_simple_python() {
        let result = parse_source("def hello():\n    return 'world'");
        assert!(!result.nodes.is_empty());

        // Should have at least a module node and a function node
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Module)));
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Function)));
    }

    #[test]
    fn test_parse_class() {
        let result = parse_source("class MyClass:\n    def method(self):\n        pass");
        assert!(!result.nodes.is_empty());

        // Should have module, class, and method nodes
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Module)));
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Class)));
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Method)));
    }

    #[test]
    fn test_incremental_parsing() {
        // The two versions differ only in their final character, so the edit
        // is a one-byte replacement at the end of the second line (row 1).
        let shared_prefix = "def foo():\n    return ";
        let edit_column = "    return ".len();
        let before = format!("{}1", shared_prefix);
        let after = format!("{}2", shared_prefix);

        let mut parser = PythonParser::new();

        // First parse
        let ParseResult {
            tree: mut old_tree,
            nodes: nodes1,
            ..
        } = parser.parse(&context_for(&before, None)).unwrap();

        // tree-sitter requires the old tree to be edited to match the new
        // text before it is reused for an incremental parse.
        old_tree.edit(&tree_sitter::InputEdit {
            start_byte: shared_prefix.len(),
            old_end_byte: shared_prefix.len() + 1,
            new_end_byte: shared_prefix.len() + 1,
            start_position: tree_sitter::Point::new(1, edit_column),
            old_end_position: tree_sitter::Point::new(1, edit_column + 1),
            new_end_position: tree_sitter::Point::new(1, edit_column + 1),
        });

        // Second parse with small edit
        let result2 = parser
            .parse(&context_for(&after, Some(old_tree)))
            .unwrap();

        // Both should have the same structure
        assert_eq!(nodes1.len(), result2.nodes.len());

        // Function should still be found
        let func1 = nodes1
            .iter()
            .find(|n| matches!(n.kind, NodeKind::Function))
            .unwrap();
        let func2 = result2
            .nodes
            .iter()
            .find(|n| matches!(n.kind, NodeKind::Function))
            .unwrap();

        assert_eq!(func1.name, "foo");
        assert_eq!(func2.name, "foo");
    }

    #[test]
    fn test_parse_multiple_functions() {
        let result = parse_source("def foo():\n    pass\n\ndef bar():\n    pass");

        // Should have a module and two functions
        assert!(result.nodes.len() >= 3);

        let func_nodes: Vec<_> = result
            .nodes
            .iter()
            .filter(|n| matches!(n.kind, NodeKind::Function))
            .collect();

        assert_eq!(func_nodes.len(), 2);
        assert!(func_nodes.iter().any(|n| n.name == "foo"));
        assert!(func_nodes.iter().any(|n| n.name == "bar"));
    }

    #[test]
    fn test_parse_imports() {
        let result = parse_source("import os\nfrom sys import path\nimport json as j");

        // Should have at least one import node
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Import)));
    }
}