codeprism_lang_python/
parser.rs1use crate::ast_mapper::AstMapper;
4use crate::error::{Error, Result};
5use crate::types::{Edge, Language, Node};
6use std::path::{Path, PathBuf};
7use tree_sitter::{Parser, Tree};
8
9#[derive(Debug, Clone)]
11pub struct ParseContext {
12 pub repo_id: String,
14 pub file_path: PathBuf,
16 pub old_tree: Option<Tree>,
18 pub content: String,
20}
21
22#[derive(Debug)]
24pub struct ParseResult {
25 pub tree: Tree,
27 pub nodes: Vec<Node>,
29 pub edges: Vec<Edge>,
31}
32
33pub struct PythonParser {
35 parser: Parser,
37}
38
39impl PythonParser {
40 pub fn new() -> Self {
42 let mut parser = Parser::new();
43 parser
44 .set_language(&tree_sitter_python::LANGUAGE.into())
45 .expect("Failed to load Python grammar");
46
47 Self { parser }
48 }
49
50 pub fn detect_language(path: &Path) -> Language {
52 match path.extension().and_then(|s| s.to_str()) {
54 Some("py") | Some("pyw") => Language::Python,
55 _ => Language::Python, }
57 }
58
59 pub fn parse(&mut self, context: &ParseContext) -> Result<ParseResult> {
61 let language = Self::detect_language(&context.file_path);
62
63 let tree = self
65 .parser
66 .parse(&context.content, context.old_tree.as_ref())
67 .ok_or_else(|| Error::parse(&context.file_path, "Failed to parse file"))?;
68
69 let mapper = AstMapper::new(
71 &context.repo_id,
72 context.file_path.clone(),
73 language,
74 &context.content,
75 );
76
77 let (nodes, edges) = mapper.extract(&tree)?;
78
79 Ok(ParseResult { tree, nodes, edges })
80 }
81}
82
83impl Default for PythonParser {
84 fn default() -> Self {
85 Self::new()
86 }
87}
88
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::NodeKind;

    /// Builds the `ParseContext` shared by all tests: repo `test_repo`, file `test.py`.
    fn context_for(source: &str, old_tree: Option<Tree>) -> ParseContext {
        ParseContext {
            repo_id: "test_repo".to_string(),
            file_path: PathBuf::from("test.py"),
            old_tree,
            content: source.to_string(),
        }
    }

    /// Parses `source` from scratch with a fresh parser, panicking on failure.
    fn parse_fresh(source: &str) -> ParseResult {
        let mut parser = PythonParser::new();
        parser.parse(&context_for(source, None)).unwrap()
    }

    /// Returns the first node of kind `Function`, panicking if there is none.
    fn first_function(nodes: &[Node]) -> &Node {
        nodes
            .iter()
            .find(|n| matches!(n.kind, NodeKind::Function))
            .unwrap()
    }

    #[test]
    fn test_detect_language() {
        for file_name in ["test.py", "test.pyw"] {
            assert_eq!(
                PythonParser::detect_language(Path::new(file_name)),
                Language::Python
            );
        }
    }

    #[test]
    fn test_parse_simple_python() {
        let result = parse_fresh("def hello():\n return 'world'");
        assert!(!result.nodes.is_empty());

        // A module node and a function node must both be present.
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Module)));
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Function)));
    }

    #[test]
    fn test_parse_class() {
        let result = parse_fresh("class MyClass:\n def method(self):\n pass");
        assert!(!result.nodes.is_empty());

        // Module, class and method nodes must all be present.
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Module)));
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Class)));
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Method)));
    }

    #[test]
    fn test_incremental_parsing() {
        let mut parser = PythonParser::new();

        // First pass: no previous tree.
        let ParseResult {
            tree: first_tree,
            nodes: first_nodes,
            ..
        } = parser
            .parse(&context_for("def foo():\n return 1", None))
            .unwrap();

        // Second pass: reuse the tree from the first pass.
        let second = parser
            .parse(&context_for("def foo():\n return 2", Some(first_tree)))
            .unwrap();

        // Both passes must yield the same number of nodes.
        assert_eq!(first_nodes.len(), second.nodes.len());

        let func1 = first_function(&first_nodes);
        let func2 = first_function(&second.nodes);

        assert_eq!(func1.name, "foo");
        assert_eq!(func2.name, "foo");
    }

    #[test]
    fn test_parse_multiple_functions() {
        let result = parse_fresh("def foo():\n pass\n\ndef bar():\n pass");

        println!("Parsed nodes:");
        for node in &result.nodes {
            println!(" {:?} - {}", node.kind, node.name);
        }

        // At least three nodes are expected in total.
        assert!(result.nodes.len() >= 3);

        let func_nodes: Vec<_> = result
            .nodes
            .iter()
            .filter(|n| matches!(n.kind, NodeKind::Function))
            .collect();

        assert_eq!(func_nodes.len(), 2);
        for expected in ["foo", "bar"] {
            assert!(func_nodes.iter().any(|n| n.name == expected));
        }
    }

    #[test]
    fn test_parse_imports() {
        let result = parse_fresh("import os\nfrom sys import path\nimport json as j");

        // At least one import node must have been extracted.
        assert!(result
            .nodes
            .iter()
            .any(|n| matches!(n.kind, NodeKind::Import)));
    }
}