1use codegraph::{CodeGraph, NodeId};
8use codegraph_parser_api::{CodeParser, FileInfo, ParserConfig, ParserError, ParserMetrics};
9use std::path::Path;
10use std::sync::Mutex;
11use std::time::{Duration, Instant};
12
13pub struct PythonParser {
15 config: ParserConfig,
16 metrics: Mutex<ParserMetrics>,
17}
18
19impl PythonParser {
20 pub fn new() -> Self {
22 Self {
23 config: ParserConfig::default(),
24 metrics: Mutex::new(ParserMetrics::default()),
25 }
26 }
27
28 pub fn with_config(config: ParserConfig) -> Self {
30 Self {
31 config,
32 metrics: Mutex::new(ParserMetrics::default()),
33 }
34 }
35
36 fn update_metrics(
38 &self,
39 success: bool,
40 duration: Duration,
41 entities: usize,
42 relationships: usize,
43 ) {
44 let mut metrics = self.metrics.lock().unwrap();
45 metrics.files_attempted += 1;
46 if success {
47 metrics.files_succeeded += 1;
48 } else {
49 metrics.files_failed += 1;
50 }
51 metrics.total_parse_time += duration;
52 metrics.total_entities += entities;
53 metrics.total_relationships += relationships;
54 }
55
56 fn ir_to_graph(
58 &self,
59 ir: &codegraph_parser_api::CodeIR,
60 graph: &mut CodeGraph,
61 file_path: &Path,
62 ) -> Result<FileInfo, ParserError> {
63 use codegraph::{EdgeType, NodeType, PropertyMap};
64 use std::collections::HashMap;
65
66 let mut node_map: HashMap<String, NodeId> = HashMap::new();
67 let mut function_ids = Vec::new();
68 let mut class_ids = Vec::new();
69 let mut trait_ids = Vec::new();
70 let mut import_ids = Vec::new();
71
72 let file_id = if let Some(ref module) = ir.module {
74 let mut props = PropertyMap::new()
75 .with("name", module.name.clone())
76 .with("path", module.path.clone())
77 .with("language", module.language.clone())
78 .with("line_count", module.line_count.to_string());
79
80 if let Some(ref doc) = module.doc_comment {
81 props = props.with("doc", doc.clone());
82 }
83
84 let id = graph
85 .add_node(NodeType::CodeFile, props)
86 .map_err(|e| ParserError::GraphError(e.to_string()))?;
87 node_map.insert(module.name.clone(), id);
88 id
89 } else {
90 let file_name = file_path
92 .file_stem()
93 .and_then(|s| s.to_str())
94 .unwrap_or("unknown")
95 .to_string();
96 let props = PropertyMap::new()
97 .with("name", file_name.clone())
98 .with("path", file_path.display().to_string())
99 .with("language", "python");
100
101 let id = graph
102 .add_node(NodeType::CodeFile, props)
103 .map_err(|e| ParserError::GraphError(e.to_string()))?;
104 node_map.insert(file_name, id);
105 id
106 };
107
108 for func in &ir.functions {
110 let mut props = PropertyMap::new()
111 .with("name", func.name.clone())
112 .with("signature", func.signature.clone())
113 .with("visibility", func.visibility.clone())
114 .with("line_start", func.line_start.to_string())
115 .with("line_end", func.line_end.to_string())
116 .with("is_async", func.is_async.to_string())
117 .with("is_static", func.is_static.to_string());
118
119 if let Some(ref doc) = func.doc_comment {
120 props = props.with("doc", doc.clone());
121 }
122 if let Some(ref return_type) = func.return_type {
123 props = props.with("return_type", return_type.clone());
124 }
125
126 let func_id = graph
127 .add_node(NodeType::Function, props)
128 .map_err(|e| ParserError::GraphError(e.to_string()))?;
129
130 node_map.insert(func.name.clone(), func_id);
131 function_ids.push(func_id);
132
133 graph
135 .add_edge(file_id, func_id, EdgeType::Contains, PropertyMap::new())
136 .map_err(|e| ParserError::GraphError(e.to_string()))?;
137 }
138
139 for class in &ir.classes {
141 let mut props = PropertyMap::new()
142 .with("name", class.name.clone())
143 .with("visibility", class.visibility.clone())
144 .with("line_start", class.line_start.to_string())
145 .with("line_end", class.line_end.to_string())
146 .with("is_abstract", class.is_abstract.to_string());
147
148 if let Some(ref doc) = class.doc_comment {
149 props = props.with("doc", doc.clone());
150 }
151
152 let class_id = graph
153 .add_node(NodeType::Class, props)
154 .map_err(|e| ParserError::GraphError(e.to_string()))?;
155
156 node_map.insert(class.name.clone(), class_id);
157 class_ids.push(class_id);
158
159 graph
161 .add_edge(file_id, class_id, EdgeType::Contains, PropertyMap::new())
162 .map_err(|e| ParserError::GraphError(e.to_string()))?;
163
164 for method in &class.methods {
166 let method_name = format!("{}.{}", class.name, method.name);
167 let mut method_props = PropertyMap::new()
168 .with("name", method_name.clone())
169 .with("signature", method.signature.clone())
170 .with("visibility", method.visibility.clone())
171 .with("line_start", method.line_start.to_string())
172 .with("line_end", method.line_end.to_string())
173 .with("is_method", "true")
174 .with("parent_class", class.name.clone());
175
176 if let Some(ref doc) = method.doc_comment {
177 method_props = method_props.with("doc", doc.clone());
178 }
179
180 let method_id = graph
181 .add_node(NodeType::Function, method_props)
182 .map_err(|e| ParserError::GraphError(e.to_string()))?;
183
184 node_map.insert(method_name, method_id);
185 function_ids.push(method_id);
186
187 graph
189 .add_edge(class_id, method_id, EdgeType::Contains, PropertyMap::new())
190 .map_err(|e| ParserError::GraphError(e.to_string()))?;
191 }
192 }
193
194 for trait_entity in &ir.traits {
196 let mut props = PropertyMap::new()
197 .with("name", trait_entity.name.clone())
198 .with("visibility", trait_entity.visibility.clone())
199 .with("line_start", trait_entity.line_start.to_string())
200 .with("line_end", trait_entity.line_end.to_string());
201
202 if let Some(ref doc) = trait_entity.doc_comment {
203 props = props.with("doc", doc.clone());
204 }
205
206 let trait_id = graph
207 .add_node(NodeType::Interface, props)
208 .map_err(|e| ParserError::GraphError(e.to_string()))?;
209
210 node_map.insert(trait_entity.name.clone(), trait_id);
211 trait_ids.push(trait_id);
212
213 graph
215 .add_edge(file_id, trait_id, EdgeType::Contains, PropertyMap::new())
216 .map_err(|e| ParserError::GraphError(e.to_string()))?;
217 }
218
219 for import in &ir.imports {
221 let imported_module = &import.imported;
222
223 let import_id = if let Some(&existing_id) = node_map.get(imported_module) {
225 existing_id
226 } else {
227 let props = PropertyMap::new()
228 .with("name", imported_module.clone())
229 .with("is_external", "true");
230
231 let id = graph
232 .add_node(NodeType::Module, props)
233 .map_err(|e| ParserError::GraphError(e.to_string()))?;
234 node_map.insert(imported_module.clone(), id);
235 id
236 };
237
238 import_ids.push(import_id);
239
240 let mut edge_props = PropertyMap::new();
242 if let Some(ref alias) = import.alias {
243 edge_props = edge_props.with("alias", alias.clone());
244 }
245 if import.is_wildcard {
246 edge_props = edge_props.with("is_wildcard", "true");
247 }
248 if !import.symbols.is_empty() {
249 edge_props = edge_props.with("symbols", import.symbols.join(","));
250 }
251 graph
252 .add_edge(file_id, import_id, EdgeType::Imports, edge_props)
253 .map_err(|e| ParserError::GraphError(e.to_string()))?;
254 }
255
256 for call in &ir.calls {
258 if let (Some(&caller_id), Some(&callee_id)) =
259 (node_map.get(&call.caller), node_map.get(&call.callee))
260 {
261 let edge_props = PropertyMap::new()
262 .with("call_site_line", call.call_site_line.to_string())
263 .with("is_direct", call.is_direct.to_string());
264
265 graph
266 .add_edge(caller_id, callee_id, EdgeType::Calls, edge_props)
267 .map_err(|e| ParserError::GraphError(e.to_string()))?;
268 }
269 }
270
271 for inheritance in &ir.inheritance {
273 if let (Some(&child_id), Some(&parent_id)) = (
274 node_map.get(&inheritance.child),
275 node_map.get(&inheritance.parent),
276 ) {
277 let edge_props = PropertyMap::new().with("order", inheritance.order.to_string());
278
279 graph
280 .add_edge(child_id, parent_id, EdgeType::Extends, edge_props)
281 .map_err(|e| ParserError::GraphError(e.to_string()))?;
282 }
283 }
284
285 let line_count = if let Some(ref module) = ir.module {
287 module.line_count
288 } else {
289 0
290 };
291
292 Ok(FileInfo {
293 file_path: file_path.to_path_buf(),
294 file_id,
295 functions: function_ids,
296 classes: class_ids,
297 traits: trait_ids,
298 imports: import_ids,
299 parse_time: Duration::ZERO, line_count,
301 byte_count: 0, })
303 }
304}
305
306impl Default for PythonParser {
307 fn default() -> Self {
308 Self::new()
309 }
310}
311
312impl CodeParser for PythonParser {
313 fn language(&self) -> &str {
314 "python"
315 }
316
317 fn file_extensions(&self) -> &[&str] {
318 &[".py", ".pyw"]
319 }
320
321 fn parse_file(&self, path: &Path, graph: &mut CodeGraph) -> Result<FileInfo, ParserError> {
322 let start = Instant::now();
323
324 if !self.can_parse(path) {
326 return Err(ParserError::ParseError(
327 path.to_path_buf(),
328 "Invalid file extension for Python parser".to_string(),
329 ));
330 }
331
332 let source = std::fs::read_to_string(path)
334 .map_err(|e| ParserError::IoError(path.to_path_buf(), e))?;
335
336 let byte_count = source.len();
338 if byte_count > self.config.max_file_size {
339 self.update_metrics(false, start.elapsed(), 0, 0);
340 return Err(ParserError::FileTooLarge(path.to_path_buf(), byte_count));
341 }
342
343 let mut file_info = self.parse_source(&source, path, graph)?;
345 file_info.byte_count = byte_count;
346
347 Ok(file_info)
348 }
349
350 fn parse_source(
351 &self,
352 source: &str,
353 file_path: &Path,
354 graph: &mut CodeGraph,
355 ) -> Result<FileInfo, ParserError> {
356 let start = Instant::now();
357
358 if source.len() > self.config.max_file_size {
360 self.update_metrics(false, start.elapsed(), 0, 0);
361 return Err(ParserError::FileTooLarge(
362 file_path.to_path_buf(),
363 source.len(),
364 ));
365 }
366
367 let old_config = crate::config::ParserConfig {
370 include_private: !self.config.skip_private,
371 include_tests: !self.config.skip_tests,
372 max_file_size: self.config.max_file_size,
373 parallel: self.config.parallel,
374 num_threads: self.config.parallel_workers,
375 ..Default::default()
376 };
377
378 let ir = crate::extractor::extract(source, file_path, &old_config).map_err(|e| {
379 self.update_metrics(false, start.elapsed(), 0, 0);
380 ParserError::ParseError(file_path.to_path_buf(), e)
381 })?;
382
383 let entity_count = ir.entity_count();
385 let relationship_count = ir.relationship_count();
386
387 let mut file_info = self.ir_to_graph(&ir, graph, file_path)?;
389
390 let duration = start.elapsed();
392 file_info.parse_time = duration;
393 file_info.byte_count = source.len();
394
395 self.update_metrics(true, duration, entity_count, relationship_count);
396
397 Ok(file_info)
398 }
399
400 fn config(&self) -> &ParserConfig {
401 &self.config
402 }
403
404 fn metrics(&self) -> ParserMetrics {
405 self.metrics.lock().unwrap().clone()
406 }
407
408 fn reset_metrics(&mut self) {
409 *self.metrics.lock().unwrap() = ParserMetrics::default();
410 }
411}
412
#[cfg(test)]
mod tests {
    use super::*;

    /// A new parser reports `"python"` as its language.
    #[test]
    fn test_python_parser_new() {
        let python = PythonParser::new();
        assert_eq!(python.language(), "python");
    }

    /// Exactly two extensions are advertised: `.py` and `.pyw`.
    #[test]
    fn test_python_parser_file_extensions() {
        let python = PythonParser::new();
        let advertised = python.file_extensions();

        assert_eq!(advertised.len(), 2);
        for wanted in [".py", ".pyw"].iter() {
            assert!(advertised.contains(wanted));
        }
    }

    /// Python file names are accepted; other extensions are rejected.
    #[test]
    fn test_python_parser_can_parse() {
        let python = PythonParser::new();
        let cases = [
            ("test.py", true),
            ("test.pyw", true),
            ("test.rs", false),
            ("test.txt", false),
        ];

        for &(file_name, accepted) in cases.iter() {
            if accepted {
                assert!(python.can_parse(Path::new(file_name)));
            } else {
                assert!(!python.can_parse(Path::new(file_name)));
            }
        }
    }

    /// All file counters start at zero on a new parser.
    #[test]
    fn test_metrics_initial_state() {
        let snapshot = PythonParser::new().metrics();

        assert_eq!(snapshot.files_attempted, 0);
        assert_eq!(snapshot.files_succeeded, 0);
        assert_eq!(snapshot.files_failed, 0);
    }
}