Skip to main content

codegraph_python/
parser_impl.rs

//! Implementation of the `CodeParser` trait for Python.
//!
//! This module provides the `PythonParser` struct, which implements the
//! `codegraph_parser_api::CodeParser` trait so that the Python parser is
//! compatible with the unified parser API.
6
7use codegraph::{CodeGraph, NodeId};
8use codegraph_parser_api::{CodeParser, FileInfo, ParserConfig, ParserError, ParserMetrics};
9use std::path::Path;
10use std::sync::Mutex;
11use std::time::{Duration, Instant};
12
13/// Python language parser implementing the CodeParser trait
14pub struct PythonParser {
15    config: ParserConfig,
16    metrics: Mutex<ParserMetrics>,
17}
18
19impl PythonParser {
20    /// Create a new Python parser with default configuration
21    pub fn new() -> Self {
22        Self {
23            config: ParserConfig::default(),
24            metrics: Mutex::new(ParserMetrics::default()),
25        }
26    }
27
28    /// Create a new Python parser with custom configuration
29    pub fn with_config(config: ParserConfig) -> Self {
30        Self {
31            config,
32            metrics: Mutex::new(ParserMetrics::default()),
33        }
34    }
35
36    /// Update metrics after parsing a file
37    fn update_metrics(
38        &self,
39        success: bool,
40        duration: Duration,
41        entities: usize,
42        relationships: usize,
43    ) {
44        let mut metrics = self.metrics.lock().unwrap();
45        metrics.files_attempted += 1;
46        if success {
47            metrics.files_succeeded += 1;
48        } else {
49            metrics.files_failed += 1;
50        }
51        metrics.total_parse_time += duration;
52        metrics.total_entities += entities;
53        metrics.total_relationships += relationships;
54    }
55
56    /// Convert CodeIR to graph nodes and return FileInfo
57    fn ir_to_graph(
58        &self,
59        ir: &codegraph_parser_api::CodeIR,
60        graph: &mut CodeGraph,
61        file_path: &Path,
62    ) -> Result<FileInfo, ParserError> {
63        use codegraph::{EdgeType, NodeType, PropertyMap};
64        use std::collections::HashMap;
65
66        let mut node_map: HashMap<String, NodeId> = HashMap::new();
67        let mut function_ids = Vec::new();
68        let mut class_ids = Vec::new();
69        let mut trait_ids = Vec::new();
70        let mut import_ids = Vec::new();
71
72        // Create module/file node
73        let file_id = if let Some(ref module) = ir.module {
74            let mut props = PropertyMap::new()
75                .with("name", module.name.clone())
76                .with("path", module.path.clone())
77                .with("language", module.language.clone())
78                .with("line_count", module.line_count.to_string());
79
80            if let Some(ref doc) = module.doc_comment {
81                props = props.with("doc", doc.clone());
82            }
83
84            let id = graph
85                .add_node(NodeType::CodeFile, props)
86                .map_err(|e| ParserError::GraphError(e.to_string()))?;
87            node_map.insert(module.name.clone(), id);
88            id
89        } else {
90            // Create a default file node
91            let file_name = file_path
92                .file_stem()
93                .and_then(|s| s.to_str())
94                .unwrap_or("unknown")
95                .to_string();
96            let props = PropertyMap::new()
97                .with("name", file_name.clone())
98                .with("path", file_path.display().to_string())
99                .with("language", "python");
100
101            let id = graph
102                .add_node(NodeType::CodeFile, props)
103                .map_err(|e| ParserError::GraphError(e.to_string()))?;
104            node_map.insert(file_name, id);
105            id
106        };
107
108        // Add functions
109        for func in &ir.functions {
110            let mut props = PropertyMap::new()
111                .with("name", func.name.clone())
112                .with("path", file_path.display().to_string())
113                .with("signature", func.signature.clone())
114                .with("visibility", func.visibility.clone())
115                .with("line_start", func.line_start.to_string())
116                .with("line_end", func.line_end.to_string())
117                .with("is_async", func.is_async.to_string())
118                .with("is_static", func.is_static.to_string());
119
120            if let Some(ref doc) = func.doc_comment {
121                props = props.with("doc", doc.clone());
122            }
123            if let Some(ref return_type) = func.return_type {
124                props = props.with("return_type", return_type.clone());
125            }
126
127            let func_id = graph
128                .add_node(NodeType::Function, props)
129                .map_err(|e| ParserError::GraphError(e.to_string()))?;
130
131            node_map.insert(func.name.clone(), func_id);
132            function_ids.push(func_id);
133
134            // Link function to file
135            graph
136                .add_edge(file_id, func_id, EdgeType::Contains, PropertyMap::new())
137                .map_err(|e| ParserError::GraphError(e.to_string()))?;
138        }
139
140        // Add classes
141        for class in &ir.classes {
142            let mut props = PropertyMap::new()
143                .with("name", class.name.clone())
144                .with("path", file_path.display().to_string())
145                .with("visibility", class.visibility.clone())
146                .with("line_start", class.line_start.to_string())
147                .with("line_end", class.line_end.to_string())
148                .with("is_abstract", class.is_abstract.to_string());
149
150            if let Some(ref doc) = class.doc_comment {
151                props = props.with("doc", doc.clone());
152            }
153
154            let class_id = graph
155                .add_node(NodeType::Class, props)
156                .map_err(|e| ParserError::GraphError(e.to_string()))?;
157
158            node_map.insert(class.name.clone(), class_id);
159            class_ids.push(class_id);
160
161            // Link class to file
162            graph
163                .add_edge(file_id, class_id, EdgeType::Contains, PropertyMap::new())
164                .map_err(|e| ParserError::GraphError(e.to_string()))?;
165
166            // Add methods
167            for method in &class.methods {
168                let method_name = format!("{}.{}", class.name, method.name);
169                let mut method_props = PropertyMap::new()
170                    .with("name", method_name.clone())
171                    .with("path", file_path.display().to_string())
172                    .with("signature", method.signature.clone())
173                    .with("visibility", method.visibility.clone())
174                    .with("line_start", method.line_start.to_string())
175                    .with("line_end", method.line_end.to_string())
176                    .with("is_method", "true")
177                    .with("parent_class", class.name.clone());
178
179                if let Some(ref doc) = method.doc_comment {
180                    method_props = method_props.with("doc", doc.clone());
181                }
182
183                let method_id = graph
184                    .add_node(NodeType::Function, method_props)
185                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
186
187                node_map.insert(method_name, method_id);
188                function_ids.push(method_id);
189
190                // Link method to class
191                graph
192                    .add_edge(class_id, method_id, EdgeType::Contains, PropertyMap::new())
193                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
194            }
195        }
196
197        // Add traits (protocols in Python)
198        for trait_entity in &ir.traits {
199            let mut props = PropertyMap::new()
200                .with("name", trait_entity.name.clone())
201                .with("path", file_path.display().to_string())
202                .with("visibility", trait_entity.visibility.clone())
203                .with("line_start", trait_entity.line_start.to_string())
204                .with("line_end", trait_entity.line_end.to_string());
205
206            if let Some(ref doc) = trait_entity.doc_comment {
207                props = props.with("doc", doc.clone());
208            }
209
210            let trait_id = graph
211                .add_node(NodeType::Interface, props)
212                .map_err(|e| ParserError::GraphError(e.to_string()))?;
213
214            node_map.insert(trait_entity.name.clone(), trait_id);
215            trait_ids.push(trait_id);
216
217            // Link trait to file
218            graph
219                .add_edge(file_id, trait_id, EdgeType::Contains, PropertyMap::new())
220                .map_err(|e| ParserError::GraphError(e.to_string()))?;
221        }
222
223        // Add import nodes and relationships
224        for import in &ir.imports {
225            let imported_module = &import.imported;
226
227            // Create or get import node
228            let import_id = if let Some(&existing_id) = node_map.get(imported_module) {
229                existing_id
230            } else {
231                let props = PropertyMap::new()
232                    .with("name", imported_module.clone())
233                    .with("is_external", "true");
234
235                let id = graph
236                    .add_node(NodeType::Module, props)
237                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
238                node_map.insert(imported_module.clone(), id);
239                id
240            };
241
242            import_ids.push(import_id);
243
244            // Create import edge from file to imported module
245            let mut edge_props = PropertyMap::new();
246            if let Some(ref alias) = import.alias {
247                edge_props = edge_props.with("alias", alias.clone());
248            }
249            if import.is_wildcard {
250                edge_props = edge_props.with("is_wildcard", "true");
251            }
252            if !import.symbols.is_empty() {
253                edge_props = edge_props.with("symbols", import.symbols.join(","));
254            }
255            graph
256                .add_edge(file_id, import_id, EdgeType::Imports, edge_props)
257                .map_err(|e| ParserError::GraphError(e.to_string()))?;
258        }
259
260        // Add call relationships
261        // Track unresolved calls per caller for cross-file resolution
262        let mut unresolved_calls: std::collections::HashMap<String, Vec<String>> =
263            std::collections::HashMap::new();
264
265        for call in &ir.calls {
266            if let Some(&caller_id) = node_map.get(&call.caller) {
267                if let Some(&callee_id) = node_map.get(&call.callee) {
268                    // Both caller and callee are in this file - create direct edge
269                    let edge_props = PropertyMap::new()
270                        .with("call_site_line", call.call_site_line.to_string())
271                        .with("is_direct", call.is_direct.to_string());
272
273                    graph
274                        .add_edge(caller_id, callee_id, EdgeType::Calls, edge_props)
275                        .map_err(|e| ParserError::GraphError(e.to_string()))?;
276                } else {
277                    // Callee not found in this file - store for cross-file resolution
278                    unresolved_calls
279                        .entry(call.caller.clone())
280                        .or_default()
281                        .push(call.callee.clone());
282                }
283            }
284        }
285
286        // Store unresolved calls on caller nodes for post-processing
287        for (caller_name, callees) in unresolved_calls {
288            if let Some(&caller_id) = node_map.get(&caller_name) {
289                if let Ok(node) = graph.get_node(caller_id) {
290                    let existing = node.properties.get_string("unresolved_calls").unwrap_or("");
291                    let mut all_callees: Vec<&str> = if existing.is_empty() {
292                        Vec::new()
293                    } else {
294                        existing.split(',').collect()
295                    };
296                    for callee in &callees {
297                        if !all_callees.contains(&callee.as_str()) {
298                            all_callees.push(callee);
299                        }
300                    }
301                    let new_props = node
302                        .properties
303                        .clone()
304                        .with("unresolved_calls", all_callees.join(","));
305                    let _ = graph.update_node_properties(caller_id, new_props);
306                }
307            }
308        }
309
310        // Add inheritance relationships
311        for inheritance in &ir.inheritance {
312            if let (Some(&child_id), Some(&parent_id)) = (
313                node_map.get(&inheritance.child),
314                node_map.get(&inheritance.parent),
315            ) {
316                let edge_props = PropertyMap::new().with("order", inheritance.order.to_string());
317
318                graph
319                    .add_edge(child_id, parent_id, EdgeType::Extends, edge_props)
320                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
321            }
322        }
323
324        // Add implementation relationships (class implements protocol/interface)
325        for impl_rel in &ir.implementations {
326            if let (Some(&implementor_id), Some(&trait_id)) = (
327                node_map.get(&impl_rel.implementor),
328                node_map.get(&impl_rel.trait_name),
329            ) {
330                graph
331                    .add_edge(
332                        implementor_id,
333                        trait_id,
334                        EdgeType::Implements,
335                        PropertyMap::new(),
336                    )
337                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
338            }
339        }
340
341        // Count source lines
342        let line_count = if let Some(ref module) = ir.module {
343            module.line_count
344        } else {
345            0
346        };
347
348        Ok(FileInfo {
349            file_path: file_path.to_path_buf(),
350            file_id,
351            functions: function_ids,
352            classes: class_ids,
353            traits: trait_ids,
354            imports: import_ids,
355            parse_time: Duration::ZERO, // Will be set by caller
356            line_count,
357            byte_count: 0, // Will be set by caller
358        })
359    }
360}
361
362impl Default for PythonParser {
363    fn default() -> Self {
364        Self::new()
365    }
366}
367
368impl CodeParser for PythonParser {
369    fn language(&self) -> &str {
370        "python"
371    }
372
373    fn file_extensions(&self) -> &[&str] {
374        &[".py", ".pyw"]
375    }
376
377    fn parse_file(&self, path: &Path, graph: &mut CodeGraph) -> Result<FileInfo, ParserError> {
378        let start = Instant::now();
379
380        // Check file extension
381        if !self.can_parse(path) {
382            return Err(ParserError::ParseError(
383                path.to_path_buf(),
384                "Invalid file extension for Python parser".to_string(),
385            ));
386        }
387
388        // Read file
389        let source = std::fs::read_to_string(path)
390            .map_err(|e| ParserError::IoError(path.to_path_buf(), e))?;
391
392        // Check file size
393        let byte_count = source.len();
394        if byte_count > self.config.max_file_size {
395            self.update_metrics(false, start.elapsed(), 0, 0);
396            return Err(ParserError::FileTooLarge(path.to_path_buf(), byte_count));
397        }
398
399        // Parse source
400        let mut file_info = self.parse_source(&source, path, graph)?;
401        file_info.byte_count = byte_count;
402
403        Ok(file_info)
404    }
405
406    fn parse_source(
407        &self,
408        source: &str,
409        file_path: &Path,
410        graph: &mut CodeGraph,
411    ) -> Result<FileInfo, ParserError> {
412        let start = Instant::now();
413
414        // Check size limit
415        if source.len() > self.config.max_file_size {
416            self.update_metrics(false, start.elapsed(), 0, 0);
417            return Err(ParserError::FileTooLarge(
418                file_path.to_path_buf(),
419                source.len(),
420            ));
421        }
422
423        // Extract entities using existing extractor
424        // Convert ParserConfig to old config format
425        let old_config = crate::config::ParserConfig {
426            include_private: !self.config.skip_private,
427            include_tests: !self.config.skip_tests,
428            max_file_size: self.config.max_file_size,
429            parallel: self.config.parallel,
430            num_threads: self.config.parallel_workers,
431            ..Default::default()
432        };
433
434        let ir = crate::extractor::extract(source, file_path, &old_config).map_err(|e| {
435            self.update_metrics(false, start.elapsed(), 0, 0);
436            ParserError::ParseError(file_path.to_path_buf(), e)
437        })?;
438
439        // Count entities and relationships
440        let entity_count = ir.entity_count();
441        let relationship_count = ir.relationship_count();
442
443        // Convert IR to graph
444        let mut file_info = self.ir_to_graph(&ir, graph, file_path)?;
445
446        // Set timing and update metrics
447        let duration = start.elapsed();
448        file_info.parse_time = duration;
449        file_info.byte_count = source.len();
450
451        self.update_metrics(true, duration, entity_count, relationship_count);
452
453        Ok(file_info)
454    }
455
456    fn config(&self) -> &ParserConfig {
457        &self.config
458    }
459
460    fn metrics(&self) -> ParserMetrics {
461        self.metrics.lock().unwrap().clone()
462    }
463
464    fn reset_metrics(&mut self) {
465        *self.metrics.lock().unwrap() = ParserMetrics::default();
466    }
467}
468
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built parser reports "python" as its language.
    #[test]
    fn test_python_parser_new() {
        assert_eq!(PythonParser::new().language(), "python");
    }

    /// Exactly the two Python extensions are advertised.
    #[test]
    fn test_python_parser_file_extensions() {
        let parser = PythonParser::new();
        let extensions = parser.file_extensions();

        assert_eq!(extensions.len(), 2);
        for expected in &[".py", ".pyw"] {
            assert!(extensions.contains(expected));
        }
    }

    /// `can_parse` accepts Python file names and rejects everything else.
    #[test]
    fn test_python_parser_can_parse() {
        let parser = PythonParser::new();
        let cases = [
            ("test.py", true),
            ("test.pyw", true),
            ("test.rs", false),
            ("test.txt", false),
        ];

        for &(file_name, accepted) in cases.iter() {
            assert_eq!(parser.can_parse(Path::new(file_name)), accepted);
        }
    }

    /// All file counters start at zero.
    #[test]
    fn test_metrics_initial_state() {
        let snapshot = PythonParser::new().metrics();

        assert_eq!(snapshot.files_attempted, 0);
        assert_eq!(snapshot.files_succeeded, 0);
        assert_eq!(snapshot.files_failed, 0);
    }

    /// A class that implements a protocol gets an `Implements` edge to it.
    #[test]
    fn test_implements_edge_creation() {
        use codegraph::{CodeGraph, EdgeType};
        use codegraph_parser_api::{
            ClassEntity, CodeIR, ImplementationRelation, ModuleEntity, TraitEntity,
        };
        use std::path::PathBuf;

        // IR describing one class that implements one protocol (Python's
        // equivalent of an interface).
        let mut ir = CodeIR::new(PathBuf::from("test.py"));
        ir.set_module(ModuleEntity::new("test", "test.py", "python"));
        ir.add_class(ClassEntity::new("MyClass", 1, 20));
        ir.add_trait(TraitEntity::new("MyProtocol", 22, 30));
        ir.add_implementation(ImplementationRelation::new("MyClass", "MyProtocol"));

        let parser = PythonParser::new();
        let mut graph = CodeGraph::in_memory().unwrap();
        let file_info = parser
            .ir_to_graph(&ir, &mut graph, Path::new("test.py"))
            .unwrap();

        assert_eq!(file_info.classes.len(), 1);
        assert_eq!(file_info.traits.len(), 1);

        // The only class and the only protocol are the edge endpoints.
        let class_id = file_info.classes[0];
        let protocol_id = file_info.traits[0];

        let edges = graph.get_edges_between(class_id, protocol_id).unwrap();
        assert!(
            !edges.is_empty(),
            "Should have implements edge between class and protocol"
        );

        let first_edge = graph.get_edge(edges[0]).unwrap();
        assert_eq!(
            first_edge.edge_type,
            EdgeType::Implements,
            "Edge should be of type Implements"
        );
    }
}