codegraph_python/
parser_impl.rs

1//! Implementation of the CodeParser trait for Python
2//!
3//! This module provides the PythonParser struct that implements the
4//! codegraph-parser-api::CodeParser trait, making the Python parser compatible
5//! with the unified parser API.
6
7use codegraph::{CodeGraph, NodeId};
8use codegraph_parser_api::{CodeParser, FileInfo, ParserConfig, ParserError, ParserMetrics};
9use std::path::Path;
10use std::sync::Mutex;
11use std::time::{Duration, Instant};
12
13/// Python language parser implementing the CodeParser trait
14pub struct PythonParser {
15    config: ParserConfig,
16    metrics: Mutex<ParserMetrics>,
17}
18
19impl PythonParser {
20    /// Create a new Python parser with default configuration
21    pub fn new() -> Self {
22        Self {
23            config: ParserConfig::default(),
24            metrics: Mutex::new(ParserMetrics::default()),
25        }
26    }
27
28    /// Create a new Python parser with custom configuration
29    pub fn with_config(config: ParserConfig) -> Self {
30        Self {
31            config,
32            metrics: Mutex::new(ParserMetrics::default()),
33        }
34    }
35
36    /// Update metrics after parsing a file
37    fn update_metrics(
38        &self,
39        success: bool,
40        duration: Duration,
41        entities: usize,
42        relationships: usize,
43    ) {
44        let mut metrics = self.metrics.lock().unwrap();
45        metrics.files_attempted += 1;
46        if success {
47            metrics.files_succeeded += 1;
48        } else {
49            metrics.files_failed += 1;
50        }
51        metrics.total_parse_time += duration;
52        metrics.total_entities += entities;
53        metrics.total_relationships += relationships;
54    }
55
56    /// Convert CodeIR to graph nodes and return FileInfo
57    fn ir_to_graph(
58        &self,
59        ir: &codegraph_parser_api::CodeIR,
60        graph: &mut CodeGraph,
61        file_path: &Path,
62    ) -> Result<FileInfo, ParserError> {
63        use codegraph::{EdgeType, NodeType, PropertyMap};
64        use std::collections::HashMap;
65
66        let mut node_map: HashMap<String, NodeId> = HashMap::new();
67        let mut function_ids = Vec::new();
68        let mut class_ids = Vec::new();
69        let mut trait_ids = Vec::new();
70        let mut import_ids = Vec::new();
71
72        // Create module/file node
73        let file_id = if let Some(ref module) = ir.module {
74            let mut props = PropertyMap::new()
75                .with("name", module.name.clone())
76                .with("path", module.path.clone())
77                .with("language", module.language.clone())
78                .with("line_count", module.line_count.to_string());
79
80            if let Some(ref doc) = module.doc_comment {
81                props = props.with("doc", doc.clone());
82            }
83
84            let id = graph
85                .add_node(NodeType::CodeFile, props)
86                .map_err(|e| ParserError::GraphError(e.to_string()))?;
87            node_map.insert(module.name.clone(), id);
88            id
89        } else {
90            // Create a default file node
91            let file_name = file_path
92                .file_stem()
93                .and_then(|s| s.to_str())
94                .unwrap_or("unknown")
95                .to_string();
96            let props = PropertyMap::new()
97                .with("name", file_name.clone())
98                .with("path", file_path.display().to_string())
99                .with("language", "python");
100
101            let id = graph
102                .add_node(NodeType::CodeFile, props)
103                .map_err(|e| ParserError::GraphError(e.to_string()))?;
104            node_map.insert(file_name, id);
105            id
106        };
107
108        // Add functions
109        for func in &ir.functions {
110            let mut props = PropertyMap::new()
111                .with("name", func.name.clone())
112                .with("signature", func.signature.clone())
113                .with("visibility", func.visibility.clone())
114                .with("line_start", func.line_start.to_string())
115                .with("line_end", func.line_end.to_string())
116                .with("is_async", func.is_async.to_string())
117                .with("is_static", func.is_static.to_string());
118
119            if let Some(ref doc) = func.doc_comment {
120                props = props.with("doc", doc.clone());
121            }
122            if let Some(ref return_type) = func.return_type {
123                props = props.with("return_type", return_type.clone());
124            }
125
126            let func_id = graph
127                .add_node(NodeType::Function, props)
128                .map_err(|e| ParserError::GraphError(e.to_string()))?;
129
130            node_map.insert(func.name.clone(), func_id);
131            function_ids.push(func_id);
132
133            // Link function to file
134            graph
135                .add_edge(file_id, func_id, EdgeType::Contains, PropertyMap::new())
136                .map_err(|e| ParserError::GraphError(e.to_string()))?;
137        }
138
139        // Add classes
140        for class in &ir.classes {
141            let mut props = PropertyMap::new()
142                .with("name", class.name.clone())
143                .with("visibility", class.visibility.clone())
144                .with("line_start", class.line_start.to_string())
145                .with("line_end", class.line_end.to_string())
146                .with("is_abstract", class.is_abstract.to_string());
147
148            if let Some(ref doc) = class.doc_comment {
149                props = props.with("doc", doc.clone());
150            }
151
152            let class_id = graph
153                .add_node(NodeType::Class, props)
154                .map_err(|e| ParserError::GraphError(e.to_string()))?;
155
156            node_map.insert(class.name.clone(), class_id);
157            class_ids.push(class_id);
158
159            // Link class to file
160            graph
161                .add_edge(file_id, class_id, EdgeType::Contains, PropertyMap::new())
162                .map_err(|e| ParserError::GraphError(e.to_string()))?;
163
164            // Add methods
165            for method in &class.methods {
166                let method_name = format!("{}.{}", class.name, method.name);
167                let mut method_props = PropertyMap::new()
168                    .with("name", method_name.clone())
169                    .with("signature", method.signature.clone())
170                    .with("visibility", method.visibility.clone())
171                    .with("line_start", method.line_start.to_string())
172                    .with("line_end", method.line_end.to_string())
173                    .with("is_method", "true")
174                    .with("parent_class", class.name.clone());
175
176                if let Some(ref doc) = method.doc_comment {
177                    method_props = method_props.with("doc", doc.clone());
178                }
179
180                let method_id = graph
181                    .add_node(NodeType::Function, method_props)
182                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
183
184                node_map.insert(method_name, method_id);
185                function_ids.push(method_id);
186
187                // Link method to class
188                graph
189                    .add_edge(class_id, method_id, EdgeType::Contains, PropertyMap::new())
190                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
191            }
192        }
193
194        // Add traits (protocols in Python)
195        for trait_entity in &ir.traits {
196            let mut props = PropertyMap::new()
197                .with("name", trait_entity.name.clone())
198                .with("visibility", trait_entity.visibility.clone())
199                .with("line_start", trait_entity.line_start.to_string())
200                .with("line_end", trait_entity.line_end.to_string());
201
202            if let Some(ref doc) = trait_entity.doc_comment {
203                props = props.with("doc", doc.clone());
204            }
205
206            let trait_id = graph
207                .add_node(NodeType::Interface, props)
208                .map_err(|e| ParserError::GraphError(e.to_string()))?;
209
210            node_map.insert(trait_entity.name.clone(), trait_id);
211            trait_ids.push(trait_id);
212
213            // Link trait to file
214            graph
215                .add_edge(file_id, trait_id, EdgeType::Contains, PropertyMap::new())
216                .map_err(|e| ParserError::GraphError(e.to_string()))?;
217        }
218
219        // Add import nodes and relationships
220        for import in &ir.imports {
221            let imported_module = &import.imported;
222
223            // Create or get import node
224            let import_id = if let Some(&existing_id) = node_map.get(imported_module) {
225                existing_id
226            } else {
227                let props = PropertyMap::new()
228                    .with("name", imported_module.clone())
229                    .with("is_external", "true");
230
231                let id = graph
232                    .add_node(NodeType::Module, props)
233                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
234                node_map.insert(imported_module.clone(), id);
235                id
236            };
237
238            import_ids.push(import_id);
239
240            // Create import edge from file to imported module
241            let mut edge_props = PropertyMap::new();
242            if let Some(ref alias) = import.alias {
243                edge_props = edge_props.with("alias", alias.clone());
244            }
245            if import.is_wildcard {
246                edge_props = edge_props.with("is_wildcard", "true");
247            }
248            if !import.symbols.is_empty() {
249                edge_props = edge_props.with("symbols", import.symbols.join(","));
250            }
251            graph
252                .add_edge(file_id, import_id, EdgeType::Imports, edge_props)
253                .map_err(|e| ParserError::GraphError(e.to_string()))?;
254        }
255
256        // Add call relationships
257        // Track unresolved calls per caller for cross-file resolution
258        let mut unresolved_calls: std::collections::HashMap<String, Vec<String>> =
259            std::collections::HashMap::new();
260
261        for call in &ir.calls {
262            if let Some(&caller_id) = node_map.get(&call.caller) {
263                if let Some(&callee_id) = node_map.get(&call.callee) {
264                    // Both caller and callee are in this file - create direct edge
265                    let edge_props = PropertyMap::new()
266                        .with("call_site_line", call.call_site_line.to_string())
267                        .with("is_direct", call.is_direct.to_string());
268
269                    graph
270                        .add_edge(caller_id, callee_id, EdgeType::Calls, edge_props)
271                        .map_err(|e| ParserError::GraphError(e.to_string()))?;
272                } else {
273                    // Callee not found in this file - store for cross-file resolution
274                    unresolved_calls
275                        .entry(call.caller.clone())
276                        .or_default()
277                        .push(call.callee.clone());
278                }
279            }
280        }
281
282        // Store unresolved calls on caller nodes for post-processing
283        for (caller_name, callees) in unresolved_calls {
284            if let Some(&caller_id) = node_map.get(&caller_name) {
285                if let Ok(node) = graph.get_node(caller_id) {
286                    let existing = node.properties.get_string("unresolved_calls").unwrap_or("");
287                    let mut all_callees: Vec<&str> = if existing.is_empty() {
288                        Vec::new()
289                    } else {
290                        existing.split(',').collect()
291                    };
292                    for callee in &callees {
293                        if !all_callees.contains(&callee.as_str()) {
294                            all_callees.push(callee);
295                        }
296                    }
297                    let new_props = node
298                        .properties
299                        .clone()
300                        .with("unresolved_calls", all_callees.join(","));
301                    let _ = graph.update_node_properties(caller_id, new_props);
302                }
303            }
304        }
305
306        // Add inheritance relationships
307        for inheritance in &ir.inheritance {
308            if let (Some(&child_id), Some(&parent_id)) = (
309                node_map.get(&inheritance.child),
310                node_map.get(&inheritance.parent),
311            ) {
312                let edge_props = PropertyMap::new().with("order", inheritance.order.to_string());
313
314                graph
315                    .add_edge(child_id, parent_id, EdgeType::Extends, edge_props)
316                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
317            }
318        }
319
320        // Add implementation relationships (class implements protocol/interface)
321        for impl_rel in &ir.implementations {
322            if let (Some(&implementor_id), Some(&trait_id)) = (
323                node_map.get(&impl_rel.implementor),
324                node_map.get(&impl_rel.trait_name),
325            ) {
326                graph
327                    .add_edge(
328                        implementor_id,
329                        trait_id,
330                        EdgeType::Implements,
331                        PropertyMap::new(),
332                    )
333                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
334            }
335        }
336
337        // Count source lines
338        let line_count = if let Some(ref module) = ir.module {
339            module.line_count
340        } else {
341            0
342        };
343
344        Ok(FileInfo {
345            file_path: file_path.to_path_buf(),
346            file_id,
347            functions: function_ids,
348            classes: class_ids,
349            traits: trait_ids,
350            imports: import_ids,
351            parse_time: Duration::ZERO, // Will be set by caller
352            line_count,
353            byte_count: 0, // Will be set by caller
354        })
355    }
356}
357
358impl Default for PythonParser {
359    fn default() -> Self {
360        Self::new()
361    }
362}
363
364impl CodeParser for PythonParser {
365    fn language(&self) -> &str {
366        "python"
367    }
368
369    fn file_extensions(&self) -> &[&str] {
370        &[".py", ".pyw"]
371    }
372
373    fn parse_file(&self, path: &Path, graph: &mut CodeGraph) -> Result<FileInfo, ParserError> {
374        let start = Instant::now();
375
376        // Check file extension
377        if !self.can_parse(path) {
378            return Err(ParserError::ParseError(
379                path.to_path_buf(),
380                "Invalid file extension for Python parser".to_string(),
381            ));
382        }
383
384        // Read file
385        let source = std::fs::read_to_string(path)
386            .map_err(|e| ParserError::IoError(path.to_path_buf(), e))?;
387
388        // Check file size
389        let byte_count = source.len();
390        if byte_count > self.config.max_file_size {
391            self.update_metrics(false, start.elapsed(), 0, 0);
392            return Err(ParserError::FileTooLarge(path.to_path_buf(), byte_count));
393        }
394
395        // Parse source
396        let mut file_info = self.parse_source(&source, path, graph)?;
397        file_info.byte_count = byte_count;
398
399        Ok(file_info)
400    }
401
402    fn parse_source(
403        &self,
404        source: &str,
405        file_path: &Path,
406        graph: &mut CodeGraph,
407    ) -> Result<FileInfo, ParserError> {
408        let start = Instant::now();
409
410        // Check size limit
411        if source.len() > self.config.max_file_size {
412            self.update_metrics(false, start.elapsed(), 0, 0);
413            return Err(ParserError::FileTooLarge(
414                file_path.to_path_buf(),
415                source.len(),
416            ));
417        }
418
419        // Extract entities using existing extractor
420        // Convert ParserConfig to old config format
421        let old_config = crate::config::ParserConfig {
422            include_private: !self.config.skip_private,
423            include_tests: !self.config.skip_tests,
424            max_file_size: self.config.max_file_size,
425            parallel: self.config.parallel,
426            num_threads: self.config.parallel_workers,
427            ..Default::default()
428        };
429
430        let ir = crate::extractor::extract(source, file_path, &old_config).map_err(|e| {
431            self.update_metrics(false, start.elapsed(), 0, 0);
432            ParserError::ParseError(file_path.to_path_buf(), e)
433        })?;
434
435        // Count entities and relationships
436        let entity_count = ir.entity_count();
437        let relationship_count = ir.relationship_count();
438
439        // Convert IR to graph
440        let mut file_info = self.ir_to_graph(&ir, graph, file_path)?;
441
442        // Set timing and update metrics
443        let duration = start.elapsed();
444        file_info.parse_time = duration;
445        file_info.byte_count = source.len();
446
447        self.update_metrics(true, duration, entity_count, relationship_count);
448
449        Ok(file_info)
450    }
451
452    fn config(&self) -> &ParserConfig {
453        &self.config
454    }
455
456    fn metrics(&self) -> ParserMetrics {
457        self.metrics.lock().unwrap().clone()
458    }
459
460    fn reset_metrics(&mut self) {
461        *self.metrics.lock().unwrap() = ParserMetrics::default();
462    }
463}
464
#[cfg(test)]
mod tests {
    use super::*;

    /// A new parser reports "python" as its language.
    #[test]
    fn test_python_parser_new() {
        let language = PythonParser::new().language().to_string();
        assert_eq!(language, "python");
    }

    /// Exactly the two Python extensions are advertised.
    #[test]
    fn test_python_parser_file_extensions() {
        let parser = PythonParser::new();
        let advertised = parser.file_extensions();

        assert_eq!(advertised.len(), 2);
        for expected in [".py", ".pyw"] {
            assert!(advertised.contains(&expected));
        }
    }

    /// Python paths are accepted; other extensions are rejected.
    #[test]
    fn test_python_parser_can_parse() {
        let parser = PythonParser::new();

        for accepted in ["test.py", "test.pyw"] {
            assert!(parser.can_parse(Path::new(accepted)));
        }
        for rejected in ["test.rs", "test.txt"] {
            assert!(!parser.can_parse(Path::new(rejected)));
        }
    }

    /// All file counters start at zero.
    #[test]
    fn test_metrics_initial_state() {
        let snapshot = PythonParser::new().metrics();

        assert_eq!(snapshot.files_attempted, 0);
        assert_eq!(snapshot.files_succeeded, 0);
        assert_eq!(snapshot.files_failed, 0);
    }

    /// A class implementing a protocol yields an `Implements` edge between
    /// the two nodes.
    #[test]
    fn test_implements_edge_creation() {
        use codegraph::{CodeGraph, EdgeType};
        use codegraph_parser_api::{
            ClassEntity, CodeIR, ImplementationRelation, ModuleEntity, TraitEntity,
        };
        use std::path::PathBuf;

        // IR describing one class, one protocol (Python's equivalent of an
        // interface), and the relation between them.
        let mut ir = CodeIR::new(PathBuf::from("test.py"));
        ir.set_module(ModuleEntity::new("test", "test.py", "python"));
        ir.add_class(ClassEntity::new("MyClass", 1, 20));
        ir.add_trait(TraitEntity::new("MyProtocol", 22, 30));
        ir.add_implementation(ImplementationRelation::new("MyClass", "MyProtocol"));

        let parser = PythonParser::new();
        let mut graph = CodeGraph::in_memory().unwrap();
        let info = parser
            .ir_to_graph(&ir, &mut graph, Path::new("test.py"))
            .unwrap();

        assert_eq!(info.classes.len(), 1);
        assert_eq!(info.traits.len(), 1);

        let (class_node, protocol_node) = (info.classes[0], info.traits[0]);

        let connecting = graph.get_edges_between(class_node, protocol_node).unwrap();
        assert!(
            !connecting.is_empty(),
            "Should have implements edge between class and protocol"
        );

        let first_edge = graph.get_edge(connecting[0]).unwrap();
        assert_eq!(
            first_edge.edge_type,
            EdgeType::Implements,
            "Edge should be of type Implements"
        );
    }
}