codegraph_python/
parser_impl.rs

1//! Implementation of the CodeParser trait for Python
2//!
3//! This module provides the PythonParser struct that implements the
4//! codegraph-parser-api::CodeParser trait, making the Python parser compatible
5//! with the unified parser API.
6
7use codegraph::{CodeGraph, NodeId};
8use codegraph_parser_api::{CodeParser, FileInfo, ParserConfig, ParserError, ParserMetrics};
9use std::path::Path;
10use std::sync::Mutex;
11use std::time::{Duration, Instant};
12
/// Python language parser implementing the CodeParser trait
///
/// Holds the parser configuration together with running parse metrics. The
/// metrics sit behind a `Mutex` so they can be updated through `&self`.
pub struct PythonParser {
    // Configuration fixed at construction time; handed out by `config()`.
    config: ParserConfig,
    // Counters accumulated by `update_metrics` and cleared by `reset_metrics`.
    metrics: Mutex<ParserMetrics>,
}
18
19impl PythonParser {
20    /// Create a new Python parser with default configuration
21    pub fn new() -> Self {
22        Self {
23            config: ParserConfig::default(),
24            metrics: Mutex::new(ParserMetrics::default()),
25        }
26    }
27
28    /// Create a new Python parser with custom configuration
29    pub fn with_config(config: ParserConfig) -> Self {
30        Self {
31            config,
32            metrics: Mutex::new(ParserMetrics::default()),
33        }
34    }
35
36    /// Update metrics after parsing a file
37    fn update_metrics(
38        &self,
39        success: bool,
40        duration: Duration,
41        entities: usize,
42        relationships: usize,
43    ) {
44        let mut metrics = self.metrics.lock().unwrap();
45        metrics.files_attempted += 1;
46        if success {
47            metrics.files_succeeded += 1;
48        } else {
49            metrics.files_failed += 1;
50        }
51        metrics.total_parse_time += duration;
52        metrics.total_entities += entities;
53        metrics.total_relationships += relationships;
54    }
55
56    /// Convert CodeIR to graph nodes and return FileInfo
57    fn ir_to_graph(
58        &self,
59        ir: &codegraph_parser_api::CodeIR,
60        graph: &mut CodeGraph,
61        file_path: &Path,
62    ) -> Result<FileInfo, ParserError> {
63        use codegraph::{EdgeType, NodeType, PropertyMap};
64        use std::collections::HashMap;
65
66        let mut node_map: HashMap<String, NodeId> = HashMap::new();
67        let mut function_ids = Vec::new();
68        let mut class_ids = Vec::new();
69        let mut trait_ids = Vec::new();
70        let mut import_ids = Vec::new();
71
72        // Create module/file node
73        let file_id = if let Some(ref module) = ir.module {
74            let mut props = PropertyMap::new()
75                .with("name", module.name.clone())
76                .with("path", module.path.clone())
77                .with("language", module.language.clone())
78                .with("line_count", module.line_count.to_string());
79
80            if let Some(ref doc) = module.doc_comment {
81                props = props.with("doc", doc.clone());
82            }
83
84            let id = graph
85                .add_node(NodeType::CodeFile, props)
86                .map_err(|e| ParserError::GraphError(e.to_string()))?;
87            node_map.insert(module.name.clone(), id);
88            id
89        } else {
90            // Create a default file node
91            let file_name = file_path
92                .file_stem()
93                .and_then(|s| s.to_str())
94                .unwrap_or("unknown")
95                .to_string();
96            let props = PropertyMap::new()
97                .with("name", file_name.clone())
98                .with("path", file_path.display().to_string())
99                .with("language", "python");
100
101            let id = graph
102                .add_node(NodeType::CodeFile, props)
103                .map_err(|e| ParserError::GraphError(e.to_string()))?;
104            node_map.insert(file_name, id);
105            id
106        };
107
108        // Add functions
109        for func in &ir.functions {
110            let mut props = PropertyMap::new()
111                .with("name", func.name.clone())
112                .with("signature", func.signature.clone())
113                .with("visibility", func.visibility.clone())
114                .with("line_start", func.line_start.to_string())
115                .with("line_end", func.line_end.to_string())
116                .with("is_async", func.is_async.to_string())
117                .with("is_static", func.is_static.to_string());
118
119            if let Some(ref doc) = func.doc_comment {
120                props = props.with("doc", doc.clone());
121            }
122            if let Some(ref return_type) = func.return_type {
123                props = props.with("return_type", return_type.clone());
124            }
125
126            let func_id = graph
127                .add_node(NodeType::Function, props)
128                .map_err(|e| ParserError::GraphError(e.to_string()))?;
129
130            node_map.insert(func.name.clone(), func_id);
131            function_ids.push(func_id);
132
133            // Link function to file
134            graph
135                .add_edge(file_id, func_id, EdgeType::Contains, PropertyMap::new())
136                .map_err(|e| ParserError::GraphError(e.to_string()))?;
137        }
138
139        // Add classes
140        for class in &ir.classes {
141            let mut props = PropertyMap::new()
142                .with("name", class.name.clone())
143                .with("visibility", class.visibility.clone())
144                .with("line_start", class.line_start.to_string())
145                .with("line_end", class.line_end.to_string())
146                .with("is_abstract", class.is_abstract.to_string());
147
148            if let Some(ref doc) = class.doc_comment {
149                props = props.with("doc", doc.clone());
150            }
151
152            let class_id = graph
153                .add_node(NodeType::Class, props)
154                .map_err(|e| ParserError::GraphError(e.to_string()))?;
155
156            node_map.insert(class.name.clone(), class_id);
157            class_ids.push(class_id);
158
159            // Link class to file
160            graph
161                .add_edge(file_id, class_id, EdgeType::Contains, PropertyMap::new())
162                .map_err(|e| ParserError::GraphError(e.to_string()))?;
163
164            // Add methods
165            for method in &class.methods {
166                let method_name = format!("{}.{}", class.name, method.name);
167                let mut method_props = PropertyMap::new()
168                    .with("name", method_name.clone())
169                    .with("signature", method.signature.clone())
170                    .with("visibility", method.visibility.clone())
171                    .with("line_start", method.line_start.to_string())
172                    .with("line_end", method.line_end.to_string())
173                    .with("is_method", "true")
174                    .with("parent_class", class.name.clone());
175
176                if let Some(ref doc) = method.doc_comment {
177                    method_props = method_props.with("doc", doc.clone());
178                }
179
180                let method_id = graph
181                    .add_node(NodeType::Function, method_props)
182                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
183
184                node_map.insert(method_name, method_id);
185                function_ids.push(method_id);
186
187                // Link method to class
188                graph
189                    .add_edge(class_id, method_id, EdgeType::Contains, PropertyMap::new())
190                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
191            }
192        }
193
194        // Add traits (protocols in Python)
195        for trait_entity in &ir.traits {
196            let mut props = PropertyMap::new()
197                .with("name", trait_entity.name.clone())
198                .with("visibility", trait_entity.visibility.clone())
199                .with("line_start", trait_entity.line_start.to_string())
200                .with("line_end", trait_entity.line_end.to_string());
201
202            if let Some(ref doc) = trait_entity.doc_comment {
203                props = props.with("doc", doc.clone());
204            }
205
206            let trait_id = graph
207                .add_node(NodeType::Interface, props)
208                .map_err(|e| ParserError::GraphError(e.to_string()))?;
209
210            node_map.insert(trait_entity.name.clone(), trait_id);
211            trait_ids.push(trait_id);
212
213            // Link trait to file
214            graph
215                .add_edge(file_id, trait_id, EdgeType::Contains, PropertyMap::new())
216                .map_err(|e| ParserError::GraphError(e.to_string()))?;
217        }
218
219        // Add import nodes and relationships
220        for import in &ir.imports {
221            let imported_module = &import.imported;
222
223            // Create or get import node
224            let import_id = if let Some(&existing_id) = node_map.get(imported_module) {
225                existing_id
226            } else {
227                let props = PropertyMap::new()
228                    .with("name", imported_module.clone())
229                    .with("is_external", "true");
230
231                let id = graph
232                    .add_node(NodeType::Module, props)
233                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
234                node_map.insert(imported_module.clone(), id);
235                id
236            };
237
238            import_ids.push(import_id);
239
240            // Create import edge from file to imported module
241            let mut edge_props = PropertyMap::new();
242            if let Some(ref alias) = import.alias {
243                edge_props = edge_props.with("alias", alias.clone());
244            }
245            if import.is_wildcard {
246                edge_props = edge_props.with("is_wildcard", "true");
247            }
248            if !import.symbols.is_empty() {
249                edge_props = edge_props.with("symbols", import.symbols.join(","));
250            }
251            graph
252                .add_edge(file_id, import_id, EdgeType::Imports, edge_props)
253                .map_err(|e| ParserError::GraphError(e.to_string()))?;
254        }
255
256        // Add call relationships
257        for call in &ir.calls {
258            if let (Some(&caller_id), Some(&callee_id)) =
259                (node_map.get(&call.caller), node_map.get(&call.callee))
260            {
261                let edge_props = PropertyMap::new()
262                    .with("call_site_line", call.call_site_line.to_string())
263                    .with("is_direct", call.is_direct.to_string());
264
265                graph
266                    .add_edge(caller_id, callee_id, EdgeType::Calls, edge_props)
267                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
268            }
269        }
270
271        // Add inheritance relationships
272        for inheritance in &ir.inheritance {
273            if let (Some(&child_id), Some(&parent_id)) = (
274                node_map.get(&inheritance.child),
275                node_map.get(&inheritance.parent),
276            ) {
277                let edge_props = PropertyMap::new().with("order", inheritance.order.to_string());
278
279                graph
280                    .add_edge(child_id, parent_id, EdgeType::Extends, edge_props)
281                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
282            }
283        }
284
285        // Add implementation relationships (class implements protocol/interface)
286        for impl_rel in &ir.implementations {
287            if let (Some(&implementor_id), Some(&trait_id)) = (
288                node_map.get(&impl_rel.implementor),
289                node_map.get(&impl_rel.trait_name),
290            ) {
291                graph
292                    .add_edge(
293                        implementor_id,
294                        trait_id,
295                        EdgeType::Implements,
296                        PropertyMap::new(),
297                    )
298                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
299            }
300        }
301
302        // Count source lines
303        let line_count = if let Some(ref module) = ir.module {
304            module.line_count
305        } else {
306            0
307        };
308
309        Ok(FileInfo {
310            file_path: file_path.to_path_buf(),
311            file_id,
312            functions: function_ids,
313            classes: class_ids,
314            traits: trait_ids,
315            imports: import_ids,
316            parse_time: Duration::ZERO, // Will be set by caller
317            line_count,
318            byte_count: 0, // Will be set by caller
319        })
320    }
321}
322
323impl Default for PythonParser {
324    fn default() -> Self {
325        Self::new()
326    }
327}
328
329impl CodeParser for PythonParser {
330    fn language(&self) -> &str {
331        "python"
332    }
333
334    fn file_extensions(&self) -> &[&str] {
335        &[".py", ".pyw"]
336    }
337
338    fn parse_file(&self, path: &Path, graph: &mut CodeGraph) -> Result<FileInfo, ParserError> {
339        let start = Instant::now();
340
341        // Check file extension
342        if !self.can_parse(path) {
343            return Err(ParserError::ParseError(
344                path.to_path_buf(),
345                "Invalid file extension for Python parser".to_string(),
346            ));
347        }
348
349        // Read file
350        let source = std::fs::read_to_string(path)
351            .map_err(|e| ParserError::IoError(path.to_path_buf(), e))?;
352
353        // Check file size
354        let byte_count = source.len();
355        if byte_count > self.config.max_file_size {
356            self.update_metrics(false, start.elapsed(), 0, 0);
357            return Err(ParserError::FileTooLarge(path.to_path_buf(), byte_count));
358        }
359
360        // Parse source
361        let mut file_info = self.parse_source(&source, path, graph)?;
362        file_info.byte_count = byte_count;
363
364        Ok(file_info)
365    }
366
367    fn parse_source(
368        &self,
369        source: &str,
370        file_path: &Path,
371        graph: &mut CodeGraph,
372    ) -> Result<FileInfo, ParserError> {
373        let start = Instant::now();
374
375        // Check size limit
376        if source.len() > self.config.max_file_size {
377            self.update_metrics(false, start.elapsed(), 0, 0);
378            return Err(ParserError::FileTooLarge(
379                file_path.to_path_buf(),
380                source.len(),
381            ));
382        }
383
384        // Extract entities using existing extractor
385        // Convert ParserConfig to old config format
386        let old_config = crate::config::ParserConfig {
387            include_private: !self.config.skip_private,
388            include_tests: !self.config.skip_tests,
389            max_file_size: self.config.max_file_size,
390            parallel: self.config.parallel,
391            num_threads: self.config.parallel_workers,
392            ..Default::default()
393        };
394
395        let ir = crate::extractor::extract(source, file_path, &old_config).map_err(|e| {
396            self.update_metrics(false, start.elapsed(), 0, 0);
397            ParserError::ParseError(file_path.to_path_buf(), e)
398        })?;
399
400        // Count entities and relationships
401        let entity_count = ir.entity_count();
402        let relationship_count = ir.relationship_count();
403
404        // Convert IR to graph
405        let mut file_info = self.ir_to_graph(&ir, graph, file_path)?;
406
407        // Set timing and update metrics
408        let duration = start.elapsed();
409        file_info.parse_time = duration;
410        file_info.byte_count = source.len();
411
412        self.update_metrics(true, duration, entity_count, relationship_count);
413
414        Ok(file_info)
415    }
416
417    fn config(&self) -> &ParserConfig {
418        &self.config
419    }
420
421    fn metrics(&self) -> ParserMetrics {
422        self.metrics.lock().unwrap().clone()
423    }
424
425    fn reset_metrics(&mut self) {
426        *self.metrics.lock().unwrap() = ParserMetrics::default();
427    }
428}
429
430#[cfg(test)]
431mod tests {
432    use super::*;
433
434    #[test]
435    fn test_python_parser_new() {
436        let parser = PythonParser::new();
437        assert_eq!(parser.language(), "python");
438    }
439
440    #[test]
441    fn test_python_parser_file_extensions() {
442        let parser = PythonParser::new();
443        let exts = parser.file_extensions();
444        assert_eq!(exts.len(), 2);
445        assert!(exts.contains(&".py"));
446        assert!(exts.contains(&".pyw"));
447    }
448
449    #[test]
450    fn test_python_parser_can_parse() {
451        let parser = PythonParser::new();
452        assert!(parser.can_parse(Path::new("test.py")));
453        assert!(parser.can_parse(Path::new("test.pyw")));
454        assert!(!parser.can_parse(Path::new("test.rs")));
455        assert!(!parser.can_parse(Path::new("test.txt")));
456    }
457
458    #[test]
459    fn test_metrics_initial_state() {
460        let parser = PythonParser::new();
461        let metrics = parser.metrics();
462        assert_eq!(metrics.files_attempted, 0);
463        assert_eq!(metrics.files_succeeded, 0);
464        assert_eq!(metrics.files_failed, 0);
465    }
466
467    #[test]
468    fn test_implements_edge_creation() {
469        use codegraph::{CodeGraph, EdgeType};
470        use codegraph_parser_api::{
471            ClassEntity, CodeIR, ImplementationRelation, ModuleEntity, TraitEntity,
472        };
473        use std::path::PathBuf;
474
475        let parser = PythonParser::new();
476
477        // Create IR with a class implementing a protocol (Python's equivalent of interface)
478        let mut ir = CodeIR::new(PathBuf::from("test.py"));
479        ir.set_module(ModuleEntity::new("test", "test.py", "python"));
480        ir.add_class(ClassEntity::new("MyClass", 1, 20));
481        ir.add_trait(TraitEntity::new("MyProtocol", 22, 30));
482        ir.add_implementation(ImplementationRelation::new("MyClass", "MyProtocol"));
483
484        let mut graph = CodeGraph::in_memory().unwrap();
485        let file_info = parser
486            .ir_to_graph(&ir, &mut graph, Path::new("test.py"))
487            .unwrap();
488
489        assert_eq!(file_info.classes.len(), 1);
490        assert_eq!(file_info.traits.len(), 1);
491
492        // Find class and protocol node IDs
493        let class_id = file_info.classes[0];
494        let protocol_id = file_info.traits[0];
495
496        // Verify implements edge was created
497        let edges = graph.get_edges_between(class_id, protocol_id).unwrap();
498        assert!(
499            !edges.is_empty(),
500            "Should have implements edge between class and protocol"
501        );
502
503        let edge = graph.get_edge(edges[0]).unwrap();
504        assert_eq!(
505            edge.edge_type,
506            EdgeType::Implements,
507            "Edge should be of type Implements"
508        );
509    }
510}