codegraph_python/
parser_impl.rs

//! Implementation of the `CodeParser` trait for Python.
//!
//! This module provides the `PythonParser` struct, which implements the
//! `codegraph_parser_api::CodeParser` trait so that the Python parser can be
//! used through the unified parser API.
6
7use codegraph::{CodeGraph, NodeId};
8use codegraph_parser_api::{CodeParser, FileInfo, ParserConfig, ParserError, ParserMetrics};
9use std::path::Path;
10use std::sync::Mutex;
11use std::time::{Duration, Instant};
12
/// Python language parser implementing the CodeParser trait
///
/// Holds the parser configuration together with running parse metrics.
pub struct PythonParser {
    /// Settings consulted while parsing (size limit, private/test filtering,
    /// parallelism); exposed read-only through `CodeParser::config`.
    config: ParserConfig,
    /// Accumulated counters and timings. Wrapped in a `Mutex` because the
    /// parsing methods take `&self` yet still need to update the totals.
    metrics: Mutex<ParserMetrics>,
}
18
19impl PythonParser {
20    /// Create a new Python parser with default configuration
21    pub fn new() -> Self {
22        Self {
23            config: ParserConfig::default(),
24            metrics: Mutex::new(ParserMetrics::default()),
25        }
26    }
27
28    /// Create a new Python parser with custom configuration
29    pub fn with_config(config: ParserConfig) -> Self {
30        Self {
31            config,
32            metrics: Mutex::new(ParserMetrics::default()),
33        }
34    }
35
36    /// Update metrics after parsing a file
37    fn update_metrics(
38        &self,
39        success: bool,
40        duration: Duration,
41        entities: usize,
42        relationships: usize,
43    ) {
44        let mut metrics = self.metrics.lock().unwrap();
45        metrics.files_attempted += 1;
46        if success {
47            metrics.files_succeeded += 1;
48        } else {
49            metrics.files_failed += 1;
50        }
51        metrics.total_parse_time += duration;
52        metrics.total_entities += entities;
53        metrics.total_relationships += relationships;
54    }
55
56    /// Convert CodeIR to graph nodes and return FileInfo
57    fn ir_to_graph(
58        &self,
59        ir: &codegraph_parser_api::CodeIR,
60        graph: &mut CodeGraph,
61        file_path: &Path,
62    ) -> Result<FileInfo, ParserError> {
63        use codegraph::{EdgeType, NodeType, PropertyMap};
64        use std::collections::HashMap;
65
66        let mut node_map: HashMap<String, NodeId> = HashMap::new();
67        let mut function_ids = Vec::new();
68        let mut class_ids = Vec::new();
69        let mut trait_ids = Vec::new();
70        let mut import_ids = Vec::new();
71
72        // Create module/file node
73        let file_id = if let Some(ref module) = ir.module {
74            let mut props = PropertyMap::new()
75                .with("name", module.name.clone())
76                .with("path", module.path.clone())
77                .with("language", module.language.clone())
78                .with("line_count", module.line_count.to_string());
79
80            if let Some(ref doc) = module.doc_comment {
81                props = props.with("doc", doc.clone());
82            }
83
84            let id = graph
85                .add_node(NodeType::CodeFile, props)
86                .map_err(|e| ParserError::GraphError(e.to_string()))?;
87            node_map.insert(module.name.clone(), id);
88            id
89        } else {
90            // Create a default file node
91            let file_name = file_path
92                .file_stem()
93                .and_then(|s| s.to_str())
94                .unwrap_or("unknown")
95                .to_string();
96            let props = PropertyMap::new()
97                .with("name", file_name.clone())
98                .with("path", file_path.display().to_string())
99                .with("language", "python");
100
101            let id = graph
102                .add_node(NodeType::CodeFile, props)
103                .map_err(|e| ParserError::GraphError(e.to_string()))?;
104            node_map.insert(file_name, id);
105            id
106        };
107
108        // Add functions
109        for func in &ir.functions {
110            let mut props = PropertyMap::new()
111                .with("name", func.name.clone())
112                .with("signature", func.signature.clone())
113                .with("visibility", func.visibility.clone())
114                .with("line_start", func.line_start.to_string())
115                .with("line_end", func.line_end.to_string())
116                .with("is_async", func.is_async.to_string())
117                .with("is_static", func.is_static.to_string());
118
119            if let Some(ref doc) = func.doc_comment {
120                props = props.with("doc", doc.clone());
121            }
122            if let Some(ref return_type) = func.return_type {
123                props = props.with("return_type", return_type.clone());
124            }
125
126            let func_id = graph
127                .add_node(NodeType::Function, props)
128                .map_err(|e| ParserError::GraphError(e.to_string()))?;
129
130            node_map.insert(func.name.clone(), func_id);
131            function_ids.push(func_id);
132
133            // Link function to file
134            graph
135                .add_edge(file_id, func_id, EdgeType::Contains, PropertyMap::new())
136                .map_err(|e| ParserError::GraphError(e.to_string()))?;
137        }
138
139        // Add classes
140        for class in &ir.classes {
141            let mut props = PropertyMap::new()
142                .with("name", class.name.clone())
143                .with("visibility", class.visibility.clone())
144                .with("line_start", class.line_start.to_string())
145                .with("line_end", class.line_end.to_string())
146                .with("is_abstract", class.is_abstract.to_string());
147
148            if let Some(ref doc) = class.doc_comment {
149                props = props.with("doc", doc.clone());
150            }
151
152            let class_id = graph
153                .add_node(NodeType::Class, props)
154                .map_err(|e| ParserError::GraphError(e.to_string()))?;
155
156            node_map.insert(class.name.clone(), class_id);
157            class_ids.push(class_id);
158
159            // Link class to file
160            graph
161                .add_edge(file_id, class_id, EdgeType::Contains, PropertyMap::new())
162                .map_err(|e| ParserError::GraphError(e.to_string()))?;
163
164            // Add methods
165            for method in &class.methods {
166                let method_name = format!("{}.{}", class.name, method.name);
167                let mut method_props = PropertyMap::new()
168                    .with("name", method_name.clone())
169                    .with("signature", method.signature.clone())
170                    .with("visibility", method.visibility.clone())
171                    .with("line_start", method.line_start.to_string())
172                    .with("line_end", method.line_end.to_string())
173                    .with("is_method", "true")
174                    .with("parent_class", class.name.clone());
175
176                if let Some(ref doc) = method.doc_comment {
177                    method_props = method_props.with("doc", doc.clone());
178                }
179
180                let method_id = graph
181                    .add_node(NodeType::Function, method_props)
182                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
183
184                node_map.insert(method_name, method_id);
185                function_ids.push(method_id);
186
187                // Link method to class
188                graph
189                    .add_edge(class_id, method_id, EdgeType::Contains, PropertyMap::new())
190                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
191            }
192        }
193
194        // Add traits (protocols in Python)
195        for trait_entity in &ir.traits {
196            let mut props = PropertyMap::new()
197                .with("name", trait_entity.name.clone())
198                .with("visibility", trait_entity.visibility.clone())
199                .with("line_start", trait_entity.line_start.to_string())
200                .with("line_end", trait_entity.line_end.to_string());
201
202            if let Some(ref doc) = trait_entity.doc_comment {
203                props = props.with("doc", doc.clone());
204            }
205
206            let trait_id = graph
207                .add_node(NodeType::Interface, props)
208                .map_err(|e| ParserError::GraphError(e.to_string()))?;
209
210            node_map.insert(trait_entity.name.clone(), trait_id);
211            trait_ids.push(trait_id);
212
213            // Link trait to file
214            graph
215                .add_edge(file_id, trait_id, EdgeType::Contains, PropertyMap::new())
216                .map_err(|e| ParserError::GraphError(e.to_string()))?;
217        }
218
219        // Add import nodes and relationships
220        for import in &ir.imports {
221            let imported_module = &import.imported;
222
223            // Create or get import node
224            let import_id = if let Some(&existing_id) = node_map.get(imported_module) {
225                existing_id
226            } else {
227                let props = PropertyMap::new()
228                    .with("name", imported_module.clone())
229                    .with("is_external", "true");
230
231                let id = graph
232                    .add_node(NodeType::Module, props)
233                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
234                node_map.insert(imported_module.clone(), id);
235                id
236            };
237
238            import_ids.push(import_id);
239
240            // Create import edge from file to imported module
241            let mut edge_props = PropertyMap::new();
242            if let Some(ref alias) = import.alias {
243                edge_props = edge_props.with("alias", alias.clone());
244            }
245            if import.is_wildcard {
246                edge_props = edge_props.with("is_wildcard", "true");
247            }
248            if !import.symbols.is_empty() {
249                edge_props = edge_props.with("symbols", import.symbols.join(","));
250            }
251            graph
252                .add_edge(file_id, import_id, EdgeType::Imports, edge_props)
253                .map_err(|e| ParserError::GraphError(e.to_string()))?;
254        }
255
256        // Add call relationships
257        for call in &ir.calls {
258            if let (Some(&caller_id), Some(&callee_id)) =
259                (node_map.get(&call.caller), node_map.get(&call.callee))
260            {
261                let edge_props = PropertyMap::new()
262                    .with("call_site_line", call.call_site_line.to_string())
263                    .with("is_direct", call.is_direct.to_string());
264
265                graph
266                    .add_edge(caller_id, callee_id, EdgeType::Calls, edge_props)
267                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
268            }
269        }
270
271        // Add inheritance relationships
272        for inheritance in &ir.inheritance {
273            if let (Some(&child_id), Some(&parent_id)) = (
274                node_map.get(&inheritance.child),
275                node_map.get(&inheritance.parent),
276            ) {
277                let edge_props = PropertyMap::new().with("order", inheritance.order.to_string());
278
279                graph
280                    .add_edge(child_id, parent_id, EdgeType::Extends, edge_props)
281                    .map_err(|e| ParserError::GraphError(e.to_string()))?;
282            }
283        }
284
285        // Count source lines
286        let line_count = if let Some(ref module) = ir.module {
287            module.line_count
288        } else {
289            0
290        };
291
292        Ok(FileInfo {
293            file_path: file_path.to_path_buf(),
294            file_id,
295            functions: function_ids,
296            classes: class_ids,
297            traits: trait_ids,
298            imports: import_ids,
299            parse_time: Duration::ZERO, // Will be set by caller
300            line_count,
301            byte_count: 0, // Will be set by caller
302        })
303    }
304}
305
306impl Default for PythonParser {
307    fn default() -> Self {
308        Self::new()
309    }
310}
311
312impl CodeParser for PythonParser {
313    fn language(&self) -> &str {
314        "python"
315    }
316
317    fn file_extensions(&self) -> &[&str] {
318        &[".py", ".pyw"]
319    }
320
321    fn parse_file(&self, path: &Path, graph: &mut CodeGraph) -> Result<FileInfo, ParserError> {
322        let start = Instant::now();
323
324        // Check file extension
325        if !self.can_parse(path) {
326            return Err(ParserError::ParseError(
327                path.to_path_buf(),
328                "Invalid file extension for Python parser".to_string(),
329            ));
330        }
331
332        // Read file
333        let source = std::fs::read_to_string(path)
334            .map_err(|e| ParserError::IoError(path.to_path_buf(), e))?;
335
336        // Check file size
337        let byte_count = source.len();
338        if byte_count > self.config.max_file_size {
339            self.update_metrics(false, start.elapsed(), 0, 0);
340            return Err(ParserError::FileTooLarge(path.to_path_buf(), byte_count));
341        }
342
343        // Parse source
344        let mut file_info = self.parse_source(&source, path, graph)?;
345        file_info.byte_count = byte_count;
346
347        Ok(file_info)
348    }
349
350    fn parse_source(
351        &self,
352        source: &str,
353        file_path: &Path,
354        graph: &mut CodeGraph,
355    ) -> Result<FileInfo, ParserError> {
356        let start = Instant::now();
357
358        // Check size limit
359        if source.len() > self.config.max_file_size {
360            self.update_metrics(false, start.elapsed(), 0, 0);
361            return Err(ParserError::FileTooLarge(
362                file_path.to_path_buf(),
363                source.len(),
364            ));
365        }
366
367        // Extract entities using existing extractor
368        // Convert ParserConfig to old config format
369        let old_config = crate::config::ParserConfig {
370            include_private: !self.config.skip_private,
371            include_tests: !self.config.skip_tests,
372            max_file_size: self.config.max_file_size,
373            parallel: self.config.parallel,
374            num_threads: self.config.parallel_workers,
375            ..Default::default()
376        };
377
378        let ir = crate::extractor::extract(source, file_path, &old_config).map_err(|e| {
379            self.update_metrics(false, start.elapsed(), 0, 0);
380            ParserError::ParseError(file_path.to_path_buf(), e)
381        })?;
382
383        // Count entities and relationships
384        let entity_count = ir.entity_count();
385        let relationship_count = ir.relationship_count();
386
387        // Convert IR to graph
388        let mut file_info = self.ir_to_graph(&ir, graph, file_path)?;
389
390        // Set timing and update metrics
391        let duration = start.elapsed();
392        file_info.parse_time = duration;
393        file_info.byte_count = source.len();
394
395        self.update_metrics(true, duration, entity_count, relationship_count);
396
397        Ok(file_info)
398    }
399
400    fn config(&self) -> &ParserConfig {
401        &self.config
402    }
403
404    fn metrics(&self) -> ParserMetrics {
405        self.metrics.lock().unwrap().clone()
406    }
407
408    fn reset_metrics(&mut self) {
409        *self.metrics.lock().unwrap() = ParserMetrics::default();
410    }
411}
412
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed parser reports "python" as its language.
    #[test]
    fn test_python_parser_new() {
        let python = PythonParser::new();
        assert_eq!(python.language(), "python");
    }

    /// Exactly the two Python source extensions are advertised.
    #[test]
    fn test_python_parser_file_extensions() {
        let python = PythonParser::new();
        let extensions = python.file_extensions();
        assert_eq!(extensions.len(), 2);
        for expected in [".py", ".pyw"] {
            assert!(extensions.contains(&expected));
        }
    }

    /// `can_parse` accepts Python paths and rejects everything else.
    #[test]
    fn test_python_parser_can_parse() {
        let python = PythonParser::new();
        for accepted in ["test.py", "test.pyw"] {
            assert!(python.can_parse(Path::new(accepted)));
        }
        for rejected in ["test.rs", "test.txt"] {
            assert!(!python.can_parse(Path::new(rejected)));
        }
    }

    /// Before any parse, all file counters are zero.
    #[test]
    fn test_metrics_initial_state() {
        let snapshot = PythonParser::new().metrics();
        assert_eq!(snapshot.files_attempted, 0);
        assert_eq!(snapshot.files_succeeded, 0);
        assert_eq!(snapshot.files_failed, 0);
    }
}