1use crate::docstring::parse_docstring;
4use crate::model::*;
5use std::path::Path;
6use tree_sitter::{Node, Parser};
7
8pub struct PythonParser {
9 parser: Parser,
10}
11
12impl PythonParser {
13 pub fn new() -> Self {
14 let mut parser = Parser::new();
15 parser
16 .set_language(&tree_sitter_python::LANGUAGE.into())
17 .expect("Failed to load Python grammar");
18 Self { parser }
19 }
20
21 pub fn parse_file(&mut self, path: &Path) -> crate::error::Result<PythonModule> {
28 use crate::error::PlisskenError;
29
30 let content =
31 std::fs::read_to_string(path).map_err(|e| PlisskenError::file_read(path, e))?;
32 self.parse_str(&content, path)
33 }
34
35 pub fn parse_str(&mut self, content: &str, path: &Path) -> crate::error::Result<PythonModule> {
41 use crate::error::PlisskenError;
42
43 let tree = self
44 .parser
45 .parse(content, None)
46 .ok_or_else(|| PlisskenError::python_parse(path, "failed to parse Python source"))?;
47
48 let root = tree.root_node();
49
50 let docstring = extract_module_docstring(&root, content);
52 let parsed_doc = docstring.as_ref().map(|d| parse_docstring(d));
54
55 let items = extract_module_items(&root, content, path);
57
58 Ok(PythonModule {
59 path: path.display().to_string(),
60 docstring,
61 parsed_doc,
62 items,
63 source_type: SourceType::Python,
64 source: SourceSpan::new(
65 path.to_path_buf(),
66 1,
67 content.lines().count().max(1),
68 content,
69 ),
70 })
71 }
72}
73
74impl Default for PythonParser {
75 fn default() -> Self {
76 Self::new()
77 }
78}
79
80fn extract_module_docstring(root: &Node, content: &str) -> Option<String> {
85 let mut cursor = root.walk();
87 for child in root.children(&mut cursor) {
88 if child.kind() == "expression_statement" {
89 let mut child_cursor = child.walk();
90 for expr in child.children(&mut child_cursor) {
91 if expr.kind() == "string" {
92 return extract_string_content(&expr, content);
93 }
94 }
95 } else if child.kind() != "comment" {
96 break;
98 }
99 }
100 None
101}
102
103fn extract_module_items(root: &Node, content: &str, path: &Path) -> Vec<PythonItem> {
104 let mut items = Vec::new();
105
106 let mut cursor = root.walk();
108 let children: Vec<_> = root.children(&mut cursor).collect();
109
110 let mut i = 0;
111 while i < children.len() {
112 let child = &children[i];
113 match child.kind() {
114 "class_definition" => {
115 items.push(PythonItem::Class(extract_class(child, content, path)));
116 }
117 "function_definition" => {
118 items.push(PythonItem::Function(extract_function(child, content, path)));
119 }
120 "decorated_definition" => {
121 let decorators = extract_decorators(child, content);
123 let mut inner_cursor = child.walk();
124 for inner in child.children(&mut inner_cursor) {
125 match inner.kind() {
126 "class_definition" => {
127 let mut class = extract_class(&inner, content, path);
128 class.decorators = decorators.clone();
129 items.push(PythonItem::Class(class));
130 }
131 "function_definition" => {
132 let mut func = extract_function(&inner, content, path);
133 func.decorators = decorators.clone();
134 for dec in &func.decorators {
136 if dec == "staticmethod" {
137 func.is_staticmethod = true;
138 } else if dec == "classmethod" {
139 func.is_classmethod = true;
140 } else if dec == "property" || dec.starts_with("property.") {
141 func.is_property = true;
142 }
143 }
144 items.push(PythonItem::Function(func));
145 }
146 _ => {}
147 }
148 }
149 }
150 "expression_statement" => {
151 let mut inner_cursor = child.walk();
153 for inner in child.children(&mut inner_cursor) {
154 if inner.kind() == "assignment"
155 && let Some(mut var) = extract_variable(&inner, content)
156 {
157 if i + 1 < children.len() {
160 let next = &children[i + 1];
161 if next.kind() == "expression_statement"
162 && let Some(docstring) = extract_expression_string(next, content)
163 {
164 var.docstring = Some(docstring);
165 i += 1; }
167 }
168 items.push(PythonItem::Variable(var));
169 }
170 }
171 }
172 _ => {}
173 }
174 i += 1;
175 }
176
177 items
178}
179
180fn extract_expression_string(node: &Node, content: &str) -> Option<String> {
182 let mut cursor = node.walk();
183 for child in node.children(&mut cursor) {
184 if child.kind() == "string" {
185 return extract_string_content(&child, content);
186 }
187 }
188 None
189}
190
191fn extract_class(node: &Node, content: &str, path: &Path) -> PythonClass {
192 let name = node
193 .child_by_field_name("name")
194 .map(|n| node_text(&n, content))
195 .unwrap_or_default();
196
197 let bases = extract_bases(node, content);
199
200 let body = node.child_by_field_name("body");
202 let (docstring, methods, attributes) = if let Some(body) = body {
203 extract_class_body(&body, content, path)
204 } else {
205 (None, vec![], vec![])
206 };
207 let parsed_doc = docstring.as_ref().map(|d| parse_docstring(d));
209
210 let start_line = node.start_position().row + 1;
211 let end_line = node.end_position().row + 1;
212 let source_text = extract_source_text(node, content);
213
214 PythonClass {
215 name,
216 docstring,
217 parsed_doc,
218 bases,
219 methods,
220 attributes,
221 decorators: vec![],
222 rust_impl: None,
223 source: SourceSpan {
224 location: SourceLocation {
225 file: path.to_path_buf(),
226 line_start: start_line,
227 line_end: end_line,
228 },
229 source: source_text,
230 },
231 }
232}
233
234fn extract_bases(node: &Node, content: &str) -> Vec<String> {
235 let mut bases = Vec::new();
236
237 if let Some(args) = node.child_by_field_name("superclasses") {
238 let mut cursor = args.walk();
239 for child in args.children(&mut cursor) {
240 match child.kind() {
241 "identifier" | "attribute" => {
242 bases.push(node_text(&child, content));
243 }
244 "argument_list" => {
245 let mut inner_cursor = child.walk();
247 for inner in child.children(&mut inner_cursor) {
248 if inner.kind() == "identifier" || inner.kind() == "attribute" {
249 bases.push(node_text(&inner, content));
250 }
251 }
252 }
253 _ => {}
254 }
255 }
256 }
257
258 bases
259}
260
261fn extract_class_body(
262 body: &Node,
263 content: &str,
264 path: &Path,
265) -> (Option<String>, Vec<PythonFunction>, Vec<PythonVariable>) {
266 let mut docstring = None;
267 let mut methods = Vec::new();
268 let mut attributes = Vec::new();
269 let mut first_item = true;
270
271 let mut cursor = body.walk();
272 for child in body.children(&mut cursor) {
273 match child.kind() {
274 "expression_statement" if first_item => {
275 let mut inner_cursor = child.walk();
277 for inner in child.children(&mut inner_cursor) {
278 if inner.kind() == "string" {
279 docstring = extract_string_content(&inner, content);
280 }
281 }
282 first_item = false;
283 }
284 "function_definition" => {
285 methods.push(extract_function(&child, content, path));
286 first_item = false;
287 }
288 "decorated_definition" => {
289 let decorators = extract_decorators(&child, content);
290 let mut inner_cursor = child.walk();
291 for inner in child.children(&mut inner_cursor) {
292 if inner.kind() == "function_definition" {
293 let mut func = extract_function(&inner, content, path);
294 func.decorators = decorators.clone();
295 for dec in &func.decorators {
297 if dec == "staticmethod" {
298 func.is_staticmethod = true;
299 } else if dec == "classmethod" {
300 func.is_classmethod = true;
301 } else if dec == "property" || dec.starts_with("property.") {
302 func.is_property = true;
303 }
304 }
305 methods.push(func);
306 }
307 }
308 first_item = false;
309 }
310 "expression_statement" => {
311 let mut inner_cursor = child.walk();
313 for inner in child.children(&mut inner_cursor) {
314 if inner.kind() == "assignment"
315 && let Some(var) = extract_variable(&inner, content)
316 {
317 attributes.push(var);
318 }
319 }
320 first_item = false;
321 }
322 _ => {
323 if child.kind() != "comment" && child.kind() != "pass_statement" {
324 first_item = false;
325 }
326 }
327 }
328 }
329
330 (docstring, methods, attributes)
331}
332
333fn extract_function(node: &Node, content: &str, path: &Path) -> PythonFunction {
334 let name = node
335 .child_by_field_name("name")
336 .map(|n| node_text(&n, content))
337 .unwrap_or_default();
338
339 let is_async = node.kind() == "function_definition"
340 && node.child(0).map(|c| c.kind() == "async").unwrap_or(false);
341
342 let (params, signature_str) = extract_parameters(node, content);
344
345 let return_type = node
347 .child_by_field_name("return_type")
348 .map(|n| node_text(&n, content));
349
350 let docstring = node
352 .child_by_field_name("body")
353 .and_then(|body| extract_function_docstring(&body, content));
354 let parsed_doc = docstring.as_ref().map(|d| parse_docstring(d));
356
357 let start_line = node.start_position().row + 1;
358 let end_line = node.end_position().row + 1;
359 let source_text = extract_source_text(node, content);
360
361 let full_sig = if let Some(ret) = &return_type {
363 format!("def {}({}) -> {}", name, signature_str, ret)
364 } else {
365 format!("def {}({})", name, signature_str)
366 };
367
368 PythonFunction {
369 name,
370 docstring,
371 signature_str: full_sig,
372 signature: PythonFunctionSig {
373 params,
374 return_type,
375 },
376 decorators: vec![],
377 is_async,
378 is_staticmethod: false,
379 is_classmethod: false,
380 is_property: false,
381 parsed_doc,
382 rust_impl: None,
383 source: SourceSpan {
384 location: SourceLocation {
385 file: path.to_path_buf(),
386 line_start: start_line,
387 line_end: end_line,
388 },
389 source: source_text,
390 },
391 }
392}
393
394fn extract_parameters(node: &Node, content: &str) -> (Vec<PythonParam>, String) {
395 let mut params = Vec::new();
396 let mut param_strs = Vec::new();
397
398 if let Some(params_node) = node.child_by_field_name("parameters") {
399 let mut cursor = params_node.walk();
400 for child in params_node.children(&mut cursor) {
401 match child.kind() {
402 "identifier" => {
403 let name = node_text(&child, content);
404 param_strs.push(name.clone());
405 params.push(PythonParam {
406 name,
407 ty: None,
408 default: None,
409 });
410 }
411 "typed_parameter" => {
412 let name = child
413 .child_by_field_name("name")
414 .or_else(|| child.child(0))
415 .map(|n| node_text(&n, content))
416 .unwrap_or_default();
417 let ty = child
418 .child_by_field_name("type")
419 .map(|n| node_text(&n, content));
420
421 let param_str = if let Some(ref t) = ty {
422 format!("{}: {}", name, t)
423 } else {
424 name.clone()
425 };
426 param_strs.push(param_str);
427
428 params.push(PythonParam {
429 name,
430 ty,
431 default: None,
432 });
433 }
434 "default_parameter" => {
435 let name = child
436 .child_by_field_name("name")
437 .or_else(|| child.child(0))
438 .map(|n| node_text(&n, content))
439 .unwrap_or_default();
440 let value = child
441 .child_by_field_name("value")
442 .map(|n| node_text(&n, content));
443
444 let param_str = if let Some(ref v) = value {
445 format!("{}={}", name, v)
446 } else {
447 name.clone()
448 };
449 param_strs.push(param_str);
450
451 params.push(PythonParam {
452 name,
453 ty: None,
454 default: value,
455 });
456 }
457 "typed_default_parameter" => {
458 let name = child
459 .child_by_field_name("name")
460 .or_else(|| child.child(0))
461 .map(|n| node_text(&n, content))
462 .unwrap_or_default();
463 let ty = child
464 .child_by_field_name("type")
465 .map(|n| node_text(&n, content));
466 let value = child
467 .child_by_field_name("value")
468 .map(|n| node_text(&n, content));
469
470 let param_str = match (&ty, &value) {
471 (Some(t), Some(v)) => format!("{}: {} = {}", name, t, v),
472 (Some(t), None) => format!("{}: {}", name, t),
473 (None, Some(v)) => format!("{} = {}", name, v),
474 (None, None) => name.clone(),
475 };
476 param_strs.push(param_str);
477
478 params.push(PythonParam {
479 name,
480 ty,
481 default: value,
482 });
483 }
484 "list_splat_pattern" | "dictionary_splat_pattern" => {
485 let text = node_text(&child, content);
486 param_strs.push(text.clone());
487 params.push(PythonParam {
488 name: text,
489 ty: None,
490 default: None,
491 });
492 }
493 "*" => {
494 param_strs.push("*".to_string());
495 }
496 "/" => {
497 param_strs.push("/".to_string());
498 }
499 _ => {}
500 }
501 }
502 }
503
504 (params, param_strs.join(", "))
505}
506
507fn extract_function_docstring(body: &Node, content: &str) -> Option<String> {
508 let mut cursor = body.walk();
509 for child in body.children(&mut cursor) {
510 if child.kind() == "expression_statement" {
511 let mut inner_cursor = child.walk();
512 for inner in child.children(&mut inner_cursor) {
513 if inner.kind() == "string" {
514 return extract_string_content(&inner, content);
515 }
516 }
517 }
518 if child.kind() != "comment" {
520 break;
521 }
522 }
523 None
524}
525
526fn extract_decorators(node: &Node, content: &str) -> Vec<String> {
527 let mut decorators = Vec::new();
528 let mut cursor = node.walk();
529
530 for child in node.children(&mut cursor) {
531 if child.kind() == "decorator" {
532 let text = node_text(&child, content);
534 let decorator = text.strip_prefix('@').unwrap_or(&text).to_string();
535 decorators.push(decorator);
536 }
537 }
538
539 decorators
540}
541
542fn extract_variable(node: &Node, content: &str) -> Option<PythonVariable> {
543 let left = node.child_by_field_name("left")?;
545
546 if left.kind() == "identifier" {
547 let name = node_text(&left, content);
548 let ty = node
549 .child_by_field_name("type")
550 .map(|n| node_text(&n, content));
551 let value = node
552 .child_by_field_name("right")
553 .map(|n| node_text(&n, content));
554
555 return Some(PythonVariable {
556 name,
557 ty,
558 value,
559 docstring: None,
560 });
561 }
562
563 None
564}
565
566fn extract_string_content(node: &Node, content: &str) -> Option<String> {
567 let text = node_text(node, content);
568
569 let trimmed = if text.starts_with("\"\"\"") || text.starts_with("'''") {
571 text.trim_start_matches("\"\"\"")
572 .trim_start_matches("'''")
573 .trim_end_matches("\"\"\"")
574 .trim_end_matches("'''")
575 } else if text.starts_with('"') || text.starts_with('\'') {
576 text.trim_start_matches('"')
577 .trim_start_matches('\'')
578 .trim_end_matches('"')
579 .trim_end_matches('\'')
580 } else {
581 &text
582 };
583
584 let trimmed = trimmed
586 .trim_start_matches('r')
587 .trim_start_matches('f')
588 .trim_start_matches('b');
589
590 if trimmed.is_empty() {
591 None
592 } else {
593 Some(dedent(trimmed))
595 }
596}
597
/// Removes the common indentation of a docstring body.
///
/// The indentation is the smallest leading-whitespace width (in bytes) among
/// the non-blank lines after the first one. When that width is zero, or there
/// are no such lines, `text` is returned unchanged. Otherwise the first line
/// is trimmed on both sides, every following line loses that many leading
/// bytes, and the lines are joined with `\n` (so a trailing newline in the
/// input is not kept).
///
/// Lines shorter than the indentation, and lines where the cut would fall
/// inside a multi-byte whitespace character, have all of their leading
/// whitespace removed instead. Slicing unconditionally used to panic on
/// such input (for example when ASCII spaces and U+3000 are mixed).
fn dedent(text: &str) -> String {
    let lines: Vec<&str> = text.lines().collect();

    let min_indent = lines
        .iter()
        .skip(1) // the first line starts right after the opening quotes
        .filter(|line| !line.trim().is_empty())
        .map(|line| line.len() - line.trim_start().len())
        .min()
        .unwrap_or(0);

    if min_indent == 0 {
        return text.to_string();
    }

    let dedented: Vec<&str> = lines
        .iter()
        .enumerate()
        .map(|(index, line)| {
            if index == 0 {
                line.trim()
            } else {
                // `get` is `None` both for short lines and for offsets that
                // are not on a character boundary.
                match line.get(min_indent..) {
                    Some(rest) => rest,
                    None => line.trim_start(),
                }
            }
        })
        .collect();

    dedented.join("\n")
}
632
633fn extract_source_text(node: &Node, content: &str) -> String {
634 let start = node.start_byte();
635 let end = node.end_byte();
636 content[start..end].to_string()
637}
638
639fn node_text(node: &Node, content: &str) -> String {
640 let start = node.start_byte();
641 let end = node.end_byte();
642 content[start..end].to_string()
643}
644
645impl super::traits::Parser for PythonParser {
650 fn parse_file(&mut self, path: &Path) -> crate::error::Result<super::traits::Module> {
651 PythonParser::parse_file(self, path).map(super::traits::Module::Python)
652 }
653
654 fn parse_str(
655 &mut self,
656 content: &str,
657 virtual_path: &Path,
658 ) -> crate::error::Result<super::traits::Module> {
659 PythonParser::parse_str(self, content, virtual_path).map(super::traits::Module::Python)
660 }
661
662 fn language(&self) -> super::traits::ParserLanguage {
663 super::traits::ParserLanguage::Python
664 }
665
666 fn name(&self) -> &'static str {
667 "Python"
668 }
669
670 fn extensions(&self) -> &'static [&'static str] {
671 &["py", "pyi"]
672 }
673}
674
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `code` as an in-memory `test.py`, panicking if parsing fails.
    fn parse(code: &str) -> PythonModule {
        PythonParser::new()
            .parse_str(code, Path::new("test.py"))
            .unwrap()
    }

    /// Finds the top-level class called `name` in `module`.
    fn find_class<'a>(module: &'a PythonModule, name: &str) -> Option<&'a PythonClass> {
        module.items.iter().find_map(|item| match item {
            PythonItem::Class(class) if class.name == name => Some(class),
            _ => None,
        })
    }

    #[test]
    fn test_parse_empty() {
        let mut parser = PythonParser::new();
        let outcome = parser.parse_str("", Path::new("test.py"));
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_parse_module_docstring() {
        let code = r#""""A test module.

This module does testing.
"""

def foo():
    pass
"#;
        let module = parse(code);
        assert!(module.docstring.is_some());
        assert!(module.docstring.as_deref().unwrap().contains("test module"));
    }

    #[test]
    fn test_parse_class() {
        let code = r#"
class MyClass(Base):
    """A test class."""

    def __init__(self, value: int):
        """Initialize."""
        self.value = value

    def get_value(self) -> int:
        """Get the value."""
        return self.value
"#;
        let module = parse(code);
        assert_eq!(module.items.len(), 1);

        let PythonItem::Class(class) = &module.items[0] else {
            panic!("Expected class");
        };
        assert_eq!(class.name, "MyClass");
        assert!(class.docstring.as_ref().unwrap().contains("test class"));
        assert_eq!(class.bases, vec!["Base"]);
        assert_eq!(class.methods.len(), 2);
        assert_eq!(class.methods[0].name, "__init__");
        assert_eq!(class.methods[1].name, "get_value");
    }

    #[test]
    fn test_parse_function_with_types() {
        let code = r#"
def process(data: List[str], count: int = 10) -> Optional[str]:
    """Process data."""
    pass
"#;
        let module = parse(code);

        let PythonItem::Function(func) = &module.items[0] else {
            panic!("Expected function");
        };
        assert_eq!(func.name, "process");
        assert!(func.docstring.as_ref().unwrap().contains("Process data"));

        let params = &func.signature.params;
        assert_eq!(params.len(), 2);
        assert_eq!(params[0].name, "data");
        assert_eq!(params[0].ty, Some("List[str]".to_string()));
        assert_eq!(params[1].name, "count");
        assert_eq!(params[1].default, Some("10".to_string()));
        assert_eq!(
            func.signature.return_type,
            Some("Optional[str]".to_string())
        );
    }

    #[test]
    fn test_parse_decorated_class() {
        let code = r#"
@dataclass
class Point:
    """A point in 2D space."""
    x: float
    y: float
"#;
        let module = parse(code);

        let PythonItem::Class(class) = &module.items[0] else {
            panic!("Expected class");
        };
        assert_eq!(class.name, "Point");
        assert_eq!(class.decorators, vec!["dataclass"]);
    }

    #[test]
    fn test_parse_property() {
        let code = r#"
class Foo:
    @property
    def value(self) -> int:
        return self._value

    @staticmethod
    def create() -> "Foo":
        return Foo()
"#;
        let module = parse(code);

        let PythonItem::Class(class) = &module.items[0] else {
            panic!("Expected class");
        };
        assert_eq!(class.methods.len(), 2);
        assert!(class.methods[0].is_property);
        assert!(class.methods[1].is_staticmethod);
    }

    #[test]
    fn test_parse_async_function() {
        let code = r#"
async def fetch(url: str) -> bytes:
    """Fetch data from URL."""
    pass
"#;
        let module = parse(code);

        let PythonItem::Function(func) = &module.items[0] else {
            panic!("Expected function");
        };
        assert!(func.is_async);
        assert_eq!(func.name, "fetch");
    }

    #[test]
    fn test_parse_pure_python_fixture() {
        use crate::test_fixtures::pure_python;

        let fixture_path = pure_python::scheduler();
        let module = PythonParser::new().parse_file(&fixture_path).unwrap();

        assert!(module.docstring.is_some());
        assert!(
            module
                .docstring
                .as_deref()
                .unwrap()
                .contains("Task scheduler")
        );

        let scheduler = find_class(&module, "Scheduler");
        assert!(scheduler.is_some(), "Scheduler class not found");
        let scheduler = scheduler.unwrap();
        assert!(scheduler.docstring.is_some());
        assert!(scheduler.methods.len() >= 5, "Expected at least 5 methods");

        let has_init = scheduler.methods.iter().any(|m| m.name == "__init__");
        assert!(has_init, "__init__ method not found");

        let has_property = scheduler.methods.iter().any(|m| m.is_property);
        assert!(has_property, "Expected at least one property");
    }

    #[test]
    fn test_parse_enum_class() {
        use crate::test_fixtures::pure_python;

        let fixture_path = pure_python::task();
        let module = PythonParser::new().parse_file(&fixture_path).unwrap();

        let task_status = find_class(&module, "TaskStatus");
        assert!(task_status.is_some(), "TaskStatus class not found");
        let task_status = task_status.unwrap();
        assert!(task_status.bases.iter().any(|base| base == "Enum"));
    }
}