qex_core/chunk/languages/
python.rs1use super::{extract_preceding_comments, find_name, NodeMetadata, LanguageChunker};
2use crate::chunk::ChunkType;
3
4pub struct PythonChunker;
5
6impl LanguageChunker for PythonChunker {
7 fn tree_sitter_language(&self) -> tree_sitter::Language {
8 tree_sitter_python::LANGUAGE.into()
9 }
10
11 fn language_name(&self) -> &str {
12 "python"
13 }
14
15 fn file_extensions(&self) -> &[&str] {
16 &["py", "pyi"]
17 }
18
19 fn is_splittable(&self, node_type: &str) -> bool {
20 matches!(
21 node_type,
22 "function_definition" | "class_definition" | "decorated_definition"
23 )
24 }
25
26 fn has_nested_chunks(&self, node_type: &str) -> bool {
27 matches!(node_type, "class_definition" | "decorated_definition")
28 }
29
30 fn classify_node(&self, node_type: &str, parent_name: Option<&str>) -> ChunkType {
31 match node_type {
32 "class_definition" => ChunkType::Class,
33 "function_definition" if parent_name.is_some() => ChunkType::Method,
34 "function_definition" => ChunkType::Function,
35 "decorated_definition" => ChunkType::Function,
36 _ => ChunkType::ModuleLevel,
37 }
38 }
39
40 fn extract_metadata(&self, node: tree_sitter::Node, source: &str) -> NodeMetadata {
41 let mut meta = NodeMetadata::default();
42
43 match node.kind() {
44 "function_definition" => {
45 meta.name = find_name(node, source);
46 meta.is_async = {
47 let text = &source[node.start_byte()..node.end_byte()];
49 text.starts_with("async ")
50 };
51 meta.docstring = extract_python_docstring(node, source);
53 }
54 "class_definition" => {
55 meta.name = find_name(node, source);
56 meta.docstring = extract_python_docstring(node, source);
57 }
58 "decorated_definition" => {
59 let mut cursor = node.walk();
61 for child in node.children(&mut cursor) {
62 if child.kind() == "decorator" {
63 let text = &source[child.start_byte()..child.end_byte()];
64 meta.decorators.push(text.to_string());
65 } else if child.kind() == "function_definition" || child.kind() == "class_definition" {
66 let inner = self.extract_metadata(child, source);
67 meta.name = inner.name;
68 meta.docstring = inner.docstring;
69 meta.is_async = inner.is_async;
70 }
71 }
72 }
73 _ => {
74 meta.name = find_name(node, source);
75 meta.docstring = extract_preceding_comments(node, source);
76 }
77 }
78
79 meta
80 }
81}
82
83fn extract_python_docstring(node: tree_sitter::Node, source: &str) -> Option<String> {
84 let mut cursor = node.walk();
86 for child in node.children(&mut cursor) {
87 if child.kind() == "block" {
88 let mut block_cursor = child.walk();
89 for stmt in child.children(&mut block_cursor) {
90 if stmt.kind() == "expression_statement" {
91 let mut stmt_cursor = stmt.walk();
92 for expr in stmt.children(&mut stmt_cursor) {
93 if expr.kind() == "string" {
94 let text = &source[expr.start_byte()..expr.end_byte()];
95 let cleaned = text
96 .trim_start_matches("\"\"\"")
97 .trim_end_matches("\"\"\"")
98 .trim_start_matches("'''")
99 .trim_end_matches("'''")
100 .trim();
101 return Some(cleaned.to_string());
102 }
103 }
104 }
105 break;
107 }
108 }
109 }
110 None
111}