1use crate::{ContainerBody, Import, Language, LanguageSymbols, Visibility};
4use tree_sitter::Node;
5
/// Python language support.
///
/// A stateless unit struct; all behavior comes from its [`Language`] and
/// [`LanguageSymbols`] implementations below.
pub struct Python;
12
13impl Language for Python {
    /// Returns the language name, `"Python"`.
    fn name(&self) -> &'static str {
        "Python"
    }
    /// Returns the file extensions listed for this language: `py`, `pyi` and `pyw`
    /// (given without a leading dot).
    fn extensions(&self) -> &'static [&'static str] {
        &["py", "pyi", "pyw"]
    }
    /// Returns the grammar identifier for this language, `"python"`.
    fn grammar_name(&self) -> &'static str {
        "python"
    }
23
    /// Exposes this language as a [`LanguageSymbols`] trait object.
    ///
    /// Always returns `Some`, because `Python` implements [`LanguageSymbols`].
    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
        Some(self)
    }
27
    /// Returns the docstring attached to `node`, if any.
    ///
    /// Thin wrapper: the work is done by the module-level `extract_docstring`
    /// function, which is what the unqualified call below resolves to.
    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
        extract_docstring(node, content)
    }
31
32 fn extract_implements(&self, node: &Node, content: &str) -> crate::ImplementsInfo {
33 let mut implements = Vec::new();
34 if let Some(superclasses) = node.child_by_field_name("superclasses") {
35 let mut cursor = superclasses.walk();
36 for child in superclasses.children(&mut cursor) {
37 if child.kind() == "identifier" {
38 implements.push(content[child.byte_range()].to_string());
39 }
40 }
41 }
42 crate::ImplementsInfo {
43 is_interface: false,
44 implements,
45 }
46 }
47
48 fn build_signature(&self, node: &Node, content: &str) -> String {
49 let name = match self.node_name(node, content) {
50 Some(n) => n,
51 None => {
52 return content[node.byte_range()]
53 .lines()
54 .next()
55 .unwrap_or("")
56 .trim()
57 .to_string();
58 }
59 };
60
61 if node.kind() == "class_definition" {
62 let bases = node
63 .child_by_field_name("superclasses")
64 .map(|b| &content[b.byte_range()])
65 .unwrap_or("");
66 if bases.is_empty() {
67 format!("class {}", name)
68 } else {
69 format!("class {}{}", name, bases)
70 }
71 } else {
72 let is_async = node
74 .child(0)
75 .map(|c| &content[c.byte_range()] == "async")
76 .unwrap_or(false);
77 let prefix = if is_async { "async def" } else { "def" };
78 let params = node
79 .child_by_field_name("parameters")
80 .map(|p| &content[p.byte_range()])
81 .unwrap_or("()");
82 let return_type = node
83 .child_by_field_name("return_type")
84 .map(|r| format!(" -> {}", &content[r.byte_range()]))
85 .unwrap_or_default();
86 format!("{} {}{}{}", prefix, name, params, return_type)
87 }
88 }
89
90 fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
91 let line = node.start_position().row + 1;
92
93 match node.kind() {
94 "import_statement" => {
95 let mut imports = Vec::new();
97 let mut cursor = node.walk();
98 for child in node.children(&mut cursor) {
99 if child.kind() == "dotted_name" {
100 let module = content[child.byte_range()].to_string();
101 imports.push(Import {
102 module,
103 names: Vec::new(),
104 alias: None,
105 is_wildcard: false,
106 is_relative: false,
107 line,
108 });
109 } else if child.kind() == "aliased_import"
110 && let Some(name) = child.child_by_field_name("name")
111 {
112 let module = content[name.byte_range()].to_string();
113 let alias = child
114 .child_by_field_name("alias")
115 .map(|a| content[a.byte_range()].to_string());
116 imports.push(Import {
117 module,
118 names: Vec::new(),
119 alias,
120 is_wildcard: false,
121 is_relative: false,
122 line,
123 });
124 }
125 }
126 imports
127 }
128 "import_from_statement" => {
129 let module = node
131 .child_by_field_name("module_name")
132 .map(|m| content[m.byte_range()].to_string())
133 .unwrap_or_default();
134
135 let text = &content[node.byte_range()];
137 let is_relative = text.starts_with("from .");
138
139 let mut names = Vec::new();
140 let mut is_wildcard = false;
141 let module_end = node
142 .child_by_field_name("module_name")
143 .map(|m| m.end_byte())
144 .unwrap_or(0);
145
146 let mut cursor = node.walk();
147 for child in node.children(&mut cursor) {
148 match child.kind() {
149 "dotted_name" | "identifier" if child.start_byte() > module_end => {
150 names.push(content[child.byte_range()].to_string());
151 }
152 "aliased_import" => {
153 if let Some(name) = child.child_by_field_name("name") {
154 names.push(content[name.byte_range()].to_string());
155 }
156 }
157 "wildcard_import" => {
158 is_wildcard = true;
159 }
160 _ => {}
161 }
162 }
163
164 vec![Import {
165 module,
166 names,
167 alias: None,
168 is_wildcard,
169 is_relative,
170 line,
171 }]
172 }
173 _ => Vec::new(),
174 }
175 }
176
177 fn format_import(&self, import: &Import, names: Option<&[&str]>) -> String {
178 let names_to_use: Vec<&str> = names
179 .map(|n| n.to_vec())
180 .unwrap_or_else(|| import.names.iter().map(|s| s.as_str()).collect());
181
182 if import.is_wildcard {
183 format!("from {} import *", import.module)
184 } else if names_to_use.is_empty() {
185 if let Some(ref alias) = import.alias {
186 format!("import {} as {}", import.module, alias)
187 } else {
188 format!("import {}", import.module)
189 }
190 } else {
191 format!("from {} import {}", import.module, names_to_use.join(", "))
192 }
193 }
194
    /// Returns the attributes of `node`, which for Python are its decorators.
    ///
    /// Delegates to the module-level `extract_decorators` function.
    fn extract_attributes(&self, node: &Node, content: &str) -> Vec<String> {
        extract_decorators(node, content)
    }
198
199 fn get_visibility(&self, node: &Node, content: &str) -> Visibility {
200 if let Some(name) = self.node_name(node, content) {
201 if name.starts_with("__") && name.ends_with("__") {
202 Visibility::Public } else if name.starts_with("__") {
204 Visibility::Private } else if name.starts_with('_') {
206 Visibility::Protected } else {
208 Visibility::Public
209 }
210 } else {
211 Visibility::Public
212 }
213 }
214
215 fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
216 let name = symbol.name.as_str();
217 match symbol.kind {
218 crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
219 crate::SymbolKind::Class => name.starts_with("Test") && name.len() > 4,
220 crate::SymbolKind::Module => name == "tests" || name == "test" || name == "__tests__",
221 _ => false,
222 }
223 }
224
    /// Returns the glob patterns for test files: `test_*.py` and `*_test.py`
    /// in any directory.
    fn test_file_globs(&self) -> &'static [&'static str] {
        &["**/test_*.py", "**/*_test.py"]
    }
228
    /// Returns the module-level docstring found in `src`, if any.
    ///
    /// Delegates to `extract_python_module_doc`, which works on the raw source
    /// text rather than on a parsed tree.
    fn extract_module_doc(&self, src: &str) -> Option<String> {
        extract_python_module_doc(src)
    }
232
233 fn body_has_docstring(&self, body: &Node, content: &str) -> bool {
234 let _ = content;
235 body.child(0)
236 .map(|c| {
237 c.kind() == "string"
238 || (c.kind() == "expression_statement"
239 && c.child(0).map(|n| n.kind() == "string").unwrap_or(false))
240 })
241 .unwrap_or(false)
242 }
243
    /// Returns the child stored in the node's `body` field, or `None` when the
    /// node has no such field.
    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
        node.child_by_field_name("body")
    }
247
248 fn analyze_container_body(
249 &self,
250 body_node: &Node,
251 content: &str,
252 inner_indent: &str,
253 ) -> Option<ContainerBody> {
254 let mut cursor = body_node.walk();
255 let children: Vec<_> = body_node.children(&mut cursor).collect();
256
257 if children.is_empty() {
258 return Some(ContainerBody {
259 content_start: body_node.start_byte(),
260 content_end: body_node.end_byte(),
261 inner_indent: inner_indent.to_string(),
262 is_empty: true,
263 });
264 }
265
266 let mut first_real_idx = 0;
267 for (i, child) in children.iter().enumerate() {
268 let is_docstring = if child.kind() == "expression_statement" {
269 let mut child_cursor = child.walk();
270 child
271 .children(&mut child_cursor)
272 .next()
273 .map(|fc| fc.kind() == "string")
274 .unwrap_or(false)
275 } else {
276 child.kind() == "string"
277 };
278 if is_docstring && i == 0 {
279 first_real_idx = i + 1;
280 continue;
281 }
282 break;
283 }
284
285 let is_empty = children.iter().skip(first_real_idx).all(|c| {
286 c.kind() == "pass_statement"
287 || c.kind() == "string"
288 || (c.kind() == "expression_statement"
289 && c.child(0).map(|fc| fc.kind() == "string").unwrap_or(false))
290 });
291
292 let content_start = if first_real_idx < children.len() {
293 let child_start = children[first_real_idx].start_byte();
294 content[..child_start]
295 .rfind('\n')
296 .map(|i| i + 1)
297 .unwrap_or(child_start)
298 } else if !children.is_empty() {
299 let last_end = children.last().unwrap().end_byte();
301 if last_end < content.len() && content.as_bytes()[last_end] == b'\n' {
302 last_end + 1
303 } else {
304 last_end
305 }
306 } else {
307 body_node.start_byte()
308 };
309
310 Some(ContainerBody {
311 content_start,
312 content_end: body_node.end_byte(),
313 inner_indent: inner_indent.to_string(),
314 is_empty,
315 })
316 }
317}
318
// The body is intentionally empty: `Python` overrides nothing here and relies
// entirely on what the `LanguageSymbols` trait itself provides.
impl LanguageSymbols for Python {}
320
/// Extracts the module-level docstring from raw Python source text.
///
/// Leading whitespace, blank lines and `#` comment lines are skipped. This
/// covers the shebang, encoding declarations and licence headers: comments
/// are not statements, so they do not stop a following string literal from
/// being the module docstring. A leading UTF-8 byte-order mark is ignored.
///
/// The first remaining token must be a triple-quoted string (`"""` or `'''`),
/// optionally carrying a single `r`/`R`/`u`/`U` prefix. The text up to the
/// first matching closing delimiter is trimmed and returned, with `\r\n`
/// line endings normalised to `\n`. Escape sequences are not interpreted.
///
/// Returns `None` when the source does not start with such a literal, when
/// the literal is unterminated, or when the docstring is blank.
fn extract_python_module_doc(src: &str) -> Option<String> {
    // A byte-order mark is not whitespace, so `trim_start` would not drop it.
    let mut rest = src.strip_prefix('\u{feff}').unwrap_or(src);

    // Skip blank lines and comment lines in any interleaving.
    loop {
        rest = rest.trim_start();
        if !rest.starts_with('#') {
            break;
        }
        rest = match rest.find('\n') {
            Some(newline) => &rest[newline + 1..],
            None => "",
        };
    }

    // Accept one string prefix, but only when a triple quote follows it;
    // otherwise an identifier such as `result` would lose its first letter.
    let literal = rest
        .strip_prefix(|c: char| matches!(c, 'r' | 'R' | 'u' | 'U'))
        .filter(|tail| tail.starts_with("\"\"\"") || tail.starts_with("'''"))
        .unwrap_or(rest);

    let (quote, body) = if let Some(body) = literal.strip_prefix("\"\"\"") {
        ("\"\"\"", body)
    } else {
        ("'''", literal.strip_prefix("'''")?)
    };

    let end = body.find(quote)?;
    let doc = body[..end].trim();
    if doc.is_empty() {
        None
    } else {
        Some(doc.replace("\r\n", "\n"))
    }
}
362
363fn extract_docstring(node: &Node, content: &str) -> Option<String> {
369 let body = node.child_by_field_name("body")?;
370 let first = body.child(0)?;
371
372 let string_node = match first.kind() {
376 "string" => Some(first),
377 "expression_statement" => first.child(0).filter(|n| n.kind() == "string"),
378 _ => None,
379 }?;
380
381 let mut cursor = string_node.walk();
383 for child in string_node.children(&mut cursor) {
384 if child.kind() == "string_content" {
385 let doc = content[child.byte_range()].trim();
386 if !doc.is_empty() {
387 return Some(doc.to_string());
388 }
389 }
390 }
391
392 let text = &content[string_node.byte_range()];
394 let doc = text
395 .trim_start_matches("\"\"\"")
396 .trim_start_matches("'''")
397 .trim_start_matches('"')
398 .trim_start_matches('\'')
399 .trim_end_matches("\"\"\"")
400 .trim_end_matches("'''")
401 .trim_end_matches('"')
402 .trim_end_matches('\'')
403 .trim();
404
405 if !doc.is_empty() {
406 Some(doc.to_string())
407 } else {
408 None
409 }
410}
411
412fn extract_decorators(node: &Node, content: &str) -> Vec<String> {
417 let mut attrs = Vec::new();
418 if let Some(parent) = node.parent()
419 && parent.kind() == "decorated_definition"
420 {
421 let mut cursor = parent.walk();
422 for child in parent.children(&mut cursor) {
423 if child.kind() == "decorator" {
424 attrs.push(content[child.byte_range()].to_string());
425 }
426 }
427 }
428 attrs
429}
430
431#[cfg(test)]
432mod tests {
433 use super::*;
434 use crate::GrammarLoader;
435 use tree_sitter::Parser;
436
    /// Bundles a parsed tree with the loader that supplied its grammar.
    struct ParseResult {
        /// Syntax tree produced by the parser.
        tree: tree_sitter::Tree,
        // Never read by the tests; NOTE(review): presumably held so the loaded
        // grammar outlives `tree` — confirm against `GrammarLoader`.
        #[allow(dead_code)]
        loader: GrammarLoader,
    }
442
443 fn parse_python(content: &str) -> ParseResult {
444 let loader = GrammarLoader::new();
445 let language = loader.get("python").ok().unwrap();
446 let mut parser = Parser::new();
447 parser.set_language(&language).unwrap();
448 ParseResult {
449 tree: parser.parse(content, None).unwrap(),
450 loader,
451 }
452 }
453
454 #[test]
455 fn test_python_extract_function() {
456 let support = Python;
457 let content = r#"def foo(x: int) -> str:
458 """Convert to string."""
459 return str(x)
460"#;
461 let result = parse_python(content);
462 let root = result.tree.root_node();
463
464 let mut cursor = root.walk();
466 let func = root
467 .children(&mut cursor)
468 .find(|n| n.kind() == "function_definition")
469 .unwrap();
470
471 let sig = support.build_signature(&func, content);
472 let doc = support.extract_docstring(&func, content);
473 assert_eq!(support.node_name(&func, content), Some("foo"));
474 assert!(sig.contains("def foo(x: int) -> str"));
475 assert_eq!(doc, Some("Convert to string.".to_string()));
476 }
477
478 #[test]
479 fn test_python_extract_class() {
480 let support = Python;
481 let content = r#"class Foo(Bar):
482 """A foo class."""
483 pass
484"#;
485 let result = parse_python(content);
486 let root = result.tree.root_node();
487
488 let mut cursor = root.walk();
489 let class = root
490 .children(&mut cursor)
491 .find(|n| n.kind() == "class_definition")
492 .unwrap();
493
494 let sig = support.build_signature(&class, content);
495 let doc = support.extract_docstring(&class, content);
496 assert_eq!(support.node_name(&class, content), Some("Foo"));
497 assert!(sig.contains("class Foo(Bar)"));
498 assert_eq!(doc, Some("A foo class.".to_string()));
499 }
500
501 #[test]
502 fn test_python_visibility() {
503 let support = Python;
504 let content = r#"def public(): pass
505def _protected(): pass
506def __private(): pass
507def __dunder__(): pass
508"#;
509 let result = parse_python(content);
510 let root = result.tree.root_node();
511
512 let mut cursor = root.walk();
513 let funcs: Vec<_> = root
514 .children(&mut cursor)
515 .filter(|n| n.kind() == "function_definition")
516 .collect();
517
518 assert_eq!(
519 support.get_visibility(&funcs[0], content),
520 Visibility::Public
521 );
522 assert_eq!(
523 support.get_visibility(&funcs[1], content),
524 Visibility::Protected
525 );
526 assert_eq!(
527 support.get_visibility(&funcs[2], content),
528 Visibility::Private
529 );
530 assert_eq!(
531 support.get_visibility(&funcs[3], content),
532 Visibility::Public
533 ); }
535
536 #[test]
541 fn unused_node_kinds_audit() {
542 use crate::validate_unused_kinds_audit;
543
544 #[rustfmt::skip]
554 let documented_unused: &[&str] = &[
555 "aliased_import", "block", "expression_list", "import_prefix", "lambda_parameters", "parenthesized_expression","relative_import", "tuple_expression", "wildcard_import", "case_pattern", "class_pattern", "elif_clause", "else_clause", "finally_clause", "for_in_clause", "if_clause", "with_clause", "with_item", "await", "format_expression", "format_specifier", "named_expression", "yield", "constrained_type", "generic_type", "member_type", "splat_type", "type", "type_alias_statement", "type_conversion", "type_parameter", "typed_default_parameter", "typed_parameter", "union_type", "binary_operator", "boolean_operator", "comparison_operator", "not_operator", "unary_operator", "exec_statement", "print_statement", "decorated_definition", "delete_statement", "future_import_statement", "global_statement", "nonlocal_statement", "pass_statement", "lambda",
617 "import_statement",
618 "continue_statement",
619 "raise_statement",
620 "case_clause",
621 "generator_expression",
622 "assert_statement",
623 "if_statement",
624 "while_statement",
625 "with_statement",
626 "try_statement",
627 "import_from_statement",
628 "return_statement",
629 "except_clause",
630 "dictionary_comprehension",
631 "conditional_expression",
632 "match_statement",
633 "set_comprehension",
634 "for_statement",
635 "list_comprehension",
636 "break_statement",
637 ];
638
639 validate_unused_kinds_audit(&Python, documented_unused)
640 .expect("Python unused node kinds audit failed");
641 }
642}