1use std::path::{Path, PathBuf};
4
5use crate::{
6 ContainerBody, Import, ImportSpec, Language, LanguageSymbols, ModuleId, ModuleResolver,
7 Resolution, ResolverConfig, Visibility,
8};
9use tree_sitter::Node;
10
/// Unit type that carries the Python implementation of the [`Language`] trait.
pub struct Python;
17
18impl Language for Python {
19 fn name(&self) -> &'static str {
20 "Python"
21 }
22 fn extensions(&self) -> &'static [&'static str] {
23 &["py", "pyi", "pyw"]
24 }
25 fn grammar_name(&self) -> &'static str {
26 "python"
27 }
28
29 fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
30 Some(self)
31 }
32
33 fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
34 extract_docstring(node, content)
35 }
36
37 fn extract_implements(&self, node: &Node, content: &str) -> crate::ImplementsInfo {
38 let mut implements = Vec::new();
39 if let Some(superclasses) = node.child_by_field_name("superclasses") {
40 let mut cursor = superclasses.walk();
41 for child in superclasses.children(&mut cursor) {
42 if child.kind() == "identifier" {
43 implements.push(content[child.byte_range()].to_string());
44 }
45 }
46 }
47 crate::ImplementsInfo {
48 is_interface: false,
49 implements,
50 }
51 }
52
53 fn build_signature(&self, node: &Node, content: &str) -> String {
54 let name = match self.node_name(node, content) {
55 Some(n) => n,
56 None => {
57 return content[node.byte_range()]
58 .lines()
59 .next()
60 .unwrap_or("")
61 .trim()
62 .to_string();
63 }
64 };
65
66 if node.kind() == "class_definition" {
67 let bases = node
68 .child_by_field_name("superclasses")
69 .map(|b| &content[b.byte_range()])
70 .unwrap_or("");
71 if bases.is_empty() {
72 format!("class {}", name)
73 } else {
74 format!("class {}{}", name, bases)
75 }
76 } else {
77 let is_async = node
79 .child(0)
80 .map(|c| &content[c.byte_range()] == "async")
81 .unwrap_or(false);
82 let prefix = if is_async { "async def" } else { "def" };
83 let params = node
84 .child_by_field_name("parameters")
85 .map(|p| &content[p.byte_range()])
86 .unwrap_or("()");
87 let return_type = node
88 .child_by_field_name("return_type")
89 .map(|r| format!(" -> {}", &content[r.byte_range()]))
90 .unwrap_or_default();
91 format!("{} {}{}{}", prefix, name, params, return_type)
92 }
93 }
94
95 fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
96 let line = node.start_position().row + 1;
97
98 match node.kind() {
99 "import_statement" => {
100 let mut imports = Vec::new();
102 let mut cursor = node.walk();
103 for child in node.children(&mut cursor) {
104 if child.kind() == "dotted_name" {
105 let module = content[child.byte_range()].to_string();
106 imports.push(Import {
107 module,
108 names: Vec::new(),
109 alias: None,
110 is_wildcard: false,
111 is_relative: false,
112 line,
113 });
114 } else if child.kind() == "aliased_import"
115 && let Some(name) = child.child_by_field_name("name")
116 {
117 let module = content[name.byte_range()].to_string();
118 let alias = child
119 .child_by_field_name("alias")
120 .map(|a| content[a.byte_range()].to_string());
121 imports.push(Import {
122 module,
123 names: Vec::new(),
124 alias,
125 is_wildcard: false,
126 is_relative: false,
127 line,
128 });
129 }
130 }
131 imports
132 }
133 "import_from_statement" => {
134 let module = node
136 .child_by_field_name("module_name")
137 .map(|m| content[m.byte_range()].to_string())
138 .unwrap_or_default();
139
140 let text = &content[node.byte_range()];
142 let is_relative = text.starts_with("from .");
143
144 let mut names = Vec::new();
145 let mut is_wildcard = false;
146 let module_end = node
147 .child_by_field_name("module_name")
148 .map(|m| m.end_byte())
149 .unwrap_or(0);
150
151 let mut cursor = node.walk();
152 for child in node.children(&mut cursor) {
153 match child.kind() {
154 "dotted_name" | "identifier" if child.start_byte() > module_end => {
155 names.push(content[child.byte_range()].to_string());
156 }
157 "aliased_import" => {
158 if let Some(name) = child.child_by_field_name("name") {
159 names.push(content[name.byte_range()].to_string());
160 }
161 }
162 "wildcard_import" => {
163 is_wildcard = true;
164 }
165 _ => {}
166 }
167 }
168
169 vec![Import {
170 module,
171 names,
172 alias: None,
173 is_wildcard,
174 is_relative,
175 line,
176 }]
177 }
178 _ => Vec::new(),
179 }
180 }
181
182 fn format_import(&self, import: &Import, names: Option<&[&str]>) -> String {
183 let names_to_use: Vec<&str> = names
184 .map(|n| n.to_vec())
185 .unwrap_or_else(|| import.names.iter().map(|s| s.as_str()).collect());
186
187 if import.is_wildcard {
188 format!("from {} import *", import.module)
189 } else if names_to_use.is_empty() {
190 if let Some(ref alias) = import.alias {
191 format!("import {} as {}", import.module, alias)
192 } else {
193 format!("import {}", import.module)
194 }
195 } else {
196 format!("from {} import {}", import.module, names_to_use.join(", "))
197 }
198 }
199
200 fn extract_attributes(&self, node: &Node, content: &str) -> Vec<String> {
201 extract_decorators(node, content)
202 }
203
204 fn get_visibility(&self, node: &Node, content: &str) -> Visibility {
205 if let Some(name) = self.node_name(node, content) {
206 if name.starts_with("__") && name.ends_with("__") {
207 Visibility::Public } else if name.starts_with("__") {
209 Visibility::Private } else if name.starts_with('_') {
211 Visibility::Protected } else {
213 Visibility::Public
214 }
215 } else {
216 Visibility::Public
217 }
218 }
219
220 fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
221 let name = symbol.name.as_str();
222 match symbol.kind {
223 crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
224 crate::SymbolKind::Class => name.starts_with("Test") && name.len() > 4,
225 crate::SymbolKind::Module => name == "tests" || name == "test" || name == "__tests__",
226 _ => false,
227 }
228 }
229
230 fn test_file_globs(&self) -> &'static [&'static str] {
231 &["**/test_*.py", "**/*_test.py"]
232 }
233
234 fn extract_module_doc(&self, src: &str) -> Option<String> {
235 extract_python_module_doc(src)
236 }
237
238 fn body_has_docstring(&self, body: &Node, content: &str) -> bool {
239 let _ = content;
240 body.child(0)
241 .map(|c| {
242 c.kind() == "string"
243 || (c.kind() == "expression_statement"
244 && c.child(0).map(|n| n.kind() == "string").unwrap_or(false))
245 })
246 .unwrap_or(false)
247 }
248
249 fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
250 node.child_by_field_name("body")
251 }
252
253 fn analyze_container_body(
254 &self,
255 body_node: &Node,
256 content: &str,
257 inner_indent: &str,
258 ) -> Option<ContainerBody> {
259 let mut cursor = body_node.walk();
260 let children: Vec<_> = body_node.children(&mut cursor).collect();
261
262 if children.is_empty() {
263 return Some(ContainerBody {
264 content_start: body_node.start_byte(),
265 content_end: body_node.end_byte(),
266 inner_indent: inner_indent.to_string(),
267 is_empty: true,
268 });
269 }
270
271 let mut first_real_idx = 0;
272 for (i, child) in children.iter().enumerate() {
273 let is_docstring = if child.kind() == "expression_statement" {
274 let mut child_cursor = child.walk();
275 child
276 .children(&mut child_cursor)
277 .next()
278 .map(|fc| fc.kind() == "string")
279 .unwrap_or(false)
280 } else {
281 child.kind() == "string"
282 };
283 if is_docstring && i == 0 {
284 first_real_idx = i + 1;
285 continue;
286 }
287 break;
288 }
289
290 let is_empty = children.iter().skip(first_real_idx).all(|c| {
291 c.kind() == "pass_statement"
292 || c.kind() == "string"
293 || (c.kind() == "expression_statement"
294 && c.child(0).map(|fc| fc.kind() == "string").unwrap_or(false))
295 });
296
297 let content_start = if first_real_idx < children.len() {
298 let child_start = children[first_real_idx].start_byte();
299 content[..child_start]
300 .rfind('\n')
301 .map(|i| i + 1)
302 .unwrap_or(child_start)
303 } else if !children.is_empty() {
304 let last_end = children.last().unwrap().end_byte();
306 if last_end < content.len() && content.as_bytes()[last_end] == b'\n' {
307 last_end + 1
308 } else {
309 last_end
310 }
311 } else {
312 body_node.start_byte()
313 };
314
315 Some(ContainerBody {
316 content_start,
317 content_end: body_node.end_byte(),
318 inner_indent: inner_indent.to_string(),
319 is_empty,
320 })
321 }
322
323 fn module_resolver(&self) -> Option<&dyn ModuleResolver> {
324 static RESOLVER: PythonModuleResolver = PythonModuleResolver;
325 Some(&RESOLVER)
326 }
327}
328
// No methods are overridden here; `Python` relies entirely on whatever
// `LanguageSymbols` provides by default.
impl LanguageSymbols for Python {}
330
/// Unit type that carries the [`ModuleResolver`] implementation for Python files.
pub struct PythonModuleResolver;
342
343impl ModuleResolver for PythonModuleResolver {
344 fn workspace_config(&self, root: &Path) -> ResolverConfig {
345 let mut search_roots: Vec<PathBuf> = Vec::new();
346
347 let src_dir = root.join("src");
349 if src_dir.is_dir() {
350 search_roots.push(src_dir);
351 }
352
353 ResolverConfig {
354 workspace_root: root.to_path_buf(),
355 path_mappings: Vec::new(),
356 search_roots,
357 }
358 }
359
360 fn module_of_file(&self, _root: &Path, file: &Path, cfg: &ResolverConfig) -> Vec<ModuleId> {
361 let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");
362 if !matches!(ext, "py" | "pyi" | "pyw") {
363 return Vec::new();
364 }
365
366 let file_dir = match file.parent() {
368 Some(d) => d,
369 None => return Vec::new(),
370 };
371
372 let root = &cfg.workspace_root;
375
376 let package_root = find_package_root(file_dir, root, &cfg.search_roots);
378
379 let rel = file.strip_prefix(&package_root).unwrap_or(file);
380
381 let components: Vec<&str> = rel
382 .components()
383 .filter_map(|c| {
384 if let std::path::Component::Normal(s) = c {
385 s.to_str()
386 } else {
387 None
388 }
389 })
390 .collect();
391
392 if components.is_empty() {
393 return Vec::new();
394 }
395
396 let last = *components.last().unwrap();
397 let module_path = if last == "__init__.py" {
398 if components.len() == 1 {
400 return Vec::new(); }
402 components[..components.len() - 1].join(".")
403 } else {
404 let stem = last.strip_suffix(".py").unwrap_or(last);
405 let mut parts: Vec<&str> = components[..components.len() - 1].to_vec();
406 parts.push(stem);
407 parts.join(".")
408 };
409
410 if module_path.is_empty() {
411 return Vec::new();
412 }
413
414 vec![ModuleId {
415 canonical_path: module_path,
416 }]
417 }
418
419 fn resolve(&self, from_file: &Path, spec: &ImportSpec, cfg: &ResolverConfig) -> Resolution {
420 let ext = from_file.extension().and_then(|e| e.to_str()).unwrap_or("");
421 if !matches!(ext, "py" | "pyi" | "pyw") {
422 return Resolution::NotApplicable;
423 }
424
425 let raw = &spec.raw;
426
427 if spec.is_relative {
429 return resolve_python_relative(from_file, raw, cfg);
430 }
431
432 let search_bases: Vec<PathBuf> = std::iter::once(cfg.workspace_root.clone())
434 .chain(cfg.search_roots.iter().cloned())
435 .collect();
436
437 let module_rel = raw.replace('.', "/");
438 for base in &search_bases {
439 let as_file = base.join(format!("{}.py", module_rel));
441 if as_file.exists() {
442 let exported_name = spec.names.first().cloned().unwrap_or_default();
443 return Resolution::Resolved(as_file, exported_name);
444 }
445 let as_pkg = base.join(&module_rel).join("__init__.py");
447 if as_pkg.exists() {
448 let exported_name = spec.names.first().cloned().unwrap_or_default();
449 return Resolution::Resolved(as_pkg, exported_name);
450 }
451 }
452
453 Resolution::NotFound
454 }
455}
456
/// Finds the directory that module paths for files in `file_dir` are relative to.
///
/// The first search root containing `file_dir` wins. Otherwise the function
/// climbs from `file_dir` for as long as each directory contains an
/// `__init__.py`, and returns the first directory without one. The climb
/// stops early, returning the parent, once that parent is the workspace root
/// or lies outside it.
fn find_package_root(file_dir: &Path, workspace_root: &Path, search_roots: &[PathBuf]) -> PathBuf {
    if let Some(search_root) = search_roots.iter().find(|sr| file_dir.starts_with(sr)) {
        return search_root.clone();
    }

    // Result to fall back on if a package directory turns out to have no parent.
    let mut fallback = workspace_root.to_path_buf();
    let mut dir = file_dir.to_path_buf();

    while dir.join("__init__.py").exists() {
        let Some(parent) = dir.parent().map(Path::to_path_buf) else {
            return fallback;
        };
        if parent.as_path() == workspace_root || !parent.starts_with(workspace_root) {
            return parent;
        }
        fallback = parent.clone();
        dir = parent;
    }

    // `dir` is not a package, so it is the root that packages hang off.
    dir
}
492
493fn resolve_python_relative(from_file: &Path, raw: &str, _cfg: &ResolverConfig) -> Resolution {
499 let file_dir = match from_file.parent() {
500 Some(d) => d,
501 None => return Resolution::NotFound,
502 };
503
504 let dot_count = raw.chars().take_while(|&c| c == '.').count();
506 let module_part = &raw[dot_count..];
507
508 let mut base = file_dir.to_path_buf();
510 for _ in 1..dot_count {
511 base = match base.parent() {
512 Some(p) => p.to_path_buf(),
513 None => return Resolution::NotFound,
514 };
515 }
516
517 if module_part.is_empty() {
518 return Resolution::NotFound; }
521
522 let module_rel = module_part.replace('.', "/");
523
524 let as_file = base.join(format!("{}.py", module_rel));
526 if as_file.exists() {
527 return Resolution::Resolved(as_file, String::new());
528 }
529 let as_pkg = base.join(&module_rel).join("__init__.py");
531 if as_pkg.exists() {
532 return Resolution::Resolved(as_pkg, String::new());
533 }
534
535 Resolution::NotFound
536}
537
/// Extracts the module docstring from Python source text.
///
/// Leading blank lines and `#` comment lines (shebang, encoding declaration,
/// licence headers, ...) are skipped, because comments are not statements and
/// may precede the docstring. The first remaining text must open with `"""`
/// or `'''`; everything up to the matching closing delimiter is returned,
/// trimmed, with `\r\n` line endings normalised to `\n`.
///
/// Returns `None` when the source does not start with a triple-quoted
/// string, when the string is unterminated, or when it is blank.
fn extract_python_module_doc(src: &str) -> Option<String> {
    let mut rest = src.trim_start();
    // Drop whole comment lines until the first non-comment text is reached.
    while rest.starts_with('#') {
        rest = rest
            .split_once('\n')
            .map_or("", |(_, tail)| tail)
            .trim_start();
    }

    let (quote, body) = ["\"\"\"", "'''"]
        .into_iter()
        .find_map(|q| rest.strip_prefix(q).map(|b| (q, b)))?;

    let end = body.find(quote)?;
    let doc = body[..end].trim();
    if doc.is_empty() {
        None
    } else {
        Some(doc.replace("\r\n", "\n"))
    }
}
579
580fn extract_docstring(node: &Node, content: &str) -> Option<String> {
586 let body = node.child_by_field_name("body")?;
587 let first = body.child(0)?;
588
589 let string_node = match first.kind() {
593 "string" => Some(first),
594 "expression_statement" => first.child(0).filter(|n| n.kind() == "string"),
595 _ => None,
596 }?;
597
598 let mut cursor = string_node.walk();
600 for child in string_node.children(&mut cursor) {
601 if child.kind() == "string_content" {
602 let doc = content[child.byte_range()].trim();
603 if !doc.is_empty() {
604 return Some(doc.to_string());
605 }
606 }
607 }
608
609 let text = &content[string_node.byte_range()];
611 let doc = text
612 .trim_start_matches("\"\"\"")
613 .trim_start_matches("'''")
614 .trim_start_matches('"')
615 .trim_start_matches('\'')
616 .trim_end_matches("\"\"\"")
617 .trim_end_matches("'''")
618 .trim_end_matches('"')
619 .trim_end_matches('\'')
620 .trim();
621
622 if !doc.is_empty() {
623 Some(doc.to_string())
624 } else {
625 None
626 }
627}
628
629fn extract_decorators(node: &Node, content: &str) -> Vec<String> {
634 let mut attrs = Vec::new();
635 if let Some(parent) = node.parent()
636 && parent.kind() == "decorated_definition"
637 {
638 let mut cursor = parent.walk();
639 for child in parent.children(&mut cursor) {
640 if child.kind() == "decorator" {
641 attrs.push(content[child.byte_range()].to_string());
642 }
643 }
644 }
645 attrs
646}
647
648#[cfg(test)]
649mod tests {
650 use super::*;
651 use crate::GrammarLoader;
652 use tree_sitter::Parser;
653
/// A parsed syntax tree bundled with the `GrammarLoader` used to obtain its grammar.
struct ParseResult {
    tree: tree_sitter::Tree,
    // Never read by the tests.
    // NOTE(review): presumably kept so the loaded grammar outlives `tree` — confirm.
    #[allow(dead_code)]
    loader: GrammarLoader,
}
659
660 fn parse_python(content: &str) -> ParseResult {
661 let loader = GrammarLoader::new();
662 let language = loader.get("python").ok().unwrap();
663 let mut parser = Parser::new();
664 parser.set_language(&language).unwrap();
665 ParseResult {
666 tree: parser.parse(content, None).unwrap(),
667 loader,
668 }
669 }
670
/// A typed, documented function yields its name, signature and docstring.
#[test]
fn test_python_extract_function() {
    let content = r#"def foo(x: int) -> str:
    """Convert to string."""
    return str(x)
"#;
    let parsed = parse_python(content);
    let root = parsed.tree.root_node();

    let mut cursor = root.walk();
    let func = root
        .children(&mut cursor)
        .find(|n| n.kind() == "function_definition")
        .unwrap();

    let python = Python;
    let signature = python.build_signature(&func, content);
    let docstring = python.extract_docstring(&func, content);

    assert_eq!(python.node_name(&func, content), Some("foo"));
    assert!(signature.contains("def foo(x: int) -> str"));
    assert_eq!(docstring, Some("Convert to string.".to_string()));
}
694
/// A documented class with one base yields its name, signature and docstring.
#[test]
fn test_python_extract_class() {
    let content = r#"class Foo(Bar):
    """A foo class."""
    pass
"#;
    let parsed = parse_python(content);
    let root = parsed.tree.root_node();

    let mut cursor = root.walk();
    let class = root
        .children(&mut cursor)
        .find(|n| n.kind() == "class_definition")
        .unwrap();

    let python = Python;
    let signature = python.build_signature(&class, content);
    let docstring = python.extract_docstring(&class, content);

    assert_eq!(python.node_name(&class, content), Some("Foo"));
    assert!(signature.contains("class Foo(Bar)"));
    assert_eq!(docstring, Some("A foo class.".to_string()));
}
717
/// Underscore prefixes map to the expected visibility for each function, in source order.
#[test]
fn test_python_visibility() {
    let content = r#"def public(): pass
def _protected(): pass
def __private(): pass
def __dunder__(): pass
"#;
    let parsed = parse_python(content);
    let root = parsed.tree.root_node();

    let mut cursor = root.walk();
    let funcs: Vec<_> = root
        .children(&mut cursor)
        .filter(|n| n.kind() == "function_definition")
        .collect();

    // One expectation per function definition above, in the same order.
    let expected = [
        Visibility::Public,
        Visibility::Protected,
        Visibility::Private,
        Visibility::Public,
    ];

    let python = Python;
    for (index, want) in expected.into_iter().enumerate() {
        assert_eq!(python.get_visibility(&funcs[index], content), want);
    }
}
752
/// Passes the list of node kinds below to `validate_unused_kinds_audit` for
/// `Python` and fails with the given message if it returns an error.
// NOTE(review): going by the variable name, these are grammar node kinds this
// module deliberately does not handle — confirm against
// `validate_unused_kinds_audit`.
#[test]
fn unused_node_kinds_audit() {
    use crate::validate_unused_kinds_audit;

    // The list is laid out by hand, so rustfmt is told to leave it alone.
    #[rustfmt::skip]
    let documented_unused: &[&str] = &[
        "aliased_import", "block", "expression_list", "import_prefix", "lambda_parameters", "parenthesized_expression","relative_import", "tuple_expression", "wildcard_import", "case_pattern", "class_pattern", "elif_clause", "else_clause", "finally_clause", "for_in_clause", "if_clause", "with_clause", "with_item", "await", "format_expression", "format_specifier", "named_expression", "yield", "constrained_type", "generic_type", "member_type", "splat_type", "type", "type_alias_statement", "type_conversion", "type_parameter", "typed_default_parameter", "typed_parameter", "union_type", "binary_operator", "boolean_operator", "comparison_operator", "not_operator", "unary_operator", "exec_statement", "print_statement", "decorated_definition", "delete_statement", "future_import_statement", "global_statement", "nonlocal_statement", "pass_statement", "lambda",
        "import_statement",
        "continue_statement",
        "raise_statement",
        "case_clause",
        "generator_expression",
        "assert_statement",
        "if_statement",
        "while_statement",
        "with_statement",
        "try_statement",
        "import_from_statement",
        "return_statement",
        "except_clause",
        "dictionary_comprehension",
        "conditional_expression",
        "match_statement",
        "set_comprehension",
        "for_statement",
        "list_comprehension",
        "break_statement",
    ];

    validate_unused_kinds_audit(&Python, documented_unused)
        .expect("Python unused node kinds audit failed");
}
859}