1use super::{
4 count_block_ancestors, count_dead_code_with, count_duplicate_imports_with, count_nested_blocks,
5 count_params, is_boolean_or_null, is_common_safe_number, is_inside_declaration,
6 is_repeating_chars, max_scope_depth, FunctionNode, LanguageAdapter,
7};
8use crate::language::Language;
9use crate::treesitter::engine::ParsedFile;
10use crate::treesitter::query::QueryCapture;
11use regex::Regex;
12use std::sync::LazyLock;
13
/// Special ("dunder") names that Python itself or its standard library gives
/// meaning to.
///
/// `count_python_from_batch` reports a function named `__x__` as an issue
/// unless the name appears here, so every documented hook missing from this
/// list shows up as a false positive.
const STANDARD_DUNDERS: &[&str] = &[
    // Object lifecycle and representation.
    "__init__",
    "__new__",
    "__del__",
    "__repr__",
    "__str__",
    "__bytes__",
    "__format__",
    // Rich comparison, hashing and truthiness.
    "__lt__",
    "__le__",
    "__eq__",
    "__ne__",
    "__gt__",
    "__ge__",
    "__hash__",
    "__bool__",
    // Attribute access.
    "__getattr__",
    "__getattribute__",
    "__setattr__",
    "__delattr__",
    // Callables and containers.
    "__call__",
    "__len__",
    "__getitem__",
    "__setitem__",
    "__delitem__",
    "__iter__",
    "__next__",
    "__reversed__",
    "__contains__",
    // Context managers and async protocols.
    "__enter__",
    "__exit__",
    "__aenter__",
    "__aexit__",
    "__await__",
    "__aiter__",
    "__anext__",
    // Binary arithmetic.
    "__add__",
    "__sub__",
    "__mul__",
    "__truediv__",
    "__floordiv__",
    "__mod__",
    "__divmod__",
    "__pow__",
    "__lshift__",
    "__rshift__",
    "__and__",
    "__xor__",
    "__or__",
    // Reflected arithmetic.
    "__radd__",
    "__rsub__",
    "__rmul__",
    "__rtruediv__",
    "__rfloordiv__",
    "__rmod__",
    "__rdivmod__",
    "__rpow__",
    "__rlshift__",
    "__rrshift__",
    "__rand__",
    "__rxor__",
    "__ror__",
    // In-place arithmetic.
    "__iadd__",
    "__isub__",
    "__imul__",
    "__itruediv__",
    "__ifloordiv__",
    "__imod__",
    "__ipow__",
    "__ilshift__",
    "__irshift__",
    "__iand__",
    "__ixor__",
    "__ior__",
    // Unary operators and numeric conversion.
    "__neg__",
    "__pos__",
    "__abs__",
    "__invert__",
    "__complex__",
    "__int__",
    "__float__",
    "__round__",
    "__index__",
    // Copying, pickling and introspection.
    "__copy__",
    "__deepcopy__",
    "__sizeof__",
    "__reduce__",
    "__reduce_ex__",
    "__getnewargs__",
    "__getstate__",
    "__setstate__",
    "__dir__",
    "__class__",
    // Class creation and type checks.
    "__subclasshook__",
    "__init_subclass__",
    "__instancecheck__",
    "__subclasscheck__",
    "__fspath__",
    "__prepare__",
    "__slots__",
    // Matrix multiplication operator (`@`).
    "__matmul__",
    "__rmatmul__",
    "__imatmul__",
    // Rounding hooks used by `math.trunc`, `math.floor` and `math.ceil`.
    "__trunc__",
    "__floor__",
    "__ceil__",
    // Additional container hooks.
    "__length_hint__",
    "__missing__",
    // Descriptor protocol.
    "__get__",
    "__set__",
    "__delete__",
    "__set_name__",
    // Generic aliases and class creation.
    "__class_getitem__",
    "__mro_entries__",
    // Pickle and dataclasses hooks.
    "__getnewargs_ex__",
    "__post_init__",
    // Buffer protocol.
    "__buffer__",
    "__release_buffer__",
];
115
/// Top-level names of modules shipped with the Python standard library.
///
/// The import-order check in `count_python_from_batch` treats every top-level
/// module that is *not* listed here as third-party, so a missing stdlib module
/// makes the stdlib imports that follow it count as ordering violations.
const PYTHON_STDLIB_MODULES: &[&str] = &[
    "os",
    "sys",
    "re",
    "json",
    "math",
    "datetime",
    "time",
    "collections",
    "functools",
    "itertools",
    "typing",
    "pathlib",
    "io",
    "abc",
    "copy",
    "enum",
    "dataclasses",
    "logging",
    "unittest",
    "argparse",
    "subprocess",
    "threading",
    "multiprocessing",
    "socket",
    "http",
    "urllib",
    "email",
    "html",
    "xml",
    "csv",
    "hashlib",
    "hmac",
    "secrets",
    "base64",
    "struct",
    "pickle",
    "shelve",
    "sqlite3",
    "gzip",
    "zipfile",
    "tarfile",
    "shutil",
    "tempfile",
    "glob",
    "fnmatch",
    "contextlib",
    "textwrap",
    "string",
    "operator",
    "bisect",
    "heapq",
    "array",
    "weakref",
    "types",
    "pprint",
    "warnings",
    "traceback",
    "inspect",
    "importlib",
    "pkgutil",
    "pdb",
    "profile",
    "timeit",
    "dis",
    "ast",
    "token",
    "tokenize",
    "keyword",
    "platform",
    "ctypes",
    "concurrent",
    "asyncio",
    "signal",
    "mmap",
    "codecs",
    "locale",
    "gettext",
    "unicodedata",
    "difflib",
    // Further standard-library modules, kept alphabetical.
    "__future__",
    "atexit",
    "binascii",
    "builtins",
    "bz2",
    "cProfile",
    "calendar",
    "cmath",
    "cmd",
    "code",
    "colorsys",
    "configparser",
    "contextvars",
    "copyreg",
    "curses",
    "dbm",
    "decimal",
    "doctest",
    "errno",
    "faulthandler",
    "fcntl",
    "filecmp",
    "fileinput",
    "fractions",
    "ftplib",
    "gc",
    "getopt",
    "getpass",
    "graphlib",
    "imaplib",
    "ipaddress",
    "linecache",
    "lzma",
    "marshal",
    "mimetypes",
    "numbers",
    "optparse",
    "pstats",
    "pwd",
    "py_compile",
    "queue",
    "random",
    "reprlib",
    "resource",
    "runpy",
    "sched",
    "select",
    "selectors",
    "shlex",
    "site",
    "smtplib",
    "socketserver",
    "ssl",
    "stat",
    "statistics",
    "sysconfig",
    "termios",
    "tkinter",
    "tomllib",
    "trace",
    "tracemalloc",
    "turtle",
    "uuid",
    "venv",
    "wave",
    "webbrowser",
    "wsgiref",
    "xmlrpc",
    "zipapp",
    "zipimport",
    "zlib",
    "zoneinfo",
];
197
/// Modules for which `from <module> import *` is not reported.
///
/// `count_python_from_batch` looks for the literal text
/// `from <module> import *` on the line of each wildcard import, once per
/// entry in this list.
#[rustfmt::skip]
const ACCEPTABLE_WILDCARD_MODULES: &[&str] = &[
    "manim", "numpy", "matplotlib", "pytest", "tensorflow",
    "torch", "tkinter", "PyQt5", "PySide6", "gi.repository",
];
210
/// Query patterns returned by `PythonAdapter::query_patterns`.
///
/// The capture names (`@pc_clause`, `@py_name`, ...) are the strings the
/// `*_from_batch` methods compare against `QueryCapture::name`.
const PYTHON_PATTERNS: &[&str] = &[
    // `except` clauses, inspected by `count_panic_from_batch`.
    r"(except_clause) @pc_clause",
    // Function definitions together with their name identifier.
    r"[(function_definition name: (identifier) @py_name) @py_fn]",
    // Assignments to a single lowercase ASCII letter.
    r#"(assignment left: (identifier) @nv_var (#match? @nv_var "^[a-z]$"))"#,
    // Every assignment to a plain identifier.
    r"(assignment left: (identifier) @nv_name)",
    // Class names.
    r"(class_definition name: (identifier) @nv_cls)",
    // Calls to `print`.
    r#"(call function: (identifier) @dp_fn (#eq? @dp_fn "print"))"#,
    // Parameter lists of function definitions.
    r"(function_definition parameters: (parameters) @ep_params)",
    // Integer and float literals.
    r"[(integer) @mn_num (float) @mn_num]",
    // `from x import *`.
    r"(wildcard_import) @py_wi",
];
231
/// Stateless adapter that implements [`LanguageAdapter`] for Python sources.
pub struct PythonAdapter;
233
234impl LanguageAdapter for PythonAdapter {
    /// Reports the language this adapter handles: always [`Language::Python`].
    fn language(&self) -> Language {
        Language::Python
    }
238
    /// Returns the query patterns declared in `PYTHON_PATTERNS`.
    fn query_patterns(&self) -> &[&str] {
        PYTHON_PATTERNS
    }
242
243 fn count_panic_calls(&self, file: &ParsedFile) -> usize {
244 self.count_panic_from_batch(file, &self.batch_captures(file))
245 }
246
247 fn extract_functions(&self, file: &ParsedFile) -> Vec<FunctionNode> {
248 self.extract_functions_from_batch(file, &self.batch_captures(file))
249 }
250
251 fn max_nesting_depth(&self, file: &ParsedFile) -> usize {
252 max_scope_depth(file.root_node(), 0)
253 }
254
255 fn count_naming_violations(&self, file: &ParsedFile) -> usize {
256 self.count_naming_from_batch(file, &self.batch_captures(file))
257 }
258
259 fn count_deeply_nested_blocks(&self, file: &ParsedFile) -> usize {
260 let threshold = 5;
261 let mut count = 0;
262 count_nested_blocks(file.root_node(), 0, threshold, &mut count);
263 count
264 }
265
266 fn count_debug_calls(&self, file: &ParsedFile) -> usize {
267 self.count_debug_from_batch(file, &self.batch_captures(file))
268 }
269
270 fn count_excessive_params(&self, file: &ParsedFile, threshold: usize) -> usize {
271 self.count_excessive_from_batch_with(file, &self.batch_captures(file), threshold)
272 }
273
274 fn count_magic_numbers(&self, file: &ParsedFile) -> usize {
275 self.count_magic_from_batch(file, &self.batch_captures(file))
276 }
277
278 fn count_dead_code(&self, file: &ParsedFile) -> usize {
279 count_dead_code_with(
280 file,
281 &["return", "return None", "raise", "break", "continue"],
282 &["return ", "raise ", "sys.exit(", "exit(", "quit("],
283 "#",
284 )
285 }
286
287 fn count_duplicate_imports(&self, file: &ParsedFile) -> usize {
288 count_duplicate_imports_with(file, &["import ", "from "])
289 }
290
291 fn count_python_issues(&self, file: &ParsedFile) -> usize {
292 self.count_python_from_batch(file, &self.batch_captures(file))
293 }
294
295 fn count_panic_from_batch<'a>(
298 &self,
299 _file: &ParsedFile,
300 batch: &[Vec<QueryCapture<'a>>],
301 ) -> usize {
302 let mut count = 0;
303 for m in batch {
304 for c in m {
305 if c.name == "pc_clause" {
306 if let Some(value) = c.node.child_by_field_name("value") {
307 if let Ok(vtext) = value.utf8_text(_file.content.as_bytes()) {
308 if vtext == "BaseException" || vtext == "Exception" {
309 count += 1;
310 }
311 }
312 } else {
313 count += 1;
315 }
316 }
317 }
318 }
319 count
320 }
321
322 fn extract_functions_from_batch<'a>(
323 &self,
324 _file: &ParsedFile,
325 batch: &[Vec<QueryCapture<'a>>],
326 ) -> Vec<FunctionNode> {
327 let mut functions = Vec::new();
328 for m in batch {
329 let has_py = m.iter().any(|c| c.name.starts_with("py_"));
330 if !has_py {
331 continue;
332 }
333 let mut name = String::new();
334 let mut start_line = 0usize;
335 let mut end_line = 0usize;
336 for c in m {
337 match c.name.as_str() {
338 "py_name" => name = c.text.to_string(),
339 "py_fn" => {
340 start_line = c.node.start_position().row + 1;
341 end_line = c.node.end_position().row + 1;
342 }
343 _ => {}
344 }
345 }
346 if !name.is_empty() {
347 let nesting_depth = count_block_ancestors(m);
348 functions.push(FunctionNode {
349 name,
350 start_line,
351 end_line,
352 nesting_depth,
353 });
354 }
355 }
356 functions
357 }
358
359 fn count_naming_from_batch<'a>(
360 &self,
361 _file: &ParsedFile,
362 batch: &[Vec<QueryCapture<'a>>],
363 ) -> usize {
364 let mut count = 0usize;
365 let idiomatic_single: &[&str] = &["e", "x", "i", "j", "k", "f"];
366
367 static TERRIBLE_RE: LazyLock<Option<Regex>> = LazyLock::new(|| {
368 Regex::new(
369 r"^(data|info|temp|tmp|val|value|thing|stuff|obj|object|manager|handler|helper|util|utils)(\d+)?$",
370 ).ok()
371 });
372 let terrible_re = TERRIBLE_RE.as_ref();
373 let meaningless: &[&str] = &[
374 "foo", "bar", "baz", "qux", "quux", "quuz", "aaa", "bbb", "ccc", "ddd", "eee", "xxx",
375 "yyy", "zzz", "test1", "test2", "test3",
376 ];
377
378 for m in batch {
379 for c in m {
380 match c.name.as_str() {
381 "nv_var" if !idiomatic_single.contains(&c.text) => {
382 count += 1;
383 }
384 "nv_name" => {
385 let name = c.text;
386 let name_lower = name.to_lowercase();
387 if let Some(re) = terrible_re {
388 if re.is_match(&name_lower) {
389 count += 1;
390 continue;
391 }
392 }
393 if meaningless.contains(&name) || is_repeating_chars(name) {
394 count += 1;
395 }
396 }
397 "nv_cls" if c.text.chars().next().is_some_and(|ch| ch.is_lowercase()) => {
398 count += 1;
399 }
400 "py_name" => {
401 if count > 2000 {
403 continue;
404 }
405 let name = c.text;
406 if name.starts_with("__") || name.starts_with('_') {
407 continue;
408 }
409 if name.chars().any(|ch| ch.is_uppercase()) {
410 count += 1;
411 }
412 }
413 _ => {}
414 }
415 }
416 }
417 count
418 }
419
420 fn count_debug_from_batch<'a>(
421 &self,
422 _file: &ParsedFile,
423 batch: &[Vec<QueryCapture<'a>>],
424 ) -> usize {
425 batch
426 .iter()
427 .filter(|m| m.iter().any(|c| c.name == "dp_fn"))
428 .count()
429 }
430
431 fn count_excessive_from_batch<'a>(
432 &self,
433 _file: &ParsedFile,
434 batch: &[Vec<QueryCapture<'a>>],
435 ) -> usize {
436 self.count_excessive_from_batch_with(_file, batch, 5)
437 }
438
439 fn count_magic_from_batch<'a>(
440 &self,
441 _file: &ParsedFile,
442 batch: &[Vec<QueryCapture<'a>>],
443 ) -> usize {
444 let mut count = 0;
445 for m in batch {
446 for c in m {
447 if c.name == "mn_num" && !is_inside_declaration(c.node) {
448 let text = c.text;
449 if text != "0"
450 && text != "1"
451 && !is_common_safe_number(text)
452 && !is_boolean_or_null(text)
453 {
454 count += 1;
455 }
456 }
457 }
458 }
459 count
460 }
461
462 fn count_python_from_batch<'a>(
463 &self,
464 file: &ParsedFile,
465 batch: &[Vec<QueryCapture<'a>>],
466 ) -> usize {
467 let mut count = 0;
468
469 for m in batch {
470 for c in m {
471 match c.name.as_str() {
472 "py_wi" => {
474 let line = c.node.start_position().row;
475 let acceptable = file.content.lines().nth(line).is_some_and(|src_line| {
476 ACCEPTABLE_WILDCARD_MODULES
477 .iter()
478 .any(|m| src_line.contains(&format!("from {} import *", m)))
479 });
480 if !acceptable {
481 count += 1;
482 }
483 }
484 "py_name" => {
486 let name = c.text;
487 if name.starts_with("__")
488 && name.ends_with("__")
489 && !STANDARD_DUNDERS.contains(&name)
490 {
491 count += 1;
492 }
493 }
494 _ => {}
495 }
496 }
497 }
498
499 for line in file.content.lines() {
501 let trimmed = line.trim();
502 if trimmed.starts_with('#') {
503 continue;
504 }
505 if (trimmed.contains("== True") || trimmed.contains("== False"))
506 && !trimmed.contains("is True")
507 && !trimmed.contains("is False")
508 {
509 count += 1;
510 }
511 if trimmed.contains("== None") && !trimmed.contains("is None") {
512 count += 1;
513 }
514 if trimmed.contains("!= None") && !trimmed.contains("is not None") {
515 count += 1;
516 }
517 if trimmed.contains("# type: ignore") {
518 count += 1;
519 }
520 if !trimmed.starts_with('#')
521 && !trimmed.starts_with("\"")
522 && !trimmed.starts_with("'")
523 && trimmed.contains(".format(")
524 && !trimmed.contains("f-string")
525 {
526 count += 1;
527 }
528 if trimmed.matches('%').count() >= 2
529 && !trimmed.contains("'%")
530 && !trimmed.contains("\"%")
531 && (trimmed.contains("%s") || trimmed.contains("%d") || trimmed.contains("%r"))
532 {
533 count += 1;
534 }
535 }
536
537 let mut seen_third_party = false;
539 for line in file.content.lines() {
540 let trimmed = line.trim();
541 if trimmed.is_empty() || trimmed.starts_with('#') {
542 continue;
543 }
544 if !trimmed.starts_with("import ") && !trimmed.starts_with("from ") {
545 if !trimmed.is_empty() {
546 seen_third_party = false;
547 }
548 continue;
549 }
550 let module = if trimmed.starts_with("from ") {
551 trimmed
552 .strip_prefix("from ")
553 .unwrap_or("")
554 .split_whitespace()
555 .next()
556 .unwrap_or("")
557 } else {
558 trimmed
559 .strip_prefix("import ")
560 .unwrap_or("")
561 .split_whitespace()
562 .next()
563 .unwrap_or("")
564 };
565 if module.starts_with('.') {
566 continue;
567 }
568 let top_module = module.split('.').next().unwrap_or(module);
569 if !PYTHON_STDLIB_MODULES.contains(&top_module) {
570 seen_third_party = true;
571 } else if seen_third_party {
572 count += 1;
573 }
574 }
575
576 count
577 }
578}
579
580impl PythonAdapter {
581 fn count_excessive_from_batch_with<'a>(
582 &self,
583 _file: &ParsedFile,
584 batch: &[Vec<QueryCapture<'a>>],
585 threshold: usize,
586 ) -> usize {
587 let mut count = 0;
588 for m in batch {
589 for c in m {
590 if c.name == "ep_params" && count_params(c.text) > threshold {
591 count += 1;
592 }
593 }
594 }
595 count
596 }
597}
598
#[cfg(test)]
mod tests {
    use super::super::parse_code;
    use super::*;

    /// Parses `source` as the contents of a file named `test.py`.
    fn parsed(source: &str) -> ParsedFile {
        parse_code(source, "test.py").expect("parse")
    }

    #[test]
    fn test_python_count_panic_calls_bare_except() {
        let file = parsed("\ntry:\n    do_something()\nexcept:\n    pass\n");
        assert_eq!(PythonAdapter.count_panic_calls(&file), 1, "bare except = 1");
    }

    #[test]
    fn test_python_count_panic_calls_base_exception() {
        let file = parsed("\ntry:\n    do_something()\nexcept BaseException:\n    pass\n");
        assert_eq!(
            PythonAdapter.count_panic_calls(&file),
            1,
            "except BaseException = 1"
        );
    }

    #[test]
    fn test_python_count_panic_calls_specific_ok() {
        let file = parsed("\ntry:\n    do_something()\nexcept ValueError:\n    pass\n");
        assert_eq!(
            PythonAdapter.count_panic_calls(&file),
            0,
            "specific except = 0"
        );
    }

    #[test]
    fn test_python_naming_single_letter() {
        let file = parsed("a = 1\nb = 2\n");
        assert_eq!(PythonAdapter.count_naming_violations(&file), 2, "a and b");
    }

    #[test]
    fn test_python_naming_camel_case_fn() {
        let file = parsed("def getData(): pass\n");
        assert_eq!(
            PythonAdapter.count_naming_violations(&file),
            1,
            "camelCase fn"
        );
    }

    #[test]
    fn test_python_debug_print() {
        let file = parsed("\nprint(\"hello\")\nprint(x)\n");
        assert_eq!(PythonAdapter.count_debug_calls(&file), 2, "two print calls");
    }

    #[test]
    fn test_python_debug_clean() {
        let file = parsed("result = add(1, 2)\n");
        assert_eq!(PythonAdapter.count_debug_calls(&file), 0, "no debug calls");
    }

    #[test]
    fn test_python_extract_functions() {
        let file = parsed("def foo(): pass\ndef bar(x): return x\n");
        let fns = PythonAdapter.extract_functions(&file);
        assert_eq!(fns.len(), 2, "2 functions");
        let names: Vec<&str> = fns.iter().map(|f| f.name.as_str()).collect();
        assert_eq!(names, ["foo", "bar"]);
    }

    #[test]
    fn test_python_excessive_params() {
        let file = parsed("def process(a, b, c, d, e, f): pass\n");
        assert_eq!(PythonAdapter.count_excessive_params(&file, 5), 1, "6 > 5");
    }

    #[test]
    fn test_python_magic_numbers() {
        let file = parsed("foo(42)\nbar(100)\n");
        assert_eq!(PythonAdapter.count_magic_numbers(&file), 2);
    }

    #[test]
    fn test_python_magic_numbers_skips_trivial() {
        let file = parsed("x = 1 + 0\n");
        assert_eq!(
            PythonAdapter.count_magic_numbers(&file),
            0,
            "0 and 1 skipped"
        );
    }

    #[test]
    fn test_python_dead_code_after_return() {
        let file = parsed("\ndef foo():\n    return 42\n    print(\"dead\")\n");
        assert_eq!(PythonAdapter.count_dead_code(&file), 1);
    }

    #[test]
    fn test_python_dead_code_after_raise() {
        let file = parsed("\ndef foo():\n    raise ValueError(\"bad\")\n    x = 1\n");
        assert_eq!(PythonAdapter.count_dead_code(&file), 1);
    }

    #[test]
    fn test_python_duplicate_imports() {
        let file = parsed("import os\nimport sys\nimport os\n");
        assert_eq!(PythonAdapter.count_duplicate_imports(&file), 1);
    }
}