Skip to main content

garbage_code_hunter/language/adapter/
mod.rs

//! LanguageAdapter trait — unified semantic extraction from parsed AST.
//!
//! SignalDetectors delegate to LanguageAdapter instead of writing
//! per-language tree-sitter queries directly. This makes detectors
//! language-agnostic and consolidates query logic per language.
6
7mod c;
8mod c_cpp_common;
9mod cpp;
10mod go;
11mod helpers;
12mod java;
13mod js;
14mod python;
15mod ruby;
16mod rust;
17mod swift;
18mod ts;
19mod zig;
20
21pub use self::c::CAdapter;
22pub use self::cpp::CppAdapter;
23pub use self::go::GoAdapter;
24pub use self::java::JavaAdapter;
25pub use self::js::JSAdapter;
26pub use self::python::PythonAdapter;
27pub use self::ruby::RubyAdapter;
28pub use self::rust::RustAdapter;
29pub use self::swift::SwiftAdapter;
30pub use self::ts::TSAdapter;
31pub use self::zig::ZigAdapter;
32
33use crate::language::Language;
34use crate::treesitter::engine::ParsedFile;
35use crate::treesitter::query::{collect_captures_multi, QueryCapture};
36
/// Metadata for a function extracted from source code.
///
/// Derives `PartialEq`/`Eq` so extraction results can be compared directly
/// (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FunctionNode {
    /// Function name as reported by the adapter.
    pub name: String,
    /// Line on which the function starts.
    pub start_line: usize,
    /// Line on which the function ends.
    pub end_line: usize,
    /// Nesting depth recorded for the function; how it is measured is
    /// decided by the adapter that produced the node.
    pub nesting_depth: usize,
}

/// All adapter-computed counts in a single batch result.
///
/// `Default` yields an empty function list and zero for every counter.
/// Derives `PartialEq`/`Eq` so two results can be compared as a whole.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AdapterCounts {
    /// Functions extracted from the file.
    pub functions: Vec<FunctionNode>,
    /// Result of the panic-call count.
    pub panic_calls: usize,
    /// Result of the naming-violation count.
    pub naming_violations: usize,
    /// Result of the deeply-nested-block count.
    pub deeply_nested_blocks: usize,
    /// Result of the debug-call count.
    pub debug_calls: usize,
    /// Result of the excessive-parameter count.
    pub excessive_params: usize,
    /// Result of the unsafe-block count.
    pub unsafe_blocks: usize,
    /// Result of the magic-number count.
    pub magic_numbers: usize,
    /// Number of comment lines judged to be commented-out code.
    pub commented_out_lines: usize,
    /// Number of lines carrying a TODO/FIXME/BUG/HACK marker.
    pub todo_markers: usize,
    /// Goroutine spawns (Go-specific).
    pub goroutine_spawns: usize,
    /// `defer` statements inside loops (Go-specific).
    pub defer_in_loop: usize,
    /// Go convention violations.
    pub go_conventions: usize,
    /// Python-specific issues.
    pub python_issues: usize,
    /// Java-specific issues.
    pub java_issues: usize,
    /// Ruby-specific issues.
    pub ruby_issues: usize,
    /// C/C++ issues.
    pub c_issues: usize,
    /// TypeScript issues.
    pub ts_issues: usize,
    /// JavaScript issues.
    pub js_issues: usize,
    /// Swift issues.
    pub swift_issues: usize,
    /// Result of the dead-code count.
    pub dead_code: usize,
    /// Result of the duplicate-import count.
    pub duplicate_imports: usize,
}
72
73/// LanguageAdapter provides language-specific semantic extraction.
74///
75/// Each supported language has an adapter implementation that knows
76/// the tree-sitter query patterns for that language. SignalDetectors
77/// use these methods instead of writing per-language queries.
78pub trait LanguageAdapter: Send + Sync {
79    fn language(&self) -> Language;
80
81    fn count_panic_calls(&self, file: &ParsedFile) -> usize;
82
83    fn extract_functions(&self, file: &ParsedFile) -> Vec<FunctionNode>;
84
85    fn max_nesting_depth(&self, file: &ParsedFile) -> usize;
86
87    fn count_naming_violations(&self, file: &ParsedFile) -> usize;
88
89    fn count_deeply_nested_blocks(&self, file: &ParsedFile) -> usize;
90
91    fn count_debug_calls(&self, file: &ParsedFile) -> usize;
92
93    fn count_excessive_params(&self, file: &ParsedFile, threshold: usize) -> usize;
94
95    fn count_unsafe_blocks(&self, file: &ParsedFile) -> usize {
96        let _ = file;
97        0
98    }
99
100    fn count_magic_numbers(&self, file: &ParsedFile) -> usize {
101        let _ = file;
102        0
103    }
104
105    /// Count goroutine spawns (Go-specific).
106    fn count_goroutine_spawns(&self, file: &ParsedFile) -> usize {
107        let _ = file;
108        0
109    }
110
111    /// Count `defer` statements inside `for` loops (Go-specific).
112    fn count_defer_in_loop(&self, file: &ParsedFile) -> usize {
113        let _ = file;
114        0
115    }
116
117    /// Count Go convention violations: uppercase error strings,
118    /// context.Context not first param, if-else with return.
119    fn count_go_convention_violations(&self, file: &ParsedFile) -> usize {
120        let _ = file;
121        0
122    }
123
124    /// Count Python-specific code issues: wildcard imports, redundant
125    /// bool comparisons, identity comparison violations, type:ignore comments,
126    /// legacy string formatting, custom dunder methods, import order.
127    fn count_python_issues(&self, file: &ParsedFile) -> usize {
128        let _ = file;
129        0
130    }
131
132    /// Count Java-specific code issues: empty catch, missing javadoc,
133    /// try-finally close, string concat in loop, wildcard imports.
134    fn count_java_issues(&self, file: &ParsedFile) -> usize {
135        let _ = file;
136        0
137    }
138
139    /// Count Ruby-specific code issues: global variables, bare rescue,
140    /// missing frozen_string_literal, negated if, predicate naming, indent.
141    fn count_ruby_issues(&self, file: &ParsedFile) -> usize {
142        let _ = file;
143        0
144    }
145
146    /// Count C/C++ code issues: goto, new-expression, sizeof-type, free-mismatch, malloc-check.
147    fn count_c_issues(&self, file: &ParsedFile) -> usize {
148        let _ = file;
149        0
150    }
151
152    /// Count TypeScript code issues: any-type, prefer-interface, no-enum.
153    fn count_ts_issues(&self, file: &ParsedFile) -> usize {
154        let _ = file;
155        0
156    }
157
158    /// Count JavaScript code issues: eval, with, ==/var, alert.
159    fn count_js_issues(&self, file: &ParsedFile) -> usize {
160        let _ = file;
161        0
162    }
163
164    /// Count Swift code issues: force-unwrap, try!, implicitly unwrapped optionals.
165    fn count_swift_issues(&self, file: &ParsedFile) -> usize {
166        let _ = file;
167        0
168    }
169
170    /// Count dead code blocks — unreachable code after return/break/continue/panic.
171    fn count_dead_code(&self, file: &ParsedFile) -> usize {
172        let _ = file;
173        0
174    }
175
176    /// Count duplicate import statements in a file (language-specific).
177    fn count_duplicate_imports(&self, file: &ParsedFile) -> usize {
178        let _ = file;
179        0
180    }
181
182    /// Count commented-out code blocks in the file.
183    /// Default implementation uses content-based detection.
184    fn count_commented_out_code(&self, file: &ParsedFile) -> usize {
185        let line_comment = file.language.line_comment();
186        let mut total = 0;
187        let mut block_size = 0;
188        for line in file.content.lines() {
189            let trimmed = line.trim();
190            if trimmed.starts_with(line_comment) {
191                // Guard: skip doc-comments (///, /**) but do NOT skip //// (4-slash lines)
192                if (trimmed.starts_with("///") && !trimmed.starts_with("////"))
193                    || trimmed.starts_with("/**")
194                {
195                    if block_size > 0 {
196                        total += block_size;
197                        block_size = 0;
198                    }
199                    continue;
200                }
201                let text = trimmed.strip_prefix(line_comment).unwrap_or("").trim();
202                let is_code = CODEC_PATTERNS.iter().any(|p| text.contains(p));
203                if is_code || block_size > 0 {
204                    block_size += 1;
205                }
206            } else if !trimmed.is_empty() {
207                if block_size >= 3 {
208                    total += block_size;
209                }
210                block_size = 0;
211            }
212        }
213        if block_size >= 3 {
214            total += block_size;
215        }
216        total
217    }
218
219    /// Count TODO/FIXME/BUG/HACK markers in comments.
220    /// Default implementation uses content-based detection.
221    fn count_todo_markers(&self, file: &ParsedFile) -> usize {
222        let line_comment = file.language.line_comment();
223        let mut count = 0;
224        for line in file.content.lines() {
225            let trimmed = line.trim();
226            if let Some(pos) = trimmed.find(line_comment) {
227                // Ensure the comment marker is genuinely a comment, not a # inside a string
228                // literal (e.g. `{"#TODO": "done"}` in Python). Real comments start at position
229                // 0 or are preceded by a space/tab in the trimmed line (inline comment).
230                if pos > 0 {
231                    let prev = trimmed.as_bytes()[pos - 1];
232                    if prev != b' ' && prev != b'\t' {
233                        continue;
234                    }
235                }
236                let comment = trimmed[pos + line_comment.len()..].trim().to_uppercase();
237                if comment.starts_with("TODO")
238                    || comment.contains(" TODO ")
239                    || comment.starts_with("FIXME")
240                    || comment.contains(" FIXME ")
241                    || comment.starts_with("BUG")
242                    || comment.contains(" BUG ")
243                    || comment.starts_with("HACK")
244                    || comment.contains(" HACK ")
245                {
246                    count += 1;
247                }
248            }
249        }
250        count
251    }
252
253    /// Return merged tree-sitter query patterns for this language.
254    ///
255    /// Each pattern must use prefixed capture names (e.g. `@pc_method`)
256    /// to avoid collisions across patterns. Override in each adapter.
257    fn query_patterns(&self) -> &[&str] {
258        &[]
259    }
260
261    /// Run all query patterns in a single cursor traversal.
262    ///
263    /// Default implementation calls `collect_captures_multi` with
264    /// the patterns from `query_patterns()`.
265    fn batch_captures<'a>(&self, file: &'a ParsedFile) -> Vec<Vec<QueryCapture<'a>>> {
266        let patterns = self.query_patterns();
267        if patterns.is_empty() {
268            return Vec::new();
269        }
270        collect_captures_multi(file, patterns).unwrap_or_default()
271    }
272
273    /// Compute all adapter counts in a single batch pass.
274    ///
275    /// This is the main entry point for `StyleIr::from_parsed()`.
276    /// It calls `batch_captures()` once and passes the result to
277    /// `_from_batch` helper methods, avoiding redundant traversals.
278    fn compute_all(&self, file: &ParsedFile) -> AdapterCounts {
279        let batch = self.batch_captures(file);
280        AdapterCounts {
281            functions: self.extract_functions_from_batch(file, &batch),
282            panic_calls: self.count_panic_from_batch(file, &batch),
283            naming_violations: self.count_naming_from_batch(file, &batch),
284            deeply_nested_blocks: self.count_deeply_nested_blocks(file),
285            debug_calls: self.count_debug_from_batch(file, &batch),
286            excessive_params: self.count_excessive_from_batch(file, &batch),
287            unsafe_blocks: self.count_unsafe_from_batch(file, &batch),
288            magic_numbers: self.count_magic_from_batch(file, &batch),
289            commented_out_lines: self.count_commented_out_code(file),
290            todo_markers: self.count_todo_markers(file),
291            goroutine_spawns: self.count_goroutine_from_batch(file, &batch),
292            defer_in_loop: self.count_defer_in_loop(file),
293            go_conventions: self.count_go_convention_from_batch(file, &batch),
294            python_issues: self.count_python_from_batch(file, &batch),
295            java_issues: self.count_java_from_batch(file, &batch),
296            ruby_issues: self.count_ruby_from_batch(file, &batch),
297            c_issues: self.count_c_from_batch(file, &batch),
298            ts_issues: self.count_ts_from_batch(file, &batch),
299            js_issues: self.count_js_from_batch(file, &batch),
300            swift_issues: self.count_swift_from_batch(file, &batch),
301            dead_code: self.count_dead_code(file),
302            duplicate_imports: self.count_duplicate_imports(file),
303        }
304    }
305
306    fn extract_functions_from_batch<'a>(
307        &self,
308        file: &ParsedFile,
309        _batch: &[Vec<QueryCapture<'a>>],
310    ) -> Vec<FunctionNode> {
311        self.extract_functions(file)
312    }
313
314    fn count_panic_from_batch<'a>(
315        &self,
316        file: &ParsedFile,
317        _batch: &[Vec<QueryCapture<'a>>],
318    ) -> usize {
319        self.count_panic_calls(file)
320    }
321
322    fn count_naming_from_batch<'a>(
323        &self,
324        file: &ParsedFile,
325        _batch: &[Vec<QueryCapture<'a>>],
326    ) -> usize {
327        self.count_naming_violations(file)
328    }
329
330    fn count_debug_from_batch<'a>(
331        &self,
332        file: &ParsedFile,
333        _batch: &[Vec<QueryCapture<'a>>],
334    ) -> usize {
335        self.count_debug_calls(file)
336    }
337
338    fn count_excessive_from_batch<'a>(
339        &self,
340        file: &ParsedFile,
341        _batch: &[Vec<QueryCapture<'a>>],
342    ) -> usize {
343        self.count_excessive_params(file, 5)
344    }
345
346    fn count_unsafe_from_batch<'a>(
347        &self,
348        file: &ParsedFile,
349        _batch: &[Vec<QueryCapture<'a>>],
350    ) -> usize {
351        self.count_unsafe_blocks(file)
352    }
353
354    fn count_magic_from_batch<'a>(
355        &self,
356        file: &ParsedFile,
357        _batch: &[Vec<QueryCapture<'a>>],
358    ) -> usize {
359        self.count_magic_numbers(file)
360    }
361
362    fn count_goroutine_from_batch<'a>(
363        &self,
364        file: &ParsedFile,
365        _batch: &[Vec<QueryCapture<'a>>],
366    ) -> usize {
367        self.count_goroutine_spawns(file)
368    }
369
370    fn count_go_convention_from_batch<'a>(
371        &self,
372        file: &ParsedFile,
373        _batch: &[Vec<QueryCapture<'a>>],
374    ) -> usize {
375        self.count_go_convention_violations(file)
376    }
377
378    fn count_python_from_batch<'a>(
379        &self,
380        file: &ParsedFile,
381        _batch: &[Vec<QueryCapture<'a>>],
382    ) -> usize {
383        self.count_python_issues(file)
384    }
385
386    fn count_java_from_batch<'a>(
387        &self,
388        file: &ParsedFile,
389        _batch: &[Vec<QueryCapture<'a>>],
390    ) -> usize {
391        self.count_java_issues(file)
392    }
393
394    fn count_ruby_from_batch<'a>(
395        &self,
396        file: &ParsedFile,
397        _batch: &[Vec<QueryCapture<'a>>],
398    ) -> usize {
399        self.count_ruby_issues(file)
400    }
401
402    fn count_c_from_batch<'a>(&self, file: &ParsedFile, _batch: &[Vec<QueryCapture<'a>>]) -> usize {
403        self.count_c_issues(file)
404    }
405
406    fn count_ts_from_batch<'a>(
407        &self,
408        file: &ParsedFile,
409        _batch: &[Vec<QueryCapture<'a>>],
410    ) -> usize {
411        self.count_ts_issues(file)
412    }
413
414    fn count_js_from_batch<'a>(
415        &self,
416        file: &ParsedFile,
417        _batch: &[Vec<QueryCapture<'a>>],
418    ) -> usize {
419        self.count_js_issues(file)
420    }
421
422    fn count_swift_from_batch<'a>(
423        &self,
424        file: &ParsedFile,
425        _batch: &[Vec<QueryCapture<'a>>],
426    ) -> usize {
427        self.count_swift_issues(file)
428    }
429}
430
/// Substrings whose presence in a comment's text marks that comment as
/// likely commented-out code. Matching is a plain `contains` check, so
/// single-character entries such as `(` or `=` match very broadly.
const CODEC_PATTERNS: &[&str] = &[
    // Keywords (most carry a trailing space so they match as a leading word).
    "fn ",
    "if ",
    "else",
    "for ",
    "while ",
    "struct ",
    "enum ",
    "impl ",
    "let ",
    "return ",
    "use ",
    "mod ",
    "break",
    "continue",
    // Delimiters and statement terminator.
    "{",
    "}",
    "(",
    ")",
    "[",
    "]",
    ";",
    // Operators and path/arrow punctuation.
    "=",
    "==",
    "!=",
    "&&",
    "||",
    "->",
    "::",
];
436
437/// Shared dead-code detection: count unreachable lines after terminating statements.
438pub(crate) fn count_dead_code_with(
439    file: &ParsedFile,
440    bare_terminators: &[&str],
441    prefix_terminators: &[&str],
442    line_comment: &str,
443) -> usize {
444    let mut count = 0;
445    let mut dead_start: Option<usize> = None;
446    for (line_num, line) in file.content.lines().enumerate() {
447        let trimmed = line.trim();
448        if bare_terminators.contains(&trimmed)
449            || prefix_terminators.iter().any(|p| trimmed.starts_with(p))
450        {
451            dead_start = Some(line_num + 2);
452            continue;
453        }
454        if let Some(start) = dead_start {
455            if trimmed.is_empty() || trimmed.starts_with(line_comment) {
456                continue;
457            }
458            if line_comment == "//" && (trimmed.starts_with("/*") || trimmed.starts_with("*")) {
459                continue;
460            }
461            if trimmed == "}" || trimmed.starts_with("} else") || trimmed.starts_with("} else if") {
462                dead_start = None;
463                continue;
464            }
465            if line_num + 1 >= start {
466                count += 1;
467                dead_start = None;
468            }
469        }
470    }
471    count
472}
473
474/// Shared duplicate-import detection: count duplicate lines matching any prefix.
475pub(crate) fn count_duplicate_imports_with(file: &ParsedFile, prefixes: &[&str]) -> usize {
476    let mut seen = std::collections::HashSet::new();
477    let mut count = 0;
478    for line in file.content.lines() {
479        let trimmed = line.trim();
480        if prefixes.iter().any(|p| trimmed.starts_with(p)) && !seen.insert(trimmed.to_string()) {
481            count += 1;
482        }
483    }
484    count
485}
486
487/// Dispatch to the correct LanguageAdapter for a given language.
488pub fn adapter_for(lang: Language) -> Option<&'static dyn LanguageAdapter> {
489    match lang {
490        Language::Rust => Some(&RustAdapter),
491        Language::Python => Some(&PythonAdapter),
492        Language::Go => Some(&GoAdapter),
493        Language::JavaScript => Some(&JSAdapter),
494        Language::Ruby => Some(&RubyAdapter),
495        Language::TypeScript => Some(&TSAdapter),
496        Language::Java => Some(&JavaAdapter),
497        Language::C => Some(&CAdapter),
498        Language::Cpp => Some(&CppAdapter),
499        Language::Swift => Some(&SwiftAdapter),
500        Language::Zig => Some(&ZigAdapter),
501        _ => None,
502    }
503}
504
505/// Re-export helpers for use by sibling adapters.
506pub(crate) use helpers::{
507    count_block_ancestors, count_nested_blocks, count_params, is_boolean_or_null,
508    is_common_safe_number, is_inside_declaration, is_repeating_chars, max_scope_depth,
509    MEANINGLESS_NAMES,
510};
511
512#[cfg(test)]
513pub(crate) use helpers::parse_code;