Skip to main content

garbage_code_hunter/style_ir/
mod.rs

//! Style IR — language-neutral style facts extracted from parsed source.
//!
//! This module is intentionally smaller than a general AST. It stores only the
//! facts needed by scoring, signal detection, and friend-style feedback.
5
6use crate::language::adapter::{adapter_for, FunctionNode};
7use crate::language::Language;
8use crate::treesitter::engine::ParsedFile;
9use serde::Serialize;
10
11/// Stable threshold facts included in the Style IR summary output.
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
13pub struct StyleIrThresholdSummary {
14    pub excessive_param_threshold: usize,
15    pub god_function_line_threshold: usize,
16}
17
18/// Stable JSON-ready summary of a Style IR snapshot.
19#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
20pub struct StyleIrSummary {
21    pub language: String,
22    pub line_count: usize,
23    pub function_count: usize,
24    pub god_function_count: usize,
25    pub panic_call_count: usize,
26    pub naming_violation_count: usize,
27    pub deeply_nested_block_count: usize,
28    pub debug_call_count: usize,
29    pub goroutine_spawn_count: usize,
30    pub defer_in_loop_count: usize,
31    pub go_convention_count: usize,
32    pub python_issue_count: usize,
33    pub java_issue_count: usize,
34    pub ruby_issue_count: usize,
35    pub c_issue_count: usize,
36    pub ts_issue_count: usize,
37    pub js_issue_count: usize,
38    pub swift_issue_count: usize,
39    pub dead_code_count: usize,
40    pub duplicate_import_count: usize,
41    pub excessive_param_count: usize,
42    pub unsafe_block_count: usize,
43    pub magic_number_count: usize,
44    pub commented_out_lines: usize,
45    pub todo_count: usize,
46    pub over_engineering_count: usize,
47    pub code_smell_count: usize,
48    pub is_clean_signal_baseline: bool,
49    pub thresholds: StyleIrThresholdSummary,
50}
51
52/// Language-neutral style facts for one parsed source file.
53///
54/// Style IR stores observed facts, not user-facing interpretation. Detector
55/// methods may combine facts into weighted signals, but the public fields stay
56/// close to adapter-extracted source evidence.
57#[derive(Debug, Clone)]
58pub struct StyleIr {
59    /// Source language used by the adapter that produced this IR.
60    pub language: Language,
61
62    /// Physical source line count measured from the original file content.
63    pub line_count: usize,
64
65    /// Function facts required by structure-level style detectors.
66    pub functions: Vec<FunctionNode>,
67
68    /// Count of panic-prone calls or macros such as unwrap, expect, and panic.
69    pub panic_call_count: usize,
70
71    /// Count of adapter-defined naming violations such as unclear identifiers.
72    pub naming_violation_count: usize,
73
74    /// Count of block nodes that cross the nesting threshold.
75    pub deeply_nested_block_count: usize,
76
77    /// Count of debug or temporary-output calls such as println and dbg.
78    pub debug_call_count: usize,
79
80    /// Count of functions whose parameter count exceeds the stable threshold.
81    pub excessive_param_count: usize,
82
83    /// Count of explicit unsafe blocks observed in the source file.
84    pub unsafe_block_count: usize,
85
86    /// Count of literal numbers that adapters classify as magic numbers.
87    pub magic_number_count: usize,
88
89    /// Count of commented-out code lines (blocks of 3+ consecutive comment lines).
90    pub commented_out_lines: usize,
91
92    /// Count of TODO/FIXME/BUG/HACK markers in comments.
93    pub todo_count: usize,
94
95    /// Count of goroutine `go` statement spawns (Go-specific).
96    pub goroutine_spawn_count: usize,
97
98    /// Count of `defer` statements inside `for` loops (Go-specific).
99    pub defer_in_loop_count: usize,
100
101    /// Count of Go convention violations (error string case, context order, else-return).
102    pub go_convention_count: usize,
103
104    /// Count of Python-specific code issues (wildcard imports, bool comparisons,
105    /// identity violations, type:ignore, legacy formatting, custom dunders, import order).
106    pub python_issue_count: usize,
107
108    /// Count of Java-specific code issues (empty catch, missing javadoc,
109    /// try-finally close, string concat in loop, wildcard imports).
110    pub java_issue_count: usize,
111
112    /// Count of Ruby-specific code issues (global variables, bare rescue,
113    /// frozen_string_literal, negated if, predicate naming, indent).
114    pub ruby_issue_count: usize,
115
116    /// Count of C/C++ code issues (goto, new-expression, sizeof-type, etc.).
117    pub c_issue_count: usize,
118
119    /// Count of TypeScript code issues (any-type, prefer-interface, no-enum).
120    pub ts_issue_count: usize,
121
122    /// Count of JavaScript code issues (eval, with, ==, var, alert).
123    pub js_issue_count: usize,
124
125    /// Count of Swift code issues (force-unwrap, try!, implicitly unwrapped optionals).
126    pub swift_issue_count: usize,
127
128    /// Count of dead code blocks — unreachable code after return/break/continue/panic.
129    pub dead_code_count: usize,
130
131    /// Count of duplicate import statements.
132    pub duplicate_import_count: usize,
133}
134
135impl StyleIr {
136    const EXCESSIVE_PARAM_THRESHOLD: usize = 5;
137    const GOD_FUNCTION_LINE_THRESHOLD: usize = 50;
138
139    /// Build Style IR from a tree-sitter parsed file.
140    ///
141    /// Returns `None` when a language has no semantic adapter yet. Callers can
142    /// keep using legacy rule logic while individual detectors migrate to IR.
143    pub fn from_parsed(file: &ParsedFile) -> Option<Self> {
144        let adapter = adapter_for(file.language)?;
145        let counts = adapter.compute_all(file);
146        Some(Self {
147            language: file.language,
148            line_count: file.content.lines().count(),
149            functions: counts.functions,
150            panic_call_count: counts.panic_calls,
151            naming_violation_count: counts.naming_violations,
152            deeply_nested_block_count: counts.deeply_nested_blocks,
153            debug_call_count: counts.debug_calls,
154            excessive_param_count: counts.excessive_params,
155            unsafe_block_count: counts.unsafe_blocks,
156            magic_number_count: counts.magic_numbers,
157            commented_out_lines: counts.commented_out_lines,
158            todo_count: counts.todo_markers,
159            goroutine_spawn_count: counts.goroutine_spawns,
160            defer_in_loop_count: counts.defer_in_loop,
161            go_convention_count: counts.go_conventions,
162            python_issue_count: counts.python_issues,
163            java_issue_count: counts.java_issues,
164            ruby_issue_count: counts.ruby_issues,
165            c_issue_count: counts.c_issues,
166            ts_issue_count: counts.ts_issues,
167            js_issue_count: counts.js_issues,
168            swift_issue_count: counts.swift_issues,
169            dead_code_count: counts.dead_code,
170            duplicate_import_count: counts.duplicate_imports,
171        })
172    }
173
174    /// Count functions that exceed the project-level god-function threshold.
175    pub fn god_function_count(&self) -> usize {
176        self.functions
177            .iter()
178            .filter(|function| {
179                function.end_line.saturating_sub(function.start_line)
180                    > Self::GOD_FUNCTION_LINE_THRESHOLD
181            })
182            .count()
183    }
184
185    /// Count the combined over-engineering signal violations.
186    pub fn over_engineering_count(&self) -> usize {
187        self.god_function_count() + self.excessive_param_count + self.goroutine_spawn_count
188    }
189
190    /// Count the combined code-smell signal violations.
191    pub fn code_smell_count(&self) -> usize {
192        self.unsafe_block_count * 2
193            + self.magic_number_count
194            + self.go_convention_count
195            + self.python_issue_count
196            + self.java_issue_count
197            + self.ruby_issue_count
198            + self.c_issue_count
199            + self.ts_issue_count
200            + self.js_issue_count
201            + self.swift_issue_count
202            + self.dead_code_count
203            + self.duplicate_import_count
204    }
205
206    /// Build a stable, JSON-ready summary for downstream consumers.
207    pub fn summary(&self) -> StyleIrSummary {
208        StyleIrSummary {
209            language: self.language.display_name().to_string(),
210            line_count: self.line_count,
211            function_count: self.functions.len(),
212            god_function_count: self.god_function_count(),
213            panic_call_count: self.panic_call_count,
214            naming_violation_count: self.naming_violation_count,
215            deeply_nested_block_count: self.deeply_nested_block_count,
216            debug_call_count: self.debug_call_count,
217            excessive_param_count: self.excessive_param_count,
218            unsafe_block_count: self.unsafe_block_count,
219            magic_number_count: self.magic_number_count,
220            commented_out_lines: self.commented_out_lines,
221            todo_count: self.todo_count,
222            goroutine_spawn_count: self.goroutine_spawn_count,
223            defer_in_loop_count: self.defer_in_loop_count,
224            go_convention_count: self.go_convention_count,
225            python_issue_count: self.python_issue_count,
226            java_issue_count: self.java_issue_count,
227            ruby_issue_count: self.ruby_issue_count,
228            c_issue_count: self.c_issue_count,
229            ts_issue_count: self.ts_issue_count,
230            js_issue_count: self.js_issue_count,
231            swift_issue_count: self.swift_issue_count,
232            dead_code_count: self.dead_code_count,
233            duplicate_import_count: self.duplicate_import_count,
234            over_engineering_count: self.over_engineering_count(),
235            code_smell_count: self.code_smell_count(),
236            is_clean_signal_baseline: self.is_clean_signal_baseline(),
237            thresholds: StyleIrThresholdSummary {
238                excessive_param_threshold: Self::EXCESSIVE_PARAM_THRESHOLD,
239                god_function_line_threshold: Self::GOD_FUNCTION_LINE_THRESHOLD,
240            },
241        }
242    }
243
244    /// Return true when the IR has no extracted style signals.
245    pub fn is_clean_signal_baseline(&self) -> bool {
246        self.panic_call_count == 0
247            && self.naming_violation_count == 0
248            && self.deeply_nested_block_count == 0
249            && self.debug_call_count == 0
250            && self.excessive_param_count == 0
251            && self.unsafe_block_count == 0
252            && self.magic_number_count == 0
253            && self.commented_out_lines == 0
254            && self.todo_count == 0
255            && self.goroutine_spawn_count == 0
256            && self.defer_in_loop_count == 0
257            && self.go_convention_count == 0
258            && self.python_issue_count == 0
259            && self.java_issue_count == 0
260            && self.ruby_issue_count == 0
261            && self.c_issue_count == 0
262            && self.ts_issue_count == 0
263            && self.js_issue_count == 0
264            && self.swift_issue_count == 0
265            && self.dead_code_count == 0
266            && self.duplicate_import_count == 0
267    }
268}
269
#[cfg(test)]
mod tests {
    use super::*;
    use crate::treesitter::engine::TreeSitterEngine;
    use std::path::PathBuf;

    /// Parse a Rust snippet under a fixed sample file name.
    fn parse_rust(code: &str) -> ParsedFile {
        let sample_path = PathBuf::from("sample.rs");
        TreeSitterEngine::new()
            .parse_file(&sample_path, code)
            .expect("Rust parser should parse valid source")
    }

    /// Parse a Rust snippet and build its Style IR in one step.
    fn build_ir(code: &str) -> StyleIr {
        let parsed = parse_rust(code);
        StyleIr::from_parsed(&parsed).expect("Rust should have a style adapter")
    }

    /// Objective: Panic-prone calls in Rust code show up in the Style IR.
    /// Invariants: The panic count is language-neutral; the line count is stable.
    #[test]
    fn test_style_ir_panic_count() {
        let style = build_ir("fn main() { value.unwrap(); panic!(\"boom\"); }");

        assert_eq!(style.language, Language::Rust);
        assert_eq!(style.line_count, 1);
        assert_eq!(style.panic_call_count, 2);
    }

    /// Objective: Clean code yields no baseline signal counts.
    /// Invariants: A trivial function emits no direct style signals.
    #[test]
    fn test_style_ir_clean_baseline() {
        let style = build_ir("fn add(left: i32, right: i32) -> i32 { left + right }");

        assert!(style.is_clean_signal_baseline());
    }

    /// Objective: Naming violations in Rust code show up in the Style IR.
    /// Invariants: One single-letter local variable is counted exactly once.
    #[test]
    fn test_style_ir_naming_count() {
        let style = build_ir("fn main() { let a = 1; }");

        assert_eq!(style.naming_violation_count, 1);
    }

    /// Objective: Deeply nested blocks are counted by the Style IR.
    /// Invariants: Six nested blocks cross the direct signal threshold.
    #[test]
    fn test_style_ir_nested_count() {
        let source = r#"
fn main() {
    if true {
        if true {
            if true {
                if true {
                    if true {
                        if true {
                            let value = 1;
                        }
                    }
                }
            }
        }
    }
}
"#;
        let style = build_ir(source);

        assert!(
            style.deeply_nested_block_count >= 1,
            "deep nesting should produce at least one style fact"
        );
    }

    /// Objective: Debug-output calls are counted by the Style IR.
    /// Invariants: println! and dbg! contribute one debug call each.
    #[test]
    fn test_style_ir_debug_count() {
        let source = r#"
fn main() {
    println!("hello");
    dbg!(42);
}
"#;
        let style = build_ir(source);

        assert_eq!(style.debug_call_count, 2);
    }

    /// Objective: Excessive-parameter threshold semantics are preserved.
    /// Invariants: Six parameters count as one over-engineering signal.
    #[test]
    fn test_style_ir_over_engineering_count() {
        let source = r#"
fn process(a: i32, b: i32, c: i32, d: i32, e: i32, f: i32) -> i32 {
    a + b + c + d + e + f
}
"#;
        let style = build_ir(source);

        assert_eq!(style.excessive_param_count, 1);
        assert_eq!(style.over_engineering_count(), 1);
    }

    /// Objective: Code-smell weighting is preserved.
    /// Invariants: Unsafe blocks count double; magic numbers count once.
    #[test]
    fn test_style_ir_code_smell_count() {
        let source = r#"
fn main() {
    unsafe {
        let ptr = 42 as *const i32;
        let _ = *ptr;
    }
    foo(100);
}
"#;
        let style = build_ir(source);

        assert!(style.unsafe_block_count >= 1);
        assert!(style.magic_number_count >= 1);

        let expected_smells = style.unsafe_block_count * 2 + style.magic_number_count;
        assert_eq!(style.code_smell_count(), expected_smells);
    }

    /// Objective: The summary exposes stable, JSON-ready fields.
    /// Invariants: Summary counts mirror the underlying Style IR snapshot.
    #[test]
    fn test_style_ir_summary_schema() {
        let source = r#"
fn process(a: i32, b: i32, c: i32, d: i32, e: i32, f: i32) -> i32 {
    unsafe {
        let value = 42;
        value + a + b + c + d + e + f
    }
}
"#;
        let style = build_ir(source);
        let snapshot = style.summary();

        assert_eq!(snapshot.language, "Rust");
        assert_eq!(snapshot.line_count, style.line_count);
        assert_eq!(snapshot.function_count, style.functions.len());
        assert_eq!(snapshot.god_function_count, style.god_function_count());
        assert_eq!(snapshot.excessive_param_count, style.excessive_param_count);
        assert_eq!(snapshot.unsafe_block_count, style.unsafe_block_count);
        assert_eq!(snapshot.code_smell_count, style.code_smell_count());
        assert_eq!(
            snapshot.over_engineering_count,
            style.over_engineering_count()
        );
        assert_eq!(snapshot.thresholds.excessive_param_threshold, 5);
        assert_eq!(snapshot.thresholds.god_function_line_threshold, 50);

        let serialized = serde_json::to_value(&snapshot).expect("summary should serialize");
        assert!(
            serialized.get("language").is_some(),
            "summary JSON should include language"
        );
        assert!(
            serialized.get("thresholds").is_some(),
            "summary JSON should include thresholds"
        );
    }
}