Skip to main content

cha_core/plugins/
dead_code.rs

1use std::collections::{HashMap, HashSet};
2
3use crate::{AnalysisContext, Finding, Location, Plugin, Severity, SmellCategory};
4
/// Detect non-exported functions/classes that may be dead code.
///
/// Three signals stack:
/// - `is_referenced` / `IdentifierPositions::referenced` — same-file usage
///   via an AST identifier scan (precise; substring matches in strings /
///   comments don't count).
/// - `ctx.project.is_called_externally` — cross-file call graph from parser.
/// - `collect_token_concat_targets` — for C/C++ files, scan `#define ... ##`
///   macros and per-call-site invocations to recover potential function names
///   that the macro would expand to (e.g. `_handle##X##Attr` paired with
///   `STYLE_DEF(color, Color, ...)` produces a plausible `_handleColorAttr`).
///   These names are added to the in-file reference set so X-macro dispatch
///   tables don't drown the file in false positives. Imperfect but vastly
///   better than the previous "any `#define ##` skips the whole file" nuke.
///
/// When `ctx.tree` (or `ctx.ts_language`) is unavailable, the same-file check
/// falls back to the legacy substring scan in `is_in_file_referenced_legacy`.
#[derive(Debug, Clone)]
pub struct DeadCodeAnalyzer {
    /// Names treated as entry points / framework callbacks. Functions with
    /// these names are never reported as dead, even when nothing else
    /// references them. Classes are not filtered by this list.
    pub entry_points: Vec<String>,
}
26
27impl Default for DeadCodeAnalyzer {
28    fn default() -> Self {
29        Self {
30            entry_points: default_entry_points(),
31        }
32    }
33}
34
/// Built-in list of function names that are never reported as dead code,
/// returned as owned strings in declaration order.
fn default_entry_points() -> Vec<String> {
    const BUILTIN_ENTRY_POINTS: &[&str] = &[
        // Rust
        "main",
        "new",
        "default",
        "drop",
        "fmt",
        // Python
        "__init__",
        "__new__",
        "__call__",
        "__enter__",
        "__exit__",
        "__del__",
        // Go
        "init",
        // C
        "_start",
        // Tokio / async runtimes
        "tokio_main",
        "main_async",
    ];
    BUILTIN_ENTRY_POINTS
        .iter()
        .copied()
        .map(String::from)
        .collect()
}
62
63impl Plugin for DeadCodeAnalyzer {
64    fn name(&self) -> &str {
65        "dead_code"
66    }
67
68    fn smells(&self) -> Vec<String> {
69        vec!["dead_code".into()]
70    }
71
72    fn description(&self) -> &str {
73        "Unexported and unreferenced code"
74    }
75
76    fn analyze(&self, ctx: &AnalysisContext) -> Vec<Finding> {
77        let positions = build_identifier_positions(ctx);
78        let mut findings = Vec::new();
79        check_dead_functions(ctx, &positions, &self.entry_points, &mut findings);
80        check_dead_classes(ctx, &positions, &mut findings);
81        findings
82    }
83}
84
85/// Substring matches in strings/comments don't count because we walk AST nodes.
86/// Returns `None` when AST is unavailable so callers fall back to legacy scan.
87fn build_identifier_positions(ctx: &AnalysisContext) -> Option<IdentifierPositions> {
88    let tree = ctx.tree?;
89    let lang = ctx.ts_language?;
90    let source = ctx.file.content.as_bytes();
91
92    // Capture all identifier-like nodes — function-pointer assignments, struct
93    // initializers, type references, calls all count as references.
94    let mut by_name: HashMap<String, Vec<u32>> = HashMap::new();
95    for pat in [
96        "(identifier) @x",
97        "(type_identifier) @x",
98        "(field_identifier) @x",
99        "(property_identifier) @x",
100    ] {
101        for matches in crate::query::run_query(tree, lang, source, pat) {
102            for cap in matches {
103                by_name.entry(cap.text).or_default().push(cap.start_line);
104            }
105        }
106    }
107
108    let mut tokens: HashSet<String> = HashSet::new();
109    if matches!(ctx.model.language.as_str(), "c" | "cpp") {
110        tokens.extend(collect_token_concat_targets(&ctx.file.content));
111    }
112
113    Some(IdentifierPositions { by_name, tokens })
114}
115
/// Per-file lookup tables used to decide whether a symbol is referenced.
struct IdentifierPositions {
    /// Identifier text → line numbers at which that identifier occurs, in the
    /// same numbering as the definition ranges passed to `referenced`.
    by_name: HashMap<String, Vec<u32>>,
    /// Names plausibly produced by token-concat macros (C/C++ only).
    tokens: HashSet<String>,
}

impl IdentifierPositions {
    /// Report whether `name` is used anywhere outside its own definition,
    /// i.e. outside the inclusive line range `def_start..=def_end`.
    ///
    /// A name in `tokens` is always considered referenced, wherever it would
    /// appear, because parsers don't macro-expand.
    fn referenced(&self, name: &str, def_start: usize, def_end: usize) -> bool {
        if self.tokens.contains(name) {
            return true;
        }
        let definition = def_start..=def_end;
        self.by_name.get(name).is_some_and(|occurrences| {
            occurrences
                .iter()
                .any(|&line| !definition.contains(&(line as usize)))
        })
    }
}
142
143/// Heuristic recovery of names produced by token-paste macros.
144///
145/// Strategy:
146/// 1. Find every `#define NAME(...)` whose body contains `##`.
147/// 2. From those bodies, pull tokens of the form `prefix##ARG##suffix`.
148/// 3. Then scan the file for `NAME(...)` invocations and try every argument
149///    in the call site as the paste candidate (we don't reliably know which
150///    parameter the body pasted, especially for body-less macro lookups).
151/// 4. Combine prefix + arg + suffix to get plausible expansion names.
152///
153/// Imperfect — multi-paste, nested macros, and parameter renaming all break
154/// this — but it covers the common dispatch-table case (e.g. thorvg's
155/// `STYLE_DEF(color, Color, ...)` paired with `_handle##Field##Attr`).
156fn collect_token_concat_targets(content: &str) -> HashSet<String> {
157    let mut targets = HashSet::new();
158    for tmpl in find_concat_define_templates(content) {
159        for call_args in find_macro_invocation_args(content, &tmpl.name) {
160            for arg in &call_args {
161                for (prefix, suffix) in &tmpl.paste_slots {
162                    targets.insert(format!("{prefix}{arg}{suffix}"));
163                }
164            }
165        }
166    }
167    targets
168}
169
/// A function-like macro whose replacement text pastes tokens with `##`.
struct ConcatTemplate {
    /// Macro name as written between `#define` and the opening `(`.
    name: String,
    /// One `(prefix, suffix)` pair per `prefix##arg##suffix` slot found in
    /// the macro body; either half may be empty, but not both.
    paste_slots: Vec<(String, String)>,
}
175
176// cha:ignore cognitive_complexity
177fn find_concat_define_templates(content: &str) -> Vec<ConcatTemplate> {
178    let mut out = Vec::new();
179    let mut current_define: Option<(String, String)> = None;
180    for line in content.lines() {
181        let t = line.trim_start();
182        if let Some(rest) = t.strip_prefix("#define ") {
183            let (name_part, body) = match rest.split_once(')') {
184                Some(pair) => pair,
185                None => continue,
186            };
187            let name = name_part
188                .split_once('(')
189                .map(|(n, _)| n.trim())
190                .unwrap_or("")
191                .to_string();
192            if name.is_empty() {
193                continue;
194            }
195            current_define = Some((name, body.to_string()));
196        } else if let Some((_, body)) = current_define.as_mut() {
197            body.push_str(line);
198        }
199        let line_continues = line.trim_end().ends_with('\\');
200        if !line_continues && let Some((name, body)) = current_define.take() {
201            let slots = extract_paste_slots(&body);
202            if !slots.is_empty() {
203                out.push(ConcatTemplate {
204                    name,
205                    paste_slots: slots,
206                });
207            }
208        }
209    }
210    out
211}
212
213/// Pull `prefix##X##suffix` tokens from a body. Returns `(prefix, suffix)`
214/// pairs assuming the parameter being pasted is the first macro arg
215/// (heuristic — covers `STYLE_DEF(color, Color, ...)` style).
216// cha:ignore cognitive_complexity
217// cha:ignore high_complexity
218fn extract_paste_slots(body: &str) -> Vec<(String, String)> {
219    let mut slots = Vec::new();
220    let bytes = body.as_bytes();
221    let mut i = 0;
222    while i + 2 < bytes.len() {
223        if &bytes[i..i + 2] == b"##" {
224            let mut start = i;
225            while start > 0 && is_ident_byte(bytes[start - 1]) {
226                start -= 1;
227            }
228            let prefix = std::str::from_utf8(&bytes[start..i])
229                .unwrap_or("")
230                .to_string();
231            let mut mid = i + 2;
232            while mid < bytes.len() && is_ident_byte(bytes[mid]) {
233                mid += 1;
234            }
235            let mut suffix = String::new();
236            if mid + 2 <= bytes.len() && &bytes[mid..mid + 2] == b"##" {
237                let mut end = mid + 2;
238                while end < bytes.len() && is_ident_byte(bytes[end]) {
239                    end += 1;
240                }
241                suffix = std::str::from_utf8(&bytes[mid + 2..end])
242                    .unwrap_or("")
243                    .to_string();
244                i = end;
245            } else {
246                i = mid;
247            }
248            if !prefix.is_empty() || !suffix.is_empty() {
249                slots.push((prefix, suffix));
250            }
251        } else {
252            i += 1;
253        }
254    }
255    slots
256}
257
/// True for the ASCII bytes that may appear in a C identifier:
/// letters, digits and underscore.
fn is_ident_byte(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
261
/// Collect the identifier-like arguments of `macro_name(...)` invocations.
///
/// Only invocations that begin a line (after leading whitespace) are seen,
/// and only the text up to the first `)` on that same line is read. Each
/// comma-separated argument has surrounding non-identifier characters
/// stripped; whatever remains is kept only if it consists purely of ASCII
/// letters, digits and `_`. Invocations with no surviving argument are
/// omitted from the result.
// cha:ignore high_complexity
fn find_macro_invocation_args(content: &str, macro_name: &str) -> Vec<Vec<String>> {
    fn is_word_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || c == '_'
    }

    content
        .lines()
        .filter_map(|line| {
            let tail = line.trim_start().strip_prefix(macro_name)?;
            // Word boundary: STYLE_DEF must not match STYLE_DEFINE.
            if tail.starts_with(is_word_char) {
                return None;
            }
            let after_paren = tail.trim_start().strip_prefix('(')?;
            // Stop at the first `)` if there is one on this line.
            let arg_list = match after_paren.find(')') {
                Some(close) => &after_paren[..close],
                None => after_paren,
            };
            let args: Vec<String> = arg_list
                .split(',')
                .map(|raw| raw.trim_matches(|c: char| !is_word_char(c)))
                .filter(|arg| !arg.is_empty() && arg.chars().all(is_word_char))
                .map(str::to_string)
                .collect();
            if args.is_empty() {
                None
            } else {
                Some(args)
            }
        })
        .collect()
}
305
306/// Flag unexported, unreferenced functions as potential dead code.
307fn check_dead_functions(
308    ctx: &AnalysisContext,
309    positions: &Option<IdentifierPositions>,
310    entry_points: &[String],
311    findings: &mut Vec<Finding>,
312) {
313    for f in &ctx.model.functions {
314        if f.is_exported || entry_points.iter().any(|e| e == &f.name) {
315            continue;
316        }
317        if is_referenced(
318            positions,
319            &ctx.file.content,
320            &f.name,
321            f.start_line,
322            f.end_line,
323        ) {
324            continue;
325        }
326        if let Some(p) = ctx.project
327            && p.is_called_externally(&f.name, &ctx.file.path)
328        {
329            continue;
330        }
331        findings.push(make_dead_code_finding(
332            ctx,
333            f.start_line,
334            f.name_col,
335            f.name_end_col,
336            &f.name,
337            "Function",
338        ));
339    }
340}
341
342/// Flag unexported, unreferenced classes as potential dead code.
343fn check_dead_classes(
344    ctx: &AnalysisContext,
345    positions: &Option<IdentifierPositions>,
346    findings: &mut Vec<Finding>,
347) {
348    for c in &ctx.model.classes {
349        if c.is_exported {
350            continue;
351        }
352        if is_referenced(
353            positions,
354            &ctx.file.content,
355            &c.name,
356            c.start_line,
357            c.end_line,
358        ) {
359            continue;
360        }
361        if let Some(p) = ctx.project
362            && p.is_called_externally(&c.name, &ctx.file.path)
363        {
364            continue;
365        }
366        findings.push(make_dead_code_finding(
367            ctx,
368            c.start_line,
369            c.name_col,
370            c.name_end_col,
371            &c.name,
372            "Class",
373        ));
374    }
375}
376
377/// Build a dead code finding for a given symbol.
378fn make_dead_code_finding(
379    ctx: &AnalysisContext,
380    start_line: usize,
381    name_col: usize,
382    name_end_col: usize,
383    name: &str,
384    kind: &str,
385) -> Finding {
386    Finding {
387        smell_name: "dead_code".into(),
388        category: SmellCategory::Dispensables,
389        severity: Severity::Hint,
390        location: Location {
391            path: ctx.file.path.clone(),
392            start_line,
393            start_col: name_col,
394            end_line: start_line,
395            end_col: name_end_col,
396            name: Some(name.to_string()),
397        },
398        message: format!("{} `{}` is not exported and may be unused", kind, name),
399        suggested_refactorings: vec!["Remove dead code".into()],
400        ..Default::default()
401    }
402}
403
404/// Use the AST-derived identifier positions when available; fall back to
405/// substring scan when no tree was attached (legacy unit-test path).
406fn is_referenced(
407    positions: &Option<IdentifierPositions>,
408    content: &str,
409    name: &str,
410    def_start: usize,
411    def_end: usize,
412) -> bool {
413    match positions {
414        Some(idx) => idx.referenced(name, def_start, def_end),
415        None => is_in_file_referenced_legacy(content, name, def_start, def_end),
416    }
417}
418
/// Pre-AST fallback: report whether any line outside the definition contains
/// `name` as a plain substring.
///
/// Lines are numbered from 1 and those within `def_start..=def_end` are
/// ignored. Kept only for the case where ctx.tree is None (e.g. unit tests
/// that build SourceModel by hand).
fn is_in_file_referenced_legacy(
    content: &str,
    name: &str,
    def_start: usize,
    def_end: usize,
) -> bool {
    let definition = def_start..=def_end;
    content
        .lines()
        .zip(1usize..)
        .any(|(text, line_no)| !definition.contains(&line_no) && text.contains(name))
}
439
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an owned `(prefix, suffix)` pair.
    fn slot(prefix: &str, suffix: &str) -> (String, String) {
        (prefix.to_owned(), suffix.to_owned())
    }

    #[test]
    fn extracts_simple_paste_slots() {
        // Both halves present: `_handle` before the pasted token, `Attr` after.
        let found = extract_paste_slots(" _handle##name##Attr ");
        assert_eq!(found, vec![slot("_handle", "Attr")]);
    }

    #[test]
    fn extracts_paste_with_only_prefix() {
        // A single `##` leaves the suffix empty.
        let found = extract_paste_slots(" foo##name ");
        assert_eq!(found, vec![slot("foo", "")]);
    }

    #[test]
    fn finds_all_macro_args() {
        let source = ["STYLE_DEF(color, Color, X);", "STYLE_DEF(fill, Fill, Y);"].join("\n");
        let parsed = find_macro_invocation_args(&source, "STYLE_DEF");
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0], ["color", "Color", "X"]);
        assert_eq!(parsed[1], ["fill", "Fill", "Y"]);
    }

    #[test]
    fn token_concat_recovers_synthetic_targets() {
        let source = concat!(
            "#define STYLE_DEF(short, Long) _handle##Long##Attr\n",
            "STYLE_DEF(color, Color)\n",
            "STYLE_DEF(fill, Fill)\n",
        );
        // Every argument of each call site is tried, so the capitalised one
        // yields the real function name.
        let recovered = collect_token_concat_targets(source);
        for expected in ["_handleColorAttr", "_handleFillAttr"] {
            assert!(recovered.contains(expected));
        }
    }
}