Skip to main content

mollify_core/
deadcode.rs

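//! Dead-code engine: reachability-based unused files and unused top-level
//! symbols, plus unused imports, unused locals/parameters, unreachable
//! statements, and duplicate `__init__.py` re-exports, each reported with a
//! confidence tier (RESEARCH.md §4 / PLAN.md §4).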
3
4use crate::fingerprint::fingerprint;
5use mollify_graph::ModuleGraph;
6use mollify_parse::DefKind;
7use mollify_types::{Action, Category, Confidence, Finding, Location, Severity};
8use rustc_hash::FxHashMap;
9
10/// Run dead-code analysis over the graph.
11pub fn analyze(graph: &ModuleGraph) -> Vec<Finding> {
12    let mut findings = Vec::new();
13    unused_files(graph, &mut findings);
14    unused_symbols(graph, &mut findings);
15    unused_imports(graph, &mut findings);
16    unused_locals(graph, &mut findings);
17    unreachable_code(graph, &mut findings);
18    duplicate_exports(graph, &mut findings);
19    findings
20}
21
22/// Flag a re-export surface (`__init__.py`) that binds the **same name** from
23/// two different modules — the later import silently shadows the earlier, so one
24/// re-export is dead and the public API is ambiguous (fallow's "duplicate
25/// export"). Confidence `likely`; skipped under a dynamic sink.
26fn duplicate_exports(graph: &ModuleGraph, out: &mut Vec<Finding>) {
27    for m in &graph.modules {
28        if m.path.file_name() != Some("__init__.py") || m.parsed.has_dynamic_sink {
29            continue;
30        }
31        // binding name -> (first source module, first line)
32        let mut first: FxHashMap<&str, (&str, u32)> = FxHashMap::default();
33        for imp in &m.parsed.imports {
34            if imp.is_star {
35                continue;
36            }
37            for b in &imp.bindings {
38                match first.get(b.as_str()) {
39                    None => {
40                        first.insert(b.as_str(), (imp.module.as_str(), imp.line));
41                    }
42                    Some(&(src, _)) if src == imp.module => {} // same source: not a conflict
43                    Some(_) => {
44                        let rule = "duplicate-export";
45                        out.push(Finding {
46                            fingerprint: fingerprint(
47                                rule,
48                                &[m.path.as_str(), b, &imp.line.to_string()],
49                            ),
50                            rule: rule.into(),
51                            category: Category::Architecture,
52                            severity: Severity::Warn,
53                            confidence: Confidence::Likely,
54                            attribution: None,
55                            reason: format!(
56                                "`{b}` is re-exported from multiple modules here; the later import shadows the earlier"
57                            ),
58                            location: Location {
59                                path: m.path.clone(),
60                                line: imp.line,
61                                column: 0,
62                                end_line: None,
63                            },
64                            actions: vec![Action {
65                                kind: "dedupe-export".into(),
66                                description: format!(
67                                    "Keep a single source for `{b}` in this package's public API"
68                                ),
69                                auto_fixable: false,
70                                suppression_comment: Some(format!("# mollify: ignore[{rule}]")),
71                            }],
72                        });
73                    }
74                }
75            }
76        }
77    }
78}
79
80/// Flag statements that can never execute because they follow an unconditional
81/// terminator (`return`/`raise`/`break`/`continue`/`sys.exit()`) in the same
82/// block (ruff F-series / vulture parity). Syntactic and exact → `certain`, but
83/// never auto-fixed (the dead statement may document intent).
84fn unreachable_code(graph: &ModuleGraph, out: &mut Vec<Finding>) {
85    for m in &graph.modules {
86        for u in &m.parsed.unreachable {
87            let rule = "unreachable-code";
88            out.push(Finding {
89                fingerprint: fingerprint(rule, &[m.path.as_str(), &u.line.to_string()]),
90                rule: rule.into(),
91                category: Category::DeadCode,
92                severity: Severity::Warn,
93                confidence: Confidence::Certain,
94                attribution: None,
95                reason: format!("code after `{}` can never execute", u.after),
96                location: Location {
97                    path: m.path.clone(),
98                    line: u.line,
99                    column: 0,
100                    end_line: None,
101                },
102                actions: vec![Action {
103                    kind: "remove-unreachable".into(),
104                    description: format!("Remove the unreachable code after `{}`", u.after),
105                    auto_fixable: false,
106                    suppression_comment: Some(format!("# mollify: ignore[{rule}]")),
107                }],
108            });
109        }
110    }
111}
112
113/// Flag unused local variables (`unused-variable`, ruff F841) and parameters
114/// (`unused-parameter`) from the parser's per-function scope analysis. Never
115/// auto-fixable: the assignment's right-hand side may have side effects.
116fn unused_locals(graph: &ModuleGraph, out: &mut Vec<Finding>) {
117    for m in &graph.modules {
118        for s in &m.parsed.scope_findings {
119            let (rule, kind, confidence) = if s.is_param {
120                ("unused-parameter", "parameter", Confidence::Uncertain)
121            } else {
122                ("unused-variable", "local variable", Confidence::Likely)
123            };
124            out.push(Finding {
125                fingerprint: fingerprint(rule, &[m.path.as_str(), &s.name, &s.line.to_string()]),
126                rule: rule.into(),
127                category: Category::DeadCode,
128                severity: Severity::Warn,
129                confidence,
130                attribution: None,
131                reason: format!("{kind} `{}` is assigned but never used", s.name),
132                location: Location {
133                    path: m.path.clone(),
134                    line: s.line,
135                    column: 0,
136                    end_line: None,
137                },
138                actions: vec![Action {
139                    kind: "remove-binding".into(),
140                    description: format!(
141                        "Remove the unused {kind} `{}` (or prefix it with `_`)",
142                        s.name
143                    ),
144                    auto_fixable: false,
145                    suppression_comment: Some(format!("# mollify: ignore[{rule}]")),
146                }],
147            });
148        }
149    }
150}
151
152/// Flag unused imports. A *whole-statement*-unused import (every binding unused)
153/// is `certain` + auto-fixable (the line can be deleted). A *partially*-unused
154/// `from x import a, b` (some names used) reports each unused name as `likely`
155/// (not auto-fixed — rewriting the line precisely is left to the human). Skips
156/// `import *`, `__init__.py` re-exports (downgraded), and dynamic-sink modules.
157fn unused_imports(graph: &ModuleGraph, out: &mut Vec<Finding>) {
158    use rustc_hash::FxHashSet;
159    for m in &graph.modules {
160        let local: FxHashSet<&str> = m.parsed.local_uses.iter().map(|s| s.as_str()).collect();
161        let dunder_all: Option<&Vec<String>> = m.parsed.dunder_all.as_ref();
162        let is_init = m.path.file_name().is_some_and(|f| f == "__init__.py");
163        for imp in &m.parsed.imports {
164            if imp.is_star || imp.bindings.is_empty() || imp.type_checking_only {
165                continue; // star imports / unparsed bindings / type-only: skip
166            }
167            let is_used = |b: &String| {
168                local.contains(b.as_str()) || dunder_all.is_some_and(|all| all.contains(b))
169            };
170            let unused: Vec<&String> = imp.bindings.iter().filter(|b| !is_used(b)).collect();
171            if unused.is_empty() {
172                continue;
173            }
174            let whole = unused.len() == imp.bindings.len();
175            let rule = "unused-import";
176            if whole {
177                // Entire statement unused → safe to delete the line.
178                let what = format!("`{}`", imp.bindings.join("`, `"));
179                let confidence = if is_init || m.parsed.has_dynamic_sink {
180                    Confidence::Uncertain
181                } else {
182                    Confidence::Certain
183                };
184                out.push(Finding {
185                    fingerprint: fingerprint(
186                        rule,
187                        &[
188                            m.path.as_str(),
189                            &imp.line.to_string(),
190                            &imp.bindings.join(","),
191                        ],
192                    ),
193                    rule: rule.into(),
194                    category: Category::DeadCode,
195                    severity: Severity::Warn,
196                    confidence,
197                    attribution: None,
198                    reason: format!("import {what} is never used in this module"),
199                    location: Location {
200                        path: m.path.clone(),
201                        line: imp.line,
202                        column: 0,
203                        end_line: None,
204                    },
205                    actions: vec![Action {
206                        kind: "remove-import".into(),
207                        description: format!("Remove the unused import {what}"),
208                        auto_fixable: confidence == Confidence::Certain,
209                        suppression_comment: Some(format!("# mollify: ignore[{rule}]")),
210                    }],
211                });
212            } else {
213                // Some names still used: report each unused name (not auto-fixed,
214                // since rewriting a shared import line precisely is risky).
215                for name in unused {
216                    out.push(Finding {
217                        fingerprint: fingerprint(
218                            rule,
219                            &[m.path.as_str(), &imp.line.to_string(), name],
220                        ),
221                        rule: rule.into(),
222                        category: Category::DeadCode,
223                        severity: Severity::Warn,
224                        confidence: Confidence::Likely,
225                        attribution: None,
226                        reason: format!(
227                            "imported name `{name}` is never used (other names on this import are)"
228                        ),
229                        location: Location {
230                            path: m.path.clone(),
231                            line: imp.line,
232                            column: 0,
233                            end_line: None,
234                        },
235                        actions: vec![Action {
236                            kind: "remove-import-name".into(),
237                            description: format!("Remove `{name}` from the import"),
238                            auto_fixable: false,
239                            suppression_comment: Some(format!("# mollify: ignore[{rule}]")),
240                        }],
241                    });
242                }
243            }
244        }
245    }
246}
247
248fn unused_files(graph: &ModuleGraph, out: &mut Vec<Finding>) {
249    for m in graph.unused_files() {
250        // A file that cannot be reached is a strong signal, but dynamic imports
251        // anywhere in the project mean we can't be certain it is never loaded.
252        let confidence = if graph.global_dynamic {
253            Confidence::Uncertain
254        } else {
255            Confidence::Likely
256        };
257        out.push(Finding {
258            fingerprint: fingerprint("unused-file", &[m.path.as_str()]),
259            rule: "unused-file".into(),
260            category: Category::DeadCode,
261            severity: Severity::Warn,
262            confidence,
263            attribution: None,
264            reason: format!(
265                "module `{}` is never imported and is not an entry point",
266                m.dotted
267            ),
268            location: Location {
269                path: m.path.clone(),
270                line: 1,
271                column: 0,
272                end_line: None,
273            },
274            actions: vec![Action {
275                kind: "remove-file".into(),
276                description: format!("Delete unused module `{}`", m.path),
277                auto_fixable: false, // file deletion is never auto-applied
278                suppression_comment: Some("# mollify: ignore[unused-file]".into()),
279            }],
280        });
281    }
282}
283
284fn unused_symbols(graph: &ModuleGraph, out: &mut Vec<Finding>) {
285    for m in &graph.modules {
286        // Count how many top-level defs share each name (to discount def sites).
287        let mut def_counts: FxHashMap<&str, u32> = FxHashMap::default();
288        for d in &m.parsed.definitions {
289            *def_counts.entry(d.name.as_str()).or_insert(0) += 1;
290        }
291        let dunder_all: Option<&Vec<String>> = m.parsed.dunder_all.as_ref();
292
293        for d in &m.parsed.definitions {
294            // Skip dunder/special names and explicit public API (`__all__`).
295            if d.name.starts_with("__") && d.name.ends_with("__") {
296                continue;
297            }
298            if let Some(all) = dunder_all {
299                if all.contains(&d.name) {
300                    continue; // declared public API — treat as used
301                }
302            }
303            // Framework-registered symbols (routes, tasks, fixtures, CLI
304            // commands, signal receivers, validators, …) are reached even with
305            // zero in-repo callers — the dominant false-positive killer.
306            if crate::plugins::is_framework_entry(d) {
307                continue;
308            }
309            let defs_named = def_counts.get(d.name.as_str()).copied().unwrap_or(1);
310            if graph.symbol_used(m.id, &d.name, defs_named) {
311                continue;
312            }
313
314            // Confidence tiering.
315            let confidence = if m.parsed.has_dynamic_sink {
316                Confidence::Uncertain
317            } else if d.private_by_convention {
318                Confidence::Certain
319            } else {
320                Confidence::Likely
321            };
322
323            let kind_str = match d.kind {
324                DefKind::Function => "function",
325                DefKind::Class => "class",
326                DefKind::Variable => "variable",
327            };
328            let rule = "unused-export";
329            out.push(Finding {
330                fingerprint: fingerprint(rule, &[m.path.as_str(), &d.name]),
331                rule: rule.into(),
332                category: Category::DeadCode,
333                severity: Severity::Warn,
334                confidence,
335                attribution: None,
336                reason: format!(
337                    "{kind_str} `{}` has no reachable references in the project",
338                    d.name
339                ),
340                location: Location {
341                    path: m.path.clone(),
342                    line: d.line,
343                    column: 0,
344                    end_line: Some(d.end_line),
345                },
346                actions: vec![Action {
347                    kind: "remove-symbol".into(),
348                    description: format!("Delete unused {kind_str} `{}`", d.name),
349                    // Only Certain findings are ever auto-fixable.
350                    auto_fixable: confidence == Confidence::Certain,
351                    suppression_comment: Some(format!("# mollify: ignore[{rule}]")),
352                }],
353            });
354        }
355    }
356}
357
#[cfg(test)]
mod tests {
    use super::*;
    use camino::{Utf8Path, Utf8PathBuf};
    use mollify_graph::discover_python_files;

    /// Write `src` to `dir/rel`, creating any missing parent directories.
    fn write(dir: &Utf8Path, rel: &str, src: &str) {
        let target = dir.join(rel);
        let parent = target.parent().unwrap();
        std::fs::create_dir_all(parent).unwrap();
        std::fs::write(target, src).unwrap();
    }

    /// Per-process, per-test scratch directory path; any leftover from a
    /// previous run is removed first. The directory itself is not created.
    fn temp(tag: &str) -> Utf8PathBuf {
        let dir_name = format!("mollify-core-dc-{}-{tag}", std::process::id());
        let base = std::env::temp_dir().join(dir_name);
        let _ = std::fs::remove_dir_all(&base);
        Utf8PathBuf::from_path_buf(base).unwrap()
    }

    /// Lay out a throwaway project: each `(relative path, source)` pair is
    /// written, in order, under a fresh scratch directory.
    fn project(tag: &str, sources: &[(&str, &str)]) -> Utf8PathBuf {
        let root = temp(tag);
        for (rel, src) in sources {
            write(&root, rel, src);
        }
        root
    }

    #[test]
    fn flags_unused_public_function_as_likely() {
        let root = project(
            "pub",
            &[
                ("__main__.py", "from lib import used\nused()\n"),
                (
                    "lib.py",
                    "def used():\n    return 1\n\ndef dead():\n    return 2\n",
                ),
            ],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        let unused: Vec<_> = findings
            .iter()
            .filter(|x| x.rule == "unused-export")
            .collect();
        assert_eq!(unused.len(), 1, "got {findings:?}");
        let only = unused[0];
        assert!(only.reason.contains("dead"));
        assert_eq!(only.confidence, Confidence::Likely);
        assert!(!only.actions[0].auto_fixable);
        std::fs::remove_dir_all(&root).ok();
    }

    #[test]
    fn private_unused_is_certain_and_autofixable() {
        let root = project(
            "priv",
            &[
                ("__main__.py", "print('hi')\n"),
                ("lib.py", "def _dead():\n    return 2\n"),
            ],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        let symbol = findings
            .iter()
            .find(|x| x.rule == "unused-export")
            .unwrap();
        assert_eq!(symbol.confidence, Confidence::Certain);
        assert!(symbol.actions[0].auto_fixable);
        std::fs::remove_dir_all(&root).ok();
    }

    #[test]
    fn framework_decorator_suppresses_unused() {
        let root = project(
            "fw",
            &[
                ("__main__.py", "import app\n"),
                (
                    "app.py",
                    "import app\n\n@app.route('/x')\ndef view():\n    return 1\n",
                ),
            ],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        let view_flagged = findings.iter().any(|x| x.reason.contains("`view`"));
        assert!(!view_flagged, "route should be reached, got {findings:?}");
        std::fs::remove_dir_all(&root).ok();
    }

    #[test]
    fn flags_unused_import_and_respects_usage_and_aliases() {
        let root = project(
            "imp",
            &[
                ("__main__.py", "print('hi')\n"),
                (
                    "lib.py",
                    "import os\nimport sys\nfrom typing import List\nfrom typing import Dict\n\ndef f(x: List):\n    return sys.argv\n",
                ),
            ],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        let imports: Vec<_> = findings
            .iter()
            .filter(|x| x.rule == "unused-import")
            .collect();
        let mentions = |needle: &str| imports.iter().any(|x| x.reason.contains(needle));
        // `os` and `Dict` are unused; `sys` and `List` are used. Every import
        // here sits on its own line, so each report covers a whole statement.
        assert!(mentions("`os`"), "got {imports:?}");
        assert!(mentions("`Dict`"), "got {imports:?}");
        assert!(!mentions("`sys`"));
        assert!(!mentions("`List`"));
        // Regular-module unused imports are certain + auto-fixable.
        let os_finding = imports
            .iter()
            .find(|x| x.reason.contains("`os`"))
            .unwrap();
        assert!(os_finding.actions[0].auto_fixable);
        std::fs::remove_dir_all(&root).ok();
    }

    #[test]
    fn flags_unused_local_and_param_but_not_used_ones() {
        let root = project(
            "scope",
            &[
                ("__main__.py", "import lib\nlib.f(1, 2)\n"),
                (
                    "lib.py",
                    "def f(used_p, dead_p):\n    dead_local = compute()\n    kept = used_p + 1\n    return kept\n",
                ),
            ],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        let local_flagged = findings
            .iter()
            .any(|x| x.rule == "unused-variable" && x.reason.contains("dead_local"));
        assert!(local_flagged, "got {findings:?}");
        let param_flagged = findings
            .iter()
            .any(|x| x.rule == "unused-parameter" && x.reason.contains("dead_p"));
        assert!(param_flagged, "got {findings:?}");
        assert!(!findings.iter().any(|x| x.reason.contains("`kept`")));
        assert!(!findings.iter().any(|x| x.reason.contains("used_p")));
        std::fs::remove_dir_all(&root).ok();
    }

    #[test]
    fn comma_import_unused_names_get_distinct_fingerprints() {
        let root = project(
            "commaimp",
            &[
                ("__main__.py", "print('hi')\n"),
                ("lib.py", "import os, sys\n"),
            ],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        let imports: Vec<_> = findings
            .iter()
            .filter(|x| x.rule == "unused-import")
            .collect();
        assert_eq!(
            imports.len(),
            2,
            "expected one finding per unused name, got {imports:?}"
        );
        assert_ne!(
            imports[0].fingerprint, imports[1].fingerprint,
            "fingerprints must be unique per finding: {imports:?}"
        );
        std::fs::remove_dir_all(&root).ok();
    }

    #[test]
    fn type_checking_and_string_annotation_imports_not_flagged() {
        let root = project(
            "tc",
            &[
                ("__main__.py", "import lib\nlib.f(None)\n"),
                (
                    "lib.py",
                    "from typing import TYPE_CHECKING\nif TYPE_CHECKING:\n    from collections import OrderedDict\n\ndef f(x: \"OrderedDict\"):\n    return x\n",
                ),
            ],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        let any_import_flagged = findings.iter().any(|x| x.rule == "unused-import");
        assert!(
            !any_import_flagged,
            "TYPE_CHECKING + string-annotation import wrongly flagged: {findings:?}"
        );
        std::fs::remove_dir_all(&root).ok();
    }

    #[test]
    fn flags_partial_unused_import_name() {
        let root = project(
            "partial",
            &[
                ("__main__.py", "import lib\nlib.f()\n"),
                (
                    "lib.py",
                    "from typing import List, Dict\n\ndef f() -> List:\n    return []\n",
                ),
            ],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        // Dict unused (List used) → partial report, not auto-fixable.
        let dict = findings
            .iter()
            .find(|x| x.rule == "unused-import" && x.reason.contains("`Dict`"));
        assert!(dict.is_some(), "got {findings:?}");
        assert!(!dict.unwrap().actions[0].auto_fixable);
        assert!(!findings.iter().any(|x| x.reason.contains("`List`")));
        std::fs::remove_dir_all(&root).ok();
    }

    #[test]
    fn init_unused_import_is_uncertain_reexport() {
        let root = project(
            "impinit",
            &[
                ("__init__.py", "from .sub import thing\n"),
                ("sub.py", "thing = 1\n"),
            ],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        // When the import is reported, it must be uncertain and never
        // auto-fixed (re-export idiom).
        if let Some(import) = findings.iter().find(|x| x.rule == "unused-import") {
            assert_eq!(import.confidence, Confidence::Uncertain);
            assert!(!import.actions[0].auto_fixable);
        }
        std::fs::remove_dir_all(&root).ok();
    }

    #[test]
    fn flags_unreachable_code_after_return() {
        let root = project(
            "unreach",
            &[
                ("__main__.py", "import lib\nlib.f()\n"),
                (
                    "lib.py",
                    "def f():\n    return 1\n    print('never')\n\ndef g(x):\n    if x:\n        raise ValueError\n        cleanup()\n    return x\n",
                ),
            ],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        let unreachable: Vec<_> = findings
            .iter()
            .filter(|x| x.rule == "unreachable-code")
            .collect();
        // `print` after `return` is line 3; `cleanup()` after `raise` is line 8.
        assert_eq!(unreachable.len(), 2, "got {unreachable:?}");
        let reported = |keyword: &str, line| {
            unreachable
                .iter()
                .any(|x| x.reason.contains(keyword) && x.location.line == line)
        };
        assert!(reported("return", 3));
        assert!(reported("raise", 8));
        assert!(unreachable
            .iter()
            .all(|x| x.confidence == Confidence::Certain));
        std::fs::remove_dir_all(&root).ok();
    }

    #[test]
    fn flags_duplicate_reexport_in_init() {
        let root = project(
            "dupexport",
            &[
                (
                    "pkg/__init__.py",
                    "from .a import Thing\nfrom .b import Thing\nfrom .a import Other\n",
                ),
                ("pkg/a.py", "class Thing:\n    pass\n\nOther = 1\n"),
                ("pkg/b.py", "class Thing:\n    pass\n"),
            ],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        let duplicates: Vec<_> = findings
            .iter()
            .filter(|x| x.rule == "duplicate-export")
            .collect();
        // `Thing` is re-exported from .a and .b → one duplicate at line 2.
        assert_eq!(duplicates.len(), 1, "got {duplicates:?}");
        let only = duplicates[0];
        assert!(only.reason.contains("Thing") && only.location.line == 2);
        std::fs::remove_dir_all(&root).ok();
    }

    #[test]
    fn dunder_all_suppresses() {
        let root = project(
            "all",
            &[(
                "__init__.py",
                "__all__ = ['api']\ndef api():\n    return 1\n",
            )],
        );
        let files = discover_python_files(&root);
        let graph = ModuleGraph::build(&root, &files);
        let findings = analyze(&graph);
        assert!(!findings.iter().any(|x| x.reason.contains("`api`")));
        std::fs::remove_dir_all(&root).ok();
    }
}
641}