Skip to main content

codelens_engine/
phantom_modules.rs

1//! Detects "phantom" module declarations — `mod NAME;` lines whose target
2//! is never `use`d anywhere else in the workspace. Complements the
3//! `find_dead_code_v2` file-level pass: that one flags files with no
4//! importers in the import graph, this one catches the prerequisite step
5//! (a `mod` line that should never have been written or that survives a
6//! deletion cascade).
7//!
//! Heuristic, not authoritative — `pub mod` declarations are reported too,
//! even though re-export patterns (`pub use foo::*`) can keep them
//! useful; a private `mod foo;` with no `use` references on the
//! parent symbol path is almost always cleanup-eligible.
12
13use crate::project::{collect_files, ProjectRoot};
14use anyhow::Result;
15use regex::Regex;
16use serde::Serialize;
17use std::collections::HashSet;
18use std::path::Path;
19use std::sync::LazyLock;
20
/// Matches `[pub(...)] mod NAME;` (declaration form, not `mod NAME { ... }`).
///
/// Capture groups: `vis` is the optional `pub` / `pub(...)` prefix and
/// `name` is the module identifier. The pattern is anchored with `(?m)^`,
/// so a declaration is only recognised when nothing but whitespace precedes
/// it on its line. Because `\s*` also matches newlines, a match may begin
/// on a blank line above the declaration rather than on the `mod` line
/// itself.
static MOD_DECL_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(?m)^\s*(?P<vis>pub(?:\([^)]*\))?\s+)?mod\s+(?P<name>[A-Za-z_][A-Za-z0-9_]*)\s*;")
        .unwrap()
});
26
/// One reported `mod NAME;` declaration that no scanned file appears to
/// reference.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct PhantomModuleEntry {
    /// Path of the file containing the declaration, as passed to the scanner
    /// (project-relative when produced by `find_phantom_modules`).
    pub parent_file: String,
    /// The declared module identifier (`NAME` in `mod NAME;`).
    pub module_name: String,
    /// 1-based line number reported for the declaration.
    pub line: usize,
    /// `"public"` when the declaration carries any `pub` / `pub(...)`
    /// prefix, otherwise `"private"`.
    pub visibility: &'static str,
    /// Entry discriminator; the scanner in this file always emits
    /// `"rust_mod_declaration"`.
    pub kind: &'static str,
}
35
36/// Finds Rust `mod NAME;` declarations whose `NAME` does not appear as a
37/// path segment anywhere else in the workspace.
38///
39/// Match strategy (v1, regex-only):
40/// 1. Collect every `mod NAME;` declaration with its parent file and line.
41/// 2. Build a set of *referenced* module names by scanning all Rust source
42///    for tokens that look like `NAME::`, `::NAME;`, or `::NAME::`.
43/// 3. Any declared `NAME` not in the set is a phantom.
44///
45/// Tradeoffs:
46/// - Reports `pub mod` too — re-export patterns may keep them useful;
47///   visibility is reported so callers can filter.
48/// - Does not understand path aliases (`use foo as bar;`); we still catch
49///   the original name on either side of the alias.
50pub fn find_phantom_modules(
51    project: &ProjectRoot,
52    max_results: usize,
53) -> Result<Vec<PhantomModuleEntry>> {
54    let mut declarations: Vec<PhantomModuleEntry> = Vec::new();
55    let mut referenced: HashSet<String> = HashSet::new();
56    let candidates = collect_files(project.as_path(), is_rust_file)?;
57
58    for path in &candidates {
59        let source = match std::fs::read_to_string(path) {
60            Ok(s) => s,
61            Err(_) => continue,
62        };
63        let relative = project.to_relative(path);
64        if is_excluded_path(&relative) {
65            continue;
66        }
67        scan_declarations(&source, &relative, &mut declarations);
68        collect_referenced_names(&source, &mut referenced);
69    }
70
71    let mut phantoms: Vec<PhantomModuleEntry> = declarations
72        .into_iter()
73        .filter(|d| !referenced.contains(&d.module_name))
74        .filter(|d| !is_test_module_name(&d.module_name))
75        .collect();
76
77    phantoms.sort_by(|a, b| {
78        a.parent_file
79            .cmp(&b.parent_file)
80            .then(a.line.cmp(&b.line))
81            .then(a.module_name.cmp(&b.module_name))
82    });
83    if max_results > 0 && phantoms.len() > max_results {
84        phantoms.truncate(max_results);
85    }
86    Ok(phantoms)
87}
88
89fn scan_declarations(source: &str, file: &str, out: &mut Vec<PhantomModuleEntry>) {
90    for caps in MOD_DECL_RE.captures_iter(source) {
91        let name = match caps.name("name") {
92            Some(m) => m.as_str().to_owned(),
93            None => continue,
94        };
95        let mod_start = caps.get(0).map(|m| m.start()).unwrap_or(0);
96        // Codex P2 (PR #151): skip mod declarations that are gated by
97        // `#[cfg(test)]` (or `#[cfg(any(test, ...))]`). Test-only mods are
98        // already excluded from production semantics by the compiler; they
99        // do not need a workspace path-reference to justify their existence,
100        // and reporting them just adds noise.
101        if line_before_is_cfg_test(source, mod_start) {
102            continue;
103        }
104        let visibility = if caps.name("vis").is_some() {
105            "public"
106        } else {
107            "private"
108        };
109        let line = source[..mod_start].matches('\n').count() + 1;
110        out.push(PhantomModuleEntry {
111            parent_file: file.to_owned(),
112            module_name: name,
113            line,
114            visibility,
115            kind: "rust_mod_declaration",
116        });
117    }
118}
119
120/// Returns true when the line immediately above `offset` is a positive
121/// `#[cfg(test)]`-style attribute (i.e. test-only). Walks one line back,
122/// skipping blank lines but not other attributes.
123///
124/// Codex P2 (PR #154): the previous predicate matched any cfg attribute
125/// containing the substring `test`, which incorrectly skipped
126/// `#[cfg(not(test))] mod live;` (production-only). Now: explicit
127/// rejection of `not(test)` patterns.
128fn line_before_is_cfg_test(source: &str, offset: usize) -> bool {
129    let line_start = source[..offset]
130        .rfind('\n')
131        .map(|i| i + 1)
132        .unwrap_or(offset);
133    if line_start == 0 {
134        return false;
135    }
136    let mut prev_end = line_start - 1;
137    loop {
138        let prev_start = source[..prev_end].rfind('\n').map(|i| i + 1).unwrap_or(0);
139        let prev_line = source[prev_start..prev_end].trim();
140        if !prev_line.is_empty() {
141            return is_positive_cfg_test_attribute(prev_line);
142        }
143        if prev_start == 0 {
144            return false;
145        }
146        prev_end = prev_start - 1;
147    }
148}
149
/// Returns `true` when `line` is a `#[cfg(...)]` attribute whose predicate
/// mentions the bare `test` option outside of any `not(...)`.
///
/// `line` is expected to be trimmed already. The predicate is scanned token
/// by token rather than by substring, so that:
/// - `#[cfg_attr(test, ...)]` is rejected (it does not gate the item);
/// - string literals are ignored (`feature = "latest"` is not `test`);
/// - `test` anywhere beneath a `not(...)` — `not(test)`,
///   `all(not(test), ...)`, `not(any(test, ...))` — is rejected, because
///   those forms gate code INTO production, not out of it;
/// - text after the attribute's closing parenthesis (e.g. a trailing
///   comment) is not inspected.
///
/// `any(test, ...)` is still treated as test-gated, matching the behaviour
/// the callers in this file rely on.
fn is_positive_cfg_test_attribute(line: &str) -> bool {
    let Some(after_cfg) = line.strip_prefix("#[cfg") else {
        return false;
    };
    // Requiring `(` right after `cfg` excludes `#[cfg_attr(...)]`.
    let predicate = after_cfg.trim_start();
    if !predicate.starts_with('(') {
        return false;
    }

    let bytes = predicate.as_bytes();
    let mut depth = 0usize;
    // Paren depths opened by a `not(`; non-empty means "inside a negation".
    let mut negation_depths: Vec<usize> = Vec::new();
    // Identifier seen immediately before the current token, if any.
    let mut previous_ident = "";
    let mut saw_test = false;
    let mut pos = 0;

    while pos < bytes.len() {
        let byte = bytes[pos];
        if byte == b'"' {
            // Skip the whole string literal, honouring backslash escapes.
            pos += 1;
            while pos < bytes.len() && bytes[pos] != b'"' {
                if bytes[pos] == b'\\' {
                    pos += 1;
                }
                pos += 1;
            }
            pos += 1;
            previous_ident = "";
        } else if byte == b'(' {
            depth += 1;
            if previous_ident == "not" {
                negation_depths.push(depth);
            }
            previous_ident = "";
            pos += 1;
        } else if byte == b')' {
            if negation_depths.last() == Some(&depth) {
                negation_depths.pop();
            }
            depth = depth.saturating_sub(1);
            if depth == 0 {
                // End of the cfg predicate; ignore the rest of the line.
                break;
            }
            previous_ident = "";
            pos += 1;
        } else if byte == b'_' || byte.is_ascii_alphabetic() {
            let start = pos;
            while pos < bytes.len() && (bytes[pos] == b'_' || bytes[pos].is_ascii_alphanumeric()) {
                pos += 1;
            }
            let ident = &predicate[start..pos];
            // `test(` or `test =` would not be the bare `test` option.
            let next = predicate[pos..].trim_start().bytes().next();
            let is_bare = !matches!(next, Some(b'(') | Some(b'='));
            if ident == "test" && is_bare {
                if !negation_depths.is_empty() {
                    return false;
                }
                saw_test = true;
            }
            previous_ident = ident;
        } else {
            // Whitespace keeps `previous_ident` (so `not (` still counts);
            // any other punctuation resets it.
            if !byte.is_ascii_whitespace() {
                previous_ident = "";
            }
            pos += 1;
        }
    }
    saw_test
}
162
163/// Adds every identifier that participates in any `::`-adjacent position
164/// into the referenced set, plus single-segment `use NAME;` lines (codex
165/// P2 from PR #151). Three regexes:
166///   - `IDENT::` matches leading and middle segments (`crate::foo::bar`
167///     → `crate`, `foo`).
168///   - `::IDENT` matches trailing segments (`crate::foo::bar` → `bar`).
169///   - `use NAME(\s+as\s+ALIAS)?\s*;` matches single-segment imports of a
170///     sibling module (`use ghost;`) so that re-exporting modules don't
171///     show up as phantom.
172fn collect_referenced_names(source: &str, into: &mut HashSet<String>) {
173    static LEADING_RE: LazyLock<Regex> =
174        LazyLock::new(|| Regex::new(r"([A-Za-z_][A-Za-z0-9_]*)::").unwrap());
175    static TRAILING_RE: LazyLock<Regex> =
176        LazyLock::new(|| Regex::new(r"::([A-Za-z_][A-Za-z0-9_]*)").unwrap());
177    static SINGLE_USE_RE: LazyLock<Regex> = LazyLock::new(|| {
178        Regex::new(
179            r"(?m)^\s*(?:pub(?:\([^)]*\))?\s+)?use\s+([A-Za-z_][A-Za-z0-9_]*)(?:\s+as\s+[A-Za-z_][A-Za-z0-9_]*)?\s*;",
180        )
181        .unwrap()
182    });
183    for caps in LEADING_RE.captures_iter(source) {
184        if let Some(m) = caps.get(1) {
185            into.insert(m.as_str().to_owned());
186        }
187    }
188    for caps in TRAILING_RE.captures_iter(source) {
189        if let Some(m) = caps.get(1) {
190            into.insert(m.as_str().to_owned());
191        }
192    }
193    for caps in SINGLE_USE_RE.captures_iter(source) {
194        if let Some(m) = caps.get(1) {
195            into.insert(m.as_str().to_owned());
196        }
197    }
198}
199
/// Returns `true` when `path` has exactly the extension `rs`
/// (case-sensitive).
fn is_rust_file(path: &Path) -> bool {
    path.extension().is_some_and(|ext| ext == "rs")
}
203
/// Decides whether a `/`-separated relative path should be left out of the
/// scan.
///
/// A path is excluded when it is this detector's own source file, when its
/// lowercased form ends in `_tests.rs` / `_test.rs`, or when any lowercased
/// path segment is one of the known test/bench/example/fixture directory
/// names.
fn is_excluded_path(relative: &str) -> bool {
    // This file contains `mod`/`use` snippets in its own tests and docs.
    const SELF_PATH: &str = "crates/codelens-engine/src/phantom_modules.rs";
    const TEST_FILE_SUFFIXES: [&str; 2] = ["_tests.rs", "_test.rs"];
    const EXCLUDED_SEGMENTS: [&str; 8] = [
        "tests",
        "test",
        "bench",
        "benches",
        "examples",
        "fixtures",
        "integration_tests",
        "http_tests",
    ];

    if relative == SELF_PATH {
        return true;
    }

    let normalized = relative.to_ascii_lowercase();
    let has_test_suffix = TEST_FILE_SUFFIXES
        .iter()
        .any(|&suffix| normalized.ends_with(suffix));
    has_test_suffix
        || normalized
            .split('/')
            .any(|segment| EXCLUDED_SEGMENTS.iter().any(|&excluded| excluded == segment))
}
226
/// Returns `true` for module names that conventionally hold tests:
/// exactly `tests` / `test`, or anything ending in `_tests` / `_test`.
fn is_test_module_name(name: &str) -> bool {
    if matches!(name, "tests" | "test") {
        return true;
    }
    ["_tests", "_test"]
        .iter()
        .any(|&suffix| name.ends_with(suffix))
}
230
// Unit tests for the regex-based scanners and path filters in this file.
// The declaration and reference tests feed small inline source snippets to
// the scanners; only the ignored `dogfood_self_repo` test reads from disk.
#[cfg(test)]
mod tests {
    use super::*;

    // Two plain `mod NAME;` lines produce two private entries, in source
    // order.
    #[test]
    fn detects_unreferenced_private_mod() {
        let mut decls = Vec::new();
        scan_declarations("mod ghost;\nmod live;\n", "lib.rs", &mut decls);
        assert_eq!(decls.len(), 2);
        assert_eq!(decls[0].module_name, "ghost");
        assert_eq!(decls[0].visibility, "private");
        assert_eq!(decls[1].module_name, "live");
    }

    // A `pub` prefix is surfaced through the `visibility` field.
    #[test]
    fn detects_pub_mod_as_public() {
        let mut decls = Vec::new();
        scan_declarations("pub mod api;\n", "lib.rs", &mut decls);
        assert_eq!(decls.len(), 1);
        assert_eq!(decls[0].visibility, "public");
    }

    #[test]
    fn skips_inline_mod_blocks() {
        let mut decls = Vec::new();
        scan_declarations("mod inline { fn x() {} }\n", "lib.rs", &mut decls);
        // inline `mod NAME { ... }` should NOT match (no trailing `;`)
        assert!(decls.is_empty(), "got: {:?}", decls);
    }

    #[test]
    fn cfg_not_test_is_not_treated_as_cfg_test() {
        // Codex P2 (PR #154): #[cfg(not(test))] is production-only — must NOT
        // be skipped by the cfg-test filter.
        let mut decls = Vec::new();
        scan_declarations(
            "#[cfg(not(test))]\nmod live;\n#[cfg(any(not(test), feature = \"x\"))]\nmod live2;\n",
            "lib.rs",
            &mut decls,
        );
        assert_eq!(decls.len(), 2, "got: {:?}", decls);
        assert_eq!(decls[0].module_name, "live");
        assert_eq!(decls[1].module_name, "live2");
    }

    #[test]
    fn skips_cfg_test_gated_mod() {
        // Codex P2 (PR #151): `#[cfg(test)] mod tests;` and the `any(test, ...)`
        // form must not be reported as phantom — the compiler already gates
        // them out of production semantics.
        let mut decls = Vec::new();
        scan_declarations(
            "#[cfg(test)]\nmod tests;\n#[cfg(any(test, feature = \"x\"))]\nmod fixtures;\nmod live;\n",
            "lib.rs",
            &mut decls,
        );
        assert_eq!(decls.len(), 1, "got: {:?}", decls);
        assert_eq!(decls[0].module_name, "live");
    }

    #[test]
    fn single_segment_use_keeps_module_alive() {
        // Codex P2 (PR #151): `use foo;` must register `foo` as referenced
        // so a sibling `mod foo;` is not flagged phantom.
        let mut set = HashSet::new();
        collect_referenced_names("use foo;\npub use bar as renamed;\n", &mut set);
        assert!(
            set.contains("foo"),
            "single-segment `use foo;` missed: {:?}",
            set
        );
        assert!(
            set.contains("bar"),
            "single-segment `pub use bar as renamed;` missed: {:?}",
            set
        );
    }

    // Leading, middle and trailing `::` path segments all count as
    // references, both in `use` items and in expression paths.
    #[test]
    fn referenced_set_picks_up_path_segments() {
        let mut set = HashSet::new();
        collect_referenced_names("use crate::foo::bar;\nlet z = self::baz::x();\n", &mut set);
        assert!(set.contains("foo"));
        assert!(set.contains("bar"));
        assert!(set.contains("baz"));
    }

    #[test]
    fn referenced_set_picks_up_pub_use_with_braces() {
        // Real false-positive shape from dogfooding: `pub use dead_code::{A, B, C};`
        // The module name `dead_code` sits directly before `::{`, so the
        // `IDENT::` pattern must capture it; otherwise the `mod dead_code;`
        // declaration would be mis-flagged as phantom.
        let mut set = HashSet::new();
        collect_referenced_names(
            "pub use dead_code::{DeadCodeEntryV2, find_dead_code, find_dead_code_v2};",
            &mut set,
        );
        assert!(set.contains("dead_code"), "missing dead_code in {:?}", set);
    }

    // Manual smoke test against a real checkout; ignored by default because
    // it scans the file system and only prints its findings.
    #[test]
    #[ignore]
    fn dogfood_self_repo() {
        // Run with: cargo test -p codelens-engine phantom_modules::tests::dogfood_self_repo -- --ignored --nocapture
        // The repo root comes from CODELENS_REPO_ROOT when set; otherwise it
        // is derived from CARGO_MANIFEST_DIR (two directories up) so that a
        // contributor's clone path works without hardcoding (codex P2 from
        // PR #149).
        let repo = std::env::var("CODELENS_REPO_ROOT").unwrap_or_else(|_| {
            std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
                .ancestors()
                .nth(2)
                .expect("workspace root not found above CARGO_MANIFEST_DIR")
                .to_string_lossy()
                .into_owned()
        });
        let project = crate::project::ProjectRoot::new(repo).expect("project root");
        let results = super::find_phantom_modules(&project, 200).expect("find_phantom_modules");
        eprintln!("\n=== {} phantom mod declarations ===\n", results.len());
        for r in &results {
            eprintln!(
                "  {} (vis={}) at {}:{}",
                r.module_name, r.visibility, r.parent_file, r.line
            );
        }
    }

    // Test directories, `_tests.rs` files and this detector's own source
    // file are excluded; ordinary source files are not.
    #[test]
    fn is_excluded_path_skips_test_dirs() {
        assert!(is_excluded_path("crates/foo/tests/x.rs"));
        assert!(is_excluded_path("crates/foo/src/x_tests.rs"));
        assert!(!is_excluded_path("crates/foo/src/lib.rs"));
        assert!(is_excluded_path(
            "crates/codelens-engine/src/phantom_modules.rs"
        ));
    }
}
366}