1use crate::project::{ProjectRoot, collect_files};
14use anyhow::Result;
15use regex::Regex;
16use serde::Serialize;
17use std::collections::HashSet;
18use std::path::Path;
19use std::sync::LazyLock;
20
21static MOD_DECL_RE: LazyLock<Regex> = LazyLock::new(|| {
23 Regex::new(r"(?m)^\s*(?P<vis>pub(?:\([^)]*\))?\s+)?mod\s+(?P<name>[A-Za-z_][A-Za-z0-9_]*)\s*;")
24 .unwrap()
25});
26
27#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
28pub struct PhantomModuleEntry {
29 pub parent_file: String,
30 pub module_name: String,
31 pub line: usize,
32 pub visibility: &'static str,
33 pub kind: &'static str,
34}
35
36pub fn find_phantom_modules(
51 project: &ProjectRoot,
52 max_results: usize,
53) -> Result<Vec<PhantomModuleEntry>> {
54 let mut declarations: Vec<PhantomModuleEntry> = Vec::new();
55 let mut referenced: HashSet<String> = HashSet::new();
56 let candidates = collect_files(project.as_path(), is_rust_file)?;
57
58 for path in &candidates {
59 let source = match std::fs::read_to_string(path) {
60 Ok(s) => s,
61 Err(_) => continue,
62 };
63 let relative = project.to_relative(path);
64 if is_excluded_path(&relative) {
65 continue;
66 }
67 scan_declarations(&source, &relative, &mut declarations);
68 collect_referenced_names(&source, &mut referenced);
69 }
70
71 let mut phantoms: Vec<PhantomModuleEntry> = declarations
78 .into_iter()
79 .filter(|d| !referenced.contains(&d.module_name))
80 .filter(|d| !is_test_module_name(&d.module_name))
81 .filter(|d| !is_impl_extension_or_reexport(project.as_path(), d))
82 .collect();
83
84 phantoms.sort_by(|a, b| {
85 a.parent_file
86 .cmp(&b.parent_file)
87 .then(a.line.cmp(&b.line))
88 .then(a.module_name.cmp(&b.module_name))
89 });
90 if max_results > 0 && phantoms.len() > max_results {
91 phantoms.truncate(max_results);
92 }
93 Ok(phantoms)
94}
95
96fn scan_declarations(source: &str, file: &str, out: &mut Vec<PhantomModuleEntry>) {
97 for caps in MOD_DECL_RE.captures_iter(source) {
98 let name = match caps.name("name") {
99 Some(m) => m.as_str().to_owned(),
100 None => continue,
101 };
102 let mod_start = caps.get(0).map(|m| m.start()).unwrap_or(0);
103 if line_before_is_cfg_test(source, mod_start) {
109 continue;
110 }
111 let visibility = if caps.name("vis").is_some() {
112 "public"
113 } else {
114 "private"
115 };
116 let line = source[..mod_start].matches('\n').count() + 1;
117 out.push(PhantomModuleEntry {
118 parent_file: file.to_owned(),
119 module_name: name,
120 line,
121 visibility,
122 kind: "rust_mod_declaration",
123 });
124 }
125}
126
127fn line_before_is_cfg_test(source: &str, offset: usize) -> bool {
136 let line_start = source[..offset]
137 .rfind('\n')
138 .map(|i| i + 1)
139 .unwrap_or(offset);
140 if line_start == 0 {
141 return false;
142 }
143 let mut prev_end = line_start - 1;
144 loop {
145 let prev_start = source[..prev_end].rfind('\n').map(|i| i + 1).unwrap_or(0);
146 let prev_line = source[prev_start..prev_end].trim();
147 if !prev_line.is_empty() {
148 return is_positive_cfg_test_attribute(prev_line);
149 }
150 if prev_start == 0 {
151 return false;
152 }
153 prev_end = prev_start - 1;
154 }
155}
156
/// Reports whether `line` is a `#[cfg...]` attribute that enables its item under `test`.
///
/// `line` is expected to be trimmed. The attribute text up to the first `]` is inspected
/// with string-literal contents removed, so feature names such as `"latest"` or
/// `"test-utils"` and trailing comments cannot produce a match. The result is `true` when
/// the bare identifier `test` occurs and no `not(test)` (whitespace-insensitive) is present.
///
/// Like the prefix check suggests, `#[cfg_attr(test, ...)]` is also accepted.
fn is_positive_cfg_test_attribute(line: &str) -> bool {
    if !line.starts_with("#[cfg") {
        return false;
    }

    // Copy the attribute while blanking out string literals and stopping at the closing `]`.
    let mut predicate = String::with_capacity(line.len());
    let mut in_string = false;
    let mut escaped = false;
    for c in line.chars() {
        if in_string {
            if escaped {
                escaped = false;
            } else if c == '\\' {
                escaped = true;
            } else if c == '"' {
                in_string = false;
            }
            continue;
        }
        match c {
            '"' => {
                in_string = true;
                // Keep a separator so the identifiers around the literal stay distinct.
                predicate.push(' ');
            }
            ']' => break,
            _ => predicate.push(c),
        }
    }

    // Any negated `test` makes the attribute non-positive, however it is spaced.
    let compact: String = predicate.chars().filter(|c| !c.is_whitespace()).collect();
    if compact.contains("not(test)") {
        return false;
    }

    // Require `test` as a whole identifier, not as a substring of a longer word.
    predicate
        .split(|c: char| !(c.is_alphanumeric() || c == '_'))
        .any(|word| word == "test")
}
169
170fn collect_referenced_names(source: &str, into: &mut HashSet<String>) {
180 static LEADING_RE: LazyLock<Regex> =
181 LazyLock::new(|| Regex::new(r"([A-Za-z_][A-Za-z0-9_]*)::").unwrap());
182 static TRAILING_RE: LazyLock<Regex> =
183 LazyLock::new(|| Regex::new(r"::([A-Za-z_][A-Za-z0-9_]*)").unwrap());
184 static SINGLE_USE_RE: LazyLock<Regex> = LazyLock::new(|| {
185 Regex::new(
186 r"(?m)^\s*(?:pub(?:\([^)]*\))?\s+)?use\s+([A-Za-z_][A-Za-z0-9_]*)(?:\s+as\s+[A-Za-z_][A-Za-z0-9_]*)?\s*;",
187 )
188 .unwrap()
189 });
190 for caps in LEADING_RE.captures_iter(source) {
191 if let Some(m) = caps.get(1) {
192 into.insert(m.as_str().to_owned());
193 }
194 }
195 for caps in TRAILING_RE.captures_iter(source) {
196 if let Some(m) = caps.get(1) {
197 into.insert(m.as_str().to_owned());
198 }
199 }
200 for caps in SINGLE_USE_RE.captures_iter(source) {
201 if let Some(m) = caps.get(1) {
202 into.insert(m.as_str().to_owned());
203 }
204 }
205}
206
/// Returns `true` when `path` has exactly the extension `rs` (case-sensitive).
fn is_rust_file(path: &Path) -> bool {
    path.extension().is_some_and(|ext| ext == "rs")
}
210
/// Decides whether the file at `relative` (a `/`-separated path) is left out of the scan.
///
/// A path is excluded when any of the following holds:
/// * it is exactly this analyzer's own source path (compared case-sensitively;
///   NOTE(review): presumably to keep the detector from scanning itself — confirm),
/// * its ASCII-lowercased form ends in `_tests.rs` or `_test.rs`,
/// * any of its ASCII-lowercased `/`-separated segments is a test, bench, example or
///   fixture directory name from the list below.
fn is_excluded_path(relative: &str) -> bool {
    const SELF_PATH: &str = "crates/codelens-engine/src/phantom_modules.rs";
    const TEST_FILE_SUFFIXES: [&str; 2] = ["_tests.rs", "_test.rs"];
    const EXCLUDED_SEGMENTS: [&str; 8] = [
        "tests",
        "test",
        "bench",
        "benches",
        "examples",
        "fixtures",
        "integration_tests",
        "http_tests",
    ];

    if relative == SELF_PATH {
        return true;
    }

    let normalized = relative.to_ascii_lowercase();
    let is_test_file = TEST_FILE_SUFFIXES
        .iter()
        .any(|suffix| normalized.ends_with(*suffix));

    is_test_file
        || normalized.split('/').any(|segment| {
            EXCLUDED_SEGMENTS
                .iter()
                .any(|excluded| *excluded == segment)
        })
}
233
/// Returns `true` for module names that denote tests: exactly `test` or `tests`, or any
/// name ending in `_test` or `_tests`.
fn is_test_module_name(name: &str) -> bool {
    matches!(name, "tests" | "test") || ["_tests", "_test"].iter().any(|s| name.ends_with(*s))
}
237
238fn is_impl_extension_or_reexport(project_root: &Path, decl: &PhantomModuleEntry) -> bool {
252 let child = match find_child_module_file(project_root, decl) {
253 Some(p) => p,
254 None => return false,
255 };
256 let source = match std::fs::read_to_string(&child) {
257 Ok(s) => s,
258 Err(_) => return false,
259 };
260 let has_pub_decl = source.lines().any(|l| {
265 l.starts_with("pub fn ")
266 || l.starts_with("pub(crate) fn ")
267 || l.starts_with("pub struct ")
268 || l.starts_with("pub(crate) struct ")
269 || l.starts_with("pub enum ")
270 || l.starts_with("pub(crate) enum ")
271 || l.starts_with("pub const ")
272 || l.starts_with("pub(crate) const ")
273 || l.starts_with("pub static ")
274 || l.starts_with("pub(crate) static ")
275 || l.starts_with("pub trait ")
276 || l.starts_with("pub(crate) trait ")
277 || l.starts_with("pub type ")
278 || l.starts_with("pub(crate) type ")
279 });
280 if has_pub_decl {
281 return false;
282 }
283 let has_reexport = source
286 .lines()
287 .any(|l| l.starts_with("pub use ") || l.starts_with("pub(crate) use "));
288 if has_reexport {
289 return true;
290 }
291 let local_types: Vec<&str> = source.lines().filter_map(extract_local_type_name).collect();
298
299 source.lines().any(|l| {
300 if !(l.starts_with("impl ") || l.starts_with("impl<")) {
301 return false;
302 }
303 match extract_impl_target_type(l) {
304 Some(target) => !local_types.contains(&target),
305 None => false,
306 }
307 })
308}
309
310fn extract_impl_target_type(line: &str) -> Option<&str> {
319 let after_impl = line.strip_prefix("impl").unwrap_or(line);
320 let after_generics = if let Some(rest) = after_impl.strip_prefix('<') {
322 let depth_end = rest.find('>')?;
323 &rest[depth_end + 1..]
324 } else {
325 after_impl
326 };
327 let after_generics = after_generics.trim_start();
328 let target_segment = if let Some(idx) = after_generics.find(" for ") {
330 &after_generics[idx + 5..]
331 } else {
332 after_generics
333 };
334 extract_leading_type_ident(target_segment)
335}
336
/// Returns the final identifier of the path that `segment` starts with.
///
/// Surrounding whitespace and a leading `::` are ignored. Only the leading path is walked
/// (`a::b::Name`), so `::` occurring later in the segment — inside generic arguments or a
/// `where` clause — cannot change the result: `Foo<std::string::String>` yields `Foo`.
///
/// Returns `None` when `segment` does not start with an identifier (for example `&Foo`).
fn extract_leading_type_ident(segment: &str) -> Option<&str> {
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    let trimmed = segment.trim();
    let mut rest = trimmed.strip_prefix("::").unwrap_or(trimmed);
    loop {
        let end = rest
            .find(|c: char| !is_ident_char(c))
            .unwrap_or(rest.len());
        let (ident, tail) = rest.split_at(end);
        match tail.strip_prefix("::") {
            // Another path component follows: keep walking towards the last one.
            Some(next) if !ident.is_empty() && next.starts_with(is_ident_char) => rest = next,
            _ => return (!ident.is_empty()).then_some(ident),
        }
    }
}
349
350fn extract_local_type_name(line: &str) -> Option<&str> {
355 let stripped = line
356 .strip_prefix("pub(crate) ")
357 .or_else(|| line.strip_prefix("pub "))
358 .unwrap_or(line);
359 for kw in ["struct ", "enum ", "trait "] {
360 if let Some(rest) = stripped.strip_prefix(kw) {
361 return extract_leading_type_ident(rest);
362 }
363 }
364 None
365}
366
367fn find_child_module_file(
368 project_root: &Path,
369 decl: &PhantomModuleEntry,
370) -> Option<std::path::PathBuf> {
371 let parent_path = project_root.join(&decl.parent_file);
372 let parent_dir = parent_path.parent()?;
373 let parent_stem = parent_path.file_stem()?.to_str()?;
374 let candidates = [
375 parent_dir.join(format!("{}.rs", decl.module_name)),
376 parent_dir.join(&decl.module_name).join("mod.rs"),
377 parent_dir
378 .join(parent_stem)
379 .join(format!("{}.rs", decl.module_name)),
380 parent_dir
381 .join(parent_stem)
382 .join(&decl.module_name)
383 .join("mod.rs"),
384 ];
385 candidates.into_iter().find(|p| p.exists())
386}
387
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `scan_declarations` over `source` as if it were the contents of `lib.rs`.
    fn scan(source: &str) -> Vec<PhantomModuleEntry> {
        let mut found = Vec::new();
        scan_declarations(source, "lib.rs", &mut found);
        found
    }

    /// Runs `collect_referenced_names` over `source` and returns the resulting set.
    fn referenced_in(source: &str) -> HashSet<String> {
        let mut names = HashSet::new();
        collect_referenced_names(source, &mut names);
        names
    }

    /// Creates `crates/c/src/lib.rs` declaring `mod <module>;` plus `<module>.rs` holding
    /// `body`, all beneath `root`.
    fn write_single_module_crate(root: impl AsRef<Path>, module: &str, body: &str) {
        let src_dir = root.as_ref().join("crates").join("c").join("src");
        std::fs::create_dir_all(&src_dir).unwrap();
        std::fs::write(src_dir.join("lib.rs"), format!("mod {module};\n")).unwrap();
        std::fs::write(src_dir.join(format!("{module}.rs")), body).unwrap();
    }

    /// True when `entries` lists `module` as declared in a `lib.rs`.
    fn reports(entries: &[PhantomModuleEntry], module: &str) -> bool {
        entries
            .iter()
            .any(|e| e.module_name == module && e.parent_file.contains("lib.rs"))
    }

    #[test]
    fn detects_unreferenced_private_mod() {
        let found = scan("mod ghost;\nmod live;\n");
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].module_name, "ghost");
        assert_eq!(found[0].visibility, "private");
        assert_eq!(found[1].module_name, "live");
    }

    #[test]
    fn detects_pub_mod_as_public() {
        let found = scan("pub mod api;\n");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].visibility, "public");
    }

    /// Inline modules carry their body in place, so there is no file that could be missing.
    #[test]
    fn skips_inline_mod_blocks() {
        let found = scan("mod inline { fn x() {} }\n");
        assert!(found.is_empty(), "got: {:?}", found);
    }

    /// A negated `test` predicate gates production code and must still be scanned.
    #[test]
    fn cfg_not_test_is_not_treated_as_cfg_test() {
        let found = scan(
            "#[cfg(not(test))]\nmod live;\n#[cfg(any(not(test), feature = \"x\"))]\nmod live2;\n",
        );
        assert_eq!(found.len(), 2, "got: {:?}", found);
        assert_eq!(found[0].module_name, "live");
        assert_eq!(found[1].module_name, "live2");
    }

    /// Modules gated on a positive `test` predicate are ignored by the scan.
    #[test]
    fn skips_cfg_test_gated_mod() {
        let found = scan(
            "#[cfg(test)]\nmod tests;\n#[cfg(any(test, feature = \"x\"))]\nmod fixtures;\nmod live;\n",
        );
        assert_eq!(found.len(), 1, "got: {:?}", found);
        assert_eq!(found[0].module_name, "live");
    }

    #[test]
    fn single_segment_use_keeps_module_alive() {
        let set = referenced_in("use foo;\npub use bar as renamed;\n");
        assert!(
            set.contains("foo"),
            "single-segment `use foo;` missed: {:?}",
            set
        );
        assert!(
            set.contains("bar"),
            "single-segment `pub use bar as renamed;` missed: {:?}",
            set
        );
    }

    #[test]
    fn referenced_set_picks_up_path_segments() {
        let set = referenced_in("use crate::foo::bar;\nlet z = self::baz::x();\n");
        for expected in ["foo", "bar", "baz"] {
            assert!(set.contains(expected));
        }
    }

    #[test]
    fn referenced_set_picks_up_pub_use_with_braces() {
        let set = referenced_in(
            "pub use dead_code::{DeadCodeEntryV2, find_dead_code, find_dead_code_v2};",
        );
        assert!(set.contains("dead_code"), "missing dead_code in {:?}", set);
    }

    /// Manual smoke run against a real checkout; prints the findings instead of asserting.
    /// The root comes from `CODELENS_REPO_ROOT`, falling back to two levels above the
    /// crate manifest directory.
    #[test]
    #[ignore]
    fn dogfood_self_repo() {
        let repo = match std::env::var("CODELENS_REPO_ROOT") {
            Ok(root) => root,
            Err(_) => std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
                .ancestors()
                .nth(2)
                .expect("workspace root not found above CARGO_MANIFEST_DIR")
                .to_string_lossy()
                .into_owned(),
        };
        let project = crate::project::ProjectRoot::new(repo).expect("project root");
        let results = super::find_phantom_modules(&project, 200).expect("find_phantom_modules");
        eprintln!("\n=== {} phantom mod declarations ===\n", results.len());
        for entry in &results {
            eprintln!(
                "  {} (vis={}) at {}:{}",
                entry.module_name, entry.visibility, entry.parent_file, entry.line
            );
        }
    }

    #[test]
    fn is_excluded_path_skips_test_dirs() {
        let excluded = [
            "crates/foo/tests/x.rs",
            "crates/foo/src/x_tests.rs",
            "crates/codelens-engine/src/phantom_modules.rs",
        ];
        for path in excluded {
            assert!(is_excluded_path(path));
        }
        assert!(!is_excluded_path("crates/foo/src/lib.rs"));
    }

    #[test]
    fn impl_target_extraction_handles_common_shapes() {
        let cases = [
            ("impl Foo {", "Foo"),
            ("impl<T> Foo<T> {", "Foo"),
            ("impl Display for Bar {", "Bar"),
            ("impl<G: Clone> Iterator for Baz<G> {", "Baz"),
            ("impl crate::a::Foo {", "Foo"),
        ];
        for (line, expected) in cases {
            assert_eq!(extract_impl_target_type(line), Some(expected));
        }
    }

    #[test]
    fn local_type_recognition_picks_up_pub_and_private() {
        let cases = [
            ("struct Local;", Some("Local")),
            ("pub struct PubLocal {", Some("PubLocal")),
            ("pub(crate) enum Mode {", Some("Mode")),
            ("trait Foo {", Some("Foo")),
            ("fn helper() {", None),
        ];
        for (line, expected) in cases {
            assert_eq!(extract_local_type_name(line), expected);
        }
    }

    /// An `impl` on a type defined in the same file does not rescue an unreferenced module.
    #[test]
    fn unreferenced_module_with_only_local_impl_is_reported() {
        let (_temp_dir, dir) = crate::test_helpers::make_unique_temp_dir("phantom-impl-local-");
        write_single_module_crate(
            &dir,
            "stale",
            "struct Local;\n\nimpl Local {\n    fn helper(&self) {}\n}\n",
        );

        let project = crate::project::ProjectRoot::new(dir.to_str().unwrap()).expect("root");
        let entries = find_phantom_modules(&project, 100).expect("scan ok");
        let stale_reported = reports(&entries, "stale");
        assert!(
            stale_reported,
            "module containing only impl on a local type must be reported as phantom; got {:?}",
            entries
        );

        std::fs::remove_dir_all(&dir).ok();
    }

    /// A module that only adds an `impl` to a type from elsewhere stays exempt.
    #[test]
    fn module_with_impl_on_external_type_is_not_phantom() {
        let (_temp_dir, dir) = crate::test_helpers::make_unique_temp_dir("phantom-impl-extern-");
        write_single_module_crate(
            &dir,
            "analysis",
            "use crate::AppState;\n\nimpl AppState {\n    pub(crate) fn enqueue(&self) {}\n}\n",
        );

        let project = crate::project::ProjectRoot::new(dir.to_str().unwrap()).expect("root");
        let entries = find_phantom_modules(&project, 100).expect("scan ok");
        let analysis_reported = reports(&entries, "analysis");
        assert!(
            !analysis_reported,
            "split-impl module on an external type must remain exempt; got {:?}",
            entries
        );

        std::fs::remove_dir_all(&dir).ok();
    }
}