use crate::project::{collect_files, ProjectRoot};
use anyhow::Result;
use regex::Regex;
use serde::Serialize;
use std::collections::HashSet;
use std::path::Path;
use std::sync::LazyLock;
/// Matches out-of-line module declarations of the form `mod name;`.
///
/// * `vis` captures an optional leading `pub` / `pub(...)` qualifier (with its
///   trailing whitespace).
/// * `name` captures the module identifier.
///
/// Inline modules (`mod name { ... }`) are not matched because the pattern requires
/// a terminating `;`. The pattern is anchored with a multi-line `^` followed by
/// `\s*`; since `\s` also matches newlines, a match may begin on blank lines that
/// precede the declaration itself.
static MOD_DECL_RE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern =
        r"(?m)^\s*(?P<vis>pub(?:\([^)]*\))?\s+)?mod\s+(?P<name>[A-Za-z_][A-Za-z0-9_]*)\s*;";
    // The pattern is a fixed literal, so failing to compile it is a programming error.
    Regex::new(pattern).unwrap()
});
/// One `mod name;` declaration recorded by `scan_declarations`.
///
/// `find_phantom_modules` returns the subset of these entries whose module name
/// was not seen next to a `::` anywhere in the scanned files.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct PhantomModuleEntry {
/// File label passed to `scan_declarations`; `find_phantom_modules` supplies the
/// project-relative path of the file containing the declaration.
pub parent_file: String,
/// Identifier captured by the `name` group of the declaration regex.
pub module_name: String,
/// 1-based line number derived from the regex match offset (0 only if the
/// overall match is unavailable).
pub line: usize,
/// `"public"` when the declaration carries a `pub` / `pub(...)` qualifier,
/// otherwise `"private"`.
pub visibility: &'static str,
/// Entry category; `scan_declarations` always sets `"rust_mod_declaration"`.
pub kind: &'static str,
}
/// Finds `mod name;` declarations whose module name is never used as a `::` path
/// segment anywhere in the scanned part of the project.
///
/// Every Rust file under `project` that is not rejected by `is_excluded_path` is
/// scanned for module declarations and for identifiers adjacent to `::`. A
/// declaration is reported when its name is absent from that project-wide set of
/// referenced identifiers and the name does not look like a test module
/// (`is_test_module_name`).
///
/// This is a name-based heuristic: identifiers are compared as bare names without
/// resolving paths, and references that occur only in excluded files (tests,
/// benches, examples, ...) are not counted.
///
/// # Arguments
/// * `project` - root of the project to scan.
/// * `max_results` - maximum number of entries to return; `0` means no limit.
///
/// # Returns
/// Entries sorted by parent file, then line, then module name.
///
/// # Errors
/// Propagates any error returned by `collect_files`. Files that cannot be read as
/// UTF-8 text are skipped rather than reported.
pub fn find_phantom_modules(
    project: &ProjectRoot,
    max_results: usize,
) -> Result<Vec<PhantomModuleEntry>> {
    let mut declarations: Vec<PhantomModuleEntry> = Vec::new();
    let mut referenced: HashSet<String> = HashSet::new();

    let candidates = collect_files(project.as_path(), is_rust_file)?;
    for path in &candidates {
        let relative = project.to_relative(path);
        // Decide on exclusion before touching the disk so that excluded files are
        // never read at all.
        if is_excluded_path(&relative) {
            continue;
        }
        // Best effort: an unreadable or non-UTF-8 file must not abort the scan.
        let Ok(source) = std::fs::read_to_string(path) else {
            continue;
        };
        scan_declarations(&source, &relative, &mut declarations);
        collect_referenced_names(&source, &mut referenced);
    }

    let mut phantoms: Vec<PhantomModuleEntry> = declarations
        .into_iter()
        .filter(|decl| {
            !referenced.contains(&decl.module_name) && !is_test_module_name(&decl.module_name)
        })
        .collect();

    // Stable, deterministic ordering so that truncation always keeps the same entries.
    phantoms.sort_by(|a, b| {
        (&a.parent_file, a.line, &a.module_name).cmp(&(&b.parent_file, b.line, &b.module_name))
    });

    // `truncate` is a no-op when the list is already short enough.
    if max_results > 0 {
        phantoms.truncate(max_results);
    }
    Ok(phantoms)
}
/// Appends one [`PhantomModuleEntry`] to `out` for every `mod name;` declaration
/// that `MOD_DECL_RE` finds in `source`.
///
/// * `source` - full text of a Rust source file.
/// * `file` - label stored verbatim in each entry's `parent_file`.
/// * `out` - accumulator the entries are pushed onto, in match order.
///
/// The recorded line is 1-based and refers to the line holding the first
/// non-whitespace token of the declaration (`pub` or `mod`).
fn scan_declarations(source: &str, file: &str, out: &mut Vec<PhantomModuleEntry>) {
    for caps in MOD_DECL_RE.captures_iter(source) {
        let Some(name) = caps.name("name") else {
            continue;
        };
        let visibility = if caps.name("vis").is_some() {
            "public"
        } else {
            "private"
        };
        // `MOD_DECL_RE` begins with `^\s*`, and `\s` also consumes newlines, so the
        // raw match can start on blank lines *above* the declaration. Skip the
        // match's leading whitespace before counting newlines; otherwise a
        // declaration preceded by blank lines is reported too early.
        let line = caps.get(0).map_or(0, |whole| {
            let text = whole.as_str();
            let first_token = whole.start() + (text.len() - text.trim_start().len());
            source[..first_token].matches('\n').count() + 1
        });
        out.push(PhantomModuleEntry {
            parent_file: file.to_owned(),
            module_name: name.as_str().to_owned(),
            line,
            visibility,
            kind: "rust_mod_declaration",
        });
    }
}
/// Inserts into `into` every identifier in `source` that sits directly next to a
/// `::` path separator, on either side of it.
///
/// For `a::b::c` this yields `a`, `b` and `c`. The scan is purely textual: it does
/// not distinguish code from comments or string literals.
fn collect_referenced_names(source: &str, into: &mut HashSet<String>) {
    // Identifier immediately followed by `::`.
    static LEADING_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"([A-Za-z_][A-Za-z0-9_]*)::").unwrap());
    // Identifier immediately preceded by `::`.
    static TRAILING_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"::([A-Za-z_][A-Za-z0-9_]*)").unwrap());

    for pattern in [&*LEADING_RE, &*TRAILING_RE] {
        into.extend(
            pattern
                .captures_iter(source)
                .filter_map(|caps| caps.get(1))
                .map(|ident| ident.as_str().to_owned()),
        );
    }
}
/// Returns `true` when `path` has the exact (case-sensitive) extension `rs`.
///
/// Paths without an extension, or whose extension is not valid UTF-8, yield `false`.
fn is_rust_file(path: &Path) -> bool {
    matches!(path.extension().and_then(|ext| ext.to_str()), Some("rs"))
}
/// Decides whether a project-relative path should be left out of the scan.
///
/// A path is excluded when it is this analyzer's own source file (exact,
/// case-sensitive match), when its ASCII-lowercased form ends in `_tests.rs` or
/// `_test.rs`, or when any `/`-separated segment of the lowercased path is one of
/// the test/bench/example/fixture directory names listed below.
fn is_excluded_path(relative: &str) -> bool {
    const SELF_PATH: &str = "crates/codelens-engine/src/phantom_modules.rs";
    const EXCLUDED_SUFFIXES: [&str; 2] = ["_tests.rs", "_test.rs"];
    const EXCLUDED_SEGMENTS: [&str; 8] = [
        "tests",
        "test",
        "bench",
        "benches",
        "examples",
        "fixtures",
        "integration_tests",
        "http_tests",
    ];

    if relative == SELF_PATH {
        return true;
    }

    let lowered = relative.to_ascii_lowercase();
    let has_test_suffix = EXCLUDED_SUFFIXES
        .iter()
        .any(|suffix| lowered.ends_with(*suffix));
    if has_test_suffix {
        return true;
    }

    lowered
        .split('/')
        .any(|segment| EXCLUDED_SEGMENTS.iter().any(|name| *name == segment))
}
/// Returns `true` for module names that conventionally hold tests: exactly `test`
/// or `tests`, or any name ending in `_test` / `_tests` (case-sensitive).
fn is_test_module_name(name: &str) -> bool {
    if matches!(name, "tests" | "test") {
        return true;
    }
    ["_tests", "_test"]
        .iter()
        .any(|suffix| name.ends_with(*suffix))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `scan_declarations` over `source`, labelled as `lib.rs`, and returns
    /// the collected entries.
    fn scan(source: &str) -> Vec<PhantomModuleEntry> {
        let mut found = Vec::new();
        scan_declarations(source, "lib.rs", &mut found);
        found
    }

    /// Runs `collect_referenced_names` over `source` and returns the resulting set.
    fn referenced_in(source: &str) -> HashSet<String> {
        let mut names = HashSet::new();
        collect_referenced_names(source, &mut names);
        names
    }

    /// Two plain `mod x;` lines produce two private entries in source order.
    #[test]
    fn detects_unreferenced_private_mod() {
        let found = scan("mod ghost;\nmod live;\n");
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].module_name, "ghost");
        assert_eq!(found[0].visibility, "private");
        assert_eq!(found[1].module_name, "live");
    }

    /// A `pub mod` declaration is classified as public.
    #[test]
    fn detects_pub_mod_as_public() {
        let found = scan("pub mod api;\n");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].visibility, "public");
    }

    /// Inline `mod name { ... }` blocks are not declarations of external files.
    #[test]
    fn skips_inline_mod_blocks() {
        let found = scan("mod inline { fn x() {} }\n");
        assert!(found.is_empty(), "got: {:?}", found);
    }

    /// Identifiers on both sides of `::` are recorded as referenced.
    #[test]
    fn referenced_set_picks_up_path_segments() {
        let names = referenced_in("use crate::foo::bar;\nlet z = self::baz::x();\n");
        assert!(names.contains("foo"));
        assert!(names.contains("bar"));
        assert!(names.contains("baz"));
    }

    /// A module re-exported through a braced `pub use` still counts as referenced.
    #[test]
    fn referenced_set_picks_up_pub_use_with_braces() {
        let names = referenced_in(
            "pub use dead_code::{DeadCodeEntryV2, find_dead_code, find_dead_code_v2};",
        );
        assert!(
            names.contains("dead_code"),
            "missing dead_code in {:?}",
            names
        );
    }

    /// Manual diagnostic run against a real checkout; prints the findings to stderr.
    ///
    /// NOTE(review): the fallback is a path on one developer's machine. Set
    /// `CODELENS_REPO_ROOT` when running this elsewhere.
    #[test]
    #[ignore]
    fn dogfood_self_repo() {
        let repo = match std::env::var("CODELENS_REPO_ROOT") {
            Ok(root) => root,
            Err(_) => "/Users/bagjaeseog/codelens-mcp-plugin".to_owned(),
        };
        let project = crate::project::ProjectRoot::new(repo).expect("project root");
        let results = super::find_phantom_modules(&project, 200).expect("find_phantom_modules");
        eprintln!("\n=== {} phantom mod declarations ===\n", results.len());
        for entry in &results {
            eprintln!(
                "  {} (vis={}) at {}:{}",
                entry.module_name, entry.visibility, entry.parent_file, entry.line
            );
        }
    }

    /// Test directories, `_tests.rs` files and this analyzer's own source are
    /// excluded; ordinary sources are not.
    #[test]
    fn is_excluded_path_skips_test_dirs() {
        assert!(is_excluded_path("crates/foo/tests/x.rs"));
        assert!(is_excluded_path("crates/foo/src/x_tests.rs"));
        assert!(!is_excluded_path("crates/foo/src/lib.rs"));
        assert!(is_excluded_path(
            "crates/codelens-engine/src/phantom_modules.rs"
        ));
    }
}