use std::collections::HashSet;
use std::path::Path;
use ripvec_core::{
entry_points::detector_for,
repo_map::{
CallRef, DeadCodeConfidence, Definition, FileNode, build_graph_from_files_pub,
compute_confidence, compute_dead_code,
},
};
/// Builds a `function_item` [`Definition`] called `name` with the given
/// `start_line` / `end_line` and no outgoing calls.
///
/// Every remaining field is left empty: blank scope, no signature, zero byte
/// offsets, no decorator and no LSP kind hint.
fn def(name: &str, start_line: u32, end_line: u32) -> Definition {
    let name = name.to_owned();
    let kind = String::from("function_item");
    Definition {
        name,
        kind,
        start_line,
        end_line,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: Vec::new(),
        decorator: None,
        lsp_kind_hint: None,
    }
}
/// Builds a `function_item` [`Definition`] called `name` that carries exactly
/// one outgoing call, already resolved to `callee`.
///
/// The single [`CallRef`] is always named `"callee"`, has no qualified path or
/// receiver type, and sits at byte offset 0; only its `resolved` target varies.
/// The tests in this file pass `callee` as a `(file index, definition index
/// within that file)` pair.
fn def_calling(name: &str, start_line: u32, end_line: u32, callee: (u32, u16)) -> Definition {
    // The one and only call edge leaving this definition.
    let outgoing = CallRef {
        name: String::from("callee"),
        qualified_path: None,
        receiver_type: None,
        byte_offset: 0,
        resolved: Some(callee),
    };
    Definition {
        name: name.to_owned(),
        kind: String::from("function_item"),
        start_line,
        end_line,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: vec![outgoing],
        decorator: None,
        lsp_kind_hint: None,
    }
}
/// Wraps `defs` in a [`FileNode`] located at `path`, with an empty import list.
fn file_node(path: &str, defs: Vec<Definition>) -> FileNode {
    let path = path.to_owned();
    let imports = Vec::new();
    FileNode {
        path,
        defs,
        imports,
    }
}
/// Definitions reachable from the entry set are counted as live; only the
/// definitions nothing calls are reported as dead.
///
/// Fixture layout (call targets are `(file index, def index in file)` pairs):
/// - file 0: three entry points, flat indices 0, 1 and 2;
/// - files 1 and 2: five definitions reached directly or transitively;
/// - files 3 and 4: one uncalled definition each.
#[test]
fn compute_dead_code_excludes_reachable_defs() {
    let entry_file = file_node(
        "src/entries.rs",
        vec![
            def_calling("entry_a", 1, 10, (1, 0)),
            def_calling("entry_b", 12, 20, (2, 0)),
            def_calling("entry_c", 22, 30, (2, 2)),
        ],
    );
    let first_reachable = file_node(
        "src/reachable1.rs",
        vec![
            def_calling("reachable_a", 1, 5, (1, 1)),
            def("reachable_b", 7, 12),
        ],
    );
    let second_reachable = file_node(
        "src/reachable2.rs",
        vec![
            def_calling("reachable_c", 1, 5, (2, 1)),
            def("reachable_d", 7, 12),
            def("reachable_e", 14, 18),
        ],
    );
    let dead_a = file_node("src/dead_a.rs", vec![def("dead_x", 1, 5)]);
    let dead_b = file_node("src/dead_b.rs", vec![def("dead_y", 1, 5)]);

    let graph = build_graph_from_files_pub(vec![
        entry_file,
        first_reachable,
        second_reachable,
        dead_a,
        dead_b,
    ]);
    // All three definitions of file 0 seed the reachability walk.
    let entries: HashSet<usize> = HashSet::from([0, 1, 2]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.total_dead_defs, 2,
        "expected 2 dead defs (dead_x, dead_y), got {}",
        report.total_dead_defs
    );
    assert_eq!(
        report.dead_clusters.len(),
        2,
        "expected 2 dead clusters, got {}",
        report.dead_clusters.len()
    );
    // The two dead definitions do not reference each other, so each one is
    // expected to sit in its own cluster.
    for dead_cluster in report.dead_clusters.iter() {
        assert_eq!(
            dead_cluster.size, 1,
            "each isolated dead def is a cluster of size 1"
        );
    }
    assert_eq!(
        report.total_live_defs, 8,
        "expected 8 live defs, got {}",
        report.total_live_defs
    );
}
/// Dead definitions linked by call edges are reported as one cluster.
///
/// `main` (flat index 0) is the only entry and calls nothing, so the chain
/// `dead_a -> dead_b -> dead_c` in file 1 is unreachable and must come back as
/// a single cluster of three members.
#[test]
fn compute_dead_code_groups_connected_components() {
    let main_file = file_node("src/main.rs", vec![def("main", 1, 10)]);
    let chain_file = file_node(
        "src/dead_chain.rs",
        vec![
            def_calling("dead_a", 1, 5, (1, 1)),
            def_calling("dead_b", 7, 12, (1, 2)),
            def("dead_c", 14, 18),
        ],
    );

    let graph = build_graph_from_files_pub(vec![main_file, chain_file]);
    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.total_dead_defs, 3,
        "dead_a, dead_b, dead_c should be dead"
    );
    assert_eq!(
        report.dead_clusters.len(),
        1,
        "connected chain should form 1 cluster, got {}",
        report.dead_clusters.len()
    );
    let only_cluster = &report.dead_clusters[0];
    assert_eq!(
        only_cluster.size, 3,
        "cluster must have all 3 dead members"
    );
}
/// Dead clusters are returned largest first.
///
/// Besides the lone `main` entry, the fixture contains three unreachable
/// groups: a 5-long call chain, a 3-long call chain and a single definition.
/// The report must list them with sizes 5, 3, 1 in that order.
#[test]
fn compute_dead_code_sorts_clusters_by_size_descending() {
    let main_file = file_node("src/main.rs", vec![def("main", 1, 5)]);
    // a -> b -> c -> d -> e, all inside file 1.
    let chain_of_five = file_node(
        "src/chain5.rs",
        vec![
            def_calling("a", 1, 2, (1, 1)),
            def_calling("b", 3, 4, (1, 2)),
            def_calling("c", 5, 6, (1, 3)),
            def_calling("d", 7, 8, (1, 4)),
            def("e", 9, 10),
        ],
    );
    // p -> q -> r, all inside file 2.
    let chain_of_three = file_node(
        "src/chain3.rs",
        vec![
            def_calling("p", 1, 2, (2, 1)),
            def_calling("q", 3, 4, (2, 2)),
            def("r", 5, 6),
        ],
    );
    let singleton = file_node("src/single.rs", vec![def("z", 1, 2)]);

    let graph =
        build_graph_from_files_pub(vec![main_file, chain_of_five, chain_of_three, singleton]);
    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(report.dead_clusters.len(), 3, "expected 3 clusters");

    let sizes: Vec<usize> = report
        .dead_clusters
        .iter()
        .map(|cluster| cluster.size)
        .collect();
    assert_eq!(
        sizes[0], 5,
        "largest cluster first (size 5), got sizes: {sizes:?}"
    );
    assert_eq!(sizes[1], 3, "second cluster (size 3), got sizes: {sizes:?}");
    assert_eq!(
        sizes[2], 1,
        "smallest cluster (size 1), got sizes: {sizes:?}"
    );
}
/// The boolean flag of `compute_dead_code` decides whether an entry living
/// under `tests/` keeps its definition alive.
///
/// The same graph and entry set (`test_foo` at flat index 0, `main_prod` at
/// flat index 2) are analysed twice:
/// - flag `true`: only `dead_prod` is dead;
/// - flag `false`: `test_foo` is reported dead as well.
#[test]
fn compute_dead_code_excludes_test_paths_when_requested() {
    let graph = build_graph_from_files_pub(vec![
        file_node("tests/lib_test.rs", vec![def("test_foo", 1, 10)]),
        file_node("src/dead.rs", vec![def("dead_prod", 1, 5)]),
        file_node("src/main.rs", vec![def("main_prod", 1, 5)]),
    ]);
    let seeds: HashSet<usize> = HashSet::from([0, 2]);

    let report_incl = compute_dead_code(&graph, &seeds, true);
    assert_eq!(
        report_incl.total_dead_defs, 1,
        "with test paths: only dead_prod should be dead, got {}",
        report_incl.total_dead_defs
    );

    let report_excl = compute_dead_code(&graph, &seeds, false);
    assert_eq!(
        report_excl.total_dead_defs, 2,
        "without test paths: test_foo + dead_prod should both be dead, got {}",
        report_excl.total_dead_defs
    );
}
/// Smoke test: runs dead-code analysis over the `crates/` directory found two
/// levels above this crate's manifest directory.
///
/// Entry points are discovered per file with the detector registered for the
/// file's extension; each detection is mapped to the first definition in that
/// file whose start line or name matches. The test then checks that the dead
/// fraction lies in `[0.0, 0.60]` and that at least one dead cluster exists,
/// and prints a one-line summary to stderr.
///
/// NOTE(review): the failure message below points at a "test docstring" for
/// engine-limitation context; that explanation is not present in this file —
/// confirm where it lives.
#[test]
#[ignore = "expensive -- loads full ripvec corpus; run with --include-ignored"]
fn compute_dead_code_real_corpus_smoke() {
    use ripvec_core::repo_map::build_graph;

    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let workspace_root = manifest_dir.parent().unwrap().parent().unwrap();
    let root = workspace_root.join("crates");
    let graph = build_graph(&root).expect("build_graph on ripvec crates/");

    let mut entries: HashSet<usize> = HashSet::new();
    for (file_idx, file) in graph.files.iter().enumerate() {
        let ext = Path::new(&file.path)
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or_default();
        // Files without a registered detector, or that cannot be read, are
        // skipped rather than failing the test.
        let Some(detector) = detector_for(ext) else {
            continue;
        };
        let abs_path = root.join(&file.path);
        let Ok(source) = std::fs::read_to_string(&abs_path) else {
            continue;
        };
        let detections = detector.detect(&source, &abs_path);
        if detections.is_empty() {
            continue;
        }

        // Flat index of this file's first definition.
        let base = graph.def_offsets[file_idx];
        for ep in &detections {
            // Only the first matching definition is recorded per detection.
            let first_match = file
                .defs
                .iter()
                .position(|candidate| candidate.start_line == ep.line || candidate.name == ep.name);
            if let Some(def_idx) = first_match {
                entries.insert(base + def_idx);
            }
        }
    }

    let report = compute_dead_code(&graph, &entries, true);

    let fraction_in_range = report.dead_fraction >= 0.0 && report.dead_fraction <= 0.60;
    assert!(
        fraction_in_range,
        "dead_fraction {:.3} outside expected [0.0, 0.60] for ripvec crates/; \
         see test docstring for engine-limitation context",
        report.dead_fraction
    );
    assert!(
        !report.dead_clusters.is_empty(),
        "expected at least one dead cluster on real corpus"
    );
    eprintln!(
        "ripvec corpus: {} defs total, {} dead ({:.1}%), {} clusters",
        report.total_dead_defs + report.total_live_defs,
        report.total_dead_defs,
        report.dead_fraction * 100.0,
        report.dead_clusters.len()
    );
}
/// The `root_def_idx` of a dead cluster must have a `def_ranks` value at least
/// as large as that of every member of the cluster.
///
/// Fixture: `main` is the sole entry and calls nothing; `d0 -> d1 -> d2` in
/// file 1 forms the single dead cluster under inspection.
#[test]
fn compute_dead_code_cluster_root_is_highest_rank() {
    let graph = build_graph_from_files_pub(vec![
        file_node("src/main.rs", vec![def("main", 1, 5)]),
        file_node(
            "src/dead_cluster.rs",
            vec![
                def_calling("d0", 1, 2, (1, 1)),
                def_calling("d1", 3, 4, (1, 2)),
                def("d2", 5, 6),
            ],
        ),
    ]);
    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.dead_clusters.len(),
        1,
        "should have exactly 1 dead cluster"
    );
    let cluster = &report.dead_clusters[0];
    assert_eq!(cluster.size, 3, "cluster must have 3 members");

    // Compare the root's rank against each member in turn (the root itself
    // trivially satisfies `>=` if it is listed among the members).
    let root_rank = graph.def_ranks[cluster.root_def_idx];
    for member in cluster.member_def_indices.iter().copied() {
        let member_rank = graph.def_ranks[member];
        assert!(
            root_rank >= member_rank,
            "root (flat {}, rank {:.6}) must have rank >= member (flat {}, rank {:.6})",
            cluster.root_def_idx,
            root_rank,
            member,
            member_rank
        );
    }
}
/// 50 definitions with 5 library exports and 1 main entry (no framework, test
/// or FFI entries) must be classified as [`DeadCodeConfidence::High`].
#[test]
fn dead_code_report_confidence_high_on_strong_entry_coverage() {
    let confidence = compute_confidence(
        50_usize, // total_defs
        5_usize,  // library_exports
        0_usize,  // framework_dispatched
        1_usize,  // main_entries
        0_usize,  // test_entries
        0_usize,  // ffi_entries
    );
    assert_eq!(
        confidence,
        DeadCodeConfidence::High,
        "6/50 = 12% coverage, 5 LibraryExports, 1 Main should yield High; got {confidence:?}"
    );
}
/// When test entries dominate the detected entry set (800 of 802 here, with no
/// library exports), the result must be [`DeadCodeConfidence::Low`].
#[test]
fn dead_code_report_confidence_low_when_few_entries_detected() {
    let confidence = compute_confidence(
        1000_usize, // total_defs
        0_usize,    // library_exports
        0_usize,    // framework_dispatched
        2_usize,    // main_entries
        800_usize,  // test_entries
        0_usize,    // ffi_entries
    );
    assert_eq!(
        confidence,
        DeadCodeConfidence::Low,
        "test_dominant (800/802 = 99.8%) should yield Low; got {confidence:?}"
    );
}
/// A modest entry set (1 library export, 3 main entries, 5 test entries out of
/// 200 definitions) must be classified as [`DeadCodeConfidence::Medium`].
#[test]
fn dead_code_report_confidence_medium_in_between() {
    let confidence = compute_confidence(
        200_usize, // total_defs
        1_usize,   // library_exports
        0_usize,   // framework_dispatched
        3_usize,   // main_entries
        5_usize,   // test_entries
        0_usize,   // ffi_entries
    );
    assert_eq!(
        confidence,
        DeadCodeConfidence::Medium,
        "9/200 = 4.5% coverage, 1 LibraryExport, 3 Main, 5 tests (not dominant) should yield Medium; got {confidence:?}"
    );
}
/// Each [`DeadCodeConfidence`] variant serializes to its lowercase name as a
/// JSON string and deserializes back to the same variant.
#[test]
fn dead_code_report_confidence_serializes_as_snake_case() {
    // Serialize all three variants first.
    let json_high = serde_json::to_string(&DeadCodeConfidence::High).expect("serialize High");
    let json_medium =
        serde_json::to_string(&DeadCodeConfidence::Medium).expect("serialize Medium");
    let json_low = serde_json::to_string(&DeadCodeConfidence::Low).expect("serialize Low");

    assert_eq!(json_high, "\"high\"", "High must serialize as \"high\"");
    assert_eq!(
        json_medium, "\"medium\"",
        "Medium must serialize as \"medium\""
    );
    assert_eq!(json_low, "\"low\"", "Low must serialize as \"low\"");

    // Then feed the produced JSON back through the deserializer.
    let back_high =
        serde_json::from_str::<DeadCodeConfidence>(&json_high).expect("deserialize high");
    let back_medium =
        serde_json::from_str::<DeadCodeConfidence>(&json_medium).expect("deserialize medium");
    let back_low =
        serde_json::from_str::<DeadCodeConfidence>(&json_low).expect("deserialize low");

    assert_eq!(back_high, DeadCodeConfidence::High);
    assert_eq!(back_medium, DeadCodeConfidence::Medium);
    assert_eq!(back_low, DeadCodeConfidence::Low);
}
/// Reachability must follow every call edge of a definition, not only those
/// kept in the bounded `def_callees` neighbor list.
///
/// Fixture (single file, so every call target is `(0, def index)`):
/// - index 0: `entry`, calling `helper1` ..= `helper7`;
/// - indices 1..=6: leaf helpers;
/// - index 7: `helper7`, calling index 8;
/// - index 8: `deep1`.
///
/// With `entry` as the only seed, nothing may be reported dead.
///
/// NOTE(review): `entry` has only 7 callees, which is below the cap of 25
/// quoted in the precondition message — confirm this fixture actually
/// exercises truncation of the neighbor list.
#[test]
fn compute_dead_code_walks_all_callees_past_max_neighbors_cap() {
    let entry_calls: Vec<CallRef> = (1u16..=7)
        .map(|callee_di| CallRef {
            name: format!("helper{callee_di}"),
            qualified_path: None,
            receiver_type: None,
            byte_offset: 0,
            resolved: Some((0u32, callee_di)),
        })
        .collect();
    let entry = Definition {
        name: "entry".to_string(),
        kind: "function_item".to_string(),
        start_line: 1,
        end_line: 5,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: entry_calls,
        decorator: None,
        lsp_kind_hint: None,
    };

    let mut defs = vec![entry];
    // helper1..=helper6 are leaves; `u32::from` keeps the u16 -> u32 widening
    // explicit and lossless, matching the other tests in this file.
    defs.extend((1u16..=6).map(|i| {
        def(
            &format!("helper{i}"),
            10 + u32::from(i) * 5,
            12 + u32::from(i) * 5,
        )
    }));
    // helper7 is the only helper with an outgoing edge: it reaches deep1.
    defs.push(def_calling("helper7", 100, 105, (0u32, 8)));
    defs.push(def("deep1", 200, 205));

    let files = vec![file_node("src/lib.rs", defs)];
    let graph = build_graph_from_files_pub(files);

    assert!(
        graph.def_callees[0].len() <= 25,
        "precondition: def_callees neighbor list is bounded by MAX_NEIGHBORS=25"
    );
    assert!(
        graph.def_edges.len() >= 8,
        "precondition: def_edges holds all 7 entry→helper edges + helper7→deep1"
    );

    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.total_dead_defs,
        0,
        "all 7 helpers + deep1 transitively reachable from entry: \
         dead_clusters={:?}",
        report
            .dead_clusters
            .iter()
            .map(|c| (c.size, c.root_def_idx))
            .collect::<Vec<_>>()
    );
    assert!(
        report.dead_clusters.is_empty(),
        "no clusters expected when every def is reachable"
    );
}
/// A single hub calling 10 000 leaf definitions must be analysed in under
/// 30 seconds, with every definition reported live.
///
/// The timed window starts before the graph is built, so it covers graph
/// construction as well as the `compute_dead_code` call.
#[test]
fn dead_code_completes_on_high_fanout_synthetic_graph() {
    const N_CALLEES: u16 = 10_000;

    // Index 0 is the hub; indices 1..=N_CALLEES are its callees.
    let hub_calls: Vec<CallRef> = (1..=N_CALLEES)
        .map(|di| CallRef {
            name: format!("callee_{di}"),
            qualified_path: None,
            receiver_type: None,
            byte_offset: 0,
            resolved: Some((0u32, di)),
        })
        .collect();
    let hub = Definition {
        name: "hub".to_string(),
        kind: "function_item".to_string(),
        start_line: 1,
        end_line: 5,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: hub_calls,
        decorator: None,
        lsp_kind_hint: None,
    };

    let mut defs: Vec<Definition> = Vec::with_capacity(usize::from(N_CALLEES) + 1);
    defs.push(hub);
    defs.extend((1..=N_CALLEES).map(|di| {
        def(
            &format!("callee_{di}"),
            10 + u32::from(di) * 2,
            11 + u32::from(di) * 2,
        )
    }));
    let files = vec![file_node("src/hub.rs", defs)];

    let t0 = std::time::Instant::now();
    let graph = build_graph_from_files_pub(files);
    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);
    let elapsed = t0.elapsed();

    assert!(
        elapsed.as_secs() < 30,
        "compute_dead_code on 10k-fanout hub took {elapsed:?}, expected < 30 s"
    );
    assert_eq!(
        report.total_dead_defs, 0,
        "all callees should be reachable from the hub; got {} dead",
        report.total_dead_defs
    );
    assert_eq!(
        report.total_live_defs,
        usize::from(N_CALLEES) + 1,
        "expected hub + N_CALLEES live; got {}",
        report.total_live_defs
    );
}
/// Reachability must be computed over the complete `def_edges` list rather
/// than the bounded `def_callees` neighbor list.
///
/// Fixture (single file): `entry` at index 0 calls `helper1` ..= `helper10`
/// (indices 1..=10); `helper10` calls `deep1` at index 11. Seeding from
/// `entry` must leave nothing dead.
///
/// NOTE(review): `entry` has only 10 callees, below the cap of 25 quoted in
/// the precondition message — confirm this fixture actually exercises
/// truncation.
#[test]
fn dead_code_uses_full_edges_not_truncated_neighbors() {
    let mut entry_calls: Vec<CallRef> = Vec::new();
    for di in 1u16..=10 {
        entry_calls.push(CallRef {
            name: format!("helper{di}"),
            qualified_path: None,
            receiver_type: None,
            byte_offset: 0,
            resolved: Some((0u32, di)),
        });
    }
    let entry = Definition {
        name: "entry".to_string(),
        kind: "function_item".to_string(),
        start_line: 1,
        end_line: 5,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: entry_calls,
        decorator: None,
        lsp_kind_hint: None,
    };

    let mut defs = vec![entry];
    // helper1..=helper9 are leaves.
    defs.extend((1u16..=9).map(|i| {
        def(
            &format!("helper{i}"),
            10 + u32::from(i) * 5,
            12 + u32::from(i) * 5,
        )
    }));
    // helper10 carries the one second-level edge, to deep1.
    defs.push(def_calling("helper10", 200, 205, (0u32, 11)));
    defs.push(def("deep1", 300, 305));

    let graph = build_graph_from_files_pub(vec![file_node("src/hub.rs", defs)]);

    assert!(
        graph.def_callees[0].len() <= 25,
        "precondition: def_callees neighbor list is bounded by MAX_NEIGHBORS=25"
    );
    assert!(
        graph.def_edges.len() >= 11,
        "precondition: def_edges holds all 11 untruncated call edges (got {})",
        graph.def_edges.len()
    );

    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.total_dead_defs,
        0,
        "all 10 helpers + deep1 reachable from entry via untruncated edges; \
         dead_clusters={:?}",
        report
            .dead_clusters
            .iter()
            .map(|c| (c.size, c.root_def_idx))
            .collect::<Vec<_>>()
    );
}
/// Analyses a 100-hub x 100-callee graph seeded from `hub_0` only.
///
/// File 0 holds the hubs and file 1 the callees; hub `h` calls callee indices
/// `h * FANOUT ..= h * FANOUT + FANOUT - 1` of file 1. Only `hub_0` and its
/// 100 callees may be live; the other 9 999 definitions must be dead.
///
/// Despite the test's name, the only resource bound asserted here is
/// wall-clock time (under 30 s for the `compute_dead_code` call; graph
/// construction happens before the timer starts).
#[test]
fn dead_code_memory_bounded_at_scale() {
    const N_HUBS: u16 = 100;
    const FANOUT: u16 = 100;

    let hub_defs: Vec<Definition> = (0..N_HUBS)
        .map(|h| {
            let calls: Vec<CallRef> = (0..FANOUT)
                .map(|c| {
                    // Largest value is 99 * 100 + 99 = 9 999, which fits in u16.
                    let callee_di = h * FANOUT + c;
                    CallRef {
                        name: format!("callee_{h}_{c}"),
                        qualified_path: None,
                        receiver_type: None,
                        byte_offset: 0,
                        resolved: Some((1u32, callee_di)),
                    }
                })
                .collect();
            Definition {
                name: format!("hub_{h}"),
                kind: "function_item".to_string(),
                start_line: 1 + u32::from(h) * 10,
                end_line: 5 + u32::from(h) * 10,
                scope: String::new(),
                signature: None,
                start_byte: 0,
                end_byte: 0,
                calls,
                decorator: None,
                lsp_kind_hint: None,
            }
        })
        .collect();

    // Callees are laid out hub-major so that index `h * FANOUT + c` matches
    // the `resolved` targets built above.
    let callee_defs: Vec<Definition> = (0..N_HUBS)
        .flat_map(|h| {
            (0..FANOUT).map(move |c| {
                def(
                    &format!("callee_{h}_{c}"),
                    1 + u32::from(c) * 3,
                    2 + u32::from(c) * 3,
                )
            })
        })
        .collect();

    let graph = build_graph_from_files_pub(vec![
        file_node("src/hubs.rs", hub_defs),
        file_node("src/callees.rs", callee_defs),
    ]);

    let t0 = std::time::Instant::now();
    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);
    let elapsed = t0.elapsed();

    assert!(
        elapsed.as_secs() < 30,
        "compute_dead_code on 100×100 hub-fanout took {elapsed:?}, expected < 30 s"
    );
    assert_eq!(
        report.total_live_defs, 101,
        "hub_0 + 100 callees = 101 live, got {}",
        report.total_live_defs
    );
    assert_eq!(
        report.total_dead_defs, 9_999,
        "99 unreached hubs + 9_900 unreached callees = 9_999 dead, got {}",
        report.total_dead_defs
    );
}