use std::collections::HashSet;
use std::path::Path;
use ripvec_core::{
entry_points::detector_for,
repo_map::{CallRef, Definition, FileNode, build_graph_from_files_pub, compute_dead_code},
};
/// Builds a bare `function_item` [`Definition`] to use as a test fixture.
///
/// Only the name and the line span vary. The scope is empty, there is no
/// signature, both byte offsets are zero, and the definition makes no calls.
fn def(name: &str, start_line: u32, end_line: u32) -> Definition {
    let name = name.to_owned();
    let kind = String::from("function_item");
    Definition {
        name,
        kind,
        start_line,
        end_line,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: Vec::new(),
    }
}
/// Builds a `function_item` [`Definition`] that makes exactly one call.
///
/// The single [`CallRef`] is always named `"callee"`, carries no qualified
/// path or receiver type, and is pre-resolved to `callee`.
/// NOTE(review): the tests in this file pass `callee` as what looks like
/// `(file index, def index within that file)` -- confirm against `CallRef`.
fn def_calling(name: &str, start_line: u32, end_line: u32, callee: (u32, u16)) -> Definition {
    // The one outgoing edge, already resolved so no name lookup is needed.
    let outgoing = CallRef {
        name: String::from("callee"),
        qualified_path: None,
        receiver_type: None,
        byte_offset: 0,
        resolved: Some(callee),
    };
    Definition {
        name: name.to_owned(),
        kind: String::from("function_item"),
        start_line,
        end_line,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: vec![outgoing],
    }
}
/// Wraps `defs` in a [`FileNode`] at `path` with an empty import list.
fn file_node(path: &str, defs: Vec<Definition>) -> FileNode {
    let path = String::from(path);
    FileNode {
        path,
        defs,
        imports: Vec::new(),
    }
}
/// Defs reached from the entry set are live; unreferenced defs are dead.
///
/// Three entry defs each call into one of two helper files, and two further
/// single-def files are referenced by nothing. The report must count the two
/// unreferenced defs as dead, each in its own cluster, and the other eight
/// as live.
#[test]
fn compute_dead_code_excludes_reachable_defs() {
    // File 0: the entry defs, each holding one resolved call into file 1 or 2.
    let entry_file = file_node(
        "src/entries.rs",
        vec![
            def_calling("entry_a", 1, 10, (1, 0)),
            def_calling("entry_b", 12, 20, (2, 0)),
            def_calling("entry_c", 22, 30, (2, 2)),
        ],
    );
    // File 1: reachable_a is called by entry_a and in turn calls reachable_b.
    let first_live_file = file_node(
        "src/reachable1.rs",
        vec![
            def_calling("reachable_a", 1, 5, (1, 1)),
            def("reachable_b", 7, 12),
        ],
    );
    // File 2: reachable_c calls reachable_d; reachable_e is called by entry_c.
    let second_live_file = file_node(
        "src/reachable2.rs",
        vec![
            def_calling("reachable_c", 1, 5, (2, 1)),
            def("reachable_d", 7, 12),
            def("reachable_e", 14, 18),
        ],
    );
    // Files 3 and 4: nothing refers to these.
    let orphan_x = file_node("src/dead_a.rs", vec![def("dead_x", 1, 5)]);
    let orphan_y = file_node("src/dead_b.rs", vec![def("dead_y", 1, 5)]);

    let graph = build_graph_from_files_pub(vec![
        entry_file,
        first_live_file,
        second_live_file,
        orphan_x,
        orphan_y,
    ]);
    // The three defs of file 0.
    let entries: HashSet<usize> = HashSet::from([0, 1, 2]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.total_dead_defs, 2,
        "expected 2 dead defs (dead_x, dead_y), got {}",
        report.total_dead_defs
    );
    assert_eq!(
        report.dead_clusters.len(),
        2,
        "expected 2 dead clusters, got {}",
        report.dead_clusters.len()
    );
    report.dead_clusters.iter().for_each(|cluster| {
        assert_eq!(
            cluster.size, 1,
            "each isolated dead def is a cluster of size 1"
        );
    });
    assert_eq!(
        report.total_live_defs, 8,
        "expected 8 live defs, got {}",
        report.total_live_defs
    );
}
/// Dead defs linked by calls must be reported as a single cluster.
///
/// `main` is the only entry and calls nothing, so the chain
/// `dead_a -> dead_b -> dead_c` in the second file is unreachable.
#[test]
fn compute_dead_code_groups_connected_components() {
    let entry_file = file_node("src/main.rs", vec![def("main", 1, 10)]);
    let chain_file = file_node(
        "src/dead_chain.rs",
        vec![
            def_calling("dead_a", 1, 5, (1, 1)),
            def_calling("dead_b", 7, 12, (1, 2)),
            def("dead_c", 14, 18),
        ],
    );

    let graph = build_graph_from_files_pub(vec![entry_file, chain_file]);
    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.total_dead_defs, 3,
        "dead_a, dead_b, dead_c should be dead"
    );
    let cluster_count = report.dead_clusters.len();
    assert_eq!(
        cluster_count,
        1,
        "connected chain should form 1 cluster, got {}",
        report.dead_clusters.len()
    );
    let only_cluster = &report.dead_clusters[0];
    assert_eq!(
        only_cluster.size, 3,
        "cluster must have all 3 dead members"
    );
}
/// Dead clusters must be listed from largest to smallest.
///
/// The fixture holds a five-def chain, a three-def chain and one isolated
/// def, none of which is reachable from the lone `main` entry.
#[test]
fn compute_dead_code_sorts_clusters_by_size_descending() {
    let entry_file = file_node("src/main.rs", vec![def("main", 1, 5)]);
    // a -> b -> c -> d -> e, all inside file 1.
    let long_chain = file_node(
        "src/chain5.rs",
        vec![
            def_calling("a", 1, 2, (1, 1)),
            def_calling("b", 3, 4, (1, 2)),
            def_calling("c", 5, 6, (1, 3)),
            def_calling("d", 7, 8, (1, 4)),
            def("e", 9, 10),
        ],
    );
    // p -> q -> r, all inside file 2.
    let short_chain = file_node(
        "src/chain3.rs",
        vec![
            def_calling("p", 1, 2, (2, 1)),
            def_calling("q", 3, 4, (2, 2)),
            def("r", 5, 6),
        ],
    );
    let loner = file_node("src/single.rs", vec![def("z", 1, 2)]);

    let graph = build_graph_from_files_pub(vec![entry_file, long_chain, short_chain, loner]);
    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(report.dead_clusters.len(), 3, "expected 3 clusters");

    // Collect the sizes in reported order so failures can print all of them.
    let mut sizes: Vec<usize> = Vec::with_capacity(report.dead_clusters.len());
    for cluster in &report.dead_clusters {
        sizes.push(cluster.size);
    }
    assert_eq!(
        sizes[0], 5,
        "largest cluster first (size 5), got sizes: {sizes:?}"
    );
    assert_eq!(sizes[1], 3, "second cluster (size 3), got sizes: {sizes:?}");
    assert_eq!(
        sizes[2], 1,
        "smallest cluster (size 1), got sizes: {sizes:?}"
    );
}
/// The boolean flag of `compute_dead_code` changes how a def under `tests/`
/// is treated.
///
/// The entry set names the def in `tests/lib_test.rs` and the one in
/// `src/main.rs`. With the flag `true` only `dead_prod` is expected dead;
/// with `false` the test-path def is expected dead as well.
/// NOTE(review): the exact meaning of the flag is not visible in this file --
/// confirm against the `compute_dead_code` documentation.
#[test]
fn compute_dead_code_excludes_test_paths_when_requested() {
    let test_file = file_node("tests/lib_test.rs", vec![def("test_foo", 1, 10)]);
    let dead_file = file_node("src/dead.rs", vec![def("dead_prod", 1, 5)]);
    let main_file = file_node("src/main.rs", vec![def("main_prod", 1, 5)]);

    let graph = build_graph_from_files_pub(vec![test_file, dead_file, main_file]);
    // Indices 0 and 2: test_foo and main_prod.
    let entries_with_test: HashSet<usize> = HashSet::from([0, 2]);

    let report_incl = compute_dead_code(&graph, &entries_with_test, true);
    assert_eq!(
        report_incl.total_dead_defs, 1,
        "with test paths: only dead_prod should be dead, got {}",
        report_incl.total_dead_defs
    );

    let report_excl = compute_dead_code(&graph, &entries_with_test, false);
    assert_eq!(
        report_excl.total_dead_defs, 2,
        "without test paths: test_foo + dead_prod should both be dead, got {}",
        report_excl.total_dead_defs
    );
}
/// Smoke test over the real repository; ignored by default.
///
/// Builds the graph for the directory two levels above this crate's manifest
/// directory, collects entry defs by running the per-extension entry-point
/// detector on every file, and checks that the dead fraction lies within
/// `[0.0, 0.40]` and that at least one dead cluster is reported.
#[test]
#[ignore = "expensive -- loads full ripvec corpus; run with --include-ignored"]
fn compute_dead_code_real_corpus_smoke() {
    use ripvec_core::repo_map::build_graph;

    // Grandparent of the manifest directory.
    let root = Path::new(env!("CARGO_MANIFEST_DIR"))
        .ancestors()
        .nth(2)
        .unwrap();
    let graph = build_graph(root).expect("build_graph on ripvec root");

    let mut entries: HashSet<usize> = HashSet::new();
    for (file_idx, file) in graph.files.iter().enumerate() {
        // Files without a usable extension are looked up under "".
        let ext = Path::new(&file.path)
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or("");
        let Some(detector) = detector_for(ext) else {
            continue;
        };

        // Unreadable files are skipped rather than failing the test.
        let abs_path = root.join(&file.path);
        let source = match std::fs::read_to_string(&abs_path) {
            Ok(text) => text,
            Err(_) => continue,
        };

        let detections = detector.detect(&source, &abs_path);
        if detections.is_empty() {
            continue;
        }

        // Flat index of this file's first def.
        let base = graph.def_offsets[file_idx];
        for ep in &detections {
            // Only the first def matching by start line or by name is recorded.
            let first_match = file
                .defs
                .iter()
                .position(|candidate| candidate.start_line == ep.line || candidate.name == ep.name);
            if let Some(def_idx) = first_match {
                entries.insert(base + def_idx);
            }
        }
    }

    let report = compute_dead_code(&graph, &entries, true);
    assert!(
        (0.0..=0.40).contains(&report.dead_fraction),
        "dead_fraction {:.3} outside expected [0.0, 0.40] for ripvec corpus",
        report.dead_fraction
    );
    assert!(
        !report.dead_clusters.is_empty(),
        "expected at least one dead cluster on real corpus"
    );
    eprintln!(
        "ripvec corpus: {} defs total, {} dead ({:.1}%), {} clusters",
        report.total_dead_defs + report.total_live_defs,
        report.total_dead_defs,
        report.dead_fraction * 100.0,
        report.dead_clusters.len()
    );
}
/// A cluster's root def must have a rank no lower than any of its members.
///
/// Uses one dead chain `d0 -> d1 -> d2` and compares `graph.def_ranks` at the
/// reported root index with the rank of every member index.
#[test]
fn compute_dead_code_cluster_root_is_highest_rank() {
    let entry_file = file_node("src/main.rs", vec![def("main", 1, 5)]);
    let dead_file = file_node(
        "src/dead_cluster.rs",
        vec![
            def_calling("d0", 1, 2, (1, 1)),
            def_calling("d1", 3, 4, (1, 2)),
            def("d2", 5, 6),
        ],
    );

    let graph = build_graph_from_files_pub(vec![entry_file, dead_file]);
    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.dead_clusters.len(),
        1,
        "should have exactly 1 dead cluster"
    );
    let cluster = &report.dead_clusters[0];
    assert_eq!(cluster.size, 3, "cluster must have 3 members");

    let root_rank = graph.def_ranks[cluster.root_def_idx];
    cluster.member_def_indices.iter().for_each(|&member| {
        let member_rank = graph.def_ranks[member];
        assert!(
            root_rank >= member_rank,
            "root (flat {}, rank {:.6}) must have rank >= member (flat {}, rank {:.6})",
            cluster.root_def_idx,
            root_rank,
            member,
            member_rank
        );
    });
}