ripvec-core 4.1.0

Semantic code + document search engine. Cacheless static-embedding + cross-encoder rerank by default; optional ModernBERT/BGE transformer engines with GPU backends. Tree-sitter chunking, hybrid BM25 + PageRank, composable ranking layers.
Documentation
//! Integration tests for `compute_dead_code` (4.1.0 Front X2/X3).
//!
//! All tests operate on synthetic [`RepoGraph`]s built via
//! [`build_graph_from_files_pub`] so there is no disk I/O except for the
//! `#[ignore]`-d smoke test.

use std::collections::HashSet;
use std::path::Path;

use ripvec_core::{
    entry_points::detector_for,
    repo_map::{CallRef, Definition, FileNode, build_graph_from_files_pub, compute_dead_code},
};

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/// Creates a `function_item` [`Definition`] called `name` that makes no calls.
///
/// `start_line` and `end_line` are stored as given; the scope is empty, the
/// signature is absent, and both byte offsets are zero.
fn def(name: &str, start_line: u32, end_line: u32) -> Definition {
    let name = name.to_owned();
    let kind = String::from("function_item");
    Definition {
        name,
        kind,
        start_line,
        end_line,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: Vec::new(),
    }
}

/// Creates a `function_item` [`Definition`] called `name` that carries exactly
/// one call whose target is already resolved to `callee`.
///
/// The call itself is always named `"callee"` and has no qualified path or
/// receiver type. The tests in this file pass `callee` as what appears to be a
/// `(file index, def index within that file)` pair -- confirm against
/// `CallRef::resolved` if that matters to you.
fn def_calling(name: &str, start_line: u32, end_line: u32, callee: (u32, u16)) -> Definition {
    // The single outgoing edge of this definition.
    let edge = CallRef {
        name: String::from("callee"),
        qualified_path: None,
        receiver_type: None,
        byte_offset: 0,
        resolved: Some(callee),
    };

    Definition {
        name: name.to_owned(),
        kind: String::from("function_item"),
        start_line,
        end_line,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: vec![edge],
    }
}

/// Wraps `defs` in a [`FileNode`] located at `path`, with no imports.
fn file_node(path: &str, defs: Vec<Definition>) -> FileNode {
    let path = path.to_owned();
    let imports = Vec::new();
    FileNode {
        path,
        defs,
        imports,
    }
}

// ---------------------------------------------------------------------------
// Test 1: compute_dead_code_excludes_reachable_defs
// ---------------------------------------------------------------------------

/// Definitions called (directly or transitively) from the entry set must be
/// counted as live; only the two definitions nobody calls may be reported dead.
#[test]
fn compute_dead_code_excludes_reachable_defs() {
    // File 0: three callers, each with one resolved call into file 1 or 2.
    let entry_file = file_node(
        "src/entries.rs",
        vec![
            def_calling("entry_a", 1, 10, (1, 0)),
            def_calling("entry_b", 12, 20, (2, 0)),
            def_calling("entry_c", 22, 30, (2, 2)),
        ],
    );
    // File 1: reachable_a forwards to reachable_b.
    let first_reachable = file_node(
        "src/reachable1.rs",
        vec![
            def_calling("reachable_a", 1, 5, (1, 1)),
            def("reachable_b", 7, 12),
        ],
    );
    // File 2: reachable_c forwards to reachable_d; reachable_e is called by entry_c.
    let second_reachable = file_node(
        "src/reachable2.rs",
        vec![
            def_calling("reachable_c", 1, 5, (2, 1)),
            def("reachable_d", 7, 12),
            def("reachable_e", 14, 18),
        ],
    );
    // Files 3 and 4: one definition each, with no caller anywhere.
    let dead_a = file_node("src/dead_a.rs", vec![def("dead_x", 1, 5)]);
    let dead_b = file_node("src/dead_b.rs", vec![def("dead_y", 1, 5)]);

    let graph = build_graph_from_files_pub(vec![
        entry_file,
        first_reachable,
        second_reachable,
        dead_a,
        dead_b,
    ]);
    // Flat def indices 0..=2 -- presumably the three defs of the first file.
    let entries: HashSet<usize> = HashSet::from([0, 1, 2]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.total_dead_defs, 2,
        "expected 2 dead defs (dead_x, dead_y), got {}",
        report.total_dead_defs
    );
    assert_eq!(
        report.dead_clusters.len(),
        2,
        "expected 2 dead clusters, got {}",
        report.dead_clusters.len()
    );
    report.dead_clusters.iter().for_each(|cluster| {
        assert_eq!(
            cluster.size, 1,
            "each isolated dead def is a cluster of size 1"
        );
    });
    assert_eq!(
        report.total_live_defs, 8,
        "expected 8 live defs, got {}",
        report.total_live_defs
    );
}

// ---------------------------------------------------------------------------
// Test 2: compute_dead_code_groups_connected_components
// ---------------------------------------------------------------------------

/// A chain of dead definitions linked by calls must be reported as one
/// cluster rather than as three separate ones.
#[test]
fn compute_dead_code_groups_connected_components() {
    // dead_a -> dead_b -> dead_c, all inside file 1; nothing in file 0 calls them.
    let chain = vec![
        def_calling("dead_a", 1, 5, (1, 1)),
        def_calling("dead_b", 7, 12, (1, 2)),
        def("dead_c", 14, 18),
    ];
    let graph = build_graph_from_files_pub(vec![
        file_node("src/main.rs", vec![def("main", 1, 10)]),
        file_node("src/dead_chain.rs", chain),
    ]);

    // Only flat def index 0 is an entry point.
    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.total_dead_defs, 3,
        "dead_a, dead_b, dead_c should be dead"
    );

    let cluster_count = report.dead_clusters.len();
    assert_eq!(
        cluster_count,
        1,
        "connected chain should form 1 cluster, got {}",
        cluster_count
    );

    let only_cluster = &report.dead_clusters[0];
    assert_eq!(
        only_cluster.size, 3,
        "cluster must have all 3 dead members"
    );
}

// ---------------------------------------------------------------------------
// Test 3: compute_dead_code_sorts_clusters_by_size_descending
// ---------------------------------------------------------------------------

/// Dead clusters must come back ordered from largest to smallest.
#[test]
fn compute_dead_code_sorts_clusters_by_size_descending() {
    // File 1: a -> b -> c -> d -> e (five linked defs).
    let chain_of_five = file_node(
        "src/chain5.rs",
        vec![
            def_calling("a", 1, 2, (1, 1)),
            def_calling("b", 3, 4, (1, 2)),
            def_calling("c", 5, 6, (1, 3)),
            def_calling("d", 7, 8, (1, 4)),
            def("e", 9, 10),
        ],
    );
    // File 2: p -> q -> r (three linked defs).
    let chain_of_three = file_node(
        "src/chain3.rs",
        vec![
            def_calling("p", 1, 2, (2, 1)),
            def_calling("q", 3, 4, (2, 2)),
            def("r", 5, 6),
        ],
    );

    let graph = build_graph_from_files_pub(vec![
        file_node("src/main.rs", vec![def("main", 1, 5)]),
        chain_of_five,
        chain_of_three,
        // File 3: a lone def with no calls in or out.
        file_node("src/single.rs", vec![def("z", 1, 2)]),
    ]);
    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(report.dead_clusters.len(), 3, "expected 3 clusters");

    // Cluster sizes in the order the report lists them.
    let sizes = report
        .dead_clusters
        .iter()
        .map(|cluster| cluster.size)
        .collect::<Vec<usize>>();
    assert_eq!(
        sizes[0], 5,
        "largest cluster first (size 5), got sizes: {sizes:?}"
    );
    assert_eq!(sizes[1], 3, "second cluster (size 3), got sizes: {sizes:?}");
    assert_eq!(
        sizes[2], 1,
        "smallest cluster (size 1), got sizes: {sizes:?}"
    );
}

// ---------------------------------------------------------------------------
// Test 4: compute_dead_code_excludes_test_paths_when_requested
// ---------------------------------------------------------------------------

/// Runs the same graph and entry set through `compute_dead_code` with the
/// final flag set to `true` and then `false`, and checks that the dead count
/// goes from 1 to 2.
///
/// NOTE(review): the flag looks like "honour entry points that live under
/// test paths" -- confirm against the `compute_dead_code` signature.
#[test]
fn compute_dead_code_excludes_test_paths_when_requested() {
    let graph = build_graph_from_files_pub(vec![
        file_node("tests/lib_test.rs", vec![def("test_foo", 1, 10)]),
        file_node("src/dead.rs", vec![def("dead_prod", 1, 5)]),
        file_node("src/main.rs", vec![def("main_prod", 1, 5)]),
    ]);

    // Flat indices 0 and 2: presumably `test_foo` and `main_prod`.
    let entry_set: HashSet<usize> = HashSet::from([0, 2]);

    // Flag = true.
    let with_tests = compute_dead_code(&graph, &entry_set, true);
    assert_eq!(
        with_tests.total_dead_defs, 1,
        "with test paths: only dead_prod should be dead, got {}",
        with_tests.total_dead_defs
    );

    // Flag = false.
    let without_tests = compute_dead_code(&graph, &entry_set, false);
    assert_eq!(
        without_tests.total_dead_defs, 2,
        "without test paths: test_foo + dead_prod should both be dead, got {}",
        without_tests.total_dead_defs
    );
}

// ---------------------------------------------------------------------------
// Test 5: compute_dead_code_real_corpus_smoke
// ---------------------------------------------------------------------------

/// Smoke test against the real repository two directory levels above this
/// crate's manifest: collects entry points with the per-extension detectors,
/// runs `compute_dead_code`, and sanity-checks the resulting dead fraction.
#[test]
#[ignore = "expensive -- loads full ripvec corpus; run with --include-ignored"]
fn compute_dead_code_real_corpus_smoke() {
    use ripvec_core::repo_map::build_graph;

    // Walk up two directories from the manifest dir to reach the corpus root.
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let parent_dir = manifest_dir.parent().unwrap();
    let root = parent_dir.parent().unwrap();

    let graph = build_graph(root).expect("build_graph on ripvec root");

    let mut entries: HashSet<usize> = HashSet::new();
    for (file_idx, file) in graph.files.iter().enumerate() {
        // Files without a usable extension fall through with "" as the key.
        let ext = Path::new(&file.path)
            .extension()
            .and_then(|os| os.to_str())
            .unwrap_or_default();
        let detector = match detector_for(ext) {
            Some(found) => found,
            None => continue,
        };

        // Unreadable files are skipped rather than failing the whole test.
        let abs_path = root.join(&file.path);
        let source = match std::fs::read_to_string(&abs_path) {
            Ok(text) => text,
            Err(_) => continue,
        };

        let detections = detector.detect(&source, &abs_path);
        if detections.is_empty() {
            continue;
        }

        // Entry indices are flat: this file's offset plus the def's position.
        let base = graph.def_offsets[file_idx];
        for ep in &detections {
            // Only the first def matching by start line or by name is recorded.
            let hit = file
                .defs
                .iter()
                .position(|candidate| candidate.start_line == ep.line || candidate.name == ep.name);
            if let Some(def_idx) = hit {
                entries.insert(base + def_idx);
            }
        }
    }

    let report = compute_dead_code(&graph, &entries, true);

    assert!(
        0.0 <= report.dead_fraction && report.dead_fraction <= 0.40,
        "dead_fraction {:.3} outside expected [0.0, 0.40] for ripvec corpus",
        report.dead_fraction
    );
    assert!(
        !report.dead_clusters.is_empty(),
        "expected at least one dead cluster on real corpus"
    );

    // Summary line for whoever runs the ignored test by hand.
    eprintln!(
        "ripvec corpus: {} defs total, {} dead ({:.1}%), {} clusters",
        report.total_dead_defs + report.total_live_defs,
        report.total_dead_defs,
        report.dead_fraction * 100.0,
        report.dead_clusters.len()
    );
}

// ---------------------------------------------------------------------------
// Test 6: compute_dead_code_cluster_root_is_highest_rank
// ---------------------------------------------------------------------------

/// The def a cluster names as its root must have a `def_ranks` value at least
/// as large as that of every listed member of the cluster.
#[test]
fn compute_dead_code_cluster_root_is_highest_rank() {
    // d0 -> d1 -> d2 in file 1; nothing in file 0 calls into the chain.
    let dead_defs = vec![
        def_calling("d0", 1, 2, (1, 1)),
        def_calling("d1", 3, 4, (1, 2)),
        def("d2", 5, 6),
    ];
    let graph = build_graph_from_files_pub(vec![
        file_node("src/main.rs", vec![def("main", 1, 5)]),
        file_node("src/dead_cluster.rs", dead_defs),
    ]);
    let entries: HashSet<usize> = HashSet::from([0]);
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.dead_clusters.len(),
        1,
        "should have exactly 1 dead cluster"
    );
    let cluster = &report.dead_clusters[0];
    assert_eq!(cluster.size, 3, "cluster must have 3 members");

    // Compare the root's rank against each member's rank in turn.
    let root_idx = cluster.root_def_idx;
    let root_rank = graph.def_ranks[root_idx];
    for member in cluster.member_def_indices.iter().copied() {
        let member_rank = graph.def_ranks[member];
        assert!(
            root_rank >= member_rank,
            "root (flat {}, rank {:.6}) must have rank >= member (flat {}, rank {:.6})",
            root_idx,
            root_rank,
            member,
            member_rank
        );
    }
}