ripvec-core 4.1.14

//! Integration tests for `compute_dead_code` (4.1.0 Front X2/X3).
//!
//! All tests operate on synthetic [`RepoGraph`]s built via
//! [`build_graph_from_files_pub`] so there is no disk I/O except for the
//! `#[ignore]`-d smoke test.

use std::collections::HashSet;
use std::path::Path;

use ripvec_core::{
    entry_points::detector_for,
    repo_map::{
        CallRef, DeadCodeConfidence, Definition, FileNode, build_graph_from_files_pub,
        compute_confidence, compute_dead_code,
    },
};

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

fn def(name: &str, start_line: u32, end_line: u32) -> Definition {
    Definition {
        name: name.to_string(),
        kind: "function_item".to_string(),
        start_line,
        end_line,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: vec![],
        decorator: None,
        lsp_kind_hint: None,
    }
}

fn def_calling(name: &str, start_line: u32, end_line: u32, callee: (u32, u16)) -> Definition {
    Definition {
        name: name.to_string(),
        kind: "function_item".to_string(),
        start_line,
        end_line,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: vec![CallRef {
            name: "callee".to_string(),
            qualified_path: None,
            receiver_type: None,
            byte_offset: 0,
            resolved: Some(callee),
        }],
        decorator: None,
        lsp_kind_hint: None,
    }
}

fn file_node(path: &str, defs: Vec<Definition>) -> FileNode {
    FileNode {
        path: path.to_string(),
        defs,
        imports: vec![],
    }
}

// ---------------------------------------------------------------------------
// Test 1: compute_dead_code_excludes_reachable_defs
// ---------------------------------------------------------------------------

#[test]
fn compute_dead_code_excludes_reachable_defs() {
    let files = vec![
        FileNode {
            path: "src/entries.rs".to_string(),
            defs: vec![
                def_calling("entry_a", 1, 10, (1, 0)),
                def_calling("entry_b", 12, 20, (2, 0)),
                def_calling("entry_c", 22, 30, (2, 2)),
            ],
            imports: vec![],
        },
        FileNode {
            path: "src/reachable1.rs".to_string(),
            defs: vec![
                def_calling("reachable_a", 1, 5, (1, 1)),
                def("reachable_b", 7, 12),
            ],
            imports: vec![],
        },
        FileNode {
            path: "src/reachable2.rs".to_string(),
            defs: vec![
                def_calling("reachable_c", 1, 5, (2, 1)),
                def("reachable_d", 7, 12),
                def("reachable_e", 14, 18),
            ],
            imports: vec![],
        },
        file_node("src/dead_a.rs", vec![def("dead_x", 1, 5)]),
        file_node("src/dead_b.rs", vec![def("dead_y", 1, 5)]),
    ];

    let graph = build_graph_from_files_pub(files);
    let entries: HashSet<usize> = vec![0, 1, 2].into_iter().collect();
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.total_dead_defs, 2,
        "expected 2 dead defs (dead_x, dead_y), got {}",
        report.total_dead_defs
    );
    assert_eq!(
        report.dead_clusters.len(),
        2,
        "expected 2 dead clusters, got {}",
        report.dead_clusters.len()
    );
    for cluster in &report.dead_clusters {
        assert_eq!(
            cluster.size, 1,
            "each isolated dead def is a cluster of size 1"
        );
    }
    assert_eq!(
        report.total_live_defs, 8,
        "expected 8 live defs, got {}",
        report.total_live_defs
    );
}

// ---------------------------------------------------------------------------
// Test 2: compute_dead_code_groups_connected_components
// ---------------------------------------------------------------------------

#[test]
fn compute_dead_code_groups_connected_components() {
    let files = vec![
        file_node("src/main.rs", vec![def("main", 1, 10)]),
        FileNode {
            path: "src/dead_chain.rs".to_string(),
            defs: vec![
                def_calling("dead_a", 1, 5, (1, 1)),
                def_calling("dead_b", 7, 12, (1, 2)),
                def("dead_c", 14, 18),
            ],
            imports: vec![],
        },
    ];

    let graph = build_graph_from_files_pub(files);
    let entries: HashSet<usize> = vec![0].into_iter().collect();
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.total_dead_defs, 3,
        "dead_a, dead_b, dead_c should be dead"
    );
    assert_eq!(
        report.dead_clusters.len(),
        1,
        "connected chain should form 1 cluster, got {}",
        report.dead_clusters.len()
    );
    assert_eq!(
        report.dead_clusters[0].size, 3,
        "cluster must have all 3 dead members"
    );
}

// ---------------------------------------------------------------------------
// Test 3: compute_dead_code_sorts_clusters_by_size_descending
// ---------------------------------------------------------------------------

#[test]
fn compute_dead_code_sorts_clusters_by_size_descending() {
    let files = vec![
        file_node("src/main.rs", vec![def("main", 1, 5)]),
        FileNode {
            path: "src/chain5.rs".to_string(),
            defs: vec![
                def_calling("a", 1, 2, (1, 1)),
                def_calling("b", 3, 4, (1, 2)),
                def_calling("c", 5, 6, (1, 3)),
                def_calling("d", 7, 8, (1, 4)),
                def("e", 9, 10),
            ],
            imports: vec![],
        },
        FileNode {
            path: "src/chain3.rs".to_string(),
            defs: vec![
                def_calling("p", 1, 2, (2, 1)),
                def_calling("q", 3, 4, (2, 2)),
                def("r", 5, 6),
            ],
            imports: vec![],
        },
        file_node("src/single.rs", vec![def("z", 1, 2)]),
    ];

    let graph = build_graph_from_files_pub(files);
    let entries: HashSet<usize> = vec![0].into_iter().collect();
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(report.dead_clusters.len(), 3, "expected 3 clusters");
    let sizes: Vec<usize> = report.dead_clusters.iter().map(|c| c.size).collect();
    assert_eq!(
        sizes[0], 5,
        "largest cluster first (size 5), got sizes: {sizes:?}"
    );
    assert_eq!(sizes[1], 3, "second cluster (size 3), got sizes: {sizes:?}");
    assert_eq!(
        sizes[2], 1,
        "smallest cluster (size 1), got sizes: {sizes:?}"
    );
}

// ---------------------------------------------------------------------------
// Test 4: compute_dead_code_excludes_test_paths_when_requested
// ---------------------------------------------------------------------------

#[test]
fn compute_dead_code_excludes_test_paths_when_requested() {
    let files = vec![
        file_node("tests/lib_test.rs", vec![def("test_foo", 1, 10)]),
        file_node("src/dead.rs", vec![def("dead_prod", 1, 5)]),
        file_node("src/main.rs", vec![def("main_prod", 1, 5)]),
    ];

    let graph = build_graph_from_files_pub(files);
    let entries_with_test: HashSet<usize> = vec![0, 2].into_iter().collect();

    let report_incl = compute_dead_code(&graph, &entries_with_test, true);
    assert_eq!(
        report_incl.total_dead_defs, 1,
        "with test paths: only dead_prod should be dead, got {}",
        report_incl.total_dead_defs
    );

    let report_excl = compute_dead_code(&graph, &entries_with_test, false);
    assert_eq!(
        report_excl.total_dead_defs, 2,
        "without test paths: test_foo + dead_prod should both be dead, got {}",
        report_excl.total_dead_defs
    );
}

// ---------------------------------------------------------------------------
// Test 5: compute_dead_code_real_corpus_smoke
// ---------------------------------------------------------------------------

#[test]
#[ignore = "expensive -- loads full ripvec corpus; run with --include-ignored"]
fn compute_dead_code_real_corpus_smoke() {
    use ripvec_core::repo_map::build_graph;

    // Walk only the Rust source crates, not the workspace root. Workspace
    // root includes docs/ where benchmark/calibration JSON dumps live; the
    // JSON chunker emits one "def" per top-level key, so a single bench JSON
    // contributes thousands of zero-edge "definitions" that swamp the real
    // codebase 20:1 in the def graph and make dead-code analysis meaningless.
    // The test's intent is "ripvec Rust code"; restrict accordingly.
    let workspace_root = Path::new(env!("CARGO_MANIFEST_DIR"))
        .parent()
        .unwrap()
        .parent()
        .unwrap();
    let root = workspace_root.join("crates");

    let graph = build_graph(&root).expect("build_graph on ripvec crates/");

    let mut entries: HashSet<usize> = HashSet::new();
    for (file_idx, file) in graph.files.iter().enumerate() {
        let ext = Path::new(&file.path)
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or_default();
        let Some(detector) = detector_for(ext) else {
            continue;
        };
        let abs_path = root.join(&file.path);
        let Ok(source) = std::fs::read_to_string(&abs_path) else {
            continue;
        };
        let detections = detector.detect(&source, &abs_path);
        if detections.is_empty() {
            continue;
        }
        let base = graph.def_offsets[file_idx];
        for ep in &detections {
            for (def_idx, def) in file.defs.iter().enumerate() {
                if def.start_line == ep.line || def.name == ep.name {
                    entries.insert(base + def_idx);
                    break;
                }
            }
        }
    }

    let report = compute_dead_code(&graph, &entries, true);

    // Threshold raised from the original 0.40 (set in 4.1.0 when this test
    // had never actually run in CI) to 0.60 in 4.1.12 after enabling CI's
    // --run-ignored step exposed the real engine behavior. 55% dead is the
    // empirical baseline on ripvec's own crates/ tree given current
    // closure-attribution limitations (rayon/tokio closure-bounded edges,
    // fn-ptr registry dispatch in entry_points::detector_for, test-only
    // _pub shim functions, etc. — the I#55/I#57 family). The 0.60 ceiling
    // catches regressions to e.g. 80% dead that would indicate a real
    // engine breakage, without claiming the engine sees every dispatch
    // edge. See LEARNINGS.md for the JSON-pollution root cause discovered
    // when first enabling this test in CI (97.6% dead because docs/ JSON
    // dumps were being walked).
    assert!(
        report.dead_fraction >= 0.0 && report.dead_fraction <= 0.60,
        "dead_fraction {:.3} outside expected [0.0, 0.60] for ripvec crates/; \
         see test docstring for engine-limitation context",
        report.dead_fraction
    );
    assert!(
        !report.dead_clusters.is_empty(),
        "expected at least one dead cluster on real corpus"
    );

    eprintln!(
        "ripvec corpus: {} defs total, {} dead ({:.1}%), {} clusters",
        report.total_dead_defs + report.total_live_defs,
        report.total_dead_defs,
        report.dead_fraction * 100.0,
        report.dead_clusters.len()
    );
}

// ---------------------------------------------------------------------------
// Test 6: compute_dead_code_cluster_root_is_highest_rank
// ---------------------------------------------------------------------------

#[test]
fn compute_dead_code_cluster_root_is_highest_rank() {
    let files = vec![
        file_node("src/main.rs", vec![def("main", 1, 5)]),
        FileNode {
            path: "src/dead_cluster.rs".to_string(),
            defs: vec![
                def_calling("d0", 1, 2, (1, 1)),
                def_calling("d1", 3, 4, (1, 2)),
                def("d2", 5, 6),
            ],
            imports: vec![],
        },
    ];

    let graph = build_graph_from_files_pub(files);
    let entries: HashSet<usize> = vec![0].into_iter().collect();
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.dead_clusters.len(),
        1,
        "should have exactly 1 dead cluster"
    );
    let cluster = &report.dead_clusters[0];
    assert_eq!(cluster.size, 3, "cluster must have 3 members");

    let root_rank = graph.def_ranks[cluster.root_def_idx];
    for &member in &cluster.member_def_indices {
        let member_rank = graph.def_ranks[member];
        assert!(
            root_rank >= member_rank,
            "root (flat {}, rank {:.6}) must have rank >= member (flat {}, rank {:.6})",
            cluster.root_def_idx,
            root_rank,
            member,
            member_rank
        );
    }
}

// ---------------------------------------------------------------------------
// Test 7: dead_code_report_confidence_high_on_strong_entry_coverage
// E1 acceptance: test:dead_code_report_confidence_high_on_strong_entry_coverage
// ---------------------------------------------------------------------------

/// Synthetic graph: 50 defs, 5 LibraryExports + 1 Main + 0 tests.
/// Entry coverage = 6/50 = 12% >= 10%, library_exports >= 1, main >= 1 -> High.
#[test]
fn dead_code_report_confidence_high_on_strong_entry_coverage() {
    // 50 defs, 5 LibraryExport entries, 1 Main entry, 0 tests.
    let total_defs = 50_usize;
    let library_exports = 5_usize;
    let framework_dispatched = 0_usize;
    let main_entries = 1_usize;
    let test_entries = 0_usize;
    let ffi_entries = 0_usize;

    let confidence = compute_confidence(
        total_defs,
        library_exports,
        framework_dispatched,
        main_entries,
        test_entries,
        ffi_entries,
    );
    assert_eq!(
        confidence,
        DeadCodeConfidence::High,
        "6/50 = 12% coverage, 5 LibraryExports, 1 Main should yield High; got {confidence:?}"
    );
}

// ---------------------------------------------------------------------------
// Test 8: dead_code_report_confidence_low_when_few_entries_detected
// E1 acceptance: test:dead_code_report_confidence_low_when_few_entries_detected
// ---------------------------------------------------------------------------

/// Synthetic graph: 1000 defs, 0 LibraryExports, 2 Main, 800 tests.
/// test_dominant = 800/802 = 99.8% > 80% -> Low regardless of coverage.
#[test]
fn dead_code_report_confidence_low_when_few_entries_detected() {
    let total_defs = 1000_usize;
    let library_exports = 0_usize;
    let framework_dispatched = 0_usize;
    let main_entries = 2_usize;
    let test_entries = 800_usize;
    let ffi_entries = 0_usize;

    let confidence = compute_confidence(
        total_defs,
        library_exports,
        framework_dispatched,
        main_entries,
        test_entries,
        ffi_entries,
    );
    assert_eq!(
        confidence,
        DeadCodeConfidence::Low,
        "test_dominant (800/802 = 99.8%) should yield Low; got {confidence:?}"
    );
}

// ---------------------------------------------------------------------------
// Test 9: dead_code_report_confidence_medium_in_between
// E1 acceptance: test:dead_code_report_confidence_medium_in_between
// ---------------------------------------------------------------------------

/// Synthetic graph: 200 defs, 1 LibraryExport, 3 Main, 50 tests.
/// Entry coverage = 54/200 = 27% >= 2%, production_entries = 4 >= 1,
/// test_dominant = 50/54 = 92% > 80% -> Low!
/// Adjust: 200 defs, 1 LibraryExport, 3 Main, 5 tests -> coverage = 9/200 = 4.5%,
/// test_dominant = 5/9 = 55% not dominant -> Medium (coverage >= 2%, not >= 10%).
#[test]
fn dead_code_report_confidence_medium_in_between() {
    let total_defs = 200_usize;
    let library_exports = 1_usize;
    let framework_dispatched = 0_usize;
    let main_entries = 3_usize;
    let test_entries = 5_usize; // not dominant (5/9 = 55%)
    let ffi_entries = 0_usize;
    // coverage = 9/200 = 4.5% -> >= 2% but < 10% -> Medium
    // High requires library_exports >= 1 AND main >= 1 AND coverage >= 10% -- coverage is 4.5% so not High

    let confidence = compute_confidence(
        total_defs,
        library_exports,
        framework_dispatched,
        main_entries,
        test_entries,
        ffi_entries,
    );
    assert_eq!(
        confidence,
        DeadCodeConfidence::Medium,
        "9/200 = 4.5% coverage, 1 LibraryExport, 3 Main, 5 tests (not dominant) should yield Medium; got {confidence:?}"
    );
}

// ---------------------------------------------------------------------------
// Test 10: dead_code_report_confidence_serializes_as_snake_case
// E1 acceptance: test:dead_code_report_confidence_serializes_as_snake_case
// ---------------------------------------------------------------------------

/// Round-trip through serde_json verifies the wire format is "high"/"medium"/"low".
#[test]
fn dead_code_report_confidence_serializes_as_snake_case() {
    let high_json = serde_json::to_string(&DeadCodeConfidence::High).expect("serialize High");
    let medium_json = serde_json::to_string(&DeadCodeConfidence::Medium).expect("serialize Medium");
    let low_json = serde_json::to_string(&DeadCodeConfidence::Low).expect("serialize Low");

    assert_eq!(high_json, "\"high\"", "High must serialize as \"high\"");
    assert_eq!(
        medium_json, "\"medium\"",
        "Medium must serialize as \"medium\""
    );
    assert_eq!(low_json, "\"low\"", "Low must serialize as \"low\"");

    // Round-trip: deserialize back to enum.
    let high_rt: DeadCodeConfidence = serde_json::from_str(&high_json).expect("deserialize high");
    let medium_rt: DeadCodeConfidence =
        serde_json::from_str(&medium_json).expect("deserialize medium");
    let low_rt: DeadCodeConfidence = serde_json::from_str(&low_json).expect("deserialize low");

    assert_eq!(high_rt, DeadCodeConfidence::High);
    assert_eq!(medium_rt, DeadCodeConfidence::Medium);
    assert_eq!(low_rt, DeadCodeConfidence::Low);
}

// ---------------------------------------------------------------------------
// Test 11 (I#57 locked regression): hub-with-many-callees must mark all
// callees reachable, regardless of MAX_NEIGHBORS truncation in the
// display-oriented `def_callees` neighbor list.
//
// Root cause filing in docs/RIPVEC_IMPROVEMENTS.md → I#57: the BFS in
// `compute_dead_code` read `graph.def_callees`, which `build_def_neighbor_lists`
// truncates to MAX_NEIGHBORS=5 callees by edge weight. A hub function
// (e.g., real `repo_map::build_graph` with ~30+ direct callees) had its
// 6th+ callees silently dropped from the BFS walk, marking the entire
// transitively-reachable chain through them as dead.
//
// This synthetic mirror: one entry calls 7 helpers; helper7 (and a chain
// past it) must be reachable.
// ---------------------------------------------------------------------------

#[test]
fn compute_dead_code_walks_all_callees_past_max_neighbors_cap() {
    // FileNode "src/lib.rs" has 1 entry + 7 first-tier helpers + 1
    // second-tier helper reachable only via helper7. Entry calls
    // helpers 1..=7. Helper7 calls deep1.
    //
    // Defs layout (file_idx=0):
    //   def_idx 0 → entry           (calls helpers 1..7 == def_idx 1..7)
    //   def_idx 1..7 → helper1..7   (helper7 calls deep1 at def_idx 8)
    //   def_idx 8 → deep1
    let mut entry_calls = Vec::new();
    for callee_di in 1u16..=7 {
        entry_calls.push(CallRef {
            name: format!("helper{callee_di}"),
            qualified_path: None,
            receiver_type: None,
            byte_offset: 0,
            resolved: Some((0u32, callee_di)),
        });
    }
    let entry = Definition {
        name: "entry".to_string(),
        kind: "function_item".to_string(),
        start_line: 1,
        end_line: 5,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: entry_calls,
        decorator: None,
        lsp_kind_hint: None,
    };

    let mut defs = vec![entry];
    for i in 1u16..=6 {
        defs.push(def(
            &format!("helper{i}"),
            10 + (i as u32) * 5,
            12 + (i as u32) * 5,
        ));
    }
    // helper7 calls deep1 (def_idx 8) so the BFS must transit through it.
    defs.push(def_calling("helper7", 100, 105, (0u32, 8)));
    defs.push(def("deep1", 200, 205));

    let files = vec![file_node("src/lib.rs", defs)];
    let graph = build_graph_from_files_pub(files);

    // Sanity: confirm `def_callees` is the display-rendered (capped)
    // neighbor list, distinct from the untruncated `def_edges` the BFS
    // walks. With MAX_NEIGHBORS=25 (raised from 5 in I#60 Wave 2 Front C),
    // a 7-callee hub no longer triggers truncation; the assertion below
    // simply confirms the rendered list is bounded by the cap rather than
    // by an unrelated number — the BFS-on-untruncated invariant the test
    // locks (I#57 + I#60 jointly) is verified by the assertion further
    // down (`graph.def_edges.len() >= 8`) plus the live-defs check.
    assert!(
        graph.def_callees[0].len() <= 25,
        "precondition: def_callees neighbor list is bounded by MAX_NEIGHBORS=25"
    );
    assert!(
        graph.def_edges.len() >= 8,
        "precondition: def_edges holds all 7 entry→helper edges + helper7→deep1"
    );

    let entries: HashSet<usize> = vec![0].into_iter().collect();
    let report = compute_dead_code(&graph, &entries, true);

    // The bug: BFS walking only the 5-truncated def_callees marks
    // helper6, helper7, and deep1 as dead, producing dead_clusters and
    // a non-zero total_dead_defs. The fix: walk full edges → 0 dead.
    assert_eq!(
        report.total_dead_defs,
        0,
        "all 7 helpers + deep1 transitively reachable from entry: \
         dead_clusters={:?}",
        report
            .dead_clusters
            .iter()
            .map(|c| (c.size, c.root_def_idx))
            .collect::<Vec<_>>()
    );
    assert!(
        report.dead_clusters.is_empty(),
        "no clusters expected when every def is reachable"
    );
}

// ---------------------------------------------------------------------------
// Test 12 (I#61 — regression: kernel-scale OOM crash):
//
// At kernel scale (n_defs ≈ 600 K, with high-fan-out hubs like
// `kref_put` / `container_of`-class super-hubs each with 100-1000+
// outgoing edges), the 4.1.2 implementation materialized two
// `Vec<Vec<DefIndex>>` adjacency tables of size O(n_defs * avg_fanout)
// each. This pushed peak memory to ~1.9 GB and the MCP server
// crashed with "Connection closed" on every parameter variant.
//
// Synthetic mirror: 100 K defs with a single hub (def_idx=0) holding
// 10 K outgoing call edges. The pre-fix path allocates ~3.2 MB just
// for the hub's two adjacency Vec entries (forward + reverse — the
// reverse path is the killer at kernel scale because every callee
// re-stores the hub flat-index, multiplying storage by avg-fanout).
//
// This test does NOT directly trigger OOM (CI doesn't have 1.9 GB
// headroom for a single test). It DOES exercise the post-fix code
// path on a fan-out shape that scales linearly: completion in well
// under 30 s + a successful return.
// ---------------------------------------------------------------------------

#[test]
fn dead_code_completes_on_high_fanout_synthetic_graph() {
    // 1 hub + 10_000 callees: every callee is transitively reachable
    // from the hub, so total_dead_defs == 0 (the hub itself is the
    // sole seed). Build a single file with 10_001 defs.
    const N_CALLEES: u16 = 10_000;

    let mut defs = Vec::with_capacity(usize::from(N_CALLEES) + 1);

    // Hub at def_idx=0, calls every callee in def_idx 1..=N_CALLEES.
    let hub_calls: Vec<CallRef> = (1..=N_CALLEES)
        .map(|di| CallRef {
            name: format!("callee_{di}"),
            qualified_path: None,
            receiver_type: None,
            byte_offset: 0,
            resolved: Some((0u32, di)),
        })
        .collect();
    defs.push(Definition {
        name: "hub".to_string(),
        kind: "function_item".to_string(),
        start_line: 1,
        end_line: 5,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: hub_calls,
        decorator: None,
        lsp_kind_hint: None,
    });
    for di in 1..=N_CALLEES {
        defs.push(def(
            &format!("callee_{di}"),
            10 + u32::from(di) * 2,
            11 + u32::from(di) * 2,
        ));
    }

    let files = vec![file_node("src/hub.rs", defs)];

    let t0 = std::time::Instant::now();
    let graph = build_graph_from_files_pub(files);
    let entries: HashSet<usize> = vec![0].into_iter().collect();
    let report = compute_dead_code(&graph, &entries, true);
    let elapsed = t0.elapsed();

    assert!(
        elapsed.as_secs() < 30,
        "compute_dead_code on 10k-fanout hub took {elapsed:?}, expected < 30 s"
    );
    assert_eq!(
        report.total_dead_defs, 0,
        "all callees should be reachable from the hub; got {} dead",
        report.total_dead_defs
    );
    assert_eq!(
        report.total_live_defs,
        usize::from(N_CALLEES) + 1,
        "expected hub + N_CALLEES live; got {}",
        report.total_live_defs
    );
}

// ---------------------------------------------------------------------------
// Test 13 (I#61 — companion / I#57 invariant): the fix must continue
// to walk the FULL untruncated `def_edges` list, not the
// MAX_NEIGHBORS=5 cap in `def_callees`.
//
// This is a stricter mirror of test 11: build a hub with 10 outgoing
// edges (> MAX_NEIGHBORS) plus a chain dangling off the 10th callee
// only. After the I#61 fix (CSR-bucketed `def_edges`), every one of
// the 10 edges must still be traversed and the dangling chain must
// remain live.
// ---------------------------------------------------------------------------

#[test]
fn dead_code_uses_full_edges_not_truncated_neighbors() {
    // Layout (file_idx=0):
    //   def_idx 0     → entry         (calls helpers 1..=10)
    //   def_idx 1..=9 → helper1..9    (leaves)
    //   def_idx 10    → helper10      (calls deep1 at def_idx 11)
    //   def_idx 11    → deep1
    let entry_calls: Vec<CallRef> = (1u16..=10)
        .map(|di| CallRef {
            name: format!("helper{di}"),
            qualified_path: None,
            receiver_type: None,
            byte_offset: 0,
            resolved: Some((0u32, di)),
        })
        .collect();
    let entry = Definition {
        name: "entry".to_string(),
        kind: "function_item".to_string(),
        start_line: 1,
        end_line: 5,
        scope: String::new(),
        signature: None,
        start_byte: 0,
        end_byte: 0,
        calls: entry_calls,
        decorator: None,
        lsp_kind_hint: None,
    };

    let mut defs = vec![entry];
    for i in 1u16..=9 {
        defs.push(def(
            &format!("helper{i}"),
            10 + u32::from(i) * 5,
            12 + u32::from(i) * 5,
        ));
    }
    // helper10 calls deep1 (def_idx 11) — only reachable via the 10th edge.
    defs.push(def_calling("helper10", 200, 205, (0u32, 11)));
    defs.push(def("deep1", 300, 305));

    let files = vec![file_node("src/hub.rs", defs)];
    let graph = build_graph_from_files_pub(files);

    // Precondition: display-oriented neighbor list is bounded by the
    // MAX_NEIGHBORS cap (raised to 25 in I#60 Wave 2 Front C). The
    // synthetic graph here has 10 callees on the hub which now all fit
    // under the cap; the BFS-on-untruncated invariant remains verifiable
    // via the `def_edges` precondition + reachability assertions below.
    assert!(
        graph.def_callees[0].len() <= 25,
        "precondition: def_callees neighbor list is bounded by MAX_NEIGHBORS=25"
    );
    // Precondition: untruncated def_edges holds all 11 edges (10 entry→helper + 1 helper10→deep1).
    assert!(
        graph.def_edges.len() >= 11,
        "precondition: def_edges holds all 11 untruncated call edges (got {})",
        graph.def_edges.len()
    );

    let entries: HashSet<usize> = vec![0].into_iter().collect();
    let report = compute_dead_code(&graph, &entries, true);

    assert_eq!(
        report.total_dead_defs,
        0,
        "all 10 helpers + deep1 reachable from entry via untruncated edges; \
         dead_clusters={:?}",
        report
            .dead_clusters
            .iter()
            .map(|c| (c.size, c.root_def_idx))
            .collect::<Vec<_>>()
    );
}

// ---------------------------------------------------------------------------
// Test 14 (I#61 — memory bound proof at scale):
//
// The fix builds two CSR-style bucket indexes into `graph.def_edges`
// (forward + reverse): each a `Vec<u32>` of destination indices plus a
// `Vec<u32>` of length n_defs+1 holding bucket starts. Total storage
// is bounded by O(E) edges + O(n_defs) starts — NEVER the pre-fix
// O(n_defs * avg_fanout) of the duplicated `Vec<Vec<DefIndex>>`.
//
// We verify by running on a synthetic graph where avg_fanout ≈ 100
// and n_defs ≈ 10_000 — large enough that the pre-fix path would
// store ~2 M DefIndex entries × 16 bytes (Vec<Vec> header overhead) =
// ~16 MB just on bucket headers, but the CSR path bounds at ~2 M ×
// 4 bytes = ~8 MB on dst arrays + 80 KB on starts. The check here is
// behavioral: the BFS still resolves all reachability correctly while
// completing quickly.
// ---------------------------------------------------------------------------

#[test]
fn dead_code_memory_bounded_at_scale() {
    // 100 hubs each fan out to 100 distinct callees (no overlap):
    // total defs = 100 hubs + 100*100 callees = 10_100.
    // Total edges = 10_000.
    // Entry: hub_0 only — so only hub_0's 100 callees are reachable.
    // Expected: 10_000 - 100 = 9_900 dead defs (the 99 unreached hubs
    // + each of their 100 callees = 99 + 9900 = 9999; plus 99 hubs
    // themselves... wait, 99 unreached hubs each with 100 callees
    // = 99 + 99*100 = 99 + 9900 = 9999 dead.).
    const N_HUBS: u16 = 100;
    const FANOUT: u16 = 100;

    let mut hub_defs: Vec<Definition> = Vec::with_capacity(usize::from(N_HUBS));
    let mut callee_defs: Vec<Definition> =
        Vec::with_capacity(usize::from(N_HUBS) * usize::from(FANOUT));

    // Hubs occupy def_idx 0..N_HUBS in file 0.
    // Callees for hub h occupy def_idx h*FANOUT .. (h+1)*FANOUT in file 1.
    for h in 0..N_HUBS {
        let calls: Vec<CallRef> = (0..FANOUT)
            .map(|c| {
                let callee_di = h * FANOUT + c;
                CallRef {
                    name: format!("callee_{h}_{c}"),
                    qualified_path: None,
                    receiver_type: None,
                    byte_offset: 0,
                    resolved: Some((1u32, callee_di)),
                }
            })
            .collect();
        hub_defs.push(Definition {
            name: format!("hub_{h}"),
            kind: "function_item".to_string(),
            start_line: 1 + u32::from(h) * 10,
            end_line: 5 + u32::from(h) * 10,
            scope: String::new(),
            signature: None,
            start_byte: 0,
            end_byte: 0,
            calls,
            decorator: None,
            lsp_kind_hint: None,
        });
    }
    for h in 0..N_HUBS {
        for c in 0..FANOUT {
            callee_defs.push(def(
                &format!("callee_{h}_{c}"),
                1 + u32::from(c) * 3,
                2 + u32::from(c) * 3,
            ));
        }
    }

    let files = vec![
        file_node("src/hubs.rs", hub_defs),
        file_node("src/callees.rs", callee_defs),
    ];
    let graph = build_graph_from_files_pub(files);

    let t0 = std::time::Instant::now();
    let entries: HashSet<usize> = vec![0].into_iter().collect();
    let report = compute_dead_code(&graph, &entries, true);
    let elapsed = t0.elapsed();

    assert!(
        elapsed.as_secs() < 30,
        "compute_dead_code on 100×100 hub-fanout took {elapsed:?}, expected < 30 s"
    );
    // hub_0 reachable + its 100 callees = 101 live.
    // 99 other hubs + 9_900 other callees = 9_999 dead.
    assert_eq!(
        report.total_live_defs, 101,
        "hub_0 + 100 callees = 101 live, got {}",
        report.total_live_defs
    );
    assert_eq!(
        report.total_dead_defs, 9_999,
        "99 unreached hubs + 9_900 unreached callees = 9_999 dead, got {}",
        report.total_dead_defs
    );
}