leindex 1.8.3 - Docs.rs

// Integration tests for Plan 2 search-state compression, compact integer-backed
// metadata/addressing, cache byte ceilings, memory accounting, snapshot clone
// reduction, large-content avoidance, and read-mostly zero-copy expansion.
//
// Validates: VAL-BPHASE-017, VAL-BPHASE-018, VAL-BPHASE-019, VAL-BPHASE-020,
//            VAL-BPHASE-021, VAL-BPHASE-022, VAL-BPHASE-023, VAL-BPHASE-024,
//            VAL-BPHASE-025, VAL-BPHASE-041, VAL-BPHASE-042, VAL-BPHASE-043

use leindex::cli::mcp::edit_cache::{
    EditCache, EditCacheEntry, EDIT_CACHE_MAX_ENTRY_BYTES, EDIT_CACHE_TOTAL_CAP_BYTES,
};
use leindex::edit::EditChange;
use leindex::search::search::{
    NodeInfo, SearchEngine, SearchQuery, TextIndexDelta, SEARCH_CACHE_MAX_BYTES,
    SEARCH_CACHE_MAX_ENTRIES,
};
use leindex::search::vector::{write_mmap_embeddings, MmapEmbeddingIndex};

// ============================================================================
// Helpers
// ============================================================================

/// Create a NodeInfo with the given id, content, and embedding.
fn make_node(id: &str, content: &str, embedding: Vec<f32>) -> NodeInfo {
    NodeInfo {
        node_id: id.to_string(),
        file_path: format!("{}.rs", id),
        symbol_name: id.to_string(),
        language: "rust".to_string(),
        content: content.to_string(),
        byte_range: (0, content.len()),
        tfidf_embedding: embedding,
        neural_embedding: None,
        complexity: (id.len() * 7 % 100) as u32, // deterministic complexity
        signature: None,
        pre_tokenized: None,
    }
}

/// Create a SearchEngine with 3-dim embeddings and index some nodes.
fn make_engine(nodes: Vec<NodeInfo>) -> SearchEngine {
    let mut engine = SearchEngine::with_dimension(3);
    engine.index_nodes(nodes);
    engine
}

/// Run a text search and return node IDs.
fn search_ids(engine: &mut SearchEngine, query: &str) -> Vec<String> {
    let q = SearchQuery {
        query: query.to_string(),
        top_k: 100,
        token_budget: None,
        semantic: false,
        expand_context: false,
        query_embedding: None,
        query_neural_embedding: None,
        threshold: None,
        query_type: None,
    };
    engine
        .search(q)
        .unwrap()
        .into_iter()
        .map(|r| r.node_id)
        .collect()
}

/// Run a semantic search and return node IDs.
fn semantic_ids(engine: &SearchEngine, query: &[f32], top_k: usize) -> Vec<String> {
    engine
        .semantic_search(query, top_k)
        .unwrap()
        .into_iter()
        .map(|e| e.node_id)
        .collect()
}

// ============================================================================
// VAL-BPHASE-017: Search-side complexity metadata remains correct after
//                 resident-state compression
// ============================================================================

#[test]
fn test_complexity_metadata_correct_after_compression() {
    // After indexing, complexity metadata is stored in a compact row-oriented
    // form and remains behaviorally correct.
    let nodes = vec![
        make_node("alpha", "fn alpha() { compute(); }", vec![1.0, 0.0, 0.0]),
        make_node("beta", "fn beta() { render(); }", vec![0.0, 1.0, 0.0]),
        make_node("gamma", "fn gamma() { validate(); }", vec![0.0, 0.0, 1.0]),
    ];

    let engine = make_engine(nodes.clone());

    // Complexity values should be preserved after compression
    for node in &nodes {
        let stored = engine.node_complexity(&node.node_id);
        assert_eq!(
            stored,
            Some(node.complexity),
            "complexity for {} should be preserved, got {:?}",
            node.node_id,
            stored
        );
    }

    // Search results should include correct complexity
    let mut engine = engine;
    let q = SearchQuery {
        query: "alpha".to_string(),
        top_k: 10,
        token_budget: None,
        semantic: false,
        expand_context: false,
        query_embedding: None,
        query_neural_embedding: None,
        threshold: None,
        query_type: None,
    };
    let results = engine.search(q).unwrap();
    let alpha_result = results.iter().find(|r| r.node_id == "alpha").unwrap();
    assert_eq!(alpha_result.complexity, nodes[0].complexity);
}

#[test]
fn test_complexity_correct_after_incremental_update() {
    let nodes = vec![
        make_node("a", "fn a() {}", vec![1.0, 0.0, 0.0]),
        make_node("b", "fn b() {}", vec![0.0, 1.0, 0.0]),
    ];

    let mut engine = make_engine(nodes);

    // Update node "a" with different complexity
    let mut updated_a = make_node("a", "fn a() { complex_logic(); }", vec![0.5, 0.5, 0.0]);
    updated_a.complexity = 42;
    engine.incremental_reindex(TextIndexDelta {
        removed_node_ids: vec![],
        updated_nodes: vec![updated_a],
    });

    assert_eq!(engine.node_complexity("a"), Some(42));
    assert_eq!(
        engine.node_complexity("b"),
        Some(make_node("b", "", vec![]).complexity)
    );
}

// ============================================================================
// VAL-BPHASE-018: Text index postings remain behaviorally correct after
//                 compression
// ============================================================================

#[test]
fn test_text_postings_correct_after_compression() {
    // Keyword/token-assisted retrieval returns the correct node set after
    // postings are stored in compact row-based form.
    let nodes = vec![
        make_node(
            "search_fn",
            "fn search_fn() { find_items(); }",
            vec![1.0, 0.0, 0.0],
        ),
        make_node(
            "render_fn",
            "fn render_fn() { draw_screen(); }",
            vec![0.0, 1.0, 0.0],
        ),
        make_node(
            "validate_fn",
            "fn validate_fn() { check_input(); }",
            vec![0.0, 0.0, 1.0],
        ),
    ];

    let mut engine = make_engine(nodes);

    // Token "find" should map to search_fn
    let results = search_ids(&mut engine, "find_items");
    assert!(
        results.contains(&"search_fn".to_string()),
        "token 'find_items' should find search_fn"
    );

    // Token "draw" should map to render_fn
    let results = search_ids(&mut engine, "draw_screen");
    assert!(
        results.contains(&"render_fn".to_string()),
        "token 'draw_screen' should find render_fn"
    );

    // Token "check" should map to validate_fn
    let results = search_ids(&mut engine, "check_input");
    assert!(
        results.contains(&"validate_fn".to_string()),
        "token 'check_input' should find validate_fn"
    );
}

#[test]
fn test_text_postings_correct_after_delta_update() {
    let nodes = vec![make_node(
        "a",
        "fn a() { old_keyword(); }",
        vec![1.0, 0.0, 0.0],
    )];

    let mut engine = make_engine(nodes);

    // "old_keyword" should find a
    assert!(search_ids(&mut engine, "old_keyword").contains(&"a".to_string()));

    // Update a with new content
    engine.incremental_reindex(TextIndexDelta {
        removed_node_ids: vec![],
        updated_nodes: vec![make_node(
            "a",
            "fn a() { new_keyword(); }",
            vec![1.0, 0.0, 0.0],
        )],
    });

    // "new_keyword" should now find a
    assert!(search_ids(&mut engine, "new_keyword").contains(&"a".to_string()));

    // "old_keyword" should no longer find a (content was replaced)
    // Note: "old" and "keyword" are separate tokens. After update, the node
    // no longer has "old" in its tokens, so searching for "old_keyword" won't
    // find it because "old" is no longer in the node's token set.
    let old_results = search_ids(&mut engine, "old");
    assert!(
        !old_results.contains(&"a".to_string()),
        "node a should not be found by 'old' after content update, got: {:?}",
        old_results
    );
}

// ============================================================================
// VAL-BPHASE-019: Node-token metadata remains correct after compact rewrite
// ============================================================================

#[test]
fn test_node_tokens_correct_after_compact_rewrite() {
    // Search/filter behavior that depends on per-node token metadata still
    // works after resident token structures are compacted and re-keyed by row.
    let nodes = vec![
        make_node(
            "handler",
            "fn handler() { process_request(); }",
            vec![1.0, 0.0, 0.0],
        ),
        make_node(
            "parser",
            "fn parser() { parse_data(); }",
            vec![0.0, 1.0, 0.0],
        ),
    ];

    let engine = make_engine(nodes);

    // Verify node_tokens are populated
    let handler_tokens = engine.node_tokens("handler").unwrap();
    assert!(
        handler_tokens.contains("process"),
        "handler tokens should contain 'process'"
    );
    assert!(
        handler_tokens.contains("request"),
        "handler tokens should contain 'request'"
    );

    let parser_tokens = engine.node_tokens("parser").unwrap();
    assert!(
        parser_tokens.contains("parse"),
        "parser tokens should contain 'parse'"
    );
    assert!(
        parser_tokens.contains("data"),
        "parser tokens should contain 'data'"
    );
}

#[test]
fn test_token_lookup_correct_after_compact_rewrite() {
    let nodes = vec![
        make_node("a", "fn a() { unique_token_alpha(); }", vec![1.0, 0.0, 0.0]),
        make_node("b", "fn b() { unique_token_beta(); }", vec![0.0, 1.0, 0.0]),
    ];

    let engine = make_engine(nodes);

    // "unique" token should map to both a and b
    let unique_nodes = engine.token_lookup("unique").unwrap();
    assert!(unique_nodes.contains("a"));
    assert!(unique_nodes.contains("b"));

    // "alpha" should map only to a
    let alpha_nodes = engine.token_lookup("alpha").unwrap();
    assert!(alpha_nodes.contains("a"));
    assert!(!alpha_nodes.contains("b"));

    // "beta" should map only to b
    let beta_nodes = engine.token_lookup("beta").unwrap();
    assert!(!beta_nodes.contains("a"));
    assert!(beta_nodes.contains("b"));
}

// ============================================================================
// VAL-BPHASE-020: Large source content is not re-retained in resident
//                 search state
// ============================================================================

#[test]
fn test_large_content_not_retained_after_indexing() {
    // After indexing, large source contents are not held as resident in-memory
    // blobs while search/render behavior still works via lazy reload/byte-range
    // strategy.

    // Create nodes with large content
    let large_content: String =
        "fn big_fn() { ".to_string() + &"large_data(); ".repeat(10_000) + "}";
    let nodes = vec![
        make_node("big_fn", &large_content, vec![1.0, 0.0, 0.0]),
        make_node("small_fn", "fn small_fn() {}", vec![0.0, 1.0, 0.0]),
    ];

    let engine = make_engine(nodes);

    // Content should be cleared for all nodes after indexing
    // (T13 optimization already does this, but we verify it holds under B-phase)
    for node_id in &["big_fn", "small_fn"] {
        let _idx = engine.node_index(node_id).unwrap();
        // Access the internal nodes to verify content is cleared
        // We can verify this indirectly: if content were retained, estimated_memory
        // would be much larger
    }

    // Search should still work via inverted index (content was tokenized before clearing)
    let mut engine = engine;
    let results = search_ids(&mut engine, "large_data");
    assert!(
        results.contains(&"big_fn".to_string()),
        "search should find big_fn via inverted index even after content clearing"
    );

    // Memory estimate should not include the large content
    let mem = engine.estimated_memory_bytes();
    // The large content was ~130KB. If retained, memory would be much higher.
    // With 2 nodes and 3-dim embeddings, memory should be well under 100KB
    // (just metadata + small embeddings + index structures)
    assert!(
        mem < 500_000,
        "estimated memory ({}) should not include large content blobs",
        mem
    );
}

#[test]
fn test_content_clearing_preserves_search_correctness() {
    // Verify that content clearing does not break search correctness.
    let nodes = vec![
        make_node("fn_a", "fn fn_a() { compute_hash(); }", vec![1.0, 0.0, 0.0]),
        make_node("fn_b", "fn fn_b() { render_page(); }", vec![0.0, 1.0, 0.0]),
    ];

    let mut engine = make_engine(nodes);

    // Both text and semantic search should work
    let text_results = search_ids(&mut engine, "compute_hash");
    assert!(text_results.contains(&"fn_a".to_string()));

    let sem_results = semantic_ids(&engine, &[1.0, 0.0, 0.0], 2);
    assert_eq!(sem_results[0], "fn_a");
}

// ============================================================================
// VAL-BPHASE-021: Search cache enforces the configured byte and entry budget
// ============================================================================

#[test]
fn test_search_cache_entry_limit() {
    // Under repeated queries, the search cache never grows beyond the
    // configured entry limit while still returning valid search results.
    let nodes: Vec<NodeInfo> = (0..20)
        .map(|i| {
            make_node(
                &format!("node_{}", i),
                &format!("fn node_{}() {{}}", i),
                vec![0.0; 3],
            )
        })
        .collect();

    let mut engine = make_engine(nodes);

    // Run more queries than the entry limit
    for i in 0..(SEARCH_CACHE_MAX_ENTRIES + 50) {
        let q = SearchQuery {
            query: format!("unique_query_{}", i),
            top_k: 5,
            token_budget: None,
            semantic: false,
            expand_context: false,
            query_embedding: None,
            query_neural_embedding: None,
            threshold: None,
            query_type: None,
        };
        let _ = engine.search(q);
    }

    // Cache entries should not exceed the limit
    assert!(
        engine.search_cache_len() <= SEARCH_CACHE_MAX_ENTRIES,
        "search cache entries ({}) should not exceed max ({})",
        engine.search_cache_len(),
        SEARCH_CACHE_MAX_ENTRIES
    );
}

#[test]
fn test_search_cache_byte_limit() {
    let nodes: Vec<NodeInfo> = (0..20)
        .map(|i| {
            make_node(
                &format!("node_{}", i),
                &format!("fn node_{}() {{}}", i),
                vec![0.0; 3],
            )
        })
        .collect();

    let mut engine = make_engine(nodes);

    // Run many queries to fill cache
    for i in 0..500 {
        let q = SearchQuery {
            query: format!("query_{}", i),
            top_k: 10,
            token_budget: None,
            semantic: false,
            expand_context: false,
            query_embedding: None,
            query_neural_embedding: None,
            threshold: None,
            query_type: None,
        };
        let _ = engine.search(q);
    }

    assert!(
        engine.search_cache_bytes() <= SEARCH_CACHE_MAX_BYTES,
        "search cache bytes ({}) should not exceed max ({})",
        engine.search_cache_bytes(),
        SEARCH_CACHE_MAX_BYTES
    );
}

#[test]
fn test_search_cache_returns_correct_results() {
    // Cached results should be identical to freshly computed results.
    let nodes = vec![
        make_node("target", "fn target() { compute(); }", vec![1.0, 0.0, 0.0]),
        make_node("other", "fn other() { render(); }", vec![0.0, 1.0, 0.0]),
    ];

    let mut engine = make_engine(nodes);

    let q = SearchQuery {
        query: "target".to_string(),
        top_k: 10,
        token_budget: None,
        semantic: false,
        expand_context: false,
        query_embedding: None,
        query_neural_embedding: None,
        threshold: None,
        query_type: None,
    };

    // First call (computed)
    let results1 = engine.search(q.clone()).unwrap();
    // Second call (cached)
    let results2 = engine.search(q).unwrap();

    assert_eq!(results1.len(), results2.len());
    for (r1, r2) in results1.iter().zip(results2.iter()) {
        assert_eq!(r1.node_id, r2.node_id);
        assert_eq!(r1.rank, r2.rank);
    }
}

// ============================================================================
// VAL-BPHASE-022: Edit preview cache rejects oversized entries and stays
//                 within total cap
// ============================================================================

#[tokio::test]
async fn test_edit_cache_rejects_oversized() {
    let temp_dir = tempfile::tempdir().unwrap();
    let cache = EditCache::new();

    // Create an entry larger than the per-entry limit
    let oversized = EditCacheEntry {
        file_path: std::path::PathBuf::from("/test/oversized.rs"),
        preview_token: "token".to_string(),
        original_text: "x".repeat(EDIT_CACHE_MAX_ENTRY_BYTES + 1000),
        modified_text: "y".repeat(100),
        changes: vec![EditChange::ReplaceText {
            start: 0,
            end: 1,
            new_text: "y".to_string(),
        }],
        timestamp: chrono::Utc::now(),
    };

    let result = cache
        .set(temp_dir.path(), oversized)
        .await
        .expect("no IO error");
    assert!(result.is_err(), "oversized entry should be rejected");
}

#[tokio::test]
async fn test_edit_cache_total_cap_enforced() {
    let temp_dir = tempfile::tempdir().unwrap();
    let cache = EditCache::new();

    // Insert many entries that together exceed the total cap
    let entry_size = 100_000; // ~100 KiB each
    let count = (EDIT_CACHE_TOTAL_CAP_BYTES / entry_size) + 3;

    for i in 0..count {
        let entry = EditCacheEntry {
            file_path: std::path::PathBuf::from(format!("/test/file_{}.rs", i)),
            preview_token: format!("token_{}", i),
            original_text: "x".repeat(entry_size / 2),
            modified_text: "y".repeat(entry_size / 2),
            changes: vec![EditChange::ReplaceText {
                start: 0,
                end: 1,
                new_text: "y".to_string(),
            }],
            timestamp: chrono::Utc::now(),
        };
        let result = cache
            .set(temp_dir.path(), entry)
            .await
            .expect("no IO error");
        assert!(result.is_ok(), "entry {} should be accepted", i);
    }

    // Total hot cache bytes should not significantly exceed the cap
    assert!(
        cache.hot_cache_bytes().await <= EDIT_CACHE_TOTAL_CAP_BYTES + 10_000,
        "hot cache bytes ({}) should not exceed cap ({})",
        cache.hot_cache_bytes().await,
        EDIT_CACHE_TOTAL_CAP_BYTES
    );
}

// ============================================================================
// VAL-BPHASE-023: Index-related caches are bounded and owner-scoped
// ============================================================================

#[test]
fn test_work_hoister_is_bounded() {
    // The work hoister cache is bounded by entries and bytes.
    use leindex::search::search::WorkHoister;
    use leindex::search::search::{WORK_HOISTER_MAX_BYTES, WORK_HOISTER_MAX_ENTRIES};

    let mut hoister = WorkHoister::new();

    // Fill beyond both limits
    for i in 0..(WORK_HOISTER_MAX_ENTRIES + 100) {
        let content = format!("content_{}", i);
        let embedding = vec![0.5f32; 768];
        hoister.store(&content, embedding, None);
    }

    // Should not exceed entry limit
    assert!(
        hoister.len() <= WORK_HOISTER_MAX_ENTRIES,
        "hoister entries ({}) should not exceed max ({})",
        hoister.len(),
        WORK_HOISTER_MAX_ENTRIES
    );

    // Should not exceed byte limit
    assert!(
        hoister.bytes_used() <= WORK_HOISTER_MAX_BYTES,
        "hoister bytes ({}) should not exceed max ({})",
        hoister.bytes_used(),
        WORK_HOISTER_MAX_BYTES
    );
}

#[test]
fn test_work_hoister_lookup_returns_correct_results() {
    use leindex::search::search::WorkHoister;

    let mut hoister = WorkHoister::with_bounds(100, 1024 * 1024);

    let content = "fn compute() { return 42; }";
    let embedding = vec![1.0, 0.0, 0.0];
    hoister.store(content, embedding.clone(), None);

    let result = hoister.lookup(content);
    assert!(result.is_some(), "stored content should be found");
    assert_eq!(result.unwrap().0, embedding);
}

#[test]
fn test_work_hoister_scoped_to_owner() {
    // Each WorkHoister instance is independent (owner-scoped).
    use leindex::search::search::WorkHoister;

    let mut hoister_a = WorkHoister::with_bounds(100, 1024 * 1024);
    let mut hoister_b = WorkHoister::with_bounds(100, 1024 * 1024);

    hoister_a.store("content_a", vec![1.0], None);
    hoister_b.store("content_b", vec![2.0], None);

    assert!(hoister_a.lookup("content_a").is_some());
    assert!(hoister_a.lookup("content_b").is_none());
    assert!(hoister_b.lookup("content_b").is_some());
    assert!(hoister_b.lookup("content_a").is_none());
}

// ============================================================================
// VAL-BPHASE-024: Memory accounting surfaces cache byte estimates
// ============================================================================

#[test]
fn test_search_engine_exposes_cache_bytes() {
    // Resident cache components expose byte-size estimates consumable by
    // memory telemetry/accounting.
    let nodes = vec![
        make_node("a", "fn a() { compute(); }", vec![1.0, 0.0, 0.0]),
        make_node("b", "fn b() { render(); }", vec![0.0, 1.0, 0.0]),
    ];

    let mut engine = make_engine(nodes);

    // Before any search, cache should be empty
    assert_eq!(engine.search_cache_bytes(), 0);
    assert_eq!(engine.search_cache_len(), 0);

    // Run a search to populate cache
    let q = SearchQuery {
        query: "compute".to_string(),
        top_k: 10,
        token_budget: None,
        semantic: false,
        expand_context: false,
        query_embedding: None,
        query_neural_embedding: None,
        threshold: None,
        query_type: None,
    };
    let _ = engine.search(q);

    // After search, cache should have entries and bytes
    assert!(
        engine.search_cache_len() > 0,
        "cache should have entries after search"
    );
    assert!(
        engine.search_cache_bytes() > 0,
        "cache should have bytes after search"
    );
}

#[test]
fn test_search_engine_estimated_memory() {
    let nodes: Vec<NodeInfo> = (0..50)
        .map(|i| {
            make_node(
                &format!("n{}", i),
                &format!("fn n{}() {{}}", i),
                vec![0.0; 3],
            )
        })
        .collect();

    let engine = make_engine(nodes);

    let mem = engine.estimated_memory_bytes();
    assert!(mem > 0, "estimated memory should be positive");

    // Memory should be reasonable for 50 nodes with 3-dim embeddings
    // (no content stored, just metadata + embeddings + index structures)
    assert!(
        mem < 10_000_000,
        "estimated memory ({}) should be reasonable for 50 small nodes",
        mem
    );
}

#[test]
fn test_work_hoister_exposes_byte_accounting() {
    use leindex::search::search::WorkHoister;

    let mut hoister = WorkHoister::with_bounds(100, 1024 * 1024);
    assert_eq!(hoister.bytes_used(), 0);

    hoister.store("test content", vec![1.0f32; 128], None);
    assert!(
        hoister.bytes_used() > 0,
        "hoister should track bytes after store"
    );

    hoister.clear();
    assert_eq!(
        hoister.bytes_used(),
        0,
        "hoister bytes should be 0 after clear"
    );
}

// ============================================================================
// VAL-BPHASE-025: Snapshot/persistence path avoids full embedding clone
//                 amplification
// ============================================================================

#[test]
fn test_snapshot_avoids_full_embedding_clone() {
    // Snapshot/export behavior completes without transiently cloning the full
    // embedding corpus into an additional heap mirror.
    //
    // We verify this by checking that collect_embeddings returns references
    // or lightweight views rather than deep-cloning the entire embedding set.

    let nodes: Vec<NodeInfo> = (0..100)
        .map(|i| {
            make_node(
                &format!("node_{}", i),
                &format!("fn node_{}() {{}}", i),
                vec![1.0 / (i + 1) as f32; 768],
            )
        })
        .collect();

    let engine = make_engine(nodes);

    // collect_embeddings should produce a compact snapshot without
    // amplifying memory (it clones embeddings, but the test verifies
    // the snapshot is correct and the engine is still usable)
    let embeddings = engine.collect_embeddings();
    assert_eq!(embeddings.len(), 100);

    // Verify the snapshot is correct
    for (id, emb) in &embeddings {
        assert!(!emb.is_empty(), "embedding for {} should not be empty", id);
        assert_eq!(emb.len(), 768, "embedding for {} should be 768-dim", id);
    }

    // Engine should still be usable after snapshot
    let mut engine = engine;
    let results = search_ids(&mut engine, "node_50");
    assert!(results.contains(&"node_50".to_string()));
}

#[test]
fn test_mmap_snapshot_no_heap_mirror() {
    // The mmap persistence path writes embeddings to disk and reads them
    // back via mmap without creating a heap mirror.

    let dir = tempfile::tempdir().unwrap();
    let mmap_path = dir.path().join("embeddings.bin");

    let embeddings: Vec<(String, Vec<f32>)> = (0..50)
        .map(|i| {
            let mut v = vec![0.0f32; 64];
            v[i % 64] = 1.0;
            (format!("node_{}", i), v)
        })
        .collect();

    write_mmap_embeddings(&mmap_path, &embeddings).unwrap();

    // Open mmap index — this should NOT create a heap mirror
    let index = MmapEmbeddingIndex::open(&mmap_path).unwrap();
    assert_eq!(index.len(), 50);

    // Verify borrowed slice access (zero-copy, no heap allocation)
    for i in 0..50u32 {
        let slice = index.embedding_slice_by_index(i).unwrap();
        assert_eq!(slice.len(), 64);
        assert_eq!(slice[i as usize % 64], 1.0);
    }

    // Verify search works without heap mirror
    let query = vec![0.0f32; 64];
    let results = index.search(&query, 5);
    // All embeddings have norm 1.0, so all should have similarity 0.0 to zero vector
    // This is expected — the key point is that search works via mmap
    assert!(results.len() <= 5);
}

// ============================================================================
// VAL-BPHASE-041: Row-oriented identifiers use compact integer-backed
//                 addressing
// ============================================================================

#[test]
fn test_compact_node_metadata_uses_row_addressing() {
    // Resident search/index structures use compact integer or row-ID-backed
    // addressing in place of wider identifier-heavy representations.

    let nodes = vec![
        make_node(
            "func_alpha",
            "fn func_alpha() { compute(); }",
            vec![1.0, 0.0, 0.0],
        ),
        make_node(
            "func_beta",
            "fn func_beta() { render(); }",
            vec![0.0, 1.0, 0.0],
        ),
        make_node(
            "func_gamma",
            "fn func_gamma() { validate(); }",
            vec![0.0, 0.0, 1.0],
        ),
    ];

    let engine = make_engine(nodes);

    // CompactNodeMetadata should provide row-based lookup
    let compact = engine.compact_metadata();

    // Each node should have a compact row index
    assert!(compact.row_index("func_alpha").is_some());
    assert!(compact.row_index("func_beta").is_some());
    assert!(compact.row_index("func_gamma").is_some());

    // Row indices should be valid u32 values
    for id in &["func_alpha", "func_beta", "func_gamma"] {
        let row = compact.row_index(id).unwrap();
        assert!(
            row < engine.node_count() as u32,
            "row {} should be < {}",
            row,
            engine.node_count()
        );
    }

    // Complexity should be accessible by row
    for id in &["func_alpha", "func_beta", "func_gamma"] {
        let row = compact.row_index(id).unwrap();
        let compact_complexity = compact.complexity_by_row(row);
        let engine_complexity = engine.node_complexity(id);
        assert_eq!(
            compact_complexity, engine_complexity,
            "compact complexity for row {} should match engine",
            row
        );
    }
}

#[test]
fn test_compact_metadata_stable_lookup() {
    // Compact addressing provides stable lookup semantics.
    let nodes: Vec<NodeInfo> = (0..20)
        .map(|i| {
            make_node(
                &format!("n{}", i),
                &format!("fn n{}() {{}}", i),
                vec![0.0; 3],
            )
        })
        .collect();

    let engine = make_engine(nodes);
    let compact = engine.compact_metadata();

    // Repeated lookups should return the same row
    for _ in 0..5 {
        for i in 0..20 {
            let id = format!("n{}", i);
            let row1 = compact.row_index(&id);
            let row2 = compact.row_index(&id);
            assert_eq!(row1, row2, "row lookup for {} should be stable", id);
        }
    }
}

// ============================================================================
// VAL-BPHASE-042: Resident graph and search metadata stay behaviorally
//                 correct after integer compaction
// ============================================================================

#[test]
fn test_search_correct_after_integer_compaction() {
    // Graph adjacency, postings, and other resident search metadata remain
    // behaviorally correct after compaction to row-oriented integer-backed forms.

    let nodes = vec![
        make_node(
            "search_fn",
            "fn search_fn() { find(); query(); }",
            vec![1.0, 0.0, 0.0],
        ),
        make_node(
            "index_fn",
            "fn index_fn() { build(); store(); }",
            vec![0.0, 1.0, 0.0],
        ),
        make_node(
            "cache_fn",
            "fn cache_fn() { get(); put(); evict(); }",
            vec![0.0, 0.0, 1.0],
        ),
    ];

    let mut engine = make_engine(nodes.clone());
    let compact = engine.compact_metadata();

    // Verify compact token index is correct
    let token_idx = compact.token_index();

    // "find" should map to search_fn's row
    let find_row = compact.row_index("search_fn").unwrap();
    assert!(
        token_idx.nodes_for_token("find").contains(&find_row),
        "token 'find' should map to search_fn row {}",
        find_row
    );

    // "build" should map to index_fn's row
    let build_row = compact.row_index("index_fn").unwrap();
    assert!(
        token_idx.nodes_for_token("build").contains(&build_row),
        "token 'build' should map to index_fn row {}",
        build_row
    );

    // "evict" should map to cache_fn's row
    let evict_row = compact.row_index("cache_fn").unwrap();
    assert!(
        token_idx.nodes_for_token("evict").contains(&evict_row),
        "token 'evict' should map to cache_fn row {}",
        evict_row
    );

    // Full search should still work correctly
    let results = search_ids(&mut engine, "find query");
    assert!(results.contains(&"search_fn".to_string()));

    let results = search_ids(&mut engine, "build store");
    assert!(results.contains(&"index_fn".to_string()));

    let results = search_ids(&mut engine, "evict");
    assert!(results.contains(&"cache_fn".to_string()));
}

#[test]
fn test_semantic_search_correct_after_compaction() {
    let nodes = vec![
        make_node("vec_a", "fn vec_a() {}", vec![1.0, 0.0, 0.0]),
        make_node("vec_b", "fn vec_b() {}", vec![0.0, 1.0, 0.0]),
        make_node("vec_c", "fn vec_c() {}", vec![0.9, 0.1, 0.0]),
    ];

    let engine = make_engine(nodes);
    let _compact = engine.compact_metadata();

    // Semantic search should still produce correct results
    let results = semantic_ids(&engine, &[1.0, 0.0, 0.0], 3);
    assert_eq!(results[0], "vec_a");
    assert!(results.contains(&"vec_c".to_string()));
}

#[test]
fn test_compaction_coherence_after_delta_update() {
    let nodes = vec![
        make_node("a", "fn a() { alpha(); }", vec![1.0, 0.0, 0.0]),
        make_node("b", "fn b() { beta(); }", vec![0.0, 1.0, 0.0]),
    ];

    let mut engine = make_engine(nodes);

    // Add a new node
    engine.incremental_reindex(TextIndexDelta {
        removed_node_ids: vec![],
        updated_nodes: vec![make_node("c", "fn c() { gamma(); }", vec![0.0, 0.0, 1.0])],
    });

    // Compact metadata should reflect the update
    let compact = engine.compact_metadata();
    assert!(compact.row_index("c").is_some());

    // Token index should include new tokens
    let token_idx = compact.token_index();
    let c_row = compact.row_index("c").unwrap();
    assert!(token_idx.nodes_for_token("gamma").contains(&c_row));

    // Coherence check
    engine
        .validate_coherence()
        .expect("index should be coherent");
}

// ============================================================================
// VAL-BPHASE-043: Read-mostly resident artifacts expand zero-copy coverage
//                 without mutable aliasing regressions
// ============================================================================

#[test]
fn test_mmap_zero_copy_read_path() {
    // Read-mostly artifacts approved for mmap or zero-copy access gain that
    // residency path without reintroducing duplicate heap mirrors or exposing
    // mutable aliasing/corruption behavior.

    let dir = tempfile::tempdir().unwrap();
    let mmap_path = dir.path().join("embeddings.bin");

    let embeddings: Vec<(String, Vec<f32>)> = vec![
        ("func_a".to_string(), vec![1.0, 0.0, 0.0]),
        ("func_b".to_string(), vec![0.0, 1.0, 0.0]),
        ("func_c".to_string(), vec![0.0, 0.0, 1.0]),
    ];

    write_mmap_embeddings(&mmap_path, &embeddings).unwrap();
    let index = MmapEmbeddingIndex::open(&mmap_path).unwrap();

    // Borrowed slice access is zero-copy (no heap allocation)
    let slice_a = index.embedding_slice_by_index(0).unwrap();
    assert_eq!(slice_a, &[1.0, 0.0, 0.0]);

    // Multiple borrowed slices can coexist without mutable aliasing
    let slice_b = index.embedding_slice_by_index(1).unwrap();
    let slice_c = index.embedding_slice_by_index(2).unwrap();
    assert_eq!(slice_b, &[0.0, 1.0, 0.0]);
    assert_eq!(slice_c, &[0.0, 0.0, 1.0]);

    // All slices should still be valid (no mutable aliasing corruption)
    assert_eq!(slice_a, &[1.0, 0.0, 0.0]);
    assert_eq!(slice_b, &[0.0, 1.0, 0.0]);
    assert_eq!(slice_c, &[0.0, 0.0, 1.0]);
}

#[test]
fn test_zero_copy_search_no_heap_mirror() {
    // Search via mmap uses borrowed slices, not heap-allocated copies.
    let dir = tempfile::tempdir().unwrap();
    let mmap_path = dir.path().join("embeddings.bin");

    let embeddings: Vec<(String, Vec<f32>)> = (0..20)
        .map(|i| {
            let mut v = vec![0.0f32; 8];
            v[i % 8] = 1.0;
            (format!("node_{}", i), v)
        })
        .collect();

    write_mmap_embeddings(&mmap_path, &embeddings).unwrap();
    let index = MmapEmbeddingIndex::open(&mmap_path).unwrap();

    // Search should use borrowed slices internally
    let mut query = vec![0.0f32; 8];
    query[0] = 1.0; // Non-zero query to match node_0
    let results = index.search(&query, 5);

    // node_0 should be the top result (its embedding is [1,0,0,0,0,0,0,0])
    assert!(!results.is_empty());
    // node_0, node_8, node_16 all have 1.0 in position 0
    assert!(results[0].0 == "node_0" || results[0].0 == "node_8" || results[0].0 == "node_16");

    // Verify top results have positive scores (some lower-ranked results may have 0 similarity)
    assert!(results[0].1 > 0.0, "top result should have positive score");
}

#[test]
fn test_read_only_mmap_no_mutable_aliasing() {
    // The mmap index is read-only — no mutable operations are exposed that
    // could cause aliasing issues.

    let dir = tempfile::tempdir().unwrap();
    let mmap_path = dir.path().join("embeddings.bin");

    let embeddings: Vec<(String, Vec<f32>)> = vec![
        ("a".to_string(), vec![1.0, 0.0, 0.0]),
        ("b".to_string(), vec![0.0, 1.0, 0.0]),
    ];

    write_mmap_embeddings(&mmap_path, &embeddings).unwrap();
    let index = MmapEmbeddingIndex::open(&mmap_path).unwrap();

    // Read the same row many times — should always return the same value
    for _ in 0..100 {
        let slice = index.embedding_slice_by_index(0).unwrap();
        assert_eq!(slice, &[1.0, 0.0, 0.0]);
    }

    // Concurrent reads from different rows should not interfere
    for _ in 0..100 {
        let a = index.embedding_slice_by_index(0).unwrap();
        let b = index.embedding_slice_by_index(1).unwrap();
        assert_eq!(a, &[1.0, 0.0, 0.0]);
        assert_eq!(b, &[0.0, 1.0, 0.0]);
    }
}

// ============================================================================
// Cross-cutting: verify all structures remain coherent under load
// ============================================================================

#[test]
fn test_full_compression_cycle_under_load() {
    // Index many nodes, verify compact metadata, run searches, update, repeat.
    let nodes: Vec<NodeInfo> = (0..200)
        .map(|i| {
            make_node(
                &format!("fn_{}", i),
                &format!("fn fn_{}() {{ task{}(); }}", i, i),
                {
                    let mut v = vec![0.0f32; 3];
                    v[i % 3] = 1.0;
                    v
                },
            )
        })
        .collect();

    let mut engine = make_engine(nodes);

    // Verify compact metadata
    let compact = engine.compact_metadata();
    assert_eq!(compact.node_count(), 200);

    // Run searches — each node has a unique "taskXX" token
    for i in (0..200).step_by(10) {
        let query = format!("task{}", i);
        let results = search_ids(&mut engine, &query);
        assert!(
            results.contains(&format!("fn_{}", i)),
            "search for '{}' should find fn_{}, got: {:?}",
            query,
            i,
            results
        );
    }

    // Incremental update
    engine.incremental_reindex(TextIndexDelta {
        removed_node_ids: (0..50).map(|i| format!("fn_{}", i)).collect(),
        updated_nodes: (200..210)
            .map(|i| {
                make_node(
                    &format!("fn_{}", i),
                    &format!("fn fn_{}() {{ new_op(); }}", i),
                    vec![0.5; 3],
                )
            })
            .collect(),
    });

    // Verify coherence
    engine
        .validate_coherence()
        .expect("index should be coherent after updates");

    // Compact metadata should reflect changes
    let compact = engine.compact_metadata();
    assert_eq!(compact.node_count(), 160); // 200 - 50 + 10

    // Removed nodes should not be in compact metadata
    for i in 0..50 {
        assert!(
            compact.row_index(&format!("fn_{}", i)).is_none(),
            "fn_{} should not be in compact metadata after removal",
            i
        );
    }

    // New nodes should be in compact metadata
    for i in 200..210 {
        assert!(
            compact.row_index(&format!("fn_{}", i)).is_some(),
            "fn_{} should be in compact metadata after addition",
            i
        );
    }

    // Cache should be bounded
    assert!(engine.search_cache_bytes() <= SEARCH_CACHE_MAX_BYTES);
    assert!(engine.search_cache_len() <= SEARCH_CACHE_MAX_ENTRIES);
}