// geographdb-core 0.3.1
//
// Geometric graph database core - 3D spatial indexing for code analysis
// Documentation
//! Minimal regression test for section table corruption bug
//!
//! This test reproduces the bug where section_count becomes garbage (e.g., 5506944)
//! and section entries get corrupted (offset=0x0 for sections that should have data).

use geographdb_core::storage::sectioned::{decode_header, decode_section_entry, SectionedStorage};
use std::path::PathBuf;
use tempfile::TempDir;

/// Reads exactly `size` bytes from the file at `path`, starting `offset` bytes
/// from the beginning of the file, and returns them as a new vector.
///
/// # Panics
///
/// Panics if the file cannot be opened, if the seek fails, or if fewer than
/// `size` bytes can be read from `offset` onward.
fn read_raw_bytes(path: &PathBuf, offset: u64, size: usize) -> Vec<u8> {
    use std::io::{Read, Seek, SeekFrom};

    // Chain open -> seek -> read as one fallible pipeline so that any I/O
    // failure surfaces through the single `unwrap` at the end.
    let window = std::fs::File::open(path).and_then(|mut file| {
        file.seek(SeekFrom::Start(offset))?;
        let mut bytes = vec![0u8; size];
        file.read_exact(&mut bytes)?;
        Ok(bytes)
    });

    window.unwrap()
}

#[test]
fn test_section_count_corruption_regression() {
    // One row per section: (name, reserved capacity, payload written into it).
    // Four sections, like Magellan creates.
    let layout = [
        ("GRAPH", 1024u64, &b"GRAPH_DATA_HERE"[..]),
        ("CFG", 2048u64, &b"CFG_DATA_HERE_MORE"[..]),
        ("SYMBOLS", 4096u64, &b"SYMBOLS_DATA_EVEN_MORE"[..]),
        ("COMPLEXITY", 512u64, &b"COMPLEXITY"[..]),
    ];

    // ---- Phase 1: build a sectioned file holding several sections ----
    let scratch = TempDir::new().unwrap();
    let file_path = scratch.path().join("test_corruption.geo");

    eprintln!("[TEST] Creating sectioned file at: {:?}", file_path);

    let mut writer = SectionedStorage::create(&file_path).unwrap();

    // All sections are created first; payloads are written in a second pass.
    for (name, capacity, _) in &layout {
        eprintln!(
            "[TEST] Creating section '{}' with capacity {}",
            name, capacity
        );
        if writer.create_section(name, *capacity, 0).is_err() {
            panic!("Failed to create section {}", name);
        }
    }

    for (name, _, payload) in &layout {
        eprintln!(
            "[TEST] Writing {} bytes to section '{}'",
            payload.len(),
            name
        );
        if writer.write_section(name, payload).is_err() {
            panic!("Failed to write section {}", name);
        }
    }

    eprintln!("[TEST] Flushing storage...");
    writer.flush().expect("Failed to flush");

    // ---- Phase 2: look at the bytes on disk while `writer` is still open ----
    // The file is read directly here, so what gets checked is the persisted
    // state rather than whatever `writer` holds in memory.
    let raw_header = read_raw_bytes(&file_path, 0, 128);
    let preview_len = raw_header.len().min(64);
    eprintln!("[TEST] Raw header bytes (first 64):");
    eprintln!("[TEST]   {:?}", &raw_header[..preview_len]);

    // The decoded header tells us where the section table lives.
    let disk_header = decode_header(&raw_header.try_into().unwrap()).unwrap();
    eprintln!("[TEST] Decoded header from disk:");
    eprintln!(
        "[TEST]   section_table_offset: {}",
        disk_header.section_table_offset
    );
    eprintln!("[TEST]   section_count: {}", disk_header.section_count);
    eprintln!("[TEST]   next_data_offset: {}", disk_header.next_data_offset);

    // Key check #1: the persisted section count is exactly the number created.
    assert_eq!(
        disk_header.section_count, 4,
        "Section count corruption: expected 4, got {}",
        disk_header.section_count
    );

    // Walk the section table, reading one 64-byte entry per section.
    eprintln!("[TEST] Reading section table entries from disk...");
    for index in 0..disk_header.section_count {
        let position = disk_header.section_table_offset + (index * 64);
        let raw_entry = read_raw_bytes(&file_path, position, 64);
        let entry = decode_section_entry(&raw_entry.try_into().unwrap()).unwrap();

        eprintln!(
            "[TEST]   Entry {}: name={}, offset={}, len={}, capacity={}",
            index, entry.name, entry.offset, entry.length, entry.capacity
        );

        // Key check #2: a section that holds data must not point at offset 0.
        let holds_data = entry.length > 0;
        if holds_data {
            assert_ne!(
                entry.offset, 0,
                "Section '{}' has corrupted offset (0x0) but has length {}",
                entry.name, entry.length
            );
        }
    }

    // ---- Phase 3: reopen the file and compare against what was written ----
    eprintln!("[TEST] Reopening storage...");
    drop(writer);

    let mut reopened = SectionedStorage::open(&file_path).unwrap();

    let reopened_count = reopened.section_count();
    eprintln!("[TEST] After reopen, section_count: {}", reopened_count);
    assert_eq!(
        reopened_count, 4,
        "After reopen: expected 4 sections, got {}",
        reopened_count
    );

    // Every payload must read back byte-for-byte.
    for (name, _, expected) in &layout {
        eprintln!("[TEST] Reading section '{}'...", name);
        let actual = match reopened.read_section(name) {
            Ok(bytes) => bytes,
            Err(_) => panic!("Failed to read section {}", name),
        };

        assert_eq!(
            &actual[..],
            *expected,
            "Data mismatch for section '{}'",
            name
        );
    }

    eprintln!("[TEST] SUCCESS: No corruption detected");
}

#[test]
fn test_multiple_flush_cycles_corruption() {
    // Grows the file by one section per flush and re-reads the header from
    // disk after every flush - repeated flushing is where the bug might
    // manifest.
    let temp_dir = TempDir::new().unwrap();
    let db_path = temp_dir.path().join("test_multi_flush.geo");

    eprintln!("[TEST] Testing multiple flush cycles...");

    // Reads the header straight from the file (not through `storage`) and
    // returns the section count recorded there.
    let section_count_on_disk = || {
        let header_bytes = read_raw_bytes(&db_path, 0, 128);
        decode_header(&header_bytes.try_into().unwrap())
            .unwrap()
            .section_count
    };

    let mut storage = SectionedStorage::create(&db_path).unwrap();

    // First flush: a single section ("A")
    storage.create_section("A", 256, 0).unwrap();
    storage.write_section("A", b"DATA_A").unwrap();
    storage.flush().unwrap();

    eprintln!("[TEST] After first flush, checking disk...");
    let count = section_count_on_disk();
    assert_eq!(
        count, 1,
        "After first flush: expected 1 section, got {} (corruption?)",
        count
    );

    // Second flush: add section "B"
    storage.create_section("B", 256, 0).unwrap();
    storage.write_section("B", b"DATA_B").unwrap();
    storage.flush().unwrap();

    eprintln!("[TEST] After second flush, checking disk...");
    let count = section_count_on_disk();
    assert_eq!(
        count, 2,
        "After second flush: expected 2 sections, got {} (corruption?)",
        count
    );

    // Third flush: add section "C"
    storage.create_section("C", 256, 0).unwrap();
    storage.write_section("C", b"DATA_C").unwrap();
    storage.flush().unwrap();

    eprintln!("[TEST] After third flush, checking disk...");
    let count = section_count_on_disk();
    assert_eq!(
        count, 3,
        "After third flush: expected 3 sections, got {} (corruption?)",
        count
    );

    // Verify all data is readable after reopen
    drop(storage);
    let mut storage2 = SectionedStorage::open(&db_path).unwrap();

    assert_eq!(storage2.read_section("A").unwrap(), b"DATA_A");
    assert_eq!(storage2.read_section("B").unwrap(), b"DATA_B");
    assert_eq!(storage2.read_section("C").unwrap(), b"DATA_C");

    eprintln!("[TEST] SUCCESS: Multiple flush cycles work correctly");
}