ragc-core 0.1.1

Core compression and decompression algorithms for the AGC genome compression format
Documentation
// Test Rust splitter integration with realistic yeast chr5 data
// Uses actual dataset with significant splitters to verify byte-identical output

use ragc_core::agc_compress_ffi::{compress_with_cpp_agc, compress_with_rust_splitters};
use std::process::Command;
use std::fs;

/// Builds the same three-sample yeast chr5 archive twice — once through
/// `compress_with_cpp_agc` and once through `compress_with_rust_splitters` —
/// then compares the two output files by size and SHA-256.
///
/// Both archives are written under `/tmp/ragc_rust_splitter_chr5` and are left
/// in place for inspection. When the digests differ, the first lines of
/// `cmp -l` output are printed as a starting point for debugging.
///
/// # Errors
///
/// Returns an error if an input FASTA file is missing, if either compression
/// call fails, or if any filesystem or external-tool step fails.
///
/// NOTE(review): a digest mismatch is only reported on stdout; the function
/// still returns `Ok(())` in that case, so the process exit code does not
/// reflect the comparison result — confirm this is intended.
fn main() -> anyhow::Result<()> {
    println!("=== Testing Rust Splitters with Yeast chr5 ===\n");

    let out_dir = "/tmp/ragc_rust_splitter_chr5";
    fs::create_dir_all(out_dir)?;

    // (sample name, FASTA path) pairs for the pre-generated yeast chr5 inputs.
    let samples: Vec<(String, String)> = [
        ("AAA", "/tmp/yeast_samples/sample_AAA_chr5.fa"),
        ("AAB", "/tmp/yeast_samples/sample_AAB_chr5.fa"),
        ("AAC", "/tmp/yeast_samples/sample_AAC_chr5.fa"),
    ]
    .iter()
    .map(|&(id, fasta)| (id.to_owned(), fasta.to_owned()))
    .collect();

    // Bail out on the first input that is not on disk.
    let first_missing = samples
        .iter()
        .find(|(_, fasta)| !std::path::Path::new(fasta).exists());
    if let Some((_, fasta)) = first_missing {
        eprintln!("Error: Test file not found: {}", fasta);
        eprintln!("Run: scripts/prepare_yeast_chr5.sh");
        return Err(anyhow::anyhow!("Missing test files"));
    }

    // Compression settings shared by both runs so the outputs are comparable.
    let kmer_length = 21;
    let segment_size = 10000;
    let min_match_length = 20;
    let pack_cardinality = 50;
    let concatenated_genomes = false;
    let adaptive_compression = false;
    let verbosity = 1; // keep the compressor's own logging quiet
    let no_threads = 1; // one thread, so runs are repeatable
    let fallback_frac = 0.0;

    // Stats an archive on disk, hashes it, prints the per-run summary and
    // hands back (size in bytes, digest) for the later comparison.
    let summarize = |archive: &str,
                     elapsed: std::time::Duration|
     -> anyhow::Result<(u64, String)> {
        let bytes = fs::metadata(archive)?.len();
        let digest = get_sha256(archive)?;
        println!("   Time: {:.2}s", elapsed.as_secs_f64());
        println!("   Size: {} bytes ({:.1} KB)", bytes, bytes as f64 / 1024.0);
        println!("   SHA256: {}", digest);
        Ok((bytes, digest))
    };

    // Run 1: baseline archive.
    println!("1. Creating archive with native C++ AGC splitters...");
    let cpp_archive = format!("{}/native_cpp.agc", out_dir);
    let cpp_clock = std::time::Instant::now();
    compress_with_cpp_agc(
        &cpp_archive,
        &samples,
        kmer_length,
        segment_size,
        min_match_length,
        pack_cardinality,
        concatenated_genomes,
        adaptive_compression,
        verbosity,
        no_threads,
        fallback_frac,
    )?;
    let cpp_time = cpp_clock.elapsed();
    let (cpp_size, cpp_sha) = summarize(cpp_archive.as_str(), cpp_time)?;

    // Run 2: same inputs and settings, splitters computed on the Rust side.
    println!("\n2. Creating archive with Rust-computed splitters...");
    let rust_archive = format!("{}/rust_splitters.agc", out_dir);
    let rust_clock = std::time::Instant::now();
    compress_with_rust_splitters(
        &rust_archive,
        &samples,
        kmer_length,
        segment_size,
        min_match_length,
        pack_cardinality,
        concatenated_genomes,
        adaptive_compression,
        verbosity,
        no_threads,
        fallback_frac,
    )?;
    let rust_time = rust_clock.elapsed();
    let (rust_size, rust_sha) = summarize(rust_archive.as_str(), rust_time)?;

    // Side-by-side summary of the two runs.
    let time_delta = (rust_time.as_secs_f64() - cpp_time.as_secs_f64()).abs();
    let size_delta = rust_size as i64 - cpp_size as i64;
    println!("\n=== Comparison ===");
    println!("Time difference: {:.2}s", time_delta);
    println!("Size difference: {} bytes", size_delta.abs());

    if cpp_sha != rust_sha {
        let size_delta_pct = ((rust_size as f64 - cpp_size as f64) / cpp_size as f64) * 100.0;
        println!("\n❌ FAILURE: Archives differ");
        println!("   Native C++:     {}", cpp_sha);
        println!("   Rust splitters: {}", rust_sha);
        println!("   Size diff: {} bytes ({:.1}%)", size_delta, size_delta_pct);

        // Ask `cmp -l` for a byte-level listing and show only its head.
        println!("\n   Checking binary differences...");
        let cmp_output = Command::new("cmp")
            .arg("-l")
            .arg(&cpp_archive)
            .arg(&rust_archive)
            .output()?;

        if !cmp_output.stdout.is_empty() {
            let listing = std::str::from_utf8(&cmp_output.stdout)?;
            println!("   First 5 byte differences (offset, cpp, rust):");
            listing
                .lines()
                .take(5)
                .for_each(|entry| println!("      {}", entry));
        }
    } else {
        println!("\n✅ SUCCESS: Archives are BYTE-IDENTICAL!");
        println!("   First component replacement (splitters) verified!");
        println!("   Ready to replace next component.");
    }

    println!("\n=== Archives saved to {} ===", out_dir);
    Ok(())
}

/// Returns the SHA-256 digest of the file at `path`, as reported by the
/// external `sha256sum` command.
///
/// The digest is taken to be the first whitespace-delimited token of the
/// command's standard output.
///
/// # Errors
///
/// Returns an error if:
/// - the `sha256sum` process cannot be started,
/// - it exits with a non-success status (the status and its stderr text are
///   included in the message),
/// - its stdout is not valid UTF-8, or
/// - its stdout contains no token to use as the digest.
fn get_sha256(path: &str) -> anyhow::Result<String> {
    let output = Command::new("sha256sum").arg(path).output()?;

    // A failed run leaves stdout empty; report the tool's own diagnostic
    // rather than falling through to a misleading "failed to parse" error.
    if !output.status.success() {
        return Err(anyhow::anyhow!(
            "sha256sum failed for {} ({}): {}",
            path,
            output.status,
            String::from_utf8_lossy(&output.stderr).trim()
        ));
    }

    let stdout = String::from_utf8(output.stdout)?;
    stdout
        .split_whitespace()
        .next()
        .map(str::to_owned)
        .ok_or_else(|| anyhow::anyhow!("Failed to parse sha256sum output"))
}