ragc-core 0.1.1

Core compression and decompression algorithms for the AGC genome compression format
Documentation
// Test Rust splitter integration with yeast5_chrV.fa (5 samples, PanSN format)
// Uses actual multi-sample dataset to verify byte-identical output

use ragc_core::agc_compress_ffi::{compress_with_cpp_agc, compress_with_rust_splitters};
use std::process::Command;
use std::fs;

/// Compresses `./test_minimal/yeast5_chrV.fa` twice — once via
/// `compress_with_cpp_agc` and once via `compress_with_rust_splitters` — using
/// identical parameters, then compares the two archives by SHA-256.
///
/// Timing, size and hash of each archive are printed, followed by a comparison
/// summary. Both archives are left in `/tmp/ragc_rust_splitter_yeast5`.
///
/// # Errors
///
/// Returns an error (and therefore a non-zero exit status) when:
/// - the input FASTA file is missing,
/// - the working directory cannot be created or an archive cannot be stat'ed,
/// - either compression call fails,
/// - hashing an archive fails, or
/// - the two archives are not byte-identical.
fn main() -> anyhow::Result<()> {
    println!("=== Testing Rust Splitters with yeast5 chrV ===\n");

    let test_dir = "/tmp/ragc_rust_splitter_yeast5";
    fs::create_dir_all(test_dir)?;

    // Use existing test file (PanSN format: 5 samples in one file)
    let test_file = "./test_minimal/yeast5_chrV.fa";

    if !std::path::Path::new(test_file).exists() {
        eprintln!("Error: Test file not found: {}", test_file);
        return Err(anyhow::anyhow!("Missing test file"));
    }

    let sample_files = vec![
        ("yeast5".to_string(), test_file.to_string()),
    ];

    // Parameters from CLAUDE.md (matching verified tests)
    let kmer_length = 21;
    let segment_size = 10000;
    let min_match_length = 20;
    let pack_cardinality = 50;
    let concatenated_genomes = true;  // PanSN format
    let adaptive_compression = false;
    let verbosity = 1;  // Reduce noise
    let no_threads = 1;  // Single-threaded for determinism
    let fallback_frac = 0.0;

    println!("Input: {} ({:.1} MB)",
             test_file,
             fs::metadata(test_file)?.len() as f64 / (1024.0 * 1024.0));

    // Test 1: Native C++ AGC (baseline)
    println!("\n1. Creating archive with native C++ AGC splitters...");
    let cpp_archive = format!("{}/native_cpp.agc", test_dir);
    let start = std::time::Instant::now();
    compress_with_cpp_agc(
        &cpp_archive,
        &sample_files,
        kmer_length,
        segment_size,
        min_match_length,
        pack_cardinality,
        concatenated_genomes,
        adaptive_compression,
        verbosity,
        no_threads,
        fallback_frac,
    )?;
    let cpp_time = start.elapsed();

    let cpp_size = fs::metadata(&cpp_archive)?.len();
    let cpp_sha = get_sha256(&cpp_archive)?;
    println!("   Time: {:.2}s", cpp_time.as_secs_f64());
    println!("   Size: {:.1} KB", cpp_size as f64 / 1024.0);
    println!("   SHA256: {}", cpp_sha);

    // Test 2: C++ AGC with Rust-computed splitters
    println!("\n2. Creating archive with Rust-computed splitters...");
    let rust_archive = format!("{}/rust_splitters.agc", test_dir);
    let start = std::time::Instant::now();
    compress_with_rust_splitters(
        &rust_archive,
        &sample_files,
        kmer_length,
        segment_size,
        min_match_length,
        pack_cardinality,
        concatenated_genomes,
        adaptive_compression,
        verbosity,
        no_threads,
        fallback_frac,
    )?;
    let rust_time = start.elapsed();

    let rust_size = fs::metadata(&rust_archive)?.len();
    let rust_sha = get_sha256(&rust_archive)?;
    println!("   Time: {:.2}s", rust_time.as_secs_f64());
    println!("   Size: {:.1} KB", rust_size as f64 / 1024.0);
    println!("   SHA256: {}", rust_sha);

    // Compare results
    println!("\n=== Comparison ===");
    let time_diff = rust_time.as_secs_f64() - cpp_time.as_secs_f64();
    let size_diff = rust_size as i64 - cpp_size as i64;

    println!("Time difference: {:+.2}s ({:+.1}%)",
             time_diff,
             (time_diff / cpp_time.as_secs_f64()) * 100.0);
    println!("Size difference: {:+} bytes", size_diff);

    let archives_match = cpp_sha == rust_sha;
    if archives_match {
        println!("\n✅ SUCCESS: Archives are BYTE-IDENTICAL!");
        println!("   ✓ Splitter detection replaced with Rust");
        println!("   ✓ Compression output unchanged");
        println!("   ✓ First component replacement verified!");
        println!("\n   Ready to replace next component.");
    } else {
        println!("\n❌ FAILURE: Archives differ");
        println!("   Native C++:     {}", cpp_sha);
        println!("   Rust splitters: {}", rust_sha);

        if size_diff != 0 {
            println!("   Size diff: {:+.1}%",
                     (size_diff as f64 / cpp_size as f64) * 100.0);
        }

        // Show first differences
        print_binary_diff(&cpp_archive, &rust_archive);
    }

    println!("\n=== Archives saved to {} ===", test_dir);

    // Surface the mismatch through the exit status so scripts and CI can
    // detect it; the human-readable report above has already been printed.
    if !archives_match {
        return Err(anyhow::anyhow!(
            "Archives differ: native C++ {} vs Rust splitters {}",
            cpp_sha,
            rust_sha
        ));
    }

    Ok(())
}

/// Prints up to the first ten lines of `cmp -l` output for the two archives,
/// followed by a count of the remaining lines when there are more.
///
/// This is purely diagnostic and therefore best-effort: if `cmp` cannot be
/// started, a note is written to stderr instead of propagating an error that
/// would hide the actual "archives differ" result.
fn print_binary_diff(cpp_archive: &str, rust_archive: &str) {
    const MAX_SHOWN: usize = 10;

    println!("\n   Binary diff (first 10 bytes):");
    let output = match Command::new("cmp")
        .arg("-l")
        .arg(cpp_archive)
        .arg(rust_archive)
        .output()
    {
        Ok(output) => output,
        Err(err) => {
            eprintln!("      (could not run `cmp`: {})", err);
            return;
        }
    };

    // Lossy decoding keeps the report going even if `cmp` emits unexpected bytes.
    let diff_output = String::from_utf8_lossy(&output.stdout);

    // Single pass: print the leading lines while counting all of them.
    let mut total_diffs = 0usize;
    for line in diff_output.lines() {
        total_diffs += 1;
        if total_diffs <= MAX_SHOWN {
            println!("      {} {}", total_diffs, line);
        }
    }

    if total_diffs > MAX_SHOWN {
        println!("      ... ({} more differences)", total_diffs - MAX_SHOWN);
    }
}

/// Returns the SHA-256 digest of the file at `path`, taken as the first
/// whitespace-separated token printed by the external `sha256sum` command.
///
/// # Errors
///
/// Returns an error if `sha256sum` cannot be started, exits with a non-zero
/// status (its stderr is included in the message), produces non-UTF-8 output,
/// or prints nothing that can be parsed as a digest.
fn get_sha256(path: &str) -> anyhow::Result<String> {
    let output = Command::new("sha256sum")
        .arg(path)
        .output()
        .map_err(|err| anyhow::anyhow!("Failed to run sha256sum on {}: {}", path, err))?;

    // Without this check a failed run (e.g. unreadable file) would fall through
    // to the parser below and be misreported as a parse failure.
    if !output.status.success() {
        return Err(anyhow::anyhow!(
            "sha256sum failed for {} ({}): {}",
            path,
            output.status,
            String::from_utf8_lossy(&output.stderr).trim()
        ));
    }

    let stdout = String::from_utf8(output.stdout)?;
    let hash = stdout
        .split_whitespace()
        .next()
        .ok_or_else(|| anyhow::anyhow!("Failed to parse sha256sum output"))?;

    Ok(hash.to_owned())
}