ragc-core 0.1.1

Core compression and decompression algorithms for the AGC genome compression format
Documentation
#![allow(clippy::all)]
// Test streaming queue compressor with first 10 yeast samples (sequential multi-file mode)

use ragc_core::{StreamingQueueCompressor, StreamingQueueConfig};
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;

#[test]
#[ignore] // Requires external yeast data files in ~/scrapy/yeast235_samples
fn test_streaming_queue_yeast10() -> anyhow::Result<()> {
    let samples_dir = std::env::var("HOME").unwrap() + "/scrapy/yeast235_samples";

    // First 10 yeast samples (sequential processing for multi-file mode)
    let sample_files = vec![
        "AAA#0.fa", "AAB#0.fa", "AAC#0.fa", "AAR#0.fa", "ABA#0.fa", "ABH#0.fa", "ACA#0.fa",
        "ACH#0.fa", "ADE#0.fa", "ADI#0.fa",
    ];

    let config = StreamingQueueConfig {
        k: 21,
        segment_size: 10000,
        min_match_len: 20,
        queue_capacity: 2 * 1024 * 1024 * 1024,
        num_threads: 4,
        verbosity: 2,
        compression_level: 17,
        adaptive_mode: false, // Test without adaptive mode first
    };

    // Detect splitters from first sample (matching batch mode behavior)
    let first_sample_path = format!("{}/{}", samples_dir, sample_files[0]);
    println!("Detecting splitters from first sample: {}", sample_files[0]);
    let (splitters, _, _) = ragc_core::determine_splitters_streaming(
        Path::new(&first_sample_path),
        config.k as usize,
        config.segment_size as usize,
    )?;
    println!("Found {} splitters", splitters.len());

    let mut compressor = StreamingQueueCompressor::with_splitters(
        "/tmp/test_yeast10_streaming_queue.agc",
        config,
        splitters,
    )?;

    // Process each sample file sequentially (multi-file mode)
    for sample_file in sample_files {
        let file_path = format!("{}/{}", samples_dir, sample_file);
        println!("Processing {}...", sample_file);

        let file = File::open(&file_path)?;
        let reader = BufReader::new(file);

        let mut current_sample = String::new();
        let mut current_contig = String::new();
        let mut current_data = Vec::new();

        for line in reader.lines() {
            let line = line?;
            if line.starts_with('>') {
                // Flush previous contig if any
                if !current_data.is_empty() {
                    compressor.push(
                        current_sample.clone(),
                        current_contig.clone(),
                        current_data.clone(),
                    )?;
                    current_data.clear();
                }

                // Parse header: >AAA#0#chr1
                let parts: Vec<&str> = line[1..].split('#').collect();
                if parts.len() >= 2 {
                    current_sample = format!("{}#{}", parts[0], parts[1]);
                    current_contig = if parts.len() >= 3 {
                        parts[2].to_string()
                    } else {
                        "unknown".to_string()
                    };
                }
            } else {
                // Add sequence data
                for byte in line.bytes() {
                    let base = match byte {
                        b'A' | b'a' => 0u8,
                        b'C' | b'c' => 1u8,
                        b'G' | b'g' => 2u8,
                        b'T' | b't' => 3u8,
                        _ => 0u8, // Treat N as A
                    };
                    current_data.push(base);
                }
            }
        }

        // Flush last contig from this sample
        if !current_data.is_empty() {
            compressor.push(current_sample, current_contig, current_data)?;
        }
    }

    compressor.finalize()?;

    println!("Archive created successfully!");
    Ok(())
}