#![allow(clippy::all)]
use ragc_core::{create_agc_archive, determine_splitters_streaming};
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
#[test]
#[ignore] fn test_yeast3_streaming_compression() {
let home = env::var("HOME").expect("HOME not set");
let base_dir = PathBuf::from(home).join("scrapy/yeast235_samples");
if !base_dir.exists() {
eprintln!("Skipping test: yeast samples not found at {:?}", base_dir);
return;
}
let mut sample_files = Vec::new();
let samples = ["AAA#0.fa", "AAB#0.fa", "AAC#0.fa"];
for sample in &samples {
let path = base_dir.join(sample);
if !path.exists() {
eprintln!("Skipping test: sample not found: {:?}", path);
return;
}
let sample_name = sample.trim_end_matches(".fa").replace("#", "_");
sample_files.push((sample_name, path.to_string_lossy().to_string()));
}
println!("Testing with {} yeast samples", sample_files.len());
let output_path = "/tmp/test_yeast3_streaming.agc";
let _ = fs::remove_file(output_path);
let kmer_length = 21;
let segment_size = 1000;
let num_threads = 4;
let adaptive_mode = false;
let concatenated_genomes = false;
let verbosity = 1;
println!("Step 1: Determining splitters from reference sample...");
let reference_path = Path::new(&sample_files[0].1);
let (splitters, candidates, duplicates) =
determine_splitters_streaming(reference_path, kmer_length, segment_size as usize)
.expect("Failed to determine splitters");
println!(" Found {} splitters", splitters.len());
println!(" Found {} candidate k-mers", candidates.len());
println!(" Found {} duplicated k-mers", duplicates.len());
println!("Step 2: Compressing {} samples...", sample_files.len());
let result = create_agc_archive(
output_path,
sample_files.clone(),
splitters,
candidates,
duplicates,
kmer_length,
segment_size,
num_threads,
adaptive_mode,
concatenated_genomes,
verbosity,
);
assert!(result.is_ok(), "Compression failed: {:?}", result.err());
println!(" ✓ Compression successful");
let metadata = fs::metadata(output_path).expect("Archive file not found");
let file_size = metadata.len();
println!("Step 3: Verifying archive...");
println!(
" Archive size: {} bytes ({:.2} MB)",
file_size,
file_size as f64 / 1_048_576.0
);
assert!(file_size > 1024, "Archive too small: {} bytes", file_size);
assert!(
file_size < 50_000_000,
"Archive too large: {} bytes",
file_size
);
println!(" ✓ Archive size reasonable");
println!("Step 3.5: Verifying with RAGC decompressor...");
{
use ragc_core::{Decompressor, DecompressorConfig};
let config = DecompressorConfig::default();
let decompressor = Decompressor::open(output_path, config)
.expect("Failed to open archive with RAGC decompressor");
let samples = decompressor.list_samples();
println!(" RAGC found {} samples: {:?}", samples.len(), samples);
assert_eq!(
samples.len(),
3,
"RAGC decompressor expected 3 samples, found {}",
samples.len()
);
println!(" ✓ RAGC decompressor can read archive");
println!(" ✓ Found all 3 samples");
}
if Path::new("/home/erik/agc/bin/agc").exists() {
println!("Step 4: Verifying with C++ AGC...");
use std::process::Command;
let output = Command::new("/home/erik/agc/bin/agc")
.args(&["listset", output_path])
.output()
.expect("Failed to run C++ AGC");
if output.status.success() {
let samples_list = String::from_utf8_lossy(&output.stdout);
println!(" Samples in archive:\n{}", samples_list);
let sample_count = samples_list.lines().count();
assert_eq!(
sample_count, 3,
"Expected 3 samples, found {}",
sample_count
);
println!(" ✓ C++ AGC can read archive");
println!(" ✓ Found all 3 samples");
} else {
let error = String::from_utf8_lossy(&output.stderr);
eprintln!(" Warning: C++ AGC failed to read archive: {}", error);
}
} else {
println!("Step 4: Skipped (C++ AGC not found)");
}
println!("\n=== Test Complete ===");
println!("Archive created at: {}", output_path);
}