#![allow(clippy::all)]
//! Integration tests for C++ compatibility
//!
//! These tests verify that ragc produces archives that are bit-compatible
//! with C++ AGC and can read C++ AGC archives correctly.
use ragc_core::{Compressor, CompressorConfig, Decompressor, DecompressorConfig};
use sha2::{Digest, Sha256};
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
#[allow(dead_code)]
fn get_test_data_dir() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../test-data")
}
/// Reads the whole file at `path` and returns its SHA-256 digest as a
/// lowercase hexadecimal string.
///
/// # Panics
/// Panics with "Failed to read file" when the file cannot be read.
fn compute_file_hash(path: &Path) -> String {
    let bytes = fs::read(path).expect("Failed to read file");
    let digest = Sha256::digest(bytes.as_slice());
    format!("{:x}", digest)
}
/// Renders a numerically encoded contig as text.
///
/// Codes `0`, `1`, `2`, `3` become `A`, `C`, `G`, `T`; every other byte value
/// is rendered as `N`.
fn contig_to_string(contig: &[u8]) -> String {
    const BASES: [char; 4] = ['A', 'C', 'G', 'T'];

    let mut text = String::with_capacity(contig.len());
    for &code in contig {
        // Anything outside the four-base table falls back to 'N'.
        let symbol = BASES.get(usize::from(code)).copied().unwrap_or('N');
        text.push(symbol);
    }
    text
}
/// Writes a small two-record FASTA fixture (`seq1`, `seq2`) to `path`.
///
/// `seq2` repeats the bases of `seq1` and ends in a run of `N`s. Each record
/// is a header line followed by a single sequence line.
///
/// # Panics
/// Panics with "Failed to write test FASTA" when the file cannot be written.
fn create_test_fasta(path: &Path) {
    const FIXTURE: &str = concat!(
        ">seq1\n",
        "ACGTACGTACGTACGTACGTACGTACGTACGT\n",
        ">seq2\n",
        "ACGTACGTACGTACGTACGTACGTACGTACGTNNNNNNNNNNNN\n",
    );
    fs::write(path, FIXTURE).expect("Failed to write test FASTA");
}
/// Compresses the fixture FASTA with ragc, then checks that the archive file
/// exists and that re-opening it lists exactly the one sample that was added.
#[test]
fn test_ragc_creates_valid_archive() {
    let tmp = std::env::temp_dir();
    let input_fasta = tmp.join("test_compat.fasta");
    let agc_file = tmp.join("test_compat_ragc.agc");
    let agc_file_str = agc_file.to_str().unwrap();

    // Input fixture.
    create_test_fasta(&input_fasta);

    // Build the archive with ragc.
    let mut writer = Compressor::new(agc_file_str, CompressorConfig::default())
        .expect("Failed to create compressor");
    writer
        .add_fasta_file("test_sample", &input_fasta)
        .expect("Failed to add FASTA");
    writer.finalize().expect("Failed to finalize archive");
    assert!(agc_file.exists(), "Archive was not created");

    // Re-open the archive and inspect the sample listing.
    let reader = Decompressor::open(agc_file_str, DecompressorConfig::default())
        .expect("Failed to open archive");
    let listed = reader.list_samples();
    assert_eq!(listed.len(), 1);
    assert_eq!(listed[0], "test_sample");

    // Best-effort removal of the temporary files (errors are ignored).
    for leftover in [&input_fasta, &agc_file] {
        let _ = fs::remove_file(leftover);
    }
}
/// Compresses the fixture FASTA with ragc, extracts it again with ragc, and
/// requires the re-rendered FASTA to hash identically to the input file.
#[test]
fn test_ragc_rust_roundtrip() {
    let tmp = std::env::temp_dir();
    let input_fasta = tmp.join("test_roundtrip.fasta");
    let agc_file = tmp.join("test_roundtrip.agc");
    let restored_fasta = tmp.join("test_roundtrip_out.fasta");

    // Input fixture and its reference digest.
    create_test_fasta(&input_fasta);
    let expected_digest = compute_file_hash(&input_fasta);

    // Compress with ragc.
    let mut writer = Compressor::new(agc_file.to_str().unwrap(), CompressorConfig::default())
        .expect("Failed to create compressor");
    writer
        .add_fasta_file("test_sample", &input_fasta)
        .expect("Failed to add FASTA");
    writer.finalize().expect("Failed to finalize archive");

    // Decompress with ragc.
    let mut reader = Decompressor::open(agc_file.to_str().unwrap(), DecompressorConfig::default())
        .expect("Failed to open archive");
    let records = reader
        .get_sample("test_sample")
        .expect("Failed to extract sample");

    // Render every record as a header line followed by one sequence line.
    let rendered: String = records
        .into_iter()
        .map(|(header, bases)| format!(">{header}\n{}\n", contig_to_string(&bases)))
        .collect();
    fs::write(&restored_fasta, rendered).expect("Failed to write output");

    let actual_digest = compute_file_hash(&restored_fasta);
    assert_eq!(
        expected_digest, actual_digest,
        "Roundtrip produced different data! Original: {expected_digest}, Output: {actual_digest}"
    );

    // Best-effort removal of the temporary files (errors are ignored).
    for leftover in [&input_fasta, &agc_file, &restored_fasta] {
        let _ = fs::remove_file(leftover);
    }
}
/// Compresses the same fixture twice and requires byte-identical archives.
#[test]
#[ignore] // HashMap iteration order is non-deterministic for security reasons
fn test_deterministic_compression() {
    // NOTE: expected to fail while HashMap randomization influences the output.
    let tmp = std::env::temp_dir();
    let source_fasta = tmp.join("test_deterministic.fasta");
    let first_archive = tmp.join("test_deterministic_1.agc");
    let second_archive = tmp.join("test_deterministic_2.agc");
    create_test_fasta(&source_fasta);

    let shared_config = CompressorConfig::default();

    // First archive.
    let mut first = Compressor::new(first_archive.to_str().unwrap(), shared_config.clone())
        .expect("Failed to create compressor 1");
    first
        .add_fasta_file("test_sample", &source_fasta)
        .expect("Failed to add FASTA 1");
    first.finalize().expect("Failed to finalize 1");

    // Second archive, built from the same input and configuration.
    let mut second = Compressor::new(second_archive.to_str().unwrap(), shared_config)
        .expect("Failed to create compressor 2");
    second
        .add_fasta_file("test_sample", &source_fasta)
        .expect("Failed to add FASTA 2");
    second.finalize().expect("Failed to finalize 2");

    // Identical input must yield identical archive bytes.
    let digest_a = compute_file_hash(&first_archive);
    let digest_b = compute_file_hash(&second_archive);
    assert_eq!(
        digest_a, digest_b,
        "Archives differ! This means compression is non-deterministic.\nArchive 1: {digest_a}\nArchive 2: {digest_b}"
    );

    // Best-effort removal of the temporary files (errors are ignored).
    for leftover in [&source_fasta, &first_archive, &second_archive] {
        let _ = fs::remove_file(leftover);
    }
}
/// Interoperability tests against the reference C++ `agc` command-line tool,
/// plus N-base round-trip coverage.
///
/// Tests that need the external binary return early (printing a skip notice to
/// stderr) when `agc` cannot be launched. Temporary files are removed on a
/// best-effort basis at the end of each test; they are left behind when an
/// assertion fails first.
#[cfg(test)]
mod with_cpp_agc {
    use super::*;

    /// Returns `true` when an `agc` executable can be spawned.
    ///
    /// Only the ability to launch the process is checked; the exit status of
    /// `agc --version` is ignored.
    fn cpp_agc_available() -> bool {
        Command::new("agc").arg("--version").output().is_ok()
    }

    /// An archive written by ragc must be extractable by C++ `agc getset`, and
    /// the extracted FASTA must hash identically to the original input.
    #[test]
    fn test_cpp_can_read_ragc_archives() {
        if !cpp_agc_available() {
            eprintln!("Skipping C++ compatibility test: C++ agc not found");
            return;
        }
        let test_dir = std::env::temp_dir();
        let fasta_path = test_dir.join("test_cpp_read.fasta");
        let archive_path = test_dir.join("test_cpp_read.agc");
        let output_path = test_dir.join("test_cpp_read_out.fasta");

        // Create test FASTA
        create_test_fasta(&fasta_path);
        let original_hash = compute_file_hash(&fasta_path);

        // Create archive with ragc
        let config = CompressorConfig::default();
        let mut compressor = Compressor::new(archive_path.to_str().unwrap(), config)
            .expect("Failed to create compressor");
        compressor
            .add_fasta_file("test_sample", &fasta_path)
            .expect("Failed to add FASTA");
        compressor.finalize().expect("Failed to finalize archive");

        // Extract with C++ agc; the FASTA is captured from its stdout.
        let output = Command::new("agc")
            .arg("getset")
            .arg(archive_path.to_str().unwrap())
            .arg("test_sample")
            .output()
            .expect("Failed to run C++ agc");
        assert!(
            output.status.success(),
            "C++ agc failed to extract: {}",
            String::from_utf8_lossy(&output.stderr)
        );
        fs::write(&output_path, &output.stdout).expect("Failed to write output");

        let output_hash = compute_file_hash(&output_path);
        assert_eq!(
            original_hash, output_hash,
            "C++ extracted different data!\nOriginal: {original_hash}\nC++ Output: {output_hash}"
        );

        // Clean up
        let _ = fs::remove_file(&fasta_path);
        let _ = fs::remove_file(&archive_path);
        let _ = fs::remove_file(&output_path);
    }

    /// An archive written by C++ `agc create` must be readable by ragc, and the
    /// re-rendered FASTA must hash identically to the original input.
    #[test]
    fn test_ragc_can_read_cpp_archives() {
        if !cpp_agc_available() {
            eprintln!("Skipping C++ compatibility test: C++ agc not found");
            return;
        }
        let test_dir = std::env::temp_dir();
        let fasta_path = test_dir.join("test_ragc_read.fasta");
        let archive_path = test_dir.join("test_ragc_read.agc");
        let output_path = test_dir.join("test_ragc_read_out.fasta");

        // Create test FASTA
        create_test_fasta(&fasta_path);
        let original_hash = compute_file_hash(&fasta_path);

        // Create archive with C++ agc
        let status = Command::new("agc")
            .arg("create")
            .arg("-o")
            .arg(archive_path.to_str().unwrap())
            .arg(fasta_path.to_str().unwrap())
            .status()
            .expect("Failed to run C++ agc create");
        assert!(status.success(), "C++ agc failed to create archive");

        // Extract with ragc.
        // NOTE(review): the sample is looked up by the FASTA file stem;
        // presumably that is how C++ agc names samples — confirm.
        let config = DecompressorConfig::default();
        let mut decompressor = Decompressor::open(archive_path.to_str().unwrap(), config)
            .expect("Failed to open C++ archive");
        let sequences = decompressor
            .get_sample("test_ragc_read")
            .expect("Failed to extract sample");

        // Re-render as FASTA: one header line and one sequence line per contig.
        let mut output_content = String::new();
        for (name, contig) in sequences {
            output_content.push_str(&format!(">{name}\n"));
            output_content.push_str(&contig_to_string(&contig));
            output_content.push('\n');
        }
        fs::write(&output_path, output_content).expect("Failed to write output");

        let output_hash = compute_file_hash(&output_path);
        assert_eq!(
            original_hash, output_hash,
            "ragc extracted different data from C++ archive!\nOriginal: {original_hash}\nragc Output: {output_hash}"
        );

        // Clean up
        let _ = fs::remove_file(&fasta_path);
        let _ = fs::remove_file(&archive_path);
        let _ = fs::remove_file(&output_path);
    }

    /// Test N-base handling in round-trip compression/decompression
    ///
    /// Each pattern is stored as a single contig in a fresh archive (the same
    /// path is reused and removed after every case) and must come back
    /// unchanged, both as numeric codes and as rendered text. This test does
    /// not use the C++ binary.
    #[test]
    fn test_n_bases_round_trip() {
        let test_dir = std::env::temp_dir();
        let archive_path = test_dir.join("test_n_bases.agc");

        // Test various N-base patterns
        let test_cases = [
            ("single_n", "ACGTACGTNACGTACGT"), // Single N
            ("short_n_run", "ACGTNNACGT"),     // 2 N's (< MIN_NRUN_LEN)
            ("medium_n_run", "ACGTNNNACGT"),   // 3 N's (< MIN_NRUN_LEN)
            ("long_n_run", "ACGTNNNNNNNNACGT"), // 8 N's (triggers run encoding)
            ("very_long_n_run", "ACGTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNACGT"), // Very long N run
            ("mixed", "ACGTNACGTNNNACGTNNNNNNNNACGT"), // Mixed single + runs
            ("all_n", "NNNNNNNNNNNN"),         // All N's
            ("start_n", "NNNACGTACGT"),        // N's at start
            ("end_n", "ACGTACGTNNN"),          // N's at end
            ("alternating", "NANANANANANA"),   // Alternating N/A
        ];

        for (name, sequence) in test_cases {
            eprintln!("Testing N-base pattern: {name} ({sequence})");

            // Create archive
            let config = CompressorConfig::default();
            let mut compressor = Compressor::new(archive_path.to_str().unwrap(), config)
                .expect("Failed to create compressor");

            // Convert sequence to numeric encoding (A=0, C=1, G=2, T=3, N=4)
            let numeric: Vec<u8> = sequence
                .bytes()
                .map(|b| match b {
                    b'A' => 0,
                    b'C' => 1,
                    b'G' => 2,
                    b'T' => 3,
                    b'N' => 4,
                    _ => panic!("Invalid base: {}", b as char),
                })
                .collect();

            // The clone is required: `numeric` is compared with the result below.
            compressor
                .add_contig("test_sample", name, numeric.clone())
                .expect("Failed to add contig");
            compressor.finalize().expect("Failed to finalize");

            // Extract and verify
            let config = DecompressorConfig::default();
            let mut decompressor = Decompressor::open(archive_path.to_str().unwrap(), config)
                .expect("Failed to open archive");
            let extracted = decompressor
                .get_contig("test_sample", name)
                .expect("Failed to extract contig");
            assert_eq!(
                numeric, extracted,
                "N-base round-trip failed for pattern '{name}'\nExpected: {numeric:?}\nGot: {extracted:?}"
            );

            // Also verify as string
            let extracted_str = contig_to_string(&extracted);
            assert_eq!(
                sequence, extracted_str,
                "N-base string round-trip failed for pattern '{name}'"
            );

            decompressor.close().expect("Failed to close decompressor");
            let _ = fs::remove_file(&archive_path);
        }
    }

    /// Test N-base compatibility with C++ AGC
    ///
    /// Part 1 builds an archive with C++ `agc create` and reads it with ragc;
    /// part 2 builds an archive with ragc and reads it with C++ `agc getset`.
    #[test]
    fn test_n_bases_cpp_compat() {
        if !cpp_agc_available() {
            eprintln!("Skipping C++ N-base compatibility test: C++ agc not found");
            return;
        }
        let test_dir = std::env::temp_dir();
        let fasta_path = test_dir.join("test_n_compat.fasta");
        let cpp_archive = test_dir.join("test_n_cpp.agc");
        let ragc_archive = test_dir.join("test_n_ragc.agc");

        // Create test FASTA with various N patterns
        let content = concat!(
            ">test_n_sample#1#single_n\n",
            "ACGTACGTNACGTACGT\n",
            ">test_n_sample#1#short_n_run\n",
            "ACGTNNACGT\n",
            ">test_n_sample#1#long_n_run\n",
            "ACGTNNNNNNNNACGT\n",
            ">test_n_sample#1#mixed\n",
            "ACGTNACGTNNNACGTNNNNNNNNACGT\n",
            ">test_n_sample#1#very_long_n_run\n",
            "ACGTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNACGT\n",
        );
        fs::write(&fasta_path, content).expect("Failed to write test FASTA");

        // Test 1: C++ AGC → RAGC
        let status = Command::new("agc")
            .arg("create")
            .arg("-o")
            .arg(cpp_archive.to_str().unwrap())
            .arg(fasta_path.to_str().unwrap())
            .status()
            .expect("Failed to run C++ agc create");
        assert!(status.success(), "C++ agc failed to create archive");

        // NOTE(review): the sample is looked up by the FASTA file stem;
        // presumably that is how C++ agc names samples — confirm.
        let config = DecompressorConfig::default();
        let mut decompressor = Decompressor::open(cpp_archive.to_str().unwrap(), config)
            .expect("Failed to open C++ archive");
        let sequences = decompressor
            .get_sample("test_n_compat")
            .expect("Failed to extract sample from C++ archive");

        for (name, contig) in sequences {
            let contig_str = contig_to_string(&contig);
            eprintln!("C++ AGC → RAGC: {name} = {contig_str}");

            // Validate the raw codes rather than the rendered text:
            // `contig_to_string` renders every unknown code as 'N', so the
            // string can never reveal an invalid base. The fixture contains
            // only A/C/G/T/N, i.e. codes 0..=4.
            assert!(
                contig.iter().all(|&code| code <= 4),
                "Invalid base found in {name} extracted from C++ AGC"
            );

            // Verify N's are present where expected
            if name.contains("_n") {
                assert!(
                    contig_str.contains('N'),
                    "N-bases missing from {name} extracted from C++ AGC"
                );
            }
        }

        // Test 2: RAGC → C++ AGC
        let config = CompressorConfig::default();
        let mut compressor = Compressor::new(ragc_archive.to_str().unwrap(), config)
            .expect("Failed to create RAGC compressor");

        // Add test sequences with N's
        let test_seq: Vec<u8> = vec![0, 1, 2, 3, 4, 4, 4, 4, 0, 1, 2, 3]; // ACGTNNNNACGT
        compressor
            .add_contig("test_ragc", "n_test", test_seq)
            .expect("Failed to add contig");
        compressor
            .finalize()
            .expect("Failed to finalize RAGC archive");

        // Extract with C++ AGC
        let output = Command::new("agc")
            .arg("getset")
            .arg(ragc_archive.to_str().unwrap())
            .arg("test_ragc")
            .output()
            .expect("Failed to run C++ agc getset");

        // Echo everything the tool produced before asserting on it.
        eprintln!(
            "C++ AGC stdout: {}",
            String::from_utf8_lossy(&output.stdout)
        );
        eprintln!(
            "C++ AGC stderr: {}",
            String::from_utf8_lossy(&output.stderr)
        );
        eprintln!("C++ AGC exit status: {}", output.status);
        assert!(
            output.status.success(),
            "C++ agc failed to extract from RAGC archive"
        );

        let extracted = String::from_utf8_lossy(&output.stdout);
        eprintln!("RAGC → C++ AGC extraction:\n{extracted}");

        // Verify N's are present
        assert!(
            !extracted.is_empty(),
            "C++ AGC extraction produced empty output"
        );
        assert!(
            extracted.contains('N'),
            "N-bases missing from C++ AGC extraction of RAGC archive"
        );
        assert!(
            extracted.contains("NNNN"),
            "N-run missing from C++ AGC extraction"
        );

        // Clean up
        let _ = fs::remove_file(&fasta_path);
        let _ = fs::remove_file(&cpp_archive);
        let _ = fs::remove_file(&ragc_archive);
    }
}