use clap::Parser;
use fgumi_lib::commands::command::Command;
use fgumi_lib::commands::correct::CorrectUmis;
use fgumi_raw_bam::RawRecord;
use noodles::bam;
use noodles::sam::alignment::io::Write as AlignmentWrite;
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
use crate::helpers::bam_generator::{create_minimal_header, create_umi_family, to_record_buf};
fn create_umi_bam(path: &PathBuf, families: Vec<Vec<RawRecord>>) {
let header = create_minimal_header("chr1", 10000);
let mut writer =
bam::io::Writer::new(fs::File::create(path).expect("Failed to create BAM file"));
writer.write_header(&header).expect("Failed to write header");
for family in families {
for record in &family {
writer
.write_alignment_record(&header, &to_record_buf(record))
.expect("Failed to write record");
}
}
writer.try_finish().expect("Failed to finish BAM");
}
fn create_whitelist(path: &PathBuf, umis: &[&str]) {
fs::write(path, umis.join("\n")).expect("Failed to write whitelist");
}
#[test]
fn test_correct_command_basic() {
let temp_dir = TempDir::new().unwrap();
let input_bam = temp_dir.path().join("input.bam");
let output_bam = temp_dir.path().join("output.bam");
let whitelist = temp_dir.path().join("whitelist.txt");
let correct_reads = create_umi_family("ACGTACGT", 5, "correct", "AAAAGGGG", 30);
let error_reads = create_umi_family("ACGTACGA", 2, "error", "AAAAGGGG", 30);
create_umi_bam(&input_bam, vec![correct_reads, error_reads]);
create_whitelist(&whitelist, &["ACGTACGT"]);
let cmd = CorrectUmis::try_parse_from([
"correct",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_bam.to_str().unwrap(),
"--umi-files",
whitelist.to_str().unwrap(),
"--max-mismatches",
"1",
"--min-distance",
"1",
"--compression-level",
"1",
])
.expect("failed to parse correct args");
cmd.execute("fgumi correct").expect("Correct command failed");
assert!(output_bam.exists(), "Output BAM not created");
let mut reader = bam::io::Reader::new(fs::File::open(&output_bam).unwrap());
let _header = reader.read_header().unwrap();
let count = reader.records().count();
assert_eq!(count, 7, "Should have all 7 reads in output");
}
#[test]
fn test_correct_command_with_metrics() {
let temp_dir = TempDir::new().unwrap();
let input_bam = temp_dir.path().join("input.bam");
let output_bam = temp_dir.path().join("output.bam");
let whitelist = temp_dir.path().join("whitelist.txt");
let metrics = temp_dir.path().join("metrics.tsv");
let reads = create_umi_family("ACGTACGT", 3, "read", "AAAAGGGG", 30);
create_umi_bam(&input_bam, vec![reads]);
create_whitelist(&whitelist, &["ACGTACGT"]);
let cmd = CorrectUmis::try_parse_from([
"correct",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_bam.to_str().unwrap(),
"--umi-files",
whitelist.to_str().unwrap(),
"--max-mismatches",
"1",
"--min-distance",
"1",
"--metrics",
metrics.to_str().unwrap(),
"--compression-level",
"1",
])
.expect("failed to parse correct args");
cmd.execute("fgumi correct").expect("Correct command with metrics failed");
assert!(metrics.exists(), "Metrics file not created");
let content = fs::read_to_string(&metrics).unwrap();
assert!(!content.is_empty(), "Metrics file should not be empty");
}
#[test]
fn test_correct_command_with_rejects() {
let temp_dir = TempDir::new().unwrap();
let input_bam = temp_dir.path().join("input.bam");
let output_bam = temp_dir.path().join("output.bam");
let rejects_bam = temp_dir.path().join("rejects.bam");
let whitelist = temp_dir.path().join("whitelist.txt");
let uncorrectable = create_umi_family("TTTTTTTT", 2, "far", "AAAAGGGG", 30);
let correctable = create_umi_family("ACGTACGT", 3, "exact", "AAAAGGGG", 30);
create_umi_bam(&input_bam, vec![correctable, uncorrectable]);
create_whitelist(&whitelist, &["ACGTACGT"]);
let cmd = CorrectUmis::try_parse_from([
"correct",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_bam.to_str().unwrap(),
"--umi-files",
whitelist.to_str().unwrap(),
"--max-mismatches",
"1",
"--min-distance",
"1",
"--rejects",
rejects_bam.to_str().unwrap(),
"--compression-level",
"1",
])
.expect("failed to parse correct args");
cmd.execute("fgumi correct").expect("Correct command with rejects failed");
assert!(rejects_bam.exists(), "Rejects BAM not created");
}
#[test]
fn test_correct_command_rejects_streaming_threaded_integrity() {
let temp_dir = TempDir::new().unwrap();
let input_bam = temp_dir.path().join("input.bam");
let output_bam = temp_dir.path().join("output.bam");
let rejects_bam = temp_dir.path().join("rejects.bam");
let whitelist = temp_dir.path().join("whitelist.txt");
let far_family_size: u32 = 400;
let corr_small = create_umi_family("ACGTACGT", 3, "c1_exact", "AAAAGGGG", 30);
let corr_big = create_umi_family("ACGTACGT", 30, "c2_exact", "AAAAGGGG", 30);
let far_a = create_umi_family("TTTTTTTT", far_family_size as usize, "far_a", "AAAAGGGG", 30);
let far_b = create_umi_family("GGGGGGGG", far_family_size as usize, "far_b", "AAAAGGGG", 30);
let far_c = create_umi_family("CCCCCCCC", far_family_size as usize, "far_c", "AAAAGGGG", 30);
let expected_order: Vec<String> = ["far_a", "far_b", "far_c"]
.into_iter()
.flat_map(|base| (0..far_family_size).map(move |i| format!("{base}_{i}")))
.collect();
let expected_names: std::collections::HashSet<String> =
expected_order.iter().cloned().collect();
let expected_rejects = expected_names.len();
create_umi_bam(&input_bam, vec![corr_small, corr_big, far_a, far_b, far_c]);
create_whitelist(&whitelist, &["ACGTACGT"]);
let cmd = CorrectUmis::try_parse_from([
"correct",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_bam.to_str().unwrap(),
"--umi-files",
whitelist.to_str().unwrap(),
"--max-mismatches",
"1",
"--min-distance",
"1",
"--rejects",
rejects_bam.to_str().unwrap(),
"--threads",
"4",
"--compression-level",
"1",
])
.expect("failed to parse correct args");
cmd.execute("fgumi correct").expect("Correct command with threaded rejects failed");
assert!(rejects_bam.exists(), "Rejects BAM not created");
crate::helpers::assertions::assert_has_bgzf_eof(&rejects_bam);
crate::helpers::assertions::assert_rejects_header_matches_input(&rejects_bam, &input_bam);
let mut reader = bam::io::Reader::new(fs::File::open(&rejects_bam).unwrap());
let _header = reader.read_header().unwrap();
let mut observed_order: Vec<String> = Vec::with_capacity(expected_rejects);
for result in reader.records() {
let record = result.expect("Failed to read reject record");
let name = record.name().expect("reject record missing read name").to_string();
observed_order.push(name);
}
assert_eq!(
observed_order.len(),
expected_rejects,
"rejects BAM should contain one record per uncorrectable input read",
);
assert_eq!(observed_order, expected_order, "rejects should be emitted in batch-input order",);
let observed_names: std::collections::HashSet<String> =
observed_order.iter().cloned().collect();
assert_eq!(observed_names, expected_names, "unexpected reject-name set");
}