use fgoxide::io::DelimFile;
use fgumi_lib::metrics::UmiGroupingMetrics;
use noodles::bam;
use noodles::sam::alignment::io::Write as AlignmentWrite;
use std::fs;
use std::path::PathBuf;
use std::process::Command;
use tempfile::TempDir;
use crate::helpers::bam_generator::{create_minimal_header, create_umi_family, to_record_buf};
/// Runs `fgumi group` end to end on a small generated BAM and checks that the
/// grouping-metrics file is written and deserializes into [`UmiGroupingMetrics`].
///
/// The command is invoked with the `identity` strategy and zero edits. The test
/// then verifies that exactly one metrics row is produced, that the main
/// counters are positive, and that the discard counters exist on the struct.
#[test]
fn test_group_command_writes_new_metrics() {
    let workspace = TempDir::new().expect("Failed to create temp dir");
    let [bam_in, bam_out, metrics_path] =
        ["input.bam", "output.bam", "metrics.txt"].map(|name| workspace.path().join(name));
    create_test_input_bam(&bam_in);

    // Command line for the binary under test; paths must be valid UTF-8.
    let cli_args = [
        "group",
        "--input",
        bam_in.to_str().unwrap(),
        "--output",
        bam_out.to_str().unwrap(),
        "--strategy",
        "identity",
        "--edits",
        "0",
        "--grouping-metrics",
        metrics_path.to_str().unwrap(),
        "--compression-level",
        "1",
    ];
    let exit = Command::new(env!("CARGO_BIN_EXE_fgumi"))
        .args(cli_args)
        .status()
        .expect("Failed to run group command");

    // The process must succeed and leave both artifacts on disk.
    assert!(exit.success(), "Group command failed");
    assert!(bam_out.exists(), "Output BAM not created");
    assert!(metrics_path.exists(), "Metrics file not created");

    let rows: Vec<UmiGroupingMetrics> =
        DelimFile::default().read_tsv(&metrics_path).expect("Failed to read metrics file");
    assert_eq!(rows.len(), 1, "Expected exactly one metrics record");

    let row = &rows[0];
    assert!(row.total_records > 0, "total_records should be positive");
    assert!(row.accepted_records > 0, "accepted_records should be positive");
    assert!(row.unique_molecule_ids > 0, "unique_molecule_ids should be positive");

    // Field accesses only: these fail to compile if a discard counter is
    // removed or renamed on `UmiGroupingMetrics`. Their values are not checked.
    let _ = row.discarded_non_pf;
    let _ = row.discarded_poor_alignment;
    let _ = row.discarded_ns_in_umi;
    let _ = row.discarded_umi_too_short;
}
/// Runs `fgumi group` on a BAM that mixes a family with UMI `AAAAAAAA` and a
/// family with UMI `NNNNNNNN`, and checks that the `discarded_ns_in_umi`
/// counter in the grouping metrics is non-zero.
///
/// The command is invoked with the `identity` strategy and zero edits, the
/// same options as the sibling metrics test.
#[test]
fn test_group_command_rejects_n_bases_in_umi() {
    let temp_dir = TempDir::new().expect("Failed to create temp dir");
    let input_bam = temp_dir.path().join("input.bam");
    let output_bam = temp_dir.path().join("output.bam");
    let metrics_file = temp_dir.path().join("metrics.txt");
    create_test_bam_with_n_umis(&input_bam);

    let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
        .args([
            "group",
            "--input",
            input_bam.to_str().unwrap(),
            "--output",
            output_bam.to_str().unwrap(),
            "--strategy",
            "identity",
            "--edits",
            "0",
            "--grouping-metrics",
            metrics_file.to_str().unwrap(),
            "--compression-level",
            "1",
        ])
        .status()
        .expect("Failed to run group command");
    assert!(status.success(), "Group command failed");

    let metrics: Vec<UmiGroupingMetrics> =
        DelimFile::default().read_tsv(&metrics_file).expect("Failed to read metrics");
    // Check the row count before indexing so an empty metrics file fails with
    // a descriptive message instead of an index-out-of-bounds panic.
    assert_eq!(metrics.len(), 1, "Expected exactly one metrics record");

    let metric = &metrics[0];
    assert!(metric.discarded_ns_in_umi > 0, "Should have discarded reads with N in UMI");
}
fn create_test_input_bam(path: &PathBuf) {
let header = create_minimal_header("chr1", 10000);
let mut writer =
bam::io::Writer::new(fs::File::create(path).expect("Failed to create BAM file"));
writer.write_header(&header).expect("Failed to write header");
let family1 = create_umi_family("AAAAAAAA", 10, "family1", "ACGTACGT", 30);
let family2 = create_umi_family("CCCCCCCC", 5, "family2", "TGCATGCA", 30);
let family3 = create_umi_family("GGGGGGGG", 15, "family3", "ATCGATCG", 30);
for record in family1.iter().chain(family2.iter()).chain(family3.iter()) {
writer
.write_alignment_record(&header, &to_record_buf(record))
.expect("Failed to write record");
}
writer.try_finish().expect("Failed to finish BAM");
}
fn create_test_bam_with_n_umis(path: &PathBuf) {
let header = create_minimal_header("chr1", 10000);
let mut writer =
bam::io::Writer::new(fs::File::create(path).expect("Failed to create BAM file"));
writer.write_header(&header).expect("Failed to write header");
let good_family = create_umi_family("AAAAAAAA", 5, "good", "ACGTACGT", 30);
let bad_family = create_umi_family("NNNNNNNN", 3, "bad", "TGCATGCA", 30);
for record in good_family.iter().chain(bad_family.iter()) {
writer
.write_alignment_record(&header, &to_record_buf(record))
.expect("Failed to write record");
}
writer.try_finish().expect("Failed to finish BAM");
}