use fgoxide::io::DelimFile;
use fgumi_lib::metrics::shared::UmiMetric;
use fgumi_lib::metrics::simplex::{SimplexFamilySizeMetric, SimplexYieldMetric};
use fgumi_lib::sam::builder::RecordBuilder;
use noodles::bam;
use noodles::sam::Header;
use noodles::sam::alignment::io::Write as AlignmentWrite;
use noodles::sam::alignment::record_buf::RecordBuf;
use std::fs;
use std::path::PathBuf;
use std::process::Command;
use tempfile::TempDir;
use crate::helpers::bam_generator::create_minimal_header;
fn create_simplex_pair(
name: &str,
ref_id: usize,
pos1: usize,
pos2: usize,
rx_umi: &str,
mi_tag: &str,
) -> (RecordBuf, RecordBuf) {
let sequence = "ACGTACGTACGTACGT";
let quality = 30u8;
let r1 = RecordBuilder::new()
.name(name)
.sequence(sequence)
.qualities(&vec![quality; sequence.len()])
.paired(true)
.first_segment(true)
.reference_sequence_id(ref_id)
.alignment_start(pos1)
.mapping_quality(60)
.tag("RX", rx_umi)
.tag("MI", mi_tag)
.build();
let r2 = RecordBuilder::new()
.name(name)
.sequence(sequence)
.qualities(&vec![quality; sequence.len()])
.paired(true)
.first_segment(false)
.reference_sequence_id(ref_id)
.alignment_start(pos2)
.mapping_quality(60)
.reverse_complement(true)
.tag("RX", rx_umi)
.tag("MI", mi_tag)
.build();
(r1, r2)
}
fn create_simplex_test_bam(path: &PathBuf, header: &Header) {
let mut writer =
bam::io::Writer::new(fs::File::create(path).expect("Failed to create BAM file"));
writer.write_header(header).expect("Failed to write header");
let mut records = Vec::new();
for i in 0..3 {
let (r1, r2) = create_simplex_pair(&format!("fam1_{i}"), 0, 100, 200, "AAA", "1/A");
records.push(r1);
records.push(r2);
}
let (r1, r2) = create_simplex_pair("fam2_0", 0, 100, 200, "CCC", "2/A");
records.push(r1);
records.push(r2);
for i in 0..2 {
let (r1, r2) = create_simplex_pair(&format!("fam3_{i}"), 0, 300, 400, "GGG", "3/A");
records.push(r1);
records.push(r2);
}
for record in &records {
writer.write_alignment_record(header, record).expect("Failed to write record");
}
writer.finish(header).expect("Failed to finish BAM");
}
#[test]
fn test_simplex_metrics_command_creates_output_files() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_prefix = temp_dir.path().join("output");
let header = create_minimal_header("chr1", 10000);
create_simplex_test_bam(&input_bam, &header);
let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"simplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_prefix.to_str().unwrap(),
])
.status()
.expect("Failed to run simplex-metrics command");
assert!(status.success(), "simplex-metrics command failed");
let family_sizes_path = format!("{}.family_sizes.txt", output_prefix.display());
let yield_metrics_path = format!("{}.simplex_yield_metrics.txt", output_prefix.display());
let umi_counts_path = format!("{}.umi_counts.txt", output_prefix.display());
assert!(std::path::Path::new(&family_sizes_path).exists(), "family_sizes.txt not created");
assert!(
std::path::Path::new(&yield_metrics_path).exists(),
"simplex_yield_metrics.txt not created"
);
assert!(std::path::Path::new(&umi_counts_path).exists(), "umi_counts.txt not created");
}
#[test]
fn test_simplex_metrics_command_family_sizes() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_prefix = temp_dir.path().join("output");
let header = create_minimal_header("chr1", 10000);
create_simplex_test_bam(&input_bam, &header);
let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"simplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_prefix.to_str().unwrap(),
])
.status()
.expect("Failed to run simplex-metrics command");
assert!(status.success());
let family_sizes_path = format!("{}.family_sizes.txt", output_prefix.display());
let metrics: Vec<SimplexFamilySizeMetric> =
DelimFile::default().read_tsv(&family_sizes_path).expect("Failed to read family sizes");
assert!(!metrics.is_empty(), "Should have family size metrics");
let size_1 = metrics.iter().find(|m| m.family_size == 1).unwrap();
assert_eq!(size_1.ss_count, 1, "One SS family of size 1 (family 2)");
let size_2 = metrics.iter().find(|m| m.family_size == 2).unwrap();
assert_eq!(size_2.ss_count, 1, "One SS family of size 2 (family 3)");
let size_3 = metrics.iter().find(|m| m.family_size == 3).unwrap();
assert_eq!(size_3.ss_count, 1, "One SS family of size 3 (family 1)");
}
#[test]
fn test_simplex_metrics_command_yield_metrics() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_prefix = temp_dir.path().join("output");
let header = create_minimal_header("chr1", 10000);
create_simplex_test_bam(&input_bam, &header);
let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"simplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_prefix.to_str().unwrap(),
])
.status()
.expect("Failed to run simplex-metrics command");
assert!(status.success());
let yield_path = format!("{}.simplex_yield_metrics.txt", output_prefix.display());
let metrics: Vec<SimplexYieldMetric> =
DelimFile::default().read_tsv(&yield_path).expect("Failed to read yield metrics");
assert_eq!(metrics.len(), 20, "Should have 20 downsampling fractions");
let full = metrics.last().expect("Should have at least one yield metric");
assert!((full.fraction - 1.0).abs() < 0.01);
assert_eq!(full.ss_families, 3, "Should have 3 SS families at 100%");
assert_eq!(full.cs_families, 2, "Should have 2 CS families at 100%");
}
#[test]
fn test_simplex_metrics_command_umi_metrics() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_prefix = temp_dir.path().join("output");
let header = create_minimal_header("chr1", 10000);
create_simplex_test_bam(&input_bam, &header);
let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"simplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_prefix.to_str().unwrap(),
])
.status()
.expect("Failed to run simplex-metrics command");
assert!(status.success());
let umi_counts_path = format!("{}.umi_counts.txt", output_prefix.display());
let metrics: Vec<UmiMetric> =
DelimFile::default().read_tsv(&umi_counts_path).expect("Failed to read UMI counts");
assert!(!metrics.is_empty(), "Should have UMI metrics");
let total_raw: usize = metrics.iter().map(|m| m.raw_observations).sum();
assert!(total_raw > 0, "Should have raw UMI observations");
}
#[test]
fn test_simplex_metrics_min_reads_parameter() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_default = temp_dir.path().join("output_default");
let output_strict = temp_dir.path().join("output_strict");
let header = create_minimal_header("chr1", 10000);
create_simplex_test_bam(&input_bam, &header);
let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"simplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_default.to_str().unwrap(),
])
.status()
.expect("Failed to run simplex-metrics (default)");
assert!(status.success());
let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"simplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_strict.to_str().unwrap(),
"--min-reads",
"3",
])
.status()
.expect("Failed to run simplex-metrics (strict)");
assert!(status.success());
let default_yield_path = format!("{}.simplex_yield_metrics.txt", output_default.display());
let strict_yield_path = format!("{}.simplex_yield_metrics.txt", output_strict.display());
let default_metrics: Vec<SimplexYieldMetric> =
DelimFile::default().read_tsv(&default_yield_path).unwrap();
let strict_metrics: Vec<SimplexYieldMetric> =
DelimFile::default().read_tsv(&strict_yield_path).unwrap();
let default_full = default_metrics.last().expect("Should have at least one yield metric");
let strict_full = strict_metrics.last().expect("Should have at least one yield metric");
assert_eq!(default_full.ss_consensus_families, 3);
assert_eq!(strict_full.ss_consensus_families, 1);
}