use fgoxide::io::DelimFile;
use fgumi_lib::metrics::duplex::{DuplexFamilySizeMetric, FamilySizeMetric};
use fgumi_lib::metrics::shared::UmiMetric;
use fgumi_raw_bam::{RawRecord, SamBuilder, flags};
use noodles::bam;
use noodles::sam::Header;
use noodles::sam::alignment::io::Write as AlignmentWrite;
use std::fs;
use std::path::PathBuf;
use std::process::Command;
use tempfile::TempDir;
use crate::helpers::bam_generator::{create_minimal_header, to_record_buf};
#[allow(clippy::too_many_arguments)]
fn create_duplex_pair(
name: &str,
ref_id: i32,
pos1: i32,
pos2: i32,
rx_umi: &str,
mi_tag: &str,
strand1_forward: bool,
strand2_forward: bool,
) -> (RawRecord, RawRecord) {
let sequence = b"ACGTACGTACGTACGT";
let quality = 30u8;
let cigar_op = u32::try_from(sequence.len()).expect("sequence.len() fits u32") << 4;
let r1_flags = flags::PAIRED
| flags::FIRST_SEGMENT
| if strand1_forward { 0 } else { flags::REVERSE }
| if strand2_forward { 0 } else { flags::MATE_REVERSE };
let r2_flags = flags::PAIRED
| flags::LAST_SEGMENT
| if strand2_forward { 0 } else { flags::REVERSE }
| if strand1_forward { 0 } else { flags::MATE_REVERSE };
let tlen = pos2 - pos1 + i32::try_from(sequence.len()).expect("sequence.len() fits i32");
let r1 = {
let mut b = SamBuilder::new();
b.read_name(name.as_bytes())
.sequence(sequence)
.qualities(&vec![quality; sequence.len()])
.flags(r1_flags)
.ref_id(ref_id)
.pos(pos1 - 1)
.mapq(60)
.cigar_ops(&[cigar_op])
.mate_ref_id(ref_id)
.mate_pos(pos2 - 1)
.template_length(tlen)
.add_string_tag(b"RX", rx_umi.as_bytes())
.add_string_tag(b"MI", mi_tag.as_bytes());
b.build()
};
let r2 = {
let mut b = SamBuilder::new();
b.read_name(name.as_bytes())
.sequence(sequence)
.qualities(&vec![quality; sequence.len()])
.flags(r2_flags)
.ref_id(ref_id)
.pos(pos2 - 1)
.mapq(60)
.cigar_ops(&[cigar_op])
.mate_ref_id(ref_id)
.mate_pos(pos1 - 1)
.template_length(-tlen)
.add_string_tag(b"RX", rx_umi.as_bytes())
.add_string_tag(b"MI", mi_tag.as_bytes());
b.build()
};
(r1, r2)
}
fn create_duplex_test_bam(path: &PathBuf, header: &Header) {
let mut writer =
bam::io::Writer::new(fs::File::create(path).expect("Failed to create BAM file"));
writer.write_header(header).expect("Failed to write header");
let mut records = Vec::new();
for i in 0..2 {
let (r1, r2) = create_duplex_pair(
&format!("fam1_a_{i}"),
0,
100,
200,
"AAA-TTT",
"1/A",
true, false, );
records.push(r1);
records.push(r2);
}
let (r1, r2) = create_duplex_pair(
"fam1_b_0", 0, 100, 200, "TTT-AAA", "1/B", false, true, );
records.push(r1);
records.push(r2);
let (r1, r2) = create_duplex_pair(
"fam2_a_0", 0, 300, 400, "CCC-GGG", "2/A", true, false, );
records.push(r1);
records.push(r2);
for i in 0..3 {
let (r1, r2) = create_duplex_pair(
&format!("fam3_a_{i}"),
0,
500,
600,
"GGG-CCC",
"3/A",
true, false, );
records.push(r1);
records.push(r2);
}
for i in 0..2 {
let (r1, r2) = create_duplex_pair(
&format!("fam3_b_{i}"),
0,
500,
600,
"CCC-GGG",
"3/B",
false, true, );
records.push(r1);
records.push(r2);
}
for record in &records {
writer
.write_alignment_record(header, &to_record_buf(record))
.expect("Failed to write record");
}
writer.finish(header).expect("Failed to finish BAM");
}
#[test]
fn test_duplex_metrics_command_creates_output_files() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_prefix = temp_dir.path().join("output");
let header = create_minimal_header("chr1", 10000);
create_duplex_test_bam(&input_bam, &header);
let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"duplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_prefix.to_str().unwrap(),
])
.status()
.expect("Failed to run duplex-metrics command");
assert!(status.success(), "duplex-metrics command failed");
let family_sizes_path = format!("{}.family_sizes.txt", output_prefix.display());
let duplex_family_sizes_path = format!("{}.duplex_family_sizes.txt", output_prefix.display());
let yield_metrics_path = format!("{}.duplex_yield_metrics.txt", output_prefix.display());
let umi_counts_path = format!("{}.umi_counts.txt", output_prefix.display());
assert!(std::path::Path::new(&family_sizes_path).exists(), "family_sizes.txt not created");
assert!(
std::path::Path::new(&duplex_family_sizes_path).exists(),
"duplex_family_sizes.txt not created"
);
assert!(
std::path::Path::new(&yield_metrics_path).exists(),
"duplex_yield_metrics.txt not created"
);
assert!(std::path::Path::new(&umi_counts_path).exists(), "umi_counts.txt not created");
}
#[test]
fn test_duplex_metrics_command_family_sizes() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_prefix = temp_dir.path().join("output");
let header = create_minimal_header("chr1", 10000);
create_duplex_test_bam(&input_bam, &header);
let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"duplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_prefix.to_str().unwrap(),
])
.status()
.expect("Failed to run duplex-metrics command");
assert!(status.success(), "duplex-metrics command failed");
let family_sizes_path = format!("{}.family_sizes.txt", output_prefix.display());
let metrics: Vec<FamilySizeMetric> =
DelimFile::default().read_tsv(&family_sizes_path).expect("Failed to read family sizes");
assert!(!metrics.is_empty(), "Should have family size metrics");
let size_1 = metrics.iter().find(|m| m.family_size == 1);
assert!(size_1.is_some(), "Should have family size 1");
}
#[test]
fn test_duplex_metrics_command_duplex_family_sizes() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_prefix = temp_dir.path().join("output");
let header = create_minimal_header("chr1", 10000);
create_duplex_test_bam(&input_bam, &header);
let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"duplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_prefix.to_str().unwrap(),
])
.status()
.expect("Failed to run duplex-metrics command");
assert!(status.success(), "duplex-metrics command failed");
let duplex_family_sizes_path = format!("{}.duplex_family_sizes.txt", output_prefix.display());
let metrics: Vec<DuplexFamilySizeMetric> = DelimFile::default()
.read_tsv(&duplex_family_sizes_path)
.expect("Failed to read duplex family sizes");
assert!(!metrics.is_empty(), "Should have duplex family size metrics");
for m in &metrics {
assert!(m.ab_size > 0 || m.ba_size > 0, "Family should have at least one strand");
assert!(m.count > 0, "Family count should be positive");
}
}
#[test]
fn test_duplex_metrics_command_umi_metrics() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_prefix = temp_dir.path().join("output");
let header = create_minimal_header("chr1", 10000);
create_duplex_test_bam(&input_bam, &header);
let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"duplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_prefix.to_str().unwrap(),
])
.status()
.expect("Failed to run duplex-metrics command");
assert!(status.success(), "duplex-metrics command failed");
let umi_counts_path = format!("{}.umi_counts.txt", output_prefix.display());
let metrics: Vec<UmiMetric> =
DelimFile::default().read_tsv(&umi_counts_path).expect("Failed to read UMI counts");
assert!(!metrics.is_empty(), "Should have UMI metrics");
let total_raw: usize = metrics.iter().map(|m| m.raw_observations).sum();
assert!(total_raw > 0, "Should have raw UMI observations");
}
#[test]
fn test_duplex_metrics_command_with_duplex_umi_counts() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_prefix = temp_dir.path().join("output");
let header = create_minimal_header("chr1", 10000);
create_duplex_test_bam(&input_bam, &header);
let status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"duplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_prefix.to_str().unwrap(),
"--duplex-umi-counts",
])
.status()
.expect("Failed to run duplex-metrics command");
assert!(status.success(), "duplex-metrics command failed with --duplex-umi-counts");
let duplex_umi_counts_path = format!("{}.duplex_umi_counts.txt", output_prefix.display());
assert!(
std::path::Path::new(&duplex_umi_counts_path).exists(),
"duplex_umi_counts.txt not created when --duplex-umi-counts is specified"
);
}
#[test]
fn test_duplex_metrics_command_min_reads_parameters() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_prefix_default = temp_dir.path().join("output_default");
let output_prefix_strict = temp_dir.path().join("output_strict");
let header = create_minimal_header("chr1", 10000);
create_duplex_test_bam(&input_bam, &header);
let status_default = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"duplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_prefix_default.to_str().unwrap(),
])
.status()
.expect("Failed to run duplex-metrics command (default)");
assert!(status_default.success(), "duplex-metrics command failed (default)");
let status_strict = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"duplex-metrics",
"--input",
input_bam.to_str().unwrap(),
"--output",
output_prefix_strict.to_str().unwrap(),
"--min-ab-reads",
"2",
"--min-ba-reads",
"2",
])
.status()
.expect("Failed to run duplex-metrics command (strict)");
assert!(status_strict.success(), "duplex-metrics command failed (strict)");
let default_path = format!("{}.duplex_family_sizes.txt", output_prefix_default.display());
let strict_path = format!("{}.duplex_family_sizes.txt", output_prefix_strict.display());
let default_metrics: Vec<DuplexFamilySizeMetric> = DelimFile::default()
.read_tsv(&default_path)
.expect("Failed to read default duplex metrics");
let strict_metrics: Vec<DuplexFamilySizeMetric> =
DelimFile::default().read_tsv(&strict_path).expect("Failed to read strict duplex metrics");
assert!(
default_metrics.len() >= strict_metrics.len(),
"Strict parameters should result in fewer or equal duplex families"
);
}