use fgumi_lib::sam::SamTag;
use fgumi_raw_bam::{RawRecord, SamBuilder, flags};
use noodles::bam;
use noodles::sam;
use noodles::sam::alignment::io::Write as AlignmentWrite;
use noodles::sam::alignment::record::data::field::Tag;
use noodles::sam::alignment::record_buf::RecordBuf;
use std::fs;
use std::io::Write;
use std::path::Path;
use std::process::{Command, Stdio};
use tempfile::TempDir;
use crate::helpers::bam_generator::{create_minimal_header, create_test_reference, to_record_buf};
/// Writes `records` to a new BAM file at `path` using a default `sam::Header`.
///
/// Every raw record is converted with `to_record_buf` immediately before it is written.
///
/// # Panics
///
/// Panics if the file cannot be created, or if writing the header, any record, or the
/// trailing BAM data fails.
fn create_unmapped_bam(path: &Path, records: &[RawRecord]) {
    let bam_file = fs::File::create(path).expect("Failed to create unmapped BAM");
    let default_header = sam::Header::default();
    let mut bam_out = bam::io::Writer::new(bam_file);

    bam_out.write_header(&default_header).expect("Failed to write header");
    // Stops at the first record that fails to serialize and surfaces that error.
    records
        .iter()
        .try_for_each(|raw| bam_out.write_alignment_record(&default_header, &to_record_buf(raw)))
        .expect("Failed to write record");
    bam_out.finish(&default_header).expect("Failed to finish BAM");
}
/// Writes `header` followed by `records` as a text SAM file at `path`.
///
/// Every raw record is converted with `to_record_buf` immediately before it is written.
///
/// # Panics
///
/// Panics if the file cannot be created, or if writing the header or any record fails.
fn create_mapped_sam(path: &Path, header: &sam::Header, records: &[RawRecord]) {
    let mut sam_out =
        sam::io::Writer::new(fs::File::create(path).expect("Failed to create mapped SAM"));

    sam_out.write_header(header).expect("Failed to write header");
    // Stops at the first record that fails to serialize and surfaces that error.
    records
        .iter()
        .try_for_each(|raw| sam_out.write_alignment_record(header, &to_record_buf(raw)))
        .expect("Failed to write record");
}
/// Writes `header` followed by `records` as a BAM file at `path`.
///
/// Every raw record is converted with `to_record_buf` immediately before it is written.
///
/// # Panics
///
/// Panics if the file cannot be created, or if writing the header, any record, or the
/// trailing BAM data fails.
fn create_mapped_bam(path: &Path, header: &sam::Header, records: &[RawRecord]) {
    let bam_file = fs::File::create(path).expect("Failed to create mapped BAM");
    let mut bam_out = bam::io::Writer::new(bam_file);

    bam_out.write_header(header).expect("Failed to write header");
    // Stops at the first record that fails to serialize and surfaces that error.
    records
        .iter()
        .try_for_each(|raw| bam_out.write_alignment_record(header, &to_record_buf(raw)))
        .expect("Failed to write record");
    bam_out.finish(header).expect("Failed to finish BAM");
}
/// End-to-end check that `fgumi zipper` combines an unmapped BAM with a mapped SAM.
///
/// Two unmapped reads carrying `RX` and `QX` string tags are written alongside two
/// mapped reads with the same read names. The test asserts that the command succeeds,
/// that the output BAM holds two records, and that every output record has an `RX` tag.
#[test]
fn test_zipper_basic_merge() {
    let temp_dir = TempDir::new().unwrap();
    let unmapped_bam = temp_dir.path().join("unmapped.bam");
    let mapped_sam = temp_dir.path().join("mapped.sam");
    let output_bam = temp_dir.path().join("output.bam");
    let ref_path = create_test_reference(temp_dir.path());

    // Unmapped side: these records are the only ones that carry the RX/QX tags.
    let unmapped_records = vec![
        {
            let mut b = SamBuilder::new();
            b.read_name(b"read1")
                .sequence(b"ACGTACGT")
                .qualities(&[30; 8])
                .flags(flags::UNMAPPED)
                .add_string_tag(b"RX", b"AACCGGTT")
                .add_string_tag(b"QX", b"IIIIIIII");
            b.build()
        },
        {
            let mut b = SamBuilder::new();
            b.read_name(b"read2")
                .sequence(b"TGCATGCA")
                .qualities(&[30; 8])
                .flags(flags::UNMAPPED)
                .add_string_tag(b"RX", b"GGTTCCAA")
                .add_string_tag(b"QX", b"IIIIIIII");
            b.build()
        },
    ];
    create_unmapped_bam(&unmapped_bam, &unmapped_records);

    // Mapped side: same read names and bases, no tags.
    // NOTE(review): `8 << 4` is presumably BAM's packed CIGAR form (length << 4 | op),
    // i.e. 8M — confirm against `SamBuilder::cigar_ops`.
    let mapped_header = create_minimal_header("chr1", 10000);
    let mapped_records = vec![
        {
            let mut b = SamBuilder::new();
            b.read_name(b"read1")
                .sequence(b"ACGTACGT")
                .qualities(&[30; 8])
                .ref_id(0)
                .pos(99)
                .mapq(60)
                .cigar_ops(&[8 << 4]);
            b.build()
        },
        {
            let mut b = SamBuilder::new();
            b.read_name(b"read2")
                .sequence(b"TGCATGCA")
                .qualities(&[30; 8])
                .ref_id(0)
                .pos(199)
                .mapq(60)
                .cigar_ops(&[8 << 4]);
            b.build()
        },
    ];
    create_mapped_sam(&mapped_sam, &mapped_header, &mapped_records);

    // Capture the child's output (rather than only its exit status) so that a failure
    // reports what the command printed to stderr.
    let output = Command::new(env!("CARGO_BIN_EXE_fgumi"))
        .args([
            "zipper",
            "--input",
            mapped_sam.to_str().unwrap(),
            "--unmapped",
            unmapped_bam.to_str().unwrap(),
            "--reference",
            ref_path.to_str().unwrap(),
            "--output",
            output_bam.to_str().unwrap(),
            "--compression-level",
            "1",
        ])
        .output()
        .expect("Failed to run zipper command");
    assert!(
        output.status.success(),
        "Zipper command failed: {}",
        String::from_utf8_lossy(&output.stderr)
    );
    assert!(output_bam.exists(), "Output BAM not created");

    let mut reader = bam::io::Reader::new(fs::File::open(&output_bam).unwrap());
    let header = reader.read_header().unwrap();
    let records: Vec<RecordBuf> = reader.record_bufs(&header).map(|r| r.unwrap()).collect();
    assert_eq!(records.len(), 2, "Should have 2 records in output");

    let rx_tag = Tag::from(SamTag::RX);
    for record in &records {
        assert!(record.data().get(&rx_tag).is_some(), "Output record should have RX tag");
    }
}
/// End-to-end check of `fgumi zipper --tags-to-remove`.
///
/// The single unmapped read carries an `RX` tag and an `XY` tag. The command is run with
/// `--tags-to-remove XY`, and the test asserts that the one output record still has `RX`
/// but no longer has `XY`.
#[test]
fn test_zipper_tag_removal() {
    let temp_dir = TempDir::new().unwrap();
    let unmapped_bam = temp_dir.path().join("unmapped.bam");
    let mapped_sam = temp_dir.path().join("mapped.sam");
    let output_bam = temp_dir.path().join("output.bam");
    let ref_path = create_test_reference(temp_dir.path());

    // Unmapped side: RX must survive, XY is the tag named in --tags-to-remove.
    let unmapped_records = vec![{
        let mut b = SamBuilder::new();
        b.read_name(b"read1")
            .sequence(b"ACGTACGT")
            .qualities(&[30; 8])
            .flags(flags::UNMAPPED)
            .add_string_tag(b"RX", b"AACCGGTT")
            .add_string_tag(b"XY", b"REMOVE_ME");
        b.build()
    }];
    create_unmapped_bam(&unmapped_bam, &unmapped_records);

    // Mapped side: same read name and bases, no tags.
    // NOTE(review): `8 << 4` is presumably BAM's packed CIGAR form (length << 4 | op),
    // i.e. 8M — confirm against `SamBuilder::cigar_ops`.
    let mapped_header = create_minimal_header("chr1", 10000);
    let mapped_records = vec![{
        let mut b = SamBuilder::new();
        b.read_name(b"read1")
            .sequence(b"ACGTACGT")
            .qualities(&[30; 8])
            .ref_id(0)
            .pos(99)
            .mapq(60)
            .cigar_ops(&[8 << 4]);
        b.build()
    }];
    create_mapped_sam(&mapped_sam, &mapped_header, &mapped_records);

    // Capture the child's output (rather than only its exit status) so that a failure
    // reports what the command printed to stderr.
    let output = Command::new(env!("CARGO_BIN_EXE_fgumi"))
        .args([
            "zipper",
            "--input",
            mapped_sam.to_str().unwrap(),
            "--unmapped",
            unmapped_bam.to_str().unwrap(),
            "--reference",
            ref_path.to_str().unwrap(),
            "--output",
            output_bam.to_str().unwrap(),
            "--tags-to-remove",
            "XY",
            "--compression-level",
            "1",
        ])
        .output()
        .expect("Failed to run zipper command");
    assert!(
        output.status.success(),
        "Zipper command with --tags-to-remove failed: {}",
        String::from_utf8_lossy(&output.stderr)
    );
    // Fail with a clear message instead of an opaque `File::open` panic below.
    assert!(output_bam.exists(), "Output BAM not created");

    let mut reader = bam::io::Reader::new(fs::File::open(&output_bam).unwrap());
    let header = reader.read_header().unwrap();
    let records: Vec<RecordBuf> = reader.record_bufs(&header).map(|r| r.unwrap()).collect();
    assert_eq!(records.len(), 1);

    let rx_tag = Tag::from(SamTag::RX);
    let xy_tag = Tag::from(SamTag::new(b'X', b'Y'));
    assert!(records[0].data().get(&rx_tag).is_some(), "RX tag should be present");
    assert!(records[0].data().get(&xy_tag).is_none(), "XY tag should have been removed");
}
/// Verifies that `fgumi zipper` exits unsuccessfully when both the mapped and the
/// unmapped input paths refer to files that were never created.
#[test]
fn test_zipper_missing_input() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path();
    let output_bam = root.join("output.bam");
    let ref_path = create_test_reference(root);

    // Neither of these paths is ever written to by this test.
    let missing_mapped = root.join("missing.mapped.sam");
    let missing_unmapped = root.join("missing.unmapped.bam");

    let zipper_args = [
        "zipper",
        "--input",
        missing_mapped.to_str().unwrap(),
        "--unmapped",
        missing_unmapped.to_str().unwrap(),
        "--reference",
        ref_path.to_str().unwrap(),
        "--output",
        output_bam.to_str().unwrap(),
    ];
    let exit_status = Command::new(env!("CARGO_BIN_EXE_fgumi"))
        .args(zipper_args)
        .status()
        .expect("Failed to run zipper command");

    assert!(!exit_status.success(), "Zipper should fail for nonexistent input");
}
/// End-to-end check that `fgumi zipper` accepts a BAM file as its mapped `--input`.
///
/// Beyond the same record-count and `RX` checks as the SAM-input case, this test asserts
/// that the command's stderr contains the text `BAM input detected`.
#[test]
fn test_zipper_bam_mapped_input() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path();
    let unmapped_bam = root.join("unmapped.bam");
    let mapped_bam = root.join("mapped.bam");
    let output_bam = root.join("output.bam");
    let ref_path = create_test_reference(root);

    // Unmapped side: these records are the only ones that carry the RX/QX tags.
    let mut unmapped_read1 = SamBuilder::new();
    unmapped_read1
        .read_name(b"read1")
        .sequence(b"ACGTACGT")
        .qualities(&[30; 8])
        .flags(flags::UNMAPPED)
        .add_string_tag(b"RX", b"AACCGGTT")
        .add_string_tag(b"QX", b"IIIIIIII");
    let mut unmapped_read2 = SamBuilder::new();
    unmapped_read2
        .read_name(b"read2")
        .sequence(b"TGCATGCA")
        .qualities(&[30; 8])
        .flags(flags::UNMAPPED)
        .add_string_tag(b"RX", b"GGTTCCAA")
        .add_string_tag(b"QX", b"IIIIIIII");
    let unmapped_records = [unmapped_read1.build(), unmapped_read2.build()];
    create_unmapped_bam(&unmapped_bam, &unmapped_records);

    // Mapped side: same read names and bases, no tags.
    // NOTE(review): `8 << 4` is presumably BAM's packed CIGAR form (length << 4 | op),
    // i.e. 8M — confirm against `SamBuilder::cigar_ops`.
    let mapped_header = create_minimal_header("chr1", 10000);
    let mut mapped_read1 = SamBuilder::new();
    mapped_read1
        .read_name(b"read1")
        .sequence(b"ACGTACGT")
        .qualities(&[30; 8])
        .ref_id(0)
        .pos(99)
        .mapq(60)
        .cigar_ops(&[8 << 4]);
    let mut mapped_read2 = SamBuilder::new();
    mapped_read2
        .read_name(b"read2")
        .sequence(b"TGCATGCA")
        .qualities(&[30; 8])
        .ref_id(0)
        .pos(199)
        .mapq(60)
        .cigar_ops(&[8 << 4]);
    let mapped_records = [mapped_read1.build(), mapped_read2.build()];
    create_mapped_bam(&mapped_bam, &mapped_header, &mapped_records);

    let zipper_args = [
        "zipper",
        "--input",
        mapped_bam.to_str().unwrap(),
        "--unmapped",
        unmapped_bam.to_str().unwrap(),
        "--reference",
        ref_path.to_str().unwrap(),
        "--output",
        output_bam.to_str().unwrap(),
        "--compression-level",
        "1",
    ];
    let output = Command::new(env!("CARGO_BIN_EXE_fgumi"))
        .args(zipper_args)
        .output()
        .expect("Failed to run zipper command");
    // Decoded once; used for the failure message here and the warning check at the end.
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(output.status.success(), "Zipper command failed with BAM input: {stderr}");
    assert!(output_bam.exists(), "Output BAM not created");

    let mut reader = bam::io::Reader::new(fs::File::open(&output_bam).unwrap());
    let header = reader.read_header().unwrap();
    let records: Vec<RecordBuf> = reader.record_bufs(&header).map(|rec| rec.unwrap()).collect();
    assert_eq!(records.len(), 2, "Should have 2 records in output");

    let rx_tag = Tag::from(SamTag::RX);
    assert!(
        records.iter().all(|rec| rec.data().get(&rx_tag).is_some()),
        "Output record should have RX tag"
    );

    assert!(stderr.contains("BAM input detected"), "Should warn about BAM input. stderr: {stderr}");
}
/// End-to-end check that `fgumi zipper` can take its mapped BAM from standard input.
///
/// No `--input` argument is passed; instead the bytes of a mapped BAM file are written
/// to the child's piped stdin. The test then asserts that the command succeeds, that the
/// output BAM holds two records, and that every output record has an `RX` tag.
#[test]
fn test_zipper_bam_stdin_input() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path();
    let unmapped_bam = root.join("unmapped.bam");
    let mapped_bam = root.join("mapped.bam");
    let output_bam = root.join("output.bam");
    let ref_path = create_test_reference(root);

    // Unmapped side: these records are the only ones that carry the RX/QX tags.
    let mut unmapped_read1 = SamBuilder::new();
    unmapped_read1
        .read_name(b"read1")
        .sequence(b"ACGTACGT")
        .qualities(&[30; 8])
        .flags(flags::UNMAPPED)
        .add_string_tag(b"RX", b"AACCGGTT")
        .add_string_tag(b"QX", b"IIIIIIII");
    let mut unmapped_read2 = SamBuilder::new();
    unmapped_read2
        .read_name(b"read2")
        .sequence(b"TGCATGCA")
        .qualities(&[30; 8])
        .flags(flags::UNMAPPED)
        .add_string_tag(b"RX", b"GGTTCCAA")
        .add_string_tag(b"QX", b"IIIIIIII");
    let unmapped_records = [unmapped_read1.build(), unmapped_read2.build()];
    create_unmapped_bam(&unmapped_bam, &unmapped_records);

    // Mapped side: same read names and bases, no tags.
    // NOTE(review): `8 << 4` is presumably BAM's packed CIGAR form (length << 4 | op),
    // i.e. 8M — confirm against `SamBuilder::cigar_ops`.
    let mapped_header = create_minimal_header("chr1", 10000);
    let mut mapped_read1 = SamBuilder::new();
    mapped_read1
        .read_name(b"read1")
        .sequence(b"ACGTACGT")
        .qualities(&[30; 8])
        .ref_id(0)
        .pos(99)
        .mapq(60)
        .cigar_ops(&[8 << 4]);
    let mut mapped_read2 = SamBuilder::new();
    mapped_read2
        .read_name(b"read2")
        .sequence(b"TGCATGCA")
        .qualities(&[30; 8])
        .ref_id(0)
        .pos(199)
        .mapq(60)
        .cigar_ops(&[8 << 4]);
    let mapped_records = [mapped_read1.build(), mapped_read2.build()];
    create_mapped_bam(&mapped_bam, &mapped_header, &mapped_records);

    // The mapped BAM is written to disk first, then replayed byte-for-byte over stdin.
    let bam_bytes = fs::read(&mapped_bam).expect("read mapped BAM bytes");

    let zipper_args = [
        "zipper",
        "--unmapped",
        unmapped_bam.to_str().unwrap(),
        "--reference",
        ref_path.to_str().unwrap(),
        "--output",
        output_bam.to_str().unwrap(),
        "--compression-level",
        "1",
    ];
    let mut zipper = Command::new(env!("CARGO_BIN_EXE_fgumi"))
        .args(zipper_args)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .expect("Failed to spawn zipper command");

    let child_stdin = zipper.stdin.as_mut().expect("Failed to open child stdin");
    child_stdin.write_all(&bam_bytes).expect("Failed to write BAM bytes to stdin");

    // `wait_with_output` closes the child's stdin before waiting, which signals EOF.
    let output = zipper.wait_with_output().expect("Failed to wait for zipper");
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(output.status.success(), "Zipper command failed with BAM on stdin: {stderr}");
    assert!(output_bam.exists(), "Output BAM not created");

    let mut reader = bam::io::Reader::new(fs::File::open(&output_bam).unwrap());
    let header = reader.read_header().unwrap();
    let records: Vec<RecordBuf> = reader.record_bufs(&header).map(|rec| rec.unwrap()).collect();
    assert_eq!(records.len(), 2, "Should have 2 records in output");

    let rx_tag = Tag::from(SamTag::RX);
    assert!(
        records.iter().all(|rec| rec.data().get(&rx_tag).is_some()),
        "Output record should have RX tag"
    );
}