use fgumi_raw_bam::{SamBuilder, flags};
use noodles::bam;
use noodles::sam::alignment::io::Write as AlignmentWrite;
use std::fs;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use std::process::Command;
use tempfile::TempDir;
use crate::helpers::bam_generator::{create_minimal_header, to_record_buf};
fn create_paired_bam(path: &PathBuf, read_pairs: Vec<(&str, &str, &str, &str, &str, bool)>) {
let header = create_minimal_header("chr1", 10000);
let mut writer =
bam::io::Writer::new(fs::File::create(path).expect("Failed to create BAM file"));
writer.write_header(&header).expect("Failed to write header");
for (name, seq1, qual1, seq2, qual2, r2_reverse) in read_pairs {
let q1: Vec<u8> = qual1.bytes().map(|b| b - 33).collect();
let q2: Vec<u8> = qual2.bytes().map(|b| b - 33).collect();
let r1 = {
let mut b = SamBuilder::new();
b.read_name(name.as_bytes())
.sequence(seq1.as_bytes())
.qualities(&q1)
.flags(flags::PAIRED | flags::FIRST_SEGMENT)
.ref_id(0)
.pos(99)
.mapq(60);
b.build()
};
writer.write_alignment_record(&header, &to_record_buf(&r1)).expect("Failed to write R1");
let r2_flags =
flags::PAIRED | flags::LAST_SEGMENT | if r2_reverse { flags::REVERSE } else { 0 };
let r2 = {
let mut b = SamBuilder::new();
b.read_name(name.as_bytes())
.sequence(seq2.as_bytes())
.qualities(&q2)
.flags(r2_flags)
.ref_id(0)
.pos(199)
.mapq(60);
b.build()
};
writer.write_alignment_record(&header, &to_record_buf(&r2)).expect("Failed to write R2");
}
writer.try_finish().expect("Failed to finish BAM");
}
fn parse_fastq_records(path: &PathBuf) -> Vec<(String, String, String)> {
let file = fs::File::open(path).expect("Failed to open FASTQ");
let reader = BufReader::new(file);
let lines: Vec<String> = reader.lines().map(|l| l.unwrap()).collect();
let mut records = Vec::new();
for chunk in lines.chunks(4) {
if chunk.len() == 4 {
let name = chunk[0].trim_start_matches('@').to_string();
let seq = chunk[1].clone();
let qual = chunk[3].clone();
records.push((name, seq, qual));
}
}
records
}
#[test]
fn test_fastq_basic() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_fq = temp_dir.path().join("output.fq");
create_paired_bam(
&input_bam,
vec![
("read1", "ACGTACGT", "IIIIIIII", "TGCATGCA", "IIIIIIII", false),
("read2", "AAAACCCC", "IIIIIIII", "GGGGTTTT", "IIIIIIII", false),
],
);
let output = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args(["fastq", "-i", input_bam.to_str().unwrap()])
.output()
.expect("Failed to run fastq command");
assert!(
output.status.success(),
"Fastq command failed: {}",
String::from_utf8_lossy(&output.stderr)
);
fs::write(&output_fq, &output.stdout).expect("Failed to write output");
let records = parse_fastq_records(&output_fq);
assert_eq!(records.len(), 4, "Should have 4 FASTQ records (2 pairs)");
assert_eq!(records[0].0, "read1/1");
assert_eq!(records[1].0, "read1/2");
assert_eq!(records[2].0, "read2/1");
assert_eq!(records[3].0, "read2/2");
assert_eq!(records[0].1, "ACGTACGT");
assert_eq!(records[1].1, "TGCATGCA");
assert_eq!(records[2].1, "AAAACCCC");
assert_eq!(records[3].1, "GGGGTTTT");
}
#[test]
fn test_fastq_reverse_complement() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_fq = temp_dir.path().join("output.fq");
create_paired_bam(
&input_bam,
vec![
("read1", "ACGTACGT", "IIIIIIII", "AAAA", "IIII", true),
],
);
let output = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args(["fastq", "-i", input_bam.to_str().unwrap()])
.output()
.expect("Failed to run fastq command");
assert!(
output.status.success(),
"Fastq command failed: {}",
String::from_utf8_lossy(&output.stderr)
);
fs::write(&output_fq, &output.stdout).expect("Failed to write output");
let records = parse_fastq_records(&output_fq);
assert_eq!(records.len(), 2);
assert_eq!(records[0].1, "ACGTACGT");
assert_eq!(records[1].1, "TTTT", "R2 should be reverse complemented from AAAA to TTTT");
}
#[test]
fn test_fastq_no_suffix() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_fq = temp_dir.path().join("output.fq");
create_paired_bam(&input_bam, vec![("read1", "ACGT", "IIII", "TGCA", "IIII", false)]);
let output = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"fastq",
"-i",
input_bam.to_str().unwrap(),
"-n", ])
.output()
.expect("Failed to run fastq command");
assert!(output.status.success());
fs::write(&output_fq, &output.stdout).expect("Failed to write output");
let records = parse_fastq_records(&output_fq);
assert_eq!(records.len(), 2);
assert_eq!(records[0].0, "read1");
assert_eq!(records[1].0, "read1");
}
#[test]
fn test_fastq_quality_encoding() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_fq = temp_dir.path().join("output.fq");
create_paired_bam(&input_bam, vec![("read1", "ACGT", "!I?~", "TGCA", "IIII", false)]);
let output = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args(["fastq", "-i", input_bam.to_str().unwrap()])
.output()
.expect("Failed to run fastq command");
assert!(output.status.success());
fs::write(&output_fq, &output.stdout).expect("Failed to write output");
let records = parse_fastq_records(&output_fq);
assert_eq!(records.len(), 2);
assert_eq!(records[0].2, "!I?~", "Quality scores should be preserved");
}
fn create_bam_with_flags(path: &PathBuf) {
let header = create_minimal_header("chr1", 10000);
let file = fs::File::create(path).expect("Failed to create BAM file");
let mut writer = bam::io::Writer::new(file);
writer.write_header(&header).expect("Failed to write header");
let primary = {
let mut b = SamBuilder::new();
b.read_name(b"primary")
.sequence(b"ACGT")
.qualities(&[30, 30, 30, 30])
.ref_id(0)
.pos(99)
.mapq(60);
b.build()
};
writer
.write_alignment_record(&header, &to_record_buf(&primary))
.expect("Failed to write primary");
let secondary = {
let mut b = SamBuilder::new();
b.read_name(b"secondary")
.sequence(b"TGCA")
.qualities(&[30, 30, 30, 30])
.flags(flags::SECONDARY)
.ref_id(0)
.pos(99)
.mapq(60);
b.build()
};
writer
.write_alignment_record(&header, &to_record_buf(&secondary))
.expect("Failed to write secondary");
let supplementary = {
let mut b = SamBuilder::new();
b.read_name(b"supplementary")
.sequence(b"GGGG")
.qualities(&[30, 30, 30, 30])
.flags(flags::SUPPLEMENTARY)
.ref_id(0)
.pos(99)
.mapq(60);
b.build()
};
writer
.write_alignment_record(&header, &to_record_buf(&supplementary))
.expect("Failed to write supplementary");
writer.try_finish().expect("Failed to finish BAM");
}
#[test]
fn test_fastq_exclude_flags() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_fq = temp_dir.path().join("output.fq");
create_bam_with_flags(&input_bam);
let output = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args(["fastq", "-i", input_bam.to_str().unwrap()])
.output()
.expect("Failed to run fastq command");
assert!(
output.status.success(),
"Fastq command failed: {}",
String::from_utf8_lossy(&output.stderr)
);
fs::write(&output_fq, &output.stdout).expect("Failed to write output");
let records = parse_fastq_records(&output_fq);
assert_eq!(
records.len(),
1,
"Should only have primary read (secondary and supplementary excluded)"
);
assert!(records[0].0.starts_with("primary"));
}
#[test]
fn test_fastq_multithreaded() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_fq = temp_dir.path().join("output.fq");
let read_pairs: Vec<(&str, &str, &str, &str, &str, bool)> = (0..10)
.map(|i| {
let name: &'static str = Box::leak(format!("read{i}").into_boxed_str());
(name, "ACGTACGT", "IIIIIIII", "TGCATGCA", "IIIIIIII", false)
})
.collect();
create_paired_bam(&input_bam, read_pairs);
let output = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args(["fastq", "-i", input_bam.to_str().unwrap(), "-@", "4"])
.output()
.expect("Failed to run fastq command");
assert!(
output.status.success(),
"Multithreaded fastq failed: {}",
String::from_utf8_lossy(&output.stderr)
);
fs::write(&output_fq, &output.stdout).expect("Failed to write output");
let records = parse_fastq_records(&output_fq);
assert_eq!(records.len(), 20, "Should have 20 FASTQ records (10 pairs)");
}
#[test]
fn test_fastq_hex_flags() {
let temp_dir = TempDir::new().expect("Failed to create temp dir");
let input_bam = temp_dir.path().join("input.bam");
let output_fq = temp_dir.path().join("output.fq");
create_paired_bam(&input_bam, vec![("read1", "ACGT", "IIII", "TGCA", "IIII", false)]);
let output = Command::new(env!("CARGO_BIN_EXE_fgumi"))
.args([
"fastq",
"-i",
input_bam.to_str().unwrap(),
"-F",
"0x900", ])
.output()
.expect("Failed to run fastq command");
assert!(output.status.success());
fs::write(&output_fq, &output.stdout).expect("Failed to write output");
let records = parse_fastq_records(&output_fq);
assert_eq!(records.len(), 2);
}