use clap::Parser;
use fgumi_lib::commands::command::Command;
use fgumi_lib::commands::fastq::Fastq;
use fgumi_raw_bam::{SamBuilder, flags};
use noodles::bam;
use noodles::sam::alignment::io::Write as AlignmentWrite;
use std::fs;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use tempfile::TempDir;
use crate::helpers::bam_generator::{create_minimal_header, to_record_buf};
/// Writes a BAM file at `path` holding one R1/R2 record pair per entry of `read_pairs`.
///
/// Each entry is `(name, seq1, qual1, seq2, qual2, r2_reverse)`. Quality strings are ASCII
/// text with an offset of 33; every byte has 33 subtracted before being handed to the builder.
/// R1 is flagged `PAIRED | FIRST_SEGMENT`. R2 is flagged `PAIRED | LAST_SEGMENT`, plus
/// `REVERSE` when `r2_reverse` is true. Both mates use reference id 0 and MAPQ 60, with
/// builder positions 99 (R1) and 199 (R2).
///
/// # Panics
///
/// Panics if the file cannot be created or if any header/record write fails.
fn create_paired_bam(path: &PathBuf, read_pairs: Vec<(&str, &str, &str, &str, &str, bool)>) {
    let header = create_minimal_header("chr1", 10000);
    let bam_file = fs::File::create(path).expect("Failed to create BAM file");
    let mut writer = bam::io::Writer::new(bam_file);
    writer.write_header(&header).expect("Failed to write header");

    // Turns offset-33 quality text into the raw scores the builder expects.
    let decode_quals = |text: &str| -> Vec<u8> { text.bytes().map(|b| b - 33).collect() };

    for (name, seq1, qual1, seq2, qual2, r2_reverse) in read_pairs {
        let r1_quals = decode_quals(qual1);
        let r2_quals = decode_quals(qual2);

        // First mate.
        let mut r1_builder = SamBuilder::new();
        r1_builder
            .read_name(name.as_bytes())
            .sequence(seq1.as_bytes())
            .qualities(&r1_quals)
            .flags(flags::PAIRED | flags::FIRST_SEGMENT)
            .ref_id(0)
            .pos(99)
            .mapq(60);
        let r1 = r1_builder.build();
        writer.write_alignment_record(&header, &to_record_buf(&r1)).expect("Failed to write R1");

        // Second mate; the strand bit is only added on request.
        let mut r2_flags = flags::PAIRED | flags::LAST_SEGMENT;
        if r2_reverse {
            r2_flags |= flags::REVERSE;
        }
        let mut r2_builder = SamBuilder::new();
        r2_builder
            .read_name(name.as_bytes())
            .sequence(seq2.as_bytes())
            .qualities(&r2_quals)
            .flags(r2_flags)
            .ref_id(0)
            .pos(199)
            .mapq(60);
        let r2 = r2_builder.build();
        writer.write_alignment_record(&header, &to_record_buf(&r2)).expect("Failed to write R2");
    }

    writer.try_finish().expect("Failed to finish BAM");
}
/// Reads the FASTQ file at `path` into `(name, sequence, qualities)` triples.
///
/// The file is consumed four lines at a time: header, sequence, separator, qualities. The
/// separator line is ignored. A trailing group of fewer than four lines is dropped.
///
/// Exactly one leading `@` (the FASTQ header marker) is removed from the header line, so a
/// read name that itself begins with `@` is returned intact. A header line without the
/// marker is returned unchanged.
///
/// # Panics
///
/// Panics if the file cannot be opened or a line cannot be read.
fn parse_fastq_records(path: &PathBuf) -> Vec<(String, String, String)> {
    let file = fs::File::open(path).expect("Failed to open FASTQ");
    let lines: Vec<String> = BufReader::new(file)
        .lines()
        .map(|line| line.expect("Failed to read FASTQ line"))
        .collect();

    lines
        .chunks_exact(4)
        .map(|record| {
            let header = record[0].as_str();
            // `strip_prefix` removes only the marker; `trim_start_matches` would also eat
            // any `@` characters that belong to the read name.
            let name = header.strip_prefix('@').unwrap_or(header).to_string();
            (name, record[1].clone(), record[3].clone())
        })
        .collect()
}
/// Converts a BAM with two forward-strand read pairs and checks the resulting FASTQ.
///
/// Expects four records in R1/R2 order per pair, named with `/1` and `/2` suffixes, with
/// sequences identical to the ones written into the BAM.
#[test]
fn test_fastq_basic() {
    let workdir = TempDir::new().expect("Failed to create temp dir");
    let bam_path = workdir.path().join("input.bam");
    let fastq_path = workdir.path().join("output.fq");

    let pairs = vec![
        ("read1", "ACGTACGT", "IIIIIIII", "TGCATGCA", "IIIIIIII", false),
        ("read2", "AAAACCCC", "IIIIIIII", "GGGGTTTT", "IIIIIIII", false),
    ];
    create_paired_bam(&bam_path, pairs);

    let bam_arg = bam_path.to_str().unwrap();
    let fastq_arg = fastq_path.to_str().unwrap();
    let fastq_cmd = Fastq::try_parse_from(["fastq", "-i", bam_arg, "-o", fastq_arg])
        .expect("failed to parse fastq args");
    fastq_cmd.execute("fgumi fastq").expect("fastq command failed");

    let records = parse_fastq_records(&fastq_path);
    assert_eq!(records.len(), 4, "Should have 4 FASTQ records (2 pairs)");

    // Names first, then sequences, in output order.
    let expected_names = ["read1/1", "read1/2", "read2/1", "read2/2"];
    for (record, name) in records.iter().zip(expected_names) {
        assert_eq!(record.0, name);
    }
    let expected_seqs = ["ACGTACGT", "TGCATGCA", "AAAACCCC", "GGGGTTTT"];
    for (record, seq) in records.iter().zip(expected_seqs) {
        assert_eq!(record.1, seq);
    }
}
/// Checks that a read flagged `REVERSE` is emitted reverse-complemented and that a
/// forward-strand read is emitted untouched.
///
/// Both sequences are deliberately strand-asymmetric so that every wrong transformation is
/// detectable:
/// - R2 is stored as `AACG`; the expected output `CGTT` differs from the stored bases, from
///   their plain complement (`TTGC`) and from their plain reversal (`GCAA`).
/// - R1 is stored as `AAACCCGG`, which is not its own reverse complement (`CCGGGTTT`), so an
///   implementation that wrongly flips forward reads fails the test.
#[test]
fn test_fastq_reverse_complement() {
    let temp_dir = TempDir::new().expect("Failed to create temp dir");
    let input_bam = temp_dir.path().join("input.bam");
    let output_fq = temp_dir.path().join("output.fq");

    // Only R2 carries the REVERSE flag (last tuple field).
    create_paired_bam(
        &input_bam,
        vec![("read1", "AAACCCGG", "IIIIIIII", "AACG", "IIII", true)],
    );

    let cmd = Fastq::try_parse_from([
        "fastq",
        "-i",
        input_bam.to_str().unwrap(),
        "-o",
        output_fq.to_str().unwrap(),
    ])
    .expect("failed to parse fastq args");
    cmd.execute("fgumi fastq").expect("fastq command failed");

    let records = parse_fastq_records(&output_fq);
    assert_eq!(records.len(), 2);
    assert_eq!(records[0].1, "AAACCCGG", "forward-strand R1 should be emitted unchanged");
    assert_eq!(records[1].1, "CGTT", "R2 should be reverse complemented from AACG to CGTT");
}
/// Runs the command with `-n` and checks that both mates are emitted under the bare read
/// name, without any `/1` or `/2` suffix.
#[test]
fn test_fastq_no_suffix() {
    let workdir = TempDir::new().expect("Failed to create temp dir");
    let bam_path = workdir.path().join("input.bam");
    let fastq_path = workdir.path().join("output.fq");

    let single_pair = vec![("read1", "ACGT", "IIII", "TGCA", "IIII", false)];
    create_paired_bam(&bam_path, single_pair);

    let bam_arg = bam_path.to_str().unwrap();
    let fastq_arg = fastq_path.to_str().unwrap();
    let fastq_cmd = Fastq::try_parse_from(["fastq", "-i", bam_arg, "-n", "-o", fastq_arg])
        .expect("failed to parse fastq args");
    fastq_cmd.execute("fgumi fastq").expect("fastq command failed");

    let records = parse_fastq_records(&fastq_path);
    assert_eq!(records.len(), 2);
    for record in &records[..2] {
        assert_eq!(record.0, "read1");
    }
}
/// Round-trips an R1 quality string spanning the printable range (`!`, `I`, `?`, `~`)
/// through BAM and back, and checks that the FASTQ output reproduces it exactly.
#[test]
fn test_fastq_quality_encoding() {
    let workdir = TempDir::new().expect("Failed to create temp dir");
    let bam_path = workdir.path().join("input.bam");
    let fastq_path = workdir.path().join("output.fq");

    let single_pair = vec![("read1", "ACGT", "!I?~", "TGCA", "IIII", false)];
    create_paired_bam(&bam_path, single_pair);

    let bam_arg = bam_path.to_str().unwrap();
    let fastq_arg = fastq_path.to_str().unwrap();
    let fastq_cmd = Fastq::try_parse_from(["fastq", "-i", bam_arg, "-o", fastq_arg])
        .expect("failed to parse fastq args");
    fastq_cmd.execute("fgumi fastq").expect("fastq command failed");

    let records = parse_fastq_records(&fastq_path);
    assert_eq!(records.len(), 2);

    let r1_quals = &records[0].2;
    assert_eq!(r1_quals, "!I?~", "Quality scores should be preserved");
}
/// Writes a BAM file at `path` with three single-end records, in this order:
/// `primary` (no explicit flags set on the builder), `secondary` (`flags::SECONDARY`) and
/// `supplementary` (`flags::SUPPLEMENTARY`).
///
/// Every record has four bases, qualities of 30, reference id 0, builder position 99 and
/// MAPQ 60.
///
/// # Panics
///
/// Panics if the file cannot be created or if any header/record write fails.
fn create_bam_with_flags(path: &PathBuf) {
    let header = create_minimal_header("chr1", 10000);
    let mut writer =
        bam::io::Writer::new(fs::File::create(path).expect("Failed to create BAM file"));
    writer.write_header(&header).expect("Failed to write header");

    let quals = [30u8; 4];

    // (read name, bases, optional flag to set, message used if the write fails)
    let specs = [
        (&b"primary"[..], &b"ACGT"[..], None, "Failed to write primary"),
        (&b"secondary"[..], &b"TGCA"[..], Some(flags::SECONDARY), "Failed to write secondary"),
        (
            &b"supplementary"[..],
            &b"GGGG"[..],
            Some(flags::SUPPLEMENTARY),
            "Failed to write supplementary",
        ),
    ];

    for (name, bases, extra_flag, write_err) in specs {
        let mut builder = SamBuilder::new();
        builder.read_name(name).sequence(bases).qualities(&quals);
        // The primary record never touches the flags setter and keeps the builder default.
        if let Some(bits) = extra_flag {
            builder.flags(bits);
        }
        builder.ref_id(0).pos(99).mapq(60);
        let record = builder.build();
        writer.write_alignment_record(&header, &to_record_buf(&record)).expect(write_err);
    }

    writer.try_finish().expect("Failed to finish BAM");
}
/// Runs the command with default options on a BAM holding one primary, one secondary and one
/// supplementary record, and expects only the primary read in the FASTQ output.
#[test]
fn test_fastq_exclude_flags() {
    let workdir = TempDir::new().expect("Failed to create temp dir");
    let bam_path = workdir.path().join("input.bam");
    let fastq_path = workdir.path().join("output.fq");
    create_bam_with_flags(&bam_path);

    let bam_arg = bam_path.to_str().unwrap();
    let fastq_arg = fastq_path.to_str().unwrap();
    let fastq_cmd = Fastq::try_parse_from(["fastq", "-i", bam_arg, "-o", fastq_arg])
        .expect("failed to parse fastq args");
    fastq_cmd.execute("fgumi fastq").expect("fastq command failed");

    let records = parse_fastq_records(&fastq_path);
    assert_eq!(
        records.len(),
        1,
        "Should only have primary read (secondary and supplementary excluded)"
    );

    let (surviving_name, _, _) = &records[0];
    assert!(surviving_name.starts_with("primary"));
}
/// Runs the command with `-@ 4` on ten read pairs and checks that every mate of every pair
/// appears exactly once in the output.
///
/// The name comparison is order-independent (both sides are sorted), so the test makes no
/// assumption about the order in which the records are written.
#[test]
fn test_fastq_multithreaded() {
    let temp_dir = TempDir::new().expect("Failed to create temp dir");
    let input_bam = temp_dir.path().join("input.bam");
    let output_fq = temp_dir.path().join("output.fq");

    // Own the generated names locally and lend them out as `&str`; no need to leak memory
    // just to obtain a `'static` lifetime.
    let names: Vec<String> = (0..10).map(|i| format!("read{i}")).collect();
    let read_pairs: Vec<(&str, &str, &str, &str, &str, bool)> = names
        .iter()
        .map(|name| (name.as_str(), "ACGTACGT", "IIIIIIII", "TGCATGCA", "IIIIIIII", false))
        .collect();
    create_paired_bam(&input_bam, read_pairs);

    let cmd = Fastq::try_parse_from([
        "fastq",
        "-i",
        input_bam.to_str().unwrap(),
        "-@",
        "4",
        "-o",
        output_fq.to_str().unwrap(),
    ])
    .expect("failed to parse fastq args");
    cmd.execute("fgumi fastq").expect("fastq command failed");

    let records = parse_fastq_records(&output_fq);
    assert_eq!(records.len(), 20, "Should have 20 FASTQ records (10 pairs)");

    // A bare count would still pass if one record were duplicated and another dropped, so
    // also compare the full set of emitted names.
    let mut actual_names: Vec<&str> = records.iter().map(|(name, _, _)| name.as_str()).collect();
    actual_names.sort_unstable();
    let mut expected_names: Vec<String> =
        names.iter().flat_map(|name| [format!("{name}/1"), format!("{name}/2")]).collect();
    expected_names.sort_unstable();
    assert_eq!(actual_names, expected_names, "Each mate of each pair should appear exactly once");
}
/// Checks that a hexadecimal value (`0x900`) is accepted for `-F` and that a BAM holding a
/// single ordinary read pair still yields both mates.
///
/// NOTE(review): `-F` is presumably the exclude-flags filter and `0x900` the
/// secondary + supplementary bits, as in the SAM flag layout; confirm against the
/// `Fastq` argument definitions.
#[test]
fn test_fastq_hex_flags() {
    let workdir = TempDir::new().expect("Failed to create temp dir");
    let bam_path = workdir.path().join("input.bam");
    let fastq_path = workdir.path().join("output.fq");

    let single_pair = vec![("read1", "ACGT", "IIII", "TGCA", "IIII", false)];
    create_paired_bam(&bam_path, single_pair);

    let bam_arg = bam_path.to_str().unwrap();
    let fastq_arg = fastq_path.to_str().unwrap();
    let fastq_cmd =
        Fastq::try_parse_from(["fastq", "-i", bam_arg, "-F", "0x900", "-o", fastq_arg])
            .expect("failed to parse fastq args");
    fastq_cmd.execute("fgumi fastq").expect("fastq command failed");

    let record_count = parse_fastq_records(&fastq_path).len();
    assert_eq!(record_count, 2);
}
/// Passes the same path as both `-i` and `-o` and expects `execute` to fail with an error
/// mentioning "must differ", leaving the input BAM at its original size.
#[test]
fn test_fastq_output_same_as_input_rejected() {
    let workdir = TempDir::new().expect("Failed to create temp dir");
    let bam_path = workdir.path().join("input.bam");
    create_paired_bam(&bam_path, vec![("read1", "ACGT", "IIII", "TGCA", "IIII", false)]);

    // Size probe, used before and after the command to detect truncation.
    let bam_len = || fs::metadata(&bam_path).expect("stat input").len();
    let input_size_before = bam_len();

    let bam_arg = bam_path.to_str().unwrap();
    let fastq_cmd = Fastq::try_parse_from(["fastq", "-i", bam_arg, "-o", bam_arg])
        .expect("failed to parse fastq args");

    let err = fastq_cmd
        .execute("fgumi fastq")
        .expect_err("execute must reject identical --input/--output");
    assert!(err.to_string().contains("must differ"), "unexpected error message: {err}");

    let input_size_after = bam_len();
    assert_eq!(
        input_size_before, input_size_after,
        "input BAM was truncated/clobbered by --output=--input"
    );
}
/// Points `-o` at a symlink that resolves to the `-i` file and expects `execute` to fail
/// with an error mentioning "must differ", leaving the input BAM at its original size.
///
/// Unix-only because it relies on `std::os::unix::fs::symlink`.
#[cfg(unix)]
#[test]
fn test_fastq_output_symlink_to_input_rejected() {
    let workdir = TempDir::new().expect("Failed to create temp dir");
    let bam_path = workdir.path().join("input.bam");
    let link_path = workdir.path().join("output.bam");
    create_paired_bam(&bam_path, vec![("read1", "ACGT", "IIII", "TGCA", "IIII", false)]);
    std::os::unix::fs::symlink(&bam_path, &link_path).expect("create symlink");

    // Size probe, used before and after the command to detect truncation.
    let bam_len = || fs::metadata(&bam_path).expect("stat input").len();
    let input_size_before = bam_len();

    let bam_arg = bam_path.to_str().unwrap();
    let link_arg = link_path.to_str().unwrap();
    let fastq_cmd = Fastq::try_parse_from(["fastq", "-i", bam_arg, "-o", link_arg])
        .expect("failed to parse fastq args");

    let err = fastq_cmd
        .execute("fgumi fastq")
        .expect_err("execute must reject --output symlinked to --input");
    assert!(err.to_string().contains("must differ"), "unexpected error message: {err}");

    let input_size_after = bam_len();
    assert_eq!(
        input_size_before, input_size_after,
        "input BAM was truncated/clobbered through symlinked --output"
    );
}