use crate::{CurrRead, Error, InputMods, OptionalTag, ThresholdState};
use rust_htslib::bam;
use std::iter;
use std::rc::Rc;
/// Writes one entry per BAM record to `handle`, wrapped in `[` … `]` and
/// separated by commas.
///
/// `detailed` selects how each record is rendered:
/// - `None`: the record's `Display` output,
/// - `Some(false)`: compact `serde_json` serialisation,
/// - `Some(true)`: pretty-printed `serde_json` serialisation.
///
/// When `detailed` is `None`, the modification-probability filter in `mods`
/// is adjusted before any record is processed:
/// - `GtEq(w)` becomes `GtEq(max(128, w))`,
/// - `InvertGtEqLtEq(w)` becomes `Both((128, w))`,
/// - `Both((w, x))` becomes `Both((max(128, w), x))`.
///
/// # Errors
/// Returns the first error encountered, which may come from the record
/// iterator, from building the per-record data, from `serde_json`
/// serialisation, or from writing to `handle`. Anything already written to
/// `handle` stays there and the closing `]` is not emitted in that case.
pub fn run<W, D>(
    handle: &mut W,
    bam_records: D,
    mut mods: InputMods<OptionalTag>,
    detailed: Option<bool>,
) -> Result<(), Error>
where
    W: std::io::Write,
    D: IntoIterator<Item = Result<Rc<bam::Record>, rust_htslib::errors::Error>>,
{
    if detailed.is_none() {
        mods.mod_prob_filter = match mods.mod_prob_filter {
            ThresholdState::GtEq(w) => ThresholdState::GtEq(u8::max(128, w)),
            ThresholdState::InvertGtEqLtEq(w) => ThresholdState::Both((128, w)),
            ThresholdState::Both((w, x)) => ThresholdState::Both((u8::max(128, w), x)),
        };
    }

    // The first entry is preceded by a bare newline, every later one by
    // ",\n". Pairing an endless separator stream with the records avoids
    // both a mutable "first record" flag and any panicking `expect`.
    let separators = iter::once("\n").chain(iter::repeat(",\n"));

    write!(handle, "[")?;
    for (separator, k) in separators.zip(bam_records) {
        // Surface a read error before emitting anything for this record.
        let record = k?;
        write!(handle, "{separator}")?;

        let curr_read = CurrRead::default()
            .try_from_only_alignment(&record)?
            .set_mod_data_restricted_options(&record, &mods)?;
        let rendered = match detailed {
            None => curr_read.to_string(),
            Some(false) => serde_json::to_string(&curr_read)?,
            Some(true) => serde_json::to_string_pretty(&curr_read)?,
        };
        write!(handle, "{rendered}")?;
    }
    writeln!(handle, "\n]")?;
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{InputModsBuilder, OrdPair, nanalogue_bam_reader};
    use rust_htslib::bam::Read as _;
    use serde_json::Value;

    /// Item type of the record stream that `run` consumes.
    type RecordResult = Result<Rc<bam::Record>, rust_htslib::errors::Error>;

    /// Feeds `records` through `run` and parses everything it wrote as JSON.
    fn run_to_json(
        records: Vec<RecordResult>,
        mods: InputMods<OptionalTag>,
        detailed: Option<bool>,
    ) -> Result<Value, Error> {
        let mut buffer = Vec::new();
        run(&mut buffer, records.into_iter(), mods, detailed)?;
        let text = String::from_utf8(buffer)?;
        let parsed: Value = serde_json::from_str(&text)?;
        Ok(parsed)
    }

    /// Runs `run` with default options and `detailed == None`, then compares
    /// the parsed output with `expected`.
    fn assert_default_summary(records: Vec<RecordResult>, expected: &Value) -> Result<(), Error> {
        let parsed = run_to_json(records, InputMods::default(), None)?;
        assert_eq!(parsed, *expected);
        Ok(())
    }

    /// Zero-length reads are filtered out, leaving a single read with no
    /// modification counts.
    #[test]
    fn run_with_example_2_zero_len() -> Result<(), Error> {
        let mut reader = nanalogue_bam_reader("./examples/example_2_zero_len.sam")?;
        // Read errors are kept so that `run` can report them.
        let records: Vec<RecordResult> = reader
            .rc_records()
            .filter(|item| match item {
                Ok(record) => record.seq_len() > 0,
                Err(_) => true,
            })
            .collect();
        let expected = serde_json::json!([
            {
                "read_id": "read2",
                "sequence_length": 48,
                "contig": "dummyIII",
                "reference_start": 23,
                "reference_end": 71,
                "alignment_length": 48,
                "alignment_type": "primary_forward",
                "mod_count": "NA"
            }
        ]);
        assert_default_summary(records, &expected)
    }

    /// Keeps only records whose flags equal 4.
    #[test]
    fn run_with_unmapped_filter() -> Result<(), Error> {
        let mut reader = nanalogue_bam_reader("./examples/example_1.bam")?;
        let records: Vec<RecordResult> = reader
            .rc_records()
            .filter(|item| match item {
                Ok(record) => record.flags() == 4,
                Err(_) => true,
            })
            .collect();
        let expected = serde_json::json!([
            {
                "read_id": "a4f36092-b4d5-47a9-813e-c22c3b477a0c",
                "sequence_length": 48,
                "alignment_type": "unmapped",
                "mod_count": "G-7200:0;T+T:3;(probabilities >= 0.5020, PHRED base qual >= 0)"
            }
        ]);
        assert_default_summary(records, &expected)
    }

    /// Keeps only mapped records on the contig with target id 0.
    #[test]
    fn run_with_region_filter_dummy_i() -> Result<(), Error> {
        let mut reader = nanalogue_bam_reader("./examples/example_1.bam")?;
        let records: Vec<RecordResult> = reader
            .rc_records()
            .filter(|item| match item {
                Ok(record) => !record.is_unmapped() && record.tid() == 0,
                Err(_) => true,
            })
            .collect();
        let expected = serde_json::json!([
            {
                "read_id": "5d10eb9a-aae1-4db8-8ec6-7ebb34d32575",
                "sequence_length": 8,
                "contig": "dummyI",
                "reference_start": 9,
                "reference_end": 17,
                "alignment_length": 8,
                "alignment_type": "primary_forward",
                "mod_count": "T+T:0;(probabilities >= 0.5020, PHRED base qual >= 0)"
            }
        ]);
        assert_default_summary(records, &expected)
    }

    /// Expected output for `example_6.sam`. Across the example-6 tests only
    /// the second read's `mod_count` string changes with the filter.
    fn example_6_expected(second_mod_count: &str) -> Value {
        serde_json::json!([
            {
                "read_id": "5d10eb9a-aae1-4db8-8ec6-7ebb34d32575",
                "sequence_length": 8,
                "contig": "dummyI",
                "reference_start": 9,
                "reference_end": 17,
                "alignment_length": 8,
                "alignment_type": "primary_forward",
                "mod_count": "NA"
            },
            {
                "read_id": "fffffff1-10d2-49cb-8ca3-e8d48979001b",
                "sequence_length": 33,
                "contig": "dummyII",
                "reference_start": 3,
                "reference_end": 36,
                "alignment_length": 33,
                "alignment_type": "primary_reverse",
                "mod_count": second_mod_count
            }
        ])
    }

    /// Runs `run` over every record of `example_6.sam` with `mod_options`
    /// and `detailed == None`, then compares the parsed output with `expected`.
    fn run_example_6_test(
        mod_options: InputMods<OptionalTag>,
        expected: &Value,
    ) -> Result<(), Error> {
        let mut reader = nanalogue_bam_reader("./examples/example_6.sam")?;
        let records: Vec<RecordResult> = reader
            .records()
            .map(|item| item.map(Rc::new))
            .collect();
        let parsed = run_to_json(records, mod_options, None)?;
        assert_eq!(parsed, *expected);
        Ok(())
    }

    #[test]
    fn run_with_example_6() -> Result<(), Error> {
        let expected =
            example_6_expected("T+T:1;(probabilities >= 0.5020, PHRED base qual >= 0)");
        run_example_6_test(InputMods::default(), &expected)
    }

    #[test]
    fn run_with_example_6_aggressive_filtering_1() -> Result<(), Error> {
        let mod_options = InputModsBuilder::<OptionalTag>::default()
            .mod_prob_filter(ThresholdState::GtEq(255))
            .build()?;
        let expected =
            example_6_expected("T+T:0;(probabilities >= 1.0000, PHRED base qual >= 0)");
        run_example_6_test(mod_options, &expected)
    }

    #[test]
    fn run_with_example_6_aggressive_filtering_2() -> Result<(), Error> {
        let excluded = OrdPair::new(200, 220).expect("no error");
        let mod_options = InputModsBuilder::<OptionalTag>::default()
            .mod_prob_filter(ThresholdState::Both((100, excluded)))
            .build()?;
        let expected = example_6_expected(
            "T+T:1;(probabilities >= 0.5020 and (probabilities < 0.7843 or > 0.8627), PHRED base qual >= 0)",
        );
        run_example_6_test(mod_options, &expected)
    }

    #[test]
    fn run_with_example_6_aggressive_filtering_3() -> Result<(), Error> {
        let excluded = OrdPair::new(100, 110).expect("no error");
        let mod_options = InputModsBuilder::<OptionalTag>::default()
            .mod_prob_filter(ThresholdState::InvertGtEqLtEq(excluded))
            .build()?;
        let expected = example_6_expected(
            "T+T:1;(probabilities >= 0.5020 and (probabilities < 0.3922 or > 0.4314), PHRED base qual >= 0)",
        );
        run_example_6_test(mod_options, &expected)
    }

    /// Runs `run` over all of `example_1.bam` with `detailed == Some(pretty)`
    /// and compares the parsed output with the JSON stored at `expected_path`.
    fn check_detailed_output(
        pretty: bool,
        expected_path: &str,
        message: &str,
    ) -> Result<(), Error> {
        let mut reader = nanalogue_bam_reader("./examples/example_1.bam")?;
        let records: Vec<RecordResult> = reader.rc_records().collect();
        let parsed_output = run_to_json(records, InputMods::default(), Some(pretty))?;
        let expected_text = std::fs::read_to_string(expected_path)?;
        let parsed_expected: Value = serde_json::from_str(&expected_text)?;
        assert_eq!(parsed_output, parsed_expected, "{message}");
        Ok(())
    }

    #[test]
    fn run_with_detailed_pretty_print() -> Result<(), Error> {
        check_detailed_output(
            true,
            "./examples/example_1_detailed_pretty_print",
            "Pretty-printed detailed output should match expected file",
        )
    }

    #[test]
    fn run_with_detailed_compact_print() -> Result<(), Error> {
        check_detailed_output(
            false,
            "./examples/example_1_detailed_print",
            "Compact detailed output should match expected file",
        )
    }
}