use crate::{CurrRead, Error, F32Bw0and1, InputMods, InputWindowing, RequiredTag};
use rust_htslib::bam::Record;
use std::rc::Rc;
pub fn run<W, F, G, D>(
handle: &mut W,
bam_records: D,
window_options: InputWindowing,
mod_options: &InputMods<RequiredTag>,
window_function: F,
window_filter: G,
) -> Result<(), Error>
where
W: std::io::Write,
F: Fn(&[u8]) -> Result<F32Bw0and1, Error>,
G: Fn(&Vec<F32Bw0and1>) -> bool,
D: IntoIterator<Item = Result<Rc<Record>, rust_htslib::errors::Error>>,
{
for r in bam_records {
let record = r?;
let curr_read_state = CurrRead::default().try_from_only_alignment(&record)?;
let read_id = String::from(curr_read_state.read_id());
if match curr_read_state
.set_mod_data_restricted_options(&record, mod_options)?
.windowed_mod_data_restricted(
&window_function,
window_options,
mod_options.required_tag(),
)? {
v if !v.is_empty() => window_filter(&v),
_ => false,
} {
writeln!(handle, "{read_id}")?;
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{ModChar, ThresholdState, nanalogue_bam_reader};
use rust_htslib::bam::Read as _;
use std::num::NonZeroUsize;
fn run_find_modified_reads_test<F, G>(
bam_path: &str,
window_size: usize,
step_size: usize,
tag: char,
window_function: F,
window_filter: G,
) -> Result<Vec<String>, Error>
where
F: Fn(&[u8]) -> Result<F32Bw0and1, Error>,
G: Fn(&Vec<F32Bw0and1>) -> bool,
{
let mut reader = nanalogue_bam_reader(bam_path)?;
let records = reader.rc_records();
let window_options = InputWindowing {
win: NonZeroUsize::new(window_size).unwrap(),
step: NonZeroUsize::new(step_size).unwrap(),
};
let mod_options = InputMods::<RequiredTag> {
tag: RequiredTag {
tag: ModChar::new(tag),
},
mod_strand: None,
mod_prob_filter: ThresholdState::default(),
trim_read_ends_mod: 0,
base_qual_filter_mod: 0,
mod_region: None,
region_bed3: None,
};
let mut output = Vec::new();
run(
&mut output,
records,
window_options,
&mod_options,
window_function,
window_filter,
)?;
let output_str = String::from_utf8(output).expect("Invalid UTF-8 output");
let mut read_ids: Vec<String> = output_str.lines().map(ToString::to_string).collect();
read_ids.sort();
Ok(read_ids)
}
fn mean_window_function(mod_data: &[u8]) -> Result<F32Bw0and1, Error> {
if mod_data.is_empty() {
Ok(F32Bw0and1::zero())
} else {
let sum: u64 = mod_data.iter().map(|&x| u64::from(x)).sum();
let mean: u8 = u8::try_from(
sum.checked_div(u64::try_from(mod_data.len())?)
.expect("no error as we have checked len > 0"),
)?;
Ok(F32Bw0and1::from(mean))
}
}
#[test]
fn find_modified_reads_basic() -> Result<(), Error> {
let window_filter =
|windows: &Vec<F32Bw0and1>| -> bool { windows.iter().any(|&w| w.val() > 0.7) };
let read_ids = run_find_modified_reads_test(
"./examples/example_1.bam",
2,
1,
'T',
mean_window_function,
window_filter,
)?;
assert_eq!(read_ids, vec!["a4f36092-b4d5-47a9-813e-c22c3b477a0c"; 2]);
Ok(())
}
#[test]
fn find_modified_reads_reject_all() -> Result<(), Error> {
let window_filter = |_windows: &Vec<F32Bw0and1>| -> bool { false };
let read_ids = run_find_modified_reads_test(
"./examples/example_1.bam",
2,
1,
'T',
mean_window_function,
window_filter,
)?;
assert!(
read_ids.is_empty(),
"Expected no reads to pass the reject-all filter"
);
Ok(())
}
#[test]
fn find_modified_reads_accept_all() -> Result<(), Error> {
let window_filter = |windows: &Vec<F32Bw0and1>| -> bool { !windows.is_empty() };
let read_ids = run_find_modified_reads_test(
"./examples/example_1.bam",
2,
1,
'T',
mean_window_function,
window_filter,
)?;
assert_eq!(
read_ids,
vec![
"5d10eb9a-aae1-4db8-8ec6-7ebb34d32575",
"a4f36092-b4d5-47a9-813e-c22c3b477a0c",
"a4f36092-b4d5-47a9-813e-c22c3b477a0c",
"fffffff1-10d2-49cb-8ca3-e8d48979001b"
]
);
Ok(())
}
}