use crate::{AllowedAGCTN, CurrRead, Error, ModChar};
use rust_htslib::bam;
use std::collections::HashSet;
use std::io;
use std::rc::Rc;
pub fn run<W, D>(handle: &mut W, header: &bam::HeaderView, records: D) -> Result<(), Error>
where
W: io::Write,
D: Iterator<Item = Result<Rc<bam::Record>, rust_htslib::errors::Error>>,
{
writeln!(handle, "contigs_and_lengths:")?;
for tid in 0..header.target_count() {
let name = std::str::from_utf8(
header
.target_names()
.get(tid as usize)
.ok_or_else(|| Error::InvalidSeqLength(format!("tid {tid} out of bounds")))?,
)?;
let length = header.target_len(tid).ok_or_else(|| {
Error::InvalidSeqLength(format!("target_len returned None for tid {tid}"))
})?;
writeln!(handle, "{name}\t{length}")?;
}
writeln!(handle)?;
let mut modifications = HashSet::new();
for record_result in records {
let record = record_result?;
let curr_read = match CurrRead::try_from(record) {
Ok(cr) => cr,
Err(Error::ZeroSeqLen(_)) => continue,
Err(e) => return Err(e),
};
let base_mods = &curr_read.mod_data().0.base_mods;
for base_mod in base_mods {
let base = AllowedAGCTN::try_from(base_mod.modified_base)?;
let mod_char = ModChar::from(base_mod.modification_type);
let mod_string = format!("{}{}{}", base, base_mod.strand, mod_char);
let _: bool = modifications.insert(mod_string);
}
}
writeln!(handle, "modifications:")?;
if modifications.is_empty() {
writeln!(handle, "None")?;
} else {
let mut sorted_mods: Vec<_> = modifications.into_iter().collect();
sorted_mods.sort();
for mod_string in sorted_mods {
writeln!(handle, "{mod_string}")?;
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{BamRcRecords, InputBamBuilder, InputMods, OptionalTag, PathOrURLOrStdin};
use std::fs;
#[test]
fn peek_shows_example_1() {
let mut input_bam = InputBamBuilder::default()
.bam_path(PathOrURLOrStdin::Path("examples/example_1.bam".into()))
.build()
.expect("should build InputBam");
let mut reader =
bam::Reader::from_path("examples/example_1.bam").expect("should open BAM file");
let bam_rc_records = BamRcRecords::new(
&mut reader,
&mut input_bam,
&mut InputMods::<OptionalTag>::default(),
)
.expect("should create BamRcRecords");
let mut output = Vec::new();
run(
&mut output,
&bam_rc_records.header,
bam_rc_records.rc_records.take(100),
)
.expect("peek should succeed");
let expected = fs::read_to_string("examples/example_1_peek")
.expect("should read expected output file");
assert_eq!(
String::from_utf8(output).expect("output should be valid UTF-8"),
expected
);
}
#[test]
fn peek_shows_example_3() {
let mut input_bam = InputBamBuilder::default()
.bam_path(PathOrURLOrStdin::Path("examples/example_3.bam".into()))
.build()
.expect("should build InputBam");
let mut reader =
bam::Reader::from_path("examples/example_3.bam").expect("should open BAM file");
let bam_rc_records = BamRcRecords::new(
&mut reader,
&mut input_bam,
&mut InputMods::<OptionalTag>::default(),
)
.expect("should create BamRcRecords");
let mut output = Vec::new();
run(
&mut output,
&bam_rc_records.header,
bam_rc_records.rc_records.take(100),
)
.expect("peek should succeed");
let expected = fs::read_to_string("examples/example_3_peek")
.expect("should read expected output file");
assert_eq!(
String::from_utf8(output).expect("output should be valid UTF-8"),
expected
);
}
#[test]
fn peek_shows_simulated_example() {
use crate::simulate_mod_bam::{SimulationConfig, TempBamSimulation};
let config_json = r#"{
"contigs": {
"number": 1,
"len_range": [100, 100],
"repeated_seq": "ACGTACGTACGTACGT"
},
"reads": [{
"number": 20,
"mapq_range": [20, 30],
"base_qual_range": [20, 30],
"len_range": [0.8, 1.0],
"mods": [
{
"base": "C",
"is_strand_plus": true,
"mod_code": "m",
"win": [2],
"mod_range": [[0.7, 0.9]]
},
{
"base": "A",
"is_strand_plus": true,
"mod_code": "a",
"win": [3],
"mod_range": [[0.3, 0.5]]
},
{
"base": "T",
"is_strand_plus": false,
"mod_code": "t",
"win": [1],
"mod_range": [[0.5, 0.6]]
}
]
}]
}"#;
let config: SimulationConfig = serde_json::from_str(config_json).unwrap();
let sim = TempBamSimulation::new(config).unwrap();
let mut reader = bam::Reader::from_path(sim.bam_path()).unwrap();
let mut input_bam = InputBamBuilder::default()
.bam_path(PathOrURLOrStdin::Path(sim.bam_path().into()))
.build()
.expect("should build InputBam");
let bam_rc_records = BamRcRecords::new(
&mut reader,
&mut input_bam,
&mut InputMods::<OptionalTag>::default(),
)
.expect("should create BamRcRecords");
let mut output = Vec::new();
run(
&mut output,
&bam_rc_records.header,
bam_rc_records.rc_records.take(100),
)
.expect("peek should succeed");
let output_str = String::from_utf8(output).expect("output should be valid UTF-8");
let expected = "contigs_and_lengths:\ncontig_00000\t100\n\nmodifications:\nA+a\nC+m\nT-t\n";
assert_eq!(output_str, expected);
}
#[test]
fn peek_empty_file() {
use crate::simulate_mod_bam::{SimulationConfig, TempBamSimulation};
let config_json = r#"{
"contigs": {
"number": 1,
"len_range": [100, 100],
"repeated_seq": "ACGTACGTACGTACGT"
},
"reads": []
}"#;
let config: SimulationConfig = serde_json::from_str(config_json).unwrap();
let sim = TempBamSimulation::new(config).unwrap();
let mut reader = bam::Reader::from_path(sim.bam_path()).unwrap();
let mut input_bam = InputBamBuilder::default()
.bam_path(PathOrURLOrStdin::Path(sim.bam_path().into()))
.build()
.expect("should build InputBam");
let bam_rc_records = BamRcRecords::new(
&mut reader,
&mut input_bam,
&mut InputMods::<OptionalTag>::default(),
)
.expect("should create BamRcRecords");
let mut output = Vec::new();
run(
&mut output,
&bam_rc_records.header,
bam_rc_records.rc_records.take(100),
)
.expect("peek should succeed");
let output_str = String::from_utf8(output).expect("output should be valid UTF-8");
let expected = "contigs_and_lengths:\ncontig_00000\t100\n\nmodifications:\nNone\n";
assert_eq!(output_str, expected);
}
#[test]
fn peek_skips_zero_length_reads() {
use crate::nanalogue_bam_reader;
use rust_htslib::bam::Read as _;
let mut reader = nanalogue_bam_reader("./examples/example_2_zero_len.sam")
.expect("should open SAM file");
let header = reader.header().clone();
let mut output = Vec::new();
run(&mut output, &header, reader.rc_records().take(100)).expect("peek should succeed");
let output_str = String::from_utf8(output).expect("output should be valid UTF-8");
assert!(output_str.contains("contigs_and_lengths:"));
assert!(output_str.contains("modifications:"));
assert!(output_str.contains("dummyI"));
}
}