use anyhow::Result;
use bio::io::fasta::IndexedReader;
use log::*;
use perbase_lib::{
par_granges::{self, RegionProcessor},
position::{Position, mate_fix::MateResolutionStrategy, pileup_position::PileupPosition},
read_filter::{DefaultReadFilter, ReadFilter},
reference, utils,
};
use rust_htslib::{bam, bam::Read};
use std::{convert::TryInto, path::PathBuf};
use structopt::{StructOpt, clap::AppSettings};
#[derive(StructOpt)]
#[structopt(author, setting = AppSettings::ArgRequiredElseHelp)]
pub struct BaseDepth {
reads: PathBuf,
#[structopt(long, short = "r")]
ref_fasta: Option<PathBuf>,
#[structopt(long, short = "b")]
bed_file: Option<PathBuf>,
#[structopt(long, short = "B")]
bcf_file: Option<PathBuf>,
#[structopt(long, short = "o")]
output: Option<PathBuf>,
#[structopt(long, short = "Z")]
bgzip: bool,
#[structopt(long, short = "t", default_value = utils::NUM_CPU.as_str())]
threads: usize,
#[structopt(long, short = "T", default_value = "4")]
compression_threads: usize,
#[structopt(long, short = "L", default_value = "2")]
compression_level: u32,
#[structopt(long, short = "c", default_value=par_granges::CHUNKSIZE_STR.as_str())]
chunksize: u32,
#[structopt(long, short = "C", default_value = "0.15")]
channel_size_modifier: f64,
#[structopt(long, short = "f", default_value = "0")]
include_flags: u16,
#[structopt(long, short = "F", default_value = "0")]
exclude_flags: u16,
#[structopt(long, short = "m")]
mate_fix: bool,
#[structopt(long, default_value = "original")]
mate_resolution_strategy: MateResolutionStrategy,
#[structopt(long, short = "k")]
keep_zeros: bool,
#[structopt(long, short = "M")]
skip_merging_intervals: bool,
#[structopt(long, short = "q", default_value = "0")]
min_mapq: u8,
#[structopt(long, short = "Q")]
min_base_quality_score: Option<u8>,
#[structopt(long, short = "z")]
zero_base: bool,
#[structopt(long, default_value = "10")]
ref_cache_size: usize,
#[structopt(long, short = "D", default_value = "100000")]
max_depth: u32,
}
impl BaseDepth {
pub fn run(self) -> Result<()> {
info!("Running base-depth on: {:?}", self.reads);
let cpus = utils::determine_allowed_cpus(self.threads)?;
let mut writer = utils::get_writer(
&self.output,
self.bgzip,
true,
self.compression_threads,
self.compression_level,
)?;
let read_filter =
DefaultReadFilter::new(self.include_flags, self.exclude_flags, self.min_mapq);
let base_processor = BaseProcessor::new(
self.reads.clone(),
self.ref_fasta.clone(),
self.mate_fix,
self.mate_resolution_strategy,
self.keep_zeros,
if self.zero_base { 0 } else { 1 },
read_filter,
self.max_depth,
self.ref_cache_size,
self.min_base_quality_score,
);
let par_granges_runner = par_granges::ParGranges::new(
self.reads.clone(),
self.ref_fasta.clone(),
self.bed_file.clone(),
self.bcf_file.clone(),
!self.skip_merging_intervals,
Some(cpus),
Some(self.chunksize),
Some(self.channel_size_modifier),
base_processor,
);
let receiver = par_granges_runner.process()?;
for pos in receiver.into_iter() {
writer.serialize(pos)?
}
writer.flush()?;
Ok(())
}
}
struct BaseProcessor<F: ReadFilter> {
reads: PathBuf,
ref_fasta: Option<PathBuf>,
ref_buffer: Option<reference::Buffer>,
mate_fix: bool,
mate_fix_strategy: MateResolutionStrategy,
keep_zeros: bool,
coord_base: u32,
read_filter: F,
max_depth: u32,
max_depth_warnings_cutoff: u32,
min_base_quality_score: Option<u8>,
}
impl<F: ReadFilter> BaseProcessor<F> {
#[allow(clippy::too_many_arguments)]
fn new(
reads: PathBuf,
ref_fasta: Option<PathBuf>,
mate_fix: bool,
mate_fix_strategy: MateResolutionStrategy,
keep_zeros: bool,
coord_base: u32,
read_filter: F,
max_depth: u32,
ref_buffer_capacity: usize,
min_base_quality_score: Option<u8>,
) -> Self {
let ref_buffer = ref_fasta.as_ref().map(|ref_fasta| {
reference::Buffer::new(
IndexedReader::from_file(ref_fasta).expect("Reading Indexed FASTA"),
ref_buffer_capacity,
)
});
Self {
reads,
ref_fasta,
ref_buffer,
mate_fix,
mate_fix_strategy,
keep_zeros,
coord_base,
read_filter,
max_depth,
max_depth_warnings_cutoff: max_depth - (max_depth as f64 * 0.01) as u32,
min_base_quality_score,
}
}
}
impl<F: ReadFilter> RegionProcessor for BaseProcessor<F> {
type P = PileupPosition;
fn process_region(&self, tid: u32, start: u32, stop: u32) -> Vec<PileupPosition> {
trace!("Processing region {}(tid):{}-{}", tid, start, stop);
let mut reader =
bam::IndexedReader::from_path(&self.reads).expect("Indexed Reader for region");
if let Some(ref_fasta) = &self.ref_fasta {
reader.set_reference(ref_fasta).expect("Set ref");
}
let header = reader.header().to_owned();
reader.fetch((tid, start, stop)).expect("Fetched a region");
let mut pileup = reader.pileup();
pileup.set_max_depth(std::cmp::min(i32::MAX.try_into().unwrap(), self.max_depth));
let result: Vec<PileupPosition> = pileup
.flat_map(|p| {
let pileup = p.expect("Extracted a pileup");
let pileup_depth = pileup.depth();
if pileup.pos() >= start && pileup.pos() < stop {
let mut pos = if self.mate_fix {
PileupPosition::from_pileup_mate_aware(
pileup,
&header,
&self.read_filter,
self.min_base_quality_score,
self.mate_fix_strategy,
)
} else {
PileupPosition::from_pileup(
pileup,
&header,
&self.read_filter,
self.min_base_quality_score,
)
};
if let Some(buffer) = &self.ref_buffer {
let seq = buffer
.seq(&pos.ref_seq)
.expect("Fetched reference sequence");
pos.ref_base = Some(char::from(
*seq.get(pos.pos as usize)
.expect("Input SAM does not match reference"),
));
}
if pileup_depth > (self.max_depth_warnings_cutoff) {
pos.near_max_depth = true;
}
pos.pos += self.coord_base;
Some(pos)
} else {
None
}
})
.collect();
if self.keep_zeros {
let mut new_result = vec![];
let name = PileupPosition::compact_refseq(&header, tid);
let mut pos = start;
for position in result.into_iter() {
while pos < (position.pos - self.coord_base) {
new_result.push(PileupPosition::new(name.clone(), pos + self.coord_base));
pos += 1;
}
new_result.push(position);
pos += 1;
}
while pos < stop {
new_result.push(PileupPosition::new(name.clone(), pos + self.coord_base));
pos += 1;
}
new_result
} else {
result
}
}
}
#[cfg(test)]
#[allow(unused)]
mod tests {
use super::*;
use perbase_lib::position::{Position, pileup_position::PileupPosition};
use proptest::bits::u8;
use rstest::*;
use rust_htslib::{bam, bam::record::Record};
use smartstring::alias::*;
use std::{collections::HashMap, path::PathBuf};
use tempfile::{TempDir, tempdir};
#[fixture]
fn read_filter() -> DefaultReadFilter {
DefaultReadFilter::new(0, 512, 0)
}
#[fixture]
fn bamfile() -> (PathBuf, TempDir) {
let tempdir = tempdir().unwrap();
let path = tempdir.path().join("test.bam");
let mut header = bam::header::Header::new();
let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
chr1.push_tag(b"SN", &"chr1".to_owned());
chr1.push_tag(b"LN", &"100".to_owned());
let mut chr2 = bam::header::HeaderRecord::new(b"SQ");
chr2.push_tag(b"SN", &"chr2".to_owned());
chr2.push_tag(b"LN", &"100".to_owned());
header.push_record(&chr1);
header.push_record(&chr2);
let view = bam::HeaderView::from_header(&header);
let records = vec![
Record::from_sam(&view, b"ONE\t67\tchr1\t1\t40\t25M\tchr1\t50\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"TWO\t67\tchr1\t5\t40\t25M\tchr1\t55\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"THREE\t67\tchr1\t10\t40\t25M\tchr1\t60\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FOUR\t67\tchr1\t15\t40\t25M\tchr1\t65\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t67\tchr1\t20\t40\t25M\tchr1\t70\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"ONE\t147\tchr1\t50\t40\t25M\tchr1\t1\t75\tTTTTTTTTTTTTTTTTTTTTTTTTT\t#########################").unwrap(),
Record::from_sam(&view, b"TWO\t147\tchr1\t55\t40\t25M\tchr1\t5\t75\tGGGGGGGGGGGGGGGGGGGGGGGGG\t#########################").unwrap(),
Record::from_sam(&view, b"THREE\t147\tchr1\t60\t40\t25M\tchr1\t10\t75\tCCCCCCCCCCCCCCCCCCCCCCCCC\t#########################").unwrap(),
Record::from_sam(&view, b"FOUR\t147\tchr1\t65\t40\t25M\tchr1\t15\t75\tNNNNNNNNNNNNNNNNNNNNNNNNN\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t147\tchr1\t70\t40\t25M\tchr1\t20\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"ONE\t67\tchr2\t1\t40\t2M2I21M\tchr2\t50\t75\tAAGGGAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"TWO\t67\tchr2\t5\t40\t2M5D23M\tchr2\t55\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"THREE\t67\tchr2\t10\t40\t3M5N22M\tchr2\t60\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FOUR\t67\tchr2\t15\t40\t25M\tchr2\t65\t75\tATAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t67\tchr2\t20\t40\t25M\tchr2\t35\t40\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t147\tchr2\t35\t40\t25M\tchr2\t20\t40\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"ONE\t147\tchr2\t50\t40\t25M\tchr2\t1\t75\tAAAAAAAAAAAAAAAAAAAAAYAAA\t#########################").unwrap(),
Record::from_sam(&view, b"TWO\t147\tchr2\t55\t40\t25M\tchr2\t5\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"THREE\t147\tchr2\t60\t40\t25M\tchr2\t10\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FOUR\t659\tchr2\t65\t40\t25M\tchr2\t15\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
];
let mut writer =
bam::Writer::from_path(&path, &header, bam::Format::Bam).expect("Created writer");
for record in records.iter() {
writer.write(record).expect("Wrote record");
}
drop(writer); bam::index::build(&path, None, bam::index::Type::Bai, 1).unwrap();
(path, tempdir)
}
#[fixture]
fn non_mate_aware_positions(
bamfile: (PathBuf, TempDir),
read_filter: DefaultReadFilter,
) -> HashMap<String, Vec<PileupPosition>> {
let cpus = utils::determine_allowed_cpus(8).unwrap();
let base_processor = BaseProcessor::new(
bamfile.0.clone(),
None,
false,
MateResolutionStrategy::Original,
false,
1,
read_filter,
500_000,
cpus,
None,
);
let par_granges_runner = par_granges::ParGranges::new(
bamfile.0,
None,
None,
None,
true,
Some(cpus),
None,
Some(0.001),
base_processor,
);
let mut positions = HashMap::new();
par_granges_runner
.process()
.unwrap()
.into_iter()
.for_each(|p| {
let pos = positions.entry(p.ref_seq.clone()).or_insert_with(Vec::new);
pos.push(p)
});
positions
}
#[fixture]
fn non_mate_aware_keep_zeros_positions(
bamfile: (PathBuf, TempDir),
read_filter: DefaultReadFilter,
) -> HashMap<String, Vec<PileupPosition>> {
let cpus = utils::determine_allowed_cpus(8).unwrap();
let base_processor = BaseProcessor::new(
bamfile.0.clone(),
None,
false,
MateResolutionStrategy::Original,
true, 1,
read_filter,
500_000,
cpus,
None,
);
let par_granges_runner = par_granges::ParGranges::new(
bamfile.0,
None,
None,
None,
true,
Some(cpus),
None,
Some(0.001),
base_processor,
);
let mut positions = HashMap::new();
par_granges_runner
.process()
.unwrap()
.into_iter()
.for_each(|p| {
let pos = positions.entry(p.ref_seq.clone()).or_insert_with(Vec::new);
pos.push(p)
});
positions
}
#[fixture]
fn mate_aware_positions(
bamfile: (PathBuf, TempDir),
read_filter: DefaultReadFilter,
) -> HashMap<String, Vec<PileupPosition>> {
let cpus = utils::determine_allowed_cpus(8).unwrap();
let base_processor = BaseProcessor::new(
bamfile.0.clone(),
None,
true, MateResolutionStrategy::Original,
false,
1,
read_filter,
500_000,
cpus,
None,
);
let par_granges_runner = par_granges::ParGranges::new(
bamfile.0,
None,
None,
None,
true,
Some(cpus),
None,
Some(0.001),
base_processor,
);
let mut positions = HashMap::new();
par_granges_runner
.process()
.unwrap()
.into_iter()
.for_each(|p| {
let pos = positions.entry(p.ref_seq.clone()).or_insert_with(Vec::new);
pos.push(p)
});
positions
}
#[fixture]
fn mate_aware_keep_zeros_positions(
bamfile: (PathBuf, TempDir),
read_filter: DefaultReadFilter,
) -> HashMap<String, Vec<PileupPosition>> {
let cpus = utils::determine_allowed_cpus(8).unwrap();
let base_processor = BaseProcessor::new(
bamfile.0.clone(),
None,
false, MateResolutionStrategy::Original,
true, 1,
read_filter,
500_000,
cpus,
None,
);
let par_granges_runner = par_granges::ParGranges::new(
bamfile.0,
None,
None,
None,
true,
Some(cpus),
None,
Some(0.001),
base_processor,
);
let mut positions = HashMap::new();
par_granges_runner
.process()
.unwrap()
.into_iter()
.for_each(|p| {
let pos = positions.entry(p.ref_seq.clone()).or_insert_with(Vec::new);
pos.push(p)
});
positions
}
fn non_mate_aware_positions_base_qual(
bamfile: (PathBuf, TempDir),
read_filter: DefaultReadFilter,
base_quality: u8,
) -> HashMap<String, Vec<PileupPosition>> {
let cpus = utils::determine_allowed_cpus(8).unwrap();
let base_processor = BaseProcessor::new(
bamfile.0.clone(),
None,
false,
MateResolutionStrategy::Original,
false,
1,
read_filter,
500_000,
cpus,
Some(base_quality),
);
let par_granges_runner = par_granges::ParGranges::new(
bamfile.0,
None,
None,
None,
true,
Some(cpus),
None,
Some(0.001),
base_processor,
);
let mut positions = HashMap::new();
par_granges_runner
.process()
.unwrap()
.into_iter()
.for_each(|p| {
let pos = positions.entry(p.ref_seq.clone()).or_insert_with(Vec::new);
pos.push(p)
});
positions
}
fn mate_aware_positions_base_qual(
bamfile: (PathBuf, TempDir),
read_filter: DefaultReadFilter,
base_quality: u8,
) -> HashMap<String, Vec<PileupPosition>> {
let cpus = utils::determine_allowed_cpus(8).unwrap();
let base_processor = BaseProcessor::new(
bamfile.0.clone(),
None,
true, MateResolutionStrategy::Original,
false,
1,
read_filter,
500_000,
cpus,
Some(base_quality),
);
let par_granges_runner = par_granges::ParGranges::new(
bamfile.0,
None,
None,
None,
true,
Some(cpus),
None,
Some(0.001),
base_processor,
);
let mut positions = HashMap::new();
par_granges_runner
.process()
.unwrap()
.into_iter()
.for_each(|p| {
let pos = positions.entry(p.ref_seq.clone()).or_insert_with(Vec::new);
pos.push(p)
});
positions
}
#[rstest(
positions,
awareness_modifier,
case::mate_unaware(non_mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_aware(mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0)
)]
fn check_insertions(positions: HashMap<String, Vec<PileupPosition>>, awareness_modifier: u32) {
assert_eq!(positions.get("chr2").unwrap()[0].ins, 0);
assert_eq!(positions.get("chr2").unwrap()[1].ins, 1);
assert_eq!(positions.get("chr2").unwrap()[2].ins, 0);
}
#[rstest(
positions,
awareness_modifier,
case::mate_unaware(non_mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_aware(mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0)
)]
fn check_deletions(positions: HashMap<String, Vec<PileupPosition>>, awareness_modifier: u32) {
assert_eq!(positions.get("chr2").unwrap()[5].del, 0);
assert_eq!(positions.get("chr2").unwrap()[6].del, 1);
assert_eq!(positions.get("chr2").unwrap()[7].del, 1);
assert_eq!(positions.get("chr2").unwrap()[8].del, 1);
assert_eq!(positions.get("chr2").unwrap()[9].del, 1);
assert_eq!(positions.get("chr2").unwrap()[10].del, 1);
assert_eq!(positions.get("chr2").unwrap()[11].del, 0);
}
#[rstest(
positions,
awareness_modifier,
case::mate_unaware(non_mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_aware(mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0)
)]
fn check_refskips(positions: HashMap<String, Vec<PileupPosition>>, awareness_modifier: u32) {
assert_eq!(positions.get("chr2").unwrap()[11].ref_skip, 0);
assert_eq!(positions.get("chr2").unwrap()[12].ref_skip, 1);
assert_eq!(positions.get("chr2").unwrap()[13].ref_skip, 1);
assert_eq!(positions.get("chr2").unwrap()[14].ref_skip, 1);
assert_eq!(positions.get("chr2").unwrap()[15].ref_skip, 1);
assert_eq!(positions.get("chr2").unwrap()[16].ref_skip, 1);
assert_eq!(positions.get("chr2").unwrap()[17].ref_skip, 0);
}
#[rstest(
positions,
awareness_modifier,
case::mate_unaware(non_mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_aware(mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0)
)]
fn check_start(positions: HashMap<String, Vec<PileupPosition>>, awareness_modifier: u32) {
assert_eq!(positions.get("chr1").unwrap()[0].pos, 1);
assert_eq!(positions.get("chr1").unwrap()[4].pos, 5);
assert_eq!(positions.get("chr1").unwrap()[9].pos, 10);
assert_eq!(positions.get("chr1").unwrap()[14].pos, 15);
assert_eq!(positions.get("chr1").unwrap()[19].pos, 20);
assert_eq!(positions.get("chr1").unwrap()[25].pos, 26);
assert_eq!(positions.get("chr1").unwrap()[29].pos, 30);
assert_eq!(positions.get("chr1").unwrap()[34].pos, 35);
assert_eq!(positions.get("chr1").unwrap()[39].pos, 40);
assert_eq!(positions.get("chr1").unwrap()[72].pos, 78);
}
#[rstest(
positions,
keep_zeros,
case::mate_unaware(non_mate_aware_positions(bamfile(), read_filter()), false),
case::mate_aware(mate_aware_positions(bamfile(), read_filter()), false),
case::mate_unaware_bq(
non_mate_aware_positions_base_qual(bamfile(), read_filter(), 1),
false
),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 1), false),
case::mate_unaware_bq(
non_mate_aware_positions_base_qual(bamfile(), read_filter(), 3),
false
),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 3), false),
case::mate_unaware_keep_zeros(
non_mate_aware_keep_zeros_positions(bamfile(), read_filter()),
true
),
case::mate_aware_keep_zeros(
mate_aware_keep_zeros_positions(bamfile(), read_filter()),
true
)
)]
fn check_depths(positions: HashMap<String, Vec<PileupPosition>>, keep_zeros: bool) {
assert_eq!(positions.get("chr1").unwrap()[0].depth, 1);
assert_eq!(positions.get("chr1").unwrap()[1].depth, 1);
assert_eq!(positions.get("chr1").unwrap()[4].depth, 2);
assert_eq!(positions.get("chr1").unwrap()[9].depth, 3);
assert_eq!(positions.get("chr1").unwrap()[14].depth, 4);
assert_eq!(positions.get("chr1").unwrap()[19].depth, 5);
assert_eq!(positions.get("chr1").unwrap()[25].depth, 4);
assert_eq!(positions.get("chr1").unwrap()[29].depth, 3);
assert_eq!(positions.get("chr1").unwrap()[34].depth, 2);
assert_eq!(positions.get("chr1").unwrap()[39].depth, 1);
if !keep_zeros {
assert_eq!(positions.get("chr1").unwrap()[78 - 6].depth, 4);
} else {
assert_eq!(positions.get("chr1").unwrap().len(), 100);
for (i, p) in positions.get("chr1").unwrap().iter().enumerate() {
assert_eq!(p.pos, (i + 1).try_into().unwrap());
}
assert_eq!(positions.get("chr1").unwrap()[43].depth, 1);
assert_eq!(positions.get("chr1").unwrap()[44].depth, 0);
assert_eq!(positions.get("chr1").unwrap()[45].depth, 0);
assert_eq!(positions.get("chr1").unwrap()[46].depth, 0);
assert_eq!(positions.get("chr1").unwrap()[47].depth, 0);
assert_eq!(positions.get("chr1").unwrap()[48].depth, 0);
assert_eq!(positions.get("chr1").unwrap()[49].depth, 1);
assert_eq!(positions.get("chr1").unwrap()[53].depth, 1);
assert_eq!(positions.get("chr1").unwrap()[54].depth, 2);
assert_eq!(positions.get("chr1").unwrap()[55].depth, 2);
assert_eq!(positions.get("chr1").unwrap()[56].depth, 2);
assert_eq!(positions.get("chr1").unwrap()[57].depth, 2);
assert_eq!(positions.get("chr1").unwrap()[58].depth, 2);
assert_eq!(positions.get("chr1").unwrap()[59].depth, 3);
assert_eq!(positions.get("chr1").unwrap()[60].depth, 3);
assert_eq!(positions.get("chr1").unwrap()[61].depth, 3);
assert_eq!(positions.get("chr1").unwrap()[62].depth, 3);
assert_eq!(positions.get("chr1").unwrap()[63].depth, 3);
assert_eq!(positions.get("chr1").unwrap()[64].depth, 4);
assert_eq!(positions.get("chr1").unwrap()[68].depth, 4);
assert_eq!(positions.get("chr1").unwrap()[93].depth, 1);
assert_eq!(positions.get("chr1").unwrap()[94].depth, 0);
assert_eq!(positions.get("chr1").unwrap()[95].depth, 0);
assert_eq!(positions.get("chr1").unwrap()[96].depth, 0);
assert_eq!(positions.get("chr1").unwrap()[99].depth, 0);
}
}
#[rstest(
positions,
awareness_modifier,
case::mate_unaware(non_mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_aware(mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0)
)]
fn check_filters(positions: HashMap<String, Vec<PileupPosition>>, awareness_modifier: u32) {
assert_eq!(positions.get("chr2").unwrap()[81].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[84].depth, 0);
assert_eq!(positions.get("chr2").unwrap()[81].fail, 1);
assert_eq!(positions.get("chr2").unwrap()[84].fail, 1);
}
#[rstest(
positions,
awareness_modifier,
case::mate_unaware(non_mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_aware(mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0)
)]
fn check_depths_insertions(
positions: HashMap<String, Vec<PileupPosition>>,
awareness_modifier: u32,
) {
assert_eq!(positions.get("chr2").unwrap()[0].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[1].depth, 1); assert_eq!(positions.get("chr2").unwrap()[2].depth, 1);
}
#[rstest(
positions,
awareness_modifier,
case::mate_unaware(non_mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_aware(mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0)
)]
fn check_depths_deletions(
positions: HashMap<String, Vec<PileupPosition>>,
awareness_modifier: u32,
) {
assert_eq!(positions.get("chr2").unwrap()[5].depth, 2);
assert_eq!(positions.get("chr2").unwrap()[6].depth, 2); assert_eq!(positions.get("chr2").unwrap()[7].depth, 2); assert_eq!(positions.get("chr2").unwrap()[8].depth, 2); assert_eq!(positions.get("chr2").unwrap()[9].depth, 3); assert_eq!(positions.get("chr2").unwrap()[10].depth, 3); assert_eq!(positions.get("chr2").unwrap()[11].depth, 3);
}
#[rstest(
positions,
awareness_modifier,
case::mate_unaware(non_mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_aware(mate_aware_positions(bamfile(), read_filter()), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 0),
case::mate_unaware_bq(non_mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 0)
)]
fn check_depths_refskips(
positions: HashMap<String, Vec<PileupPosition>>,
awareness_modifier: u32,
) {
assert_eq!(positions.get("chr2").unwrap()[11].depth, 3);
assert_eq!(positions.get("chr2").unwrap()[12].depth, 2); assert_eq!(positions.get("chr2").unwrap()[13].depth, 2); assert_eq!(positions.get("chr2").unwrap()[14].depth, 3); assert_eq!(positions.get("chr2").unwrap()[15].depth, 3); assert_eq!(positions.get("chr2").unwrap()[16].depth, 3); assert_eq!(positions.get("chr2").unwrap()[17].depth, 4);
}
#[rustfmt::skip]
#[rstest(
positions,
awareness_modifier,
base_qual_filtered_all,
case::mate_unaware(non_mate_aware_positions(bamfile(), read_filter()), 0, false),
case::mate_aware(mate_aware_positions(bamfile(), read_filter()), 1, false),
case::mate_unaware_bq(
non_mate_aware_positions_base_qual(bamfile(), read_filter(), 1),
0,
false
),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 1), 1, false),
case::mate_unaware_bq(
non_mate_aware_positions_base_qual(bamfile(), read_filter(), 3),
0,
true
),
case::mate_aware_bq(mate_aware_positions_base_qual(bamfile(), read_filter(), 3), 1, true)
)]
fn check_mate_detection(
positions: HashMap<String, Vec<PileupPosition>>,
awareness_modifier: u32,
base_qual_filtered_all: bool,
) {
assert_eq!(positions.get("chr2").unwrap()[33].depth, 4);
assert_eq!(
positions.get("chr2").unwrap()[34].depth,
4 - awareness_modifier
); assert_eq!(
positions.get("chr2").unwrap()[35].depth,
4 - awareness_modifier
); assert_eq!(
positions.get("chr2").unwrap()[36].depth,
4 - awareness_modifier
); assert_eq!(
positions.get("chr2").unwrap()[37].depth,
4 - awareness_modifier
); assert_eq!(
positions.get("chr2").unwrap()[38].depth,
4 - awareness_modifier
); assert_eq!(
positions.get("chr2").unwrap()[39].depth,
2 - awareness_modifier
); assert_eq!(
positions.get("chr2").unwrap()[40].depth,
2 - awareness_modifier
); assert_eq!(
positions.get("chr2").unwrap()[41].depth,
2 - awareness_modifier
); assert_eq!(
positions.get("chr2").unwrap()[42].depth,
2 - awareness_modifier
); assert_eq!(
positions.get("chr2").unwrap()[43].depth,
2 - awareness_modifier
); assert_eq!(positions.get("chr2").unwrap()[44].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[33].a, if base_qual_filtered_all { 0 } else { 4 } );
assert_eq!(positions.get("chr2").unwrap()[34].a, if base_qual_filtered_all { 0 } else { 4 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[35].a, if base_qual_filtered_all { 0 } else { 4 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[36].a, if base_qual_filtered_all { 0 } else { 4 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[37].a, if base_qual_filtered_all { 0 } else { 4 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[38].a, if base_qual_filtered_all { 0 } else { 4 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[39].a, if base_qual_filtered_all { 0 } else { 2 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[40].a, if base_qual_filtered_all { 0 } else { 2 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[41].a, if base_qual_filtered_all { 0 } else { 2 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[42].a, if base_qual_filtered_all { 0 } else { 2 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[43].a, if base_qual_filtered_all { 0 } else { 2 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[44].a, if base_qual_filtered_all { 0 } else { 1 });
assert_eq!(positions.get("chr2").unwrap()[33].n, if !base_qual_filtered_all { 0 } else { 4 } );
assert_eq!(positions.get("chr2").unwrap()[34].n, if !base_qual_filtered_all { 0 } else { 4 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[35].n, if !base_qual_filtered_all { 0 } else { 4 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[36].n, if !base_qual_filtered_all { 0 } else { 4 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[37].n, if !base_qual_filtered_all { 0 } else { 4 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[38].n, if !base_qual_filtered_all { 0 } else { 4 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[39].n, if !base_qual_filtered_all { 0 } else { 2 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[40].n, if !base_qual_filtered_all { 0 } else { 2 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[41].n, if !base_qual_filtered_all { 0 } else { 2 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[42].n, if !base_qual_filtered_all { 0 } else { 2 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[43].n, if !base_qual_filtered_all { 0 } else { 2 - awareness_modifier }); assert_eq!(positions.get("chr2").unwrap()[44].n, if !base_qual_filtered_all { 0 } else { 1 });
}
}