use anyhow::Result;
use log::*;
use perbase_lib::{
par_granges::{self, RegionProcessor},
position::{
Position,
range_positions::{BedFormatRangePositions, RangePositions},
},
read_filter::{DefaultReadFilter, ReadFilter},
utils,
};
use rust_htslib::{bam, bam::Read, bam::ext::BamRecordExtensions, bam::record::Cigar};
use rust_lapper::{Interval, Lapper};
use smartstring::alias::String;
use std::{collections::HashMap, path::PathBuf};
use std::{convert::TryFrom, rc::Rc};
use structopt::{StructOpt, clap::AppSettings};
#[derive(StructOpt)]
#[structopt(author, setting = AppSettings::ArgRequiredElseHelp)]
pub struct OnlyDepth {
reads: PathBuf,
#[structopt(long, short = "r")]
ref_fasta: Option<PathBuf>,
#[structopt(long, short = "b")]
bed_file: Option<PathBuf>,
#[structopt(long, short = "B")]
bcf_file: Option<PathBuf>,
#[structopt(long, short = "o")]
output: Option<PathBuf>,
#[structopt(long)]
bed_format: bool,
#[structopt(long, short = "Z")]
bgzip: bool,
#[structopt(long, short = "t", default_value = utils::NUM_CPU.as_str())]
threads: usize,
#[structopt(long, short = "T", default_value = "4")]
compression_threads: usize,
#[structopt(long, short = "L", default_value = "2")]
compression_level: u32,
#[structopt(long, short = "c", default_value=par_granges::CHUNKSIZE_STR.as_str())]
chunksize: u32,
#[structopt(long, short = "C", default_value = "0.001")]
channel_size_modifier: f64,
#[structopt(long, short = "f", default_value = "0")]
include_flags: u16,
#[structopt(long, short = "F", default_value = "0")]
exclude_flags: u16,
#[structopt(long, short = "m", conflicts_with = "fast_mode")]
mate_fix: bool,
#[structopt(long, short = "x")]
fast_mode: bool,
#[structopt(long, short = "n")]
no_merge: bool,
#[structopt(long, short = "M")]
skip_merging_intervals: bool,
#[structopt(long, short = "k")]
keep_zeros: bool,
#[structopt(long, short = "q", default_value = "0")]
min_mapq: u8,
#[structopt(long, short = "z")]
zero_base: bool,
}
impl OnlyDepth {
pub fn run(self) -> Result<()> {
info!("Running only-depth on: {:?}", self.reads);
let cpus = utils::determine_allowed_cpus(self.threads)?;
let mut writer = utils::get_writer(
&self.output,
self.bgzip,
!self.bed_format,
self.compression_threads,
self.compression_level,
)?;
let read_filter =
DefaultReadFilter::new(self.include_flags, self.exclude_flags, self.min_mapq);
let processor = OnlyDepthProcessor::new(
self.reads.clone(),
self.ref_fasta.clone(),
self.mate_fix,
self.fast_mode,
self.no_merge,
self.keep_zeros,
if self.zero_base { 0 } else { 1 },
read_filter,
);
let par_granges_runner = par_granges::ParGranges::new(
self.reads.clone(),
self.ref_fasta.clone(),
self.bed_file.clone(),
self.bcf_file.clone(),
!self.skip_merging_intervals,
Some(cpus),
Some(self.chunksize),
Some(self.channel_size_modifier),
processor,
);
let receiver = par_granges_runner.process()?;
if self.bed_format {
for pos in receiver.into_iter() {
writer.serialize(BedFormatRangePositions::from(pos))?
}
} else {
for pos in receiver.into_iter() {
writer.serialize(pos)?
}
}
writer.flush()?;
Ok(())
}
#[inline]
fn maybe_overlaps_mate(record: &bam::Record) -> bool {
if !record.is_paired() || record.tid() != record.mtid() {
return false;
}
let tlen = record.insert_size().abs();
let read_len = record.reference_end() - record.reference_start();
(tlen / 2) < read_len || (record.mpos() - record.pos()).abs() < read_len
}
}
struct OnlyDepthProcessor<F: ReadFilter> {
reads: PathBuf,
ref_fasta: Option<PathBuf>,
mate_fix: bool,
fast_mode: bool,
no_merge: bool,
keep_zeros: bool,
read_filter: F,
coord_base: u32,
}
impl<F: ReadFilter> OnlyDepthProcessor<F> {
#[allow(clippy::too_many_arguments)]
fn new(
reads: PathBuf,
ref_fasta: Option<PathBuf>,
mate_fix: bool,
fast_mode: bool,
no_merge: bool,
keep_zeros: bool,
coord_base: u32,
read_filter: F,
) -> Self {
Self {
reads,
ref_fasta,
fast_mode,
mate_fix,
no_merge,
keep_zeros,
coord_base,
read_filter,
}
}
#[inline]
fn sum_counter(
&self,
counter: Vec<i32>,
contig: &str,
region_start: u32,
region_stop: u32,
) -> Vec<RangePositions> {
if self.no_merge {
let mut sum: i32 = 0;
let mut results = vec![];
for (i, count) in counter.iter().enumerate() {
sum += count;
if sum != 0 || self.keep_zeros {
let mut pos = RangePositions::new(
String::from(contig),
region_start + i as u32 + self.coord_base,
);
pos.depth = u32::try_from(sum).expect("All depths are positive");
pos.end = region_start + i as u32 + self.coord_base + 1;
results.push(pos);
}
}
results
} else {
let mut sum: i32 = 0;
let mut results = vec![];
let mut curr_start = region_start;
let mut curr_depth = counter[0];
for (i, count) in counter.iter().enumerate() {
sum += count;
if curr_depth != sum {
let mut pos =
RangePositions::new(String::from(contig), curr_start + self.coord_base);
pos.depth = u32::try_from(curr_depth).expect("All depths are positive");
pos.end = region_start + i as u32 + self.coord_base;
curr_start = region_start + i as u32;
curr_depth = sum;
results.push(pos);
}
if i == counter.len() - 1 {
let mut pos =
RangePositions::new(String::from(contig), curr_start + self.coord_base);
pos.depth = u32::try_from(curr_depth).expect("All depths are positive");
pos.end = region_stop + self.coord_base;
results.push(pos);
}
}
results
}
}
fn process_region(&self, tid: u32, start: u32, stop: u32) -> Vec<RangePositions> {
let mut reader =
bam::IndexedReader::from_path(&self.reads).expect("Indexed Reader for region");
if let Some(ref_fasta) = &self.ref_fasta {
reader.set_reference(ref_fasta).expect("Set ref");
}
let header = reader.header().to_owned();
reader.fetch((tid, start, stop)).expect("Fetched a region");
let mut counter: Vec<i32> = vec![0; (stop - start) as usize];
let mut maties = HashMap::new();
for record in reader
.rc_records()
.map(|r| r.expect("Read record"))
.filter(|read| self.read_filter.filter_read(read, None))
.flat_map(|record| IterAlignedBlocks::new(record, self.mate_fix))
{
let rec_start = u32::try_from(record.0).expect("check overflow");
let rec_stop = u32::try_from(record.1).expect("check overflow");
if rec_start >= stop || rec_stop <= start {
continue;
}
let adjusted_start = if rec_start < start {
0
} else {
(rec_start - start) as usize
};
let mut dont_count_stop = false; let adjusted_stop = if rec_stop >= stop {
dont_count_stop = true;
counter.len() - 1
} else {
(rec_stop - start) as usize
};
if self.mate_fix && record.2 {
let intervals = maties.entry(record.3).or_insert(vec![]);
intervals.push(Interval {
start: adjusted_start,
stop: adjusted_stop,
val: dont_count_stop,
});
} else {
counter[adjusted_start] += 1;
if !dont_count_stop {
counter[adjusted_stop] -= 1;
}
}
}
if self.mate_fix {
for (_qname, ivs) in maties.drain() {
let mut lapper = Lapper::new(ivs);
lapper.merge_overlaps();
for iv in lapper.intervals {
counter[iv.start] += 1;
if !iv.val {
counter[iv.stop] -= 1;
}
}
}
}
let contig = std::str::from_utf8(header.tid2name(tid)).unwrap();
self.sum_counter(counter, contig, start, stop)
}
fn process_region_fast(&self, tid: u32, start: u32, stop: u32) -> Vec<RangePositions> {
let mut reader =
bam::IndexedReader::from_path(&self.reads).expect("Indexed Reader for region");
if let Some(ref_fasta) = &self.ref_fasta {
reader.set_reference(ref_fasta).expect("Set ref");
}
let header = reader.header().to_owned();
reader.fetch((tid, start, stop)).expect("Fetched a region");
let mut counter: Vec<i32> = vec![0; (stop - start) as usize];
let mut maties = HashMap::new();
for record in reader
.rc_records()
.map(|r| r.expect("Read record"))
.filter(|read| self.read_filter.filter_read(read, None))
{
let rec_start = u32::try_from(record.reference_start()).expect("check overflow");
let rec_stop = u32::try_from(record.reference_end()).expect("check overflow");
let adjusted_start = if rec_start < start {
0
} else {
(rec_start - start) as usize
};
let mut dont_count_stop = false; let adjusted_stop = if rec_stop >= stop {
dont_count_stop = true;
counter.len() - 1
} else {
(rec_stop - start) as usize
};
if self.mate_fix && OnlyDepth::maybe_overlaps_mate(&record) {
let qname =
String::from(std::str::from_utf8(record.qname()).expect("Convert qname"));
let intervals = maties.entry(qname).or_insert(vec![]);
intervals.push(Interval {
start: adjusted_start,
stop: adjusted_stop,
val: dont_count_stop,
});
} else {
counter[adjusted_start] += 1;
if !dont_count_stop {
counter[adjusted_stop] -= 1;
}
}
}
if self.mate_fix {
for (_qname, ivs) in maties.drain() {
let mut lapper = Lapper::new(ivs);
lapper.merge_overlaps();
for iv in lapper.intervals {
counter[iv.start] += 1;
if !iv.val {
counter[iv.stop] -= 1;
}
}
}
}
let contig = std::str::from_utf8(header.tid2name(tid)).unwrap();
self.sum_counter(counter, contig, start, stop)
}
}
impl<F: ReadFilter> RegionProcessor for OnlyDepthProcessor<F> {
type P = RangePositions;
fn process_region(&self, tid: u32, start: u32, stop: u32) -> Vec<RangePositions> {
trace!("Processing region {}(tid):{}-{}", tid, start, stop);
if self.fast_mode {
self.process_region_fast(tid, start, stop)
} else {
self.process_region(tid, start, stop)
}
}
}
struct IterAlignedBlocks {
pos: i64,
cigar_index: usize,
cigar: bam::record::CigarStringView,
overlap_status: bool,
record: Rc<bam::Record>,
}
impl IterAlignedBlocks {
fn new(record: Rc<bam::Record>, check_mate: bool) -> Self {
let overlap = if check_mate {
OnlyDepth::maybe_overlaps_mate(&record)
} else {
false
};
Self {
pos: record.reference_start(),
cigar_index: 0,
cigar: record.cigar(),
overlap_status: overlap,
record,
}
}
}
impl Iterator for IterAlignedBlocks {
type Item = (i64, i64, bool, String);
fn next(&mut self) -> Option<Self::Item> {
while self.cigar_index < self.cigar.len() {
let entry = self.cigar[self.cigar_index];
match entry {
Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) | Cigar::Del(len) => {
let out_pos = self.pos;
self.pos += len as i64;
self.cigar_index += 1;
return Some((
out_pos,
out_pos + len as i64,
self.overlap_status,
String::from(
std::str::from_utf8(self.record.qname()).expect("Convert qname"),
),
));
}
Cigar::RefSkip(len) => self.pos += len as i64,
_ => (),
}
self.cigar_index += 1;
}
None
}
}
#[cfg(test)]
#[allow(unused)]
mod tests {
use super::*;
use perbase_lib::{
position::{Position, range_positions::RangePositions},
read_filter::DefaultReadFilter,
};
use rstest::*;
use rust_htslib::{bam, bam::record::Record};
use smartstring::alias::*;
use std::{collections::HashMap, path::PathBuf};
use tempfile::{TempDir, tempdir};
#[fixture]
fn read_filter() -> DefaultReadFilter {
DefaultReadFilter::new(0, 512, 0)
}
#[fixture]
fn bamfile() -> (PathBuf, TempDir) {
let tempdir = tempdir().unwrap();
let path = tempdir.path().join("test.bam");
let mut header = bam::header::Header::new();
let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
chr1.push_tag(b"SN", &"chr1".to_owned());
chr1.push_tag(b"LN", &"3000000".to_owned());
let mut chr2 = bam::header::HeaderRecord::new(b"SQ");
chr2.push_tag(b"SN", &"chr2".to_owned());
chr2.push_tag(b"LN", &"3000000".to_owned());
let mut chr3 = bam::header::HeaderRecord::new(b"SQ");
chr3.push_tag(b"SN", &"chr3".to_owned());
chr3.push_tag(b"LN", &"3000000".to_owned());
header.push_record(&chr1);
header.push_record(&chr2);
header.push_record(&chr3);
let view = bam::HeaderView::from_header(&header);
let records = vec![
Record::from_sam(&view, b"ONE\t67\tchr1\t1\t40\t25M\tchr1\t50\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"TWO\t67\tchr1\t5\t40\t25M\tchr1\t55\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"THREE\t67\tchr1\t10\t40\t25M\tchr1\t60\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FOUR\t67\tchr1\t15\t40\t25M\tchr1\t65\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t67\tchr1\t20\t40\t25M\tchr1\t70\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"ONE\t147\tchr1\t50\t40\t25M\tchr1\t1\t75\tTTTTTTTTTTTTTTTTTTTTTTTTT\t#########################").unwrap(),
Record::from_sam(&view, b"TWO\t147\tchr1\t55\t40\t25M\tchr1\t5\t75\tGGGGGGGGGGGGGGGGGGGGGGGGG\t#########################").unwrap(),
Record::from_sam(&view, b"THREE\t147\tchr1\t60\t40\t25M\tchr1\t10\t75\tCCCCCCCCCCCCCCCCCCCCCCCCC\t#########################").unwrap(),
Record::from_sam(&view, b"FOUR\t147\tchr1\t65\t40\t25M\tchr1\t15\t75\tNNNNNNNNNNNNNNNNNNNNNNNNN\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t147\tchr1\t70\t40\t25M\tchr1\t20\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t147\tchr1\t999976\t40\t25M\tchr1\t1000026\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t147\tchr1\t1000026\t40\t25M\tchr1\t999976\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"ONE\t67\tchr2\t1\t40\t2M2I21M\tchr2\t50\t75\tAAGGGAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"TWO\t67\tchr2\t5\t40\t2M5D23M\tchr2\t55\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"THREE\t67\tchr2\t10\t40\t3M5N22M\tchr2\t60\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FOUR\t67\tchr2\t15\t40\t25M\tchr2\t65\t75\tATAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t67\tchr2\t20\t40\t25M\tchr2\t35\t40\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t147\tchr2\t35\t40\t25M\tchr2\t20\t-40\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"ONE\t147\tchr2\t50\t40\t25M\tchr2\t1\t75\tAAAAAAAAAAAAAAAAAAAAAYAAA\t#########################").unwrap(),
Record::from_sam(&view, b"TWO\t147\tchr2\t55\t40\t25M\tchr2\t5\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"THREE\t147\tchr2\t60\t40\t25M\tchr2\t10\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FOUR\t659\tchr2\t65\t40\t25M\tchr2\t15\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"SIX\t67\tchr2\t120\t40\t25M\tchr2\t120\t40\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"SIX\t147\tchr2\t120\t40\t25M\tchr2\t120\t-40\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"SEVEN\t67\tchr2\t160\t40\t50M\tchr2\t150\t50\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\t##################################################").unwrap(),
Record::from_sam(&view, b"SEVEN\t147\tchr2\t175\t40\t25M\tchr2\t175\t-50\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"ONE\t67\tchr3\t1\t40\t2M2000000N23M\tchr3\t50\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"TWO\t67\tchr3\t5\t40\t25M\tchr3\t55\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"THREE\t67\tchr3\t10\t40\t23M2000000N2M\tchr3\t60\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FOUR\t67\tchr3\t15\t40\t25M\tchr3\t65\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t67\tchr3\t20\t40\t25M\tchr3\t70\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
Record::from_sam(&view, b"ONE\t147\tchr3\t50\t40\t25M\tchr3\t1\t75\tTTTTTTTTTTTTTTTTTTTTTTTTT\t#########################").unwrap(),
Record::from_sam(&view, b"TWO\t147\tchr3\t55\t40\t25M\tchr3\t5\t75\tGGGGGGGGGGGGGGGGGGGGGGGGG\t#########################").unwrap(),
Record::from_sam(&view, b"THREE\t147\tchr3\t60\t40\t25M\tchr3\t10\t75\tCCCCCCCCCCCCCCCCCCCCCCCCC\t#########################").unwrap(),
Record::from_sam(&view, b"FOUR\t147\tchr3\t65\t40\t25M\tchr3\t15\t75\tNNNNNNNNNNNNNNNNNNNNNNNNN\t#########################").unwrap(),
Record::from_sam(&view, b"FIVE\t147\tchr3\t70\t40\t25M\tchr3\t20\t75\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################").unwrap(),
];
let mut writer =
bam::Writer::from_path(&path, &header, bam::Format::Bam).expect("Created writer");
for record in records.iter() {
writer.write(record).expect("Wrote record");
}
drop(writer); bam::index::build(&path, None, bam::index::Type::Bai, 1).unwrap();
(path, tempdir)
}
#[rstest(
fast_mode => [true, false],
mate_fix => [true, false]
)]
fn test_can_parse(
fast_mode: bool,
mate_fix: bool,
bamfile: (PathBuf, TempDir),
read_filter: DefaultReadFilter,
) {
let cpus = utils::determine_allowed_cpus(8).unwrap();
let onlydepth_processor = OnlyDepthProcessor::new(
bamfile.0.clone(),
None,
mate_fix,
fast_mode,
false,
false,
0,
read_filter,
);
let par_granges_runner = par_granges::ParGranges::new(
bamfile.0,
None,
None,
None,
true,
Some(cpus),
Some(1_000_000),
Some(0.001),
onlydepth_processor,
);
let mut positions = HashMap::new();
par_granges_runner
.process()
.unwrap()
.into_iter()
.for_each(|p| {
let pos = positions.entry(p.ref_seq.clone()).or_insert(vec![]);
pos.push(p)
});
assert_eq!(positions.keys().len(), 3);
}
#[fixture]
fn vanilla_positions(
bamfile: (PathBuf, TempDir),
read_filter: DefaultReadFilter,
) -> HashMap<String, Vec<RangePositions>> {
let cpus = utils::determine_allowed_cpus(8).unwrap();
let onlydepth_processor = OnlyDepthProcessor::new(
bamfile.0.clone(),
None,
false,
false,
false,
false,
0,
read_filter,
);
let par_granges_runner = par_granges::ParGranges::new(
bamfile.0,
None,
None,
None,
true,
Some(cpus),
Some(1_000_000),
Some(0.001),
onlydepth_processor,
);
let mut positions = HashMap::new();
par_granges_runner
.process()
.unwrap()
.into_iter()
.for_each(|p| {
let pos = positions.entry(p.ref_seq.clone()).or_insert(vec![]);
pos.push(p)
});
positions
}
#[fixture]
fn vanilla_positions_mate_fix(
bamfile: (PathBuf, TempDir),
read_filter: DefaultReadFilter,
) -> HashMap<String, Vec<RangePositions>> {
let cpus = utils::determine_allowed_cpus(8).unwrap();
let onlydepth_processor = OnlyDepthProcessor::new(
bamfile.0.clone(),
None,
true,
false,
false,
false,
0,
read_filter,
);
let par_granges_runner = par_granges::ParGranges::new(
bamfile.0,
None,
None,
None,
true,
Some(cpus),
Some(1_000_000),
Some(0.001),
onlydepth_processor,
);
let mut positions = HashMap::new();
par_granges_runner
.process()
.unwrap()
.into_iter()
.for_each(|p| {
let pos = positions.entry(p.ref_seq.clone()).or_insert(vec![]);
pos.push(p)
});
positions
}
#[fixture]
fn fast_mode_positions(
bamfile: (PathBuf, TempDir),
read_filter: DefaultReadFilter,
) -> HashMap<String, Vec<RangePositions>> {
let cpus = utils::determine_allowed_cpus(8).unwrap();
let onlydepth_processor = OnlyDepthProcessor::new(
bamfile.0.clone(),
None,
false,
true,
false,
false,
0,
read_filter,
);
let par_granges_runner = par_granges::ParGranges::new(
bamfile.0,
None,
None,
None,
true,
Some(cpus),
None,
Some(0.001),
onlydepth_processor,
);
let mut positions = HashMap::new();
par_granges_runner
.process()
.unwrap()
.into_iter()
.for_each(|p| {
let pos = positions.entry(p.ref_seq.clone()).or_insert(vec![]);
pos.push(p)
});
positions
}
#[fixture]
fn fast_mode_positions_mate_fix(
bamfile: (PathBuf, TempDir),
read_filter: DefaultReadFilter,
) -> HashMap<String, Vec<RangePositions>> {
let cpus = utils::determine_allowed_cpus(8).unwrap();
let onlydepth_processor = OnlyDepthProcessor::new(
bamfile.0.clone(),
None,
true,
true,
false,
false,
0,
read_filter,
);
let par_granges_runner = par_granges::ParGranges::new(
bamfile.0,
None,
None,
None,
true,
Some(cpus),
None,
Some(0.001),
onlydepth_processor,
);
let mut positions = HashMap::new();
par_granges_runner
.process()
.unwrap()
.into_iter()
.for_each(|p| {
let pos = positions.entry(p.ref_seq.clone()).or_insert(vec![]);
pos.push(p)
});
positions
}
#[rstest(
positions,
awareness_modifier,
case::vanilla(vanilla_positions(bamfile(), read_filter()), 0),
case::fast_mode(fast_mode_positions(bamfile(), read_filter()), 0),
case::vanilla_mate_fix(vanilla_positions_mate_fix(bamfile(), read_filter()), 0),
case::fast_mode_mate_fix(fast_mode_positions_mate_fix(bamfile(), read_filter()), 0)
)]
fn check_depths(positions: HashMap<String, Vec<RangePositions>>, awareness_modifier: usize) {
assert_eq!(positions.get("chr1").unwrap()[0].depth, 1);
assert_eq!(positions.get("chr1").unwrap()[1].depth, 2);
assert_eq!(positions.get("chr1").unwrap()[2].depth, 3);
assert_eq!(positions.get("chr1").unwrap()[3].depth, 4);
assert_eq!(positions.get("chr1").unwrap()[4].depth, 5);
assert_eq!(positions.get("chr1").unwrap()[5].depth, 4);
assert_eq!(positions.get("chr1").unwrap()[6].depth, 3);
assert_eq!(positions.get("chr1").unwrap()[7].depth, 2);
assert_eq!(positions.get("chr1").unwrap()[8].depth, 1);
assert_eq!(positions.get("chr1").unwrap()[9].depth, 0);
assert_eq!(positions.get("chr1").unwrap()[13].depth, 4);
}
#[rstest(
positions,
awareness_modifier,
case::vanilla(vanilla_positions(bamfile(), read_filter()), 0),
case::fast_mode(fast_mode_positions(bamfile(), read_filter()), 0),
case::vanilla_mate_fix(vanilla_positions_mate_fix(bamfile(), read_filter()), 0),
case::fast_mode_mate_fix(fast_mode_positions_mate_fix(bamfile(), read_filter()), 0)
)]
fn check_ranges(positions: HashMap<String, Vec<RangePositions>>, awareness_modifier: usize) {
assert_eq!(positions.get("chr1").unwrap()[0].pos, 0);
assert_eq!(positions.get("chr1").unwrap()[1].pos, 4);
assert_eq!(positions.get("chr1").unwrap()[2].pos, 9);
assert_eq!(positions.get("chr1").unwrap()[3].pos, 14);
assert_eq!(positions.get("chr1").unwrap()[4].pos, 19);
assert_eq!(positions.get("chr1").unwrap()[5].pos, 25);
assert_eq!(positions.get("chr1").unwrap()[6].pos, 29);
assert_eq!(positions.get("chr1").unwrap()[7].pos, 34);
assert_eq!(positions.get("chr1").unwrap()[8].pos, 39);
assert_eq!(positions.get("chr1").unwrap()[9].pos, 44);
assert_eq!(positions.get("chr1").unwrap()[13].pos, 64);
assert_eq!(positions.get("chr1").unwrap()[0].end, 4);
assert_eq!(positions.get("chr1").unwrap()[1].end, 9);
assert_eq!(positions.get("chr1").unwrap()[2].end, 14);
assert_eq!(positions.get("chr1").unwrap()[3].end, 19);
assert_eq!(positions.get("chr1").unwrap()[4].end, 25);
assert_eq!(positions.get("chr1").unwrap()[5].end, 29);
assert_eq!(positions.get("chr1").unwrap()[6].end, 34);
assert_eq!(positions.get("chr1").unwrap()[7].end, 39);
assert_eq!(positions.get("chr1").unwrap()[8].end, 44);
assert_eq!(positions.get("chr1").unwrap()[9].end, 49);
assert_eq!(positions.get("chr1").unwrap()[13].end, 69);
}
#[rstest(
positions,
awareness_modifier,
case::vanilla(vanilla_positions(bamfile(), read_filter()), 2),
case::fast_mode(fast_mode_positions(bamfile(), read_filter()), 0),
case::vanilla_mate_fix(vanilla_positions_mate_fix(bamfile(), read_filter()), 2),
case::fast_mode_mate_fix(fast_mode_positions_mate_fix(bamfile(), read_filter()), 0)
)]
fn check_filters(positions: HashMap<String, Vec<RangePositions>>, awareness_modifier: usize) {
assert_eq!(
positions.get("chr2").unwrap()[11 + awareness_modifier].depth,
1
);
assert_eq!(
positions.get("chr2").unwrap()[12 + awareness_modifier].depth,
0
);
}
#[rstest(
positions,
awareness_modifier,
case::vanilla(vanilla_positions(bamfile(), read_filter()), 0),
case::fast_mode(fast_mode_positions(bamfile(), read_filter()), 0),
case::vanilla_mate_fix(vanilla_positions_mate_fix(bamfile(), read_filter()), 0),
case::fast_mode_mate_fix(fast_mode_positions_mate_fix(bamfile(), read_filter()), 0)
)]
fn check_depths_insertions(
positions: HashMap<String, Vec<RangePositions>>,
awareness_modifier: usize,
) {
assert_eq!(positions.get("chr2").unwrap()[0].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[0].pos, 0);
assert_eq!(positions.get("chr2").unwrap()[0].end, 4);
}
#[rstest(
positions,
awareness_modifier,
case::vanilla(vanilla_positions(bamfile(), read_filter()), 0),
case::fast_mode(fast_mode_positions(bamfile(), read_filter()), 0),
case::vanilla_mate_fix(vanilla_positions_mate_fix(bamfile(), read_filter()), 0),
case::fast_mode_mate_fix(fast_mode_positions_mate_fix(bamfile(), read_filter()), 0)
)]
fn check_depths_deletions(
positions: HashMap<String, Vec<RangePositions>>,
awareness_modifier: usize,
) {
assert_eq!(positions.get("chr2").unwrap()[1].depth, 2); assert_eq!(positions.get("chr2").unwrap()[1].pos, 4);
assert_eq!(positions.get("chr2").unwrap()[1].end, 9);
}
#[rstest(
positions,
awareness_modifier,
case::vanilla(vanilla_positions(bamfile(), read_filter()), 1),
case::fast_mode(fast_mode_positions(bamfile(), read_filter()), 0),
case::vanilla_mate_fix(vanilla_positions_mate_fix(bamfile(), read_filter()), 1),
case::fast_mode_mate_fix(fast_mode_positions_mate_fix(bamfile(), read_filter()), 0)
)]
fn check_depths_refskips(
positions: HashMap<String, Vec<RangePositions>>,
awareness_modifier: usize,
) {
if awareness_modifier == 1 {
assert_eq!(positions.get("chr2").unwrap()[2].depth, 3); assert_eq!(positions.get("chr2").unwrap()[2].pos, 9);
assert_eq!(positions.get("chr2").unwrap()[2].end, 12);
assert_eq!(positions.get("chr2").unwrap()[3].depth, 2); assert_eq!(positions.get("chr2").unwrap()[3].pos, 12);
assert_eq!(positions.get("chr2").unwrap()[3].end, 14);
} else {
assert_eq!(positions.get("chr2").unwrap()[2].depth, 3); assert_eq!(positions.get("chr2").unwrap()[2].pos, 9);
assert_eq!(positions.get("chr2").unwrap()[2].end, 14);
}
}
#[rstest(
positions,
awareness_modifier,
case::fast_mode(fast_mode_positions(bamfile(), read_filter()), 0),
case::fast_mode_mate_fix(fast_mode_positions_mate_fix(bamfile(), read_filter()), 1)
)]
fn check_mate_detection_fast(
positions: HashMap<String, Vec<RangePositions>>,
awareness_modifier: usize,
) {
assert_eq!(positions.get("chr2").unwrap()[5].depth, 4);
assert_eq!(positions.get("chr2").unwrap()[5].pos, 23);
if awareness_modifier == 1 {
assert_eq!(positions.get("chr2").unwrap()[5].end, 34);
assert_eq!(positions.get("chr2").unwrap()[6].pos, 34);
assert_eq!(positions.get("chr2").unwrap()[6].end, 39);
assert_eq!(positions.get("chr2").unwrap()[6].depth, 3);
assert_eq!(positions.get("chr2").unwrap()[7].pos, 39);
assert_eq!(positions.get("chr2").unwrap()[7].end, 49);
assert_eq!(positions.get("chr2").unwrap()[7].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[13].pos, 119);
assert_eq!(positions.get("chr2").unwrap()[13].end, 144);
assert_eq!(positions.get("chr2").unwrap()[13].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[15].pos, 159);
assert_eq!(positions.get("chr2").unwrap()[15].end, 209);
assert_eq!(positions.get("chr2").unwrap()[15].depth, 1);
} else {
assert_eq!(positions.get("chr2").unwrap()[5].end, 39);
assert_eq!(positions.get("chr2").unwrap()[6].pos, 39);
assert_eq!(positions.get("chr2").unwrap()[6].end, 44);
assert_eq!(positions.get("chr2").unwrap()[6].depth, 2);
assert_eq!(positions.get("chr2").unwrap()[7].pos, 44);
assert_eq!(positions.get("chr2").unwrap()[7].end, 49);
assert_eq!(positions.get("chr2").unwrap()[7].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[13].pos, 119);
assert_eq!(positions.get("chr2").unwrap()[13].end, 144);
assert_eq!(positions.get("chr2").unwrap()[13].depth, 2);
assert_eq!(positions.get("chr2").unwrap()[15].pos, 159);
assert_eq!(positions.get("chr2").unwrap()[15].end, 174);
assert_eq!(positions.get("chr2").unwrap()[15].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[16].pos, 174);
assert_eq!(positions.get("chr2").unwrap()[16].end, 199);
assert_eq!(positions.get("chr2").unwrap()[16].depth, 2);
assert_eq!(positions.get("chr2").unwrap()[17].pos, 199);
assert_eq!(positions.get("chr2").unwrap()[17].end, 209);
assert_eq!(positions.get("chr2").unwrap()[17].depth, 1);
}
assert_eq!(positions.get("chr2").unwrap()[8].pos, 49);
assert_eq!(positions.get("chr2").unwrap()[8].end, 54);
assert_eq!(positions.get("chr2").unwrap()[8].depth, 2);
}
#[rstest(
positions,
awareness_modifier,
case::vanilla(vanilla_positions(bamfile(), read_filter()), 0),
case::vanilla_mate_fix(vanilla_positions_mate_fix(bamfile(), read_filter()), 1)
)]
fn check_mate_detection_vanilla(
positions: HashMap<String, Vec<RangePositions>>,
awareness_modifier: usize,
) {
assert_eq!(positions.get("chr2").unwrap()[7].depth, 4);
assert_eq!(positions.get("chr2").unwrap()[7].pos, 23);
if awareness_modifier == 1 {
assert_eq!(positions.get("chr2").unwrap()[7].end, 34);
assert_eq!(positions.get("chr2").unwrap()[8].pos, 34);
assert_eq!(positions.get("chr2").unwrap()[8].end, 39);
assert_eq!(positions.get("chr2").unwrap()[8].depth, 3);
assert_eq!(positions.get("chr2").unwrap()[9].pos, 39);
assert_eq!(positions.get("chr2").unwrap()[9].end, 49);
assert_eq!(positions.get("chr2").unwrap()[9].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[15].pos, 119);
assert_eq!(positions.get("chr2").unwrap()[15].end, 144);
assert_eq!(positions.get("chr2").unwrap()[15].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[17].pos, 159);
assert_eq!(positions.get("chr2").unwrap()[17].end, 209);
assert_eq!(positions.get("chr2").unwrap()[17].depth, 1)
} else {
assert_eq!(positions.get("chr2").unwrap()[7].end, 39);
assert_eq!(positions.get("chr2").unwrap()[8].pos, 39);
assert_eq!(positions.get("chr2").unwrap()[8].end, 44);
assert_eq!(positions.get("chr2").unwrap()[8].depth, 2);
assert_eq!(positions.get("chr2").unwrap()[9].pos, 44);
assert_eq!(positions.get("chr2").unwrap()[9].end, 49);
assert_eq!(positions.get("chr2").unwrap()[9].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[15].pos, 119);
assert_eq!(positions.get("chr2").unwrap()[15].end, 144);
assert_eq!(positions.get("chr2").unwrap()[15].depth, 2);
assert_eq!(positions.get("chr2").unwrap()[17].pos, 159);
assert_eq!(positions.get("chr2").unwrap()[17].end, 174);
assert_eq!(positions.get("chr2").unwrap()[17].depth, 1);
assert_eq!(positions.get("chr2").unwrap()[18].pos, 174);
assert_eq!(positions.get("chr2").unwrap()[18].end, 199);
assert_eq!(positions.get("chr2").unwrap()[18].depth, 2);
assert_eq!(positions.get("chr2").unwrap()[19].pos, 199);
assert_eq!(positions.get("chr2").unwrap()[19].end, 209);
assert_eq!(positions.get("chr2").unwrap()[19].depth, 1)
}
assert_eq!(positions.get("chr2").unwrap()[10].pos, 49);
assert_eq!(positions.get("chr2").unwrap()[10].end, 54);
assert_eq!(positions.get("chr2").unwrap()[10].depth, 2);
}
#[rstest(
positions,
awareness_modifier,
case::vanilla(vanilla_positions(bamfile(), read_filter()), 0),
case::fast_mode(fast_mode_positions(bamfile(), read_filter()), 2),
case::vanilla_mate_fix(vanilla_positions_mate_fix(bamfile(), read_filter()), 0),
case::fast_mode_mate_fix(fast_mode_positions_mate_fix(bamfile(), read_filter()), 2)
)]
fn check_chunk_ends(
positions: HashMap<String, Vec<RangePositions>>,
awareness_modifier: usize,
) {
if awareness_modifier == 0 {
assert_eq!(positions.get("chr3").unwrap()[19].pos, 94);
assert_eq!(positions.get("chr3").unwrap()[19].end, 1_000_000);
assert_eq!(positions.get("chr3").unwrap()[19].depth, 0);
assert_eq!(positions.get("chr3").unwrap()[20].pos, 1_000_000);
assert_eq!(positions.get("chr3").unwrap()[20].end, 2_000_000);
assert_eq!(positions.get("chr3").unwrap()[20].depth, 0);
assert_eq!(positions.get("chr3").unwrap()[21].pos, 2_000_000);
assert_eq!(positions.get("chr3").unwrap()[21].end, 2_000_002);
assert_eq!(positions.get("chr3").unwrap()[21].depth, 0);
assert_eq!(positions.get("chr3").unwrap()[22].pos, 2_000_002);
assert_eq!(positions.get("chr3").unwrap()[22].end, 2_000_025);
assert_eq!(positions.get("chr3").unwrap()[22].depth, 1);
assert_eq!(positions.get("chr3").unwrap()[23].pos, 2_000_025);
assert_eq!(positions.get("chr3").unwrap()[23].end, 2_000_032);
assert_eq!(positions.get("chr3").unwrap()[23].depth, 0);
assert_eq!(positions.get("chr3").unwrap()[24].pos, 2_000_032);
assert_eq!(positions.get("chr3").unwrap()[24].end, 2_000_034);
assert_eq!(positions.get("chr3").unwrap()[24].depth, 1);
assert_eq!(positions.get("chr3").unwrap()[25].pos, 2_000_034);
assert_eq!(positions.get("chr3").unwrap()[25].end, 3_000_000);
assert_eq!(positions.get("chr3").unwrap()[25].depth, 0);
} else {
assert_eq!(positions.get("chr3").unwrap()[17].pos, 94);
assert_eq!(positions.get("chr3").unwrap()[17].end, 1_000_000);
assert_eq!(positions.get("chr3").unwrap()[17].depth, 2);
assert_eq!(positions.get("chr3").unwrap()[18].pos, 1_000_000);
assert_eq!(positions.get("chr3").unwrap()[18].end, 2_000_000);
assert_eq!(positions.get("chr3").unwrap()[18].depth, 2);
assert_eq!(positions.get("chr3").unwrap()[19].pos, 2_000_000);
assert_eq!(positions.get("chr3").unwrap()[19].end, 2_000_025);
assert_eq!(positions.get("chr3").unwrap()[19].depth, 2);
assert_eq!(positions.get("chr3").unwrap()[20].pos, 2_000_025);
assert_eq!(positions.get("chr3").unwrap()[20].end, 2_000_034);
assert_eq!(positions.get("chr3").unwrap()[20].depth, 1);
assert_eq!(positions.get("chr3").unwrap()[21].pos, 2_000_034);
assert_eq!(positions.get("chr3").unwrap()[21].end, 3_000_000);
assert_eq!(positions.get("chr3").unwrap()[21].depth, 0);
}
}
}