use std::collections::{vec_deque, VecDeque};
use std::error::Error;
use std::str;
use bam;
use bam::Read;
/// A buffer of BAM records backed by an indexed reader.
///
/// The buffer is (re)filled by `fetch` and its contents are exposed through
/// `iter`, `iter_mut` and `len`.
#[derive(Debug)]
pub struct RecordBuffer {
/// Indexed BAM reader the records are pulled from.
reader: bam::IndexedReader,
/// Records currently held by the buffer, in the order they were read.
inner: VecDeque<bam::Record>,
/// Record that was read but lay at or beyond the end of the last requested
/// window; `fetch` moves it into `inner` on its next call.
overflow: Option<bam::Record>,
}
// SAFETY / NOTE(review): these impls assert by hand that a `RecordBuffer`
// (i.e. the wrapped reader and the buffered records) may be moved to and
// shared between threads. Nothing in this file proves that; presumably the
// underlying types are only kept from being `Send`/`Sync` by raw pointers they
// own exclusively — TODO confirm against the reader and record implementations.
unsafe impl Send for RecordBuffer {}
unsafe impl Sync for RecordBuffer {}
impl RecordBuffer {
    /// Create a new, empty buffer that reads from the given indexed BAM reader.
    pub fn new(bam: bam::IndexedReader) -> Self {
        RecordBuffer {
            reader: bam,
            inner: VecDeque::new(),
            overflow: None,
        }
    }

    /// Position of the first buffered record, or `None` if the buffer is empty.
    fn start(&self) -> Option<u32> {
        self.inner.front().map(|rec| rec.pos() as u32)
    }

    /// Position of the last buffered record, or `None` if the buffer is empty.
    fn end(&self) -> Option<u32> {
        self.inner.back().map(|rec| rec.pos() as u32)
    }

    /// Target id of the last buffered record, or `None` if the buffer is empty.
    fn tid(&self) -> Option<i32> {
        self.inner.back().map(|rec| rec.tid())
    }

    /// Move the buffer to the window `start..end` on sequence `chrom`.
    ///
    /// The call proceeds in three steps:
    ///
    /// 1. A record left over from the previous call (the first one that was at
    ///    or beyond that call's `end`) is appended to the buffer.
    /// 2. If the buffer is empty, belongs to another sequence, ends before
    ///    `start` or begins after `start`, the reader is re-positioned at
    ///    `start` and the buffer is cleared. Otherwise the records positioned
    ///    before `start` are dropped from the front.
    /// 3. Records are read and appended until the reader is exhausted or a
    ///    record positioned at or beyond `end` is seen; that record is kept
    ///    aside for the next call. Unmapped records are skipped.
    ///
    /// Returns `(added, deleted)`: the number of records appended to and removed
    /// from the buffer by this call.
    ///
    /// # Errors
    ///
    /// Returns `RecordBufferError::UnknownSequence` if `chrom` is not present in
    /// the BAM header; in that case the buffer is left untouched. Errors reported
    /// by the reader while seeking or reading are passed through.
    pub fn fetch(
        &mut self,
        chrom: &[u8],
        start: u32,
        end: u32,
    ) -> Result<(usize, usize), Box<Error>> {
        // Resolve the sequence before touching any state, so that a failed call
        // does not silently move the overflow record into the buffer.
        let tid = match self.reader.header.tid(chrom) {
            Some(tid) => tid,
            None => {
                // Sequence names are raw bytes: never panic while building the
                // error, fall back to a lossy conversion instead.
                let name = match str::from_utf8(chrom) {
                    Ok(valid) => valid.to_owned(),
                    Err(_) => String::from_utf8_lossy(chrom).into_owned(),
                };
                return Err(Box::new(RecordBufferError::UnknownSequence(name)));
            }
        };

        let mut added = 0;
        // Move the overflow of the previous fetch into the buffer.
        if let Some(carried) = self.overflow.take() {
            self.inner.push_back(carried);
            added += 1;
        }

        // Positions are compared as i64 so that window bounds above `i32::MAX`
        // cannot wrap around to negative values.
        let window_start = i64::from(start);
        let window_end = i64::from(end);

        // The buffered records can only be reused if they belong to the same
        // sequence and the window start falls inside the buffered range.
        let needs_refetch = match (self.start(), self.end(), self.tid()) {
            (Some(buf_start), Some(buf_end), Some(buf_tid)) => {
                buf_end < start || buf_tid != tid as i32 || buf_start > start
            }
            // Empty buffer: nothing to reuse.
            _ => true,
        };

        let deleted = if needs_refetch {
            let target_len = self
                .reader
                .header
                .target_len(tid)
                .expect("tid was just resolved from this header");
            // Seek first: if this fails the buffered records are kept.
            self.reader.fetch(tid, start, target_len)?;
            let dropped = self.inner.len();
            self.inner.clear();
            dropped
        } else {
            // Drop the leading records that lie left of the window.
            let to_remove = self
                .inner
                .iter()
                .take_while(|rec| i64::from(rec.pos()) < window_start)
                .count();
            self.inner.drain(..to_remove);
            to_remove
        };

        // Extend the buffer to the right.
        loop {
            let mut record = bam::Record::new();
            if let Err(e) = self.reader.read(&mut record) {
                if e.is_eof() {
                    break;
                }
                return Err(Box::new(e));
            }
            if record.is_unmapped() {
                continue;
            }
            if i64::from(record.pos()) >= window_end {
                // First record past the window: keep it for the next fetch.
                self.overflow = Some(record);
                break;
            }
            self.inner.push_back(record);
            added += 1;
        }

        Ok((added, deleted))
    }

    /// Iterate over the buffered records.
    pub fn iter(&self) -> vec_deque::Iter<bam::Record> {
        self.inner.iter()
    }

    /// Iterate mutably over the buffered records.
    pub fn iter_mut(&mut self) -> vec_deque::IterMut<bam::Record> {
        self.inner.iter_mut()
    }

    /// Number of records currently held by the buffer.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Whether the buffer currently holds no records.
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }
}
// Errors produced by `RecordBuffer` itself; errors coming from the underlying
// reader are boxed and returned by `fetch` without being wrapped in this type.
quick_error! {
#[derive(Debug, Clone)]
pub enum RecordBufferError {
// The sequence name passed to `fetch` is not present in the BAM header.
// `chrom` carries the offending name for the error message.
UnknownSequence(chrom: String) {
description("unknown sequence")
display("sequence {} cannot be found in BAM", chrom)
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use bam;
    use itertools::Itertools;

    /// Fetching the window 1..5 on `CHROMOSOME_I` of the test BAM must leave
    /// exactly six records in the buffer, all of them at position 1.
    #[test]
    fn test_buffer() {
        let mut buffer =
            RecordBuffer::new(bam::IndexedReader::from_path(&"test/test.bam").unwrap());
        buffer.fetch(b"CHROMOSOME_I", 1, 5).unwrap();

        let positions = buffer.iter().map(|rec| rec.pos()).collect_vec();
        assert_eq!(positions, vec![1; 6]);
    }
}