use std::io::{self, BufRead, Read, Seek};
use std::vec;
use noodles::bgzf::VirtualPosition;
/// A pair of BGZF virtual positions describing one chunk.
///
/// `BgzfChunkReader` seeks to the first element before reading a chunk and keeps reading while
/// the underlying reader's virtual position is strictly below the second element.
pub type VirtualRange = (VirtualPosition, VirtualPosition);
/// Reader adaptor that walks a list of BGZF chunks over an underlying BGZF reader.
///
/// Chunks are visited in the order they were supplied: the adaptor seeks to the start of a
/// chunk, serves data while the underlying reader's virtual position is below the chunk end,
/// and then moves on to the next chunk.
pub struct BgzfChunkReader<R> {
    /// Underlying reader that is sought and read from.
    reader: R,
    /// Chunks that have not been started yet.
    chunks: vec::IntoIter<VirtualRange>,
    /// What `fill_buf` has to do on its next call.
    state: State,
}
/// Progress of a `BgzfChunkReader` through its chunk list.
enum State {
    /// The next chunk still has to be fetched and sought to.
    Seek,
    /// A chunk is being read; the payload is that chunk's end position.
    Read(VirtualPosition),
    /// Every chunk has been served; only empty buffers are returned from now on.
    Done,
}
impl<R> BgzfChunkReader<R>
where
    R: noodles::bgzf::io::BufRead + noodles::bgzf::io::Seek,
{
    /// Creates a reader that will serve `chunks`, in the given order, from `reader`.
    ///
    /// Nothing is read or sought here; the first seek happens on the first `fill_buf`/`read`.
    pub fn new(reader: R, chunks: Vec<VirtualRange>) -> Self {
        let chunks = chunks.into_iter();
        let state = State::Seek;

        Self {
            reader,
            chunks,
            state,
        }
    }
}
impl<R> Read for BgzfChunkReader<R>
where
    R: noodles::bgzf::io::BufRead + noodles::bgzf::io::Seek,
{
    /// Copies as much buffered chunk data as fits into `buf` and marks it as consumed.
    ///
    /// Returns `Ok(0)` once `fill_buf` has nothing left to offer.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let copied = {
            // `&[u8]` implements `Read`, which copies and advances the slice for us.
            let mut pending = self.fill_buf()?;
            pending.read(buf)?
        };

        self.consume(copied);
        Ok(copied)
    }
}
impl<R> BufRead for BgzfChunkReader<R>
where
    R: noodles::bgzf::io::BufRead + noodles::bgzf::io::Seek,
{
    /// Returns the underlying reader's buffer for the chunk currently being read.
    ///
    /// The state machine is advanced until there is either data to hand out or no chunk left:
    /// a pending chunk is sought to, a chunk whose end has been reached is abandoned, and an
    /// exhausted chunk list yields an empty slice.
    ///
    /// The slice comes straight from the underlying reader and is not trimmed at the chunk end
    /// here; only the reader's virtual position is compared against the chunk end.
    ///
    /// # Errors
    ///
    /// Propagates errors from seeking or filling the underlying reader.
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        loop {
            match self.state {
                State::Done => return Ok(&[]),
                State::Read(chunk_end) if self.reader.virtual_position() < chunk_end => {
                    return self.reader.fill_buf();
                }
                // The current chunk's end has been reached: move on to the next chunk.
                State::Read(_) => self.state = State::Seek,
                State::Seek => {
                    let next_state = if let Some((start, end)) = self.chunks.next() {
                        self.reader.seek_to_virtual_position(start)?;
                        State::Read(end)
                    } else {
                        State::Done
                    };
                    self.state = next_state;
                }
            }
        }
    }

    /// Marks `amt` bytes of the underlying reader's buffer as consumed.
    fn consume(&mut self, amt: usize) {
        self.reader.consume(amt);
    }
}
/// A [`BufRead`] adaptor that exposes only the bytes inside a list of byte ranges.
///
/// Each range is a `(start, end)` pair of absolute offsets into the underlying stream, with
/// `start` inclusive and `end` exclusive. Ranges are visited in the order they were supplied
/// and their contents are concatenated into one continuous stream. A range whose `end` is not
/// greater than its `start` contributes no bytes.
///
/// If the underlying stream ends before a range does, that range is cut short and reading
/// continues with the next range.
pub struct ByteRangeReader<R> {
    /// Underlying seekable, buffered source.
    reader: R,
    /// Ranges that have not been started yet.
    ranges: vec::IntoIter<(u64, u64)>,
    /// What `fill_buf` has to do on its next call.
    state: ByteRangeState,
    /// Absolute offset of the next unread byte of the range being read.
    current_pos: u64,
}

/// Progress of a `ByteRangeReader` through its range list.
enum ByteRangeState {
    /// The next range still has to be fetched and sought to.
    Seek,
    /// A range is being read; the payload is that range's exclusive end offset.
    Read(u64),
    /// Every range has been served; only empty buffers are returned from now on.
    Done,
}

impl<R> ByteRangeReader<R>
where
    R: BufRead + Seek,
{
    /// Creates a reader that will serve `ranges`, in the given order, from `reader`.
    ///
    /// Nothing is read or sought here; the first seek happens on the first `fill_buf`/`read`.
    pub fn new(reader: R, ranges: Vec<(u64, u64)>) -> Self {
        Self {
            reader,
            ranges: ranges.into_iter(),
            state: ByteRangeState::Seek,
            current_pos: 0,
        }
    }
}

impl<R> Read for ByteRangeReader<R>
where
    R: BufRead + Seek,
{
    /// Copies as much in-range data as fits into `buf` and marks it as consumed.
    ///
    /// Returns `Ok(0)` once every range has been served.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let mut src = self.fill_buf()?;
        let amt = src.read(buf)?;
        self.consume(amt);
        Ok(amt)
    }
}

impl<R> BufRead for ByteRangeReader<R>
where
    R: BufRead + Seek,
{
    /// Returns buffered bytes of the current range, never extending past the range's end.
    ///
    /// An empty slice is returned only after every range has been served, so callers can treat
    /// it as end of stream.
    ///
    /// # Errors
    ///
    /// Propagates errors from seeking or filling the underlying reader.
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        loop {
            match self.state {
                ByteRangeState::Seek => match self.ranges.next() {
                    Some((start, end)) => {
                        self.reader.seek(io::SeekFrom::Start(start))?;
                        self.current_pos = start;
                        self.state = ByteRangeState::Read(end);
                    }
                    None => {
                        self.state = ByteRangeState::Done;
                        return Ok(&[]);
                    }
                },
                ByteRangeState::Read(range_end) => {
                    let remaining = range_end.saturating_sub(self.current_pos);

                    // Leave the range when it is finished, or when the underlying stream ended
                    // early. Returning an empty slice here would look like end of stream to the
                    // caller and silently drop every range still queued. The emptiness probe is
                    // a separate, short-lived borrow; the buffer is fetched again below, which
                    // is cheap because it is already filled.
                    if remaining == 0 || self.reader.fill_buf()?.is_empty() {
                        self.state = ByteRangeState::Seek;
                        continue;
                    }

                    let buf = self.reader.fill_buf()?;
                    // Clamp in `u64`: `usize` to `u64` is lossless, and the minimum is at most
                    // `buf.len()`, so casting it back cannot truncate (unlike casting
                    // `remaining` directly, which can wrap to 0 on 32-bit targets).
                    let visible = (buf.len() as u64).min(remaining) as usize;
                    return Ok(&buf[..visible]);
                }
                ByteRangeState::Done => return Ok(&[]),
            }
        }
    }

    /// Marks `amt` bytes as consumed and advances the position within the current range.
    ///
    /// As required by the `BufRead` contract, `amt` must not exceed the length of the slice
    /// last returned by `fill_buf`.
    fn consume(&mut self, amt: usize) {
        self.reader.consume(amt);
        // `usize` is never wider than `u64`, so this cast is lossless.
        self.current_pos += amt as u64;
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Builds a `ByteRangeReader` over `data` limited to `ranges` and drains it completely.
    fn read_ranges(data: &[u8], ranges: Vec<(u64, u64)>) -> Vec<u8> {
        let mut reader = ByteRangeReader::new(Cursor::new(data), ranges);
        let mut collected = Vec::new();
        reader.read_to_end(&mut collected).unwrap();
        collected
    }

    #[test]
    fn test_byte_range_reader_single_range() {
        let collected = read_ranges(b"0123456789ABCDEFGHIJ", vec![(5, 10)]);
        assert_eq!(collected, b"56789");
    }

    #[test]
    fn test_byte_range_reader_multiple_ranges() {
        let collected = read_ranges(b"0123456789ABCDEFGHIJ", vec![(0, 3), (5, 8), (15, 20)]);
        assert_eq!(collected, b"012567FGHIJ");
    }

    #[test]
    fn test_byte_range_reader_empty_ranges() {
        let collected = read_ranges(b"0123456789", vec![]);
        assert_eq!(collected, b"");
    }

    #[test]
    fn test_byte_range_reader_zero_length_range() {
        let collected = read_ranges(b"0123456789", vec![(5, 5)]);
        assert_eq!(collected, b"");
    }

    #[test]
    fn test_byte_range_reader_full_file() {
        let collected = read_ranges(b"0123456789", vec![(0, 10)]);
        assert_eq!(collected, b"0123456789");
    }

    #[test]
    fn test_byte_range_reader_with_bufread() {
        // Exercises the `BufRead` side: the delimiter sits in the second range, so the line
        // has to be stitched together across the range boundary.
        let source = Cursor::new(b"0123456789ABCDEFGHIJ");
        let mut reader = ByteRangeReader::new(source, vec![(2, 7), (10, 15)]);

        let mut line = Vec::new();
        reader.read_until(b'E', &mut line).unwrap();

        assert_eq!(line, b"23456ABCDE");
    }
}