mod builder;
pub mod header;
pub(crate) mod query;
mod record;
pub(crate) mod record_buf;
mod record_bufs;
use std::{
io::{self, BufRead},
iter,
};
use noodles_bgzf as bgzf;
use noodles_core::Region;
use noodles_csi::BinningIndex;
pub(crate) use self::record::read_record;
pub use self::{builder::Builder, query::Query, record_bufs::RecordBufs};
use self::{header::read_header, record_buf::read_record_buf};
use crate::{Header, Record, alignment::RecordBuf, header::ReferenceSequences};
/// An alignment format reader that wraps an inner reader `R`.
///
/// Alongside the inner reader it owns a byte buffer that is lent to
/// `read_record_buf` on every call, so the same allocation is reused
/// between reads.
#[derive(Debug)]
pub struct Reader<R> {
// The wrapped source that all reads are delegated to.
inner: R,
// Scratch bytes passed to `read_record_buf`; starts out empty.
buf: Vec<u8>,
}
impl<R> Reader<R> {
    /// Returns a shared reference to the wrapped reader.
    pub fn get_ref(&self) -> &R {
        let Self { inner, .. } = self;
        inner
    }

    /// Returns a mutable reference to the wrapped reader.
    pub fn get_mut(&mut self) -> &mut R {
        let Self { inner, .. } = self;
        inner
    }

    /// Consumes this reader and returns the wrapped reader.
    ///
    /// The internal scratch buffer is dropped.
    pub fn into_inner(self) -> R {
        let Self { inner, .. } = self;
        inner
    }
}
impl<R> Reader<R>
where
    R: BufRead,
{
    /// Creates a reader that wraps `inner`.
    ///
    /// This is the same conversion as `Reader::from(inner)`.
    pub fn new(inner: R) -> Self {
        inner.into()
    }

    /// Returns a header reader that borrows the wrapped reader mutably.
    pub fn header_reader(&mut self) -> header::Reader<&mut R> {
        let source = &mut self.inner;
        header::Reader::new(source)
    }

    /// Reads a header from the wrapped reader.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying header parser.
    pub fn read_header(&mut self) -> io::Result<Header> {
        let source = &mut self.inner;
        read_header(source)
    }

    /// Reads one record into `record`, using `header` for context.
    ///
    /// The reader's internal scratch buffer is lent to the underlying
    /// routine. The returned `usize` is whatever that routine reports.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying record reader.
    pub fn read_record_buf(
        &mut self,
        header: &Header,
        record: &mut RecordBuf,
    ) -> io::Result<usize> {
        let Self { inner, buf } = self;
        read_record_buf(inner, buf, header, record)
    }

    /// Returns a `RecordBufs` built from this reader and `header`.
    pub fn record_bufs<'a>(&'a mut self, header: &'a Header) -> RecordBufs<'a, R> {
        RecordBufs::new(self, header)
    }

    /// Reads one record into `record`.
    ///
    /// The returned `usize` is whatever the underlying routine reports;
    /// `records` treats a value of `0` as end of input.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the underlying record reader.
    pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
        read_record(self.get_mut(), record)
    }

    /// Returns an iterator that repeatedly calls `read_record`.
    ///
    /// A single scratch record is overwritten on every step and a clone of
    /// it is yielded. Iteration ends when a read reports `0`; a read error
    /// is yielded as `Some(Err(_))`.
    pub fn records(&mut self) -> impl Iterator<Item = io::Result<Record>> {
        let mut scratch = Record::default();

        iter::from_fn(move || {
            self.read_record(&mut scratch)
                .map(|n| (n != 0).then(|| scratch.clone()))
                .transpose()
        })
    }
}
impl<R> Reader<R>
where
    R: bgzf::io::BufRead + bgzf::io::Seek,
{
    /// Rewinds to the default virtual position, reads the header, and
    /// returns the virtual position the wrapped reader is left at.
    fn seek_to_first_record(&mut self) -> io::Result<bgzf::VirtualPosition> {
        let start = bgzf::VirtualPosition::default();
        self.inner.seek_to_virtual_position(start)?;

        // The parsed header is discarded; it is read only to advance the
        // reader past it.
        self.read_header()?;

        Ok(self.inner.virtual_position())
    }

    /// Builds a `Query` for `region` using the chunks reported by `index`.
    ///
    /// The region name is resolved to a reference sequence index with
    /// `resolve_region`, and `index` is then asked for the chunks covering
    /// the region's interval.
    ///
    /// # Errors
    ///
    /// Returns an error if the region name cannot be resolved against the
    /// header's reference sequences, or if the index query fails.
    pub fn query<'r, 'h: 'r, I>(
        &'r mut self,
        header: &'h Header,
        index: &I,
        region: &Region,
    ) -> io::Result<Query<'r, 'h, R>>
    where
        I: BinningIndex,
    {
        let id = resolve_region(header.reference_sequences(), region)?;
        let chunks = index.query(id, region.interval())?;

        let query = Query::new(self.get_mut(), chunks, header, id, region.interval());
        Ok(query)
    }

    /// Returns an iterator over the records whose flags report unmapped.
    ///
    /// The reader is first positioned at
    /// `index.last_first_record_start_position()` when the index provides
    /// one, and otherwise at the first record after the header. Records
    /// are then read sequentially; mapped ones are skipped.
    ///
    /// # Errors
    ///
    /// Returns an error if the initial seek (or header read) fails. Errors
    /// from reading a record or decoding its flags are yielded by the
    /// iterator.
    pub fn query_unmapped<'r, I>(
        &'r mut self,
        index: &I,
    ) -> io::Result<impl Iterator<Item = io::Result<Record>> + use<'r, I, R>>
    where
        I: BinningIndex,
    {
        match index.last_first_record_start_position() {
            Some(pos) => {
                self.get_mut().seek_to_virtual_position(pos)?;
            }
            None => {
                self.seek_to_first_record()?;
            }
        }

        let mut scratch = Record::default();

        Ok(iter::from_fn(move || {
            loop {
                let n = match self.read_record(&mut scratch) {
                    Ok(n) => n,
                    Err(e) => return Some(Err(e)),
                };

                // A zero-length read means the input is exhausted.
                if n == 0 {
                    return None;
                }

                match scratch.flags() {
                    Ok(flags) if flags.is_unmapped() => return Some(Ok(scratch.clone())),
                    // Mapped record: keep scanning.
                    Ok(_) => {}
                    Err(e) => return Some(Err(e)),
                }
            }
        }))
    }
}
impl<R> From<R> for Reader<R>
where
    R: BufRead,
{
    /// Wraps `inner` in a reader whose scratch buffer starts out empty.
    fn from(inner: R) -> Self {
        let buf = Vec::new();
        Self { inner, buf }
    }
}
impl<R> crate::alignment::io::Read<R> for Reader<R>
where
R: BufRead,
{
fn read_alignment_header(&mut self) -> io::Result<Header> {
self.read_header()
}
fn alignment_records<'a>(
&'a mut self,
_header: &'a Header,
) -> Box<dyn Iterator<Item = io::Result<Box<dyn crate::alignment::Record>>> + 'a> {
Box::new(self.records().map(|result| {
result.map(|record| Box::new(record) as Box<dyn crate::alignment::Record>)
}))
}
}
/// Reads bytes from `reader` up to and including the next `\n`, appending
/// them to `buf` with the line terminator removed.
///
/// A trailing `\n` is stripped, and if the byte before it is `\r`, that is
/// stripped as well. A final line without `\n` is left untouched. `buf` is
/// not cleared first; callers that want only the current line must clear it.
///
/// Returns the number of bytes consumed from `reader`, terminator included;
/// `0` means nothing was read and `buf` is unchanged.
///
/// # Errors
///
/// Propagates any error from `BufRead::read_until`.
fn read_line<R>(reader: &mut R, buf: &mut Vec<u8>) -> io::Result<usize>
where
    R: BufRead,
{
    let consumed = reader.read_until(b'\n', buf)?;

    // Only trim when something was actually read, so existing content of
    // `buf` is never modified on end of input.
    if consumed != 0 && buf.last() == Some(&b'\n') {
        buf.pop();

        if buf.last() == Some(&b'\r') {
            buf.pop();
        }
    }

    Ok(consumed)
}
/// Looks up the name of `region` in `reference_sequences` and returns the
/// index it is found at.
///
/// # Errors
///
/// Returns an `io::ErrorKind::InvalidInput` error when the name is not
/// present in `reference_sequences`.
pub(crate) fn resolve_region(
    reference_sequences: &ReferenceSequences,
    region: &Region,
) -> io::Result<usize> {
    match reference_sequences.get_index_of(region.name()) {
        Some(id) => Ok(id),
        None => {
            let message = format!(
                "region reference sequence does not exist in reference sequences: {region:?}"
            );
            Err(io::Error::new(io::ErrorKind::InvalidInput, message))
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `read_line` strips `\n` and `\r\n` terminators and
    /// leaves an unterminated final line as is.
    #[test]
    fn test_read_line() -> io::Result<()> {
        // (input, expected buffer contents after one read)
        const CASES: [(&[u8], &[u8]); 3] = [
            (b"noodles\n", b"noodles"),
            (b"noodles\r\n", b"noodles"),
            (b"noodles", b"noodles"),
        ];

        let mut buf = Vec::new();

        for (input, expected) in CASES {
            let mut reader = input;
            buf.clear();
            read_line(&mut reader, &mut buf)?;
            assert_eq!(buf, expected);
        }

        Ok(())
    }
}