#[cfg(feature = "gzip")]
use std::io::BufReader;
#[cfg(any(feature = "gzip", not(feature = "mmap")))]
use std::io::Read;
use std::marker::PhantomData;
use std::path::Path;
use std::sync::Arc;
#[cfg(not(feature = "gzip"))]
use crate::io::storage::gzip_feature_error;
use crate::io::storage::{ByteSlice, SharedBytes, is_gz_path};
use crate::model::block::{BlockSlice, Blocks};
use crate::model::psl::{Psl, PslxSeq};
use crate::{PslError, parser::parse_psl_sequential};
#[cfg(feature = "parallel")]
use crate::parser::parse_psl_parallel;
#[cfg(feature = "parallel")]
use rayon::prelude::*;
#[cfg(feature = "gzip")]
use flate2::read::MultiGzDecoder;
#[cfg(feature = "mmap")]
use memmap2::MmapOptions;
/// Options attached to a [`Reader`] at construction time.
///
/// The `Default` value has every flag set to `false`.
#[derive(Debug, Clone, Copy, Default)]
pub struct ReaderOptions {
/// NOTE(review): this flag is stored on the reader and returned by
/// `Reader::options`, but nothing in this file reads it; presumably it
/// selects mRNA-specific handling in downstream code — confirm.
pub mrna: bool,
}
/// Fully parsed, in-memory view of a PSL input.
///
/// The type parameter `T` is carried only through `PhantomData`; the stored
/// records are always [`Psl`] values regardless of `T`.
#[derive(Debug)]
pub struct Reader<T = Psl> {
// Handle to the input buffer; the byte slices inside every record are built
// from clones of this handle.
_bytes: SharedBytes,
// Block storage produced by the parser, shared with every record's
// `BlockSlice`.
_blocks: Arc<Blocks>,
// One entry per parsed alignment, in the order the parser returned them.
records: Vec<Psl>,
// Options supplied at construction; exposed through `options()`.
options: ReaderOptions,
// Ties the otherwise unused type parameter `T` to the struct.
_marker: PhantomData<T>,
}
impl Reader<Psl> {
    /// Reads and parses the PSL file at `path` sequentially with default options.
    ///
    /// Equivalent to [`Reader::from_path_with`] with `ReaderOptions::default()`.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Reader::from_path_with`].
    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, PslError> {
        Self::from_path_with(path, ReaderOptions::default())
    }

    /// Reads and parses the PSL file at `path` sequentially, attaching `options`.
    ///
    /// Paths recognised by `is_gz_path` are decompressed fully into memory when
    /// the `gzip` feature is enabled. Any other path is memory-mapped when the
    /// `mmap` feature is enabled and read into an owned buffer otherwise.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened, read, mapped or
    /// decompressed, if the path is gzip-compressed while the `gzip` feature is
    /// disabled, or if parsing fails.
    pub fn from_path_with<P: AsRef<Path>>(
        path: P,
        options: ReaderOptions,
    ) -> Result<Self, PslError> {
        let bytes = Self::load_bytes(path.as_ref())?;
        Self::build(bytes, ParseStrategy::Sequential, options)
    }

    /// Memory-maps the file at `path` and parses it sequentially with default
    /// options.
    ///
    /// Unlike [`Reader::from_path`], this never checks for gzip input: the file
    /// contents are always mapped and parsed as-is.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or mapped, or if parsing
    /// fails.
    #[cfg(feature = "mmap")]
    pub fn from_mmap<P: AsRef<Path>>(path: P) -> Result<Self, PslError> {
        let bytes = Self::map_file(path.as_ref())?;
        Self::build(bytes, ParseStrategy::Sequential, ReaderOptions::default())
    }

    /// Reads the PSL file at `path` and parses it with the parallel parser,
    /// using default options.
    ///
    /// Bytes are loaded exactly as in [`Reader::from_path_with`]; only the parse
    /// strategy differs.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened, read, mapped or
    /// decompressed, if the path is gzip-compressed while the `gzip` feature is
    /// disabled, or if parsing fails.
    #[cfg(feature = "parallel")]
    pub fn from_path_parallel<P: AsRef<Path>>(path: P) -> Result<Self, PslError> {
        let bytes = Self::load_bytes(path.as_ref())?;
        Self::build(bytes, ParseStrategy::Parallel, ReaderOptions::default())
    }

    /// Parses an in-memory PSL buffer sequentially with default options.
    ///
    /// The reader takes ownership of `data`.
    ///
    /// # Errors
    ///
    /// Returns an error if parsing fails.
    pub fn from_owned_bytes(data: Vec<u8>) -> Result<Self, PslError> {
        Self::build(
            SharedBytes::from_owned(data),
            ParseStrategy::Sequential,
            ReaderOptions::default(),
        )
    }

    /// Parses an in-memory PSL buffer with the parallel parser and default
    /// options.
    ///
    /// The reader takes ownership of `data`.
    ///
    /// # Errors
    ///
    /// Returns an error if parsing fails.
    #[cfg(feature = "parallel")]
    pub fn from_owned_bytes_parallel(data: Vec<u8>) -> Result<Self, PslError> {
        Self::build(
            SharedBytes::from_owned(data),
            ParseStrategy::Parallel,
            ReaderOptions::default(),
        )
    }

    /// Loads the raw bytes behind `path`, decompressing gzip input when
    /// supported.
    ///
    /// This is the single place that decides between gzip decoding, memory
    /// mapping and a plain read, so every path-based constructor behaves the
    /// same way.
    fn load_bytes(path: &Path) -> Result<SharedBytes, PslError> {
        if is_gz_path(path) {
            #[cfg(feature = "gzip")]
            {
                let buffer = read_gz(path)?;
                return Ok(SharedBytes::from_owned(buffer));
            }
            #[cfg(not(feature = "gzip"))]
            {
                return Err(gzip_feature_error());
            }
        }
        // Exactly one of the two blocks below survives cfg-stripping and becomes
        // the tail expression of this function.
        #[cfg(feature = "mmap")]
        {
            Self::map_file(path)
        }
        #[cfg(not(feature = "mmap"))]
        {
            let mut data = Vec::new();
            std::fs::File::open(path)?.read_to_end(&mut data)?;
            Ok(SharedBytes::from_owned(data))
        }
    }

    /// Opens `path` and wraps a memory mapping of it in [`SharedBytes`].
    #[cfg(feature = "mmap")]
    fn map_file(path: &Path) -> Result<SharedBytes, PslError> {
        let file = std::fs::File::open(path)?;
        // SAFETY: the mapping is only ever read through `SharedBytes`. Soundness
        // additionally relies on the underlying file not being truncated or
        // rewritten by another process while the mapping is alive, which this
        // library cannot enforce; callers must treat the input as immutable for
        // the lifetime of the reader.
        let mmap = unsafe { MmapOptions::new().map(&file)? };
        Ok(SharedBytes::from_mmap(mmap))
    }

    /// Parses `bytes` with the requested `strategy` and assembles the reader.
    ///
    /// The parser yields per-record metadata plus one block store. Each metadata
    /// entry is turned into a [`Psl`] whose names, optional sequences and blocks
    /// are handles into `bytes` and the shared block store.
    fn build(
        bytes: SharedBytes,
        strategy: ParseStrategy,
        options: ReaderOptions,
    ) -> Result<Self, PslError> {
        let buf = bytes.as_slice();
        let (metas, blocks) = match strategy {
            ParseStrategy::Sequential => parse_psl_sequential(buf)?,
            #[cfg(feature = "parallel")]
            ParseStrategy::Parallel => parse_psl_parallel(buf)?,
        };
        // The block store is shared by every record, so it lives behind an `Arc`.
        let blocks_arc = Arc::new(blocks);
        let records: Vec<Psl> = metas
            .into_iter()
            .map(|meta| Psl {
                matches: meta.matches,
                mismatches: meta.mismatches,
                rep_matches: meta.rep_matches,
                n_count: meta.n_count,
                query_num_insert: meta.query_num_insert,
                query_base_insert: meta.query_base_insert,
                reference_num_insert: meta.reference_num_insert,
                reference_base_insert: meta.reference_base_insert,
                query_size: meta.query_size,
                query_start: meta.query_start,
                query_end: meta.query_end,
                reference_size: meta.reference_size,
                reference_start: meta.reference_start,
                reference_end: meta.reference_end,
                strands: meta.strands,
                query_name: ByteSlice::new(bytes.clone(), meta.query_name),
                reference_name: ByteSlice::new(bytes.clone(), meta.reference_name),
                blocks: BlockSlice::new(Arc::clone(&blocks_arc), meta.blocks),
                // Sequence columns are optional; they are wrapped only when the
                // parser reported them.
                seq: meta.seq.map(|(query, reference)| PslxSeq {
                    query_seq: ByteSlice::new(bytes.clone(), query),
                    reference_seq: ByteSlice::new(bytes.clone(), reference),
                }),
            })
            .collect();
        Ok(Reader {
            _bytes: bytes,
            _blocks: blocks_arc,
            records,
            options,
            _marker: PhantomData,
        })
    }

    /// Returns an iterator over the parsed records, in stored order.
    pub fn records(&self) -> impl Iterator<Item = &Psl> {
        self.records.iter()
    }

    /// Returns the parsed records as a slice.
    pub fn as_slice(&self) -> &[Psl] {
        &self.records
    }

    /// Returns a rayon parallel iterator over the parsed records.
    #[cfg(feature = "parallel")]
    pub fn par_records(&self) -> impl ParallelIterator<Item = &Psl> {
        self.records.par_iter()
    }

    /// Returns a copy of the options this reader was constructed with.
    pub fn options(&self) -> ReaderOptions {
        self.options
    }

    /// Returns the number of parsed records.
    pub fn len(&self) -> usize {
        self.records.len()
    }

    /// Returns `true` when no records were parsed.
    pub fn is_empty(&self) -> bool {
        self.records.is_empty()
    }
}
/// Decompresses the gzip file at `path` fully into memory.
///
/// The file is read through a `BufReader` and decoded with `MultiGzDecoder`,
/// so inputs made of several concatenated gzip members are decoded in full.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or if reading or
/// decompression fails.
#[cfg(feature = "gzip")]
fn read_gz(path: &Path) -> Result<Vec<u8>, PslError> {
    let mut decompressed = Vec::new();
    MultiGzDecoder::new(BufReader::new(std::fs::File::open(path)?))
        .read_to_end(&mut decompressed)?;
    Ok(decompressed)
}
/// Selects which parser `Reader::build` runs over the input buffer.
#[derive(Clone, Copy)]
enum ParseStrategy {
/// Parse with `parse_psl_sequential`.
Sequential,
/// Parse with `parse_psl_parallel`; only exists when the `parallel` feature
/// is enabled.
#[cfg(feature = "parallel")]
Parallel,
}