#[cfg(feature = "gzip")]
use std::io::BufReader;
#[cfg(any(feature = "gzip", not(feature = "mmap")))]
use std::io::Read;
use std::marker::PhantomData;
use std::path::Path;
use std::sync::Arc;
#[cfg(not(feature = "gzip"))]
use crate::io::storage::gzip_feature_error;
use crate::io::storage::{ByteSlice, SharedBytes, is_gz_path};
use crate::model::block::{Block, BlockSlice};
use crate::model::chain::Chain;
use crate::model::error::ChainError;
use crate::parser::parse_chains_sequential;
#[cfg(feature = "parallel")]
use crate::parser::parse_chains_parallel;
#[cfg(feature = "gzip")]
use flate2::read::MultiGzDecoder;
#[cfg(feature = "mmap")]
use memmap2::MmapOptions;
/// An in-memory view of a parsed chain file.
///
/// A `Reader` owns the parsed `Chain` records together with the two shared
/// buffers those records were built from in `Reader::build`: the input bytes
/// and the list of blocks returned by the parser.
///
/// NOTE(review): `T` is carried only through `PhantomData`; no field stores a
/// `T`, and the only `impl` visible in this file is for `Reader<Chain>`.
/// Presumably the parameter is reserved for other record types — confirm.
#[derive(Debug)]
pub struct Reader<T = Chain> {
/// The input buffer the chains were parsed from. `Reader::build` hands a clone
/// of it to every chain's name slices; the reader keeps its own handle here.
_bytes: SharedBytes,
/// Every block returned by the parser. Each chain's `BlockSlice` is built
/// from a clone of this `Arc`.
_blocks: Arc<Vec<Block>>,
/// The parsed chains, in the order produced by iterating the parser's metadata.
chains: Vec<Chain>,
/// Ties the otherwise unused type parameter `T` to the struct.
_marker: PhantomData<T>,
}
impl Reader<Chain> {
    /// Reads the chain file at `path` and parses it with the sequential parser.
    ///
    /// How the bytes are obtained depends on the path and on the enabled
    /// crate features:
    ///
    /// * If `is_gz_path(path)` is true, the file is decompressed completely
    ///   into memory. This needs the `gzip` feature; without it the error
    ///   produced by `gzip_feature_error()` is returned and the file is never
    ///   opened.
    /// * Otherwise, with the `mmap` feature the file is memory-mapped, which
    ///   requires that it is not modified or truncated while the reader (or
    ///   any chain obtained from it) is alive. Without the `mmap` feature the
    ///   whole file is read into a `Vec<u8>`.
    ///
    /// # Errors
    ///
    /// Returns a `ChainError` if the file cannot be opened, read, decompressed
    /// or mapped, or if the parser reports an error.
    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, ChainError> {
        Self::build(Self::load_bytes(path.as_ref())?, ParseStrategy::Sequential)
    }

    /// Memory-maps the file at `path` and parses it with the sequential parser.
    ///
    /// The file is mapped exactly as given: unlike `from_path`, this does not
    /// check `is_gz_path`, so a compressed file is handed to the parser as-is.
    ///
    /// The underlying file must not be modified or truncated while the reader
    /// (or any chain obtained from it) is alive; this function cannot enforce
    /// that.
    ///
    /// # Errors
    ///
    /// Returns a `ChainError` if the file cannot be opened or mapped, or if
    /// the parser reports an error.
    #[cfg(feature = "mmap")]
    pub fn from_mmap<P: AsRef<Path>>(path: P) -> Result<Self, ChainError> {
        Self::build(Self::map_file(path.as_ref())?, ParseStrategy::Sequential)
    }

    /// Same as `from_path`, but parses with the parallel parser.
    ///
    /// The bytes are loaded in exactly the same way as in `from_path`
    /// (gzip decompression, memory mapping or a plain read, depending on the
    /// path and the enabled features); only the parse strategy differs.
    ///
    /// # Errors
    ///
    /// Returns a `ChainError` if the file cannot be opened, read, decompressed
    /// or mapped, or if the parser reports an error.
    #[cfg(feature = "parallel")]
    pub fn from_path_parallel<P: AsRef<Path>>(path: P) -> Result<Self, ChainError> {
        Self::build(Self::load_bytes(path.as_ref())?, ParseStrategy::Parallel)
    }

    /// Parses an already loaded, uncompressed buffer with the sequential parser.
    ///
    /// The reader takes ownership of `data`.
    ///
    /// # Errors
    ///
    /// Returns a `ChainError` if the parser reports an error.
    pub fn from_owned_bytes(data: Vec<u8>) -> Result<Self, ChainError> {
        Self::build(SharedBytes::from_owned(data), ParseStrategy::Sequential)
    }

    /// Parses an already loaded, uncompressed buffer with the parallel parser.
    ///
    /// The reader takes ownership of `data`.
    ///
    /// # Errors
    ///
    /// Returns a `ChainError` if the parser reports an error.
    #[cfg(feature = "parallel")]
    pub fn from_owned_bytes_parallel(data: Vec<u8>) -> Result<Self, ChainError> {
        Self::build(SharedBytes::from_owned(data), ParseStrategy::Parallel)
    }

    /// Loads the buffer backing `path`.
    ///
    /// Shared by `from_path` and `from_path_parallel` so that the gzip, mmap
    /// and plain-read handling exists in one place only.
    fn load_bytes(path: &Path) -> Result<SharedBytes, ChainError> {
        if is_gz_path(path) {
            return Ok(SharedBytes::from_owned(Self::read_gzip(path)?));
        }

        #[cfg(feature = "mmap")]
        {
            Self::map_file(path)
        }
        #[cfg(not(feature = "mmap"))]
        {
            let mut data = Vec::new();
            std::fs::File::open(path)?.read_to_end(&mut data)?;
            Ok(SharedBytes::from_owned(data))
        }
    }

    /// Decompresses the gzip file at `path` completely into memory.
    #[cfg(feature = "gzip")]
    fn read_gzip(path: &Path) -> Result<Vec<u8>, ChainError> {
        let file = std::fs::File::open(path)?;
        let mut decoder = MultiGzDecoder::new(BufReader::new(file));
        let mut decompressed = Vec::new();
        decoder.read_to_end(&mut decompressed)?;
        Ok(decompressed)
    }

    /// Stand-in used when the `gzip` feature is disabled: always fails with
    /// `gzip_feature_error()` without touching the file system.
    #[cfg(not(feature = "gzip"))]
    fn read_gzip(_path: &Path) -> Result<Vec<u8>, ChainError> {
        Err(gzip_feature_error())
    }

    /// Opens `path` and wraps a memory map of it in `SharedBytes`.
    #[cfg(feature = "mmap")]
    fn map_file(path: &Path) -> Result<SharedBytes, ChainError> {
        let file = std::fs::File::open(path)?;
        // SAFETY: `MmapOptions::map` is unsafe because reading the mapping is
        // undefined behaviour if the underlying file is modified or truncated
        // (by this or any other process) while the mapping is alive. Nothing
        // in this function can enforce that; it is a precondition on callers
        // of the public constructors that end up here.
        let mmap = unsafe { MmapOptions::new().map(&file)? };
        Ok(SharedBytes::from_mmap(mmap))
    }

    /// Parses `bytes` with the requested strategy and assembles the reader.
    ///
    /// The parser returns per-chain metadata plus one list of blocks. Every
    /// `Chain` is then built from its metadata: the two names become
    /// `ByteSlice`s over a clone of `bytes`, and the blocks become a
    /// `BlockSlice` over the shared block list. The reader keeps its own
    /// handle to both buffers.
    fn build(bytes: SharedBytes, strategy: ParseStrategy) -> Result<Self, ChainError> {
        let buf = bytes.as_slice();
        let (metas, blocks) = match strategy {
            ParseStrategy::Sequential => parse_chains_sequential(buf)?,
            #[cfg(feature = "parallel")]
            ParseStrategy::Parallel => parse_chains_parallel(buf)?,
        };

        let shared_blocks: Arc<Vec<Block>> = Arc::new(blocks);
        let chains = metas
            .into_iter()
            .map(|meta| Chain {
                score: meta.score,
                reference_name: ByteSlice::new(bytes.clone(), meta.reference_name),
                reference_size: meta.reference_size,
                reference_strand: meta.reference_strand,
                reference_start: meta.reference_start,
                reference_end: meta.reference_end,
                query_name: ByteSlice::new(bytes.clone(), meta.query_name),
                query_size: meta.query_size,
                query_strand: meta.query_strand,
                query_start: meta.query_start,
                query_end: meta.query_end,
                id: meta.id,
                blocks: BlockSlice::new(Arc::clone(&shared_blocks), meta.blocks),
            })
            .collect();

        Ok(Self {
            _bytes: bytes,
            _blocks: shared_blocks,
            chains,
            _marker: PhantomData,
        })
    }

    /// Iterates over the parsed chains by reference, in stored order.
    pub fn chains(&self) -> impl Iterator<Item = &Chain> {
        self.chains.iter()
    }

    /// Returns the number of parsed chains.
    pub fn len(&self) -> usize {
        self.chains.len()
    }

    /// Returns `true` if no chains were parsed.
    pub fn is_empty(&self) -> bool {
        self.chains.is_empty()
    }
}
/// Selects which parser is run over the input bytes when a reader is built.
///
/// `Debug`, `PartialEq` and `Eq` are derived so a strategy can be logged and
/// compared in assertions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ParseStrategy {
    /// Parse with `parse_chains_sequential`.
    Sequential,
    /// Parse with `parse_chains_parallel`; only exists with the `parallel` feature.
    #[cfg(feature = "parallel")]
    Parallel,
}