Struct ParMultiGzipReader

Source

pub struct ParMultiGzipReader<R>
where
    R: Read,
{ /* private fields */ }

Expand description

This reader performs parallel decompression of BCF data compressed in the BGZF format, a specialized version of the multi-member gzip file format. It uses internal buffers to sequentially ingest compressed data from multiple gzip blocks, then decompresses those blocks concurrently with the rayon crate. This design addresses the bottleneck in data processing speed that occurs when decompression is not executed in parallel, making the handling of compressed data streams more efficient.

Example:

use bcf_reader::*;
use std::fs::File;
use std::io::BufReader;
use std::io::Write;
// Reference genotypes generated by bcftools:
// bcftools query -f '[\t%GT]\n' test.bcf | bgzip -c > test_gt.gz
let mut gt_str = String::new();
smart_reader("testdata/test_gt.gz")
    .unwrap()
    .read_to_string(&mut gt_str)
    .unwrap();
// Read the same genotypes from the BCF file via bcf-reader.
// `max_gzip_block_in_buffer` is passed as `ngzip_max`; both offsets are left as `None`.
let max_gzip_block_in_buffer = 10;
let reader = File::open("testdata/test.bcf").map(BufReader::new).unwrap();
let mut f =
    ParMultiGzipReader::from_reader(reader, max_gzip_block_in_buffer, None, None).unwrap();
let s = read_header(&mut f).unwrap();
let header = Header::from_string(&s).unwrap();
let mut record = Record::default();
let mut gt_str2 = Vec::<u8>::new();
// Keep reading records until `Record::read` returns an error.
while record.read(&mut f).is_ok() {
    for (i, bn) in record.fmt_gt(&header).enumerate() {
        let bn = bn.unwrap();
        let (noploidy, dot, phased, allele) = bn.gt_val();
        assert!(!noploidy); // missing ploidy
        // Even-indexed values start a new tab-separated field; odd-indexed
        // values are joined to the previous one with '|' (phased) or '/'.
        let sep = if i % 2 == 0 {
            '\t'
        } else if phased {
            '|'
        } else {
            '/'
        };
        if dot {
            write!(gt_str2, "{sep}.").unwrap();
        } else {
            write!(gt_str2, "{sep}{allele}").unwrap();
        }
    }
    // One output line per record.
    writeln!(gt_str2).unwrap();
}
let gt_str2 = String::from_utf8(gt_str2).unwrap();
// Compare bcftools results and bcf-reader results field by field.
// Note: `zip` stops at the end of the shorter of the two sequences.
for (a, b) in gt_str
    .split(|c| c == '\n' || c == '\t')
    .zip(gt_str2.split(|c| c == '\n' || c == '\t'))
{
    assert_eq!(a, b);
}

See ParMultiGzipReader::from_reader for an example of jumping to a target genome interval.

ParMultiGzipReader

Struct ParMultiGzipReader Copy item path

Implementations§

impl<R> ParMultiGzipReader<R> where R: Read,

pub fn from_reader( reader: R, ngzip_max: usize, coffset: Option<u64>, uoffset: Option<u64>, ) -> Result<Self>

§Examples

§Parameters

§Returns

pub fn get_coffset_uoffset(&self) -> (u64, u64)

Trait Implementations§

impl<R> Read for ParMultiGzipReader<R> where R: Read,

fn read(&mut self, buf: &mut [u8]) -> Result<usize>

fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> Result<usize, Error>

fn is_read_vectored(&self) -> bool

fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize, Error>

fn read_to_string(&mut self, buf: &mut String) -> Result<usize, Error>

fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), Error>

fn read_buf(&mut self, buf: BorrowedCursor<'_>) -> Result<(), Error>

fn read_buf_exact(&mut self, cursor: BorrowedCursor<'_>) -> Result<(), Error>

fn by_ref(&mut self) -> &mut Self where Self: Sized,

fn bytes(self) -> Bytes<Self> where Self: Sized,

fn chain<R>(self, next: R) -> Chain<Self, R> where R: Read, Self: Sized,

fn take(self, limit: u64) -> Take<Self> where Self: Sized,

fn read_array<const N: usize>(&mut self) -> Result<[u8; N], Error> where Self: Sized,

Auto Trait Implementations§

impl<R> Freeze for ParMultiGzipReader<R> where R: Freeze,

impl<R> RefUnwindSafe for ParMultiGzipReader<R> where R: RefUnwindSafe,

impl<R> Send for ParMultiGzipReader<R> where R: Send,

impl<R> Sync for ParMultiGzipReader<R> where R: Sync,

impl<R> Unpin for ParMultiGzipReader<R> where R: Unpin,

impl<R> UnwindSafe for ParMultiGzipReader<R> where R: UnwindSafe,

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<R> ReadBytesExt for R where R: Read + ?Sized,

fn read_u8(&mut self) -> Result<u8, Error>

fn read_i8(&mut self) -> Result<i8, Error>

fn read_u16<T>(&mut self) -> Result<u16, Error> where T: ByteOrder,

fn read_i16<T>(&mut self) -> Result<i16, Error> where T: ByteOrder,

fn read_u24<T>(&mut self) -> Result<u32, Error> where T: ByteOrder,

fn read_i24<T>(&mut self) -> Result<i32, Error> where T: ByteOrder,

fn read_u32<T>(&mut self) -> Result<u32, Error> where T: ByteOrder,

fn read_i32<T>(&mut self) -> Result<i32, Error> where T: ByteOrder,

fn read_u48<T>(&mut self) -> Result<u64, Error> where T: ByteOrder,

fn read_i48<T>(&mut self) -> Result<i64, Error> where T: ByteOrder,

fn read_u64<T>(&mut self) -> Result<u64, Error> where T: ByteOrder,

fn read_i64<T>(&mut self) -> Result<i64, Error> where T: ByteOrder,

fn read_u128<T>(&mut self) -> Result<u128, Error> where T: ByteOrder,

fn read_i128<T>(&mut self) -> Result<i128, Error> where T: ByteOrder,

fn read_uint<T>(&mut self, nbytes: usize) -> Result<u64, Error> where T: ByteOrder,

fn read_int<T>(&mut self, nbytes: usize) -> Result<i64, Error> where T: ByteOrder,

fn read_uint128<T>(&mut self, nbytes: usize) -> Result<u128, Error> where T: ByteOrder,

fn read_int128<T>(&mut self, nbytes: usize) -> Result<i128, Error> where T: ByteOrder,

fn read_f32<T>(&mut self) -> Result<f32, Error> where T: ByteOrder,

fn read_f64<T>(&mut self) -> Result<f64, Error> where T: ByteOrder,

fn read_u16_into<T>(&mut self, dst: &mut [u16]) -> Result<(), Error> where T: ByteOrder,

fn read_u32_into<T>(&mut self, dst: &mut [u32]) -> Result<(), Error> where T: ByteOrder,

fn read_u64_into<T>(&mut self, dst: &mut [u64]) -> Result<(), Error> where T: ByteOrder,

fn read_u128_into<T>(&mut self, dst: &mut [u128]) -> Result<(), Error> where T: ByteOrder,

fn read_i8_into(&mut self, dst: &mut [i8]) -> Result<(), Error>

fn read_i16_into<T>(&mut self, dst: &mut [i16]) -> Result<(), Error> where T: ByteOrder,

fn read_i32_into<T>(&mut self, dst: &mut [i32]) -> Result<(), Error> where T: ByteOrder,

Struct ParMultiGzipReader

impl<R> ParMultiGzipReader<R>
where R: Read,

impl<R> Read for ParMultiGzipReader<R>
where R: Read,

fn by_ref(&mut self) -> &mut Self
where Self: Sized,

fn bytes(self) -> Bytes<Self>
where Self: Sized,

fn chain<R>(self, next: R) -> Chain<Self, R>
where R: Read, Self: Sized,

fn take(self, limit: u64) -> Take<Self>
where Self: Sized,

fn read_array<const N: usize>(&mut self) -> Result<[u8; N], Error>
where Self: Sized,

impl<R> Freeze for ParMultiGzipReader<R>
where R: Freeze,

impl<R> RefUnwindSafe for ParMultiGzipReader<R>
where R: RefUnwindSafe,

impl<R> Send for ParMultiGzipReader<R>
where R: Send,

impl<R> Sync for ParMultiGzipReader<R>
where R: Sync,

impl<R> Unpin for ParMultiGzipReader<R>
where R: Unpin,

impl<R> UnwindSafe for ParMultiGzipReader<R>
where R: UnwindSafe,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<R> ReadBytesExt for R
where R: Read + ?Sized,

fn read_u16<T>(&mut self) -> Result<u16, Error>
where T: ByteOrder,

fn read_i16<T>(&mut self) -> Result<i16, Error>
where T: ByteOrder,

fn read_u24<T>(&mut self) -> Result<u32, Error>
where T: ByteOrder,

fn read_i24<T>(&mut self) -> Result<i32, Error>
where T: ByteOrder,

fn read_u32<T>(&mut self) -> Result<u32, Error>
where T: ByteOrder,

fn read_i32<T>(&mut self) -> Result<i32, Error>
where T: ByteOrder,

fn read_u48<T>(&mut self) -> Result<u64, Error>
where T: ByteOrder,

fn read_i48<T>(&mut self) -> Result<i64, Error>
where T: ByteOrder,

fn read_u64<T>(&mut self) -> Result<u64, Error>
where T: ByteOrder,

fn read_i64<T>(&mut self) -> Result<i64, Error>
where T: ByteOrder,

fn read_u128<T>(&mut self) -> Result<u128, Error>
where T: ByteOrder,

fn read_i128<T>(&mut self) -> Result<i128, Error>
where T: ByteOrder,

fn read_uint<T>(&mut self, nbytes: usize) -> Result<u64, Error>
where T: ByteOrder,

fn read_int<T>(&mut self, nbytes: usize) -> Result<i64, Error>
where T: ByteOrder,

fn read_uint128<T>(&mut self, nbytes: usize) -> Result<u128, Error>
where T: ByteOrder,

fn read_int128<T>(&mut self, nbytes: usize) -> Result<i128, Error>
where T: ByteOrder,

fn read_f32<T>(&mut self) -> Result<f32, Error>
where T: ByteOrder,

fn read_f64<T>(&mut self) -> Result<f64, Error>
where T: ByteOrder,

fn read_u16_into<T>(&mut self, dst: &mut [u16]) -> Result<(), Error>
where T: ByteOrder,

fn read_u32_into<T>(&mut self, dst: &mut [u32]) -> Result<(), Error>
where T: ByteOrder,

fn read_u64_into<T>(&mut self, dst: &mut [u64]) -> Result<(), Error>
where T: ByteOrder,

fn read_u128_into<T>(&mut self, dst: &mut [u128]) -> Result<(), Error>
where T: ByteOrder,

fn read_i16_into<T>(&mut self, dst: &mut [i16]) -> Result<(), Error>
where T: ByteOrder,

fn read_i32_into<T>(&mut self, dst: &mut [i32]) -> Result<(), Error>
where T: ByteOrder,

fn read_i64_into<T>(&mut self, dst: &mut [i64]) -> Result<(), Error>
where T: ByteOrder,

fn read_i128_into<T>(&mut self, dst: &mut [i128]) -> Result<(), Error>
where T: ByteOrder,

fn read_f32_into<T>(&mut self, dst: &mut [f32]) -> Result<(), Error>
where T: ByteOrder,

fn read_f32_into_unchecked<T>(&mut self, dst: &mut [f32]) -> Result<(), Error>
where T: ByteOrder,

fn read_f64_into<T>(&mut self, dst: &mut [f64]) -> Result<(), Error>
where T: ByteOrder,

fn read_f64_into_unchecked<T>(&mut self, dst: &mut [f64]) -> Result<(), Error>
where T: ByteOrder,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,