use std::{io, path};
use flate2::bufread::MultiGzDecoder;
use std::io::prelude::*;
/// Reports whether `header` begins with the two-byte gzip magic number
/// (`0x1f 0x8b`).
///
/// Only the first two bytes are inspected; shorter slices yield `false`.
pub fn is_gzipped(header: &[u8]) -> bool {
    matches!(header, [0x1f, 0x8b, ..])
}
/// Checks whether `path` ends in a `gz` extension (compared ASCII
/// case-insensitively).
///
/// Returns `(true, stripped)` where `stripped` is `path` with that final
/// extension removed, or `(false, path)` with the path handed back untouched
/// when the extension is missing or is something else.
pub fn is_gzipped_extension(path: path::PathBuf) -> (bool, path::PathBuf) {
    let ends_in_gz = matches!(path.extension(), Some(ext) if ext.eq_ignore_ascii_case("gz"));
    if ends_in_gz {
        (true, path.with_extension(""))
    } else {
        (false, path)
    }
}
/// A gzip decoder over a seekable, buffered source that keeps count of how
/// many decompressed bytes it has produced.
pub struct RestartableGzDecoder<R>
where
    R: BufRead + Seek,
{
    /// Number of decompressed bytes returned by `read` since the decoder was
    /// created or last reset.
    offset: u64,
    /// The active decoder. Wrapped in an `Option` so that `reset` can move it
    /// out, recover the underlying source, and install a fresh decoder.
    handle: Option<MultiGzDecoder<R>>,
}
impl<R: BufRead + Seek> RestartableGzDecoder<R> {
    /// Wraps `handle` in a new decoder with the decompressed position at zero.
    pub fn new(handle: R) -> Self {
        let decoder = MultiGzDecoder::new(handle);
        Self {
            handle: Some(decoder),
            offset: 0,
        }
    }

    /// Rewinds the underlying source to its first byte and replaces the
    /// decoder with a fresh one, setting the decompressed position to zero.
    ///
    /// The new decoder is installed and the position cleared even when the
    /// rewind fails; the result of that rewind is what gets returned.
    fn reset(&mut self) -> io::Result<u64> {
        // Move the decoder out of `self` so the source can be recovered by value.
        let mut source = self.handle.take().unwrap().into_inner();
        let rewound = source.seek(io::SeekFrom::Start(0));
        self.offset = 0;
        self.handle = Some(MultiGzDecoder::new(source));
        rewound
    }
}
impl<R: BufRead + Seek> Read for RestartableGzDecoder<R> {
    /// Reads decompressed bytes into `buf` and advances the tracked position
    /// by the number of bytes produced. Errors from the decoder are passed
    /// through and leave the tracked position unchanged.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let decoder = self.handle.as_mut().unwrap();
        let produced = decoder.read(buf)?;
        self.offset += produced as u64;
        Ok(produced)
    }
}
/// Reads and throws away exactly `count` bytes from `reader`.
///
/// The bytes are streamed into [`io::sink`] via [`io::copy`] rather than read
/// into a `count`-sized buffer, so the memory used does not grow with the
/// distance skipped.
///
/// # Errors
///
/// Returns [`io::ErrorKind::UnexpectedEof`] if `reader` runs out before
/// `count` bytes were consumed, and propagates any error raised by `reader`.
fn discard_exact<T: Read>(reader: &mut T, count: u64) -> io::Result<()> {
    let discarded = io::copy(&mut reader.by_ref().take(count), &mut io::sink())?;
    if discarded == count {
        Ok(())
    } else {
        Err(io::Error::new(
            io::ErrorKind::UnexpectedEof,
            "Reached the end of the gzip stream before the seek target",
        ))
    }
}

/// Seeking in terms of *decompressed* byte offsets.
///
/// Positions are reached by decoding and discarding data:
///
/// * `SeekFrom::Start(n)` restarts decoding from the beginning of the source
///   and discards the first `n` decompressed bytes.
/// * `SeekFrom::Current(0)` reports the current position without reading.
/// * `SeekFrom::Current(n)` with `n > 0` discards the next `n` bytes.
/// * `SeekFrom::Current(n)` with `n < 0` is turned into an absolute seek and
///   therefore also restarts decoding.
/// * `SeekFrom::End(_)` is not supported.
///
/// # Errors
///
/// * [`io::ErrorKind::Unsupported`] for `SeekFrom::End` and for a relative
///   seek that would land before the start of the stream.
/// * [`io::ErrorKind::UnexpectedEof`] when the stream ends before the target
///   position is reached.
/// * Any error produced while rewinding the source or decoding data.
impl<R: BufRead + Seek> Seek for RestartableGzDecoder<R> {
    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
        match pos {
            io::SeekFrom::Start(target) => {
                self.reset()?;
                discard_exact(self, target)?;
                Ok(target)
            }
            io::SeekFrom::End(_) => Err(io::Error::new(
                io::ErrorKind::Unsupported,
                "Cannot seek relative to end of a gzip stream",
            )),
            io::SeekFrom::Current(0) => Ok(self.offset),
            io::SeekFrom::Current(delta) if delta < 0 => {
                let distance_back = delta.unsigned_abs();
                if distance_back > self.offset {
                    Err(io::Error::new(
                        io::ErrorKind::Unsupported,
                        "Cannot seek earlier than the start of the stream",
                    ))
                } else {
                    // The guard above ensures this subtraction cannot underflow.
                    self.seek(io::SeekFrom::Start(self.offset - distance_back))
                }
            }
            io::SeekFrom::Current(delta) => {
                // `delta` is strictly positive here, so `unsigned_abs` is a
                // lossless conversion to `u64`.
                discard_exact(self, delta.unsigned_abs())?;
                Ok(self.offset)
            }
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use std::fs;

    /// Exercises a forward relative seek followed by an absolute seek back to
    /// an earlier offset, checking the decompressed text found at each spot.
    #[test]
    fn exercise_restartable() -> io::Result<()> {
        let source = io::BufReader::new(fs::File::open("test/data/small.mzML.gz")?);
        let mut decoder = RestartableGzDecoder::new(source);

        // Skip ahead from the start, then read everything that remains.
        decoder.seek(io::SeekFrom::Current(5113415))?;
        let mut tail = String::new();
        decoder.read_to_string(&mut tail)?;
        assert!(tail.starts_with("<indexList count=\"2\">"));

        // Jump back to an earlier absolute offset and read a single line.
        decoder.seek(io::SeekFrom::Start(5145))?;
        let mut line = String::new();
        let mut buffered = io::BufReader::new(&mut decoder);
        buffered.read_line(&mut line)?;
        assert!(line.contains("controllerType=0 controllerNumber=1 scan=1"));

        Ok(())
    }
}