use crate::compress::flate;
use crate::encoding::binary::{ByteOrder, LITTLE_ENDIAN};
use crate::errors;
use crate::hash::crc32;
use crate::io as ggio;
use crate::time;
/// First magic byte of a gzip member header (ID1 in RFC 1952).
pub(super) const GZIP_ID1: u8 = 0x1f;
/// Second magic byte of a gzip member header (ID2 in RFC 1952).
const GZIP_ID2: u8 = 0x8b;
/// Compression-method byte accepted by the header parser (CM = 8, deflate).
const GZIP_DEFLATE: u8 = 8;
/// FTEXT flag bit. Not referenced by the code visible here, hence the
/// leading underscore.
const _FLAG_TEXT: u8 = 1 << 0;
/// FHCRC flag bit: a 16-bit header checksum follows the optional fields.
const FLAG_HDR_CRC: u8 = 1 << 1;
/// FEXTRA flag bit: a length-prefixed "extra" field is present.
const FLAG_EXTRA: u8 = 1 << 2;
/// FNAME flag bit: a NUL-terminated file name is present.
const FLAG_NAME: u8 = 1 << 3;
/// FCOMMENT flag bit: a NUL-terminated comment is present.
const FLAG_COMMENT: u8 = 1 << 4;
/// Message of the error reported when the CRC-32 or size stored in a member
/// trailer does not match the decompressed data.
pub const ERR_CHECKSUM_MSG: &str = "gzip: invalid checksum";
/// Message of the error reported when a member header is malformed (bad magic
/// bytes, unsupported method, bad header checksum, or an over-long string).
pub const ERR_INVALID_HEADER: &str = "gzip: invalid header";
/// Builds the error returned when a member trailer fails verification.
///
/// The error carries [`ERR_CHECKSUM_MSG`] as its message.
fn get_err_checksum() -> std::io::Error {
    let message = String::from(ERR_CHECKSUM_MSG);
    errors::new_stdio_other_error(message)
}
/// Builds the error returned when a member header cannot be accepted.
///
/// The error carries [`ERR_INVALID_HEADER`] as its message.
fn get_err_invalid_header() -> std::io::Error {
    let message = String::from(ERR_INVALID_HEADER);
    errors::new_stdio_other_error(message)
}
/// Metadata parsed from the header of a single gzip member.
#[derive(Debug)]
pub struct Header {
    /// Comment string, present only when the header's comment flag is set.
    pub comment: Option<String>,
    /// Raw bytes of the "extra" field, present only when the extra flag is set.
    pub extra: Option<Vec<u8>>,
    /// Modification time taken from the header's timestamp; the default
    /// `time::Time` value when the stored timestamp is zero.
    pub mod_time: time::Time,
    /// File name string, present only when the header's name flag is set.
    pub name: Option<String>,
    /// Operating-system byte copied verbatim from the header.
    pub os: u8,
}
/// Scratch state shared by header parsing and payload verification.
struct ReadState {
    /// Running CRC-32: accumulates header bytes while a header is parsed and
    /// payload bytes while a member is decompressed.
    digest: u32,
    /// Scratch buffer used for the fixed header fields and for header strings.
    buf: Vec<u8>,
}
/// Streaming gzip decoder layered over a borrowed buffered input.
pub struct Reader<'a, Input: std::io::BufRead> {
    /// Header of the member currently being decoded; `None` once the input
    /// ended cleanly where a member header was expected.
    pub header: Option<Header>,
    /// Header-parsing scratch space and the running CRC-32 of the payload.
    read_state: ReadState,
    /// Deflate decoder that reads the compressed payload from the input.
    decompressor: flate::Reader<&'a mut Input>,
    /// Number of decompressed bytes produced for the current member.
    size: u32,
    /// Error remembered from an earlier read and reported again afterwards.
    err: Option<std::io::Error>,
    /// Whether concatenated members are decoded as one continuous stream.
    multistream: bool,
}
impl<'a, Input: std::io::BufRead> Reader<'a, Input> {
    /// Creates a reader over `r`, parsing the first member header eagerly.
    ///
    /// Multistream mode starts enabled.
    ///
    /// # Errors
    ///
    /// Returns whatever error header parsing reports.
    pub fn new(r: &'a mut Input) -> std::io::Result<Self> {
        let mut state = ReadState::new();
        let first_header = state.read_header(r)?;
        Ok(Self {
            header: first_header,
            read_state: state,
            decompressor: flate::Reader::new(r),
            size: 0,
            err: None,
            multistream: true,
        })
    }

    /// Re-targets this reader at a new input `r` and parses its first header.
    ///
    /// On success the byte count and remembered error are cleared and
    /// multistream mode is switched back on.
    ///
    /// # Errors
    ///
    /// Returns the header-parsing error; in that case only the scratch state
    /// has been replaced and the remaining fields are left untouched.
    pub fn reset(&mut self, r: &'a mut Input) -> std::io::Result<()> {
        self.read_state = ReadState::new();
        let next_header = self.read_state.read_header(r)?;
        self.header = next_header;
        self.decompressor.reset(r, &[]);
        self.err = None;
        self.size = 0;
        self.multistream = true;
        Ok(())
    }

    /// Restarts decoding on the current input by parsing a header at the
    /// input's present position.
    ///
    /// On success the byte count and remembered error are cleared and
    /// multistream mode is switched back on.
    ///
    /// # Errors
    ///
    /// Returns the header-parsing error; in that case only the scratch state
    /// has been replaced and the remaining fields are left untouched.
    pub fn reset_state(&mut self) -> std::io::Result<()> {
        self.read_state = ReadState::new();
        let next_header = self
            .read_state
            .read_header(self.decompressor.input_reader())?;
        self.header = next_header;
        self.decompressor.reset_state(&[]);
        self.err = None;
        self.size = 0;
        self.multistream = true;
        Ok(())
    }

    /// Chooses whether `read` continues into following members (`true`) or
    /// stops after the current member (`false`).
    pub fn multistream(&mut self, ok: bool) {
        self.multistream = ok;
    }

    /// Closes the underlying decompressor and returns its result.
    pub fn close(&mut self) -> std::io::Result<()> {
        self.decompressor.close()
    }

    /// Reports whether no member header is available, i.e. the input ended
    /// where a header was expected.
    pub fn is_eof(&self) -> bool {
        self.header.is_none()
    }
}
impl<Input: std::io::BufRead> crate::io::Reader for Reader<'_, Input> {
    /// Decompresses data into `p` and verifies each member's trailer.
    ///
    /// While the decompressor has not reported end of stream, its result is
    /// returned unchanged. When it does, the 8-byte trailer (CRC-32, then
    /// size, both little-endian) is read from the input and compared with the
    /// values accumulated so far. In multistream mode the next member header
    /// is then parsed and decoding continues; otherwise the call returns.
    ///
    /// Trailer and header failures are remembered in `self.err` and reported
    /// again by subsequent calls.
    fn read(&mut self, p: &mut [u8]) -> ggio::IoRes {
        let mut n = 0;
        // A remembered failure is final: report it again without touching the input.
        if self.err.is_some() {
            return (0, errors::copy_stdio_option_error(&self.err));
        }
        while n == 0 {
            if self.is_eof() {
                return ggio::EOF;
            }
            let res = crate::io::Reader::read(&mut self.decompressor, p);
            self.read_state.digest =
                crc32::update(self.read_state.digest, &crc32::IEEE_TABLE, &p[..res.0]);
            // The trailer stores the size modulo 2^32 (RFC 1952), so the counter
            // must wrap instead of panicking on overflow in debug builds.
            self.size = self.size.wrapping_add(res.0 as u32);
            if !ggio::is_eof(&res) {
                // Normal case: data or a decompressor error, handed back as is.
                return res;
            }
            (n, self.err) = res;

            // End of the deflate stream: check the trailer.
            let mut trailer = [0; 8];
            if let Err(err) = self.decompressor.input_reader().read_exact(&mut trailer) {
                self.err = Some(err);
                return (n, errors::copy_stdio_option_error(&self.err));
            }
            let digest = LITTLE_ENDIAN.uint32(&trailer[..4]);
            let size = LITTLE_ENDIAN.uint32(&trailer[4..8]);
            if digest != self.read_state.digest || size != self.size {
                self.err = Some(get_err_checksum());
                return (n, errors::copy_stdio_option_error(&self.err));
            }
            self.read_state.digest = 0;
            self.size = 0;

            // The member is intact; stop here unless more members may follow.
            if !self.multistream {
                return (n, None);
            }
            self.err = None;
            self.read_state = ReadState::new();
            self.header = match self
                .read_state
                .read_header(self.decompressor.input_reader())
            {
                Ok(header) => header,
                Err(err) => {
                    // Remember the failure like the trailer errors above, so a
                    // later call cannot misread further input as a trailer.
                    self.err = Some(err);
                    return (n, errors::copy_stdio_option_error(&self.err));
                }
            };
            self.decompressor.reset_state(&[]);
        }
        (n, None)
    }
}
impl<Input: std::io::BufRead> std::io::Read for Reader<'_, Input> {
    /// Adapts the crate-level reader result to the `std::io::Read` contract.
    ///
    /// A positive byte count becomes `Ok(count)`, an end-of-stream result
    /// becomes `Ok(0)`, and any other result yields the error it carries.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        match crate::io::Reader::read(self, buf) {
            (count, _) if count > 0 => Ok(count),
            outcome if ggio::is_eof(&outcome) => Ok(0),
            (_, failure) => Err(failure.unwrap()),
        }
    }
}
impl ReadState {
    /// Creates a state with a zero digest and a 512-byte scratch buffer.
    fn new() -> Self {
        Self {
            buf: vec![0; 512],
            digest: 0,
        }
    }

    /// Parses one gzip member header from `r`.
    ///
    /// Returns `Ok(None)` when the input ends before any header byte is read,
    /// and `Ok(Some(header))` after a complete, valid header. The digest is
    /// used to checksum the header bytes and is reset to zero on success.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error when the input fails or is truncated,
    /// and the "invalid header" error for bad magic bytes, an unsupported
    /// compression method, a header checksum mismatch, or an over-long string.
    fn read_header<T: std::io::BufRead>(
        &mut self,
        r: &mut T,
    ) -> Result<Option<Header>, std::io::Error> {
        // Fixed 10-byte prefix: magic, method, flags, mtime, xfl, os.
        let fixed = ggio::read_full(r, &mut self.buf[..10]);
        if fixed.0 == 0 && ggio::is_unexpected_eof(&fixed) {
            return Ok(None);
        }
        if let Some(err) = fixed.1 {
            return Err(err);
        }
        let recognised =
            self.buf[0] == GZIP_ID1 && self.buf[1] == GZIP_ID2 && self.buf[2] == GZIP_DEFLATE;
        if !recognised {
            return Err(get_err_invalid_header());
        }

        let flg = self.buf[3];
        let mod_time = match i64::from(LITTLE_ENDIAN.uint32(&self.buf[4..8])) {
            stamp if stamp > 0 => time::unix(stamp, 0),
            _ => time::Time::default(),
        };
        // self.buf[8] (extra flags) is not used.
        let os = self.buf[9];
        self.digest = crc32::checksum_ieee(&self.buf[..10]);

        // Optional "extra" field: 16-bit little-endian length, then payload.
        let extra = if flg & FLAG_EXTRA == 0 {
            None
        } else {
            let mut len_bytes = [0; 2];
            r.read_exact(&mut len_bytes)?;
            self.digest = crc32::update(self.digest, &crc32::IEEE_TABLE, &len_bytes[..2]);
            let extra_len = usize::from(LITTLE_ENDIAN.uint16(&len_bytes[..2]));
            let mut payload = vec![0; extra_len];
            r.read_exact(&mut payload)?;
            self.digest = crc32::update(self.digest, &crc32::IEEE_TABLE, &payload);
            Some(payload)
        };

        // Optional NUL-terminated strings, in wire order: name, then comment.
        let name = (flg & FLAG_NAME != 0)
            .then(|| self.read_string(r))
            .transpose()?;
        let comment = (flg & FLAG_COMMENT != 0)
            .then(|| self.read_string(r))
            .transpose()?;

        // Optional header checksum: the low 16 bits of the CRC-32 so far.
        if flg & FLAG_HDR_CRC != 0 {
            r.read_exact(&mut self.buf[..2])?;
            let stored = LITTLE_ENDIAN.uint16(&self.buf[..2]);
            if stored != self.digest as u16 {
                return Err(get_err_invalid_header());
            }
        }

        // The digest is reused for the payload, so it starts again from zero.
        self.digest = 0;
        Ok(Some(Header {
            comment,
            extra,
            mod_time,
            name,
            os,
        }))
    }

    /// Reads a NUL-terminated header string from `r` into the scratch buffer.
    ///
    /// The bytes read, terminator included, are folded into the digest. Bytes
    /// above 0x7f are mapped one-to-one to the `char` with the same code
    /// point (Latin-1).
    ///
    /// # Errors
    ///
    /// Returns the "invalid header" error when no terminator is found within
    /// the scratch buffer, and the underlying I/O error when reading fails.
    fn read_string<T: std::io::BufRead>(&mut self, r: &mut T) -> std::io::Result<String> {
        let mut has_high_bytes = false;
        for pos in 0..self.buf.len() {
            r.read_exact(&mut self.buf[pos..=pos])?;
            let byte = self.buf[pos];
            if byte > 0x7f {
                has_high_bytes = true;
            }
            if byte != 0 {
                continue;
            }
            // The checksum covers the terminator as well.
            self.digest = crc32::update(self.digest, &crc32::IEEE_TABLE, &self.buf[..pos + 1]);
            let raw = &self.buf[..pos];
            let text = if has_high_bytes {
                raw.iter().map(|&b| b as char).collect()
            } else {
                String::from_utf8_lossy(raw).into_owned()
            };
            return Ok(text);
        }
        // The scratch buffer filled up without seeing a terminator.
        Err(get_err_invalid_header())
    }
}