use std::io::{self, Read};
use crate::constants::*;
use crate::crc::crc;
use crate::decode::decode;
/// Streaming decoder that pulls a chunk-framed compressed stream from an
/// inner reader and exposes the decoded bytes through [`Read`].
pub struct Reader<R>
where
    R: Read,
{
    /// Underlying source of framed input.
    reader: R,
    /// Decoded bytes of the most recently read data chunk.
    buf: Vec<u8>,
    /// Index into `buf` of the next byte that has not been handed out yet.
    pos: usize,
    /// Whether the leading stream identifier has already been consumed.
    read_header: bool,
    /// Set once the end of the underlying input has been reached.
    eof: bool,
}
impl<R: Read> Reader<R> {
    /// Wraps `reader` in a decoder for the chunk-framed stream format.
    ///
    /// Construction performs no I/O; the stream identifier is only checked by
    /// the first call to `read`.
    pub fn new(reader: R) -> Self {
        Reader {
            reader,
            buf: Vec::new(),
            pos: 0,
            read_header: false,
            eof: false,
        }
    }

    /// Builds the `InvalidData` error used to report a malformed stream.
    fn corrupt(msg: &'static str) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, msg)
    }

    /// Consumes the leading stream identifier and checks it against the
    /// accepted magic values.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` when the bytes match neither magic value, or the
    /// underlying I/O error (including `UnexpectedEof`) when they cannot be
    /// read in full.
    fn read_stream_identifier(&mut self) -> io::Result<()> {
        let mut magic = [0u8; MAGIC_CHUNK.len()];
        self.reader.read_exact(&mut magic)?;
        if magic == *MAGIC_CHUNK || magic == *MAGIC_CHUNK_SNAPPY {
            Ok(())
        } else {
            Err(Self::corrupt("invalid stream identifier"))
        }
    }

    /// Validates a stream identifier chunk that shows up after the start of
    /// the stream.
    ///
    /// `read_stream_identifier` compares the magic values against the very
    /// first bytes of the stream, so they span the 4-byte chunk header plus
    /// the body; the same comparison is applied here to `header` followed by
    /// the chunk body.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` when the chunk has the wrong length or contents.
    fn check_stream_identifier(&mut self, header: [u8; 4], chunk_len: usize) -> io::Result<()> {
        let mut magic = [0u8; MAGIC_CHUNK.len()];
        // Checking the length first also guarantees the slicing below is in
        // bounds.
        if header.len() + chunk_len != magic.len() {
            return Err(Self::corrupt("invalid stream identifier"));
        }
        magic[..header.len()].copy_from_slice(&header);
        self.reader.read_exact(&mut magic[header.len()..])?;
        if magic == *MAGIC_CHUNK || magic == *MAGIC_CHUNK_SNAPPY {
            Ok(())
        } else {
            Err(Self::corrupt("invalid stream identifier"))
        }
    }

    /// Reads the 4-byte chunk header.
    ///
    /// Returns `Ok(None)` when the input ends exactly on a chunk boundary
    /// (no header byte available at all).
    ///
    /// # Errors
    ///
    /// Returns `UnexpectedEof` when the input ends part-way through a header,
    /// so a truncated stream is not mistaken for a complete one.
    fn read_chunk_header(&mut self) -> io::Result<Option<[u8; 4]>> {
        let mut header = [0u8; 4];
        let mut filled = 0;
        while filled < header.len() {
            match self.reader.read(&mut header[filled..]) {
                Ok(0) if filled == 0 => return Ok(None),
                Ok(0) => {
                    return Err(io::Error::new(
                        io::ErrorKind::UnexpectedEof,
                        "truncated chunk header",
                    ))
                }
                Ok(n) => filled += n,
                // Same retry policy as `read_exact`.
                Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
                Err(e) => return Err(e),
            }
        }
        Ok(Some(header))
    }

    /// Reads chunks until one carrying data has been decoded into `buf`.
    ///
    /// Padding, index, repeated stream identifier and `0x80..=0xfd` chunks are
    /// consumed without producing output. They are handled in a loop rather
    /// than by recursion so a long run of them cannot exhaust the stack.
    ///
    /// Returns `Ok(true)` after a data chunk was appended to `buf`, and
    /// `Ok(false)` (setting `eof`) when the input ended on a chunk boundary.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` for an unknown chunk type or a malformed chunk,
    /// and `UnexpectedEof` when the input ends inside a chunk or its header.
    fn read_chunk(&mut self) -> io::Result<bool> {
        loop {
            let header = match self.read_chunk_header()? {
                Some(header) => header,
                None => {
                    self.eof = true;
                    return Ok(false);
                }
            };
            let chunk_type = header[0];
            // The length is a 24-bit little-endian value, so it always fits.
            let chunk_len = u32::from_le_bytes([header[1], header[2], header[3], 0]) as usize;
            match chunk_type {
                CHUNK_TYPE_COMPRESSED_DATA => {
                    self.read_compressed_chunk(chunk_len)?;
                    return Ok(true);
                }
                CHUNK_TYPE_UNCOMPRESSED_DATA => {
                    self.read_uncompressed_chunk(chunk_len)?;
                    return Ok(true);
                }
                CHUNK_TYPE_PADDING | CHUNK_TYPE_INDEX => self.skip_chunk(chunk_len)?,
                CHUNK_TYPE_STREAM_IDENTIFIER => self.check_stream_identifier(header, chunk_len)?,
                0x80..=0xfd => self.skip_chunk(chunk_len)?,
                _ => {
                    return Err(io::Error::new(
                        io::ErrorKind::InvalidData,
                        format!("unknown chunk type: 0x{:02x}", chunk_type),
                    ))
                }
            }
        }
    }

    /// Reads the body of a data chunk: a 4-byte little-endian checksum
    /// followed by `chunk_len - CHECKSUM_SIZE` payload bytes.
    ///
    /// Returns the expected checksum together with the raw payload.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` when `chunk_len` is too small to hold the
    /// checksum, or the underlying I/O error when the body cannot be read.
    fn read_checked_payload(&mut self, chunk_len: usize) -> io::Result<(u32, Vec<u8>)> {
        let data_len = chunk_len
            .checked_sub(CHECKSUM_SIZE)
            .ok_or_else(|| Self::corrupt("chunk too small"))?;
        let mut checksum_bytes = [0u8; 4];
        self.reader.read_exact(&mut checksum_bytes)?;
        let mut payload = vec![0u8; data_len];
        self.reader.read_exact(&mut payload)?;
        Ok((u32::from_le_bytes(checksum_bytes), payload))
    }

    /// Reads a compressed data chunk, decodes it, verifies the checksum of the
    /// decoded bytes and appends them to `buf`.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` when the chunk is too small, decoding fails or
    /// the checksum does not match.
    fn read_compressed_chunk(&mut self, chunk_len: usize) -> io::Result<()> {
        let (expected_crc, compressed) = self.read_checked_payload(chunk_len)?;
        let decompressed = decode(&compressed).map_err(|e| {
            io::Error::new(io::ErrorKind::InvalidData, format!("decode error: {}", e))
        })?;
        if crc(&decompressed) != expected_crc {
            return Err(Self::corrupt("CRC mismatch"));
        }
        self.buf.extend_from_slice(&decompressed);
        Ok(())
    }

    /// Reads an uncompressed data chunk, verifies its checksum and appends the
    /// payload to `buf`.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` when the chunk is too small or the checksum does
    /// not match.
    fn read_uncompressed_chunk(&mut self, chunk_len: usize) -> io::Result<()> {
        let (expected_crc, data) = self.read_checked_payload(chunk_len)?;
        if crc(&data) != expected_crc {
            return Err(Self::corrupt("CRC mismatch"));
        }
        self.buf.extend_from_slice(&data);
        Ok(())
    }

    /// Discards the next `chunk_len` bytes of input without buffering them.
    ///
    /// # Errors
    ///
    /// Returns `UnexpectedEof` when the input ends before `chunk_len` bytes
    /// were consumed.
    fn skip_chunk(&mut self, chunk_len: usize) -> io::Result<()> {
        // Widening usize -> u64 is lossless on supported targets.
        let wanted = chunk_len as u64;
        let skipped = io::copy(&mut self.reader.by_ref().take(wanted), &mut io::sink())?;
        if skipped == wanted {
            Ok(())
        } else {
            Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                "truncated chunk",
            ))
        }
    }

    /// Replaces the underlying reader with `reader`, clears all decoding state
    /// and returns the previous reader.
    ///
    /// After the call the decoder expects a fresh stream identifier.
    pub fn reset(&mut self, reader: R) -> R {
        self.buf.clear();
        self.pos = 0;
        self.read_header = false;
        self.eof = false;
        std::mem::replace(&mut self.reader, reader)
    }

    /// Returns a shared reference to the underlying reader.
    pub fn get_ref(&self) -> &R {
        &self.reader
    }

    /// Returns a mutable reference to the underlying reader.
    ///
    /// Reading from it directly consumes bytes the decoder will then not see.
    pub fn get_mut(&mut self) -> &mut R {
        &mut self.reader
    }
}
impl<R: Read> Read for Reader<R> {
    /// Copies decoded bytes into `buf`, decoding further chunks on demand.
    ///
    /// The stream identifier is consumed on the first call. Returns `Ok(0)`
    /// once the input is exhausted and every decoded byte has been handed out.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if !self.read_header {
            self.read_stream_identifier()?;
            self.read_header = true;
        }

        // Refill only when everything decoded so far has been consumed; keep
        // going past chunks that produced no bytes.
        loop {
            if self.pos < self.buf.len() || self.eof {
                break;
            }
            self.buf.clear();
            self.pos = 0;
            let got_chunk = self.read_chunk()?;
            if !got_chunk {
                break;
            }
        }

        let pending = &self.buf[self.pos..];
        if pending.is_empty() {
            return Ok(0);
        }
        let count = pending.len().min(buf.len());
        buf[..count].copy_from_slice(&pending[..count]);
        self.pos += count;
        Ok(count)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Writer;
    use std::io::Write;

    /// Compresses `data` with a default `Writer`, flushing before the writer
    /// goes out of scope.
    fn encode_all(data: &[u8]) -> Vec<u8> {
        let mut compressed = Vec::new();
        {
            let mut writer = Writer::new(&mut compressed);
            writer.write_all(data).unwrap();
            writer.flush().unwrap();
        }
        compressed
    }

    /// Runs `compressed` through a fresh `Reader` and returns every decoded
    /// byte, panicking on any read error.
    fn decode_all(compressed: &[u8]) -> Vec<u8> {
        let mut decoded = Vec::new();
        Reader::new(compressed).read_to_end(&mut decoded).unwrap();
        decoded
    }

    /// A short message survives a write/read round trip.
    #[test]
    fn test_reader_basic() {
        let compressed = encode_all(b"Hello, World!");
        assert_eq!(decode_all(&compressed), b"Hello, World!");
    }

    /// A writer that never received data yields either an error or no bytes.
    #[test]
    fn test_reader_empty() {
        let mut compressed = Vec::new();
        {
            let _writer = Writer::new(&mut compressed);
        }
        let mut decompressed = Vec::new();
        let result = Reader::new(&compressed[..]).read_to_end(&mut decompressed);
        assert!(result.is_err() || decompressed.is_empty());
    }

    /// A 100000-byte input survives a write/read round trip.
    #[test]
    fn test_reader_large() {
        let data = vec![b'A'; 100000];
        let compressed = encode_all(&data);
        assert_eq!(decode_all(&compressed), data);
    }

    /// Two writes through a writer with a 16-byte block size decode back to
    /// their concatenation.
    #[test]
    fn test_reader_multiple_chunks() {
        let data1 = b"First chunk of data";
        let data2 = b"Second chunk of data";
        let mut compressed = Vec::new();
        {
            let mut writer = Writer::with_block_size(&mut compressed, 16);
            writer.write_all(data1).unwrap();
            writer.write_all(data2).unwrap();
            writer.flush().unwrap();
        }
        let expected = [&data1[..], &data2[..]].concat();
        assert_eq!(decode_all(&compressed), expected);
    }
}