use anyhow::{anyhow, Result};
use flate2::read::MultiGzDecoder;
use std::fs::File;
use std::io::{BufRead, BufReader, Chain, Cursor, Read};
use std::path::Path;
// In-memory buffer (the bytes already consumed while sniffing the format) followed by the rest of the file.
type ChainReader = Chain<Cursor<Vec<u8>>, File>;
// Buffered gzip decoder over the chained input.
type GzipReader = BufReader<MultiGzDecoder<ChainReader>>;
// Buffered zstd decoder over the chained input; the decoder itself wraps its input in a `BufReader`.
type ZstdReader = BufReader<zstd::Decoder<'static, BufReader<ChainReader>>>;
// Buffered pass-through over the chained input, used when no compression magic is recognised.
type PlainReader = BufReader<ChainReader>;
/// Buffered reader over a file whose compression format was chosen from its leading bytes.
///
/// Each variant wraps a `BufReader`; the `Read` and `BufRead` impls forward to it.
pub enum DecompressionReader {
/// Input that starts with the gzip magic; decoded through `MultiGzDecoder`.
Gzip(GzipReader),
/// Input that starts with the zstd magic; decoded through `zstd::Decoder`.
Zstd(ZstdReader),
/// Input that matched neither magic; bytes are returned as stored.
Plain(PlainReader),
}
impl std::fmt::Debug for DecompressionReader {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DecompressionReader::Gzip(_) => write!(f, "DecompressionReader::Gzip"),
DecompressionReader::Zstd(_) => write!(f, "DecompressionReader::Zstd"),
DecompressionReader::Plain(_) => write!(f, "DecompressionReader::Plain"),
}
}
}
impl BufRead for DecompressionReader {
fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
match self {
DecompressionReader::Gzip(reader) => reader.fill_buf(),
DecompressionReader::Zstd(reader) => reader.fill_buf(),
DecompressionReader::Plain(reader) => reader.fill_buf(),
}
}
fn consume(&mut self, amt: usize) {
match self {
DecompressionReader::Gzip(reader) => reader.consume(amt),
DecompressionReader::Zstd(reader) => reader.consume(amt),
DecompressionReader::Plain(reader) => reader.consume(amt),
}
}
}
impl Read for DecompressionReader {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
match self {
DecompressionReader::Gzip(reader) => reader.read(buf),
DecompressionReader::Zstd(reader) => reader.read(buf),
DecompressionReader::Plain(reader) => reader.read(buf),
}
}
}
/// Compression format inferred from the first bytes of a stream.
enum SniffedFormat {
    Gzip,
    Zstd,
    Plain,
}

/// Reads up to four leading bytes from `reader` and classifies the stream.
///
/// Returns the bytes that were consumed (so the caller can chain them back in
/// front of `reader`) together with the detected format.
///
/// A single `read` call is allowed to return fewer bytes than requested even
/// when more data will follow (pipes, sockets, chained readers), so this keeps
/// reading until the prefix buffer is full or the stream reports end of input.
/// Reads that fail with `ErrorKind::Interrupted` are retried.
///
/// # Errors
///
/// Propagates any other I/O error from `reader`.
fn sniff_format<R: Read>(reader: &mut R) -> std::io::Result<(Vec<u8>, SniffedFormat)> {
    /// Number of leading bytes inspected.
    const MAGIC_LEN: usize = 4;
    /// Gzip magic `1F 8B` followed by compression-method byte `08`.
    const GZIP_MAGIC: [u8; 3] = [0x1F, 0x8B, 0x08];
    /// Zstandard frame magic.
    const ZSTD_MAGIC: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];

    let mut head = [0u8; MAGIC_LEN];
    let mut filled = 0;
    while filled < MAGIC_LEN {
        match reader.read(&mut head[filled..]) {
            // End of input: the stream is shorter than the magic.
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }

    let head = &head[..filled];
    let format = if head.starts_with(&GZIP_MAGIC) {
        SniffedFormat::Gzip
    } else if head.starts_with(&ZSTD_MAGIC) {
        SniffedFormat::Zstd
    } else {
        SniffedFormat::Plain
    };
    Ok((head.to_vec(), format))
}

/// Wraps `file` in the [`DecompressionReader`] variant matching its leading bytes.
///
/// The sniffed bytes are chained back in front of the file, so the returned
/// reader yields the complete stream. Files that are empty or shorter than a
/// magic number are treated as plain.
///
/// # Errors
///
/// Returns an error if reading the leading bytes fails or if the zstd decoder
/// cannot be constructed.
fn detect_compression_file(mut file: File) -> std::io::Result<DecompressionReader> {
    let (prefix, format) = sniff_format(&mut file)?;
    let chained = Cursor::new(prefix).chain(file);
    let reader = match format {
        SniffedFormat::Gzip => {
            DecompressionReader::Gzip(BufReader::new(MultiGzDecoder::new(chained)))
        }
        SniffedFormat::Zstd => {
            DecompressionReader::Zstd(BufReader::new(zstd::Decoder::new(chained)?))
        }
        SniffedFormat::Plain => DecompressionReader::Plain(BufReader::new(chained)),
    };
    Ok(reader)
}

/// Wraps an arbitrary reader so that gzip or zstd input is transparently decompressed.
///
/// The format is chosen from the first bytes of `reader`; anything that matches
/// neither magic number is passed through unchanged. The sniffed bytes are
/// chained back in front of `reader`, so no input is lost.
///
/// # Errors
///
/// Returns an error if reading the leading bytes fails or if the zstd decoder
/// cannot be constructed.
pub fn maybe_decompress<R: Read + Send + 'static>(
    mut reader: R,
) -> std::io::Result<Box<dyn Read + Send>> {
    let (prefix, format) = sniff_format(&mut reader)?;
    let chained: Chain<Cursor<Vec<u8>>, R> = Cursor::new(prefix).chain(reader);
    let decoded: Box<dyn Read + Send> = match format {
        SniffedFormat::Gzip => Box::new(MultiGzDecoder::new(chained)),
        SniffedFormat::Zstd => Box::new(zstd::Decoder::new(chained)?),
        SniffedFormat::Plain => Box::new(chained),
    };
    Ok(decoded)
}
impl DecompressionReader {
    /// Opens `path` and returns a reader matching the compression format found in its leading bytes.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened, if the path has a `zip` extension
    /// (compared case-insensitively; ZIP archives are not supported), or if
    /// format detection reports an I/O error.
    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path_ref = path.as_ref();
        let file = File::open(path_ref)?;

        let has_zip_extension = path_ref
            .extension()
            .and_then(|ext| ext.to_str())
            .map(str::to_lowercase)
            .as_deref()
            == Some("zip");
        if has_zip_extension {
            return Err(anyhow!("ZIP file decompression is not supported. Only gzip and zstd files are supported for streaming decompression. Extract the ZIP file first: unzip {}", path_ref.display()));
        }

        match detect_compression_file(file) {
            Ok(reader) => Ok(reader),
            Err(e) => Err(anyhow!("Failed to detect compression format: {}", e)),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::{BufRead, Read, Write};
    use std::path::PathBuf;
    use tempfile::NamedTempFile;

    /// Creates a temporary file containing exactly `bytes`.
    fn temp_file_with(bytes: &[u8]) -> NamedTempFile {
        let mut file = NamedTempFile::new().expect("create temp file");
        file.write_all(bytes).expect("write temp file");
        file.flush().expect("flush temp file");
        file
    }

    /// Gzip-compresses `data` into a single member.
    fn gzip(data: &[u8]) -> Vec<u8> {
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(data).expect("gzip write");
        encoder.finish().expect("gzip finish")
    }

    /// Deletes the wrapped path on drop, so files created next to a temp file
    /// are cleaned up even when an assertion panics.
    struct RemoveOnDrop(PathBuf);

    impl Drop for RemoveOnDrop {
        fn drop(&mut self) {
            let _ = std::fs::remove_file(&self.0);
        }
    }

    #[test]
    fn test_plain_file_passthrough() -> Result<()> {
        let file = temp_file_with(b"test line 1\ntest line 2\n");
        let mut reader = DecompressionReader::new(file.path())?;
        let mut content = String::new();
        reader.read_to_string(&mut content)?;
        assert_eq!(content, "test line 1\ntest line 2\n");
        Ok(())
    }

    #[test]
    fn test_zip_file_rejection() {
        let temp_file = NamedTempFile::new().unwrap();
        let zip_path = RemoveOnDrop(temp_file.path().with_extension("zip"));
        std::fs::write(&zip_path.0, b"fake zip content").unwrap();

        let result = DecompressionReader::new(&zip_path.0);
        assert!(result.is_err());
        let error_msg = result.unwrap_err().to_string();
        assert!(error_msg.contains("ZIP file decompression is not supported"));
        assert!(error_msg.contains("Only gzip and zstd files are supported"));
    }

    #[test]
    fn test_zstd_magic_bytes_detection() -> Result<()> {
        use std::process::Command;

        let expected = "test line 1\ntest line 2\ntest line 3\n";
        let source = temp_file_with(expected.as_bytes());
        let zstd_path = RemoveOnDrop(source.path().with_extension("zst"));

        // The fixture is produced by the external `zstd` CLI; skip when it is unavailable.
        let compressed = Command::new("zstd")
            .arg("-q")
            .arg("-f")
            .arg(source.path())
            .arg("-o")
            .arg(&zstd_path.0)
            .status();
        if !matches!(compressed, Ok(status) if status.success()) {
            eprintln!("Skipping zstd test: zstd command not available");
            return Ok(());
        }

        let mut reader = DecompressionReader::new(&zstd_path.0)?;
        assert!(matches!(reader, DecompressionReader::Zstd(_)));
        let mut content = String::new();
        reader.read_to_string(&mut content)?;
        assert_eq!(content, expected);
        Ok(())
    }

    #[test]
    fn test_magic_bytes_detection() -> Result<()> {
        let plain = temp_file_with(b"plain text file\n");
        let mut reader = DecompressionReader::new(plain.path())?;
        assert!(matches!(reader, DecompressionReader::Plain(_)));
        let mut content = String::new();
        reader.read_to_string(&mut content)?;
        assert!(content.contains("plain text file"));

        // Only the leading bytes decide the variant; the payload is not validated on open.
        let fake_gzip = temp_file_with(b"\x1F\x8B\x08fake gzip data");
        let reader = DecompressionReader::new(fake_gzip.path())?;
        assert!(matches!(reader, DecompressionReader::Gzip(_)));
        Ok(())
    }

    #[test]
    fn test_gzip_multiple_members() -> Result<()> {
        let mut bytes = gzip(b"first member\n");
        bytes.extend(gzip(b"second member\n"));
        let file = temp_file_with(&bytes);

        let mut reader = DecompressionReader::new(file.path())?;
        let mut content = String::new();
        reader.read_to_string(&mut content)?;
        assert_eq!(content, "first member\nsecond member\n");
        Ok(())
    }

    #[test]
    fn test_corrupted_gzip_data() {
        let mut compressed = gzip(b"test data");
        // Flip up to two bytes right after the fixed 10-byte gzip header,
        // without indexing past the end of the buffer.
        for byte in compressed.iter_mut().skip(10).take(2) {
            *byte = !*byte;
        }
        let file = temp_file_with(&compressed);

        let mut reader = DecompressionReader::new(file.path())
            .expect("opening only inspects the magic bytes");
        let mut content = String::new();
        let read_result = reader.read_to_string(&mut content);
        assert!(read_result.is_err() || content != "test data");
    }

    #[test]
    fn test_empty_file() -> Result<()> {
        let file = NamedTempFile::new()?;
        let mut reader = DecompressionReader::new(file.path())?;
        let mut content = String::new();
        reader.read_to_string(&mut content)?;
        assert_eq!(content, "");
        Ok(())
    }

    #[test]
    fn test_very_small_file() -> Result<()> {
        let file = temp_file_with(b"x");
        let mut reader = DecompressionReader::new(file.path())?;
        let mut content = String::new();
        reader.read_to_string(&mut content)?;
        assert_eq!(content, "x");
        Ok(())
    }

    #[test]
    fn test_file_with_only_partial_magic_bytes() -> Result<()> {
        let file = temp_file_with(&[0x1F, 0x8B]);
        let mut reader = DecompressionReader::new(file.path())?;
        assert!(matches!(reader, DecompressionReader::Plain(_)));
        let mut content = Vec::new();
        reader.read_to_end(&mut content)?;
        assert_eq!(content, vec![0x1F, 0x8B]);
        Ok(())
    }

    #[test]
    fn test_gzip_with_no_extension() -> Result<()> {
        let file = temp_file_with(&gzip(b"compressed content\n"));
        let mut reader = DecompressionReader::new(file.path())?;
        let mut content = String::new();
        reader.read_to_string(&mut content)?;
        assert_eq!(content, "compressed content\n");
        Ok(())
    }

    #[test]
    fn test_decompression_reader_debug_impl() {
        let file = temp_file_with(b"test\n");
        let reader = DecompressionReader::new(file.path()).unwrap();
        assert_eq!(format!("{:?}", reader), "DecompressionReader::Plain");
    }

    #[test]
    fn test_maybe_decompress_plain() -> Result<()> {
        let data = b"plain text data";
        let mut reader = maybe_decompress(Cursor::new(data.to_vec()))?;
        let mut content = Vec::new();
        reader.read_to_end(&mut content)?;
        assert_eq!(content, data);
        Ok(())
    }

    #[test]
    fn test_maybe_decompress_gzip() -> Result<()> {
        let original_data = b"test data for gzip";
        let mut reader = maybe_decompress(Cursor::new(gzip(original_data)))?;
        let mut content = Vec::new();
        reader.read_to_end(&mut content)?;
        assert_eq!(content, original_data);
        Ok(())
    }

    #[test]
    fn test_bufread_methods() -> Result<()> {
        let file = temp_file_with(b"line1\nline2\nline3\n");
        let reader = DecompressionReader::new(file.path())?;
        let lines = reader.lines().collect::<std::io::Result<Vec<String>>>()?;
        assert_eq!(lines, ["line1", "line2", "line3"]);
        Ok(())
    }

    #[test]
    fn test_file_with_binary_data() -> Result<()> {
        // JPEG-style leading bytes: binary, but neither gzip nor zstd.
        let mut bytes = vec![0xFF, 0xD8, 0xFF, 0xE0];
        bytes.extend_from_slice(b"more data");
        let file = temp_file_with(&bytes);

        let mut reader = DecompressionReader::new(file.path())?;
        let mut content = Vec::new();
        reader.read_to_end(&mut content)?;
        assert_eq!(content, bytes);
        Ok(())
    }

    #[test]
    fn test_decompression_reader_is_send() {
        fn assert_send<T: Send>() {}
        assert_send::<DecompressionReader>();
    }

    #[test]
    fn test_gzip_with_large_content() -> Result<()> {
        let large_content = "x".repeat(10_000);
        let file = temp_file_with(&gzip(large_content.as_bytes()));

        let mut reader = DecompressionReader::new(file.path())?;
        let mut content = String::new();
        reader.read_to_string(&mut content)?;
        assert_eq!(content.len(), 10_000);
        assert_eq!(content, large_content);
        Ok(())
    }

    #[test]
    fn test_file_with_zstd_magic_but_invalid_data() {
        let file = temp_file_with(b"\x28\xB5\x2F\xFDinvalid data");
        // Rejecting the stream at open time is acceptable; if it opens, reading must fail.
        if let Ok(mut reader) = DecompressionReader::new(file.path()) {
            let mut content = Vec::new();
            assert!(reader.read_to_end(&mut content).is_err());
        }
    }
}