use std::io::{Read, Write};
use bytes::Bytes;
use flate2::Compression;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use crate::{ChunkManifest, Codec, CodecError, CodecKind};
/// Gzip codec settings: the compression level handed to the encoder.
///
/// Instances built through [`CpuGzip::new`] never carry a level above 9.
#[derive(Debug, Clone)]
pub struct CpuGzip {
    /// Compression level; at most 9 when constructed via `new`.
    level: u32,
}

impl CpuGzip {
    /// Level used by the [`Default`] implementation.
    pub const DEFAULT_LEVEL: u32 = 6;

    /// Builds a codec for the requested compression `level`.
    ///
    /// Requests above 9 are silently lowered to 9 rather than rejected.
    pub fn new(level: u32) -> Self {
        let level = std::cmp::min(level, 9);
        Self { level }
    }
}

impl Default for CpuGzip {
    /// Same as `CpuGzip::new(CpuGzip::DEFAULT_LEVEL)`.
    fn default() -> Self {
        CpuGzip::new(CpuGzip::DEFAULT_LEVEL)
    }
}
/// Synchronously inflates a gzip chunk and validates it against `manifest`.
///
/// Checks are performed in this order:
/// 1. `manifest.codec` must be [`CodecKind::CpuGzip`];
/// 2. `input.len()` must equal `manifest.compressed_size`;
/// 3. the inflated output must not exceed `manifest.original_size`
///    (reported as a decompression bomb);
/// 4. the inflated length must equal `manifest.original_size`;
/// 5. the CRC-32C of the inflated bytes must equal `manifest.crc32c`.
///
/// # Errors
///
/// * [`CodecError::CodecMismatch`] when the manifest names another codec.
/// * [`CodecError::SizeMismatch`] when the compressed or inflated length
///   disagrees with the manifest.
/// * [`CodecError::Io`] when the gzip stream cannot be decoded or inflates to
///   more bytes than the manifest claims.
/// * [`CodecError::CrcMismatch`] when the checksum of the output differs.
pub fn decompress_blocking(input: &[u8], manifest: &ChunkManifest) -> Result<Vec<u8>, CodecError> {
    // DEFLATE's maximum expansion factor is about 1032:1 (see zlib's technical
    // notes). It is used only to bound the up-front allocation below; the read
    // limit still follows the manifest, and the buffer grows if ever needed.
    const MAX_DEFLATE_RATIO: u64 = 1032;

    if manifest.codec != CodecKind::CpuGzip {
        return Err(CodecError::CodecMismatch {
            expected: CodecKind::CpuGzip,
            got: manifest.codec,
        });
    }
    if input.len() as u64 != manifest.compressed_size {
        return Err(CodecError::SizeMismatch {
            expected: manifest.compressed_size,
            got: input.len() as u64,
        });
    }

    // Read slightly past the claimed size so an oversized stream is noticed
    // without inflating more than `original_size + 1024` bytes.
    let limit = manifest.original_size.saturating_add(1024);

    // `original_size` comes from the manifest and has not been verified yet, so
    // it must not size an allocation on its own: a huge claim would panic with
    // a capacity overflow or reserve an absurd amount of memory before a single
    // byte is decoded. Bound it by what `input` could plausibly inflate to and
    // by the largest capacity a `Vec` may have.
    let plausible = (input.len() as u64).saturating_mul(MAX_DEFLATE_RATIO);
    let capacity = manifest
        .original_size
        .min(plausible)
        .min(isize::MAX as u64) as usize;
    let mut buf = Vec::with_capacity(capacity);

    let mut decoder = GzDecoder::new(input);
    (&mut decoder)
        .take(limit)
        .read_to_end(&mut buf)
        .map_err(CodecError::Io)?;

    if (buf.len() as u64) > manifest.original_size {
        return Err(CodecError::Io(std::io::Error::other(format!(
            "gzip decompression bomb detected: produced {} bytes, manifest claimed {}",
            buf.len(),
            manifest.original_size
        ))));
    }
    if buf.len() as u64 != manifest.original_size {
        return Err(CodecError::SizeMismatch {
            expected: manifest.original_size,
            got: buf.len() as u64,
        });
    }

    let actual_crc = crc32c::crc32c(&buf);
    if actual_crc != manifest.crc32c {
        return Err(CodecError::CrcMismatch {
            expected: manifest.crc32c,
            got: actual_crc,
        });
    }
    Ok(buf)
}
/// Synchronously gzip-compresses `input` and describes the result.
///
/// `level` is lowered to 9 when a larger value is passed. The returned
/// [`ChunkManifest`] records the codec kind, the length and CRC-32C of the
/// *uncompressed* input, and the length of the compressed output.
///
/// # Errors
///
/// Returns [`CodecError::Io`] when the encoder fails to write or finish the
/// stream.
pub fn compress_blocking(input: &[u8], level: u32) -> Result<(Vec<u8>, ChunkManifest), CodecError> {
    let level = level.min(9);
    let original_size = input.len() as u64;
    let original_crc = crc32c::crc32c(input);

    let mut encoder = GzEncoder::new(Vec::new(), Compression::new(level));
    encoder.write_all(input).map_err(CodecError::Io)?;
    let compressed = encoder.finish().map_err(CodecError::Io)?;

    // Build the manifest before returning so the buffer can be moved out
    // instead of being cloned just to read its length afterwards.
    let manifest = ChunkManifest {
        codec: CodecKind::CpuGzip,
        original_size,
        compressed_size: compressed.len() as u64,
        crc32c: original_crc,
    };
    Ok((compressed, manifest))
}
#[async_trait::async_trait]
impl Codec for CpuGzip {
    /// Identifies this codec as [`CodecKind::CpuGzip`].
    fn kind(&self) -> CodecKind {
        CodecKind::CpuGzip
    }

    /// Gzip-compresses `input` on tokio's blocking pool.
    ///
    /// The manifest records the length and CRC-32C of the uncompressed input
    /// and the length of the compressed output.
    ///
    /// # Errors
    ///
    /// Fails when the blocking task cannot be joined or the encoder reports an
    /// I/O error.
    async fn compress(&self, input: Bytes) -> Result<(Bytes, ChunkManifest), CodecError> {
        let level = self.level;
        let original_size = input.len() as u64;
        let original_crc = crc32c::crc32c(&input);

        let compressed = tokio::task::spawn_blocking(move || -> std::io::Result<Vec<u8>> {
            let mut encoder = GzEncoder::new(Vec::new(), Compression::new(level));
            encoder.write_all(input.as_ref())?;
            encoder.finish()
        })
        // First `?`: joining the blocking task; second `?`: the encoder's result.
        .await??;

        let manifest = ChunkManifest {
            codec: CodecKind::CpuGzip,
            original_size,
            compressed_size: compressed.len() as u64,
            crc32c: original_crc,
        };
        Ok((Bytes::from(compressed), manifest))
    }

    /// Inflates `input` on tokio's blocking pool and validates it against
    /// `manifest`.
    ///
    /// The codec kind and compressed length are checked before any work is
    /// scheduled; the inflated length and CRC-32C are checked afterwards.
    ///
    /// # Errors
    ///
    /// * [`CodecError::CodecMismatch`] when the manifest names another codec.
    /// * [`CodecError::SizeMismatch`] when the compressed or inflated length
    ///   disagrees with the manifest.
    /// * [`CodecError::CrcMismatch`] when the checksum of the output differs.
    /// * An error converted from the blocking task when it cannot be joined,
    ///   the stream cannot be decoded, or it inflates to more bytes than the
    ///   manifest claims.
    async fn decompress(
        &self,
        input: Bytes,
        manifest: &ChunkManifest,
    ) -> Result<Bytes, CodecError> {
        // DEFLATE's maximum expansion factor is about 1032:1 (see zlib's
        // technical notes). It only bounds the up-front allocation below; the
        // read limit still follows the manifest.
        const MAX_DEFLATE_RATIO: u64 = 1032;

        if manifest.codec != CodecKind::CpuGzip {
            return Err(CodecError::CodecMismatch {
                expected: CodecKind::CpuGzip,
                got: manifest.codec,
            });
        }
        if input.len() as u64 != manifest.compressed_size {
            return Err(CodecError::SizeMismatch {
                expected: manifest.compressed_size,
                got: input.len() as u64,
            });
        }

        // Copy the fields out so the closure does not borrow `manifest`.
        let expected_crc = manifest.crc32c;
        let expected_orig_size = manifest.original_size;

        let decompressed = tokio::task::spawn_blocking(move || -> std::io::Result<Vec<u8>> {
            // Read slightly past the claimed size so an oversized stream is
            // noticed without inflating more than `original_size + 1024` bytes.
            let limit = expected_orig_size.saturating_add(1024);

            // The claimed size is unverified at this point, so it must not size
            // an allocation on its own: a huge claim would panic with a
            // capacity overflow or reserve an absurd amount of memory. Bound it
            // by what `input` could plausibly inflate to and by the largest
            // capacity a `Vec` may have.
            let plausible = (input.len() as u64).saturating_mul(MAX_DEFLATE_RATIO);
            let capacity = expected_orig_size
                .min(plausible)
                .min(isize::MAX as u64) as usize;
            let mut buf = Vec::with_capacity(capacity);

            let mut decoder = GzDecoder::new(input.as_ref());
            (&mut decoder).take(limit).read_to_end(&mut buf)?;
            if (buf.len() as u64) > expected_orig_size {
                return Err(std::io::Error::other(format!(
                    "gzip decompression bomb detected: produced {} bytes, manifest claimed {}",
                    buf.len(),
                    expected_orig_size
                )));
            }
            Ok(buf)
        })
        // First `?`: joining the blocking task; second `?`: the decoder's result.
        .await??;

        if decompressed.len() as u64 != expected_orig_size {
            return Err(CodecError::SizeMismatch {
                expected: expected_orig_size,
                got: decompressed.len() as u64,
            });
        }
        let actual_crc = crc32c::crc32c(&decompressed);
        if actual_crc != expected_crc {
            return Err(CodecError::CrcMismatch {
                expected: expected_crc,
                got: actual_crc,
            });
        }
        Ok(Bytes::from(decompressed))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Read;

    /// Two-byte magic number expected at the start of the compressed output.
    const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];

    /// A short payload survives compress -> decompress and the manifest
    /// describes the original input.
    #[tokio::test]
    async fn roundtrip_small() {
        let gzip = CpuGzip::default();
        let payload = Bytes::from_static(b"the quick brown fox jumps over the lazy dog ");

        let (packed, manifest) = gzip.compress(payload.clone()).await.unwrap();
        assert_eq!(manifest.codec, CodecKind::CpuGzip);
        assert_eq!(manifest.original_size, payload.len() as u64);

        let unpacked = gzip.decompress(packed, &manifest).await.unwrap();
        assert_eq!(unpacked, payload);
    }

    /// 1 MiB of a single repeated byte compresses to under 2 KiB and
    /// round-trips.
    #[tokio::test]
    async fn roundtrip_compressible() {
        let gzip = CpuGzip::default();
        let payload = Bytes::from(vec![b'x'; 1024 * 1024]);

        let (packed, manifest) = gzip.compress(payload.clone()).await.unwrap();
        assert!(
            packed.len() < 2048,
            "expected gzip to compress 1 MiB of x bytes well, got {} bytes",
            packed.len()
        );

        let unpacked = gzip.decompress(packed, &manifest).await.unwrap();
        assert_eq!(unpacked, payload);
    }

    /// The codec's output starts with the gzip magic and can be read back by a
    /// plain flate2 decoder that knows nothing about the manifest.
    #[tokio::test]
    async fn output_is_decodable_by_stock_gunzip() {
        let gzip = CpuGzip::default();
        let input = Bytes::from(b"hello squished world\n".repeat(100));

        let (packed, _manifest) = gzip.compress(input.clone()).await.unwrap();
        assert_eq!(&packed[..2], &GZIP_MAGIC, "must start with gzip magic");

        let mut buf = Vec::new();
        let mut stock = flate2::read::GzDecoder::new(packed.as_ref());
        stock.read_to_end(&mut buf).unwrap();
        assert_eq!(buf, input.as_ref());
    }

    /// A manifest naming a different codec is rejected with `CodecMismatch`.
    #[tokio::test]
    async fn rejects_codec_mismatch() {
        let gzip = CpuGzip::default();
        let foreign = ChunkManifest {
            codec: CodecKind::CpuZstd,
            original_size: 10,
            compressed_size: 10,
            crc32c: 0,
        };

        let outcome = gzip
            .decompress(Bytes::from_static(b"0123456789"), &foreign)
            .await;
        let err = outcome.unwrap_err();
        assert!(matches!(err, CodecError::CodecMismatch { .. }));
    }

    /// The synchronous helpers round-trip and emit the gzip magic as well.
    #[test]
    fn blocking_roundtrip_and_gzip_magic() {
        let input = b"hello squished world\n".repeat(100);

        let (packed, manifest) = compress_blocking(&input, CpuGzip::DEFAULT_LEVEL).unwrap();
        assert_eq!(&packed[..2], &GZIP_MAGIC);

        let unpacked = decompress_blocking(&packed, &manifest).unwrap();
        assert_eq!(unpacked, input);
    }
}