use std::fmt;
/// zstd compression level that `CompressOpts::default()` puts in `level`.
pub const DEFAULT_ZSTD_LEVEL: i32 = 1;
/// Size threshold in bytes that `CompressOpts::default()` puts in `min_bytes`;
/// `L2BlobCompressor::compress` stores shorter payloads uncompressed.
pub const DEFAULT_MIN_BYTES: usize = 1024;
/// Ratio cutoff that `CompressOpts::default()` puts in `max_ratio`; an encoding
/// whose length is at least `original_len * max_ratio` is discarded in favour of
/// the raw bytes.
pub const DEFAULT_MAX_RATIO: f64 = 0.95;
/// Tuning knobs read by `L2BlobCompressor::compress`.
#[derive(Clone, Copy, Debug)]
pub struct CompressOpts {
    /// Compression level handed to the zstd encoder.
    pub level: i32,
    /// Inputs shorter than this many bytes are stored uncompressed.
    pub min_bytes: usize,
    /// The encoding is kept only when its length is strictly below
    /// `original_len * max_ratio`.
    pub max_ratio: f64,
}

impl Default for CompressOpts {
    /// Builds the options from the module-level `DEFAULT_*` constants.
    fn default() -> Self {
        let level = DEFAULT_ZSTD_LEVEL;
        let min_bytes = DEFAULT_MIN_BYTES;
        let max_ratio = DEFAULT_MAX_RATIO;
        CompressOpts {
            level,
            min_bytes,
            max_ratio,
        }
    }
}
/// A blob in its stored form: either untouched or zstd-encoded.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Compressed {
    /// The original bytes, kept verbatim.
    Raw(Vec<u8>),
    /// Encoded bytes together with the length of the data before encoding.
    Zstd { bytes: Vec<u8>, original_len: u32 },
}

impl Compressed {
    /// Length in bytes of the payload held by this value, whichever variant it is.
    pub fn stored_len(&self) -> usize {
        let (Self::Raw(payload) | Self::Zstd { bytes: payload, .. }) = self;
        payload.len()
    }

    /// Length in bytes of the data before compression.
    ///
    /// For `Raw` this is the payload length; for `Zstd` it is the recorded
    /// `original_len` field.
    pub fn original_len(&self) -> usize {
        match *self {
            Self::Zstd { original_len, .. } => original_len as usize,
            Self::Raw(ref raw) => raw.len(),
        }
    }

    /// Returns `true` for the `Zstd` variant and `false` for `Raw`.
    pub fn is_compressed(&self) -> bool {
        match self {
            Self::Zstd { .. } => true,
            Self::Raw(_) => false,
        }
    }
}
/// Failures reported by `L2BlobCompressor`.
#[derive(Debug)]
pub enum CompressError {
    /// The zstd encoder returned an error; carries its message.
    ZstdEncode(String),
    /// Decoding failed, or the decoded length disagreed with the recorded one.
    ZstdDecode(String),
    /// Rendered as "unknown compressed format"; nothing in the visible code
    /// constructs this variant.
    UnknownFormat,
    /// The input length (carried here) does not fit the `u32` original-length field.
    OversizeOriginal(usize),
}

impl fmt::Display for CompressError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::UnknownFormat => f.write_str("unknown compressed format"),
            Self::OversizeOriginal(n) => {
                write!(f, "payload of {n} bytes exceeds u32::MAX original-length cap")
            }
            Self::ZstdDecode(msg) => write!(f, "zstd decode failed: {msg}"),
            Self::ZstdEncode(msg) => write!(f, "zstd encode failed: {msg}"),
        }
    }
}

impl std::error::Error for CompressError {}
/// Stateless namespace type: `compress` and `decompress` are associated
/// functions that take no `self`.
pub struct L2BlobCompressor;
impl L2BlobCompressor {
pub fn compress(
bytes: &[u8],
content_type: Option<&str>,
opts: &CompressOpts,
) -> Result<Compressed, CompressError> {
if bytes.len() > u32::MAX as usize {
return Err(CompressError::OversizeOriginal(bytes.len()));
}
if bytes.len() < opts.min_bytes {
return Ok(Compressed::Raw(bytes.to_vec()));
}
if let Some(ct) = content_type {
if is_precompressed_media(ct) {
return Ok(Compressed::Raw(bytes.to_vec()));
}
}
let encoded = zstd::stream::encode_all(bytes, opts.level)
.map_err(|e| CompressError::ZstdEncode(e.to_string()))?;
let cutoff = (bytes.len() as f64) * opts.max_ratio;
if (encoded.len() as f64) >= cutoff {
return Ok(Compressed::Raw(bytes.to_vec()));
}
Ok(Compressed::Zstd {
bytes: encoded,
original_len: bytes.len() as u32,
})
}
/// Recovers the original bytes from a stored blob.
///
/// `Raw` payloads are cloned. `Zstd` payloads are decoded and the decoded
/// length is checked against the recorded `original_len`.
///
/// The recorded length is treated as untrusted: at most `original_len + 1`
/// bytes are ever decoded, and only a bounded amount of memory is reserved
/// before decoding starts.
///
/// # Errors
///
/// `CompressError::ZstdDecode` when the decoder cannot be created, the stream
/// fails to decode, or the decoded length differs from `original_len`.
pub fn decompress(c: &Compressed) -> Result<Vec<u8>, CompressError> {
    // Largest buffer reserved up front. `original_len` comes from the stored
    // value, so reserving it blindly would let a tiny corrupt blob request
    // up to 4 GiB before a single byte has been decoded.
    const MAX_PREALLOC: usize = 1 << 20;

    match c {
        Compressed::Raw(b) => Ok(b.clone()),
        Compressed::Zstd {
            bytes,
            original_len,
        } => {
            let expected = u64::from(*original_len);
            let decoder = zstd::stream::Decoder::new(bytes.as_slice())
                .map_err(|e| CompressError::ZstdDecode(e.to_string()))?;
            // One byte of slack: a well-formed stream is still read to its end
            // (so the decoder sees the whole frame), while an over-long one is
            // detected without inflating it completely.
            let mut bounded = std::io::Read::take(decoder, expected + 1);
            let mut out: Vec<u8> =
                Vec::with_capacity((*original_len as usize).min(MAX_PREALLOC));
            let written = std::io::copy(&mut bounded, &mut out)
                .map_err(|e| CompressError::ZstdDecode(e.to_string()))?;
            if written > expected {
                // Decoding stopped at the limit, so the true size is unknown.
                return Err(CompressError::ZstdDecode(format!(
                    "decoded more than the expected {original_len} bytes"
                )));
            }
            if written != expected {
                return Err(CompressError::ZstdDecode(format!(
                    "decoded {written} bytes, expected {original_len}"
                )));
            }
            Ok(out)
        }
    }
}
}
/// Decides from a content-type string whether the payload is treated as
/// already compressed, so that re-encoding it is skipped.
///
/// Parameters after the first `;` and surrounding whitespace are ignored, and
/// the comparison is ASCII case-insensitive.
///
/// Returns `true` for:
/// - every `image/*` type except `image/svg+xml`;
/// - every `video/*` type;
/// - every `audio/*` type except `audio/wav` and `audio/x-wav`;
/// - `application/zip`, `application/gzip`, `application/x-brotli`,
///   `application/x-zstd`, `application/zstd` and `application/octet-stream`.
///
/// Everything else, including an empty string, yields `false`.
fn is_precompressed_media(content_type: &str) -> bool {
    let head = content_type.split(';').next().unwrap_or("").trim();
    let lower = head.to_ascii_lowercase();

    if let Some(subtype) = lower.strip_prefix("image/") {
        // SVG is the one image type in this list that is left to be compressed.
        return subtype != "svg+xml";
    }
    if lower.starts_with("video/") {
        return true;
    }
    if let Some(subtype) = lower.strip_prefix("audio/") {
        return !matches!(subtype, "wav" | "x-wav");
    }
    match lower.strip_prefix("application/") {
        Some(subtype) => matches!(
            subtype,
            "zip"
                | "gzip"
                | "x-brotli"
                | "x-zstd"
                // The registered name for zstd payloads, alongside the legacy `x-` form.
                | "zstd"
                | "octet-stream"
        ),
        None => false,
    }
}
#[cfg(test)]
mod tests {
use super::*;
/// Produces `len` deterministic, high-entropy bytes from `seed`.
///
/// The seed is scrambled once, then a 64-bit linear congruential step is
/// applied per byte and bits 33..41 of the state are emitted.
fn pseudo_random(seed: u64, len: usize) -> Vec<u8> {
    let mut lcg = seed.wrapping_mul(0x9E37_79B9_7F4A_7C15).wrapping_add(1);
    (0..len)
        .map(|_| {
            lcg = lcg
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            (lcg >> 33) as u8
        })
        .collect()
}
/// Returns exactly 4096 bytes of a lorem-ipsum paragraph repeated end to end,
/// with the final repetition cut off at the 4096-byte mark.
fn lorem_4kb() -> Vec<u8> {
    const TARGET_LEN: usize = 4096;
    let paragraph: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. \
        Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. \
        Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris \
        nisi ut aliquip ex ea commodo consequat. ";
    paragraph
        .iter()
        .copied()
        .cycle()
        .take(TARGET_LEN)
        .collect()
}
/// Compressing then decompressing must return the input unchanged, for lengths
/// on both sides of the default 1024-byte threshold (including zero).
#[test]
fn round_trip_property_across_sizes() {
    const SIZES: [usize; 14] = [
        0, 1, 16, 64, 255, 511, 1023, 1024, 1025, 2048, 4096, 8192, 12345, 16384,
    ];
    let opts = CompressOpts::default();

    for (i, len) in SIZES.iter().copied().enumerate() {
        // A different seed per size keeps the inputs unrelated to each other.
        let seed = 0xDEAD_BEEF ^ (i as u64);
        let input = pseudo_random(seed, len);

        let compressed = L2BlobCompressor::compress(&input, None, &opts)
            .expect("compress should not fail on in-memory input");
        let decoded = L2BlobCompressor::decompress(&compressed)
            .expect("decompress should not fail on freshly-encoded input");

        assert_eq!(decoded, input, "round-trip mismatch at len={len}");
        assert_eq!(compressed.original_len(), input.len());
    }
}
/// Repetitive text must come back as `Zstd` and occupy at most 70% of its
/// original size.
#[test]
fn text_payload_shrinks_at_least_thirty_percent() {
    let opts = CompressOpts::default();
    let input = lorem_4kb();
    let compressed = L2BlobCompressor::compress(&input, Some("text/plain"), &opts)
        .expect("compress text payload");

    // Anything other than the Zstd variant is an immediate failure.
    let (bytes, original_len) = match compressed {
        Compressed::Zstd {
            bytes,
            original_len,
        } => (bytes, original_len),
        other => panic!("expected Zstd variant for repetitive text, got {other:?}"),
    };

    assert_eq!(original_len as usize, input.len());
    let ratio = bytes.len() as f64 / input.len() as f64;
    assert!(
        ratio <= 0.70,
        "expected >=30% reduction, got ratio {ratio} ({}/{})",
        bytes.len(),
        input.len()
    );
}
/// A 64-byte payload is below the default size threshold and must be stored
/// verbatim.
#[test]
fn tiny_payload_returns_raw() {
    let opts = CompressOpts::default();
    let input = vec![0xABu8; 64];
    let out = L2BlobCompressor::compress(&input, None, &opts).unwrap();

    let stored = match out {
        Compressed::Raw(bytes) => bytes,
        other => panic!("expected Raw for tiny payload, got {other:?}"),
    };
    assert_eq!(stored, input);
}
/// 8 KiB of zeros is above the default size threshold and highly compressible,
/// so a `Raw` result here can only come from the `image/png` content type.
#[test]
fn image_png_content_type_returns_raw_even_when_large() {
    let opts = CompressOpts::default();
    let input = vec![0u8; 8 * 1024];
    let out = L2BlobCompressor::compress(&input, Some("image/png"), &opts).unwrap();

    let stored_raw = matches!(out, Compressed::Raw(_));
    assert!(stored_raw, "PNG must be Raw");
}
/// `image/svg+xml` is the one `image/*` type that must still be compressed.
#[test]
fn image_svg_is_compressed_as_exception() {
    let chunk =
        b"<svg xmlns='http://www.w3.org/2000/svg'><rect width='10' height='10'/></svg>\n";
    // Smallest whole number of chunks that reaches at least 4096 bytes.
    let copies = (4096 + chunk.len() - 1) / chunk.len();
    let input = chunk[..].repeat(copies);

    let opts = CompressOpts::default();
    let out = L2BlobCompressor::compress(&input, Some("image/svg+xml"), &opts).unwrap();
    assert!(out.is_compressed(), "image/svg+xml should be compressed");
}
/// Pseudo-random bytes do not shrink enough to pass the ratio cutoff, so the
/// result must be the untouched input.
#[test]
fn high_entropy_payload_returns_raw_via_max_ratio_gate() {
    let opts = CompressOpts::default();
    let input = pseudo_random(0xCAFE_F00D, 8 * 1024);
    let out = L2BlobCompressor::compress(&input, None, &opts).unwrap();

    let stored = match out {
        Compressed::Raw(bytes) => bytes,
        Compressed::Zstd { bytes, .. } => {
            panic!(
                "high-entropy input was kept as Zstd ({} bytes vs {} original) — \
                 max_ratio gate failed",
                bytes.len(),
                input.len()
            );
        }
    };
    assert_eq!(stored, input);
}
/// Bytes that are not a zstd stream must surface as `ZstdDecode`, not a panic.
#[test]
fn malformed_zstd_bytes_yield_decode_error() {
    let garbage = vec![0x00, 0x01, 0x02, 0x03, 0xFF, 0xFE, 0xFD, 0xFC];
    let bogus = Compressed::Zstd {
        bytes: garbage,
        original_len: 4096,
    };

    let err = L2BlobCompressor::decompress(&bogus).expect_err("must fail to decode");
    let is_decode_error = matches!(err, CompressError::ZstdDecode(_));
    assert!(is_decode_error, "expected ZstdDecode, got {err:?}");
}
/// A valid stream whose recorded `original_len` is off by one must be rejected
/// with `ZstdDecode`.
#[test]
fn decoded_length_mismatch_yields_decode_error() {
    let input = lorem_4kb();
    let opts = CompressOpts::default();
    let truthful = L2BlobCompressor::compress(&input, Some("text/plain"), &opts).unwrap();

    // Reuse the genuine encoded bytes but record a length that is one too large.
    let bytes = match truthful {
        Compressed::Zstd { bytes, .. } => bytes,
        other => panic!("expected Zstd, got {other:?}"),
    };
    let lying = Compressed::Zstd {
        bytes,
        original_len: (input.len() as u32) + 1,
    };

    let err = L2BlobCompressor::decompress(&lying).expect_err("must fail length check");
    assert!(matches!(err, CompressError::ZstdDecode(_)));
}
/// An input longer than `u32::MAX` bytes must be rejected with
/// `OversizeOriginal` carrying the offending length.
///
/// Only built where `usize` is 64 bits wide: on narrower targets
/// `(u32::MAX as usize) + 1` overflows, so no such length exists.
#[cfg(target_pointer_width = "64")]
#[test]
fn oversize_input_returns_oversize_error() {
    let backing = [0u8; 16];
    let fake_len = (u32::MAX as usize) + 1;
    // NOTE(review): this slice claims far more bytes than `backing` owns, which
    // does not meet the documented requirements of `slice::from_raw_parts`.
    // The test depends on `compress` rejecting the input by length alone,
    // before reading any element. A sound version needs a real allocation of
    // more than 4 GiB, or a length check that can be exercised without a slice.
    let oversized: &[u8] = unsafe { std::slice::from_raw_parts(backing.as_ptr(), fake_len) };

    let err = L2BlobCompressor::compress(oversized, None, &CompressOpts::default())
        .expect_err("must reject oversize input");
    match err {
        CompressError::OversizeOriginal(n) => assert_eq!(n, fake_len),
        other => panic!("expected OversizeOriginal, got {other:?}"),
    }
}
/// Checks the classifier against one table of content types it must accept
/// and one it must reject, including mixed-case input with parameters.
#[test]
fn precompressed_media_classifier_handles_known_buckets() {
    let treated_as_compressed = [
        "image/png",
        "image/jpeg",
        "video/mp4",
        "video/webm",
        "audio/mpeg",
        "application/zip",
        "application/gzip",
        "application/x-brotli",
        "application/x-zstd",
        "application/octet-stream",
        "Image/PNG; foo=bar",
    ];
    let left_compressible = [
        "image/svg+xml",
        "audio/wav",
        "audio/x-wav",
        "text/plain",
        "application/json",
        "Text/Plain; charset=utf-8",
    ];

    for &content_type in &treated_as_compressed {
        assert!(is_precompressed_media(content_type));
    }
    for &content_type in &left_compressible {
        assert!(!is_precompressed_media(content_type));
    }
}
}