use thiserror::Error;
const MAGIC: [u8; 3] = [0xCA, 0xCE, 0x00];
const HEADER_LEN: usize = 3 + 4;
const TAG_LITERAL: u8 = 0x00;
const TAG_BACKREF: u8 = 0x01;
#[derive(Debug, Error)]
pub enum CompressorError {
#[error("invalid compressed data: {0}")]
InvalidData(String),
#[error("length mismatch: expected {expected}, got {actual}")]
LengthMismatch {
expected: usize,
actual: usize,
},
#[error("input too large: {0} bytes")]
InputTooLarge(usize),
}
struct LevelParams {
window: usize,
max_len: usize,
}
fn params_for_level(level: u8) -> LevelParams {
match level {
0 => LevelParams { window: 64, max_len: 16 },
1 => LevelParams { window: 128, max_len: 32 },
2 => LevelParams { window: 256, max_len: 64 },
_ => LevelParams { window: 512, max_len: 128 },
}
}
#[derive(Debug, Clone)]
pub struct TierCompressor {
level: u8,
}
impl TierCompressor {
#[must_use]
pub fn new(level: u8) -> Self {
Self { level: level.min(3) }
}
pub fn compress(&self, input: &[u8]) -> Result<Vec<u8>, CompressorError> {
if input.len() > u32::MAX as usize {
return Err(CompressorError::InputTooLarge(input.len()));
}
let params = params_for_level(self.level);
let orig_len = input.len() as u32;
let mut out = Vec::with_capacity(HEADER_LEN + input.len() * 2);
out.extend_from_slice(&MAGIC);
out.extend_from_slice(&orig_len.to_le_bytes());
let mut pos = 0usize;
while pos < input.len() {
let window_start = pos.saturating_sub(params.window);
let search_window = &input[window_start..pos];
let max_match = params.max_len.min(input.len() - pos);
let best = find_longest_match(search_window, &input[pos..], max_match);
match best {
Some((offset_in_window, length)) if length >= 3 => {
let distance = (pos - window_start) - offset_in_window;
debug_assert!(distance > 0, "back-ref distance must be positive");
let dist_u16 = distance as u16;
let len_u8 = length as u8;
out.push(TAG_BACKREF);
out.push((dist_u16 & 0xFF) as u8);
out.push((dist_u16 >> 8) as u8);
out.push(len_u8);
pos += length;
}
_ => {
out.push(TAG_LITERAL);
out.push(input[pos]);
pos += 1;
}
}
}
Ok(out)
}
pub fn decompress(&self, input: &[u8]) -> Result<Vec<u8>, CompressorError> {
if input.len() < HEADER_LEN {
return Err(CompressorError::InvalidData(format!(
"too short: {} bytes (need at least {})",
input.len(),
HEADER_LEN
)));
}
if input[..3] != MAGIC {
return Err(CompressorError::InvalidData(
"magic bytes do not match".to_string(),
));
}
let orig_len = u32::from_le_bytes([input[3], input[4], input[5], input[6]]) as usize;
let mut out = Vec::with_capacity(orig_len);
let payload = &input[HEADER_LEN..];
let mut pos = 0usize;
while pos < payload.len() {
let tag = payload[pos];
pos += 1;
match tag {
TAG_LITERAL => {
if pos >= payload.len() {
return Err(CompressorError::InvalidData(
"truncated literal token".to_string(),
));
}
out.push(payload[pos]);
pos += 1;
}
TAG_BACKREF => {
if pos + 2 >= payload.len() {
return Err(CompressorError::InvalidData(
"truncated back-reference token".to_string(),
));
}
let dist_lo = payload[pos] as u16;
let dist_hi = payload[pos + 1] as u16;
let length = payload[pos + 2] as usize;
pos += 3;
let distance = (dist_hi << 8 | dist_lo) as usize;
if distance == 0 || distance > out.len() {
return Err(CompressorError::InvalidData(format!(
"invalid back-ref distance {distance} at output position {}",
out.len()
)));
}
let start = out.len() - distance;
for i in 0..length {
let byte = out[start + (i % distance)];
out.push(byte);
}
}
unknown => {
return Err(CompressorError::InvalidData(format!(
"unknown tag byte 0x{unknown:02X} at offset {pos}"
)));
}
}
}
if out.len() != orig_len {
return Err(CompressorError::LengthMismatch {
expected: orig_len,
actual: out.len(),
});
}
Ok(out)
}
#[must_use]
pub fn level(&self) -> u8 {
self.level
}
pub fn compression_ratio(&self, input: &[u8]) -> Result<f64, CompressorError> {
if input.is_empty() {
return Ok(1.0);
}
let compressed = self.compress(input)?;
Ok(compressed.len() as f64 / input.len() as f64)
}
}
fn find_longest_match(window: &[u8], lookahead: &[u8], max_len: usize) -> Option<(usize, usize)> {
if window.is_empty() || lookahead.is_empty() || max_len == 0 {
return None;
}
let mut best_offset = 0usize;
let mut best_len = 0usize;
for start in 0..window.len() {
let mut len = 0usize;
while len < max_len
&& len < lookahead.len()
&& len < window.len() - start + lookahead.len()
{
let window_idx = start + (len % (window.len() - start));
if window[window_idx] != lookahead[len] {
break;
}
len += 1;
}
if len > best_len {
best_len = len;
best_offset = start;
}
}
if best_len >= 3 {
Some((best_offset, best_len))
} else {
None
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_round_trip_ascii() {
let c = TierCompressor::new(1);
let orig = b"hello world hello world".to_vec();
let compressed = c.compress(&orig).expect("compress");
let restored = c.decompress(&compressed).expect("decompress");
assert_eq!(restored, orig);
}
#[test]
fn test_round_trip_empty() {
let c = TierCompressor::new(0);
let orig: Vec<u8> = Vec::new();
let compressed = c.compress(&orig).expect("compress empty");
let restored = c.decompress(&compressed).expect("decompress empty");
assert_eq!(restored, orig);
}
#[test]
fn test_round_trip_single_byte() {
let c = TierCompressor::new(0);
let orig = vec![0xABu8];
let compressed = c.compress(&orig).expect("compress");
let restored = c.decompress(&compressed).expect("decompress");
assert_eq!(restored, orig);
}
#[test]
fn test_round_trip_zeros() {
let c = TierCompressor::new(2);
let orig = vec![0u8; 512];
let compressed = c.compress(&orig).expect("compress");
let restored = c.decompress(&compressed).expect("decompress");
assert_eq!(restored, orig);
}
#[test]
fn test_round_trip_binary() {
let c = TierCompressor::new(1);
let orig: Vec<u8> = (0u8..=255).cycle().take(300).collect();
let compressed = c.compress(&orig).expect("compress");
let restored = c.decompress(&compressed).expect("decompress");
assert_eq!(restored, orig);
}
#[test]
fn test_zeros_compress_smaller() {
let c = TierCompressor::new(2);
let orig = vec![0u8; 256];
let compressed = c.compress(&orig).expect("compress");
assert!(
compressed.len() < orig.len(),
"compressed {} should be < original {}",
compressed.len(),
orig.len()
);
}
#[test]
fn test_decompress_invalid_magic() {
let c = TierCompressor::new(0);
let bad = vec![0xDE, 0xAD, 0xBE, 0xEF, 0, 0, 0, 0];
assert!(c.decompress(&bad).is_err());
}
#[test]
fn test_decompress_truncated() {
let c = TierCompressor::new(0);
assert!(c.decompress(&[0xCA, 0xCE, 0x00]).is_err());
}
#[test]
fn test_level_getter() {
assert_eq!(TierCompressor::new(2).level(), 2);
assert_eq!(TierCompressor::new(99).level(), 3); }
#[test]
fn test_ratio_empty() {
let c = TierCompressor::new(1);
let ratio = c.compression_ratio(&[]).expect("ratio");
assert!((ratio - 1.0).abs() < 1e-9);
}
#[test]
fn test_ratio_repeated_lt_1() {
let c = TierCompressor::new(3);
let orig = b"abcabcabcabcabcabcabcabcabc".to_vec();
let ratio = c.compression_ratio(&orig).expect("ratio");
assert!(ratio < 2.0, "ratio {ratio} should be reasonable");
}
#[test]
fn test_round_trip_all_levels() {
let orig = b"the quick brown fox jumps over the lazy dog".repeat(5);
for level in 0..=3 {
let c = TierCompressor::new(level);
let compressed = c.compress(&orig).expect("compress");
let restored = c.decompress(&compressed).expect("decompress");
assert_eq!(restored, orig, "level {level} failed round-trip");
}
}
#[test]
fn test_header_length_field() {
let c = TierCompressor::new(0);
let orig = b"abcde".to_vec();
let compressed = c.compress(&orig).expect("compress");
let stored_len = u32::from_le_bytes([
compressed[3], compressed[4], compressed[5], compressed[6],
]) as usize;
assert_eq!(stored_len, orig.len());
}
#[test]
fn test_overlapping_backref() {
let c = TierCompressor::new(2);
let orig: Vec<u8> = vec![b'a'; 200];
let compressed = c.compress(&orig).expect("compress");
let restored = c.decompress(&compressed).expect("decompress");
assert_eq!(restored, orig);
}
#[test]
fn test_large_input_round_trip() {
let c = TierCompressor::new(1);
let orig: Vec<u8> = b"media frame data segment"
.iter()
.copied()
.cycle()
.take(10_240)
.collect();
let compressed = c.compress(&orig).expect("compress");
let restored = c.decompress(&compressed).expect("decompress");
assert_eq!(restored, orig);
}
}