use thiserror::Error;
/// Errors returned by the `compress` / `decompress` methods of [`Compressor`].
///
/// Every variant carries a `String` that is interpolated into the `Display`
/// message produced by the `thiserror` derive.
#[derive(Debug, Error)]
pub enum CompressionError {
/// Compression did not succeed; the payload is a human-readable detail string.
#[error("Compression failed: {0}")]
CompressFailed(String),
/// Decompression did not succeed; the payload is a human-readable detail string.
#[error("Decompression failed: {0}")]
DecompressFailed(String),
/// NOTE(review): not constructed anywhere in this file; presumably the payload
/// is the codec name that could not be resolved — confirm against callers.
#[error("Unknown codec: {0}")]
UnknownCodec(String),
}
/// Common interface for codecs that transform one byte buffer into another.
///
/// The `Send + Sync` supertraits mean every implementor must be safe to move
/// to and share between threads.
pub trait Compressor: Send + Sync {
/// Returns the codec's name as a static string.
fn name(&self) -> &'static str;
/// Encodes `data` and returns the encoded bytes in a new `Vec`.
///
/// # Errors
///
/// Returns a [`CompressionError`] when the codec cannot encode the input.
fn compress(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError>;
/// Decodes `data` and returns the decoded bytes in a new `Vec`.
///
/// # Errors
///
/// Returns a [`CompressionError`] when the codec cannot decode the input.
fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError>;
}
/// Pass-through codec: both directions return an unmodified copy of the input.
pub struct IdentityCodec;

impl Compressor for IdentityCodec {
    /// Returns `"identity"`.
    fn name(&self) -> &'static str {
        "identity"
    }

    /// Copies `data` into a freshly allocated `Vec`; never fails.
    fn compress(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
        let copy = Vec::from(data);
        Ok(copy)
    }

    /// Copies `data` into a freshly allocated `Vec`; never fails.
    fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
        let copy = Vec::from(data);
        Ok(copy)
    }
}
/// Byte-oriented run-length codec.
///
/// The encoded form is a sequence of `(count, byte)` pairs, where `count` is a
/// single `u8`; runs longer than 255 bytes are split across several pairs.
pub struct RleCodec;

impl Compressor for RleCodec {
    /// Returns `"rle"`.
    fn name(&self) -> &'static str {
        "rle"
    }

    /// Encodes `data` as consecutive `(count, byte)` pairs.
    ///
    /// Empty input yields empty output. This implementation never returns an
    /// error.
    fn compress(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
        // Capacity is only a hint; the vector grows if the data is not very
        // repetitive (worst case is two output bytes per input byte).
        let mut encoded = Vec::with_capacity(data.len().min(data.len() * 2 / 3 + 16));

        let mut rest = data;
        while let Some(&value) = rest.first() {
            // Length of the leading run of `value`, capped so it fits in a u8.
            let run_len = rest
                .iter()
                .take(usize::from(u8::MAX))
                .take_while(|&&b| b == value)
                .count();

            // `run_len` is in 1..=255 here, so the cast cannot truncate.
            encoded.push(run_len as u8);
            encoded.push(value);
            rest = &rest[run_len..];
        }

        Ok(encoded)
    }

    /// Expands `(count, byte)` pairs back into the original byte stream.
    ///
    /// Empty input yields empty output. A pair whose count is zero contributes
    /// no bytes.
    ///
    /// # Errors
    ///
    /// Returns [`CompressionError::DecompressFailed`] when `data` has an odd
    /// length and therefore ends in an incomplete pair.
    fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
        let pairs = data.chunks_exact(2);
        if !pairs.remainder().is_empty() {
            return Err(CompressionError::DecompressFailed(
                "RLE data length must be even".into(),
            ));
        }

        // First pass: total the run lengths so the output is allocated once.
        let total: usize = pairs.clone().map(|pair| usize::from(pair[0])).sum();
        let mut decoded = Vec::with_capacity(total);

        // Second pass: append each run.
        for pair in pairs {
            let (run_len, value) = (usize::from(pair[0]), pair[1]);
            decoded.resize(decoded.len() + run_len, value);
        }

        Ok(decoded)
    }
}
/// Codec that delegates to the `oxiarc_lz4` crate.
pub struct Lz4Codec;

impl Compressor for Lz4Codec {
    /// Returns `"lz4"`.
    fn name(&self) -> &'static str {
        "lz4"
    }

    /// Compresses `data` via `oxiarc_lz4::compress`.
    ///
    /// # Errors
    ///
    /// Any error from the underlying call is reported as
    /// [`CompressionError::CompressFailed`] with an `"LZ4: "` prefix.
    fn compress(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
        let packed = oxiarc_lz4::compress(data)
            .map_err(|err| CompressionError::CompressFailed(format!("LZ4: {err}")))?;
        Ok(packed)
    }

    /// Decompresses `data` via `oxiarc_lz4::decompress`.
    ///
    /// # Errors
    ///
    /// Any error from the underlying call is reported as
    /// [`CompressionError::DecompressFailed`] with an `"LZ4: "` prefix.
    fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
        // NOTE(review): the second argument (256 * 1024 * 1024) is presumably
        // the maximum decompressed size in bytes — confirm against the
        // oxiarc_lz4 documentation.
        let unpacked = oxiarc_lz4::decompress(data, 256 * 1024 * 1024)
            .map_err(|err| CompressionError::DecompressFailed(format!("LZ4: {err}")))?;
        Ok(unpacked)
    }
}
/// Zstd codec configured with a fixed compression level.
pub struct ZstdCodec {
    /// Level passed to the encoder whenever this codec compresses data.
    level: i32,
}

impl ZstdCodec {
    /// Creates a codec that stores `level` as given; no range check is applied here.
    pub fn new(level: i32) -> Self {
        Self { level }
    }

    /// Creates a codec that uses level 3.
    pub fn default_level() -> Self {
        Self::new(3)
    }
}
impl Compressor for ZstdCodec {
    /// Returns `"zstd"`.
    fn name(&self) -> &'static str {
        "zstd"
    }

    /// Compresses `data` via `oxiarc_zstd::encode_all` at this codec's level.
    ///
    /// # Errors
    ///
    /// Any error from the underlying call is reported as
    /// [`CompressionError::CompressFailed`] with a `"Zstd: "` prefix.
    fn compress(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
        let packed = oxiarc_zstd::encode_all(data, self.level)
            .map_err(|err| CompressionError::CompressFailed(format!("Zstd: {err}")))?;
        Ok(packed)
    }

    /// Decompresses `data` via `oxiarc_zstd::decode_all`.
    ///
    /// # Errors
    ///
    /// Any error from the underlying call is reported as
    /// [`CompressionError::DecompressFailed`] with a `"Zstd: "` prefix.
    fn decompress(&self, data: &[u8]) -> Result<Vec<u8>, CompressionError> {
        let unpacked = oxiarc_zstd::decode_all(data)
            .map_err(|err| CompressionError::DecompressFailed(format!("Zstd: {err}")))?;
        Ok(unpacked)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Compresses `input`, decompresses the result, and asserts that the
    /// original bytes come back unchanged.
    fn assert_round_trip<C: Compressor>(codec: &C, input: &[u8]) {
        let packed = codec.compress(input).unwrap();
        let restored = codec.decompress(&packed).unwrap();
        assert_eq!(restored, input);
    }

    // ---- identity -------------------------------------------------------

    #[test]
    fn identity_empty() {
        assert_round_trip(&IdentityCodec, &[]);
    }

    #[test]
    fn identity_round_trip() {
        let every_byte: Vec<u8> = (0u8..=255).collect();
        assert_round_trip(&IdentityCodec, &every_byte);
    }

    // ---- run-length -----------------------------------------------------

    #[test]
    fn rle_empty() {
        let codec = RleCodec;
        assert!(codec.compress(&[]).unwrap().is_empty());
        assert!(codec.decompress(&[]).unwrap().is_empty());
    }

    #[test]
    fn rle_single_run() {
        let codec = RleCodec;
        let zeros = vec![0u8; 100];
        let packed = codec.compress(&zeros).unwrap();
        // One pair: count 100, byte 0.
        assert_eq!(packed, vec![100u8, 0u8]);
        assert_eq!(codec.decompress(&packed).unwrap(), zeros);
    }

    #[test]
    fn rle_round_trip_all_unique() {
        let ascending: Vec<u8> = (0u8..=127).collect();
        assert_round_trip(&RleCodec, &ascending);
    }

    #[test]
    fn rle_compresses_repetitive() {
        let codec = RleCodec;
        let input = vec![42u8; 200];
        let packed = codec.compress(&input).unwrap();
        // 200 identical bytes fit in a single (count, byte) pair.
        assert_eq!(packed.len(), 2);
        assert!(packed.len() < input.len());
        assert_eq!(codec.decompress(&packed).unwrap(), input);
    }

    #[test]
    fn rle_decompression_odd_input_error() {
        // A lone byte is an incomplete pair and must be rejected.
        let outcome = RleCodec.decompress(&[1u8]);
        assert!(outcome.is_err());
    }

    // ---- lz4 ------------------------------------------------------------

    #[test]
    fn lz4_empty() {
        assert_round_trip(&Lz4Codec, &[]);
    }

    #[test]
    fn lz4_round_trip_repetitive() {
        let codec = Lz4Codec;
        let input = b"hello world ".repeat(500);
        let packed = codec.compress(&input).unwrap();
        assert!(packed.len() < input.len());
        assert_eq!(codec.decompress(&packed).unwrap(), input);
    }

    #[test]
    fn lz4_round_trip_diverse() {
        let input: Vec<u8> = (0u8..=255).cycle().take(1024).collect();
        assert_round_trip(&Lz4Codec, &input);
    }

    // ---- zstd -----------------------------------------------------------

    #[test]
    fn zstd_empty() {
        assert_round_trip(&ZstdCodec::default_level(), &[]);
    }

    #[test]
    fn zstd_round_trip_repetitive() {
        let codec = ZstdCodec::new(3);
        let input = b"oxirs cluster ".repeat(500);
        let packed = codec.compress(&input).unwrap();
        assert!(packed.len() < input.len());
        assert_eq!(codec.decompress(&packed).unwrap(), input);
    }

    #[test]
    fn zstd_round_trip_diverse() {
        let input: Vec<u8> = (0u8..=255).cycle().take(2048).collect();
        assert_round_trip(&ZstdCodec::default_level(), &input);
    }
}