use std::cmp::Reverse;
use std::collections::BinaryHeap;
use crate::error::IoError;
use oxiarc_brotli;
use oxiarc_lz4;
use oxiarc_zstd;
/// Identifies a compression algorithm used by the adaptive pipeline.
///
/// Marked `#[non_exhaustive]` so new algorithms can be added without
/// breaking downstream crates' `match` expressions.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionAlgo {
    /// Store bytes verbatim (no compression).
    None,
    /// Byte-level run-length encoding.
    Rle,
    /// Byte-wise delta encoding (first byte verbatim, then differences).
    DeltaEncoding,
    /// Simplified LZ77 with (offset, length, literal) triplets.
    Lz77Lite,
    /// Dictionary coding; currently served by the same LZ77 implementation.
    DictionaryCoding,
    /// Canonical Huffman coding.
    HuffmanCoding,
    /// LZ4 via the `oxiarc_lz4` crate.
    OxiLz4,
    /// Zstandard via the `oxiarc_zstd` crate.
    OxiZstd,
    /// Brotli via the `oxiarc_brotli` crate.
    OxiBrotli,
}
/// Statistical summary of a byte buffer, produced by [`profile_data`] and
/// consumed by [`recommend_algorithm`].
#[derive(Debug, Clone)]
pub struct DataProfile {
    /// Shannon entropy of the byte distribution, in bits per byte (0.0..=8.0).
    pub entropy: f64,
    /// Fraction of adjacent byte pairs that are equal (0.0..=1.0).
    pub repetition_rate: f64,
    /// Fraction of adjacent byte pairs that are non-decreasing (0.0..=1.0).
    pub sorted_fraction: f64,
    /// Number of bytes that were profiled.
    pub n_bytes: usize,
}
/// Outcome of [`compress_adaptive`]: the chosen algorithm, the compressed
/// bytes, and size bookkeeping needed by [`decompress_adaptive`].
#[derive(Debug, Clone)]
pub struct CompressionResult {
    /// Algorithm that produced `compressed` (needed to decompress).
    pub algorithm: CompressionAlgo,
    /// The compressed payload.
    pub compressed: Vec<u8>,
    /// Size of the input in bytes.
    pub original_size: usize,
    /// Size of `compressed` in bytes.
    pub compressed_size: usize,
    /// `compressed_size / original_size`; 1.0 for empty input. Values below
    /// 1.0 mean the data actually shrank.
    pub ratio: f64,
}
/// Computes a [`DataProfile`] (entropy, repetition rate, sortedness) for
/// `data`; the result feeds [`recommend_algorithm`].
pub fn profile_data(data: &[u8]) -> DataProfile {
    let n = data.len();
    if n == 0 {
        return DataProfile {
            entropy: 0.0,
            repetition_rate: 0.0,
            sorted_fraction: 0.0,
            n_bytes: 0,
        };
    }
    // Byte histogram for the Shannon entropy estimate.
    let mut histogram = [0u64; 256];
    for &byte in data {
        histogram[byte as usize] += 1;
    }
    let total = n as f64;
    let mut entropy = 0.0_f64;
    for &count in histogram.iter() {
        if count > 0 {
            let p = count as f64 / total;
            entropy -= p * p.log2();
        }
    }
    // Pairwise statistics need at least two bytes.
    if n < 2 {
        return DataProfile {
            entropy,
            repetition_rate: 0.0,
            sorted_fraction: 0.0,
            n_bytes: n,
        };
    }
    let mut repeated = 0u64;
    let mut non_decreasing = 0u64;
    for pair in data.windows(2) {
        if pair[1] == pair[0] {
            repeated += 1;
        }
        if pair[1] >= pair[0] {
            non_decreasing += 1;
        }
    }
    let pairs = (n - 1) as f64;
    DataProfile {
        entropy,
        repetition_rate: repeated as f64 / pairs,
        sorted_fraction: non_decreasing as f64 / pairs,
        n_bytes: n,
    }
}
/// Picks a compression algorithm from a [`DataProfile`] using fixed
/// heuristic thresholds, checked in priority order.
pub fn recommend_algorithm(profile: &DataProfile) -> CompressionAlgo {
    match profile {
        // Tiny inputs: compression overhead is not worth it.
        p if p.n_bytes < 64 => CompressionAlgo::None,
        // Very skewed byte distribution: Huffman wins.
        p if p.entropy < 1.0 => CompressionAlgo::HuffmanCoding,
        // Long runs of identical bytes: RLE.
        p if p.repetition_rate > 0.7 => CompressionAlgo::Rle,
        // Mostly sorted/ramping data: small deltas compress well.
        p if p.sorted_fraction > 0.8 => CompressionAlgo::DeltaEncoding,
        _ => CompressionAlgo::Lz77Lite,
    }
}
/// Run-length encodes `data`: each maximal run of identical bytes becomes a
/// `(count, value)` pair with `count` in `1..=255` (longer runs are split).
pub fn compress_rle(data: &[u8]) -> Vec<u8> {
    let mut encoded = Vec::with_capacity(data.len());
    let mut rest = data;
    while let Some((&value, _)) = rest.split_first() {
        // Length of the current run, capped at 255 so it fits in one byte.
        let run = rest.iter().take(255).take_while(|&&b| b == value).count();
        encoded.push(run as u8);
        encoded.push(value);
        rest = &rest[run..];
    }
    encoded
}
/// Expands the `(count, value)` pairs produced by [`compress_rle`].
///
/// # Errors
/// Returns `IoError::DecompressionError` when the input length is odd and
/// therefore cannot consist of complete pairs.
pub fn decompress_rle(data: &[u8]) -> Result<Vec<u8>, IoError> {
    if !data.len().is_multiple_of(2) {
        return Err(IoError::DecompressionError(
            "RLE data must have an even number of bytes".to_string(),
        ));
    }
    let mut out = Vec::new();
    for pair in data.chunks_exact(2) {
        let count = pair[0] as usize;
        let value = pair[1];
        out.extend(std::iter::repeat(value).take(count));
    }
    Ok(out)
}
/// Delta-encodes `data`: the first byte is kept verbatim and every
/// subsequent byte is stored as the wrapping (mod-256) difference from its
/// predecessor, so [`decompress_delta`] reconstructs the input exactly.
///
/// Previously the difference was clamped to `[-127, 127]`, which silently
/// lost information whenever adjacent bytes differed by more than 127
/// (e.g. `[0, 200]`). Wrapping arithmetic keeps all 8 bits of the
/// difference and stays compatible with `decompress_delta`, which already
/// reduces the running sum modulo 256.
pub fn compress_delta(data: &[u8]) -> Vec<u8> {
    if data.is_empty() {
        return Vec::new();
    }
    let mut out = Vec::with_capacity(data.len());
    out.push(data[0]);
    for i in 1..data.len() {
        // Wrapping subtraction: lossless difference mod 256.
        out.push(data[i].wrapping_sub(data[i - 1]));
    }
    out
}
pub fn decompress_delta(data: &[u8]) -> Result<Vec<u8>, IoError> {
if data.is_empty() {
return Ok(Vec::new());
}
let mut out = Vec::with_capacity(data.len());
out.push(data[0]);
for i in 1..data.len() {
let delta = data[i] as i8 as i16;
let prev = *out
.last()
.ok_or_else(|| IoError::DecompressionError("delta decode: empty output".to_string()))?
as i16;
let next = ((prev + delta).rem_euclid(256)) as u8;
out.push(next);
}
Ok(out)
}
/// Size in bytes of one encoded (offset, length, literal) triplet.
const LZ77_TRIPLET_SIZE: usize = 4;
/// Size in bytes of the little-endian original-length header.
const LZ77_HEADER_SIZE: usize = 4;

/// Compresses `data` with a simplified LZ77.
///
/// Output layout: a 4-byte little-endian original-length header, then
/// `(u16 LE offset, u8 match length, u8 next literal)` triplets; a triplet
/// with `offset == 0 && length == 0` carries only the literal.
///
/// `window_size` is clamped to `u16::MAX`: offsets are serialized as `u16`,
/// so a larger window would silently truncate back-reference offsets and
/// corrupt the stream. (The previous code only floored the window at 1.)
///
/// Inputs are assumed to be smaller than 4 GiB since the length header is
/// a `u32`.
pub fn compress_lz77(data: &[u8], window_size: usize) -> Vec<u8> {
    if data.is_empty() {
        return (0u32).to_le_bytes().to_vec();
    }
    // Clamp so every back-reference offset fits in the serialized u16.
    let win = window_size.clamp(1, u16::MAX as usize);
    let mut out = (data.len() as u32).to_le_bytes().to_vec();
    let mut pos = 0;
    while pos < data.len() {
        let window_start = pos.saturating_sub(win);
        let window = &data[window_start..pos];
        let mut best_offset = 0u16;
        let mut best_len = 0u8;
        if !window.is_empty() {
            // Match length is stored in one byte, so look at most 255 ahead.
            let look_ahead_end = data.len().min(pos + 255);
            let look_ahead = &data[pos..look_ahead_end];
            for start in 0..window.len() {
                let mut mlen = 0usize;
                while mlen < look_ahead.len()
                    && mlen < 255
                    && start + mlen < window.len()
                    && window[start + mlen] == look_ahead[mlen]
                {
                    mlen += 1;
                }
                // Strict '>' keeps the first (farthest-back) match on ties.
                if mlen > best_len as usize {
                    best_len = mlen as u8;
                    best_offset = (window.len() - start) as u16;
                }
            }
        }
        // Literal following the match; 0 when the match ends the input.
        let next_pos = pos + best_len as usize;
        let next_char = if next_pos < data.len() {
            data[next_pos]
        } else {
            0
        };
        out.extend_from_slice(&best_offset.to_le_bytes());
        out.push(best_len);
        out.push(next_char);
        pos += best_len as usize + 1;
    }
    out
}
/// Decompresses the stream produced by [`compress_lz77`].
///
/// Layout: a 4-byte little-endian original length, then 4-byte triplets of
/// `(u16 LE offset, u8 length, u8 literal)`; `offset == 0 && length == 0`
/// means "literal only". Output is truncated at the declared original
/// length, so trailing padding in the final triplet is ignored.
///
/// # Errors
/// Returns `IoError::DecompressionError` when the header is truncated
/// (non-empty input shorter than 4 bytes), the body is not a whole number
/// of triplets, or a back-reference points before the start of the output.
pub fn decompress_lz77(data: &[u8]) -> Result<Vec<u8>, IoError> {
    if data.len() < LZ77_HEADER_SIZE {
        // A completely empty buffer is accepted as "nothing to decode".
        if data.is_empty() {
            return Ok(Vec::new());
        }
        return Err(IoError::DecompressionError(
            "LZ77 data too short (missing length header)".to_string(),
        ));
    }
    let orig_len = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
    let body = &data[LZ77_HEADER_SIZE..];
    if !body.len().is_multiple_of(LZ77_TRIPLET_SIZE) {
        return Err(IoError::DecompressionError(format!(
            "LZ77 body length {} is not a multiple of {LZ77_TRIPLET_SIZE}",
            body.len()
        )));
    }
    let mut out: Vec<u8> = Vec::with_capacity(orig_len);
    let mut i = 0;
    while i + LZ77_TRIPLET_SIZE <= body.len() && out.len() < orig_len {
        let offset = u16::from_le_bytes([body[i], body[i + 1]]) as usize;
        let length = body[i + 2] as usize;
        let next_char = body[i + 3];
        i += LZ77_TRIPLET_SIZE;
        if offset == 0 && length == 0 {
            // Literal-only triplet.
            if out.len() < orig_len {
                out.push(next_char);
            }
        } else {
            // Back-reference must land inside already-produced output.
            if out.len() < offset {
                return Err(IoError::DecompressionError(format!(
                    "LZ77 back-reference offset {offset} exceeds output length {}",
                    out.len()
                )));
            }
            let start = out.len() - offset;
            for k in 0..length {
                if out.len() >= orig_len {
                    break;
                }
                // `start + k` stays in bounds even for overlapping copies
                // (length > offset) because the output grows by one byte
                // per iteration.
                let byte = out[start + k];
                out.push(byte);
            }
            if out.len() < orig_len {
                out.push(next_char);
            }
        }
    }
    Ok(out)
}
/// Node in the Huffman tree: leaves carry a `symbol`, internal nodes carry
/// the combined frequency of their children.
#[derive(Debug)]
struct HuffNode {
    freq: u64,
    symbol: Option<u8>,
    left: Option<Box<HuffNode>>,
    right: Option<Box<HuffNode>>,
}
impl Ord for HuffNode {
    /// Reversed frequency order so `BinaryHeap` (a max-heap) pops the
    /// lowest-frequency node first; ties are broken by symbol so the tree
    /// shape is deterministic.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        other
            .freq
            .cmp(&self.freq)
            .then(other.symbol.cmp(&self.symbol))
    }
}
impl PartialOrd for HuffNode {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
// `PartialEq`/`Eq` are implemented manually (not derived) so that equality
// agrees with `Ord::cmp`: the derived impls would also compare the child
// pointers, letting `cmp` return `Equal` for nodes that compare unequal,
// which violates the `Ord` contract (`a == b` iff `a.cmp(&b) == Equal`).
impl PartialEq for HuffNode {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == std::cmp::Ordering::Equal
    }
}
impl Eq for HuffNode {}
/// Walks the Huffman tree and records each leaf symbol's depth (= code
/// length) into `lengths`, indexed by symbol value.
fn assign_lengths(node: &HuffNode, depth: u8, lengths: &mut [u8; 256]) {
    match node.symbol {
        // Leaf: its depth is its code length.
        Some(sym) => lengths[sym as usize] = depth,
        // Internal node: recurse one level deeper, left then right.
        None => {
            for child in [&node.left, &node.right] {
                if let Some(child) = child {
                    assign_lengths(child, depth + 1, lengths);
                }
            }
        }
    }
}
/// Builds canonical Huffman codes (RFC 1951-style) from per-symbol code
/// lengths; returns `(code, length)` per symbol, `(0, 0)` for unused ones.
fn canonical_codes(lengths: &[u8; 256]) -> [(u32, u8); 256] {
    // How many symbols use each code length (lengths above 32 would panic
    // here, matching the documented limit of the 33-entry tables).
    let mut count_per_len = [0u32; 33];
    for &len in lengths.iter() {
        if len > 0 {
            count_per_len[len as usize] += 1;
        }
    }
    // First canonical code value for each length.
    let mut first_code = [0u32; 33];
    let mut running = 0u32;
    for bits in 1..=32usize {
        running = (running + count_per_len[bits - 1]) << 1;
        first_code[bits] = running;
    }
    // Hand out codes in ascending symbol order within each length class.
    let mut table = [(0u32, 0u8); 256];
    for (sym, slot) in table.iter_mut().enumerate() {
        let len = lengths[sym];
        if len > 0 {
            *slot = (first_code[len as usize], len);
            first_code[len as usize] += 1;
        }
    }
    table
}
/// Compresses `data` with canonical Huffman coding.
///
/// Output layout: 256 bytes of per-symbol code lengths, the packed
/// big-endian-within-byte bitstream, then one trailing byte holding the
/// number of valid bits in the final bitstream byte (0 means the last byte
/// is fully used, or that there is no bitstream at all).
///
/// # Errors
/// Returns `IoError::CompressionError` if the internal heap invariants are
/// violated or a symbol ends up with a zero-length code (should not happen
/// for non-empty input).
pub fn compress_huffman(data: &[u8]) -> Result<Vec<u8>, IoError> {
    if data.is_empty() {
        // Empty input: all-zero length table plus a zero "valid bits" byte
        // (257 bytes total; decompress_huffman decodes this as empty).
        let mut out = vec![0u8; 256];
        out.push(0u8);
        return Ok(out);
    }
    let mut freq = [0u64; 256];
    for &b in data {
        freq[b as usize] += 1;
    }
    let unique: Vec<usize> = (0..256).filter(|&i| freq[i] > 0).collect();
    if unique.len() == 1 {
        // Degenerate single-symbol alphabet: force a 1-bit code, since a
        // one-leaf Huffman tree would otherwise yield code length 0.
        let mut lengths = [0u8; 256];
        lengths[unique[0]] = 1;
        let codes = canonical_codes(&lengths);
        let mut out = lengths.to_vec();
        let mut bit_buf = 0u64;
        let mut bit_len = 0u8;
        let mut encoded = Vec::new();
        for &b in data {
            let (c, cl) = codes[b as usize];
            bit_buf = (bit_buf << cl) | c as u64;
            bit_len += cl;
            while bit_len >= 8 {
                bit_len -= 8;
                encoded.push((bit_buf >> bit_len) as u8);
            }
        }
        let valid_bits = if bit_len == 0 { 0u8 } else { bit_len };
        if bit_len > 0 {
            // Left-align the leftover bits in the final byte.
            encoded.push((bit_buf << (8 - bit_len)) as u8);
        }
        out.extend_from_slice(&encoded);
        out.push(valid_bits);
        return Ok(out);
    }
    // Build the Huffman tree bottom-up with a min-heap (HuffNode's Ord is
    // reversed so BinaryHeap pops the lowest frequency first).
    let mut heap: BinaryHeap<HuffNode> = BinaryHeap::new();
    for sym in 0..256usize {
        if freq[sym] > 0 {
            heap.push(HuffNode {
                freq: freq[sym],
                symbol: Some(sym as u8),
                left: None,
                right: None,
            });
        }
    }
    // Repeatedly merge the two rarest nodes until one root remains.
    while heap.len() > 1 {
        let left = heap.pop().ok_or_else(|| {
            IoError::CompressionError("empty heap during Huffman build".to_string())
        })?;
        let right = heap.pop().ok_or_else(|| {
            IoError::CompressionError("single-node heap during Huffman build".to_string())
        })?;
        heap.push(HuffNode {
            freq: left.freq + right.freq,
            symbol: None,
            left: Some(Box::new(left)),
            right: Some(Box::new(right)),
        });
    }
    let root = heap
        .pop()
        .ok_or_else(|| IoError::CompressionError("empty heap after Huffman build".to_string()))?;
    let mut lengths = [0u8; 256];
    assign_lengths(&root, 0, &mut lengths);
    // A leaf root would have been assigned depth 0; force at least 1 bit.
    // (Unreachable here since unique.len() > 1, but kept as a safeguard.)
    if let Some(sym) = root.symbol {
        lengths[sym as usize] = 1;
    }
    // NOTE(review): canonical_codes indexes 33-entry tables by code length,
    // so a depth above 32 would panic. Reaching that needs a pathological
    // (Fibonacci-like) frequency profile over a multi-megabyte input —
    // presumably out of scope, but worth confirming / length-limiting.
    let codes = canonical_codes(&lengths);
    let mut out = lengths.to_vec();
    let mut bit_buf = 0u64;
    let mut bit_len = 0u8;
    let mut encoded = Vec::new();
    for &b in data {
        let (c, cl) = codes[b as usize];
        if cl == 0 {
            return Err(IoError::CompressionError(format!(
                "symbol {b} has zero code length"
            )));
        }
        bit_buf = (bit_buf << cl) | c as u64;
        bit_len += cl;
        while bit_len >= 8 {
            bit_len -= 8;
            encoded.push(((bit_buf >> bit_len) & 0xFF) as u8);
        }
    }
    let valid_bits = if bit_len == 0 { 0u8 } else { bit_len };
    if bit_len > 0 {
        // Left-align the leftover bits in the final byte.
        encoded.push(((bit_buf << (8 - bit_len)) & 0xFF) as u8);
    }
    out.extend_from_slice(&encoded);
    out.push(valid_bits);
    Ok(out)
}
/// Decompresses the stream produced by [`compress_huffman`].
///
/// Expected layout: 256 code-length bytes, the bitstream, then one trailing
/// byte giving the number of valid bits in the final bitstream byte
/// (0 = all 8).
pub fn decompress_huffman(data: &[u8]) -> Result<Vec<u8>, IoError> {
    // NOTE(review): input shorter than the 257-byte minimum frame decodes
    // to empty rather than erroring — presumably intentional leniency
    // (compress_huffman's empty output is exactly 257 bytes); confirm.
    if data.len() < 257 {
        return Ok(Vec::new());
    }
    let mut lengths = [0u8; 256];
    lengths.copy_from_slice(&data[..256]);
    let valid_bits = *data
        .last()
        .ok_or_else(|| IoError::DecompressionError("Huffman data too short".to_string()))?;
    let encoded = &data[256..data.len() - 1];
    let codes = canonical_codes(&lengths);
    let max_bits = lengths.iter().copied().max().unwrap_or(0) as usize;
    if max_bits == 0 {
        // All-zero length table: no symbols were encoded.
        return Ok(Vec::new());
    }
    // (length, code, symbol) sorted so the shortest codes are tried first;
    // the prefix property guarantees at most one match per position.
    let mut code_table: Vec<(u8, u32, u8)> = (0..256usize)
        .filter(|&s| lengths[s] > 0)
        .map(|s| (lengths[s], codes[s].0, s as u8))
        .collect();
    code_table.sort();
    let mut out = Vec::new();
    let mut bit_buf = 0u64;
    let mut bits_in_buf = 0u8;
    // Total bits to decode: all bytes are full except possibly the last,
    // which holds `valid_bits` bits (0 meaning it too is full).
    let total_encoded_bits = if encoded.is_empty() {
        0usize
    } else {
        (encoded.len() - 1) * 8
            + if valid_bits == 0 {
                8
            } else {
                valid_bits as usize
            }
    };
    let mut bits_consumed = 0usize;
    for &byte in encoded {
        bit_buf = (bit_buf << 8) | byte as u64;
        bits_in_buf += 8;
        // Greedily peel off as many complete codes as the buffer holds.
        'decode: loop {
            if bits_in_buf == 0 {
                break;
            }
            let remaining_bits = total_encoded_bits.saturating_sub(bits_consumed);
            if remaining_bits == 0 {
                break;
            }
            for &(cl, c, sym) in &code_table {
                // Table is sorted by length: once a code is longer than the
                // buffered bits, all later ones are too — fetch more input.
                if cl > bits_in_buf {
                    break 'decode;
                }
                // Compare the top `cl` bits of the buffer against this code.
                let top = (bit_buf >> (bits_in_buf - cl)) & ((1u64 << cl) - 1);
                if top == c as u64 {
                    out.push(sym);
                    bits_in_buf -= cl;
                    bits_consumed += cl as usize;
                    if bits_consumed >= total_encoded_bits {
                        break 'decode;
                    }
                    continue 'decode;
                }
            }
            // No code matched (malformed stream): move on to the next byte.
            break;
        }
    }
    Ok(out)
}
pub fn compress_adaptive(data: &[u8]) -> Result<CompressionResult, IoError> {
let profile = profile_data(data);
let algo = recommend_algorithm(&profile);
let original_size = data.len();
let compressed = match algo {
CompressionAlgo::None => data.to_vec(),
CompressionAlgo::Rle => compress_rle(data),
CompressionAlgo::DeltaEncoding => compress_delta(data),
CompressionAlgo::Lz77Lite => compress_lz77(data, 4096),
CompressionAlgo::DictionaryCoding => {
compress_lz77(data, 4096)
}
CompressionAlgo::HuffmanCoding => compress_huffman(data)?,
CompressionAlgo::OxiLz4 => compress_oxi_lz4(data)?,
CompressionAlgo::OxiZstd => compress_oxi_zstd(data)?,
CompressionAlgo::OxiBrotli => compress_oxi_brotli(data)?,
};
let compressed_size = compressed.len();
let ratio = if original_size == 0 {
1.0
} else {
compressed_size as f64 / original_size as f64
};
Ok(CompressionResult {
algorithm: algo,
compressed,
original_size,
compressed_size,
ratio,
})
}
pub fn decompress_adaptive(result: &CompressionResult) -> Result<Vec<u8>, IoError> {
match result.algorithm {
CompressionAlgo::None => Ok(result.compressed.clone()),
CompressionAlgo::Rle => decompress_rle(&result.compressed),
CompressionAlgo::DeltaEncoding => decompress_delta(&result.compressed),
CompressionAlgo::Lz77Lite | CompressionAlgo::DictionaryCoding => {
decompress_lz77(&result.compressed)
}
CompressionAlgo::HuffmanCoding => decompress_huffman(&result.compressed),
CompressionAlgo::OxiLz4 => decompress_oxi_lz4(&result.compressed),
CompressionAlgo::OxiZstd => decompress_oxi_zstd(&result.compressed),
CompressionAlgo::OxiBrotli => decompress_oxi_brotli(&result.compressed),
}
}
/// Compresses `data` with the oxiarc LZ4 backend.
///
/// # Errors
/// Returns `IoError::CompressionError` on any backend failure.
pub fn compress_oxi_lz4(data: &[u8]) -> Result<Vec<u8>, IoError> {
    match oxiarc_lz4::compress(data) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(IoError::CompressionError(format!("oxiarc-lz4 compress: {e}"))),
    }
}
/// Decompresses oxiarc-LZ4 data with a bounded output allocation.
///
/// The output cap is 256x the input size (generous vs. LZ4's worst-case
/// expansion), never below 4 MiB and never above 4 GiB. The bound is
/// computed in `u64` because a `4 * 1024 * 1024 * 1024` `usize` constant
/// overflows at const-eval time on 32-bit targets (the previous code would
/// not compile there).
///
/// # Errors
/// Returns `IoError::DecompressionError` on any backend failure.
pub fn decompress_oxi_lz4(data: &[u8]) -> Result<Vec<u8>, IoError> {
    const MAX_CAP: u64 = 4 * 1024 * 1024 * 1024;
    const MIN_OUT: u64 = 4 * 1024 * 1024;
    let max_out = (data.len() as u64)
        .saturating_mul(256)
        .max(MIN_OUT)
        .min(MAX_CAP)
        .min(usize::MAX as u64) as usize;
    oxiarc_lz4::decompress(data, max_out)
        .map_err(|e| IoError::DecompressionError(format!("oxiarc-lz4 decompress: {e}")))
}
/// Compresses `data` with the oxiarc Zstandard backend at level 3
/// (a balanced speed/ratio default).
///
/// # Errors
/// Returns `IoError::CompressionError` on any backend failure.
pub fn compress_oxi_zstd(data: &[u8]) -> Result<Vec<u8>, IoError> {
    match oxiarc_zstd::compress_with_level(data, 3) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(IoError::CompressionError(format!("oxiarc-zstd compress: {e}"))),
    }
}
/// Decompresses oxiarc-Zstandard data.
///
/// # Errors
/// Returns `IoError::DecompressionError` on any backend failure.
pub fn decompress_oxi_zstd(data: &[u8]) -> Result<Vec<u8>, IoError> {
    match oxiarc_zstd::decompress(data) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(IoError::DecompressionError(format!(
            "oxiarc-zstd decompress: {e}"
        ))),
    }
}
/// Compresses `data` with the oxiarc Brotli backend at quality 6
/// (a balanced speed/ratio default).
///
/// # Errors
/// Returns `IoError::CompressionError` on any backend failure.
pub fn compress_oxi_brotli(data: &[u8]) -> Result<Vec<u8>, IoError> {
    match oxiarc_brotli::compress(data, 6) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(IoError::CompressionError(format!(
            "oxiarc-brotli compress: {e}"
        ))),
    }
}
/// Decompresses oxiarc-Brotli data.
///
/// # Errors
/// Returns `IoError::DecompressionError` on any backend failure.
pub fn decompress_oxi_brotli(data: &[u8]) -> Result<Vec<u8>, IoError> {
    match oxiarc_brotli::decompress(data) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(IoError::DecompressionError(format!(
            "oxiarc-brotli decompress: {e}"
        ))),
    }
}
/// Framing tag: payload stored uncompressed.
const TAG_NONE: u8 = 0x00;
/// Framing tag: payload compressed with oxiarc LZ4.
const TAG_LZ4: u8 = 0x01;
/// Framing tag: payload compressed with oxiarc Zstandard.
const TAG_ZSTD: u8 = 0x02;
/// Framing tag: payload compressed with oxiarc Brotli.
const TAG_BROTLI: u8 = 0x03;

/// Shannon entropy of the byte distribution of `data`, in bits per byte
/// (0.0 for empty or constant input, up to 8.0 for a uniform distribution).
pub fn estimate_entropy(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut histogram = [0u64; 256];
    for &byte in data {
        histogram[byte as usize] += 1;
    }
    let total = data.len() as f64;
    histogram
        .iter()
        .filter(|&&count| count > 0)
        .map(|&count| {
            let p = count as f64 / total;
            -(p * p.log2())
        })
        .sum()
}
/// Chooses an oxiarc backend from the input's size and entropy, returning
/// the algorithm together with a rough expected compression ratio.
pub fn select_oxi_algorithm(data: &[u8]) -> (CompressionAlgo, f64) {
    // Tiny payloads are not worth the framing and CPU overhead.
    if data.len() < 256 {
        return (CompressionAlgo::None, 1.0);
    }
    // Higher entropy → less compressible → cheaper (or no) codec.
    match estimate_entropy(data) {
        e if e > 7.5 => (CompressionAlgo::None, 1.0),
        e if e > 6.0 => (CompressionAlgo::OxiLz4, 0.8),
        e if e > 4.0 => (CompressionAlgo::OxiZstd, 0.5),
        _ => (CompressionAlgo::OxiBrotli, 0.2),
    }
}
/// Compresses `data` with an automatically selected oxiarc backend and
/// prepends a one-byte tag identifying the codec for [`auto_decompress`].
///
/// # Errors
/// Propagates any `IoError` from the selected compressor.
pub fn auto_compress(data: &[u8]) -> Result<Vec<u8>, IoError> {
    let (algo, _estimated_ratio) = select_oxi_algorithm(data);
    let (tag, payload) = match algo {
        CompressionAlgo::OxiLz4 => (TAG_LZ4, compress_oxi_lz4(data)?),
        CompressionAlgo::OxiZstd => (TAG_ZSTD, compress_oxi_zstd(data)?),
        CompressionAlgo::OxiBrotli => (TAG_BROTLI, compress_oxi_brotli(data)?),
        // `None` — and any algorithm without an oxiarc tag — is stored verbatim.
        _ => (TAG_NONE, data.to_vec()),
    };
    let mut framed = Vec::with_capacity(1 + payload.len());
    framed.push(tag);
    framed.extend_from_slice(&payload);
    Ok(framed)
}
/// Reverses [`auto_compress`]: reads the leading tag byte and dispatches to
/// the matching decompressor.
///
/// # Errors
/// Returns `IoError::DecompressionError` for an unknown tag, and propagates
/// backend errors.
pub fn auto_decompress(data: &[u8]) -> Result<Vec<u8>, IoError> {
    // An empty buffer is treated as an empty payload rather than an error.
    let Some((&tag, payload)) = data.split_first() else {
        return Ok(Vec::new());
    };
    match tag {
        TAG_NONE => Ok(payload.to_vec()),
        TAG_LZ4 => decompress_oxi_lz4(payload),
        TAG_ZSTD => decompress_oxi_zstd(payload),
        TAG_BROTLI => decompress_oxi_brotli(payload),
        other => Err(IoError::DecompressionError(format!(
            "auto_decompress: unknown algorithm tag 0x{other:02x}"
        ))),
    }
}
// Unit tests: per-algorithm round-trips, heuristic selection, profiling,
// and the tagged auto_compress framing.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_rle_roundtrip_simple() {
        let original = vec![0u8, 0, 0, 1, 1, 2];
        let compressed = compress_rle(&original);
        let decompressed = decompress_rle(&compressed).expect("decompress rle");
        assert_eq!(decompressed, original);
    }

    #[test]
    fn test_rle_roundtrip_single() {
        let original = vec![42u8];
        let c = compress_rle(&original);
        assert_eq!(decompress_rle(&c).unwrap(), original);
    }

    #[test]
    fn test_rle_roundtrip_empty() {
        let c = compress_rle(&[]);
        assert_eq!(decompress_rle(&c).unwrap(), Vec::<u8>::new());
    }

    #[test]
    fn test_rle_max_run() {
        // 300 identical bytes exercise the 255-per-pair run cap.
        let original: Vec<u8> = vec![7u8; 300];
        let c = compress_rle(&original);
        let d = decompress_rle(&c).unwrap();
        assert_eq!(d, original);
    }

    #[test]
    fn test_delta_roundtrip() {
        let original = vec![1u8, 2, 3, 4];
        let c = compress_delta(&original);
        let d = decompress_delta(&c).unwrap();
        assert_eq!(d, original);
    }

    #[test]
    fn test_delta_empty() {
        assert_eq!(compress_delta(&[]), Vec::<u8>::new());
        assert_eq!(decompress_delta(&[]).unwrap(), Vec::<u8>::new());
    }

    #[test]
    fn test_delta_roundtrip_with_overflow() {
        // NOTE(review): only the length is asserted — the delta transform
        // historically clamped differences to [-127, 127] and was lossy for
        // larger jumps (e.g. 0 -> 200); tighten this to exact equality once
        // the transform is guaranteed lossless.
        let original = vec![0u8, 200, 100, 50];
        let c = compress_delta(&original);
        let d = decompress_delta(&c).unwrap();
        assert_eq!(d.len(), original.len());
    }

    #[test]
    fn test_lz77_roundtrip_short() {
        let original = b"abcabc".to_vec();
        let c = compress_lz77(&original, 64);
        let d = decompress_lz77(&c).unwrap();
        assert_eq!(d, original);
    }

    #[test]
    fn test_lz77_roundtrip_empty() {
        let c = compress_lz77(&[], 64);
        let d = decompress_lz77(&c).unwrap();
        assert_eq!(d, Vec::<u8>::new());
    }

    #[test]
    fn test_lz77_roundtrip_repeated() {
        let original: Vec<u8> = b"aaaa".repeat(10);
        let c = compress_lz77(&original, 64);
        let d = decompress_lz77(&c).unwrap();
        assert_eq!(d, original);
    }

    #[test]
    fn test_huffman_roundtrip_ascii() {
        let original: Vec<u8> = b"hello world this is a huffman test string".to_vec();
        let original: Vec<u8> = original.iter().copied().cycle().take(100).collect();
        let c = compress_huffman(&original).unwrap();
        let d = decompress_huffman(&c).unwrap();
        assert_eq!(d, original, "Huffman roundtrip failed");
    }

    #[test]
    fn test_huffman_empty() {
        let c = compress_huffman(&[]).unwrap();
        let d = decompress_huffman(&c).unwrap();
        assert_eq!(d, Vec::<u8>::new());
    }

    #[test]
    fn test_huffman_single_symbol() {
        // Exercises the degenerate one-symbol (forced 1-bit code) path.
        let original = vec![0xABu8; 50];
        let c = compress_huffman(&original).unwrap();
        let d = decompress_huffman(&c).unwrap();
        assert_eq!(d, original);
    }

    #[test]
    fn test_recommend_rle_high_repetition() {
        // NOTE(review): `data` is built but never used — the profile below
        // is hand-crafted; consider removing this dead setup.
        let mut data = [0u8; 200];
        for i in 150..200 {
            data[i] = 1;
        }
        let profile = DataProfile {
            entropy: 2.5,
            repetition_rate: 0.85,
            sorted_fraction: 0.4,
            n_bytes: 200,
        };
        let algo = recommend_algorithm(&profile);
        assert_eq!(algo, CompressionAlgo::Rle);
    }

    #[test]
    fn test_recommend_none_small() {
        let data = vec![1u8, 2, 3];
        let profile = profile_data(&data);
        let algo = recommend_algorithm(&profile);
        assert_eq!(algo, CompressionAlgo::None);
    }

    #[test]
    fn test_recommend_delta_sorted() {
        let data: Vec<u8> = (0u8..128).chain(0u8..128).collect();
        let profile = profile_data(&data);
        assert!(
            profile.sorted_fraction > 0.8,
            "sorted_fraction={}",
            profile.sorted_fraction
        );
        let algo = recommend_algorithm(&profile);
        assert_eq!(algo, CompressionAlgo::DeltaEncoding);
    }

    #[test]
    fn test_recommend_huffman_low_entropy() {
        let data = vec![42u8; 200];
        let profile = profile_data(&data);
        assert!(profile.entropy < 1.0);
        let algo = recommend_algorithm(&profile);
        assert_eq!(algo, CompressionAlgo::HuffmanCoding);
    }

    #[test]
    fn test_adaptive_roundtrip_rle() {
        let data: Vec<u8> = vec![99u8; 200];
        let result = compress_adaptive(&data).unwrap();
        let recovered = decompress_adaptive(&result).unwrap();
        assert_eq!(recovered, data);
    }

    #[test]
    fn test_adaptive_roundtrip_mixed() {
        let data: Vec<u8> = (0u8..200).collect();
        let result = compress_adaptive(&data).unwrap();
        let recovered = decompress_adaptive(&result).unwrap();
        assert_eq!(recovered, data);
    }

    #[test]
    fn test_profile_uniform() {
        let data: Vec<u8> = (0u8..=255).cycle().take(256).collect();
        let profile = profile_data(&data);
        assert!(
            (profile.entropy - 8.0).abs() < 0.01,
            "uniform entropy should be ~8 bits"
        );
    }

    #[test]
    fn test_profile_empty() {
        let profile = profile_data(&[]);
        assert_eq!(profile.n_bytes, 0);
        assert_eq!(profile.entropy, 0.0);
    }

    #[test]
    fn test_entropy_all_zeros() {
        let data = vec![0u8; 1024];
        let e = estimate_entropy(&data);
        assert!(e < 1e-9, "all-zero entropy should be ~0, got {e}");
    }

    #[test]
    fn test_entropy_uniform() {
        let data: Vec<u8> = (0u8..=255).cycle().take(2048).collect();
        let e = estimate_entropy(&data);
        assert!(
            (e - 8.0).abs() < 0.01,
            "uniform entropy should be ~8.0, got {e}"
        );
    }

    #[test]
    fn test_entropy_empty() {
        assert_eq!(estimate_entropy(&[]), 0.0);
    }

    #[test]
    fn test_select_none_high_entropy() {
        let data: Vec<u8> = (0u8..=255).cycle().take(2048).collect();
        let (algo, _ratio) = select_oxi_algorithm(&data);
        assert_eq!(
            algo,
            CompressionAlgo::None,
            "high entropy should select None"
        );
    }

    #[test]
    fn test_select_none_small_data() {
        // Below the 256-byte minimum, entropy is never even computed.
        let data = vec![42u8; 100];
        let (algo, _) = select_oxi_algorithm(&data);
        assert_eq!(algo, CompressionAlgo::None, "small data should select None");
    }

    #[test]
    fn test_select_brotli_low_entropy() {
        let data = vec![b'A'; 512];
        let (algo, _) = select_oxi_algorithm(&data);
        assert_eq!(
            algo,
            CompressionAlgo::OxiBrotli,
            "very low entropy should select OxiBrotli"
        );
    }

    #[test]
    fn test_select_zstd_structured_data() {
        let json = r#"{"key":"value","n":1}"#;
        let data: Vec<u8> = json.bytes().cycle().take(1024).collect();
        let entropy = estimate_entropy(&data);
        let (algo, _) = select_oxi_algorithm(&data);
        assert!(
            matches!(algo, CompressionAlgo::OxiZstd | CompressionAlgo::OxiBrotli),
            "structured repeated data (entropy={entropy:.2}) should select Zstd or Brotli, got {algo:?}"
        );
    }

    #[test]
    fn test_oxi_lz4_roundtrip() {
        let original: Vec<u8> = b"Hello LZ4! ".iter().copied().cycle().take(1024).collect();
        let compressed = compress_oxi_lz4(&original).expect("lz4 compress");
        let decompressed = decompress_oxi_lz4(&compressed).expect("lz4 decompress");
        assert_eq!(decompressed, original, "LZ4 round-trip mismatch");
    }

    #[test]
    fn test_oxi_zstd_roundtrip() {
        let original: Vec<u8> = b"Zstd data! ".iter().copied().cycle().take(1024).collect();
        let compressed = compress_oxi_zstd(&original).expect("zstd compress");
        let decompressed = decompress_oxi_zstd(&compressed).expect("zstd decompress");
        assert_eq!(decompressed, original, "Zstd round-trip mismatch");
    }

    #[test]
    fn test_oxi_brotli_roundtrip() {
        let original: Vec<u8> = b"Brotli text! "
            .iter()
            .copied()
            .cycle()
            .take(1024)
            .collect();
        let compressed = compress_oxi_brotli(&original).expect("brotli compress");
        let decompressed = decompress_oxi_brotli(&compressed).expect("brotli decompress");
        assert_eq!(decompressed, original, "Brotli round-trip mismatch");
    }

    #[test]
    fn test_auto_compress_round_trip_repeated() {
        let data: Vec<u8> = b"abcabcabc".iter().copied().cycle().take(2048).collect();
        let compressed = auto_compress(&data).expect("auto_compress failed");
        assert!(!compressed.is_empty());
        let decompressed = auto_decompress(&compressed).expect("auto_decompress failed");
        assert_eq!(decompressed, data, "auto round-trip mismatch");
    }

    #[test]
    fn test_auto_compress_empty() {
        // Empty input still gets the one-byte framing tag.
        let result = auto_compress(&[]).expect("auto_compress empty");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], TAG_NONE);
        let decompressed = auto_decompress(&result).expect("auto_decompress empty");
        assert!(decompressed.is_empty());
    }

    #[test]
    fn test_auto_decompress_empty_input() {
        let result = auto_decompress(&[]).expect("auto_decompress of empty slice");
        assert!(result.is_empty());
    }

    #[test]
    fn test_auto_compress_high_entropy_passthrough() {
        let data: Vec<u8> = (0u8..=255).cycle().take(2048).collect();
        let compressed = auto_compress(&data).expect("auto_compress high entropy");
        assert_eq!(
            compressed[0], TAG_NONE,
            "high-entropy data should use passthrough tag"
        );
        let decompressed = auto_decompress(&compressed).expect("auto_decompress");
        assert_eq!(decompressed, data);
    }

    #[test]
    fn test_adaptive_compression_json() {
        let json = r#"{"id":1,"name":"Alice","score":42.0,"active":true}"#;
        let data: Vec<u8> = json.bytes().cycle().take(8192).collect();
        let original_size = data.len();
        let compressed = auto_compress(&data).expect("auto_compress json");
        let ratio = compressed.len() as f64 / original_size as f64;
        let decompressed = auto_decompress(&compressed).expect("auto_decompress json");
        assert_eq!(decompressed, data, "JSON round-trip mismatch");
        let _ = ratio;
    }
}