use crate::error::{Result, YxdbError};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionAlgorithm {
Lzf,
}
impl CompressionAlgorithm {
pub fn version_id(self) -> i32 {
match self {
CompressionAlgorithm::Lzf => 1,
}
}
pub fn from_version_id(v: i32) -> Result<Option<Self>> {
match v {
0 => Ok(None),
1 => Ok(Some(CompressionAlgorithm::Lzf)),
_ => Err(YxdbError::InvalidFile(format!(
"unsupported compression version: {v} (expected 0=uncompressed or 1=LZF)"
))),
}
}
}
extern "C" {
fn lzf_decompress(
in_data: *const u8,
in_len: std::ffi::c_uint,
out_data: *mut u8,
out_len: std::ffi::c_uint,
) -> std::ffi::c_uint;
}
pub fn decompress_into(input: &[u8], out: &mut [u8]) -> Result<usize> {
if input.is_empty() {
return Ok(0);
}
let result = unsafe {
lzf_decompress(
input.as_ptr(),
input.len() as std::ffi::c_uint,
out.as_mut_ptr(),
out.len() as std::ffi::c_uint,
)
};
if result == 0 {
Err(YxdbError::LzfError("C lzf_decompress failed".into()))
} else if (result as usize) > out.len() {
Err(YxdbError::LzfError(format!(
"C lzf_decompress returned {} but output buffer is only {} bytes",
result,
out.len()
)))
} else {
Ok(result as usize)
}
}
const HASH_SIZE: usize = 1 << 16; const MAX_LIT: usize = 32; const MAX_REF: usize = 264; const MAX_OFF: usize = 8192;
pub fn compress(input: &[u8]) -> Option<Vec<u8>> {
let in_len = input.len();
if in_len <= 4 {
return None; }
let mut out = Vec::with_capacity(in_len + (in_len / 16) + 16);
let mut hash_table = [0u32; HASH_SIZE];
let mut ip: usize; let mut lit: usize = 0; let mut lit_start: usize;
out.push(0);
lit_start = 0;
let mut hval: u32 = ((input[0] as u32) << 8) | (input[1] as u32);
ip = 0;
while ip < in_len - 2 {
hval = (hval << 8) | (input[ip + 2] as u32);
let h = hash_idx(hval);
let ref_pos = hash_table[h] as usize;
hash_table[h] = ip as u32;
let off = ip.wrapping_sub(ref_pos).wrapping_sub(1);
if off < MAX_OFF
&& ref_pos > 0
&& ref_pos < ip
&& input[ref_pos + 2] == input[ip + 2]
&& input[ref_pos] == input[ip]
&& input[ref_pos + 1] == input[ip + 1]
{
let mut match_len = 3;
let max_match = MAX_REF.min(in_len - ip);
while match_len < max_match && input[ref_pos + match_len] == input[ip + match_len] {
match_len += 1;
}
if lit > 0 {
out[lit_start] = (lit - 1) as u8;
lit = 0;
} else {
out.pop();
}
let len_code = match_len - 2;
if len_code < 7 {
out.push(((len_code as u8) << 5) | ((off >> 8) as u8));
out.push((off & 0xFF) as u8);
} else {
out.push((7 << 5) | ((off >> 8) as u8));
out.push((len_code - 7) as u8);
out.push((off & 0xFF) as u8);
}
ip += 1; ip += match_len - 1;
if ip >= in_len - 2 {
lit_start = out.len();
out.push(0);
break;
}
ip -= 2;
hval = ((input[ip] as u32) << 8) | (input[ip + 1] as u32);
hval = (hval << 8) | (input[ip + 2] as u32);
hash_table[hash_idx(hval)] = ip as u32;
ip += 1;
hval = (hval << 8) | (input[ip + 2] as u32);
hash_table[hash_idx(hval)] = ip as u32;
ip += 1;
lit_start = out.len();
out.push(0);
} else {
out.push(input[ip]);
lit += 1;
ip += 1;
if lit == MAX_LIT {
out[lit_start] = (lit - 1) as u8;
lit = 0;
lit_start = out.len();
out.push(0);
}
}
}
while ip < in_len {
out.push(input[ip]);
lit += 1;
ip += 1;
if lit == MAX_LIT {
out[lit_start] = (lit - 1) as u8;
lit = 0;
lit_start = out.len();
out.push(0);
}
}
if lit > 0 {
out[lit_start] = (lit - 1) as u8;
} else {
out.pop();
}
if out.len() < in_len {
Some(out)
} else {
None
}
}
#[inline]
fn hash_idx(hval: u32) -> usize {
((hval >> 8).wrapping_sub(hval.wrapping_mul(5))) as usize & (HASH_SIZE - 1)
}
pub fn decompress(input: &[u8], out: &mut [u8]) -> Result<usize> {
decompress_into(input, out)
}
pub fn decompress_block_into(
algo: CompressionAlgorithm,
input: &[u8],
out: &mut [u8],
) -> Result<usize> {
match algo {
CompressionAlgorithm::Lzf => decompress_into(input, out),
}
}
pub fn decompress_block(algo: CompressionAlgorithm, input: &[u8], out: &mut [u8]) -> Result<usize> {
match algo {
CompressionAlgorithm::Lzf => decompress(input, out),
}
}
pub fn compress_block(algo: CompressionAlgorithm, input: &[u8]) -> Option<Vec<u8>> {
match algo {
CompressionAlgorithm::Lzf => compress(input),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn decompress_literal_only() {
let input = [4u8, b'H', b'e', b'l', b'l', b'o'];
let mut out = vec![0u8; 256];
let n = decompress(&input, &mut out).unwrap();
assert_eq!(n, 5);
assert_eq!(&out[..5], b"Hello");
}
#[test]
fn roundtrip_identity() {
let data = b"ABCDEFGHIJ";
let mut input = vec![9u8];
input.extend_from_slice(data);
let mut out = vec![0u8; 256];
let n = decompress(&input, &mut out).unwrap();
assert_eq!(&out[..n], data);
}
#[test]
fn compress_decompress_roundtrip() {
let data = b"Hello Hello Hello Hello Hello Hello Hello Hello World!";
let compressed = compress(data).expect("should compress");
assert!(compressed.len() < data.len());
let mut decompressed = vec![0u8; 256];
let n = decompress(&compressed, &mut decompressed).unwrap();
assert_eq!(&decompressed[..n], &data[..]);
}
#[test]
fn compress_decompress_large_block() {
let mut data = Vec::with_capacity(4096);
for i in 0..4096 {
data.push((i % 256) as u8);
}
let compressed = compress(&data).expect("should compress");
assert!(compressed.len() < data.len());
let mut decompressed = vec![0u8; 8192];
let n = decompress(&compressed, &mut decompressed).unwrap();
assert_eq!(&decompressed[..n], &data[..]);
}
#[test]
fn compress_returns_none_for_tiny_input() {
assert!(compress(b"ab").is_none());
}
#[test]
fn compress_returns_none_for_random_data() {
let data: Vec<u8> = (0..100).map(|i| (i * 37 + 13) as u8).collect();
if let Some(compressed) = compress(&data) {
let mut decompressed = vec![0u8; 256];
let n = decompress(&compressed, &mut decompressed).unwrap();
assert_eq!(&decompressed[..n], &data[..]);
}
}
#[test]
fn compression_algorithm_version_ids() {
assert_eq!(CompressionAlgorithm::Lzf.version_id(), 1);
assert_eq!(CompressionAlgorithm::from_version_id(0).unwrap(), None);
assert_eq!(
CompressionAlgorithm::from_version_id(1).unwrap(),
Some(CompressionAlgorithm::Lzf)
);
assert!(CompressionAlgorithm::from_version_id(2).is_err());
assert!(CompressionAlgorithm::from_version_id(3).is_err());
}
#[test]
fn decompress_block_dispatch() {
let data = b"Hello Hello Hello Hello Hello Hello Hello Hello World!";
let lzf_compressed = compress_block(CompressionAlgorithm::Lzf, data).unwrap();
let mut out = vec![0u8; 256];
let n =
decompress_block_into(CompressionAlgorithm::Lzf, &lzf_compressed, &mut out).unwrap();
assert_eq!(&out[..n], &data[..]);
}
#[test]
fn compress_all_zeros() {
let data = vec![0u8; 8192];
let compressed = compress(&data).expect("all-zeros should compress well");
assert!(compressed.len() < data.len() / 10);
let mut decompressed = vec![0u8; data.len()];
let n = decompress(&compressed, &mut decompressed).unwrap();
assert_eq!(&decompressed[..n], &data[..]);
}
#[test]
fn compress_all_same_byte() {
let data = vec![0xAA; 4096];
let compressed = compress(&data).expect("repeated byte should compress");
let mut decompressed = vec![0u8; data.len()];
let n = decompress(&compressed, &mut decompressed).unwrap();
assert_eq!(&decompressed[..n], &data[..]);
}
#[test]
fn compress_exactly_max_lit_boundary() {
let data: Vec<u8> = (0..64).map(|i| (i * 7 + 3) as u8).collect();
if let Some(compressed) = compress(&data) {
let mut decompressed = vec![0u8; 256];
let n = decompress(&compressed, &mut decompressed).unwrap();
assert_eq!(&decompressed[..n], &data[..]);
}
}
#[test]
fn compress_exactly_five_bytes() {
let data = b"AAAAA";
if let Some(compressed) = compress(data) {
let mut decompressed = vec![0u8; 256];
let n = decompress(&compressed, &mut decompressed).unwrap();
assert_eq!(&decompressed[..n], &data[..]);
}
}
#[test]
fn compress_block_size_data() {
let data: Vec<u8> = (0..262144).map(|i| ((i * 13 + 7) % 256) as u8).collect();
if let Some(compressed) = compress(&data) {
let mut decompressed = vec![0u8; data.len()];
let n = decompress(&compressed, &mut decompressed).unwrap();
assert_eq!(n, data.len());
assert_eq!(&decompressed[..n], &data[..]);
}
}
#[test]
fn compress_long_back_reference() {
let mut data = vec![b'A'; 300];
data.extend_from_slice(b"XYZ_UNIQUE_SUFFIX");
let compressed = compress(&data).expect("should compress with long matches");
let mut decompressed = vec![0u8; data.len()];
let n = decompress(&compressed, &mut decompressed).unwrap();
assert_eq!(&decompressed[..n], &data[..]);
}
#[test]
fn compress_alternating_pattern() {
let data: Vec<u8> = (0..1000)
.map(|i| if i % 2 == 0 { 0xAA } else { 0x55 })
.collect();
if let Some(compressed) = compress(&data) {
let mut decompressed = vec![0u8; data.len()];
let n = decompress(&compressed, &mut decompressed).unwrap();
assert_eq!(&decompressed[..n], &data[..]);
}
}
#[test]
fn decompress_empty_input() {
let mut out = vec![0u8; 64];
let n = decompress(&[], &mut out).unwrap();
assert_eq!(n, 0);
}
#[test]
fn roundtrip_many_sizes() {
for size in [5, 10, 31, 32, 33, 63, 64, 100, 255, 256, 1000, 4096, 10000] {
let data: Vec<u8> = (0..size).map(|i| ((i * 3 + 1) % 256) as u8).collect();
if let Some(compressed) = compress(&data) {
let mut decompressed = vec![0u8; size + 64];
let n = decompress(&compressed, &mut decompressed).unwrap();
assert_eq!(&decompressed[..n], &data[..], "failed for size {size}");
}
}
}
}