use crate::error::{Error, Result};
const HLOG: usize = 14;
const HSIZE: usize = 1 << HLOG;
const MAX_LIT: usize = 32;
const MAX_OFF: usize = 8192;
const MAX_REF: usize = 264;
pub fn decompress(input: &[u8], output_len: usize) -> Result<Vec<u8>> {
let mut output = vec![0u8; output_len];
let mut in_pos = 0;
let mut out_pos = 0;
while in_pos < input.len() {
let ctrl = input[in_pos];
in_pos += 1;
if ctrl < 32 {
let len = ctrl as usize + 1;
if in_pos + len > input.len() {
return Err(Error::ParseError {
line: 0,
desc: "LZF decompression error: literal run exceeds input size".into(),
});
}
if out_pos + len > output_len {
return Err(Error::ParseError {
line: 0,
desc: "LZF decompression error: output buffer overflow".into(),
});
}
output[out_pos..out_pos + len].copy_from_slice(&input[in_pos..in_pos + len]);
in_pos += len;
out_pos += len;
} else {
let mut len = (ctrl >> 5) as usize;
if len == 7 {
if in_pos >= input.len() {
return Err(Error::ParseError {
line: 0,
desc: "LZF decompression error: long match length exceeds input".into(),
});
}
len += input[in_pos] as usize;
in_pos += 1;
}
len += 2;
if in_pos >= input.len() {
return Err(Error::ParseError {
line: 0,
desc: "LZF decompression error: reference offset exceeds input size".into(),
});
}
let high_offset = ((ctrl & 0x1f) as usize) << 8;
let offset = high_offset + input[in_pos] as usize + 1;
in_pos += 1;
if offset > out_pos {
return Err(Error::ParseError {
line: 0,
desc: format!(
"LZF decompression error: invalid back reference (offset {} > position {})",
offset, out_pos
),
});
}
if out_pos + len > output_len {
return Err(Error::ParseError {
line: 0,
desc: "LZF decompression error: output buffer overflow".into(),
});
}
let src_pos = out_pos - offset;
if offset >= len {
output.copy_within(src_pos..src_pos + len, out_pos);
} else {
for i in 0..len {
output[out_pos + i] = output[src_pos + i];
}
}
out_pos += len;
}
}
if out_pos != output_len {
return Err(Error::ParseError {
line: 0,
desc: format!(
"LZF decompression error: expected {} bytes, got {}",
output_len, out_pos
),
});
}
Ok(output)
}
pub fn compress(input: &[u8]) -> Result<Vec<u8>> {
if input.is_empty() {
return Ok(Vec::new());
}
let mut output = Vec::with_capacity(input.len() + input.len() / 16 + 64);
let mut htab = vec![0usize; HSIZE];
let mut in_pos = 0;
let mut lit = 0;
let mut lit_pos = 0;
while in_pos < input.len() {
if in_pos + 4 <= input.len() {
let hval = hash(&input[in_pos..in_pos + 3]);
let ref_pos = htab[hval];
htab[hval] = in_pos;
if ref_pos != 0
&& in_pos > ref_pos
&& in_pos - ref_pos <= MAX_OFF
&& input[ref_pos] == input[in_pos]
&& input[ref_pos + 1] == input[in_pos + 1]
&& input[ref_pos + 2] == input[in_pos + 2]
{
let mut match_len = 3;
let max_len = std::cmp::min(MAX_REF, input.len() - in_pos);
while match_len < max_len
&& ref_pos + match_len < input.len()
&& input[ref_pos + match_len] == input[in_pos + match_len]
{
match_len += 1;
}
if lit > 0 {
output[lit_pos] = (lit - 1) as u8;
lit = 0;
}
let offset = in_pos - ref_pos - 1;
let len = match_len - 2;
if len < 7 {
output.push(((offset >> 8) as u8) | ((len as u8) << 5));
} else {
output.push(((offset >> 8) as u8) | 0xe0);
output.push((len - 7) as u8);
}
output.push((offset & 0xff) as u8);
in_pos += match_len;
continue;
}
}
if lit == 0 {
lit_pos = output.len();
output.push(0); }
output.push(input[in_pos]);
lit += 1;
in_pos += 1;
if lit == MAX_LIT {
output[lit_pos] = (MAX_LIT - 1) as u8;
lit = 0;
}
}
if lit > 0 {
output[lit_pos] = (lit - 1) as u8;
}
Ok(output)
}
fn hash(data: &[u8]) -> usize {
if data.len() < 3 {
return 0;
}
let v = ((data[0] as usize) << 16) | ((data[1] as usize) << 8) | (data[2] as usize);
((v >> (24 - HLOG)) ^ v) & (HSIZE - 1)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_compress_decompress() {
let original = b"Hello, world! This is a test of LZF compression. Hello, world!";
let compressed = compress(original).unwrap();
assert!(compressed.len() < original.len());
let decompressed = decompress(&compressed, original.len()).unwrap();
assert_eq!(decompressed, original);
}
#[test]
fn test_empty_data() {
let original = b"";
let compressed = compress(original).unwrap();
assert_eq!(compressed.len(), 0);
}
#[test]
fn test_incompressible_data() {
let original: Vec<u8> = (0..100).map(|i| (i * 7 + 13) as u8).collect();
let compressed = compress(&original).unwrap();
let decompressed = decompress(&compressed, original.len()).unwrap();
assert_eq!(decompressed, original);
}
#[test]
fn test_repetitive_data() {
let original = vec![42u8; 1000];
let compressed = compress(&original).unwrap();
assert!(compressed.len() < original.len() / 10);
let decompressed = decompress(&compressed, original.len()).unwrap();
assert_eq!(decompressed, original);
}
#[test]
fn test_short_patterns() {
let original = b"aaabbbcccaaabbbccc";
let compressed = compress(original).unwrap();
let decompressed = decompress(&compressed, original.len()).unwrap();
assert_eq!(decompressed, original);
}
#[test]
fn test_single_byte() {
let original = b"x";
let compressed = compress(original).unwrap();
let decompressed = decompress(&compressed, original.len()).unwrap();
assert_eq!(decompressed, original);
}
#[test]
fn test_max_literal_run() {
let mut original = Vec::new();
for i in 0..32 {
original.push((i * 7) as u8);
}
let compressed = compress(&original).unwrap();
let decompressed = decompress(&compressed, original.len()).unwrap();
assert_eq!(decompressed, original);
}
#[test]
fn test_long_match() {
let mut original = Vec::new();
let pattern = b"abcdefghijklmnop";
original.extend_from_slice(pattern);
original.extend_from_slice(b"xyz");
original.extend_from_slice(pattern);
let compressed = compress(&original).unwrap();
let decompressed = decompress(&compressed, original.len()).unwrap();
assert_eq!(decompressed, original);
}
#[test]
fn test_overlapping_copy() {
let original = b"abcabcabcabcabc";
let compressed = compress(original).unwrap();
let decompressed = decompress(&compressed, original.len()).unwrap();
assert_eq!(decompressed, original);
}
#[test]
fn test_binary_data() {
let original = vec![
0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00, 0x80, 0x3f, 0x00, 0x00, 0x00, 0x40, ];
let compressed = compress(&original).unwrap();
let decompressed = decompress(&compressed, original.len()).unwrap();
assert_eq!(decompressed, original);
}
#[test]
fn test_decompress_invalid_literal_run() {
let invalid = vec![31, 1, 2]; let result = decompress(&invalid, 32);
assert!(result.is_err());
}
#[test]
fn test_decompress_invalid_back_ref() {
let invalid = vec![0x80, 0x10]; let result = decompress(&invalid, 10);
assert!(result.is_err());
}
#[test]
fn test_large_data() {
let mut original = Vec::with_capacity(1024);
for i in 0..256 {
original.extend_from_slice(&[i as u8; 4]);
}
let compressed = compress(&original).unwrap();
let decompressed = decompress(&compressed, original.len()).unwrap();
assert_eq!(decompressed, original);
}
}