use alloc::vec::Vec;
use crate::error::Error;
const MIN_MATCH: usize = 3;
const M3_MAX_DISTANCE: usize = 16384;
const M4_MAX_DISTANCE: usize = 0xBFFF; const M2_MAX_DISTANCE: usize = 0x800;
const HASH_LOG: u32 = 13;
const HASH_TABLE_SIZE: usize = 1 << HASH_LOG;
const HASH_EMPTY: u32 = u32::MAX;
#[inline]
fn hash4(bytes: [u8; 4]) -> usize {
let v = u32::from_le_bytes(bytes);
((v.wrapping_mul(2_654_435_761)) >> (32 - HASH_LOG)) as usize
}
pub fn compress_bound(input_len: usize) -> usize {
input_len + (input_len / 16) + 64 + 3
}
pub fn encode_block(input: &[u8], out: &mut Vec<u8>) {
out.clear();
if input.is_empty() {
emit_eos(out);
return;
}
if input.len() < MIN_MATCH + 4 {
emit_initial_literals(input, out);
emit_eos(out);
return;
}
let mut table = alloc::vec![HASH_EMPTY; HASH_TABLE_SIZE];
let mut ip: usize = 0;
let mut anchor: usize = 0;
let mut last_token_idx: Option<usize> = None;
let in_len = input.len();
let hash_limit = in_len.saturating_sub(4);
while ip < hash_limit {
let h = hash4([input[ip], input[ip + 1], input[ip + 2], input[ip + 3]]);
let candidate = table[h];
table[h] = ip as u32;
let mut found = false;
let mut match_pos = 0usize;
if candidate != HASH_EMPTY {
let cand = candidate as usize;
if cand < ip {
let distance = ip - cand;
if distance <= M4_MAX_DISTANCE
&& input[cand] == input[ip]
&& input[cand + 1] == input[ip + 1]
&& input[cand + 2] == input[ip + 2]
&& input[cand + 3] == input[ip + 3]
{
found = true;
match_pos = cand;
}
}
}
if !found {
ip += 1;
continue;
}
let mut match_len = 4usize;
while ip + match_len < in_len && input[match_pos + match_len] == input[ip + match_len] {
match_len += 1;
}
let literal_len = ip - anchor;
let distance = ip - match_pos;
match last_token_idx {
None => emit_initial_literals(&input[anchor..ip], out),
Some(_) if literal_len == 0 => {
}
Some(tok_idx) if literal_len <= 3 => {
patch_inline_literal_count(out, tok_idx, literal_len);
out.extend_from_slice(&input[anchor..ip]);
}
Some(_) => {
emit_long_literal_run(&input[anchor..ip], out);
}
}
let tok = emit_copy(distance, match_len, out);
last_token_idx = Some(tok);
ip += match_len;
anchor = ip;
if ip < hash_limit {
let probe = ip - 1;
let h2 = hash4([
input[probe],
input[probe + 1],
input[probe + 2],
input[probe + 3],
]);
table[h2] = probe as u32;
}
}
let trailing = in_len - anchor;
match last_token_idx {
None => emit_initial_literals(&input[anchor..in_len], out),
Some(_) if trailing == 0 => {
}
Some(tok_idx) if trailing <= 3 => {
patch_inline_literal_count(out, tok_idx, trailing);
out.extend_from_slice(&input[anchor..in_len]);
}
Some(_) => emit_long_literal_run(&input[anchor..in_len], out),
}
emit_eos(out);
}
fn emit_initial_literals(literals: &[u8], out: &mut Vec<u8>) {
let n = literals.len();
if n == 0 {
return;
}
if n <= 238 {
out.push((n + 17) as u8);
out.extend_from_slice(literals);
return;
}
out.push(0x00);
let mut rem = n - 18; while rem > 255 {
out.push(0);
rem -= 255;
}
out.push(rem as u8);
out.extend_from_slice(literals);
}
fn emit_copy(distance: usize, length: usize, out: &mut Vec<u8>) -> usize {
debug_assert!(length >= MIN_MATCH);
debug_assert!((1..=M4_MAX_DISTANCE).contains(&distance));
if length <= 8 && distance <= M2_MAX_DISTANCE && length >= 3 {
let d = distance - 1;
let d_lo = (d & 0x7) as u8;
let d_hi = ((d >> 3) & 0xFF) as u8;
let tok_idx = out.len();
if length <= 4 {
let l = (length - 3) as u8; let token = 0x40 | (l << 5) | (d_lo << 2);
out.push(token);
out.push(d_hi);
} else {
let l = (length - 5) as u8; let token = 0x80 | (l << 5) | (d_lo << 2);
out.push(token);
out.push(d_hi);
}
return tok_idx;
}
if distance <= M3_MAX_DISTANCE {
let d = distance - 1;
debug_assert!(d <= 0x3FFF);
let tok_idx = out.len();
if length <= 33 {
let token = 0x20 | ((length - 2) as u8);
out.push(token);
} else {
out.push(0x20);
let mut rem = length - 33;
while rem > 255 {
out.push(0);
rem -= 255;
}
out.push(rem as u8);
}
let off_word = (d as u16) << 2; out.push((off_word & 0xFF) as u8);
out.push((off_word >> 8) as u8);
return tok_idx;
}
debug_assert!(distance <= M4_MAX_DISTANCE);
let d = distance - M3_MAX_DISTANCE;
let h = ((d >> 14) & 0x1) as u8;
let d14 = (d & 0x3FFF) as u16;
let tok_idx = out.len();
if length <= 9 {
let token = 0x10 | (h << 3) | ((length - 2) as u8);
out.push(token);
} else {
let token = 0x10 | (h << 3);
out.push(token);
let mut rem = length - 9;
while rem > 255 {
out.push(0);
rem -= 255;
}
out.push(rem as u8);
}
let off_word = d14 << 2; out.push((off_word & 0xFF) as u8);
out.push((off_word >> 8) as u8);
tok_idx
}
fn patch_inline_literal_count(out: &mut [u8], tok_idx: usize, n: usize) {
debug_assert!((1..=3).contains(&n));
let s = n as u8;
let token = out[tok_idx];
if token >= 0x40 {
out[tok_idx] = (token & !0x03) | s;
} else {
let off_lo_idx = out.len() - 2;
out[off_lo_idx] = (out[off_lo_idx] & !0x03) | s;
}
}
fn emit_long_literal_run(literals: &[u8], out: &mut Vec<u8>) {
let n = literals.len();
debug_assert!(n >= 4);
if n <= 18 {
out.push((n - 3) as u8);
} else {
out.push(0x00);
let mut rem = n - 18;
while rem > 255 {
out.push(0);
rem -= 255;
}
out.push(rem as u8);
}
out.extend_from_slice(literals);
}
fn emit_eos(out: &mut Vec<u8>) {
out.extend_from_slice(&[0x11, 0x00, 0x00]);
}
pub fn decode_block(input: &[u8], out: &mut Vec<u8>) -> Result<(), Error> {
out.clear();
let n = input.len();
if n == 0 {
return Err(Error::UnexpectedEnd);
}
let mut ip = 0usize;
let mut state: u8 = 0;
let b0 = input[ip];
if b0 == 17 && n >= 5 {
ip += 2;
state = 0;
} else if b0 == 16 {
return Err(Error::Corrupt);
} else if (18..=255).contains(&b0) {
let lit_len = (b0 as usize) - 17;
ip += 1;
if ip + lit_len > n {
return Err(Error::UnexpectedEnd);
}
out.extend_from_slice(&input[ip..ip + lit_len]);
ip += lit_len;
state = if lit_len <= 3 { lit_len as u8 } else { 4 };
} else {
}
loop {
if ip >= n {
return Err(Error::UnexpectedEnd);
}
let t = input[ip];
ip += 1;
if t < 16 {
if state == 0 {
let mut lit_len = t as usize;
if lit_len == 0 {
lit_len = 15;
loop {
if ip >= n {
return Err(Error::UnexpectedEnd);
}
let b = input[ip];
ip += 1;
if b == 0 {
lit_len = lit_len.checked_add(255).ok_or(Error::Corrupt)?;
} else {
lit_len = lit_len.checked_add(b as usize).ok_or(Error::Corrupt)?;
break;
}
}
}
lit_len += 3;
if ip + lit_len > n {
return Err(Error::UnexpectedEnd);
}
out.extend_from_slice(&input[ip..ip + lit_len]);
ip += lit_len;
state = 4;
continue;
} else if state <= 3 {
let d_lo = ((t >> 2) & 0x3) as usize;
let s = (t & 0x3) as usize;
if ip >= n {
return Err(Error::UnexpectedEnd);
}
let h = input[ip] as usize;
ip += 1;
let distance = (h << 2) + d_lo + 1;
copy_match(out, distance, 2)?;
handle_trailing_literals(input, &mut ip, out, s, &mut state, n)?;
continue;
} else {
let d_lo = ((t >> 2) & 0x3) as usize;
let s = (t & 0x3) as usize;
if ip >= n {
return Err(Error::UnexpectedEnd);
}
let h = input[ip] as usize;
ip += 1;
let distance = (h << 2) + d_lo + 2049;
copy_match(out, distance, 3)?;
handle_trailing_literals(input, &mut ip, out, s, &mut state, n)?;
continue;
}
} else if t < 32 {
let h_bit = ((t >> 3) & 0x1) as usize;
let mut length = (t & 0x7) as usize;
if length == 0 {
length = 7;
loop {
if ip >= n {
return Err(Error::UnexpectedEnd);
}
let b = input[ip];
ip += 1;
if b == 0 {
length = length.checked_add(255).ok_or(Error::Corrupt)?;
} else {
length = length.checked_add(b as usize).ok_or(Error::Corrupt)?;
break;
}
}
}
length += 2;
if ip + 2 > n {
return Err(Error::UnexpectedEnd);
}
let off_word = (input[ip] as usize) | ((input[ip + 1] as usize) << 8);
ip += 2;
let s = off_word & 0x3;
let d = off_word >> 2;
let distance = 16384 + (h_bit << 14) + d;
if distance == 16384 {
return Ok(()); }
copy_match(out, distance, length)?;
handle_trailing_literals(input, &mut ip, out, s, &mut state, n)?;
continue;
} else if t < 64 {
let mut length = (t & 0x1F) as usize;
if length == 0 {
length = 31;
loop {
if ip >= n {
return Err(Error::UnexpectedEnd);
}
let b = input[ip];
ip += 1;
if b == 0 {
length = length.checked_add(255).ok_or(Error::Corrupt)?;
} else {
length = length.checked_add(b as usize).ok_or(Error::Corrupt)?;
break;
}
}
}
length += 2;
if ip + 2 > n {
return Err(Error::UnexpectedEnd);
}
let off_word = (input[ip] as usize) | ((input[ip + 1] as usize) << 8);
ip += 2;
let s = off_word & 0x3;
let d = off_word >> 2;
let distance = d + 1;
copy_match(out, distance, length)?;
handle_trailing_literals(input, &mut ip, out, s, &mut state, n)?;
continue;
} else if t < 128 {
let l = ((t >> 5) & 0x1) as usize;
let d_lo = ((t >> 2) & 0x7) as usize;
let s = (t & 0x3) as usize;
let length = 3 + l;
if ip >= n {
return Err(Error::UnexpectedEnd);
}
let h = input[ip] as usize;
ip += 1;
let distance = (h << 3) + d_lo + 1;
copy_match(out, distance, length)?;
handle_trailing_literals(input, &mut ip, out, s, &mut state, n)?;
continue;
} else {
let l = ((t >> 5) & 0x3) as usize;
let d_lo = ((t >> 2) & 0x7) as usize;
let s = (t & 0x3) as usize;
let length = 5 + l;
if ip >= n {
return Err(Error::UnexpectedEnd);
}
let h = input[ip] as usize;
ip += 1;
let distance = (h << 3) + d_lo + 1;
copy_match(out, distance, length)?;
handle_trailing_literals(input, &mut ip, out, s, &mut state, n)?;
continue;
}
}
}
fn copy_match(out: &mut Vec<u8>, distance: usize, length: usize) -> Result<(), Error> {
if distance == 0 || distance > out.len() {
return Err(Error::InvalidDistance);
}
let start = out.len() - distance;
for i in 0..length {
let b = out[start + i];
out.push(b);
}
Ok(())
}
fn handle_trailing_literals(
input: &[u8],
ip: &mut usize,
out: &mut Vec<u8>,
s: usize,
state: &mut u8,
n: usize,
) -> Result<(), Error> {
if s == 0 {
*state = 0;
return Ok(());
}
if *ip + s > n {
return Err(Error::UnexpectedEnd);
}
out.extend_from_slice(&input[*ip..*ip + s]);
*ip += s;
*state = s as u8;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
fn round_trip(data: &[u8]) {
let mut encoded = Vec::new();
encode_block(data, &mut encoded);
let mut decoded = Vec::new();
decode_block(&encoded, &mut decoded).expect("decode");
assert_eq!(decoded, data);
}
#[test]
fn empty() {
round_trip(&[]);
}
#[test]
fn short() {
round_trip(b"hello");
}
#[test]
fn short_no_match() {
round_trip(b"abcdefghij");
}
#[test]
fn repeated_pattern() {
round_trip(b"abcabcabcabcabcabcabcabcabc");
}
#[test]
fn ascii_text() {
let mut v = Vec::new();
for _ in 0..100 {
v.extend_from_slice(b"the quick brown fox jumps over the lazy dog. ");
}
round_trip(&v);
}
#[test]
fn run_of_one_byte() {
let v = alloc::vec![b'Z'; 4096];
round_trip(&v);
}
#[test]
fn lorem_progression() {
let lorem = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ";
for &sz in &[100usize, 127, 200, 500, 1000, 2000, 4000, 8000, 16384] {
let mut data = Vec::new();
while data.len() < sz {
data.extend_from_slice(lorem);
}
data.truncate(sz);
round_trip(&data);
}
}
}