use alloc::vec::Vec;
use crate::error::Error;
const MIN_MATCH: usize = 4;
const MAX_DISTANCE: usize = 65_535;
const LAST_LITERALS: usize = 5;
const MFLIMIT: usize = 12;
const HASH_LOG: u32 = 12;
const HASH_TABLE_SIZE: usize = 1 << HASH_LOG;
const HASH_EMPTY: u32 = u32::MAX;
#[inline]
fn hash4(bytes: [u8; 4]) -> usize {
let v = u32::from_le_bytes(bytes);
((v.wrapping_mul(2_654_435_761)) >> (32 - HASH_LOG)) as usize
}
pub fn compress_bound(input_len: usize) -> usize {
input_len + (input_len / 255) + 16
}
pub fn encode_block(input: &[u8], out: &mut Vec<u8>) {
out.clear();
if input.is_empty() {
return;
}
if input.len() < MFLIMIT + 1 {
emit_last_literals(input, out);
return;
}
let mut table = [HASH_EMPTY; HASH_TABLE_SIZE];
let mut ip: usize = 0; let mut anchor: usize = 0;
let match_limit = input.len() - MFLIMIT;
let hash_limit = input.len() - MIN_MATCH - LAST_LITERALS;
let mut next_ip = ip;
while next_ip <= match_limit {
ip = next_ip;
let mut step = 1usize;
let mut search_match_nb = 1u32 << 6;
let mut match_pos;
loop {
if ip > hash_limit {
emit_last_literals(&input[anchor..], out);
return;
}
let h = hash4([input[ip], input[ip + 1], input[ip + 2], input[ip + 3]]);
let candidate = table[h];
table[h] = ip as u32;
if candidate != HASH_EMPTY {
let cand = candidate as usize;
if ip - cand <= MAX_DISTANCE
&& input[cand] == input[ip]
&& input[cand + 1] == input[ip + 1]
&& input[cand + 2] == input[ip + 2]
&& input[cand + 3] == input[ip + 3]
{
match_pos = cand;
break;
}
}
next_ip = ip + step;
step = (search_match_nb >> 6) as usize;
search_match_nb += 1;
ip = next_ip;
}
while ip > anchor && match_pos > 0 && input[ip - 1] == input[match_pos - 1] {
ip -= 1;
match_pos -= 1;
}
let forward_limit = input.len() - LAST_LITERALS;
let mut match_len = MIN_MATCH;
while ip + match_len < forward_limit
&& input[match_pos + match_len] == input[ip + match_len]
{
match_len += 1;
}
let literal_len = ip - anchor;
let offset = (ip - match_pos) as u16;
let match_excess = match_len - MIN_MATCH;
emit_sequence(&input[anchor..ip], literal_len, offset, match_excess, out);
ip += match_len;
anchor = ip;
if ip >= 2 {
let seed = ip - 2;
if seed + MIN_MATCH <= input.len() {
let h = hash4([
input[seed],
input[seed + 1],
input[seed + 2],
input[seed + 3],
]);
table[h] = seed as u32;
}
}
next_ip = ip;
}
emit_last_literals(&input[anchor..], out);
}
fn emit_sequence(
literals: &[u8],
literal_len: usize,
offset: u16,
match_excess: usize,
out: &mut Vec<u8>,
) {
let lit_high = if literal_len >= 15 {
15u8
} else {
literal_len as u8
};
let match_low = if match_excess >= 15 {
15u8
} else {
match_excess as u8
};
let token = (lit_high << 4) | match_low;
out.push(token);
if literal_len >= 15 {
let mut rem = literal_len - 15;
while rem >= 255 {
out.push(255);
rem -= 255;
}
out.push(rem as u8);
}
out.extend_from_slice(literals);
out.push((offset & 0xFF) as u8);
out.push((offset >> 8) as u8);
if match_excess >= 15 {
let mut rem = match_excess - 15;
while rem >= 255 {
out.push(255);
rem -= 255;
}
out.push(rem as u8);
}
}
fn emit_last_literals(literals: &[u8], out: &mut Vec<u8>) {
let literal_len = literals.len();
let lit_high = if literal_len >= 15 {
15u8
} else {
literal_len as u8
};
out.push(lit_high << 4);
if literal_len >= 15 {
let mut rem = literal_len - 15;
while rem >= 255 {
out.push(255);
rem -= 255;
}
out.push(rem as u8);
}
out.extend_from_slice(literals);
}
pub fn decode_block(input: &[u8], out: &mut Vec<u8>, raw_max: usize) -> Result<(), Error> {
out.clear();
if input.is_empty() {
return Ok(());
}
let mut ip = 0usize;
let n = input.len();
loop {
if ip >= n {
return Err(Error::UnexpectedEnd);
}
let token = input[ip];
ip += 1;
let mut lit_len = (token >> 4) as usize;
if lit_len == 15 {
loop {
if ip >= n {
return Err(Error::UnexpectedEnd);
}
let b = input[ip];
ip += 1;
lit_len = lit_len.checked_add(b as usize).ok_or(Error::Corrupt)?;
if b != 255 {
break;
}
}
}
if lit_len > 0 {
if ip + lit_len > n {
return Err(Error::UnexpectedEnd);
}
if out.len() + lit_len > raw_max {
return Err(Error::Corrupt);
}
out.extend_from_slice(&input[ip..ip + lit_len]);
ip += lit_len;
}
if ip == n {
return Ok(());
}
if ip + 2 > n {
return Err(Error::UnexpectedEnd);
}
let offset = (input[ip] as usize) | ((input[ip + 1] as usize) << 8);
ip += 2;
if offset == 0 {
return Err(Error::InvalidDistance);
}
if offset > out.len() {
return Err(Error::InvalidDistance);
}
let mut match_excess = (token & 0x0F) as usize;
if match_excess == 15 {
loop {
if ip >= n {
return Err(Error::UnexpectedEnd);
}
let b = input[ip];
ip += 1;
match_excess = match_excess.checked_add(b as usize).ok_or(Error::Corrupt)?;
if b != 255 {
break;
}
}
}
let match_len = MIN_MATCH + match_excess;
if out.len() + match_len > raw_max {
return Err(Error::Corrupt);
}
let start = out.len() - offset;
if offset >= match_len {
out.extend_from_within(start..start + match_len);
} else if offset == 1 {
let b = out[start];
out.resize(out.len() + match_len, b);
} else {
for i in 0..match_len {
let b = out[start + i];
out.push(b);
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn round_trip(data: &[u8]) {
let mut encoded = Vec::new();
encode_block(data, &mut encoded);
let mut decoded = Vec::new();
decode_block(&encoded, &mut decoded, usize::MAX).expect("decode");
assert_eq!(decoded, data);
}
#[test]
fn empty() {
round_trip(&[]);
}
#[test]
fn short() {
round_trip(b"hello");
}
#[test]
fn run() {
let v = alloc::vec![b'a'; 1024];
round_trip(&v);
}
#[test]
fn repeated_text() {
let mut v = Vec::new();
for _ in 0..200 {
v.extend_from_slice(b"the quick brown fox jumps over the lazy dog. ");
}
round_trip(&v);
}
}