extern crate alloc;
use alloc::collections::VecDeque;
use alloc::vec::Vec;
use crate::error::Error;
use crate::traits::{Flush, RawEncoder, RawProgress};
use super::huffman::{NUM_SYMBOLS, length_limited_huffman, lengths_to_codes, pack_lengths};
const BLOCK_BYTES: usize = 65536;
const MIN_MATCH: usize = 3;
const MAX_MATCH: usize = 65535 + 3; const HASH_BITS: u32 = 15;
const HASH_SIZE: usize = 1 << HASH_BITS;
const NIL: u32 = u32::MAX;
#[derive(Debug, Clone, Copy, Default)]
pub struct EncoderConfig {
pub level: u8,
}
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Phase {
NeedHeader,
Buffering,
Done,
}
pub struct Encoder {
in_buf: Vec<u8>,
out_buf: Vec<u8>,
out_idx: usize,
phase: Phase,
total_input: u64,
header_emitted: bool,
#[allow(dead_code)]
config: EncoderConfig,
head: Vec<u32>,
prev: Vec<u32>,
}
impl Encoder {
pub fn new() -> Self {
Self::with_config(EncoderConfig::default())
}
pub fn with_config(config: EncoderConfig) -> Self {
Self {
in_buf: Vec::new(),
out_buf: Vec::new(),
out_idx: 0,
phase: Phase::NeedHeader,
total_input: 0,
header_emitted: false,
config,
head: alloc::vec![NIL; HASH_SIZE],
prev: Vec::new(),
}
}
fn drain_out_into(&mut self, output: &mut [u8]) -> usize {
let avail = self.out_buf.len() - self.out_idx;
let n = avail.min(output.len());
output[..n].copy_from_slice(&self.out_buf[self.out_idx..self.out_idx + n]);
self.out_idx += n;
if self.out_idx == self.out_buf.len() {
self.out_buf.clear();
self.out_idx = 0;
}
n
}
fn flush_full_blocks(&mut self) -> Result<(), Error> {
while self.in_buf.len() >= BLOCK_BYTES {
let block: Vec<u8> = self.in_buf.drain(..BLOCK_BYTES).collect();
self.encode_block(&block, false)?;
}
Ok(())
}
fn encode_block(&mut self, data: &[u8], is_last: bool) -> Result<(), Error> {
let tokens = lz77_parse(data, &mut self.head, &mut self.prev);
let mut freqs = [0u32; NUM_SYMBOLS];
for tok in &tokens {
match tok {
Token::Literal(b) => freqs[*b as usize] += 1,
Token::Match { length, distance } => {
let dist_hi = high_bit_index(*distance);
let len_minus_3 = length - 3;
let class = if len_minus_3 < 15 { len_minus_3 } else { 15 };
let sym = 256 + class + 16 * dist_hi;
freqs[sym as usize] += 1;
}
}
}
if is_last {
freqs[256] += 1;
}
let lengths = length_limited_huffman(&freqs, 15);
let codes = lengths_to_codes(&lengths);
let packed = pack_lengths(&lengths);
self.out_buf.extend_from_slice(&packed);
let mut bw = BlockWriter::new();
for tok in &tokens {
match tok {
Token::Literal(b) => {
let sym = *b as usize;
bw.write_bits(codes[sym] as u32, lengths[sym] as u32, &mut self.out_buf);
}
Token::Match { length, distance } => {
let dist_hi = high_bit_index(*distance);
let len_minus_3 = length - 3;
let class = if len_minus_3 < 15 { len_minus_3 } else { 15 };
let sym = (256 + class + 16 * dist_hi) as usize;
bw.write_bits(codes[sym] as u32, lengths[sym] as u32, &mut self.out_buf);
if len_minus_3 >= 15 {
let remaining = len_minus_3 - 15;
if remaining < 255 {
bw.write_raw_byte(remaining as u8, &mut self.out_buf);
} else {
bw.write_raw_byte(255, &mut self.out_buf);
let full_len_minus_3 = len_minus_3 as u16;
bw.write_raw_byte((full_len_minus_3 & 0xFF) as u8, &mut self.out_buf);
bw.write_raw_byte(
((full_len_minus_3 >> 8) & 0xFF) as u8,
&mut self.out_buf,
);
}
}
if dist_hi > 0 {
let low_mask = (1u32 << dist_hi) - 1;
let dist_low = (*distance) & low_mask;
bw.write_bits(dist_low, dist_hi, &mut self.out_buf);
}
}
}
}
if is_last {
bw.write_bits(codes[256] as u32, lengths[256] as u32, &mut self.out_buf);
}
bw.pad_with(codes[256] as u32, lengths[256] as u32, &mut self.out_buf);
if is_last {
self.out_buf.push(0);
self.out_buf.push(0);
}
Ok(())
}
}
impl Default for Encoder {
fn default() -> Self {
Self::new()
}
}
impl RawEncoder for Encoder {
fn raw_encode(&mut self, input: &[u8], output: &mut [u8]) -> Result<RawProgress, Error> {
let mut consumed = 0usize;
let mut written = 0usize;
let _ = &mut written;
let _ = output;
match self.phase {
Phase::NeedHeader => {
self.phase = Phase::Buffering;
}
Phase::Done => {
return Ok(RawProgress {
consumed: 0,
written: 0,
done: false,
});
}
Phase::Buffering => {}
}
if !input.is_empty() {
self.in_buf.extend_from_slice(input);
self.total_input += input.len() as u64;
consumed = input.len();
}
self.flush_full_blocks()?;
Ok(RawProgress {
consumed,
written,
done: false,
})
}
fn raw_finish(&mut self, output: &mut [u8]) -> Result<RawProgress, Error> {
let mut written = 0usize;
if !matches!(self.phase, Phase::Done) {
if self.in_buf.len() >= BLOCK_BYTES {
self.flush_full_blocks()?;
}
if !self.in_buf.is_empty() {
let tail: Vec<u8> = core::mem::take(&mut self.in_buf);
self.encode_block(&tail, true)?;
} else if self.total_input == 0 {
} else {
let empty: Vec<u8> = Vec::new();
self.encode_block(&empty, true)?;
}
if !self.header_emitted {
let hdr = (self.total_input as u32).to_le_bytes();
self.out_buf.splice(0..0, hdr.iter().copied());
self.header_emitted = true;
}
self.phase = Phase::Done;
}
written += self.drain_out_into(&mut output[written..]);
let done = self.out_idx == self.out_buf.len();
Ok(RawProgress {
consumed: 0,
written,
done,
})
}
fn raw_reset(&mut self) {
self.in_buf.clear();
self.out_buf.clear();
self.out_idx = 0;
self.phase = Phase::NeedHeader;
self.total_input = 0;
self.header_emitted = false;
for h in self.head.iter_mut() {
*h = NIL;
}
self.prev.clear();
}
fn raw_flush(&mut self, _output: &mut [u8], _mode: Flush) -> Result<RawProgress, Error> {
Ok(RawProgress {
consumed: 0,
written: 0,
done: true,
})
}
}
#[derive(Clone, Copy, Debug)]
enum Token {
Literal(u8),
Match { length: u32, distance: u32 },
}
fn hash3(b: &[u8]) -> u32 {
let v = (b[0] as u32) | ((b[1] as u32) << 8) | ((b[2] as u32) << 16);
v.wrapping_mul(0x9E37_79B1) >> (32 - HASH_BITS)
}
fn lz77_parse(data: &[u8], head: &mut [u32], prev: &mut Vec<u32>) -> Vec<Token> {
let mut tokens: Vec<Token> = Vec::new();
if data.len() < MIN_MATCH {
for &b in data {
tokens.push(Token::Literal(b));
}
return tokens;
}
for h in head.iter_mut() {
*h = NIL;
}
prev.clear();
prev.resize(data.len(), NIL);
let mut i: usize = 0;
let stop = data.len() - (MIN_MATCH - 1);
while i < stop {
let h = hash3(&data[i..i + MIN_MATCH]) as usize;
let max_chain = 64usize;
let mut best_len: usize = 0;
let mut best_dist: usize = 0;
let mut cur = head[h];
let mut tries = 0usize;
while cur != NIL && tries < max_chain {
let pos = cur as usize;
if pos >= i {
break;
}
let dist = i - pos;
if dist == 0 || dist >= 65536 {
cur = prev[pos];
tries += 1;
continue;
}
let mut l = 0usize;
let max_possible = (data.len() - i).min(MAX_MATCH);
while l < max_possible && data[pos + l] == data[i + l] {
l += 1;
}
if l >= MIN_MATCH && l > best_len {
best_len = l;
best_dist = dist;
if l >= 32 {
break;
}
}
cur = prev[pos];
tries += 1;
}
prev[i] = head[h];
head[h] = i as u32;
if best_len == MIN_MATCH && best_dist == 1 {
best_len = 0;
}
if best_len >= MIN_MATCH {
tokens.push(Token::Match {
length: best_len as u32,
distance: best_dist as u32,
});
let mut j = i + 1;
let end = (i + best_len).min(stop);
while j < end {
let h2 = hash3(&data[j..j + MIN_MATCH]) as usize;
prev[j] = head[h2];
head[h2] = j as u32;
j += 1;
}
i += best_len;
} else {
tokens.push(Token::Literal(data[i]));
i += 1;
}
}
while i < data.len() {
tokens.push(Token::Literal(data[i]));
i += 1;
}
tokens
}
fn high_bit_index(v: u32) -> u32 {
debug_assert!(v > 0);
31 - v.leading_zeros()
}
struct BlockWriter {
acc: u32,
pending_bits: u32,
slots: VecDeque<usize>,
primed: bool,
cum_bits: u32,
words_reserved: u32,
}
impl BlockWriter {
fn new() -> Self {
Self {
acc: 0,
pending_bits: 0,
slots: VecDeque::new(),
primed: false,
cum_bits: 0,
words_reserved: 0,
}
}
fn prime(&mut self, out: &mut Vec<u8>) {
if self.primed {
return;
}
self.primed = true;
for _ in 0..2 {
let idx = out.len();
out.push(0);
out.push(0);
self.slots.push_back(idx);
self.words_reserved += 1;
}
}
fn reserve_slot(&mut self, out: &mut Vec<u8>) {
let idx = out.len();
out.push(0);
out.push(0);
self.slots.push_back(idx);
self.words_reserved += 1;
}
fn reserve_and_return(&mut self, out: &mut Vec<u8>) -> usize {
let idx = out.len();
out.push(0);
out.push(0);
self.words_reserved += 1;
idx
}
fn decoder_words_at(cum_bits: u32) -> u32 {
if cum_bits <= 16 {
2
} else {
2 + (cum_bits - 16).div_ceil(16)
}
}
fn patch(out: &mut [u8], idx: usize, word: u16) {
out[idx] = word as u8;
out[idx + 1] = (word >> 8) as u8;
}
fn write_bits(&mut self, value: u32, len: u32, out: &mut Vec<u8>) {
if !self.primed {
self.prime(out);
}
if len == 0 {
return;
}
debug_assert!(len <= 16);
let value = value & ((1u32 << len) - 1);
self.acc |= value << (32 - self.pending_bits - len);
self.pending_bits += len;
self.cum_bits += len;
while self.pending_bits >= 16 {
let word = ((self.acc >> 16) & 0xFFFF) as u16;
self.acc <<= 16;
self.pending_bits -= 16;
let idx = self
.slots
.pop_front()
.unwrap_or_else(|| self.reserve_and_return(out));
Self::patch(out, idx, word);
}
}
fn write_raw_byte(&mut self, b: u8, out: &mut Vec<u8>) {
if !self.primed {
self.prime(out);
}
let required = Self::decoder_words_at(self.cum_bits);
while self.words_reserved < required {
self.reserve_slot(out);
}
out.push(b);
}
fn pad_with(&mut self, value: u32, len: u32, out: &mut Vec<u8>) {
if !self.primed {
self.prime(out);
}
if len == 0 {
self.finalize(out);
return;
}
while !self.slots.is_empty() || self.pending_bits >= 16 {
self.write_bits(value, len, out);
}
self.finalize(out);
}
fn finalize(&mut self, out: &mut Vec<u8>) {
if !self.primed {
self.prime(out);
}
if self.pending_bits > 0 {
let word = ((self.acc >> 16) & 0xFFFF) as u16;
self.acc = 0;
self.pending_bits = 0;
let idx = self
.slots
.pop_front()
.unwrap_or_else(|| self.reserve_and_return(out));
Self::patch(out, idx, word);
}
let required = Self::decoder_words_at(self.cum_bits);
while self.words_reserved < required {
let _ = self.reserve_and_return(out);
}
self.slots.clear();
}
}