use alloc::vec::Vec;
use crate::error::Error;
use crate::traits::{Encoder as EncoderTrait, Progress};
use crate::zstd::encoder_bitwriter::RevBitWriter;
use crate::zstd::encoder_fse::{
DEFAULT_LL_ACCURACY_LOG, DEFAULT_LL_COUNTS, DEFAULT_ML_ACCURACY_LOG, DEFAULT_ML_COUNTS,
DEFAULT_OF_ACCURACY_LOG, DEFAULT_OF_COUNTS, FseEncoder,
};
use crate::zstd::encoder_seq::{encode_sequence_count, ll_code, ml_code, of_code};
use crate::zstd::matcher::{MIN_MATCH, MatchFinder};
/// Four-byte magic number that `write_frame_header` emits first
/// (0xFD2FB528 stored in little-endian byte order).
const MAGIC: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];
/// Frame header descriptor byte emitted right after `MAGIC`; all bits are zero.
const FHD: u8 = 0x00;
/// Window descriptor byte emitted right after `FHD`.
/// NOTE(review): 0x70 should decode to exponent 14 / mantissa 0 (a 16 MiB
/// window) under RFC 8878 — confirm against the spec.
const WD: u8 = 0x70;
/// Number of buffered input bytes that triggers emission of a block (16 KiB).
const BLOCK_SIZE: usize = 16 * 1024;
/// Streaming encoder that buffers input into fixed-size blocks and queues the
/// encoded frame bytes until the caller's output buffer can take them.
pub struct Encoder {
/// Current phase of the streaming state machine.
state: State,
/// Input bytes collected for the block currently being filled
/// (at most `BLOCK_SIZE` bytes).
pending: Vec<u8>,
/// Encoded bytes that have been produced but not yet handed to the caller.
out_buf: Vec<u8>,
/// Index into `out_buf` of the first byte that has not been copied out yet.
out_idx: usize,
/// Match finder used when attempting to compress a block.
matcher: MatchFinder,
/// FSE encoder for literal-length codes, built from the default counts.
ll_enc: FseEncoder,
/// FSE encoder for match-length codes, built from the default counts.
ml_enc: FseEncoder,
/// FSE encoder for offset codes, built from the default counts.
of_enc: FseEncoder,
/// Whether the frame header has already been queued into `out_buf`.
header_written: bool,
}
/// Phases of the encoder's streaming state machine.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum State {
/// Input is being buffered into the pending block.
Accepting,
/// Queued output is being copied to the caller; `last` is true when the
/// queued bytes end the frame, so the next state is `Done` instead of
/// `Accepting`.
Draining { last: bool },
/// The final block has been fully handed to the caller.
Done,
}
impl Encoder {
    /// Creates an encoder positioned at the start of a new frame.
    ///
    /// The pending buffer and the match finder are sized for `BLOCK_SIZE`,
    /// and the three FSE encoders are built from the default literal-length,
    /// match-length and offset distributions.
    pub fn new() -> Self {
        Self {
            state: State::Accepting,
            pending: Vec::with_capacity(BLOCK_SIZE),
            out_buf: Vec::new(),
            out_idx: 0,
            matcher: MatchFinder::new(BLOCK_SIZE),
            ll_enc: FseEncoder::from_normalized(&DEFAULT_LL_COUNTS, DEFAULT_LL_ACCURACY_LOG),
            ml_enc: FseEncoder::from_normalized(&DEFAULT_ML_COUNTS, DEFAULT_ML_ACCURACY_LOG),
            of_enc: FseEncoder::from_normalized(&DEFAULT_OF_COUNTS, DEFAULT_OF_ACCURACY_LOG),
            header_written: false,
        }
    }

    /// Queues the frame header: the magic number, the frame header
    /// descriptor and the window descriptor, in that order.
    fn write_frame_header(&mut self) {
        self.out_buf.extend_from_slice(&MAGIC);
        self.out_buf.push(FHD);
        self.out_buf.push(WD);
    }

    /// Appends a 3-byte little-endian block header to `out`.
    ///
    /// Bit 0 carries the `last` flag, bits 1-2 the `block_type` and the
    /// remaining 21 bits the `body_size`. Out-of-range arguments are only
    /// caught by debug assertions.
    fn push_block_header(out: &mut Vec<u8>, body_size: u32, block_type: u32, last: bool) {
        debug_assert!(body_size < (1u32 << 21));
        debug_assert!(block_type < 4);
        let header = u32::from(last) | (block_type << 1) | (body_size << 3);
        // Only the low three bytes are part of the header.
        out.extend_from_slice(&header.to_le_bytes()[..3]);
    }

    /// Appends `body` to `out` as an uncompressed block (block type 0).
    fn append_raw_block(out: &mut Vec<u8>, body: &[u8], last: bool) {
        Self::push_block_header(out, body.len() as u32, 0, last);
        out.extend_from_slice(body);
    }

    /// Tries to encode the pending bytes as a compressed block body.
    ///
    /// Returns the literals section followed by the sequences section, or
    /// `None` when the caller should emit the data uncompressed instead:
    /// fewer than 16 pending bytes, no match found, or an encoded body that
    /// is not strictly smaller than the input.
    fn try_compress_block(&mut self) -> Option<Vec<u8>> {
        if self.pending.len() < 16 {
            return None;
        }
        let buffer = self.pending.as_slice();
        self.matcher.resize_for(buffer.len());

        let mut sequences: Vec<Seq> = Vec::new();
        let mut literals: Vec<u8> = Vec::with_capacity(buffer.len());
        // Start of the literal run that precedes the next match.
        let mut lit_start: usize = 0;
        let mut pos: usize = 0;
        while pos + MIN_MATCH < buffer.len() {
            self.matcher.insert(buffer, pos);
            let found = self.matcher.find_match(buffer, pos, buffer.len());
            if let Some(m) = found {
                let distance = m.distance;
                let match_len = m.length;
                literals.extend_from_slice(&buffer[lit_start..pos]);
                sequences.push(Seq {
                    literal_length: (pos - lit_start) as u32,
                    match_length: match_len as u32,
                    distance: distance as u32,
                });
                // Register the positions covered by the match so later
                // searches can refer back into it.
                // NOTE(review): these inserts are not guarded by the
                // `pos + MIN_MATCH < len` bound used by the loop; this
                // presumably relies on `MatchFinder::insert` tolerating
                // positions near the end of the buffer — confirm.
                for skip_pos in (pos + 1)..(pos + match_len) {
                    self.matcher.insert(buffer, skip_pos);
                }
                pos += match_len;
                lit_start = pos;
            } else {
                pos += 1;
            }
        }
        if sequences.is_empty() {
            return None;
        }

        // Bytes after the last match are literals that belong to no sequence.
        let trailing_literals = &buffer[lit_start..];
        let lit_section = build_raw_literals_section(&literals, trailing_literals);
        let seq_section = self.build_sequences_section(&sequences);
        if lit_section.len() + seq_section.len() >= buffer.len() {
            // No gain over storing the block uncompressed.
            return None;
        }
        let mut body = lit_section;
        body.extend_from_slice(&seq_section);
        Some(body)
    }

    /// Serialises `sequences` into a sequences section.
    ///
    /// The section consists of the encoded sequence count, a compression
    /// modes byte of zero (the encoders are built from the default
    /// distributions) and the bitstream produced by `RevBitWriter`.
    /// An empty slice yields the encoded count alone, since a section with
    /// zero sequences has no modes byte and no bitstream.
    fn build_sequences_section(&self, sequences: &[Seq]) -> Vec<u8> {
        let n_seq = sequences.len();
        let mut out = encode_sequence_count(n_seq as u32);
        if n_seq == 0 {
            return out;
        }
        let modes: u8 = 0b00_00_00_00;
        out.push(modes);

        // Per-sequence (code, extra bit count, extra bit value) triples.
        let mut ll: Vec<(u8, u32, u32)> = Vec::with_capacity(n_seq);
        let mut ml: Vec<(u8, u32, u32)> = Vec::with_capacity(n_seq);
        let mut of: Vec<(u8, u32, u32)> = Vec::with_capacity(n_seq);
        for s in sequences {
            // Offset values 1-3 denote repeat offsets in the zstd format,
            // so an explicit distance is stored shifted up by three.
            of.push(of_code(s.distance + 3));
            ll.push(ll_code(s.literal_length));
            ml.push(ml_code(s.match_length));
        }

        let last = n_seq - 1;
        let mut writer = RevBitWriter::new();
        // The final sequence seeds the FSE states instead of being encoded
        // as a transition.
        let mut ll_state = self.ll_enc.init_state(ll[last].0 as usize);
        let mut of_state = self.of_enc.init_state(of[last].0 as usize);
        let mut ml_state = self.ml_enc.init_state(ml[last].0 as usize);

        // Sequences are written back to front.
        for i in (0..n_seq).rev() {
            if i != last {
                of_state = self
                    .of_enc
                    .encode_symbol(of_state, of[i].0 as usize, &mut writer);
                ml_state = self
                    .ml_enc
                    .encode_symbol(ml_state, ml[i].0 as usize, &mut writer);
                ll_state = self
                    .ll_enc
                    .encode_symbol(ll_state, ll[i].0 as usize, &mut writer);
            }
            let (_, ll_bits, ll_val) = ll[i];
            let (_, ml_bits, ml_val) = ml[i];
            let (_, of_bits, of_val) = of[i];
            writer.write_bits(ll_val as u64, ll_bits);
            writer.write_bits(ml_val as u64, ml_bits);
            writer.write_bits(of_val as u64, of_bits);
        }

        self.ml_enc.write_final_state(ml_state, &mut writer);
        self.of_enc.write_final_state(of_state, &mut writer);
        self.ll_enc.write_final_state(ll_state, &mut writer);

        let bitstream = writer.finish();
        out.extend_from_slice(&bitstream);
        out
    }

    /// Queues the pending bytes as one block and empties the pending buffer.
    ///
    /// A compressed block (type 2) is emitted when `try_compress_block`
    /// produces a body; otherwise the bytes are stored as a raw block.
    fn flush_block(&mut self, last: bool) {
        match self.try_compress_block() {
            Some(body) => {
                Self::push_block_header(&mut self.out_buf, body.len() as u32, 2, last);
                self.out_buf.extend_from_slice(&body);
            }
            // `out_buf` and `pending` are distinct fields, so they can be
            // borrowed side by side without moving `pending` out.
            None => Self::append_raw_block(&mut self.out_buf, &self.pending, last),
        }
        self.pending.clear();
    }

    /// Copies as much queued output as fits into `output[*written..]`,
    /// advancing `*written` by the number of bytes copied.
    ///
    /// Returns `true` once the queue is empty (the queue and its read index
    /// are reset at that point) and `false` while bytes remain queued.
    fn drain_into(&mut self, output: &mut [u8], written: &mut usize) -> bool {
        let avail = output.len() - *written;
        let remaining = self.out_buf.len() - self.out_idx;
        let n = core::cmp::min(avail, remaining);
        if n > 0 {
            let src = &self.out_buf[self.out_idx..self.out_idx + n];
            output[*written..*written + n].copy_from_slice(src);
            *written += n;
            self.out_idx += n;
        }
        let drained = self.out_idx == self.out_buf.len();
        if drained {
            self.out_buf.clear();
            self.out_idx = 0;
        }
        drained
    }
}
/// One match found in a block: a run of literals followed by a back-reference.
#[derive(Clone, Copy, Debug)]
struct Seq {
/// Number of literal bytes between the previous match (or block start) and this match.
literal_length: u32,
/// Length of the match as reported by the match finder.
match_length: u32,
/// Distance reported by the match finder for this match.
distance: u32,
}
/// Builds a raw (uncompressed) literals section.
///
/// The output is a size header followed by `literals` and then `trailing`.
/// The header encodes the combined length of both slices and grows with it:
/// one byte below 32, two bytes below 4096, three bytes otherwise. In the
/// multi-byte forms the low four bits of the length sit in the top nibble of
/// the first byte, and bits 2-3 of that byte select the header size.
fn build_raw_literals_section(literals: &[u8], trailing: &[u8]) -> Vec<u8> {
    const TWO_BYTE_FORMAT: u8 = 0b01 << 2;
    const THREE_BYTE_FORMAT: u8 = 0b11 << 2;

    let total = literals.len() + trailing.len();
    let low_nibble = ((total & 0xF) as u8) << 4;

    let mut section = Vec::with_capacity(3 + total);
    match total {
        0..=31 => section.push((total as u8) << 3),
        32..=4095 => {
            section.push(low_nibble | TWO_BYTE_FORMAT);
            section.push((total >> 4) as u8);
        }
        _ => {
            section.push(low_nibble | THREE_BYTE_FORMAT);
            section.push(((total >> 4) & 0xFF) as u8);
            section.push(((total >> 12) & 0xFF) as u8);
        }
    }
    section.extend_from_slice(literals);
    section.extend_from_slice(trailing);
    section
}
impl Default for Encoder {
fn default() -> Self {
Self::new()
}
}
impl EncoderTrait for Encoder {
    /// Buffers `input` and copies any encoded bytes that are ready into
    /// `output`.
    ///
    /// Input is collected until a full `BLOCK_SIZE` block is available; each
    /// full block is encoded and queued, and queued bytes are drained into
    /// `output`. The call returns when `output` cannot take the remaining
    /// queued bytes, or when all input has been buffered and nothing is
    /// queued. The frame header is queued on the first call.
    ///
    /// The returned `Progress` always has `done: false`, including when the
    /// frame has already been finished; in that case no input is consumed.
    fn encode(&mut self, input: &[u8], output: &mut [u8]) -> Result<Progress, Error> {
        let mut consumed = 0usize;
        let mut written = 0usize;
        loop {
            match self.state {
                State::Done => break,
                State::Draining { last } => {
                    if !self.drain_into(output, &mut written) {
                        // Output is full; the rest stays queued for the next call.
                        break;
                    }
                    self.state = if last { State::Done } else { State::Accepting };
                }
                State::Accepting => {
                    if !self.header_written {
                        self.write_frame_header();
                        self.header_written = true;
                    }
                    let room = BLOCK_SIZE - self.pending.len();
                    let unread = &input[consumed..];
                    let take = core::cmp::min(room, unread.len());
                    if take > 0 {
                        self.pending.extend_from_slice(&unread[..take]);
                        consumed += take;
                    }
                    let block_full = self.pending.len() == BLOCK_SIZE;
                    if block_full {
                        self.flush_block(false);
                    }
                    if block_full || !self.out_buf.is_empty() {
                        self.state = State::Draining { last: false };
                    } else {
                        // Everything is buffered and nothing is waiting to go out.
                        break;
                    }
                }
            }
        }
        Ok(Progress {
            consumed,
            written,
            done: false,
        })
    }

    /// Emits the final block and drains the remaining output into `output`.
    ///
    /// Any pending bytes are flushed as the last block; with nothing pending
    /// an empty raw block marked as last is queued instead. Returns
    /// `done: true` once every queued byte has been copied out, and
    /// `done: false` when `output` filled up first, in which case the call
    /// should be repeated with more output space. `consumed` is always 0.
    fn finish(&mut self, output: &mut [u8]) -> Result<Progress, Error> {
        let mut written = 0usize;
        let done = loop {
            match self.state {
                State::Done => break true,
                State::Draining { last } => {
                    if !self.drain_into(output, &mut written) {
                        break false;
                    }
                    self.state = if last { State::Done } else { State::Accepting };
                }
                State::Accepting => {
                    if !self.header_written {
                        self.write_frame_header();
                        self.header_written = true;
                    }
                    if self.pending.is_empty() {
                        Self::push_block_header(&mut self.out_buf, 0, 0, true);
                    } else {
                        self.flush_block(true);
                    }
                    self.state = State::Draining { last: true };
                }
            }
        };
        Ok(Progress {
            consumed: 0,
            written,
            done,
        })
    }

    /// Returns the encoder to its initial state so a new frame can be started.
    ///
    /// Buffered input and queued output are discarded and the match finder
    /// is rebuilt; the FSE encoders are left untouched.
    fn reset(&mut self) {
        self.pending.clear();
        self.out_buf.clear();
        self.out_idx = 0;
        self.header_written = false;
        self.matcher = MatchFinder::new(BLOCK_SIZE);
        self.state = State::Accepting;
    }
}