use alloc::vec;
use alloc::vec::Vec;
use crate::error::Error;
use crate::traits::{Algorithm, RawDecoder, RawEncoder, RawProgress};
mod context;
mod dictionary;
mod encoder_dict;
mod encoder_huffman;
mod encoder_iac;
mod encoder_lz77;
mod huffman;
mod transforms;
use alloc::rc::Rc;
use context::ContextMode;
use huffman::{BitSource, HuffmanDecoder};
use encoder_dict::{DictIndex, IdTransform};
use encoder_huffman::reverse_bits;
use encoder_lz77::MatchFinder;
#[derive(Debug, Clone, Copy, Default)]
pub struct Brotli;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EncoderConfig {
pub quality: u8,
}
impl Default for EncoderConfig {
fn default() -> Self {
Self { quality: 6 }
}
}
#[derive(Debug, Clone, Copy)]
struct LevelParams {
finder: encoder_lz77::FinderParams,
use_dict: bool,
}
impl LevelParams {
const fn from_quality(quality: u8) -> Self {
let q = if quality > 11 { 11 } else { quality };
let (max_chain, nice_match, use_dict) = match q {
0 => (2, 8, false),
1 => (4, 16, false),
2 => (8, 24, false),
3 => (16, 32, false),
4 => (24, 48, true),
5 => (48, 96, true),
6 => (64, 128, true),
7 => (96, 192, true),
8 => (160, 256, true),
9 => (256, 384, true),
10 => (512, 768, true),
_ => (1024, 1024, true),
};
Self {
finder: encoder_lz77::FinderParams {
max_chain,
nice_match,
},
use_dict,
}
}
}
impl Algorithm for Brotli {
const NAME: &'static str = "brotli";
type Encoder = Encoder;
type Decoder = Decoder;
type EncoderConfig = EncoderConfig;
type DecoderConfig = ();
fn encoder_with(c: EncoderConfig) -> Encoder {
Encoder::with_config(c)
}
fn decoder_with(_: ()) -> Decoder {
Decoder::new()
}
}
#[derive(Debug, Clone, Default)]
struct BitWriter {
acc: u64,
nbits: u32,
}
impl BitWriter {
const fn new() -> Self {
Self { acc: 0, nbits: 0 }
}
fn write(&mut self, value: u32, n: u32, out: &mut Vec<u8>) {
debug_assert!(n <= 32);
let masked: u64 = if n == 0 {
0
} else {
(value as u64) & ((1u64 << n) - 1)
};
self.acc |= masked << self.nbits;
self.nbits += n;
while self.nbits >= 8 {
out.push(self.acc as u8);
self.acc >>= 8;
self.nbits -= 8;
}
}
fn align(&mut self, out: &mut Vec<u8>) {
if self.nbits > 0 {
out.push(self.acc as u8);
self.acc = 0;
self.nbits = 0;
}
}
const fn pending_bits(&self) -> u32 {
self.nbits
}
}
const MAX_BLOCK: usize = 1 << 16;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EncStage {
NeedHeader,
Buffering,
Draining,
Done,
}
#[derive(Debug, Clone)]
pub struct Encoder {
pending: Vec<u8>,
out: Vec<u8>,
out_pos: usize,
bw: BitWriter,
stage: EncStage,
seen_any_input: bool,
ring: DistRing,
prev_total_out: u64,
dict_index: Option<Rc<DictIndex>>,
id_transforms: Option<Rc<Vec<IdTransform>>>,
params: LevelParams,
scratch: Option<EncScratch>,
}
struct EncScratch {
mf: MatchFinder,
cmds: Vec<Command>,
insert_pool: Vec<Vec<u8>>,
ic_sym: Vec<u32>,
ins_extra: Vec<(u32, u32)>,
copy_extra: Vec<(u32, u32)>,
dist_enc: Vec<Option<(u32, u32, u32)>>,
lit_freq: [u32; 256],
ic_freq: [u32; 704],
dist_freq: [u32; 64],
}
impl core::fmt::Debug for EncScratch {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("EncScratch")
.field("cmds_cap", &self.cmds.capacity())
.finish()
}
}
impl Clone for EncScratch {
fn clone(&self) -> Self {
Self::new()
}
}
impl EncScratch {
fn new() -> Self {
Self {
mf: MatchFinder::new(),
cmds: Vec::new(),
insert_pool: Vec::new(),
ic_sym: Vec::new(),
ins_extra: Vec::new(),
copy_extra: Vec::new(),
dist_enc: Vec::new(),
lit_freq: [0u32; 256],
ic_freq: [0u32; 704],
dist_freq: [0u32; 64],
}
}
fn prepare(&mut self) {
self.mf.reset();
while let Some(c) = self.cmds.pop() {
let mut v = c.insert;
v.clear();
self.insert_pool.push(v);
}
self.ic_sym.clear();
self.ins_extra.clear();
self.copy_extra.clear();
self.dist_enc.clear();
self.lit_freq.fill(0);
self.ic_freq.fill(0);
self.dist_freq.fill(0);
}
}
impl Encoder {
pub fn new() -> Self {
Self::with_config(EncoderConfig::default())
}
pub fn with_config(config: EncoderConfig) -> Self {
Self {
pending: Vec::new(),
out: Vec::new(),
out_pos: 0,
bw: BitWriter::new(),
stage: EncStage::NeedHeader,
seen_any_input: false,
ring: DistRing::new(),
prev_total_out: 0,
dict_index: None,
id_transforms: None,
params: LevelParams::from_quality(config.quality),
scratch: None,
}
}
fn ensure_dict_index(&mut self) {
if self.dict_index.is_none() {
self.dict_index = Some(Rc::new(DictIndex::build()));
}
if self.id_transforms.is_none() {
self.id_transforms = Some(Rc::new(encoder_dict::identity_transforms()));
}
}
fn compact_out(&mut self) {
if self.out_pos == 0 {
return;
}
if self.out_pos >= self.out.len() {
self.out.clear();
} else {
self.out.drain(..self.out_pos);
}
self.out_pos = 0;
}
fn drain_out_into(&mut self, dst: &mut [u8]) -> usize {
let avail = self.out.len() - self.out_pos;
let n = avail.min(dst.len());
if n > 0 {
dst[..n].copy_from_slice(&self.out[self.out_pos..self.out_pos + n]);
self.out_pos += n;
}
n
}
fn ensure_header(&mut self) {
if self.stage == EncStage::NeedHeader {
self.bw.write(0, 1, &mut self.out);
self.stage = EncStage::Buffering;
}
}
fn emit_compressed_block(&mut self, mlen: usize, is_last: bool) {
debug_assert!((1..=MAX_BLOCK).contains(&mlen));
debug_assert!(mlen <= self.pending.len());
if self.params.use_dict {
self.ensure_dict_index();
}
if self.scratch.is_none() {
self.scratch = Some(EncScratch::new());
}
let dict_index = self.dict_index.clone();
let id_transforms = self.id_transforms.clone();
let scratch = self.scratch.as_mut().expect("scratch initialised above");
scratch.prepare();
let pending_view = &self.pending[..mlen];
encode_meta_block(
&mut self.bw,
&mut self.out,
pending_view,
is_last,
&mut self.ring,
self.prev_total_out,
dict_index.as_deref(),
id_transforms.as_deref().map(|v| v.as_slice()),
self.params,
scratch,
);
self.pending.drain(..mlen);
self.prev_total_out += mlen as u64;
}
fn flush_full_blocks(&mut self) {
while self.pending.len() > MAX_BLOCK {
self.emit_compressed_block(MAX_BLOCK, false);
}
}
fn emit_terminator(&mut self) {
if !self.seen_any_input || self.pending.is_empty() {
self.bw.write(1, 1, &mut self.out);
self.bw.write(1, 1, &mut self.out);
self.bw.align(&mut self.out);
debug_assert_eq!(self.bw.pending_bits(), 0);
return;
}
let n = self.pending.len();
debug_assert!(n <= MAX_BLOCK);
self.emit_compressed_block(n, true);
debug_assert_eq!(self.bw.pending_bits(), 0);
}
}
impl Default for Encoder {
fn default() -> Self {
Self::new()
}
}
impl RawEncoder for Encoder {
fn raw_encode(&mut self, input: &[u8], output: &mut [u8]) -> Result<RawProgress, Error> {
if self.stage == EncStage::Done {
return Err(Error::Corrupt);
}
let mut written = 0usize;
if self.out_pos < self.out.len() {
written += self.drain_out_into(&mut output[written..]);
if written == output.len() {
return Ok(RawProgress {
consumed: 0,
written,
done: false,
});
}
}
self.compact_out();
self.ensure_header();
if !input.is_empty() {
self.seen_any_input = true;
}
self.pending.extend_from_slice(input);
let consumed = input.len();
self.flush_full_blocks();
written += self.drain_out_into(&mut output[written..]);
self.compact_out();
Ok(RawProgress {
consumed,
written,
done: false,
})
}
fn raw_finish(&mut self, output: &mut [u8]) -> Result<RawProgress, Error> {
if self.stage == EncStage::Done {
return Ok(RawProgress {
consumed: 0,
written: 0,
done: true,
});
}
let mut written = 0usize;
if self.out_pos < self.out.len() {
written += self.drain_out_into(&mut output[written..]);
if written == output.len() {
return Ok(RawProgress {
consumed: 0,
written,
done: false,
});
}
}
self.compact_out();
if self.stage != EncStage::Draining {
self.ensure_header();
self.flush_full_blocks();
self.emit_terminator();
self.stage = EncStage::Draining;
}
written += self.drain_out_into(&mut output[written..]);
self.compact_out();
let done = self.out_pos == self.out.len();
if done {
self.stage = EncStage::Done;
}
Ok(RawProgress {
consumed: 0,
written,
done,
})
}
fn raw_reset(&mut self) {
self.pending.clear();
self.out.clear();
self.out_pos = 0;
self.bw = BitWriter::new();
self.stage = EncStage::NeedHeader;
self.seen_any_input = false;
self.ring = DistRing::new();
self.prev_total_out = 0;
}
}
fn distinct_bytes_low(payload: &[u8]) -> usize {
let mut seen = [false; 256];
let mut n = 0usize;
for &b in payload {
let s = &mut seen[b as usize];
if !*s {
*s = true;
n += 1;
if n >= 32 {
return n;
}
}
}
n
}
fn lz77_to_commands(
payload: &[u8],
dict_index: Option<&encoder_dict::DictIndex>,
id_transforms: Option<&[encoder_dict::IdTransform]>,
prev_total_out: u64,
finder_params: encoder_lz77::FinderParams,
scratch: &mut EncScratch,
) {
use encoder_lz77::{MAX_MATCH, MIN_MATCH};
let use_dict =
dict_index.is_some() && id_transforms.is_some() && distinct_bytes_low(payload) >= 5;
let estimate = (payload.len() / 4).max(16);
if scratch.cmds.capacity() < estimate {
scratch.cmds.reserve(estimate - scratch.cmds.capacity());
}
let EncScratch {
mf,
cmds,
insert_pool,
..
} = scratch;
let mut pending: Vec<u8> = insert_pool.pop().unwrap_or_default();
if pending.capacity() < 64 {
pending.reserve(64 - pending.capacity());
}
let payload_len = payload.len();
let mut pos = 0usize;
while pos < payload_len {
mf.insert(payload, pos);
let mut best_len: usize = 0;
let mut best_kind: u8 = 0;
let mut best_match_dist: u32 = 0;
let mut best_dict_word_len: u8 = 0;
let mut best_dict_word_idx: u32 = 0;
let mut best_dict_tr_id: u8 = 0;
let mut best_dict_emit_len: u32 = 0;
if pos + MIN_MATCH <= payload_len
&& let Some((len, dist)) = mf.find_match(payload, pos, finder_params)
{
let len = len.min(MAX_MATCH).min(payload_len - pos);
if len >= MIN_MATCH {
best_len = len;
best_kind = 1;
best_match_dist = dist as u32;
}
}
if use_dict {
let dict_min_emit = if best_len >= 4 {
(best_len + 3) as u32
} else {
6u32
};
if let Some(dm) = encoder_dict::find_dict_match(
dict_index.unwrap(),
id_transforms.unwrap(),
payload,
pos,
dict_min_emit,
) {
let total_out_at_pos: u64 = prev_total_out + pos as u64;
let max_dist: u64 = core::cmp::min((1u64 << 16) - 16, total_out_at_pos);
let len = dm.word_len as usize;
let nwords_bits = dictionary::SIZE_BITS_BY_LENGTH[len] as u32;
let off = (dm.word_idx as u64) | ((dm.transform_id as u64) << nwords_bits);
let distance = max_dist + 1 + off;
if distance <= u32::MAX as u64 && distance > 0 && (dm.emit_len as usize) > best_len
{
best_len = dm.emit_len as usize;
best_kind = 2;
best_dict_word_len = dm.word_len;
best_dict_word_idx = dm.word_idx;
best_dict_tr_id = dm.transform_id;
best_dict_emit_len = dm.emit_len;
}
}
}
if best_kind != 0 {
for j in 1..best_len {
let p = pos + j;
if p + MIN_MATCH <= payload_len {
mf.insert(payload, p);
}
}
let next_pending = insert_pool.pop().unwrap_or_default();
let cmd_kind = if best_kind == 1 {
CopyKind::Backref {
distance: best_match_dist,
}
} else {
CopyKind::Dict {
word_idx: best_dict_word_idx,
transform_id: best_dict_tr_id,
emit_len: best_dict_emit_len,
}
};
let copy_len = if best_kind == 1 {
best_len as u32
} else {
best_dict_word_len as u32
};
cmds.push(Command {
insert: core::mem::replace(&mut pending, next_pending),
copy_len,
kind: cmd_kind,
});
pos += best_len;
continue;
}
pending.push(payload[pos]);
pos += 1;
}
if !pending.is_empty() || cmds.is_empty() {
cmds.push(Command {
insert: pending,
copy_len: 0,
kind: CopyKind::None,
});
} else {
pending.clear();
insert_pool.push(pending);
}
}
#[derive(Clone, Copy)]
enum CopyKind {
None,
Backref { distance: u32 },
Dict {
word_idx: u32,
transform_id: u8,
emit_len: u32,
},
}
struct Command {
insert: Vec<u8>,
copy_len: u32,
kind: CopyKind,
}
#[derive(Debug, Clone, Copy)]
struct DistRing {
ring: [i32; 4],
idx: u32,
}
impl DistRing {
fn new() -> Self {
Self {
ring: [16, 15, 11, 4],
idx: 0,
}
}
fn nth_last(&self, n: u32) -> i32 {
debug_assert!((1..=4).contains(&n));
self.ring[((self.idx.wrapping_add(4 - n)) & 3) as usize]
}
fn push(&mut self, d: i32) {
let slot = (self.idx & 3) as usize;
self.ring[slot] = d;
self.idx = self.idx.wrapping_add(1);
}
fn try_short_code(&self, distance: u32) -> Option<u32> {
let d = distance as i32;
let last = self.nth_last(1);
let last2 = self.nth_last(2);
if d == last {
return Some(0);
}
if d == self.nth_last(2) {
return Some(1);
}
if d == self.nth_last(3) {
return Some(2);
}
if d == self.nth_last(4) {
return Some(3);
}
if d > 0 {
if d == last - 1 {
return Some(4);
}
if d == last + 1 {
return Some(5);
}
if d == last - 2 {
return Some(6);
}
if d == last + 2 {
return Some(7);
}
if d == last - 3 {
return Some(8);
}
if d == last + 3 {
return Some(9);
}
if d == last2 - 1 {
return Some(10);
}
if d == last2 + 1 {
return Some(11);
}
if d == last2 - 2 {
return Some(12);
}
if d == last2 + 2 {
return Some(13);
}
if d == last2 - 3 {
return Some(14);
}
if d == last2 + 3 {
return Some(15);
}
}
None
}
}
fn plan_commands(
mlen: u32,
ring: &mut DistRing,
prev_total_out: u64,
window_size: u32,
scratch: &mut EncScratch,
) {
use encoder_iac::{copy_to_code, distance_to_normal_code, ic_command_sym, insert_to_code};
let cmds_len = scratch.cmds.len();
scratch.ic_sym.reserve(cmds_len);
scratch.ins_extra.reserve(cmds_len);
scratch.copy_extra.reserve(cmds_len);
scratch.dist_enc.reserve(cmds_len);
let EncScratch {
cmds,
ic_sym,
ins_extra,
copy_extra,
dist_enc,
lit_freq,
ic_freq,
dist_freq,
..
} = scratch;
let mut emitted: u32 = 0;
for (i, c) in cmds.iter().enumerate() {
let is_last_cmd = i == cmds_len - 1;
let insert_len = c.insert.len() as u32;
for &b in &c.insert {
lit_freq[b as usize] += 1;
}
let (ins_code, ins_eb, ins_ev) = insert_to_code(insert_len);
let (copy_code, copy_eb, copy_ev, use_last_dist, dist_plan) = match c.kind {
CopyKind::Backref { distance } => {
let (cc, ceb, cev) = copy_to_code(c.copy_len);
let can_use_last = ins_code < 8 && cc < 16;
let short = ring.try_short_code(distance);
match short {
Some(0) if can_use_last => {
(cc, ceb, cev, true, None)
}
Some(0) => {
(cc, ceb, cev, false, Some((0u32, 0u32, 0u32)))
}
Some(code) => {
ring.push(distance as i32);
(cc, ceb, cev, false, Some((code, 0, 0)))
}
None => {
let (dcode, ndistbits, dextra) =
distance_to_normal_code(distance).expect("encodable distance");
ring.push(distance as i32);
(cc, ceb, cev, false, Some((dcode, ndistbits, dextra)))
}
}
}
CopyKind::Dict {
word_idx,
transform_id,
..
} => {
let (cc, ceb, cev) = copy_to_code(c.copy_len);
let total_out_at_cmd: u64 = prev_total_out + emitted as u64 + insert_len as u64;
let max_dist: u64 =
core::cmp::min(window_size.saturating_sub(16) as u64, total_out_at_cmd);
let nwords_bits = dictionary::SIZE_BITS_BY_LENGTH[c.copy_len as usize] as u32;
let off = (word_idx as u64) | ((transform_id as u64) << nwords_bits);
let distance: u64 = max_dist + 1 + off;
debug_assert!(distance <= u32::MAX as u64, "dictionary distance overflow");
let (dcode, ndistbits, dextra) =
distance_to_normal_code(distance as u32).expect("encodable dict distance");
(cc, ceb, cev, false, Some((dcode, ndistbits, dextra)))
}
CopyKind::None => {
if ins_code < 8 {
(0, 0, 0, true, None)
} else {
(0, 0, 0, false, None)
}
}
};
let use_last_for_ic = if is_last_cmd && matches!(c.kind, CopyKind::None) {
ins_code < 8 } else {
use_last_dist
};
let sym = ic_command_sym(ins_code, copy_code, use_last_for_ic);
ic_sym.push(sym);
ins_extra.push((ins_eb, ins_ev));
copy_extra.push((copy_eb, copy_ev));
dist_enc.push(dist_plan);
ic_freq[sym as usize] += 1;
if let Some((dcode, _, _)) = dist_plan {
dist_freq[dcode as usize] += 1;
}
emitted += insert_len;
match c.kind {
CopyKind::Backref { .. } => emitted += c.copy_len,
CopyKind::Dict { emit_len, .. } => emitted += emit_len,
CopyKind::None => {}
}
}
debug_assert!(
emitted == mlen,
"command emission ({emitted}) does not match mlen ({mlen})"
);
}
fn write_meta_block_header(bw: &mut BitWriter, out: &mut Vec<u8>, mlen: u32, is_last: bool) {
debug_assert!(mlen >= 1 && mlen <= MAX_BLOCK as u32);
bw.write(if is_last { 1 } else { 0 }, 1, out);
if is_last {
bw.write(0, 1, out);
}
bw.write(0, 2, out);
bw.write(mlen - 1, 16, out);
if !is_last {
bw.write(0, 1, out);
}
bw.write(0, 1, out);
bw.write(0, 1, out);
bw.write(0, 1, out);
bw.write(0, 2, out);
bw.write(0, 4, out);
bw.write(0, 2, out);
bw.write(0, 1, out);
bw.write(0, 1, out);
}
enum HuffStrategy {
SingleSymbol(u32),
TwoSymbols { symbols: [u32; 2], codes: [u32; 2] },
Complex(Vec<u8>),
}
fn pick_huffman_strategy(freqs: &[u32], alphabet_size: usize) -> HuffStrategy {
let mut first: Option<u32> = None;
let mut second: Option<u32> = None;
for (i, &f) in freqs.iter().enumerate() {
if f == 0 {
continue;
}
if first.is_none() {
first = Some(i as u32);
} else if second.is_none() {
second = Some(i as u32);
} else {
let lengths = encoder_huffman::build_huffman_lengths(freqs, alphabet_size);
return HuffStrategy::Complex(lengths);
}
}
match (first, second) {
(None, _) => HuffStrategy::SingleSymbol(0),
(Some(a), None) => HuffStrategy::SingleSymbol(a),
(Some(a), Some(b)) => HuffStrategy::TwoSymbols {
symbols: [a, b],
codes: [0, 1],
},
}
}
#[allow(clippy::too_many_arguments)]
fn encode_meta_block(
bw: &mut BitWriter,
out: &mut Vec<u8>,
payload: &[u8],
is_last: bool,
ring: &mut DistRing,
prev_total_out: u64,
dict_index: Option<&DictIndex>,
id_transforms: Option<&[IdTransform]>,
level: LevelParams,
scratch: &mut EncScratch,
) {
let mlen = payload.len() as u32;
debug_assert!(mlen >= 1 && mlen <= MAX_BLOCK as u32);
const WINDOW_SIZE: u32 = 1 << 16;
lz77_to_commands(
payload,
dict_index,
id_transforms,
prev_total_out,
level.finder,
scratch,
);
plan_commands(mlen, ring, prev_total_out, WINDOW_SIZE, scratch);
let lit_strategy = pick_huffman_strategy(&scratch.lit_freq, 256);
let ic_strategy = pick_huffman_strategy(&scratch.ic_freq, 704);
let dist_strategy = pick_huffman_strategy(&scratch.dist_freq, 64);
write_meta_block_header(bw, out, mlen, is_last);
let lit_codes = emit_prefix_code(bw, out, &lit_strategy, 256);
let ic_codes = emit_prefix_code(bw, out, &ic_strategy, 704);
let dist_codes = emit_prefix_code(bw, out, &dist_strategy, 64);
let scratch_view: &EncScratch = scratch;
let cmds_len = scratch_view.cmds.len();
for i in 0..cmds_len {
let sym = scratch_view.ic_sym[i];
write_symbol(bw, out, &ic_strategy, &ic_codes, sym);
let (ieb, iev) = scratch_view.ins_extra[i];
if ieb > 0 {
bw.write(iev, ieb, out);
}
let (ceb, cev) = scratch_view.copy_extra[i];
if ceb > 0 {
bw.write(cev, ceb, out);
}
let insert = &scratch_view.cmds[i].insert;
match &lit_strategy {
HuffStrategy::Complex(lengths) => {
for &b in insert {
let len = lengths[b as usize] as u32;
debug_assert!(len > 0);
let code = lit_codes[b as usize];
let rev = reverse_bits(code as u32, len);
bw.write(rev, len, out);
}
}
_ => {
for &b in insert {
write_symbol(bw, out, &lit_strategy, &lit_codes, b as u32);
}
}
}
if let Some((dcode, ndb, dextra)) = scratch_view.dist_enc[i] {
write_symbol(bw, out, &dist_strategy, &dist_codes, dcode);
if ndb > 0 {
bw.write(dextra, ndb, out);
}
}
}
if is_last {
bw.align(out);
}
}
fn emit_prefix_code(
bw: &mut BitWriter,
out: &mut Vec<u8>,
strategy: &HuffStrategy,
alphabet_size: u32,
) -> Vec<u16> {
match strategy {
HuffStrategy::SingleSymbol(sym) => {
encoder_huffman::emit_simple_nsym1(bw, out, *sym, alphabet_size);
Vec::new()
}
HuffStrategy::TwoSymbols { symbols, .. } => {
let _codes = encoder_huffman::emit_simple_nsym2(bw, out, *symbols, alphabet_size);
Vec::new()
}
HuffStrategy::Complex(lengths) => {
encoder_huffman::emit_complex_prefix_code(bw, out, lengths)
}
}
}
fn write_symbol(
bw: &mut BitWriter,
out: &mut Vec<u8>,
strategy: &HuffStrategy,
codes: &[u16],
sym: u32,
) {
match strategy {
HuffStrategy::SingleSymbol(_) => { }
HuffStrategy::TwoSymbols { symbols, codes: tc } => {
let slot = if sym == symbols[0] {
0
} else {
debug_assert!(sym == symbols[1], "symbol {sym} not in 2-symbol code");
1
};
bw.write(tc[slot], 1, out);
}
HuffStrategy::Complex(lengths) => {
let len = lengths[sym as usize] as u32;
debug_assert!(
len > 0,
"symbol {sym} has no complex-prefix-code entry (length 0)"
);
let code = codes[sym as usize];
let rev = reverse_bits(code as u32, len);
bw.write(rev, len, out);
}
}
}
const NUM_LITERAL_SYMBOLS: u32 = 256;
const NUM_COMMAND_SYMBOLS: u32 = 704;
const NUM_BLOCK_LEN_SYMBOLS: u32 = 26;
const CODE_LENGTH_ORDER: [usize; 18] =
[1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15];
const BLOCK_LEN_BASE: [u32; 26] = [
1, 5, 9, 13, 17, 25, 33, 41, 49, 65, 81, 97, 113, 145, 177, 209, 241, 305, 369, 497, 753, 1265,
2289, 4337, 8433, 16625,
];
const BLOCK_LEN_EXTRA: [u32; 26] = [
2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24,
];
const INS_EXTRA: [u32; 24] = [
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
];
const INS_BASE: [u32; 24] = [
0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210,
22594,
];
const COPY_EXTRA: [u32; 24] = [
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
];
const COPY_BASE: [u32; 24] = [
2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118,
];
#[derive(Debug)]
pub struct Decoder {
#[doc(hidden)]
pub dbg_msgs: alloc::vec::Vec<alloc::string::String>,
raw: Vec<u8>,
bit_pos: usize,
out: Vec<u8>,
out_pos: usize,
state: DecState,
poisoned: bool,
window_size: u32,
dist_ring: [i32; 4],
ring_idx: u32,
total_out: usize,
p1: u8,
p2: u8,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DecState {
NeedHeader,
NeedMetaBlock,
Done,
}
impl Decoder {
pub fn _take_msgs(&mut self) -> alloc::vec::Vec<alloc::string::String> {
core::mem::take(&mut self.dbg_msgs)
}
pub fn new() -> Self {
Self {
dbg_msgs: alloc::vec::Vec::new(),
raw: Vec::new(),
bit_pos: 0,
out: Vec::new(),
out_pos: 0,
state: DecState::NeedHeader,
poisoned: false,
window_size: 1 << 16,
dist_ring: [16, 15, 11, 4],
ring_idx: 0,
total_out: 0,
p1: 0,
p2: 0,
}
}
fn last_dist(&self) -> i32 {
self.dist_ring[((self.ring_idx.wrapping_add(3)) & 3) as usize]
}
fn nth_last_dist(&self, i: u32) -> i32 {
debug_assert!((1..=4).contains(&i));
let idx = self.ring_idx.wrapping_add(4 - i) & 3;
self.dist_ring[idx as usize]
}
fn push_dist(&mut self, d: i32) {
let slot = (self.ring_idx & 3) as usize;
self.dist_ring[slot] = d;
self.ring_idx = self.ring_idx.wrapping_add(1);
}
fn poison(&mut self, e: Error) -> Error {
self.poisoned = true;
e
}
fn compact_raw(&mut self) {
let drop_bytes = self.bit_pos >> 3;
if drop_bytes > 0 {
self.raw.drain(..drop_bytes);
self.bit_pos -= drop_bytes * 8;
}
}
fn drain_out_into(&mut self, dst: &mut [u8]) -> usize {
let avail = self.out.len() - self.out_pos;
let n = avail.min(dst.len());
if n > 0 {
dst[..n].copy_from_slice(&self.out[self.out_pos..self.out_pos + n]);
self.out_pos += n;
}
n
}
fn compact_out(&mut self) {
if self.out_pos == 0 {
return;
}
let want_history = self.window_size as usize;
let unread = self.out.len() - self.out_pos;
let total_keep_target = want_history + unread;
if self.out.len() <= total_keep_target {
return;
}
let drop_n = self.out.len() - total_keep_target;
let drop_n = drop_n.min(self.out_pos);
if drop_n > 0 {
self.out.drain(..drop_n);
self.out_pos -= drop_n;
}
}
fn read_stream_header(&mut self) -> Result<bool, Error> {
let mut src = BitSource::at(&self.raw, self.bit_pos);
let total_bits = self.raw.len() * 8 - self.bit_pos;
if total_bits < 1 {
return Ok(false);
}
let pos_save = src.position();
let b0 = src.read_bit()?;
if b0 == 0 {
self.window_size = 1 << 16;
self.bit_pos = src.position();
return Ok(true);
}
if total_bits < 4 {
return Ok(false);
}
let n = src.read_bits(3)? as u8;
if n != 0 {
let wbits = 17 + n as u32;
self.window_size = 1u32 << wbits;
self.bit_pos = src.position();
return Ok(true);
}
if total_bits < 7 {
src.set_position(pos_save);
return Ok(false);
}
let m = src.read_bits(3)? as u8;
match m {
0 => {
self.window_size = 1 << 17;
self.bit_pos = src.position();
Ok(true)
}
1 => Err(Error::Unsupported), _ => {
let wbits = 8 + m as u32;
self.window_size = 1u32 << wbits;
self.bit_pos = src.position();
Ok(true)
}
}
}
fn process_next_meta_block(&mut self) -> Result<bool, Error> {
let start_bit_pos = self.bit_pos;
match self.try_process_meta_block() {
Ok(()) => Ok(true),
Err(Error::UnexpectedEnd) => {
self.bit_pos = start_bit_pos;
Ok(false)
}
Err(e) => Err(e),
}
}
fn try_process_meta_block(&mut self) -> Result<(), Error> {
if core::option_env!("BROTLI_DEBUG").is_some() {
let s: alloc::string::String = self
.raw
.iter()
.take(12)
.map(|b| alloc::format!("{:02x} ", b))
.collect();
self.dbg_msgs.push(alloc::format!(
"try_pmb: bit_pos={}, raw.len()={}, first 12 bytes: {}",
self.bit_pos,
self.raw.len(),
s
));
}
let raw = self.raw.clone();
let mut src = BitSource::at(&raw, self.bit_pos);
let initial_pos = self.bit_pos;
let is_last = src.read_bit()? != 0;
let mut is_last_empty = false;
if is_last {
is_last_empty = src.read_bit()? != 0;
if is_last_empty {
self.bit_pos = src.position();
self.state = DecState::Done;
return Ok(());
}
}
let nibbles = src.read_bits(2)?;
if nibbles == 3 {
if is_last {
return Err(Error::Corrupt);
}
let r = src.read_bit()?;
if r != 0 {
return Err(Error::Corrupt);
}
let mskip_bytes = src.read_bits(2)?;
let mskiplen = if mskip_bytes == 0 {
0u32
} else {
let mut acc: u32 = 0;
for i in 0..mskip_bytes {
let b = src.read_bits(8)?;
acc |= b << (i * 8);
}
if mskip_bytes > 1 {
let top = (acc >> ((mskip_bytes - 1) * 8)) & 0xFF;
if top == 0 {
return Err(Error::Corrupt);
}
}
acc + 1
};
src.align_to_byte();
let byte_pos = src.position() / 8;
let need = byte_pos + mskiplen as usize;
if self.raw.len() < need {
return Err(Error::UnexpectedEnd);
}
src.set_position(need * 8);
self.bit_pos = src.position();
self.compact_raw();
return Ok(());
}
let nibbles = nibbles + 4; let mut mlen_minus_1: u32 = 0;
for i in 0..nibbles {
let nb = src.read_bits(4)?;
mlen_minus_1 |= nb << (i * 4);
}
if nibbles > 4 {
let top_shift = (nibbles - 1) * 4;
if ((mlen_minus_1 >> top_shift) & 0xF) == 0 {
return Err(Error::Corrupt);
}
}
let mlen = mlen_minus_1 + 1;
let is_uncompressed = if !is_last {
src.read_bit()? != 0
} else {
false
};
if is_last && is_last_empty {
unreachable!();
}
if is_uncompressed {
src.align_to_byte();
let byte_pos = src.position() / 8;
let need = byte_pos + mlen as usize;
if self.raw.len() < need {
return Err(Error::UnexpectedEnd);
}
let slice = self.raw[byte_pos..need].to_vec();
for b in &slice {
self.emit_literal(*b);
}
src.set_position(need * 8);
self.bit_pos = src.position();
self.compact_raw();
return Ok(());
}
let snap = (
self.dist_ring,
self.ring_idx,
self.p1,
self.p2,
self.total_out,
self.out.len(),
);
if let Err(e) = self.decode_compressed_meta_block(&mut src, mlen) {
if core::option_env!("BROTLI_DEBUG").is_some() {
self.dbg_msgs.push(alloc::format!(
"decode_compressed_meta_block ERR: {:?}, mlen={}, src.position()={}",
e,
mlen,
src.position()
));
}
if e == Error::UnexpectedEnd {
self.dist_ring = snap.0;
self.ring_idx = snap.1;
self.p1 = snap.2;
self.p2 = snap.3;
self.total_out = snap.4;
self.out.truncate(snap.5);
}
self.bit_pos = initial_pos;
return Err(e);
}
self.bit_pos = src.position();
self.compact_raw();
if is_last {
self.state = DecState::Done;
}
Ok(())
}
fn emit_literal(&mut self, b: u8) {
self.out.push(b);
self.p2 = self.p1;
self.p1 = b;
self.total_out += 1;
}
fn emit_copy(&mut self, distance: u32, len: u32) -> Result<(), Error> {
if distance as usize > self.total_out {
return Err(Error::InvalidDistance);
}
if len == 0 {
return Ok(());
}
let out_base = self.total_out - self.out.len();
let g0 = (self.total_out as u64) - (distance as u64);
if g0 < out_base as u64 {
return Err(Error::InvalidDistance);
}
let src_start = (g0 - out_base as u64) as usize;
let n = len as usize;
if (distance as usize) >= n {
self.out.extend_from_within(src_start..src_start + n);
let end = self.out.len();
if n >= 2 {
self.p2 = self.out[end - 2];
self.p1 = self.out[end - 1];
} else {
self.p2 = self.p1;
self.p1 = self.out[end - 1];
}
self.total_out += n;
} else if distance == 1 {
let b = self.out[src_start];
self.out.resize(self.out.len() + n, b);
if n >= 2 {
self.p2 = b;
self.p1 = b;
} else {
self.p2 = self.p1;
self.p1 = b;
}
self.total_out += n;
} else {
for _ in 0..len {
let g = (self.total_out as u64) - (distance as u64);
let byte = self.out[(g - out_base as u64) as usize];
self.emit_literal(byte);
}
}
Ok(())
}
fn read_nbltypes(src: &mut BitSource<'_>) -> Result<u32, Error> {
let first = src.read_bit()?;
if first == 0 {
return Ok(1);
}
let n = src.read_bits(3)?;
if n == 0 {
return Ok(2);
}
let extra = src.read_bits(n)?;
Ok((1u32 << n) + 1 + extra)
}
fn read_prefix_code(
src: &mut BitSource<'_>,
alphabet_size: u32,
) -> Result<HuffmanDecoder, Error> {
let kind = src.read_bits(2)?;
if kind == 1 {
return Self::read_simple_prefix_code(src, alphabet_size);
}
let hskip = kind;
Self::read_complex_prefix_code(src, alphabet_size, hskip)
}
fn read_simple_prefix_code(
src: &mut BitSource<'_>,
alphabet_size: u32,
) -> Result<HuffmanDecoder, Error> {
let nsym = src.read_bits(2)? + 1; let alpha_bits = alphabet_bits(alphabet_size);
let mut syms = [0u32; 4];
for i in 0..nsym {
let s = src.read_bits(alpha_bits)?;
if s >= alphabet_size {
return Err(Error::Corrupt);
}
for j in 0..i {
if syms[j as usize] == s {
return Err(Error::Corrupt);
}
}
syms[i as usize] = s;
}
match nsym {
1 => Ok(HuffmanDecoder::single(syms[0])),
2 => {
let mut a = syms[0];
let mut b = syms[1];
if a > b {
core::mem::swap(&mut a, &mut b);
}
HuffmanDecoder::from_lengths_sparse(&[(a, 1), (b, 1)])
}
3 => {
let l1 = syms[0];
let mut s2 = syms[1];
let mut s3 = syms[2];
if s2 > s3 {
core::mem::swap(&mut s2, &mut s3);
}
HuffmanDecoder::from_lengths_sparse(&[(l1, 1), (s2, 2), (s3, 2)])
}
4 => {
let tree_select = src.read_bit()?;
if tree_select == 0 {
let mut all = [syms[0], syms[1], syms[2], syms[3]];
all.sort();
HuffmanDecoder::from_lengths_sparse(&[
(all[0], 2),
(all[1], 2),
(all[2], 2),
(all[3], 2),
])
} else {
let mut c = syms[2];
let mut d = syms[3];
if c > d {
core::mem::swap(&mut c, &mut d);
}
HuffmanDecoder::from_lengths_sparse(&[
(syms[0], 1),
(syms[1], 2),
(c, 3),
(d, 3),
])
}
}
_ => unreachable!(),
}
}
fn read_complex_prefix_code(
src: &mut BitSource<'_>,
alphabet_size: u32,
hskip: u32,
) -> Result<HuffmanDecoder, Error> {
let cl_cl_lengths: [(u32, u8); 6] = [(0, 2), (1, 4), (2, 3), (3, 2), (4, 2), (5, 4)];
let cl_decoder = HuffmanDecoder::from_lengths_sparse(&cl_cl_lengths)?;
let mut cl_lengths = [0u8; 18];
let mut space: i32 = 32;
let mut idx = hskip as usize;
while idx < 18 {
let sym_pos = CODE_LENGTH_ORDER[idx];
let v = cl_decoder.decode(src)?;
if v > 5 {
return Err(Error::InvalidHuffmanTree);
}
cl_lengths[sym_pos] = v as u8;
if v != 0 {
space -= 32 >> v;
if space <= 0 {
break;
}
}
idx += 1;
}
if space != 0 {
return Err(Error::InvalidHuffmanTree);
}
let mut cl_sym_pairs: Vec<(u32, u8)> = Vec::new();
for (i, &l) in cl_lengths.iter().enumerate() {
if l > 0 {
cl_sym_pairs.push((i as u32, l));
}
}
if cl_sym_pairs.is_empty() {
return Err(Error::InvalidHuffmanTree);
}
let cl_sym_decoder = if cl_sym_pairs.len() == 1 {
HuffmanDecoder::single(cl_sym_pairs[0].0)
} else {
HuffmanDecoder::from_lengths_sparse(&cl_sym_pairs)?
};
let mut sym_lengths: Vec<u8> = vec![0u8; alphabet_size as usize];
let mut prev_nonzero: u8 = 8;
let mut filled: u32 = 0;
let mut space: i64 = 1 << 15;
let mut prev_code: u32 = u32::MAX; let mut prev_repeat_count: u32 = 0;
while filled < alphabet_size && space > 0 {
let code = cl_sym_decoder.decode(src)?;
match code {
0..=15 => {
sym_lengths[filled as usize] = code as u8;
if code != 0 {
prev_nonzero = code as u8;
space -= 1i64 << (15 - code);
}
filled += 1;
prev_code = code;
prev_repeat_count = 0;
}
16 => {
let extra = src.read_bits(2)?;
let new_count = if prev_code == 16 {
4 * (prev_repeat_count - 2) + (3 + extra)
} else {
3 + extra
};
let to_add = if prev_code == 16 {
new_count - prev_repeat_count
} else {
new_count
};
if filled + to_add > alphabet_size {
return Err(Error::Corrupt);
}
for _ in 0..to_add {
sym_lengths[filled as usize] = prev_nonzero;
filled += 1;
space -= 1i64 << (15 - prev_nonzero as u32);
}
prev_code = 16;
prev_repeat_count = new_count;
}
17 => {
let extra = src.read_bits(3)?;
let new_count = if prev_code == 17 {
8 * (prev_repeat_count - 2) + (3 + extra)
} else {
3 + extra
};
let to_add = if prev_code == 17 {
new_count - prev_repeat_count
} else {
new_count
};
if filled + to_add > alphabet_size {
return Err(Error::Corrupt);
}
for _ in 0..to_add {
sym_lengths[filled as usize] = 0;
filled += 1;
}
prev_code = 17;
prev_repeat_count = new_count;
}
_ => return Err(Error::Corrupt),
}
}
if space < 0 {
return Err(Error::Corrupt);
}
if filled < alphabet_size {
for slot in sym_lengths
.iter_mut()
.take(alphabet_size as usize)
.skip(filled as usize)
{
*slot = 0;
}
}
HuffmanDecoder::from_lengths_allow_single(&sym_lengths[..alphabet_size as usize])
}
fn read_block_count(src: &mut BitSource<'_>, tree: &HuffmanDecoder) -> Result<u32, Error> {
let sym = tree.decode(src)?;
if sym >= NUM_BLOCK_LEN_SYMBOLS {
return Err(Error::Corrupt);
}
let extra = src.read_bits(BLOCK_LEN_EXTRA[sym as usize])?;
Ok(BLOCK_LEN_BASE[sym as usize] + extra)
}
fn decode_compressed_meta_block(
&mut self,
src: &mut BitSource<'_>,
mlen: u32,
) -> Result<(), Error> {
let group_l = read_block_group(src)?;
let group_i = read_block_group(src)?;
let group_d = read_block_group(src)?;
let npostfix = src.read_bits(2)?;
let ndirect_bits = src.read_bits(4)?;
let ndirect = ndirect_bits << npostfix;
let num_dist_codes: u32 = 16 + ndirect + (48u32 << npostfix);
let mut cmodes: Vec<ContextMode> = Vec::with_capacity(group_l.nbltypes as usize);
for _ in 0..group_l.nbltypes {
cmodes.push(ContextMode::from_bits(src.read_bits(2)?));
}
let ntreesl = Self::read_nbltypes(src)?;
let cmapl_size = 64 * group_l.nbltypes;
let cmapl = if ntreesl >= 2 {
read_context_map(src, cmapl_size, ntreesl)?
} else {
vec![0u8; cmapl_size as usize]
};
let ntreesd = Self::read_nbltypes(src)?;
let cmapd_size = 4 * group_d.nbltypes;
let cmapd = if ntreesd >= 2 {
read_context_map(src, cmapd_size, ntreesd)?
} else {
vec![0u8; cmapd_size as usize]
};
let mut htree_l: Vec<HuffmanDecoder> = Vec::with_capacity(ntreesl as usize);
for _ in 0..ntreesl {
htree_l.push(Self::read_prefix_code(src, NUM_LITERAL_SYMBOLS)?);
}
let mut htree_i: Vec<HuffmanDecoder> = Vec::with_capacity(group_i.nbltypes as usize);
for _ in 0..group_i.nbltypes {
htree_i.push(Self::read_prefix_code(src, NUM_COMMAND_SYMBOLS)?);
}
let mut htree_d: Vec<HuffmanDecoder> = Vec::with_capacity(ntreesd as usize);
for _ in 0..ntreesd {
htree_d.push(Self::read_prefix_code(src, num_dist_codes)?);
}
let mut emitted: u32 = 0;
let mut block_type_l: u32 = 0;
let mut block_type_i: u32 = 0;
let mut block_type_d: u32 = 0;
let mut prev_block_type_l: u32 = 1;
let mut prev_block_type_i: u32 = 1;
let mut prev_block_type_d: u32 = 1;
let mut block_len_l: u32 = group_l.first_count;
let mut block_len_i: u32 = group_i.first_count;
let mut block_len_d: u32 = group_d.first_count;
let postfix_mask: u32 = (1u32 << npostfix) - 1;
macro_rules! maybe_switch {
($len:ident, $bt:ident, $prev:ident, $group:expr) => {
if $len == 0 {
let g = &$group;
let nbl = g.nbltypes;
let type_tree = g.type_tree.as_ref().unwrap();
let count_tree = g.count_tree.as_ref().unwrap();
let code = type_tree.decode(src)?;
let next_type = if code == 0 {
$prev
} else if code == 1 {
($bt + 1) % nbl
} else {
code - 2
};
if next_type >= nbl {
return Err(Error::Corrupt);
}
$prev = $bt;
$bt = next_type;
$len = Self::read_block_count(src, count_tree)?;
}
};
}
while emitted < mlen {
maybe_switch!(block_len_i, block_type_i, prev_block_type_i, group_i);
block_len_i -= 1;
let cmd_sym = htree_i[block_type_i as usize].decode(src)?;
if cmd_sym >= NUM_COMMAND_SYMBOLS {
return Err(Error::Corrupt);
}
let (ins_code, copy_code, use_last_dist) = decode_ic_command(cmd_sym);
let ins_extra = src.read_bits(INS_EXTRA[ins_code as usize])?;
let insert_len = INS_BASE[ins_code as usize] + ins_extra;
let copy_extra = src.read_bits(COPY_EXTRA[copy_code as usize])?;
let copy_len = COPY_BASE[copy_code as usize] + copy_extra;
for _ in 0..insert_len {
if emitted >= mlen {
return Err(Error::Corrupt);
}
maybe_switch!(block_len_l, block_type_l, prev_block_type_l, group_l);
block_len_l -= 1;
let cid = context::literal_context(cmodes[block_type_l as usize], self.p1, self.p2);
let tree_idx = cmapl[(64 * block_type_l + cid as u32) as usize] as usize;
let sym = htree_l[tree_idx].decode(src)?;
if sym > 255 {
return Err(Error::Corrupt);
}
self.emit_literal(sym as u8);
emitted += 1;
}
if emitted >= mlen {
break;
}
let (distance, is_short_or_direct) = if use_last_dist {
(self.last_dist() as u32, true)
} else {
maybe_switch!(block_len_d, block_type_d, prev_block_type_d, group_d);
block_len_d -= 1;
let cid = context::distance_context(copy_len) as u32;
let tree_idx = cmapd[(4 * block_type_d + cid) as usize] as usize;
let dcode = htree_d[tree_idx].decode(src)?;
if dcode >= num_dist_codes {
return Err(Error::Corrupt);
}
if dcode < 16 {
(decode_short_distance(self, dcode)?, true)
} else if dcode < 16 + ndirect {
(dcode - 15, false)
} else {
let v = dcode - ndirect - 16;
let ndistbits = 1 + (v >> (npostfix + 1));
let dextra = src.read_bits(ndistbits)?;
let hcode = v >> npostfix;
let lcode = v & postfix_mask;
let offset = ((2 + (hcode & 1)) << ndistbits) - 4;
let dist = ((offset + dextra) << npostfix) + lcode + ndirect + 1;
(dist, false)
}
};
let max_dist = (self.window_size.saturating_sub(16)).min(self.total_out as u32);
if distance <= max_dist {
if !is_short_or_direct {
self.push_dist(distance as i32);
}
self.emit_copy(distance, copy_len)?;
emitted += copy_len;
if emitted > mlen {
return Err(Error::Corrupt);
}
} else {
let n = self.emit_dictionary(distance, copy_len, max_dist)?;
emitted += n;
if emitted > mlen {
return Err(Error::Corrupt);
}
}
}
Ok(())
}
fn emit_dictionary(
&mut self,
distance: u32,
copy_len: u32,
max_dist: u32,
) -> Result<u32, Error> {
let len = copy_len as usize;
if !(dictionary::MIN_DICTIONARY_WORD_LENGTH..=dictionary::MAX_DICTIONARY_WORD_LENGTH)
.contains(&len)
{
return Err(Error::InvalidDistance);
}
let nwords_bits = dictionary::SIZE_BITS_BY_LENGTH[len];
if nwords_bits == 0 {
return Err(Error::InvalidDistance);
}
let nwords: u32 = 1 << nwords_bits;
let off = distance
.checked_sub(max_dist)
.ok_or(Error::InvalidDistance)?;
let off = off.checked_sub(1).ok_or(Error::InvalidDistance)?;
let word_id = off & (nwords - 1);
let transform_id = off >> nwords_bits;
if transform_id >= 121 {
return Err(Error::InvalidDistance);
}
let word = dictionary::word(len, word_id).ok_or(Error::InvalidDistance)?;
let mut scratch: Vec<u8> = Vec::with_capacity(64);
let n = transforms::apply_transform(&mut scratch, word, transform_id as usize);
for b in scratch {
self.emit_literal(b);
}
Ok(n as u32)
}
}
fn alphabet_bits(alphabet_size: u32) -> u32 {
debug_assert!(alphabet_size >= 1);
if alphabet_size <= 1 {
return 0;
}
let mut n = 1u32;
while (1u32 << n) < alphabet_size {
n += 1;
}
n
}
fn decode_short_distance(dec: &mut Decoder, code: u32) -> Result<u32, Error> {
let last = dec.nth_last_dist(1);
let last2 = dec.nth_last_dist(2);
let dist: i32 = match code {
0 => last,
1 => dec.nth_last_dist(2),
2 => dec.nth_last_dist(3),
3 => dec.nth_last_dist(4),
4 => last - 1,
5 => last + 1,
6 => last - 2,
7 => last + 2,
8 => last - 3,
9 => last + 3,
10 => last2 - 1,
11 => last2 + 1,
12 => last2 - 2,
13 => last2 + 2,
14 => last2 - 3,
15 => last2 + 3,
_ => unreachable!(),
};
if dist <= 0 {
return Err(Error::InvalidDistance);
}
if code != 0 {
dec.push_dist(dist);
}
Ok(dist as u32)
}
fn decode_ic_command(cmd: u32) -> (u32, u32, bool) {
let (ins_base, copy_base, use_last) = match cmd / 64 {
0 => (0u32, 0u32, true),
1 => (0, 8, true),
2 => (0, 0, false),
3 => (0, 8, false),
4 => (8, 0, false),
5 => (8, 8, false),
6 => (0, 16, false),
7 => (16, 0, false),
8 => (8, 16, false),
9 => (16, 8, false),
10 => (16, 16, false),
_ => unreachable!(),
};
let cell_local = cmd & 0x3F;
let copy_code = copy_base + (cell_local & 7);
let ins_code = ins_base + (cell_local >> 3);
(ins_code, copy_code, use_last)
}
fn read_context_map(src: &mut BitSource<'_>, size: u32, ntrees: u32) -> Result<Vec<u8>, Error> {
let has_rle = src.read_bit()?;
let rlemax = if has_rle == 1 {
src.read_bits(4)? + 1
} else {
0
};
let alphabet = ntrees + rlemax;
let tree = Decoder::read_prefix_code(src, alphabet)?;
let mut map: Vec<u8> = Vec::with_capacity(size as usize);
while (map.len() as u32) < size {
let sym = tree.decode(src)?;
if sym == 0 {
map.push(0);
} else if sym <= rlemax {
let extra = src.read_bits(sym)?;
let run = (1u32 << sym) + extra;
for _ in 0..run {
if (map.len() as u32) >= size {
return Err(Error::Corrupt);
}
map.push(0);
}
} else {
map.push((sym - rlemax) as u8);
}
}
if map.len() as u32 != size {
return Err(Error::Corrupt);
}
let imtf = src.read_bit()?;
if imtf == 1 {
inverse_mtf(&mut map);
}
Ok(map)
}
fn inverse_mtf(v: &mut [u8]) {
let mut mtf = [0u8; 256];
for (i, slot) in mtf.iter_mut().enumerate() {
*slot = i as u8;
}
for slot in v.iter_mut() {
let index = *slot as usize;
let value = mtf[index];
*slot = value;
for i in (1..=index).rev() {
mtf[i] = mtf[i - 1];
}
mtf[0] = value;
}
}
struct BlockGroup {
nbltypes: u32,
type_tree: Option<HuffmanDecoder>,
count_tree: Option<HuffmanDecoder>,
first_count: u32,
}
fn read_block_group(src: &mut BitSource<'_>) -> Result<BlockGroup, Error> {
let nbltypes = Decoder::read_nbltypes(src)?;
if nbltypes >= 2 {
let alphabet_type = nbltypes + 2;
let type_tree = Decoder::read_prefix_code(src, alphabet_type)?;
let count_tree = Decoder::read_prefix_code(src, NUM_BLOCK_LEN_SYMBOLS)?;
let first_count = Decoder::read_block_count(src, &count_tree)?;
Ok(BlockGroup {
nbltypes,
type_tree: Some(type_tree),
count_tree: Some(count_tree),
first_count,
})
} else {
Ok(BlockGroup {
nbltypes,
type_tree: None,
count_tree: None,
first_count: 1u32 << 24,
})
}
}
impl Default for Decoder {
fn default() -> Self {
Self::new()
}
}
impl RawDecoder for Decoder {
fn raw_decode(&mut self, input: &[u8], output: &mut [u8]) -> Result<RawProgress, Error> {
if self.poisoned {
return Err(Error::Corrupt);
}
let consumed = input.len();
self.raw.extend_from_slice(input);
let mut written = 0usize;
loop {
if self.out_pos < self.out.len() {
let drained = self.drain_out_into(&mut output[written..]);
written += drained;
if written == output.len() {
break;
}
}
match self.state {
DecState::NeedHeader => match self.read_stream_header() {
Ok(true) => {
self.compact_raw();
self.state = DecState::NeedMetaBlock;
}
Ok(false) => break,
Err(e) => return Err(self.poison(e)),
},
DecState::NeedMetaBlock => match self.process_next_meta_block() {
Ok(true) => {
if self.state == DecState::Done {
continue;
}
}
Ok(false) => break,
Err(e) => return Err(self.poison(e)),
},
DecState::Done => {
if self.out_pos >= self.out.len() {
break;
}
}
}
}
self.compact_out();
Ok(RawProgress {
consumed,
written,
done: false,
})
}
fn raw_finish(&mut self, output: &mut [u8]) -> Result<RawProgress, Error> {
if self.poisoned {
return Err(Error::Corrupt);
}
let mut written = 0usize;
let mut output_full = false;
loop {
if self.out_pos < self.out.len() {
let drained = self.drain_out_into(&mut output[written..]);
written += drained;
if written == output.len() {
output_full = true;
break;
}
}
match self.state {
DecState::NeedHeader => match self.read_stream_header() {
Ok(true) => {
self.compact_raw();
self.state = DecState::NeedMetaBlock;
}
Ok(false) => break,
Err(e) => return Err(self.poison(e)),
},
DecState::NeedMetaBlock => match self.process_next_meta_block() {
Ok(true) => {
}
Ok(false) => break,
Err(e) => return Err(self.poison(e)),
},
DecState::Done => {
if self.out_pos >= self.out.len() {
break;
}
}
}
}
self.compact_out();
let fully_done = self.state == DecState::Done && self.out_pos == self.out.len();
if fully_done {
return Ok(RawProgress {
consumed: 0,
written,
done: true,
});
}
if output_full || self.state == DecState::Done || self.out_pos < self.out.len() {
return Ok(RawProgress {
consumed: 0,
written,
done: false,
});
}
Err(self.poison(Error::UnexpectedEnd))
}
fn raw_reset(&mut self) {
self.raw.clear();
self.bit_pos = 0;
self.out.clear();
self.out_pos = 0;
self.state = DecState::NeedHeader;
self.poisoned = false;
self.window_size = 1 << 16;
self.dist_ring = [16, 15, 11, 4];
self.ring_idx = 0;
self.total_out = 0;
self.p1 = 0;
self.p2 = 0;
}
}