use super::deflate;
use super::huffman_code::{
get_fixed_literal_encoding, get_fixed_offset_encoding, new_huffman_encoder,
};
use super::huffman_code::{HCode, HuffmanEncoder};
use super::inflate;
use super::token::{length_code, offset_code, Token, TokenTrait, MATCH_TYPE};
use std::io;
use std::sync::OnceLock;
const OFFSET_CODE_COUNT: usize = 30;
pub(super) const END_BLOCK_MARKER: usize = 256;
const LENGTH_CODES_START: usize = 257;
const CODEGEN_CODE_COUNT: usize = 19;
const BAD_CODE: u8 = 255;
const BUFFER_FLUSH_SIZE: usize = 240;
const BUFFER_SIZE: usize = BUFFER_FLUSH_SIZE + 8;
const LENGTH_EXTRA_BITS: &[u8] = &[
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, ];
const LENGTH_BASE: &[u8] = &[
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 255, ];
const OFFSET_EXTRA_BITS: &[u8] = &[
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, ];
const OFFSET_BASE: &[u32] = &[
0x000000, 0x000001, 0x000002, 0x000003, 0x000004, 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, ];
const CODEGEN_ORDER: &[u32] = &[
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
];
fn get_huff_offset() -> &'static HuffmanEncoder {
static ENCODER: OnceLock<HuffmanEncoder> = OnceLock::new();
ENCODER.get_or_init(generate_huff_offset)
}
fn generate_huff_offset() -> HuffmanEncoder {
let mut offset_freq: [i32; OFFSET_CODE_COUNT] = [0; OFFSET_CODE_COUNT];
offset_freq[0] = 1;
let mut huff_offset = new_huffman_encoder(OFFSET_CODE_COUNT);
huff_offset.generate(&offset_freq, 15);
huff_offset
}
fn histogram(b: &[u8], h: &mut [i32]) {
for t in b {
h[*t as usize] += 1;
}
}
pub(super) struct HuffmanBitWriter<'a, Output: std::io::Write> {
writer: &'a mut Output,
bits: u64,
nbits: usize,
bytes: Vec<u8>,
codegen_freq: Vec<i32>,
nbytes: usize,
literal_freq: Vec<i32>,
offset_freq: Vec<i32>,
codegen: Vec<u8>,
literal_encoding: Box<HuffmanEncoder>,
offset_encoding: Box<HuffmanEncoder>,
codegen_encoding: Box<HuffmanEncoder>,
err: std::io::Result<usize>,
}
impl<'a, Output: std::io::Write> HuffmanBitWriter<'a, Output> {
pub(super) fn new(w: &'a mut Output) -> Self {
Self {
writer: w,
bits: 0,
nbits: 0,
nbytes: 0,
codegen_freq: vec![0; CODEGEN_CODE_COUNT],
bytes: vec![0; BUFFER_SIZE],
err: Ok(0),
literal_freq: vec![0; inflate::MAX_NUM_LIT],
offset_freq: vec![0; OFFSET_CODE_COUNT],
codegen: vec![0; inflate::MAX_NUM_LIT + OFFSET_CODE_COUNT + 1],
literal_encoding: Box::new(new_huffman_encoder(inflate::MAX_NUM_LIT)),
codegen_encoding: Box::new(new_huffman_encoder(CODEGEN_CODE_COUNT)),
offset_encoding: Box::new(new_huffman_encoder(OFFSET_CODE_COUNT)),
}
}
pub fn reset(&mut self, writer: &'a mut Output) {
self.writer = writer;
self.bits = 0;
self.nbits = 0;
self.nbytes = 0;
self.err = Ok(0);
}
pub fn flush(&mut self) {
if self.err.is_err() {
self.nbits = 0;
return;
}
let mut n = self.nbytes;
while self.nbits != 0 {
self.bytes[n] = self.bits as u8;
self.bits >>= 8;
if self.nbits > 8 {
self.nbits -= 8;
} else {
self.nbits = 0;
}
n += 1;
}
self.bits = 0;
self.write_bytes_from_internal_buffer(n);
self.nbytes = 0;
}
fn write(&mut self, b: &[u8]) {
if self.err.is_err() {
return;
}
self.err = self.writer.write(b);
}
fn write_bytes_from_internal_buffer(&mut self, n: usize) {
if self.err.is_err() {
return;
}
self.err = self.writer.write(&self.bytes[0..n]);
}
fn write_bits(&mut self, b: u32, nb: usize) {
if self.err.is_err() {
return;
}
self.bits |= (b as u64) << self.nbits;
self.nbits += nb;
if self.nbits >= 48 {
let bits = self.bits;
self.bits >>= 48;
self.nbits -= 48;
let mut n = self.nbytes;
{
let bytes = &mut self.bytes[n..n + 6];
bytes[0] = (bits) as u8;
bytes[1] = (bits >> 8) as u8;
bytes[2] = (bits >> 16) as u8;
bytes[3] = (bits >> 24) as u8;
bytes[4] = (bits >> 32) as u8;
bytes[5] = (bits >> 40) as u8;
}
n += 6;
if n >= BUFFER_FLUSH_SIZE {
self.write_bytes_from_internal_buffer(n);
n = 0;
}
self.nbytes = n
}
}
pub(super) fn write_bytes(&mut self, bytes: &[u8]) {
if self.err.is_err() {
return;
}
let mut n = self.nbytes;
if self.nbits & 7 != 0 {
self.err = Err(io::Error::new(
io::ErrorKind::Other,
"writeBytes with unfinished bits",
));
return;
}
while self.nbits != 0 {
self.bytes[n] = self.bits as u8;
self.bits >>= 8;
self.nbits -= 8;
n += 1;
}
if n != 0 {
self.write_bytes_from_internal_buffer(n);
}
self.nbytes = 0;
self.write(bytes);
}
fn generate_codegen(
&mut self,
num_literals: usize,
num_offsets: usize,
off_enc: Option<&HuffmanEncoder>,
) {
self.codegen_freq.fill(0);
for i in 0..num_literals {
self.codegen[i] = self.literal_encoding.codes[i].len as u8;
}
for i in 0..num_offsets {
self.codegen[num_literals + i] = match off_enc {
None => &self.offset_encoding,
Some(enc) => enc,
}
.codes[i]
.len as u8;
}
self.codegen[num_literals + num_offsets] = BAD_CODE;
let mut size = self.codegen[0];
let mut count = 1;
let mut out_index = 0;
let mut in_index = 1;
while size != BAD_CODE {
let next_size = self.codegen[in_index];
if next_size == size {
count += 1;
in_index += 1;
continue;
}
if size != 0 {
self.codegen[out_index] = size;
out_index += 1;
self.codegen_freq[size as usize] += 1;
count -= 1;
while count >= 3 {
let mut n = 6;
if n > count {
n = count;
}
self.codegen[out_index] = 16;
out_index += 1;
self.codegen[out_index] = (n - 3) as u8;
out_index += 1;
self.codegen_freq[16] += 1;
count -= n;
}
} else {
while count >= 11 {
let mut n = 138;
if n > count {
n = count;
}
self.codegen[out_index] = 18;
out_index += 1;
self.codegen[out_index] = (n - 11) as u8;
out_index += 1;
self.codegen_freq[18] += 1;
count -= n;
}
if count >= 3 {
self.codegen[out_index] = 17;
out_index += 1;
self.codegen[out_index] = (count - 3) as u8;
out_index += 1;
self.codegen_freq[17] += 1;
count = 0;
}
}
count -= 1;
while count >= 0 {
self.codegen[out_index] = size;
out_index += 1;
self.codegen_freq[size as usize] += 1;
count -= 1;
}
size = next_size;
count = 1;
in_index += 1;
}
self.codegen[out_index] = BAD_CODE;
}
fn dynamic_size(
&self,
lit_enc: &HuffmanEncoder,
off_enc: &HuffmanEncoder,
extra_bits: usize,
) -> (usize, usize) {
let mut num_codegens = self.codegen_freq.len();
while num_codegens > 4 && self.codegen_freq[CODEGEN_ORDER[num_codegens - 1] as usize] == 0 {
num_codegens -= 1;
}
let header = 3
+ 5
+ 5
+ 4
+ (3 * num_codegens)
+ self.codegen_encoding.bit_length(&self.codegen_freq)
+ (self.codegen_freq[16] as usize) * 2
+ (self.codegen_freq[17] as usize) * 3
+ (self.codegen_freq[18] as usize) * 7;
let size = header
+ lit_enc.bit_length(&self.literal_freq)
+ off_enc.bit_length(&self.offset_freq)
+ extra_bits;
(size, num_codegens)
}
fn fixed_size(&mut self, extra_bits: usize) -> usize {
3 + get_fixed_literal_encoding().bit_length(&self.literal_freq)
+ get_fixed_offset_encoding().bit_length(&self.offset_freq)
+ extra_bits
}
fn stored_size(&mut self, input: Option<&[u8]>) -> (usize, bool) {
if let Some(input) = input {
if input.len() <= deflate::MAX_STORE_BLOCK_SIZE {
return ((input.len() + 5) * 8, true);
}
}
(0, false)
}
fn write_code(&mut self, c: HCode) {
if self.err.is_err() {
return;
}
self.bits |= (c.code as u64) << self.nbits;
self.nbits += c.len as usize;
if self.nbits >= 48 {
let bits = self.bits;
self.bits >>= 48;
self.nbits -= 48;
let mut n = self.nbytes;
{
let bytes = &mut self.bytes[n..n + 6];
bytes[0] = (bits) as u8;
bytes[1] = (bits >> 8) as u8;
bytes[2] = (bits >> 16) as u8;
bytes[3] = (bits >> 24) as u8;
bytes[4] = (bits >> 32) as u8;
bytes[5] = (bits >> 40) as u8;
}
n += 6;
if n >= BUFFER_FLUSH_SIZE {
self.write_bytes_from_internal_buffer(n);
n = 0;
}
self.nbytes = n;
}
}
fn write_dynamic_header(
&mut self,
num_literals: usize,
num_offsets: usize,
num_codegens: usize,
is_eof: bool,
) {
if self.err.is_err() {
return;
}
let first_bits: u32 = if is_eof { 5 } else { 4 };
self.write_bits(first_bits, 3);
self.write_bits((num_literals - 257) as u32, 5);
self.write_bits((num_offsets - 1) as u32, 5);
self.write_bits((num_codegens - 4) as u32, 4);
#[allow(clippy::needless_range_loop)]
for i in 0..num_codegens {
let value = self.codegen_encoding.codes[CODEGEN_ORDER[i] as usize].len;
self.write_bits(value as u32, 3);
}
let mut i = 0;
loop {
let code_word = self.codegen[i];
i += 1;
if code_word == BAD_CODE {
break;
}
self.write_code(self.codegen_encoding.codes[code_word as usize]);
match code_word {
16 => {
self.write_bits(self.codegen[i] as u32, 2);
i += 1;
}
17 => {
self.write_bits(self.codegen[i] as u32, 3);
i += 1;
}
18 => {
self.write_bits(self.codegen[i] as u32, 7);
i += 1;
}
_ => {}
}
}
}
pub(super) fn write_stored_header(&mut self, length: usize, is_eof: bool) {
if self.err.is_err() {
return;
}
let flag: u32 = if is_eof { 1 } else { 0 };
self.write_bits(flag, 3);
self.flush();
self.write_bits(length as u32, 16);
self.write_bits((!(length as u16)) as u32, 16);
}
fn write_fixed_header(&mut self, is_eof: bool) {
if self.err.is_err() {
return;
}
let value = if is_eof { 3 } else { 2 };
self.write_bits(value, 3)
}
pub(super) fn write_block(&mut self, tokens: &[Token], eof: bool, input: Option<&[u8]>) {
if self.err.is_err() {
return;
}
let mut tokens = tokens.to_vec();
tokens.push(END_BLOCK_MARKER as u32);
let (num_literals, num_offsets) = self.index_tokens(&tokens);
let mut extra_bits = 0;
let (stored_size, storable) = self.stored_size(input);
if storable {
for length_code in LENGTH_CODES_START + 8..num_literals {
extra_bits += (self.literal_freq[length_code]) as usize
* (LENGTH_EXTRA_BITS[length_code - LENGTH_CODES_START]) as usize;
}
#[allow(clippy::needless_range_loop)]
for offset_code in 4..num_offsets {
extra_bits += (self.offset_freq[offset_code]) as usize
* (OFFSET_EXTRA_BITS[offset_code]) as usize;
}
}
let literal_encoding = get_fixed_literal_encoding();
let offset_encoding = get_fixed_offset_encoding();
let mut size = self.fixed_size(extra_bits);
self.generate_codegen(num_literals, num_offsets, None);
self.codegen_encoding.generate(&self.codegen_freq, 7);
let (dynamic_size, num_codegens) =
self.dynamic_size(&self.literal_encoding, &self.offset_encoding, extra_bits);
let mut use_dynamic = false;
if dynamic_size < size {
use_dynamic = true;
size = dynamic_size;
}
if storable && stored_size < size {
self.write_stored_header(input.unwrap().len(), eof);
self.write_bytes(input.unwrap());
return;
}
if !use_dynamic {
self.write_fixed_header(eof);
self.write_tokens(&tokens, &literal_encoding.codes, &offset_encoding.codes);
} else {
self.write_dynamic_header(num_literals, num_offsets, num_codegens, eof);
self.write_tokens_using_internal_codes(&tokens);
}
}
pub(super) fn write_block_dynamic(
&mut self,
tokens: &[Token],
eof: bool,
input: Option<&[u8]>,
) {
if self.err.is_err() {
return;
}
let mut tokens = tokens.to_vec();
tokens.push(END_BLOCK_MARKER as u32);
let (num_literals, num_offsets) = self.index_tokens(&tokens);
self.generate_codegen(num_literals, num_offsets, None);
self.codegen_encoding.generate(&self.codegen_freq, 7);
let (size, num_codegens) =
self.dynamic_size(&self.literal_encoding, &self.offset_encoding, 0);
let (ssize, storable) = self.stored_size(input);
if storable && ssize < (size + (size >> 4)) {
let input = input.unwrap();
self.write_stored_header(input.len(), eof);
self.write_bytes(input);
return;
}
self.write_dynamic_header(num_literals, num_offsets, num_codegens, eof);
self.write_tokens_using_internal_codes(&tokens);
}
fn index_tokens(&mut self, tokens: &[Token]) -> (usize, usize) {
for i in 0..self.literal_freq.len() {
self.literal_freq[i] = 0;
}
for i in 0..self.offset_freq.len() {
self.offset_freq[i] = 0;
}
for t in tokens {
if *t < MATCH_TYPE {
self.literal_freq[t.literal() as usize] += 1;
continue;
}
let length = t.length();
let offset = t.offset();
self.literal_freq[LENGTH_CODES_START + length_code(length)] += 1;
self.offset_freq[offset_code(offset as usize)] += 1;
}
let mut num_literals = self.literal_freq.len();
while self.literal_freq[num_literals - 1] == 0 {
num_literals -= 1;
}
let mut num_offsets = self.offset_freq.len();
while num_offsets > 0 && self.offset_freq[num_offsets - 1] == 0 {
num_offsets -= 1;
}
if num_offsets == 0 {
self.offset_freq[0] = 1;
num_offsets = 1;
}
self.literal_encoding.generate(&self.literal_freq, 15);
self.offset_encoding.generate(&self.offset_freq, 15);
(num_literals, num_offsets)
}
fn write_tokens(&mut self, tokens: &[Token], le_codes: &[HCode], oe_codes: &[HCode]) {
if self.err.is_err() {
return;
}
for t in tokens {
if *t < MATCH_TYPE {
self.write_code(le_codes[t.literal() as usize]);
continue;
}
let length = t.length();
let length_code = length_code(length);
self.write_code(le_codes[length_code + LENGTH_CODES_START]);
let extra_length_bits = LENGTH_EXTRA_BITS[length_code] as usize;
if extra_length_bits > 0 {
let extra_length = length - LENGTH_BASE[length_code] as u32;
self.write_bits(extra_length, extra_length_bits);
}
let offset = t.offset() as usize;
let offset_code = offset_code(offset);
self.write_code(oe_codes[offset_code]);
let extra_offset_bits = OFFSET_EXTRA_BITS[offset_code] as usize;
if extra_offset_bits > 0 {
let extra_offset = offset as u32 - OFFSET_BASE[offset_code];
self.write_bits(extra_offset, extra_offset_bits);
}
}
}
fn write_tokens_using_internal_codes(&mut self, tokens: &[Token]) {
if self.err.is_err() {
return;
}
for t in tokens {
if *t < MATCH_TYPE {
self.write_code(self.literal_encoding.codes[t.literal() as usize]);
continue;
}
let length = t.length();
let length_code = length_code(length);
self.write_code(self.literal_encoding.codes[length_code + LENGTH_CODES_START]);
let extra_length_bits = LENGTH_EXTRA_BITS[length_code] as usize;
if extra_length_bits > 0 {
let extra_length = length - LENGTH_BASE[length_code] as u32;
self.write_bits(extra_length, extra_length_bits);
}
let offset = t.offset() as usize;
let offset_code = offset_code(offset);
self.write_code(self.offset_encoding.codes[offset_code]);
let extra_offset_bits = OFFSET_EXTRA_BITS[offset_code] as usize;
if extra_offset_bits > 0 {
let extra_offset = offset as u32 - OFFSET_BASE[offset_code];
self.write_bits(extra_offset, extra_offset_bits);
}
}
}
pub fn write_block_huff(&mut self, eof: bool, input: &[u8]) {
if self.err.is_err() {
return;
}
self.literal_freq.fill(0);
histogram(input, &mut self.literal_freq);
self.literal_freq[END_BLOCK_MARKER] = 1;
let num_literals = END_BLOCK_MARKER + 1;
self.offset_freq[0] = 1;
let num_offsets = 1;
self.literal_encoding.generate(&self.literal_freq, 15);
self.generate_codegen(num_literals, num_offsets, Some(get_huff_offset()));
self.codegen_encoding.generate(&self.codegen_freq, 7);
let (size, num_codegens) = self.dynamic_size(&self.literal_encoding, get_huff_offset(), 0);
let (ssize, storable) = self.stored_size(Some(input));
if storable && ssize < (size + (size >> 4)) {
self.write_stored_header(input.len(), eof);
self.write_bytes(input);
return;
}
self.write_dynamic_header(num_literals, num_offsets, num_codegens, eof);
let mut n = self.nbytes;
for t in input {
let c = self.literal_encoding.codes[*t as usize];
self.bits |= (c.code as u64) << self.nbits;
self.nbits += c.len as usize;
if self.nbits < 48 {
continue;
}
let bits = self.bits;
self.bits >>= 48;
self.nbits -= 48;
{
let bytes = &mut self.bytes[n..n + 6];
bytes[0] = (bits) as u8;
bytes[1] = (bits >> 8) as u8;
bytes[2] = (bits >> 16) as u8;
bytes[3] = (bits >> 24) as u8;
bytes[4] = (bits >> 32) as u8;
bytes[5] = (bits >> 40) as u8;
}
n += 6;
if n < BUFFER_FLUSH_SIZE {
continue;
}
self.write_bytes_from_internal_buffer(n);
if self.err.is_err() {
return; }
n = 0;
}
self.nbytes = n;
self.write_code(self.literal_encoding.codes[END_BLOCK_MARKER]);
}
pub(super) fn error(&self) -> &std::io::Result<usize> {
&self.err
}
pub fn output(&mut self) -> &mut Output {
self.writer
}
}