#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, vec::Vec};
use crate::constants::*;
use crate::matchfinder::bt::{BtMatchfinder, LzMatch};
use super::bitstream::OutputBitstream;
use super::block::{
BlockOutput, DeflateCodes, DeflateFreqs, EXTRA_PRECODE_BITS, LENGTH_SLOT,
compute_precode_items, compute_precode_items_best, flush_block, flush_block_best,
make_huffman_codes, make_huffman_codes_best,
};
use super::block_split::{BlockSplitStats, MIN_BLOCK_LENGTH, NUM_OBSERVATION_TYPES};
use super::huffman::{make_huffman_code, optimize_huffman_for_rle};
use super::sequences::Sequence;
use super::{SOFT_MAX_BLOCK_LENGTH, choose_min_match_len};
const BIT_COST: u32 = 16;
const LITERAL_NOSTAT_BITS: u32 = 13;
const LENGTH_NOSTAT_BITS: u32 = 13;
const OFFSET_NOSTAT_BITS: u32 = 10;
pub(crate) const MATCH_CACHE_LENGTH: usize = SOFT_MAX_BLOCK_LENGTH * 5;
const MAX_MATCHES_PER_POS: usize =
DEFLATE_MAX_MATCH_LEN as usize - DEFLATE_MIN_MATCH_LEN as usize + 1;
const MAX_BLOCK_LENGTH: usize = {
let a = SOFT_MAX_BLOCK_LENGTH + MIN_BLOCK_LENGTH - 1;
let b = SOFT_MAX_BLOCK_LENGTH + 1 + DEFLATE_MAX_MATCH_LEN as usize;
if a > b { a } else { b }
};
pub(crate) const OPTIMUM_OFFSET_SHIFT: u32 = 9;
pub(crate) const OPTIMUM_LEN_MASK: u32 = (1 << OPTIMUM_OFFSET_SHIFT) - 1;
const MATCH_CACHE_ALLOC_SIZE: usize =
MATCH_CACHE_LENGTH + MAX_MATCHES_PER_POS + DEFLATE_MAX_MATCH_LEN as usize - 1;
const OPTIMUM_NODES_SIZE: usize = MAX_BLOCK_LENGTH + 1;
const OFFSET_SLOT_FULL_SIZE: usize = DEFLATE_MAX_MATCH_OFFSET as usize + 1;
const MATCH_LEN_FREQ_SIZE: usize = DEFLATE_MAX_MATCH_LEN as usize + 1;
#[cfg(feature = "unchecked")]
type MatchCacheTab = [LzMatch; MATCH_CACHE_ALLOC_SIZE];
#[cfg(not(feature = "unchecked"))]
type MatchCacheTab = Vec<LzMatch>;
#[cfg(feature = "unchecked")]
type OptimumNodesTab = [OptimumNode; OPTIMUM_NODES_SIZE];
#[cfg(not(feature = "unchecked"))]
type OptimumNodesTab = Vec<OptimumNode>;
#[cfg(feature = "unchecked")]
type OffsetSlotFullTab = [u8; OFFSET_SLOT_FULL_SIZE];
#[cfg(not(feature = "unchecked"))]
type OffsetSlotFullTab = Vec<u8>;
#[cfg(feature = "unchecked")]
type MatchLenFreqTab = [u32; MATCH_LEN_FREQ_SIZE];
#[cfg(not(feature = "unchecked"))]
type MatchLenFreqTab = Vec<u32>;
#[derive(Clone, Copy, Default)]
pub(crate) struct OptimumNode {
pub cost_to_end: u32,
pub item: u32,
}
#[derive(Clone)]
pub(crate) struct DeflateCosts {
pub literal: [u32; DEFLATE_NUM_LITERALS as usize],
pub length: [u32; DEFLATE_MAX_MATCH_LEN as usize + 1],
pub offset_slot: [u32; DEFLATE_NUM_OFFSET_SYMS as usize],
}
impl Default for DeflateCosts {
fn default() -> Self {
Self {
literal: [0; DEFLATE_NUM_LITERALS as usize],
length: [0; DEFLATE_MAX_MATCH_LEN as usize + 1],
offset_slot: [0; DEFLATE_NUM_OFFSET_SYMS as usize],
}
}
}
#[derive(Clone, Copy)]
struct MwcRng {
z: u32,
w: u32,
}
impl MwcRng {
fn new(seed: u32) -> Self {
Self {
z: seed.wrapping_add(362436069),
w: seed.wrapping_mul(521288629).wrapping_add(1),
}
}
fn next_u32(&mut self) -> u32 {
self.z = 36969u32
.wrapping_mul(self.z & 0xFFFF)
.wrapping_add(self.z >> 16);
self.w = 18000u32
.wrapping_mul(self.w & 0xFFFF)
.wrapping_add(self.w >> 16);
(self.z << 16).wrapping_add(self.w)
}
}
#[derive(Clone)]
pub(crate) struct NearOptimalState {
pub bt_mf: BtMatchfinder,
pub match_cache: MatchCacheTab,
pub optimum_nodes: OptimumNodesTab,
pub costs: DeflateCosts,
pub costs_saved: DeflateCosts,
pub offset_slot_full: OffsetSlotFullTab,
pub prev_observations: [u32; NUM_OBSERVATION_TYPES],
pub prev_num_observations: u32,
pub new_match_len_freqs: MatchLenFreqTab,
pub match_len_freqs: MatchLenFreqTab,
pub max_optim_passes: u32,
pub min_improvement_to_continue: u32,
pub min_bits_to_use_nonfinal_path: u32,
pub max_len_to_optimize_static_block: u32,
}
impl NearOptimalState {
#[cfg(not(feature = "unchecked"))]
pub fn new(
max_optim_passes: u32,
min_improvement_to_continue: u32,
min_bits_to_use_nonfinal_path: u32,
max_len_to_optimize_static_block: u32,
) -> Box<Self> {
let mut s = Box::new(Self {
bt_mf: BtMatchfinder::new(),
match_cache: alloc::vec![LzMatch::default(); MATCH_CACHE_ALLOC_SIZE],
optimum_nodes: alloc::vec![OptimumNode::default(); OPTIMUM_NODES_SIZE],
costs: DeflateCosts::default(),
costs_saved: DeflateCosts::default(),
offset_slot_full: alloc::vec![0u8; OFFSET_SLOT_FULL_SIZE],
prev_observations: [0; NUM_OBSERVATION_TYPES],
prev_num_observations: 0,
new_match_len_freqs: alloc::vec![0u32; MATCH_LEN_FREQ_SIZE],
match_len_freqs: alloc::vec![0u32; MATCH_LEN_FREQ_SIZE],
max_optim_passes,
min_improvement_to_continue,
min_bits_to_use_nonfinal_path,
max_len_to_optimize_static_block,
});
init_offset_slot_full(&mut s.offset_slot_full);
s
}
#[cfg(feature = "unchecked")]
pub fn new(
max_optim_passes: u32,
min_improvement_to_continue: u32,
min_bits_to_use_nonfinal_path: u32,
max_len_to_optimize_static_block: u32,
) -> Box<Self> {
use core::ptr;
let mut boxed = Box::<Self>::new_uninit();
let p = boxed.as_mut_ptr();
unsafe {
BtMatchfinder::init_at(ptr::addr_of_mut!((*p).bt_mf));
let mc = ptr::addr_of_mut!((*p).match_cache) as *mut u8;
core::ptr::write_bytes(mc, 0, core::mem::size_of::<MatchCacheTab>());
let on = ptr::addr_of_mut!((*p).optimum_nodes) as *mut u8;
core::ptr::write_bytes(on, 0, core::mem::size_of::<OptimumNodesTab>());
let osf = ptr::addr_of_mut!((*p).offset_slot_full) as *mut u8;
let osf_slice = core::slice::from_raw_parts_mut(osf, OFFSET_SLOT_FULL_SIZE);
osf_slice.fill(0);
init_offset_slot_full(osf_slice);
ptr::addr_of_mut!((*p).costs).write(DeflateCosts::default());
ptr::addr_of_mut!((*p).costs_saved).write(DeflateCosts::default());
ptr::addr_of_mut!((*p).prev_observations).write([0; NUM_OBSERVATION_TYPES]);
ptr::addr_of_mut!((*p).prev_num_observations).write(0);
let nmlf = ptr::addr_of_mut!((*p).new_match_len_freqs) as *mut u8;
core::ptr::write_bytes(nmlf, 0, core::mem::size_of::<MatchLenFreqTab>());
let mlf = ptr::addr_of_mut!((*p).match_len_freqs) as *mut u8;
core::ptr::write_bytes(mlf, 0, core::mem::size_of::<MatchLenFreqTab>());
ptr::addr_of_mut!((*p).max_optim_passes).write(max_optim_passes);
ptr::addr_of_mut!((*p).min_improvement_to_continue).write(min_improvement_to_continue);
ptr::addr_of_mut!((*p).min_bits_to_use_nonfinal_path)
.write(min_bits_to_use_nonfinal_path);
ptr::addr_of_mut!((*p).max_len_to_optimize_static_block)
.write(max_len_to_optimize_static_block);
boxed.assume_init()
}
}
}
fn init_offset_slot_full(table: &mut [u8]) {
for slot in 0..30usize {
let base = DEFLATE_OFFSET_BASE[slot] as usize;
let count = 1usize << DEFLATE_OFFSET_EXTRA_BITS[slot];
for j in 0..count {
if base + j < table.len() {
table[base + j] = slot as u8;
}
}
}
}
struct DefaultLitlenCosts {
used_lits_to_lit_cost: [u8; 257],
len_sym_cost: u8,
}
#[rustfmt::skip]
static DEFAULT_LITLEN_COSTS: [DefaultLitlenCosts; 3] = [
DefaultLitlenCosts {
used_lits_to_lit_cost: [
6, 6, 22, 32, 38, 43, 48, 51, 54, 57, 59, 61, 64, 65, 67, 69,
70, 72, 73, 74, 75, 76, 77, 79, 80, 80, 81, 82, 83, 84, 85, 85,
86, 87, 88, 88, 89, 89, 90, 91, 91, 92, 92, 93, 93, 94, 95, 95,
96, 96, 96, 97, 97, 98, 98, 99, 99, 99, 100, 100, 101, 101, 101, 102,
102, 102, 103, 103, 104, 104, 104, 105, 105, 105, 105, 106, 106, 106, 107, 107,
107, 108, 108, 108, 108, 109, 109, 109, 109, 110, 110, 110, 111, 111, 111, 111,
112, 112, 112, 112, 112, 113, 113, 113, 113, 114, 114, 114, 114, 114, 115, 115,
115, 115, 115, 116, 116, 116, 116, 116, 117, 117, 117, 117, 117, 118, 118, 118,
118, 118, 118, 119, 119, 119, 119, 119, 120, 120, 120, 120, 120, 120, 121, 121,
121, 121, 121, 121, 121, 122, 122, 122, 122, 122, 122, 123, 123, 123, 123, 123,
123, 123, 124, 124, 124, 124, 124, 124, 124, 125, 125, 125, 125, 125, 125, 125,
125, 126, 126, 126, 126, 126, 126, 126, 127, 127, 127, 127, 127, 127, 127, 127,
128, 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 129,
129, 129, 130, 130, 130, 130, 130, 130, 130, 130, 130, 131, 131, 131, 131, 131,
131, 131, 131, 131, 131, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 133,
133, 133, 133, 133, 133, 133, 133, 133, 133, 134, 134, 134, 134, 134, 134, 134,
134,
],
len_sym_cost: 109,
},
DefaultLitlenCosts {
used_lits_to_lit_cost: [
16, 16, 32, 41, 48, 53, 57, 60, 64, 66, 69, 71, 73, 75, 76, 78,
80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 92, 92, 93, 94, 95,
96, 96, 97, 98, 98, 99, 99, 100, 101, 101, 102, 102, 103, 103, 104, 104,
105, 105, 106, 106, 107, 107, 108, 108, 108, 109, 109, 110, 110, 110, 111, 111,
112, 112, 112, 113, 113, 113, 114, 114, 114, 115, 115, 115, 115, 116, 116, 116,
117, 117, 117, 118, 118, 118, 118, 119, 119, 119, 119, 120, 120, 120, 120, 121,
121, 121, 121, 122, 122, 122, 122, 122, 123, 123, 123, 123, 124, 124, 124, 124,
124, 125, 125, 125, 125, 125, 126, 126, 126, 126, 126, 127, 127, 127, 127, 127,
128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 129, 129, 130, 130, 130, 130,
130, 130, 131, 131, 131, 131, 131, 131, 131, 132, 132, 132, 132, 132, 132, 133,
133, 133, 133, 133, 133, 133, 134, 134, 134, 134, 134, 134, 134, 134, 135, 135,
135, 135, 135, 135, 135, 135, 136, 136, 136, 136, 136, 136, 136, 136, 137, 137,
137, 137, 137, 137, 137, 137, 138, 138, 138, 138, 138, 138, 138, 138, 138, 139,
139, 139, 139, 139, 139, 139, 139, 139, 140, 140, 140, 140, 140, 140, 140, 140,
140, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 142, 142, 142, 142, 142,
142, 142, 142, 142, 142, 142, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143,
144,
],
len_sym_cost: 93,
},
DefaultLitlenCosts {
used_lits_to_lit_cost: [
32, 32, 48, 57, 64, 69, 73, 76, 80, 82, 85, 87, 89, 91, 92, 94,
96, 97, 98, 99, 101, 102, 103, 104, 105, 106, 107, 108, 108, 109, 110, 111,
112, 112, 113, 114, 114, 115, 115, 116, 117, 117, 118, 118, 119, 119, 120, 120,
121, 121, 122, 122, 123, 123, 124, 124, 124, 125, 125, 126, 126, 126, 127, 127,
128, 128, 128, 129, 129, 129, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132,
133, 133, 133, 134, 134, 134, 134, 135, 135, 135, 135, 136, 136, 136, 136, 137,
137, 137, 137, 138, 138, 138, 138, 138, 139, 139, 139, 139, 140, 140, 140, 140,
140, 141, 141, 141, 141, 141, 142, 142, 142, 142, 142, 143, 143, 143, 143, 143,
144, 144, 144, 144, 144, 144, 145, 145, 145, 145, 145, 145, 146, 146, 146, 146,
146, 146, 147, 147, 147, 147, 147, 147, 147, 148, 148, 148, 148, 148, 148, 149,
149, 149, 149, 149, 149, 149, 150, 150, 150, 150, 150, 150, 150, 150, 151, 151,
151, 151, 151, 151, 151, 151, 152, 152, 152, 152, 152, 152, 152, 152, 153, 153,
153, 153, 153, 153, 153, 153, 154, 154, 154, 154, 154, 154, 154, 154, 154, 155,
155, 155, 155, 155, 155, 155, 155, 155, 156, 156, 156, 156, 156, 156, 156, 156,
156, 157, 157, 157, 157, 157, 157, 157, 157, 157, 157, 158, 158, 158, 158, 158,
158, 158, 158, 158, 158, 158, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159,
160,
],
len_sym_cost: 84,
},
];
fn choose_default_litlen_costs(
freqs: &mut DeflateFreqs,
match_len_freqs: &[u32],
max_search_depth: u32,
block_begin: &[u8],
block_length: u32,
) -> (u32, u32) {
freqs.litlen[..DEFLATE_NUM_LITERALS as usize].fill(0);
for &b in &block_begin[..block_length as usize] {
freqs.litlen[b as usize] += 1;
}
let cutoff = block_length >> 11;
let mut num_used_literals = 0u32;
for &f in &freqs.litlen[..DEFLATE_NUM_LITERALS as usize] {
if f > cutoff {
num_used_literals += 1;
}
}
if num_used_literals == 0 {
num_used_literals = 1;
}
let mut match_freq = 0u32;
let mut literal_freq = block_length;
let min_len = choose_min_match_len(num_used_literals, max_search_depth);
for (i, &freq) in match_len_freqs.iter().enumerate().skip(min_len as usize) {
match_freq += freq;
literal_freq = literal_freq.saturating_sub(i as u32 * freq);
}
let table_idx = if match_freq > literal_freq {
2 } else if match_freq * 4 > literal_freq {
1 } else {
0 };
let lit_cost =
DEFAULT_LITLEN_COSTS[table_idx].used_lits_to_lit_cost[num_used_literals as usize] as u32;
let len_sym_cost = DEFAULT_LITLEN_COSTS[table_idx].len_sym_cost as u32;
(lit_cost, len_sym_cost)
}
#[inline(always)]
fn default_length_cost(len: u32, len_sym_cost: u32) -> u32 {
let slot = LENGTH_SLOT[len as usize] as usize;
let extra = DEFLATE_LENGTH_EXTRA_BITS[slot] as u32;
len_sym_cost + extra * BIT_COST
}
#[inline(always)]
fn default_offset_slot_cost(slot: usize) -> u32 {
let extra = DEFLATE_OFFSET_EXTRA_BITS[slot] as u32;
let offset_sym_cost = 4 * BIT_COST + (907 * BIT_COST) / 1000;
offset_sym_cost + extra * BIT_COST
}
fn set_default_costs(costs: &mut DeflateCosts, lit_cost: u32, len_sym_cost: u32) {
costs.literal.fill(lit_cost);
for i in DEFLATE_MIN_MATCH_LEN..=DEFLATE_MAX_MATCH_LEN {
costs.length[i as usize] = default_length_cost(i, len_sym_cost);
}
for i in 0..30 {
costs.offset_slot[i] = default_offset_slot_cost(i);
}
}
#[inline(always)]
fn adjust_cost(cost: &mut u32, default_cost: u32, change_amount: i32) {
*cost = match change_amount {
0 => (default_cost + 3 * *cost) / 4,
1 => (default_cost + *cost) / 2,
2 => (5 * default_cost + 3 * *cost) / 8,
_ => (3 * default_cost + *cost) / 4,
};
}
fn adjust_costs_impl(
costs: &mut DeflateCosts,
lit_cost: u32,
len_sym_cost: u32,
change_amount: i32,
) {
for c in costs.literal.iter_mut() {
adjust_cost(c, lit_cost, change_amount);
}
for i in DEFLATE_MIN_MATCH_LEN..=DEFLATE_MAX_MATCH_LEN {
adjust_cost(
&mut costs.length[i as usize],
default_length_cost(i, len_sym_cost),
change_amount,
);
}
for i in 0..30 {
adjust_cost(
&mut costs.offset_slot[i],
default_offset_slot_cost(i),
change_amount,
);
}
}
fn adjust_costs(
costs: &mut DeflateCosts,
prev_observations: &[u32],
prev_num_observations: u32,
split_stats: &BlockSplitStats,
lit_cost: u32,
len_sym_cost: u32,
) {
let mut total_delta = 0u64;
for (po, so) in prev_observations[..NUM_OBSERVATION_TYPES]
.iter()
.zip(&split_stats.observations[..NUM_OBSERVATION_TYPES])
{
let prev = *po as u64 * split_stats.num_observations as u64;
let cur = *so as u64 * prev_num_observations as u64;
total_delta += prev.abs_diff(cur);
}
let cutoff = prev_num_observations as u64 * split_stats.num_observations as u64 * 200 / 512;
if total_delta > 3 * cutoff {
set_default_costs(costs, lit_cost, len_sym_cost);
} else if 4 * total_delta > 9 * cutoff {
adjust_costs_impl(costs, lit_cost, len_sym_cost, 3);
} else if 2 * total_delta > 3 * cutoff {
adjust_costs_impl(costs, lit_cost, len_sym_cost, 2);
} else if 2 * total_delta > cutoff {
adjust_costs_impl(costs, lit_cost, len_sym_cost, 1);
} else {
adjust_costs_impl(costs, lit_cost, len_sym_cost, 0);
}
}
pub(crate) fn set_initial_costs(
ns: &mut NearOptimalState,
freqs: &mut DeflateFreqs,
split_stats: &BlockSplitStats,
max_search_depth: u32,
block_begin: &[u8],
block_length: u32,
is_first_block: bool,
) {
let (lit_cost, len_sym_cost) = choose_default_litlen_costs(
freqs,
&ns.match_len_freqs,
max_search_depth,
block_begin,
block_length,
);
if is_first_block {
set_default_costs(&mut ns.costs, lit_cost, len_sym_cost);
} else {
adjust_costs(
&mut ns.costs,
&ns.prev_observations,
ns.prev_num_observations,
split_stats,
lit_cost,
len_sym_cost,
);
}
}
pub(crate) fn set_costs_from_codes(costs: &mut DeflateCosts, codes: &DeflateCodes) {
for i in 0..DEFLATE_NUM_LITERALS as usize {
let bits = if codes.lens_litlen[i] != 0 {
codes.lens_litlen[i] as u32
} else {
LITERAL_NOSTAT_BITS
};
costs.literal[i] = bits * BIT_COST;
}
for i in DEFLATE_MIN_MATCH_LEN..=DEFLATE_MAX_MATCH_LEN {
let slot = LENGTH_SLOT[i as usize] as usize;
let sym = DEFLATE_FIRST_LEN_SYM as usize + slot;
let bits = if codes.lens_litlen[sym] != 0 {
codes.lens_litlen[sym] as u32
} else {
LENGTH_NOSTAT_BITS
};
costs.length[i as usize] = (bits + DEFLATE_LENGTH_EXTRA_BITS[slot] as u32) * BIT_COST;
}
for (i, (&extra, &len)) in DEFLATE_OFFSET_EXTRA_BITS[..30]
.iter()
.zip(&codes.lens_offset[..30])
.enumerate()
{
let bits = if len != 0 {
len as u32
} else {
OFFSET_NOSTAT_BITS
};
costs.offset_slot[i] = (bits + extra as u32) * BIT_COST;
}
}
#[cfg(not(feature = "unchecked"))]
fn tally_item_list(
nodes: &[OptimumNode],
block_length: u32,
offset_slot_full: &[u8],
freqs: &mut DeflateFreqs,
) {
let mut cur_idx = 0usize;
let end = block_length as usize;
while cur_idx < end {
let length = nodes[cur_idx].item & OPTIMUM_LEN_MASK;
let offset = nodes[cur_idx].item >> OPTIMUM_OFFSET_SHIFT;
if length == 1 {
freqs.litlen[offset as usize] += 1;
} else {
let len_slot = LENGTH_SLOT[length as usize] as usize;
freqs.litlen[DEFLATE_FIRST_LEN_SYM as usize + len_slot] += 1;
freqs.offset[offset_slot_full[offset as usize] as usize] += 1;
}
cur_idx += length as usize;
}
freqs.litlen[DEFLATE_END_OF_BLOCK as usize] += 1;
}
#[cfg(feature = "unchecked")]
fn tally_item_list(
nodes: &[OptimumNode],
block_length: u32,
offset_slot_full: &[u8],
freqs: &mut DeflateFreqs,
) {
let nodes_ptr = nodes.as_ptr();
let osf_ptr = offset_slot_full.as_ptr();
let mut cur_idx = 0usize;
let end = block_length as usize;
unsafe {
while cur_idx < end {
let node = &*nodes_ptr.add(cur_idx);
let length = node.item & OPTIMUM_LEN_MASK;
let offset = node.item >> OPTIMUM_OFFSET_SHIFT;
if length == 1 {
freqs.litlen[offset as usize] += 1;
} else {
let len_slot = LENGTH_SLOT[length as usize] as usize;
freqs.litlen[DEFLATE_FIRST_LEN_SYM as usize + len_slot] += 1;
freqs.offset[*osf_ptr.add(offset as usize) as usize] += 1;
}
cur_idx += length as usize;
}
}
freqs.litlen[DEFLATE_END_OF_BLOCK as usize] += 1;
}
fn choose_all_literals(
block_begin: &[u8],
block_length: u32,
freqs: &mut DeflateFreqs,
codes: &mut DeflateCodes,
) {
freqs.reset();
for &b in &block_begin[..block_length as usize] {
freqs.litlen[b as usize] += 1;
}
freqs.litlen[DEFLATE_END_OF_BLOCK as usize] += 1;
make_huffman_codes(freqs, codes);
}
fn compute_true_cost_inner(
freqs: &DeflateFreqs,
codes: &DeflateCodes,
use_best_precode: bool,
) -> u32 {
let mut cost = 0u32;
let mut num_litlen_syms = DEFLATE_NUM_LITLEN_SYMS as usize;
while num_litlen_syms > 257 && codes.lens_litlen[num_litlen_syms - 1] == 0 {
num_litlen_syms -= 1;
}
let mut num_offset_syms = DEFLATE_NUM_OFFSET_SYMS as usize;
while num_offset_syms > 1 && codes.lens_offset[num_offset_syms - 1] == 0 {
num_offset_syms -= 1;
}
let total_lens = num_litlen_syms + num_offset_syms;
let mut combined_lens = [0u8; (DEFLATE_NUM_LITLEN_SYMS + DEFLATE_NUM_OFFSET_SYMS) as usize];
combined_lens[..num_litlen_syms].copy_from_slice(&codes.lens_litlen[..num_litlen_syms]);
combined_lens[num_litlen_syms..num_litlen_syms + num_offset_syms]
.copy_from_slice(&codes.lens_offset[..num_offset_syms]);
if use_best_precode {
let mut scratch = super::katajainen::HuffmanScratch::new();
let best = compute_precode_items_best(&combined_lens[..total_lens], &mut scratch);
cost += best.cost;
} else {
let mut precode_freqs = [0u32; DEFLATE_NUM_PRECODE_SYMS as usize];
let mut precode_items =
[0u32; (DEFLATE_NUM_LITLEN_SYMS + DEFLATE_NUM_OFFSET_SYMS) as usize];
compute_precode_items(
&combined_lens[..total_lens],
&mut precode_freqs,
&mut precode_items,
);
let mut precode_lens = [0u8; DEFLATE_NUM_PRECODE_SYMS as usize];
let mut precode_codewords = [0u32; DEFLATE_NUM_PRECODE_SYMS as usize];
make_huffman_code(
DEFLATE_NUM_PRECODE_SYMS as usize,
DEFLATE_MAX_PRE_CODEWORD_LEN,
&precode_freqs,
&mut precode_lens,
&mut precode_codewords,
);
let _ = precode_codewords;
let mut num_explicit_lens = DEFLATE_NUM_PRECODE_SYMS as usize;
while num_explicit_lens > 4
&& precode_lens[DEFLATE_PRECODE_LENS_PERMUTATION[num_explicit_lens - 1] as usize] == 0
{
num_explicit_lens -= 1;
}
cost += 5 + 5 + 4 + 3 * num_explicit_lens as u32;
for sym in 0..DEFLATE_NUM_PRECODE_SYMS as usize {
cost +=
precode_freqs[sym] * (precode_lens[sym] as u32 + EXTRA_PRECODE_BITS[sym] as u32);
}
}
for sym in 0..DEFLATE_FIRST_LEN_SYM as usize {
cost += freqs.litlen[sym] * codes.lens_litlen[sym] as u32;
}
for (i, &extra) in DEFLATE_LENGTH_EXTRA_BITS.iter().enumerate() {
let sym = DEFLATE_FIRST_LEN_SYM as usize + i;
cost += freqs.litlen[sym] * (codes.lens_litlen[sym] as u32 + extra as u32);
}
for (sym, &extra) in DEFLATE_OFFSET_EXTRA_BITS[..30].iter().enumerate() {
cost += freqs.offset[sym] * (codes.lens_offset[sym] as u32 + extra as u32);
}
cost
}
#[cfg(not(feature = "unchecked"))]
#[allow(clippy::too_many_arguments)]
pub(crate) fn find_min_cost_path(
optimum_nodes: &mut [OptimumNode],
costs: &DeflateCosts,
offset_slot_full: &[u8],
block_length: u32,
match_cache: &[LzMatch],
cache_end: usize,
freqs: &mut DeflateFreqs,
codes: &mut DeflateCodes,
) {
let end = block_length as usize;
optimum_nodes[end].cost_to_end = 0;
let mut cache_idx = cache_end;
let mut cur_idx = end;
while cur_idx > 0 {
cur_idx -= 1;
cache_idx -= 1;
let num_matches = match_cache[cache_idx].length as usize;
let literal = match_cache[cache_idx].offset as u32;
let mut best_cost =
costs.literal[literal as usize] + optimum_nodes[cur_idx + 1].cost_to_end;
optimum_nodes[cur_idx].item = (literal << OPTIMUM_OFFSET_SHIFT) | 1;
if num_matches > 0 {
let match_start = cache_idx - num_matches;
let mut match_idx = match_start;
let mut len = DEFLATE_MIN_MATCH_LEN;
loop {
let offset = match_cache[match_idx].offset as u32;
let os_idx = offset_slot_full[offset as usize] as usize;
let offset_cost = costs.offset_slot[os_idx];
loop {
let cost = offset_cost
+ costs.length[len as usize]
+ optimum_nodes[cur_idx + len as usize].cost_to_end;
if cost < best_cost {
best_cost = cost;
optimum_nodes[cur_idx].item = len | (offset << OPTIMUM_OFFSET_SHIFT);
}
len += 1;
if len > match_cache[match_idx].length as u32 {
break;
}
}
match_idx += 1;
if match_idx == cache_idx {
break;
}
}
cache_idx -= num_matches;
}
optimum_nodes[cur_idx].cost_to_end = best_cost;
}
freqs.reset();
tally_item_list(optimum_nodes, block_length, offset_slot_full, freqs);
make_huffman_codes(freqs, codes);
}
#[cfg(feature = "unchecked")]
#[allow(clippy::too_many_arguments)]
pub(crate) fn find_min_cost_path(
optimum_nodes: &mut [OptimumNode],
costs: &DeflateCosts,
offset_slot_full: &[u8],
block_length: u32,
match_cache: &[LzMatch],
cache_end: usize,
freqs: &mut DeflateFreqs,
codes: &mut DeflateCodes,
) {
let end = block_length as usize;
let nodes_ptr = optimum_nodes.as_mut_ptr();
let cache_ptr = match_cache.as_ptr();
let osf_ptr = offset_slot_full.as_ptr();
unsafe {
(*nodes_ptr.add(end)).cost_to_end = 0;
let mut cache_idx = cache_end;
let mut cur_idx = end;
while cur_idx > 0 {
cur_idx -= 1;
cache_idx -= 1;
let cache_entry = &*cache_ptr.add(cache_idx);
let num_matches = cache_entry.length as usize;
let literal = cache_entry.offset as u32;
let mut best_cost = *costs.literal.get_unchecked(literal as usize)
+ (*nodes_ptr.add(cur_idx + 1)).cost_to_end;
(*nodes_ptr.add(cur_idx)).item = (literal << OPTIMUM_OFFSET_SHIFT) | 1;
if num_matches > 0 {
let mut match_idx = cache_idx - num_matches;
let mut len = DEFLATE_MIN_MATCH_LEN;
loop {
let m = &*cache_ptr.add(match_idx);
let offset = m.offset as u32;
let os_idx = *osf_ptr.add(offset as usize) as usize;
let offset_cost = *costs.offset_slot.get_unchecked(os_idx);
loop {
let cost = offset_cost
+ *costs.length.get_unchecked(len as usize)
+ (*nodes_ptr.add(cur_idx + len as usize)).cost_to_end;
if cost < best_cost {
best_cost = cost;
(*nodes_ptr.add(cur_idx)).item = len | (offset << OPTIMUM_OFFSET_SHIFT);
}
len += 1;
if len > m.length as u32 {
break;
}
}
match_idx += 1;
if match_idx == cache_idx {
break;
}
}
cache_idx -= num_matches;
}
(*nodes_ptr.add(cur_idx)).cost_to_end = best_cost;
}
}
freqs.reset();
tally_item_list(optimum_nodes, block_length, offset_slot_full, freqs);
make_huffman_codes(freqs, codes);
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn optimize_and_flush_block(
ns: &mut NearOptimalState,
os: &mut OutputBitstream<'_>,
block_begin: &[u8],
block_length: u32,
cache_end: usize,
is_first_block: bool,
is_final_block: bool,
freqs: &mut DeflateFreqs,
codes: &mut DeflateCodes,
static_codes: &DeflateCodes,
split_stats: &BlockSplitStats,
max_search_depth: u32,
effort: u32,
libdeflate_compat: bool,
) -> bool {
let use_best_precode = !libdeflate_compat && effort >= 23;
let mut num_passes_remaining = ns.max_optim_passes;
let mut best_true_cost = u32::MAX;
choose_all_literals(block_begin, block_length, freqs, codes);
let only_lits_cost = compute_true_cost_inner(freqs, codes, use_best_precode);
let sentinel_end =
(block_length as usize + DEFLATE_MAX_MATCH_LEN as usize).min(ns.optimum_nodes.len() - 1);
for i in block_length as usize..=sentinel_end {
ns.optimum_nodes[i].cost_to_end = 0x80000000;
}
let mut static_cost = u32::MAX;
if block_length <= ns.max_len_to_optimize_static_block {
ns.costs_saved = ns.costs.clone();
set_costs_from_codes(&mut ns.costs, static_codes);
find_min_cost_path(
&mut ns.optimum_nodes,
&ns.costs,
&ns.offset_slot_full,
block_length,
&ns.match_cache,
cache_end,
freqs,
codes,
);
static_cost = ns.optimum_nodes[0].cost_to_end / BIT_COST;
static_cost += 7; ns.costs = ns.costs_saved.clone();
}
set_initial_costs(
ns,
freqs,
split_stats,
max_search_depth,
block_begin,
block_length,
is_first_block,
);
let max_passes = if libdeflate_compat {
num_passes_remaining
} else if effort >= 29 {
30
} else if effort >= 28 {
20
} else {
num_passes_remaining
};
num_passes_remaining = max_passes;
let use_diversification = !libdeflate_compat && effort >= 28;
let use_milestone_rle = !libdeflate_compat && effort >= 26;
let mut rng = MwcRng::new(block_length.wrapping_mul(0x9E3779B9));
let mut no_improvement_count = 0u32;
let mut pass_number = 0u32;
let mut checkpoint_costs: Option<DeflateCosts> = None;
let mut diversification_attempts = 0u32;
let mut prev_costs: Option<DeflateCosts> = None;
loop {
pass_number += 1;
find_min_cost_path(
&mut ns.optimum_nodes,
&ns.costs,
&ns.offset_slot_full,
block_length,
&ns.match_cache,
cache_end,
freqs,
codes,
);
let true_cost = compute_true_cost_inner(freqs, codes, use_best_precode);
if true_cost + ns.min_improvement_to_continue > best_true_cost {
no_improvement_count += 1;
if use_diversification && no_improvement_count >= 2 && diversification_attempts < 3 {
diversification_attempts += 1;
if checkpoint_costs.is_none() && pass_number >= 2 {
checkpoint_costs = Some(ns.costs_saved.clone());
}
let saved = ns.costs_saved.clone();
for i in 0..DEFLATE_NUM_LITERALS as usize {
if rng.next_u32().is_multiple_of(3) {
let other = rng.next_u32() as usize % DEFLATE_NUM_LITERALS as usize;
ns.costs.literal[i] = saved.literal[other];
}
}
for i in DEFLATE_MIN_MATCH_LEN as usize..=DEFLATE_MAX_MATCH_LEN as usize {
if rng.next_u32().is_multiple_of(3) {
let other = DEFLATE_MIN_MATCH_LEN as usize
+ rng.next_u32() as usize
% (DEFLATE_MAX_MATCH_LEN - DEFLATE_MIN_MATCH_LEN + 1) as usize;
ns.costs.length[i] = saved.length[other];
}
}
no_improvement_count = 0;
num_passes_remaining -= 1;
if num_passes_remaining == 0 {
break;
}
continue;
}
if let Some(ref cp) = checkpoint_costs
&& diversification_attempts >= 3
{
ns.costs = cp.clone();
find_min_cost_path(
&mut ns.optimum_nodes,
&ns.costs,
&ns.offset_slot_full,
block_length,
&ns.match_cache,
cache_end,
freqs,
codes,
);
let cp_cost = compute_true_cost_inner(freqs, codes, use_best_precode);
if cp_cost < best_true_cost {
best_true_cost = cp_cost;
ns.costs_saved = ns.costs.clone();
}
}
break;
}
best_true_cost = true_cost;
no_improvement_count = 0;
ns.costs_saved = ns.costs.clone();
if use_diversification {
prev_costs = Some(ns.costs.clone());
}
set_costs_from_codes(&mut ns.costs, codes);
if use_diversification
&& pass_number >= 2
&& let Some(ref prev) = prev_costs
{
for i in 0..DEFLATE_NUM_LITERALS as usize {
ns.costs.literal[i] = (2 * ns.costs.literal[i] + prev.literal[i]) / 3;
}
for i in DEFLATE_MIN_MATCH_LEN as usize..=DEFLATE_MAX_MATCH_LEN as usize {
ns.costs.length[i] = (2 * ns.costs.length[i] + prev.length[i]) / 3;
}
for i in 0..30 {
ns.costs.offset_slot[i] = (2 * ns.costs.offset_slot[i] + prev.offset_slot[i]) / 3;
}
}
if use_milestone_rle && (pass_number == 4 || pass_number == 8) {
let mut rle_freqs = freqs.clone();
optimize_huffman_for_rle(&mut rle_freqs.litlen);
optimize_huffman_for_rle(&mut rle_freqs.offset);
let mut rle_codes = DeflateCodes::default();
make_huffman_codes(&rle_freqs, &mut rle_codes);
set_costs_from_codes(&mut ns.costs, &rle_codes);
}
num_passes_remaining -= 1;
if num_passes_remaining == 0 {
break;
}
}
if use_milestone_rle && pass_number > 4 {
let mut rle_freqs = freqs.clone();
optimize_huffman_for_rle(&mut rle_freqs.litlen);
optimize_huffman_for_rle(&mut rle_freqs.offset);
let mut rle_codes = DeflateCodes::default();
make_huffman_codes(&rle_freqs, &mut rle_codes);
let mut rle_costs = ns.costs.clone();
set_costs_from_codes(&mut rle_costs, &rle_codes);
find_min_cost_path(
&mut ns.optimum_nodes,
&rle_costs,
&ns.offset_slot_full,
block_length,
&ns.match_cache,
cache_end,
freqs,
codes,
);
let rle_true_cost = compute_true_cost_inner(freqs, codes, use_best_precode);
if rle_true_cost < best_true_cost {
best_true_cost = rle_true_cost;
ns.costs_saved = rle_costs;
set_costs_from_codes(&mut ns.costs, codes);
}
}
let used_only_literals = false;
let true_cost = compute_true_cost_inner(freqs, codes, use_best_precode);
if only_lits_cost.min(static_cost) < best_true_cost {
if only_lits_cost < static_cost {
choose_all_literals(block_begin, block_length, freqs, codes);
set_costs_from_codes(&mut ns.costs, codes);
let seq = Sequence {
litrunlen_and_length: block_length,
offset: 0,
offset_slot: 0,
};
let flush_fn = if use_best_precode {
flush_block_best
} else {
flush_block
};
flush_fn(
os,
block_begin,
block_length as usize,
BlockOutput::Sequences(&[seq]),
freqs,
codes,
static_codes,
is_final_block,
);
return true;
} else {
set_costs_from_codes(&mut ns.costs, static_codes);
find_min_cost_path(
&mut ns.optimum_nodes,
&ns.costs,
&ns.offset_slot_full,
block_length,
&ns.match_cache,
cache_end,
freqs,
codes,
);
}
} else if true_cost >= best_true_cost + ns.min_bits_to_use_nonfinal_path {
ns.costs = ns.costs_saved.clone();
find_min_cost_path(
&mut ns.optimum_nodes,
&ns.costs,
&ns.offset_slot_full,
block_length,
&ns.match_cache,
cache_end,
freqs,
codes,
);
set_costs_from_codes(&mut ns.costs, codes);
}
let use_best_codes = !libdeflate_compat && effort >= 26;
if use_best_codes {
make_huffman_codes_best(freqs, codes);
}
let flush_fn = if use_best_precode {
flush_block_best
} else {
flush_block
};
flush_fn(
os,
block_begin,
block_length as usize,
BlockOutput::Optimum {
nodes: &ns.optimum_nodes,
block_length: block_length as usize,
offset_slot_full: &ns.offset_slot_full,
},
freqs,
codes,
static_codes,
is_final_block,
);
used_only_literals
}
pub(crate) fn init_stats(split_stats: &mut BlockSplitStats, ns: &mut NearOptimalState) {
*split_stats = BlockSplitStats::new();
ns.new_match_len_freqs.fill(0);
ns.match_len_freqs.fill(0);
}
pub(crate) fn merge_stats(split_stats: &mut BlockSplitStats, ns: &mut NearOptimalState) {
split_stats.merge_new_observations();
for i in 0..ns.match_len_freqs.len() {
ns.match_len_freqs[i] += ns.new_match_len_freqs[i];
ns.new_match_len_freqs[i] = 0;
}
}
pub(crate) fn save_stats(split_stats: &BlockSplitStats, ns: &mut NearOptimalState) {
for i in 0..NUM_OBSERVATION_TYPES {
ns.prev_observations[i] = split_stats.observations[i];
}
ns.prev_num_observations = split_stats.num_observations;
}
pub(crate) fn clear_old_stats(split_stats: &mut BlockSplitStats, ns: &mut NearOptimalState) {
split_stats.observations.fill(0);
split_stats.num_observations = 0;
ns.match_len_freqs.fill(0);
}