use alloc::vec::Vec;
use core::cmp;
#[cfg(feature = "std")]
use log::{debug, trace};
use crate::{
cache::Cache,
deflate::{calculate_block_size, BlockType},
hash::ZopfliHash,
lz77::{find_longest_match, LitLen, Lz77Store},
symbols::{get_dist_extra_bits, get_dist_symbol, get_length_extra_bits, get_length_symbol},
util::{ZOPFLI_MAX_MATCH, ZOPFLI_NUM_D, ZOPFLI_NUM_LL, ZOPFLI_WINDOW_MASK, ZOPFLI_WINDOW_SIZE},
};
#[cfg(not(feature = "std"))]
#[allow(unused_imports)] use crate::math::F64MathExt;
fn get_cost_fixed(litlen: usize, dist: u16) -> f64 {
let result = if dist == 0 {
if litlen <= 143 {
8
} else {
9
}
} else {
let dbits = get_dist_extra_bits(dist);
let lbits = get_length_extra_bits(litlen);
let lsym = get_length_symbol(litlen);
7 + u32::from(lsym > 279) + 5 + dbits + lbits
};
f64::from(result)
}
fn get_cost_stat(litlen: usize, dist: u16, stats: &SymbolStats) -> f64 {
assert!(litlen < ZOPFLI_NUM_LL); if dist == 0 {
stats.ll_symbols[litlen]
} else {
let lsym = get_length_symbol(litlen);
let lbits = f64::from(get_length_extra_bits(litlen));
let dsym = get_dist_symbol(dist) as usize;
let dbits = f64::from(get_dist_extra_bits(dist));
lbits + dbits + stats.ll_symbols[lsym] + stats.d_symbols[dsym]
}
}
#[derive(Default)]
struct RanState {
m_w: u32,
m_z: u32,
}
impl RanState {
const fn new() -> Self {
Self { m_w: 1, m_z: 2 }
}
fn random_marsaglia(&mut self) -> u32 {
self.m_z = 36969 * (self.m_z & 65535) + (self.m_z >> 16);
self.m_w = 18000 * (self.m_w & 65535) + (self.m_w >> 16);
(self.m_z << 16).wrapping_add(self.m_w) }
}
#[derive(Copy, Clone)]
struct SymbolStats {
litlens: [usize; ZOPFLI_NUM_LL],
dists: [usize; ZOPFLI_NUM_D],
ll_symbols: [f64; ZOPFLI_NUM_LL],
d_symbols: [f64; ZOPFLI_NUM_D],
}
impl Default for SymbolStats {
fn default() -> Self {
Self {
litlens: [0; ZOPFLI_NUM_LL],
dists: [0; ZOPFLI_NUM_D],
ll_symbols: [0.0; ZOPFLI_NUM_LL],
d_symbols: [0.0; ZOPFLI_NUM_D],
}
}
}
impl SymbolStats {
fn randomize_stat_freqs(&mut self, state: &mut RanState) {
fn randomize_freqs(freqs: &mut [usize], state: &mut RanState) {
let n = freqs.len();
let mut i = 0;
let end = n;
while i < end {
if (state.random_marsaglia() >> 4) % 3 == 0 {
let index = state.random_marsaglia() as usize % n;
freqs[i] = freqs[index];
}
i += 1;
}
}
randomize_freqs(&mut self.litlens, state);
randomize_freqs(&mut self.dists, state);
self.litlens[256] = 1; }
fn calculate_entropy(&mut self) {
fn calculate_and_store_entropy(count: &[usize], bitlengths: &mut [f64]) {
let n = count.len();
let sum = count.iter().sum();
let log2sum = (if sum == 0 { n } else { sum } as f64).log2();
for i in 0..n {
if count[i] == 0 {
bitlengths[i] = log2sum;
} else {
bitlengths[i] = log2sum - (count[i] as f64).log2();
}
}
}
calculate_and_store_entropy(&self.litlens, &mut self.ll_symbols);
calculate_and_store_entropy(&self.dists, &mut self.d_symbols);
}
fn get_statistics(&mut self, store: &Lz77Store) {
for &litlen in &store.litlens {
match litlen {
LitLen::Literal(lit) => self.litlens[lit as usize] += 1,
LitLen::LengthDist(len, dist) => {
self.litlens[get_length_symbol(len as usize)] += 1;
self.dists[get_dist_symbol(dist) as usize] += 1;
}
}
}
self.litlens[256] = 1;
self.calculate_entropy();
}
fn clear_freqs(&mut self) {
self.litlens = [0; ZOPFLI_NUM_LL];
self.dists = [0; ZOPFLI_NUM_D];
}
}
fn add_weighed_stat_freqs(
stats1: &SymbolStats,
w1: f64,
stats2: &SymbolStats,
w2: f64,
) -> SymbolStats {
let mut result = SymbolStats::default();
for i in 0..ZOPFLI_NUM_LL {
result.litlens[i] =
(stats1.litlens[i] as f64 * w1 + stats2.litlens[i] as f64 * w2) as usize;
}
for i in 0..ZOPFLI_NUM_D {
result.dists[i] = (stats1.dists[i] as f64 * w1 + stats2.dists[i] as f64 * w2) as usize;
}
result.litlens[256] = 1; result
}
fn get_cost_model_min_cost<F: Fn(usize, u16) -> f64>(costmodel: F) -> f64 {
let mut bestlength = 0; let mut bestdist = 0;
const DSYMBOLS: [u16; 30] = [
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537,
2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577,
];
let mut mincost = f64::INFINITY;
for i in 3..259 {
let c = costmodel(i, 1);
if c < mincost {
bestlength = i;
mincost = c;
}
}
mincost = f64::INFINITY;
for dsym in DSYMBOLS {
let c = costmodel(3, dsym);
if c < mincost {
bestdist = dsym;
mincost = c;
}
}
costmodel(bestlength, bestdist)
}
fn get_best_lengths<F: Fn(usize, u16) -> f64, C: Cache>(
lmc: &mut C,
in_data: &[u8],
instart: usize,
inend: usize,
costmodel: F,
h: &mut ZopfliHash,
costs: &mut Vec<f32>,
) -> (f64, Vec<u16>) {
let blocksize = inend - instart;
let mut length_array = vec![0; blocksize + 1];
if instart == inend {
return (0.0, length_array);
}
let windowstart = instart.saturating_sub(ZOPFLI_WINDOW_SIZE);
h.reset();
let arr = &in_data[..inend];
h.warmup(arr, windowstart, inend);
for i in windowstart..instart {
h.update(arr, i);
}
costs.resize(blocksize + 1, 0.0);
for cost in costs.iter_mut().take(blocksize + 1).skip(1) {
*cost = f32::INFINITY;
}
costs[0] = 0.0;
let mut i = instart;
let mut leng;
let mut longest_match;
let mut sublen = vec![0; ZOPFLI_MAX_MATCH + 1];
let mincost = get_cost_model_min_cost(&costmodel);
while i < inend {
let mut j = i - instart; h.update(arr, i);
if h.same[i & ZOPFLI_WINDOW_MASK] > ZOPFLI_MAX_MATCH as u16 * 2
&& i > instart + ZOPFLI_MAX_MATCH + 1
&& i + ZOPFLI_MAX_MATCH * 2 + 1 < inend
&& h.same[(i - ZOPFLI_MAX_MATCH) & ZOPFLI_WINDOW_MASK] > ZOPFLI_MAX_MATCH as u16
{
let symbolcost = costmodel(ZOPFLI_MAX_MATCH, 1);
for _ in 0..ZOPFLI_MAX_MATCH {
costs[j + ZOPFLI_MAX_MATCH] = costs[j] + symbolcost as f32;
length_array[j + ZOPFLI_MAX_MATCH] = ZOPFLI_MAX_MATCH as u16;
i += 1;
j += 1;
h.update(arr, i);
}
}
longest_match = find_longest_match(
lmc,
h,
arr,
i,
inend,
instart,
ZOPFLI_MAX_MATCH,
&mut Some(&mut sublen),
);
leng = longest_match.length;
if i < inend {
let new_cost = costmodel(arr[i] as usize, 0) + f64::from(costs[j]);
debug_assert!(new_cost >= 0.0);
if new_cost < f64::from(costs[j + 1]) {
costs[j + 1] = new_cost as f32;
length_array[j + 1] = 1;
}
}
let kend = cmp::min(leng as usize, inend - i);
let mincostaddcostj = mincost + f64::from(costs[j]);
for (k, &sublength) in sublen.iter().enumerate().take(kend + 1).skip(3) {
if f64::from(costs[j + k]) <= mincostaddcostj {
continue;
}
let new_cost = costmodel(k, sublength) + f64::from(costs[j]);
debug_assert!(new_cost >= 0.0);
if new_cost < f64::from(costs[j + k]) {
debug_assert!(k <= ZOPFLI_MAX_MATCH);
costs[j + k] = new_cost as f32;
length_array[j + k] = k as u16;
}
}
i += 1;
}
debug_assert!(costs[blocksize] >= 0.0);
(f64::from(costs[blocksize]), length_array)
}
fn trace(size: usize, length_array: &[u16]) -> Vec<u16> {
let mut index = size;
if size == 0 {
return vec![];
}
let mut path = Vec::with_capacity(index);
while index > 0 {
let lai = length_array[index];
let laiu = lai as usize;
path.push(lai);
debug_assert!(laiu <= index);
debug_assert!(laiu <= ZOPFLI_MAX_MATCH);
debug_assert_ne!(lai, 0);
index -= laiu;
}
path
}
#[allow(clippy::too_many_arguments)] fn lz77_optimal_run<F: Fn(usize, u16) -> f64, C: Cache>(
lmc: &mut C,
in_data: &[u8],
instart: usize,
inend: usize,
costmodel: F,
store: &mut Lz77Store,
h: &mut ZopfliHash,
costs: &mut Vec<f32>,
) {
let (cost, length_array) = get_best_lengths(lmc, in_data, instart, inend, costmodel, h, costs);
let path = trace(inend - instart, &length_array);
store.follow_path(in_data, instart, inend, path, lmc);
debug_assert!(cost < f64::INFINITY);
}
pub fn lz77_optimal_fixed<C: Cache>(
lmc: &mut C,
in_data: &[u8],
instart: usize,
inend: usize,
store: &mut Lz77Store,
) {
let mut costs = Vec::with_capacity(inend - instart);
lz77_optimal_run(
lmc,
in_data,
instart,
inend,
get_cost_fixed,
store,
&mut ZopfliHash::new(),
&mut costs,
);
}
pub fn lz77_optimal<C: Cache>(
lmc: &mut C,
in_data: &[u8],
instart: usize,
inend: usize,
max_iterations: u64,
max_iterations_without_improvement: u64,
) -> Lz77Store {
let mut currentstore = Lz77Store::new();
let mut outputstore = currentstore.clone();
currentstore.greedy(lmc, in_data, instart, inend);
let mut stats = SymbolStats::default();
stats.get_statistics(¤tstore);
let mut h = ZopfliHash::new();
let mut costs = Vec::with_capacity(inend - instart + 1);
let mut beststats = SymbolStats::default();
let mut bestcost = f64::INFINITY;
let mut lastcost = 0.0;
let mut ran_state = RanState::new();
let mut lastrandomstep = u64::MAX;
let mut current_iteration: u64 = 0;
let mut iterations_without_improvement: u64 = 0;
loop {
currentstore.reset();
lz77_optimal_run(
lmc,
in_data,
instart,
inend,
|a, b| get_cost_stat(a, b, &stats),
&mut currentstore,
&mut h,
&mut costs,
);
let cost = calculate_block_size(¤tstore, 0, currentstore.size(), BlockType::Dynamic);
if cost < bestcost {
iterations_without_improvement = 0;
outputstore = currentstore.clone();
beststats = stats;
bestcost = cost;
debug!("Iteration {current_iteration}: {cost} bit");
} else {
iterations_without_improvement += 1;
trace!("Iteration {current_iteration}: {cost} bit");
if iterations_without_improvement >= max_iterations_without_improvement {
break;
}
}
current_iteration += 1;
if current_iteration >= max_iterations {
break;
}
let laststats = stats;
stats.clear_freqs();
stats.get_statistics(¤tstore);
if lastrandomstep != u64::MAX {
stats = add_weighed_stat_freqs(&stats, 1.0, &laststats, 0.5);
stats.calculate_entropy();
}
if current_iteration > 5 && (cost - lastcost).abs() < f64::EPSILON {
stats = beststats;
stats.randomize_stat_freqs(&mut ran_state);
stats.calculate_entropy();
lastrandomstep = current_iteration;
}
lastcost = cost;
}
outputstore
}