#![allow(dead_code)]
use crate::bit_writer::BitWriter;
use crate::entropy_coding::encode::{
OwnedAnsEntropyCode, build_entropy_code_ans, write_tokens_ans,
};
use crate::entropy_coding::huffman_tree::{build_and_store_huffman_tree, create_huffman_tree};
use crate::entropy_coding::hybrid_uint::HybridUintConfig;
use crate::entropy_coding::token::Token as AnsToken;
use crate::error::Result;
pub(crate) const K_NUM_RAW_SYMBOLS: usize = 19;
pub(crate) const K_NUM_LZ77: usize = 33;
pub(crate) const K_LZ77_MIN_LENGTH: usize = 7;
#[inline]
pub fn pack_signed(value: i32) -> u32 {
((value as u32) << 1) ^ ((value >> 31) as u32)
}
#[inline]
pub(crate) fn encode_hybrid_uint_lz77_length(value: u32) -> (u32, u32, u32) {
encode_hybrid_uint_000(value)
}
#[inline]
pub(crate) fn encode_hybrid_uint_000(value: u32) -> (u32, u32, u32) {
if value == 0 {
(0, 0, 0)
} else {
let n = 31 - (value | 1).leading_zeros();
let token = n + 1;
let bits = value - (1 << n);
(token, n, bits)
}
}
pub(crate) enum Token {
Raw(u32),
Lz77Run(usize),
}
const MODULAR_HYBRID_UINT: HybridUintConfig = HybridUintConfig {
split_exponent: 4,
split: 16, msb_in_token: 2,
lsb_in_token: 0,
};
pub(super) struct EncodedResidual {
token: u32,
extra_bits: u32,
num_extra: u32,
}
pub(super) fn encode_residuals_hybrid(residuals: &[u32]) -> (Vec<EncodedResidual>, u32) {
let mut encoded = Vec::with_capacity(residuals.len());
let mut max_token: u32 = 0;
for &r in residuals {
let (token, extra_bits, num_extra) = MODULAR_HYBRID_UINT.encode(r);
max_token = max_token.max(token);
encoded.push(EncodedResidual {
token,
extra_bits,
num_extra,
});
}
(encoded, max_token)
}
pub(super) fn build_token_histogram(encoded: &[EncodedResidual], max_token: u32) -> Vec<u32> {
let size = (max_token + 1) as usize;
let mut histogram = vec![0u32; size];
for e in encoded {
histogram[e.token as usize] += 1;
}
histogram
}
pub(crate) fn write_hybrid_data_histogram(
writer: &mut BitWriter,
histogram: &[u32],
max_token: u32,
) -> Result<(Vec<u8>, Vec<u16>)> {
writer.write(1, 0)?;
writer.write(1, 1)?;
const LOG_ALPHABET_SIZE_PREFIX: u32 = 15;
write_integer_config(writer, LOG_ALPHABET_SIZE_PREFIX, 4, 2, 0)?;
write_varlen_u16(writer, max_token as u16)?;
let histogram_size = (max_token + 1) as usize;
let (depths, codes) = if histogram_size > 1 {
let table = build_and_store_huffman_tree(&histogram[..histogram_size], writer)?;
(table.depths, table.codes)
} else {
(vec![0u8; histogram_size], vec![0u16; histogram_size])
};
Ok((depths, codes))
}
pub(super) fn write_hybrid_residuals(
writer: &mut BitWriter,
encoded: &[EncodedResidual],
depths: &[u8],
codes: &[u16],
) -> Result<()> {
for e in encoded {
let depth = depths.get(e.token as usize).copied().unwrap_or(0);
let code = codes.get(e.token as usize).copied().unwrap_or(0);
if depth > 0 {
writer.write(depth as usize, code as u64)?;
}
if e.num_extra > 0 {
writer.write(e.num_extra as usize, e.extra_bits as u64)?;
}
}
Ok(())
}
fn build_ans_tokens(residuals: &[u32]) -> Vec<AnsToken> {
residuals.iter().map(|&r| AnsToken::new(0, r)).collect()
}
pub(crate) fn build_ans_modular_code(residuals: &[u32]) -> (Vec<AnsToken>, OwnedAnsEntropyCode) {
let tokens = build_ans_tokens(residuals);
let code = build_entropy_code_ans(&tokens, 1); (tokens, code)
}
pub(crate) fn write_ans_modular_header(
writer: &mut BitWriter,
code: &OwnedAnsEntropyCode,
) -> Result<()> {
assert_eq!(
code.histograms.len(),
1,
"modular ANS header only supports single-distribution (single-leaf tree)"
);
writer.write(1, 0)?;
writer.write(1, 0)?;
let las = code.log_alpha_size;
writer.write(2, (las - 5) as u64)?;
let config = code
.uint_configs
.first()
.copied()
.unwrap_or(crate::entropy_coding::hybrid_uint::HybridUintConfig::default_config());
let se_bits = ceil_log2_nonzero(las as u32 + 1);
writer.write(se_bits as usize, config.split_exponent as u64)?;
if (config.split_exponent as usize) != las {
let msb_bits = ceil_log2_nonzero(config.split_exponent + 1);
writer.write(msb_bits as usize, config.msb_in_token as u64)?;
let lsb_bits = ceil_log2_nonzero(config.split_exponent - config.msb_in_token + 1);
writer.write(lsb_bits as usize, config.lsb_in_token as u64)?;
}
code.histograms[0].write(writer)?;
Ok(())
}
fn ceil_log2_nonzero(x: u32) -> u32 {
debug_assert!(x > 0);
let floor = 31 - x.leading_zeros();
if x.is_power_of_two() {
floor
} else {
floor + 1
}
}
pub(crate) fn write_ans_modular_tokens(
writer: &mut BitWriter,
tokens: &[AnsToken],
code: &OwnedAnsEntropyCode,
) -> Result<()> {
write_tokens_ans(tokens, code, None, writer)?;
Ok(())
}
pub(crate) const K_LZ77_MIN_SYMBOL: usize = 224;
pub(crate) fn build_sparse_histogram(tokens: &[Token]) -> Vec<u64> {
let total_symbols = K_LZ77_MIN_SYMBOL + K_NUM_LZ77;
let mut counts = vec![0u64; total_symbols];
for token in tokens {
match token {
Token::Raw(value) => {
let (tok, _, _) = encode_hybrid_uint_000(*value);
if (tok as usize) < total_symbols {
counts[tok as usize] += 1;
}
}
Token::Lz77Run(count) => {
let adjusted = count - K_LZ77_MIN_LENGTH;
let (tok, _, _) = encode_hybrid_uint_lz77_length(adjusted as u32);
let symbol = K_LZ77_MIN_SYMBOL + tok as usize;
if symbol < total_symbols {
counts[symbol] += 1;
}
let (dist_tok, _, _) = encode_hybrid_uint_000(1);
counts[dist_tok as usize] += 1;
}
}
}
counts
}
#[allow(dead_code)]
fn compute_code_lengths(counts: &[u64], max_len: u8) -> Vec<u8> {
let n = counts.len();
if n == 0 {
return vec![];
}
let num_used: usize = counts.iter().filter(|&&c| c > 0).count();
if num_used == 0 {
return vec![0; n];
}
if num_used == 1 {
let mut depths = vec![0u8; n];
for (i, &c) in counts.iter().enumerate() {
if c > 0 {
depths[i] = 1;
break;
}
}
return depths;
}
let histogram: Vec<u32> = counts.iter().map(|&c| c as u32).collect();
create_huffman_tree(&histogram, max_len)
}
pub(crate) fn write_varlen_u16(writer: &mut BitWriter, value: u16) -> Result<()> {
if value == 0 {
writer.write(1, 0)?;
} else {
writer.write(1, 1)?;
let nbits = (16 - value.leading_zeros()) as usize - 1;
writer.write(4, nbits as u64)?;
writer.write(nbits, (value - (1u16 << nbits)) as u64)?;
}
Ok(())
}
#[inline]
fn add_log2_ceil(x: u32) -> u32 {
if x >= 0x80000000 {
32
} else {
(x + 1).next_power_of_two().trailing_zeros()
}
}
#[inline]
#[allow(dead_code)] fn ceil_log2(n: u32) -> u32 {
if n <= 1 {
0
} else {
32 - (n - 1).leading_zeros()
}
}
pub(crate) fn write_integer_config(
writer: &mut BitWriter,
log_alphabet_size: u32,
split_exponent: u32,
msb_in_token: u32,
lsb_in_token: u32,
) -> Result<()> {
let split_exponent_bits = add_log2_ceil(log_alphabet_size) as usize;
writer.write(split_exponent_bits, split_exponent as u64)?;
if split_exponent != log_alphabet_size {
let msb_bits = add_log2_ceil(split_exponent) as usize;
writer.write(msb_bits, msb_in_token as u64)?;
let lsb_bits = add_log2_ceil(split_exponent.saturating_sub(msb_in_token)) as usize;
writer.write(lsb_bits, lsb_in_token as u64)?;
}
Ok(())
}
#[allow(dead_code)] fn write_varint16(writer: &mut BitWriter, value: u16) -> Result<()> {
if value == 0 {
writer.write(1, 0)?;
} else if value == 1 {
writer.write(1, 1)?;
writer.write(4, 0)?;
} else {
writer.write(1, 1)?;
let nbits = 15 - value.leading_zeros() as usize;
let mantissa = value - (1 << nbits);
writer.write(4, nbits as u64)?;
writer.write(nbits, mantissa as u64)?;
}
Ok(())
}
pub(crate) fn write_sparse_lz77_histogram(
writer: &mut BitWriter,
sparse_counts: &[u64],
) -> Result<(Vec<u8>, Vec<u16>)> {
crate::trace::debug_eprintln!("SPARSE_HIST: Writing LZ77-enabled histogram");
writer.write(1, 1)?;
crate::trace::debug_eprintln!(
"SPARSE_HIST [bit {}]: lz77.enabled = 1",
writer.bits_written()
);
writer.write(2, 0)?; crate::trace::debug_eprintln!(
"SPARSE_HIST [bit {}]: min_symbol = 224",
writer.bits_written()
);
writer.write(2, 2)?; writer.write(2, 2)?; crate::trace::debug_eprintln!(
"SPARSE_HIST [bit {}]: min_length = 7",
writer.bits_written()
);
const LZ77_LENGTH_LOG_ALPHA: u32 = 8;
write_integer_config(writer, LZ77_LENGTH_LOG_ALPHA, 0, 0, 0)?;
crate::trace::debug_eprintln!(
"SPARSE_HIST [bit {}]: length_uint_config = {{0, 0, 0}}",
writer.bits_written()
);
writer.write(1, 1)?; writer.write(2, 0)?; crate::trace::debug_eprintln!(
"SPARSE_HIST [bit {}]: context_map (is_simple=1, bits=0)",
writer.bits_written()
);
let _max_raw_symbol = sparse_counts[..K_NUM_RAW_SYMBOLS]
.iter()
.enumerate()
.filter(|(_, c)| **c > 0)
.map(|(i, _)| i)
.max()
.unwrap_or(0);
let max_lz77_symbol = sparse_counts[K_LZ77_MIN_SYMBOL..]
.iter()
.enumerate()
.filter(|(_, c)| **c > 0)
.map(|(i, _)| K_LZ77_MIN_SYMBOL + i)
.max()
.unwrap_or(K_LZ77_MIN_SYMBOL);
crate::trace::debug_eprintln!(
"SPARSE_HIST: max_raw={}, max_lz77={} (count at lz77={})",
_max_raw_symbol,
max_lz77_symbol,
sparse_counts.get(K_LZ77_MIN_SYMBOL).unwrap_or(&0)
);
let histogram: Vec<u32> = sparse_counts.iter().map(|&c| c as u32).collect();
let _num_used: usize = histogram.iter().filter(|&&c| c > 0).count();
crate::trace::debug_eprintln!("SPARSE_HIST: {} used symbols", _num_used);
writer.write(1, 1)?;
crate::trace::debug_eprintln!(
"SPARSE_HIST [bit {}]: use_prefix_code = 1",
writer.bits_written()
);
let alphabet_size = max_lz77_symbol + 1;
const LOG_ALPHABET_SIZE_PREFIX: u32 = 15;
write_integer_config(writer, LOG_ALPHABET_SIZE_PREFIX, 0, 0, 0)?;
crate::trace::debug_eprintln!(
"SPARSE_HIST [bit {}]: uint_config (log_alpha={}, split_exp=0)",
writer.bits_written(),
LOG_ALPHABET_SIZE_PREFIX
);
write_varlen_u16(writer, (alphabet_size - 1) as u16)?;
crate::trace::debug_eprintln!(
"SPARSE_HIST [bit {}]: alphabet_size = {} (max_symbol={})",
writer.bits_written(),
alphabet_size,
alphabet_size - 1
);
let (depths, codes) = if alphabet_size > 1 {
let table = build_and_store_huffman_tree(&histogram[..alphabet_size], writer)?;
crate::trace::debug_eprintln!(
"SPARSE_HIST [bit {}]: After Huffman table",
writer.bits_written()
);
(table.depths, table.codes)
} else {
(vec![0u8; alphabet_size], vec![0u16; alphabet_size])
};
Ok((depths, codes))
}