use crate::bit_writer::BitWriter;
use crate::error::Result;
const CODE_LENGTH_CODES: usize = 18;
const MAX_CODE_DEPTH: u8 = 15;
const MAX_CODE_LENGTH_DEPTH: u8 = 5;
const STORAGE_ORDER: [usize; CODE_LENGTH_CODES] =
[1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15];
const DEPTH_CODE_SYMBOLS: [u8; 6] = [0, 7, 3, 2, 1, 15];
const DEPTH_CODE_BIT_LENGTHS: [u8; 6] = [2, 4, 3, 2, 2, 4];
#[derive(Clone, Copy, Debug)]
struct HuffmanNode {
total_count: u32,
index_left: i32,
index_right_or_value: i32,
}
impl HuffmanNode {
fn leaf(count: u32, symbol: usize) -> Self {
Self {
total_count: count,
index_left: -1,
index_right_or_value: symbol as i32,
}
}
fn sentinel() -> Self {
Self {
total_count: u32::MAX,
index_left: -1,
index_right_or_value: -1,
}
}
#[allow(dead_code)]
fn is_leaf(&self) -> bool {
self.index_left < 0
}
}
fn set_depth(pool: &[HuffmanNode], root_idx: usize, depth: &mut [u8]) {
let mut stack: Vec<(usize, u8)> = Vec::with_capacity(64);
stack.push((root_idx, 0));
while let Some((idx, level)) = stack.pop() {
let node = &pool[idx];
if node.index_left >= 0 {
let next_level = level + 1;
stack.push((node.index_left as usize, next_level));
stack.push((node.index_right_or_value as usize, next_level));
} else {
depth[node.index_right_or_value as usize] = level;
}
}
}
pub fn create_huffman_tree(histogram: &[u32], tree_limit: u8) -> Vec<u8> {
let length = histogram.len();
let mut depth = vec![0u8; length];
let mut count_limit: u32 = 1;
loop {
let mut tree: Vec<HuffmanNode> = Vec::with_capacity(2 * length + 1);
for i in (0..length).rev() {
if histogram[i] > 0 {
let count = histogram[i].max(count_limit.saturating_sub(1));
tree.push(HuffmanNode::leaf(count, i));
}
}
let n = tree.len();
if n == 0 {
return depth;
}
if n == 1 {
depth[tree[0].index_right_or_value as usize] = 1;
return depth;
}
tree.sort_by(|a, b| {
if a.total_count != b.total_count {
a.total_count.cmp(&b.total_count)
} else {
b.index_right_or_value.cmp(&a.index_right_or_value)
}
});
tree.push(HuffmanNode::sentinel());
tree.push(HuffmanNode::sentinel());
let mut i = 0;
let mut j = n + 1;
for _ in 0..n - 1 {
let left = if tree[i].total_count <= tree[j].total_count {
let idx = i;
i += 1;
idx
} else {
let idx = j;
j += 1;
idx
};
let right = if tree[i].total_count <= tree[j].total_count {
let idx = i;
i += 1;
idx
} else {
let idx = j;
j += 1;
idx
};
let j_end = tree.len() - 1;
tree[j_end].total_count = tree[left].total_count + tree[right].total_count;
tree[j_end].index_left = left as i32;
tree[j_end].index_right_or_value = right as i32;
tree.push(HuffmanNode::sentinel());
}
depth.fill(0);
set_depth(&tree, 2 * n - 1, &mut depth);
if depth.iter().copied().max().unwrap_or(0) <= tree_limit {
break;
}
count_limit = match count_limit.checked_mul(2) {
Some(v) => v,
None => break, };
}
depth
}
fn reverse_bits(num_bits: u8, bits: u16) -> u16 {
const LUT: [u16; 16] = [
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
];
let mut retval = LUT[(bits & 0xf) as usize];
let mut bits = bits;
let mut i = 4i32;
while i < num_bits as i32 {
retval <<= 4;
bits >>= 4;
retval |= LUT[(bits & 0xf) as usize];
i += 4;
}
retval >>= (-(num_bits as i32) & 0x3) as u32;
retval
}
pub fn convert_bit_depths_to_symbols(depth: &[u8]) -> Vec<u16> {
const MAX_BITS: usize = 16;
let len = depth.len();
let mut bits = vec![0u16; len];
let mut bl_count = [0u16; MAX_BITS];
for &d in depth {
bl_count[d as usize] += 1;
}
bl_count[0] = 0;
let mut next_code = [0u16; MAX_BITS];
let mut code = 0i32;
for i in 1..MAX_BITS {
code = (code + bl_count[i - 1] as i32) << 1;
next_code[i] = code as u16;
}
for i in 0..len {
let d = depth[i];
if d > 0 {
bits[i] = reverse_bits(d, next_code[d as usize]);
next_code[d as usize] += 1;
}
}
bits
}
#[derive(Debug, Clone)]
pub struct CompressedTree {
pub codes: Vec<u8>,
pub extra_bits: Vec<u8>,
}
fn reverse_slice(v: &mut [u8], start: usize, end: usize) {
let mut s = start;
let mut e = end - 1;
while s < e {
v.swap(s, e);
s += 1;
e -= 1;
}
}
fn write_repetitions_zeros(mut repetitions: usize, tree: &mut CompressedTree) {
if repetitions == 11 {
tree.codes.push(0);
tree.extra_bits.push(0);
repetitions -= 1;
}
if repetitions < 3 {
for _ in 0..repetitions {
tree.codes.push(0);
tree.extra_bits.push(0);
}
} else {
repetitions -= 3;
let start = tree.codes.len();
loop {
tree.codes.push(17);
tree.extra_bits.push((repetitions & 0x7) as u8);
repetitions >>= 3;
if repetitions == 0 {
break;
}
repetitions -= 1;
}
let end = tree.codes.len();
reverse_slice(&mut tree.codes, start, end);
reverse_slice(&mut tree.extra_bits, start, end);
}
}
fn write_repetitions_nonzero(
previous_value: u8,
value: u8,
mut repetitions: usize,
tree: &mut CompressedTree,
) {
debug_assert!(repetitions > 0);
if previous_value != value {
tree.codes.push(value);
tree.extra_bits.push(0);
repetitions -= 1;
}
if repetitions == 7 {
tree.codes.push(value);
tree.extra_bits.push(0);
repetitions -= 1;
}
if repetitions < 3 {
for _ in 0..repetitions {
tree.codes.push(value);
tree.extra_bits.push(0);
}
} else {
repetitions -= 3;
let start = tree.codes.len();
loop {
tree.codes.push(16);
tree.extra_bits.push((repetitions & 0x3) as u8);
repetitions >>= 2;
if repetitions == 0 {
break;
}
repetitions -= 1;
}
let end = tree.codes.len();
reverse_slice(&mut tree.codes, start, end);
reverse_slice(&mut tree.extra_bits, start, end);
}
}
fn decide_rle_use(depth: &[u8]) -> (bool, bool) {
let mut total_reps_zero = 0usize;
let mut total_reps_non_zero = 0usize;
let mut count_reps_zero = 1usize;
let mut count_reps_non_zero = 1usize;
let mut i = 0;
while i < depth.len() {
let value = depth[i];
let mut reps = 1usize;
while i + reps < depth.len() && depth[i + reps] == value {
reps += 1;
}
if reps >= 3 && value == 0 {
total_reps_zero += reps;
count_reps_zero += 1;
}
if reps >= 4 && value != 0 {
total_reps_non_zero += reps;
count_reps_non_zero += 1;
}
i += reps;
}
let use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
let use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
(use_rle_for_non_zero, use_rle_for_zero)
}
pub fn write_huffman_tree(depth: &[u8]) -> CompressedTree {
let mut tree = CompressedTree {
codes: Vec::new(),
extra_bits: Vec::new(),
};
let mut new_length = depth.len();
while new_length > 0 && depth[new_length - 1] == 0 {
new_length -= 1;
}
if new_length == 0 {
return tree;
}
let (use_rle_for_non_zero, use_rle_for_zero) = if depth.len() > 50 {
decide_rle_use(&depth[..new_length])
} else {
(false, false)
};
let mut previous_value = 8u8; let mut i = 0;
while i < new_length {
let value = depth[i];
let mut reps = 1usize;
if (value != 0 && use_rle_for_non_zero) || (value == 0 && use_rle_for_zero) {
while i + reps < new_length && depth[i + reps] == value {
reps += 1;
}
}
if value == 0 {
write_repetitions_zeros(reps, &mut tree);
} else {
write_repetitions_nonzero(previous_value, value, reps, &mut tree);
previous_value = value;
}
i += reps;
}
tree
}
pub fn store_meta_huffman_tree(
num_codes: usize,
code_length_depths: &[u8; CODE_LENGTH_CODES],
writer: &mut BitWriter,
) -> Result<()> {
let mut codes_to_store = CODE_LENGTH_CODES;
if num_codes > 1 {
while codes_to_store > 0 && code_length_depths[STORAGE_ORDER[codes_to_store - 1]] == 0 {
codes_to_store -= 1;
}
}
let mut skip_some = 0usize;
if code_length_depths[STORAGE_ORDER[0]] == 0 && code_length_depths[STORAGE_ORDER[1]] == 0 {
skip_some = 2;
if code_length_depths[STORAGE_ORDER[2]] == 0 {
skip_some = 3;
}
}
crate::trace::debug_eprintln!(
"STORE_META: codes_to_store={}, skip_some={}",
codes_to_store,
skip_some
);
let _bit_pos_start = writer.bits_written();
writer.write(2, skip_some as u64)?;
crate::trace::debug_eprintln!(
" META: wrote hskip={} at bit {}",
skip_some,
_bit_pos_start
);
let mut _bitacc = 0usize;
#[allow(clippy::unused_enumerate_index)]
for (_idx, &symbol) in STORAGE_ORDER[skip_some..codes_to_store].iter().enumerate() {
let depth_value = code_length_depths[symbol] as usize;
debug_assert!(depth_value <= 5, "Code length depth must be 0-5");
let bits = DEPTH_CODE_BIT_LENGTHS[depth_value] as usize;
let code = DEPTH_CODE_SYMBOLS[depth_value] as u64;
writer.write(bits, code)?;
if depth_value > 0 {
_bitacc += 32 >> depth_value;
}
crate::trace::debug_eprintln!(
" META[{}]: symbol={}, depth={}, code=0b{:0width$b} ({} bits), bitacc={}",
skip_some + _idx,
symbol,
depth_value,
code,
bits,
_bitacc,
width = bits
);
}
crate::trace::debug_eprintln!(" META: final bitacc={} (should be 32)", _bitacc);
Ok(())
}
pub fn store_compressed_tree(
tree: &CompressedTree,
code_length_depths: &[u8; CODE_LENGTH_CODES],
code_length_codes: &[u16; CODE_LENGTH_CODES],
writer: &mut BitWriter,
) -> Result<()> {
crate::trace::debug_eprintln!(" TREE: meta_codes = {:?}", &code_length_codes[..5]);
crate::trace::debug_eprintln!(" TREE: meta_depths = {:?}", &code_length_depths[..5]);
for i in 0..tree.codes.len().min(10) {
let ix = tree.codes[i] as usize;
debug_assert!(ix < CODE_LENGTH_CODES);
let depth = code_length_depths[ix] as usize;
let code = code_length_codes[ix] as u64;
let _bit_pos = writer.bits_written();
if depth > 0 {
writer.write(depth, code)?;
}
crate::trace::debug_eprintln!(
" TREE[{}]: code_len_code={}, meta_depth={}, meta_code=0b{:b}, extra_bits={}",
i,
ix,
depth,
code,
tree.extra_bits[i]
);
match ix {
16 => writer.write(2, tree.extra_bits[i] as u64)?,
17 => writer.write(3, tree.extra_bits[i] as u64)?,
_ => {}
}
}
for i in 10..tree.codes.len() {
let ix = tree.codes[i] as usize;
let depth = code_length_depths[ix] as usize;
let code = code_length_codes[ix] as u64;
if depth > 0 {
writer.write(depth, code)?;
}
match ix {
16 => writer.write(2, tree.extra_bits[i] as u64)?,
17 => writer.write(3, tree.extra_bits[i] as u64)?,
_ => {}
}
}
Ok(())
}
pub fn store_huffman_tree(depths: &[u8], writer: &mut BitWriter) -> Result<()> {
let _first_10: Vec<u8> = depths.iter().take(10).copied().collect();
let _last_10: Vec<u8> = depths.iter().rev().take(10).rev().copied().collect();
crate::trace::debug_eprintln!(
"STORE_HUFF: depths len={}, first_10={:?}, last_10={:?}",
depths.len(),
_first_10,
_last_10
);
let compressed = write_huffman_tree(depths);
crate::trace::debug_eprintln!(
"STORE_HUFF: compressed codes={:?}, extra_bits={:?}",
compressed.codes,
compressed.extra_bits
);
let mut histogram = [0u32; CODE_LENGTH_CODES];
for &code in &compressed.codes {
histogram[code as usize] += 1;
}
crate::trace::debug_eprintln!("STORE_HUFF: code_length_histogram={:?}", histogram);
let mut num_codes = 0;
let mut single_code = 0;
for (i, &count) in histogram.iter().enumerate() {
if count > 0 {
if num_codes == 0 {
single_code = i;
num_codes = 1;
} else if num_codes == 1 {
num_codes = 2;
break;
}
}
}
let code_length_depths = create_huffman_tree(&histogram, MAX_CODE_LENGTH_DEPTH);
let mut code_length_depths_arr = [0u8; CODE_LENGTH_CODES];
code_length_depths_arr.copy_from_slice(&code_length_depths);
let code_length_codes_vec = convert_bit_depths_to_symbols(&code_length_depths);
let mut code_length_codes_arr = [0u16; CODE_LENGTH_CODES];
code_length_codes_arr.copy_from_slice(&code_length_codes_vec);
crate::trace::debug_eprintln!(
"STORE_HUFF: num_codes={}, meta_depths={:?}",
num_codes,
code_length_depths_arr
);
store_meta_huffman_tree(num_codes, &code_length_depths_arr, writer)?;
if num_codes == 1 {
code_length_depths_arr[single_code] = 0;
}
store_compressed_tree(
&compressed,
&code_length_depths_arr,
&code_length_codes_arr,
writer,
)?;
Ok(())
}
pub struct HuffmanTable {
pub depths: Vec<u8>,
pub codes: Vec<u16>,
}
pub fn build_and_store_huffman_tree(
histogram: &[u32],
writer: &mut BitWriter,
) -> Result<HuffmanTable> {
let length = histogram.len();
let nonzero: Vec<(usize, u32)> = histogram
.iter()
.enumerate()
.filter(|&(_, h)| *h > 0)
.map(|(i, h)| (i, *h))
.collect();
crate::trace::debug_eprintln!(
"HUFFMAN_BUILD: alphabet_size={}, nonzero_symbols={:?}",
length,
nonzero
);
let mut count = 0usize;
let mut s4 = [0usize; 4];
for (i, &h) in histogram.iter().enumerate() {
if h > 0 {
if count < 4 {
s4[count] = i;
}
count += 1;
if count > 4 {
break; }
}
}
let max_bits = if length <= 1 {
0
} else {
(usize::BITS - (length - 1).leading_zeros()) as usize
};
if count <= 1 {
let mut depths = vec![0u8; length];
let codes = vec![0u16; length];
writer.write(4, 1)?;
writer.write(max_bits, s4[0] as u64)?;
if count == 1 {
depths[s4[0]] = 0;
}
return Ok(HuffmanTable { depths, codes });
}
let depths = create_huffman_tree(histogram, MAX_CODE_DEPTH);
let codes = convert_bit_depths_to_symbols(&depths);
let _depths_info: Vec<(usize, u8, u16)> = nonzero
.iter()
.map(|(i, _)| (*i, depths[*i], codes[*i]))
.collect();
crate::trace::debug_eprintln!(
"HUFFMAN_BUILD: depths/codes for used symbols: {:?}",
_depths_info
);
if count <= 4 {
crate::trace::debug_eprintln!("HUFFMAN_BUILD: using simple code for {} symbols", count);
store_simple_huffman_tree(&depths, &mut s4, count, max_bits, writer)?;
} else {
crate::trace::debug_eprintln!("HUFFMAN_BUILD: using full table for {} symbols", count);
store_huffman_tree(&depths, writer)?;
}
Ok(HuffmanTable { depths, codes })
}
fn store_simple_huffman_tree(
depths: &[u8],
symbols: &mut [usize; 4],
num_symbols: usize,
max_bits: usize,
writer: &mut BitWriter,
) -> Result<()> {
writer.write(2, 1)?;
writer.write(2, (num_symbols - 1) as u64)?;
for i in 0..num_symbols {
for j in i + 1..num_symbols {
if depths[symbols[j]] < depths[symbols[i]] {
symbols.swap(i, j);
}
}
}
for &sym in symbols.iter().take(num_symbols) {
writer.write(max_bits, sym as u64)?;
}
if num_symbols == 4 {
let tree_select = if depths[symbols[0]] == 1 { 1 } else { 0 };
writer.write(1, tree_select)?;
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_create_huffman_tree_single_symbol() {
let histogram = [0, 0, 100, 0, 0];
let depths = create_huffman_tree(&histogram, 15);
assert_eq!(depths, vec![0, 0, 1, 0, 0]);
}
#[test]
fn test_create_huffman_tree_two_symbols() {
let histogram = [50, 0, 50, 0, 0];
let depths = create_huffman_tree(&histogram, 15);
assert_eq!(depths[0], 1);
assert_eq!(depths[2], 1);
}
#[test]
fn test_create_huffman_tree_four_symbols() {
let histogram = [10, 20, 30, 40, 0];
let depths = create_huffman_tree(&histogram, 15);
assert!(depths[3] <= depths[2]);
assert!(depths[2] <= depths[1]);
assert!(depths[1] <= depths[0]);
}
#[test]
fn test_create_huffman_tree_many_symbols() {
let histogram = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512];
let depths = create_huffman_tree(&histogram, 15);
for (i, &d) in depths.iter().enumerate() {
assert!(d > 0, "Symbol {} should have non-zero depth", i);
}
for i in 1..10 {
assert!(
depths[i] <= depths[i - 1],
"Symbol {} (freq {}) should have depth <= symbol {} (freq {})",
i,
histogram[i],
i - 1,
histogram[i - 1]
);
}
}
#[test]
fn test_create_huffman_tree_depth_limit() {
let histogram = vec![1u32; 100];
let depths = create_huffman_tree(&histogram, 15);
for &d in &depths {
assert!(d <= 15, "Depth {} exceeds limit of 15", d);
}
}
#[test]
fn test_reverse_bits() {
assert_eq!(reverse_bits(1, 0), 0);
assert_eq!(reverse_bits(1, 1), 1);
assert_eq!(reverse_bits(2, 0b01), 0b10);
assert_eq!(reverse_bits(2, 0b10), 0b01);
assert_eq!(reverse_bits(3, 0b001), 0b100);
assert_eq!(reverse_bits(4, 0b0001), 0b1000);
assert_eq!(reverse_bits(8, 0b10101010), 0b01010101);
}
#[test]
fn test_convert_bit_depths_to_symbols() {
let depths = [1, 1, 0];
let codes = convert_bit_depths_to_symbols(&depths);
assert_eq!(codes[0], 0);
assert_eq!(codes[1], 1);
}
#[test]
fn test_convert_bit_depths_three_symbols() {
let depths = [1, 2, 2, 0];
let codes = convert_bit_depths_to_symbols(&depths);
assert_eq!(codes[0], 0b0);
assert_eq!(codes[1], 0b01);
assert_eq!(codes[2], 0b11);
}
#[test]
fn test_write_huffman_tree_simple() {
let depths = [2, 2, 0, 0];
let tree = write_huffman_tree(&depths);
assert_eq!(tree.codes, vec![2, 2]);
assert!(tree.extra_bits.iter().all(|&x| x == 0));
}
#[test]
fn test_write_huffman_tree_with_zeros() {
let depths = [3, 0, 0, 0, 3];
let tree = write_huffman_tree(&depths);
assert!(!tree.codes.is_empty());
}
#[test]
fn test_write_huffman_tree_long_zeros() {
let mut depths = vec![0u8; 100];
depths[0] = 3;
depths[99] = 3;
let tree = write_huffman_tree(&depths);
assert!(tree.codes.contains(&17));
}
#[test]
fn test_build_and_store_single_symbol() {
let histogram = [0, 0, 100, 0, 0];
let mut writer = BitWriter::new();
let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
assert_eq!(table.depths[2], 0);
let bytes = writer.finish_with_padding();
assert!(!bytes.is_empty());
}
#[test]
fn test_build_and_store_two_symbols() {
let histogram = [10, 0, 20, 0, 0];
let mut writer = BitWriter::new();
let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
assert_eq!(table.depths[0], 1);
assert_eq!(table.depths[2], 1);
let bytes = writer.finish_with_padding();
assert!(!bytes.is_empty());
}
#[test]
fn test_build_and_store_many_symbols() {
let histogram: Vec<u32> = (1..=10).map(|x| x * 10).collect();
let mut writer = BitWriter::new();
let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
for i in 0..10 {
assert!(table.depths[i] > 0);
}
let bytes = writer.finish_with_padding();
assert!(!bytes.is_empty());
}
#[test]
fn test_build_and_store_256_symbols() {
let mut histogram = vec![0u32; 256];
for (i, h) in histogram.iter_mut().enumerate() {
*h = (i as u32 + 1) * 2;
}
let mut writer = BitWriter::new();
let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
for i in 0..256 {
assert!(table.depths[i] > 0 && table.depths[i] <= 15);
}
let bytes = writer.finish_with_padding();
assert!(!bytes.is_empty());
}
#[test]
fn test_canonical_codes_are_prefix_free() {
let histogram: Vec<u32> = (1..=20).map(|x| x * 5).collect();
let depths = create_huffman_tree(&histogram, 15);
let codes = convert_bit_depths_to_symbols(&depths);
for i in 0..20 {
if depths[i] == 0 {
continue;
}
for j in i + 1..20 {
if depths[j] == 0 {
continue;
}
let (shorter, longer) = if depths[i] <= depths[j] {
((codes[i], depths[i]), (codes[j], depths[j]))
} else {
((codes[j], depths[j]), (codes[i], depths[i]))
};
let prefix_mask = (1u16 << shorter.1) - 1;
let longer_prefix = longer.0 & prefix_mask;
assert_ne!(
shorter.0, longer_prefix,
"Code {} (depth {}) is prefix of code {} (depth {})",
shorter.0, shorter.1, longer.0, longer.1
);
}
}
}
}