jxl_encoder/entropy_coding/huffman_tree.rs
1// Copyright (c) Imazen LLC and the JPEG XL Project Authors.
2// Algorithms and constants derived from libjxl (BSD-3-Clause).
3// Licensed under AGPL-3.0-or-later. Commercial licenses at https://www.imazen.io/pricing
4
5//! Full Huffman tree encoder supporting arbitrary alphabet sizes.
6//!
7//! This module ports libjxl's `enc_huffman_tree.cc` and parts of `enc_huffman.cc`.
8//! It supports encoding Huffman tables with more than 4 symbols using the
9//! code length table format with RLE compression.
10//!
11//! # Architecture
12//! 1. `create_huffman_tree` - builds optimal tree, outputs depth array
13//! 2. `convert_bit_depths_to_symbols` - converts depths to canonical codes
14//! 3. `write_huffman_tree` - RLE-compresses depth array for storage
15//! 4. `store_huffman_tree` - writes compressed tree to bitstream
16
17use crate::bit_writer::BitWriter;
18use crate::error::Result;
19
20/// Number of code length codes (0-15 for depths, 16-17 for RLE).
21const CODE_LENGTH_CODES: usize = 18;
22
23/// Maximum code depth for regular Huffman codes.
24const MAX_CODE_DEPTH: u8 = 15;
25
26/// Maximum code depth for code length codes (meta-Huffman).
27const MAX_CODE_LENGTH_DEPTH: u8 = 5;
28
29/// Order in which code length codes are stored.
30/// This ordering groups commonly-used codes together to allow truncation.
31///
32/// ```cpp
33/// // libjxl: enc_huffman.cc
34/// static const uint8_t kStorageOrder[kCodeLengthCodes] = {
35/// 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15};
36/// ```
37const STORAGE_ORDER: [usize; CODE_LENGTH_CODES] =
38 [1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15];
39
40/// Static Huffman code for encoding code length code depths.
41/// Maps depth values 0-5 to their codes.
42///
43/// ```cpp
44/// // libjxl: enc_huffman.cc
45/// // Symbol Code
46/// // ------ ----
47/// // 0 00
48/// // 1 1110
49/// // 2 110
50/// // 3 01
51/// // 4 10
52/// // 5 1111
53/// static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {0, 7, 3, 2, 1, 15};
54/// static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {2, 4, 3, 2, 2, 4};
55/// ```
56const DEPTH_CODE_SYMBOLS: [u8; 6] = [0, 7, 3, 2, 1, 15];
57const DEPTH_CODE_BIT_LENGTHS: [u8; 6] = [2, 4, 3, 2, 2, 4];
58
59// ============================================================================
60// Huffman Tree Building
61// ============================================================================
62
63/// Node in the Huffman tree during construction.
64///
65/// ```cpp
66/// // libjxl: enc_huffman_tree.h
67/// struct HuffmanTree {
68/// uint32_t total_count;
69/// int16_t index_left;
70/// int16_t index_right_or_value;
71/// };
72/// ```
73#[derive(Clone, Copy, Debug)]
74struct HuffmanNode {
75 /// Total count (frequency) of this subtree.
76 total_count: u32,
77 /// Index of left child, or -1 if leaf.
78 index_left: i32,
79 /// Index of right child, or symbol value if leaf.
80 /// Note: libjxl uses i16, but we need i32 for alphabets > 32K symbols.
81 index_right_or_value: i32,
82}
83
84impl HuffmanNode {
85 fn leaf(count: u32, symbol: usize) -> Self {
86 Self {
87 total_count: count,
88 index_left: -1,
89 index_right_or_value: symbol as i32,
90 }
91 }
92
93 fn sentinel() -> Self {
94 Self {
95 total_count: u32::MAX,
96 index_left: -1,
97 index_right_or_value: -1,
98 }
99 }
100
101 #[allow(dead_code)]
102 fn is_leaf(&self) -> bool {
103 self.index_left < 0
104 }
105}
106
107/// Recursively set depth for all symbols in a subtree.
108///
109/// ```cpp
110/// // libjxl: enc_huffman_tree.cc
111/// void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth, uint8_t level) {
112/// if (p.index_left >= 0) {
113/// ++level;
114/// SetDepth(pool[p.index_left], pool, depth, level);
115/// SetDepth(pool[p.index_right_or_value], pool, depth, level);
116/// } else {
117/// depth[p.index_right_or_value] = level;
118/// }
119/// }
120/// ```
121fn set_depth(pool: &[HuffmanNode], root_idx: usize, depth: &mut [u8]) {
122 // Iterative DFS to avoid stack overflow on deep/degenerate Huffman trees.
123 // A degenerate tree with N leaves can have depth N-1, which overflows the
124 // call stack for N > ~10K symbols (common with patch position histograms).
125 let mut stack: Vec<(usize, u8)> = Vec::with_capacity(64);
126 stack.push((root_idx, 0));
127
128 while let Some((idx, level)) = stack.pop() {
129 let node = &pool[idx];
130 if node.index_left >= 0 {
131 let next_level = level + 1;
132 stack.push((node.index_left as usize, next_level));
133 stack.push((node.index_right_or_value as usize, next_level));
134 } else {
135 depth[node.index_right_or_value as usize] = level;
136 }
137 }
138}
139
140/// Creates an optimal Huffman tree from symbol frequencies.
141///
142/// Returns the depth (code length) for each symbol. Symbols with zero
143/// frequency get depth 0 (meaning they don't appear in the tree).
144///
145/// The tree depth is limited to `tree_limit` bits. If the natural tree
146/// would exceed this, the algorithm retries with inflated minimum counts.
147///
148/// ```cpp
149/// // libjxl: enc_huffman_tree.cc
150/// void CreateHuffmanTree(const uint32_t* data, const size_t length,
151/// const int tree_limit, uint8_t* depth) {
152/// for (uint32_t count_limit = 1;; count_limit *= 2) {
153/// std::vector<HuffmanTree> tree;
154/// tree.reserve(2 * length + 1);
155/// // ... build tree ...
156/// if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
157/// break;
158/// }
159/// }
160/// }
161/// ```
162pub fn create_huffman_tree(histogram: &[u32], tree_limit: u8) -> Vec<u8> {
163 let length = histogram.len();
164 let mut depth = vec![0u8; length];
165
166 // Retry with increasing count_limit until tree fits in tree_limit bits
167 let mut count_limit: u32 = 1;
168 loop {
169 // Build list of leaf nodes for non-zero symbols
170 // Process in reverse order so that ties favor lower symbol indices
171 let mut tree: Vec<HuffmanNode> = Vec::with_capacity(2 * length + 1);
172 for i in (0..length).rev() {
173 if histogram[i] > 0 {
174 // Clamp count to at least count_limit - 1 to limit tree depth
175 let count = histogram[i].max(count_limit.saturating_sub(1));
176 tree.push(HuffmanNode::leaf(count, i));
177 }
178 }
179
180 let n = tree.len();
181 if n == 0 {
182 // No symbols - return all zeros
183 return depth;
184 }
185 if n == 1 {
186 // Single symbol - give it depth 1 (will be corrected by caller)
187 depth[tree[0].index_right_or_value as usize] = 1;
188 return depth;
189 }
190
191 // Sort by count (ascending), then by symbol index (descending for ties)
192 tree.sort_by(|a, b| {
193 if a.total_count != b.total_count {
194 a.total_count.cmp(&b.total_count)
195 } else {
196 // Higher index first for ties (reverse order)
197 b.index_right_or_value.cmp(&a.index_right_or_value)
198 }
199 });
200
201 // Add sentinel nodes
202 tree.push(HuffmanNode::sentinel());
203 tree.push(HuffmanNode::sentinel());
204
205 // Build tree using two-queue algorithm
206 // i: next leaf node, j: next internal node
207 let mut i = 0;
208 let mut j = n + 1;
209
210 for _ in 0..n - 1 {
211 // Pick two smallest nodes
212 let left = if tree[i].total_count <= tree[j].total_count {
213 let idx = i;
214 i += 1;
215 idx
216 } else {
217 let idx = j;
218 j += 1;
219 idx
220 };
221
222 let right = if tree[i].total_count <= tree[j].total_count {
223 let idx = i;
224 i += 1;
225 idx
226 } else {
227 let idx = j;
228 j += 1;
229 idx
230 };
231
232 // Create parent node at the sentinel position
233 let j_end = tree.len() - 1;
234 tree[j_end].total_count = tree[left].total_count + tree[right].total_count;
235 tree[j_end].index_left = left as i32;
236 tree[j_end].index_right_or_value = right as i32;
237
238 // Add new sentinel
239 tree.push(HuffmanNode::sentinel());
240 }
241
242 // Extract depths from tree
243 depth.fill(0);
244 set_depth(&tree, 2 * n - 1, &mut depth);
245
246 // Check if tree fits in limit
247 if depth.iter().copied().max().unwrap_or(0) <= tree_limit {
248 break;
249 }
250
251 // Tree too deep - retry with higher count_limit
252 count_limit = match count_limit.checked_mul(2) {
253 Some(v) => v,
254 None => break, // u32 overflow — stop retrying, use best depth so far
255 };
256 }
257
258 depth
259}
260
261// ============================================================================
262// Bit Code Generation
263// ============================================================================
264
265/// Reverses bits in a value.
266///
267/// ```cpp
268/// // libjxl: enc_huffman_tree.cc
269/// uint16_t ReverseBits(int num_bits, uint16_t bits) {
270/// static const size_t kLut[16] = {
271/// 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
272/// 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf};
273/// size_t retval = kLut[bits & 0xf];
274/// for (int i = 4; i < num_bits; i += 4) {
275/// retval <<= 4;
276/// bits = static_cast<uint16_t>(bits >> 4);
277/// retval |= kLut[bits & 0xf];
278/// }
279/// retval >>= (-num_bits & 0x3);
280/// return static_cast<uint16_t>(retval);
281/// }
282/// ```
283fn reverse_bits(num_bits: u8, bits: u16) -> u16 {
284 const LUT: [u16; 16] = [
285 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
286 ];
287
288 let mut retval = LUT[(bits & 0xf) as usize];
289 let mut bits = bits;
290 let mut i = 4i32;
291 while i < num_bits as i32 {
292 retval <<= 4;
293 bits >>= 4;
294 retval |= LUT[(bits & 0xf) as usize];
295 i += 4;
296 }
297 retval >>= (-(num_bits as i32) & 0x3) as u32;
298 retval
299}
300
301/// Converts code depths to canonical Huffman codes.
302///
303/// Returns (codes, depths) where `codes[i]` is the bit pattern for symbol `i`.
304///
305/// ```cpp
306/// // libjxl: enc_huffman_tree.cc
307/// void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, uint16_t* bits) {
308/// const int kMaxBits = 16;
309/// uint16_t bl_count[kMaxBits] = {0};
310/// for (size_t i = 0; i < len; ++i) {
311/// ++bl_count[depth[i]];
312/// }
313/// bl_count[0] = 0;
314/// uint16_t next_code[kMaxBits];
315/// next_code[0] = 0;
316/// int code = 0;
317/// for (size_t i = 1; i < kMaxBits; ++i) {
318/// code = (code + bl_count[i - 1]) << 1;
319/// next_code[i] = static_cast<uint16_t>(code);
320/// }
321/// for (size_t i = 0; i < len; ++i) {
322/// if (depth[i]) {
323/// bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
324/// }
325/// }
326/// }
327/// ```
328pub fn convert_bit_depths_to_symbols(depth: &[u8]) -> Vec<u16> {
329 const MAX_BITS: usize = 16;
330 let len = depth.len();
331 let mut bits = vec![0u16; len];
332
333 // Count symbols at each depth
334 let mut bl_count = [0u16; MAX_BITS];
335 for &d in depth {
336 bl_count[d as usize] += 1;
337 }
338 bl_count[0] = 0; // Depth 0 means symbol doesn't exist
339
340 // Compute first code at each depth
341 let mut next_code = [0u16; MAX_BITS];
342 let mut code = 0i32;
343 for i in 1..MAX_BITS {
344 code = (code + bl_count[i - 1] as i32) << 1;
345 next_code[i] = code as u16;
346 }
347
348 // Assign codes to symbols
349 for i in 0..len {
350 let d = depth[i];
351 if d > 0 {
352 bits[i] = reverse_bits(d, next_code[d as usize]);
353 next_code[d as usize] += 1;
354 }
355 }
356
357 bits
358}
359
360// ============================================================================
361// RLE Compression of Code Lengths
362// ============================================================================
363
364/// RLE-compressed representation of a Huffman tree.
365#[derive(Debug, Clone)]
366pub struct CompressedTree {
367 /// Code length codes (0-17)
368 pub codes: Vec<u8>,
369 /// Extra bits for each code (used by codes 16 and 17)
370 pub extra_bits: Vec<u8>,
371}
372
373/// Reverse a slice in place.
374fn reverse_slice(v: &mut [u8], start: usize, end: usize) {
375 let mut s = start;
376 let mut e = end - 1;
377 while s < e {
378 v.swap(s, e);
379 s += 1;
380 e -= 1;
381 }
382}
383
384/// Write RLE-encoded repetitions of zeros.
385///
386/// ```cpp
387/// // libjxl: enc_huffman_tree.cc
388/// void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size,
389/// uint8_t* tree, uint8_t* extra_bits_data) {
390/// if (repetitions == 11) {
391/// tree[*tree_size] = 0;
392/// extra_bits_data[*tree_size] = 0;
393/// ++(*tree_size);
394/// --repetitions;
395/// }
396/// if (repetitions < 3) {
397/// for (size_t i = 0; i < repetitions; ++i) {
398/// tree[*tree_size] = 0;
399/// extra_bits_data[*tree_size] = 0;
400/// ++(*tree_size);
401/// }
402/// } else {
403/// repetitions -= 3;
404/// size_t start = *tree_size;
405/// while (true) {
406/// tree[*tree_size] = 17;
407/// extra_bits_data[*tree_size] = repetitions & 0x7;
408/// ++(*tree_size);
409/// repetitions >>= 3;
410/// if (repetitions == 0) {
411/// break;
412/// }
413/// --repetitions;
414/// }
415/// Reverse(tree, start, *tree_size);
416/// Reverse(extra_bits_data, start, *tree_size);
417/// }
418/// }
419/// ```
420fn write_repetitions_zeros(mut repetitions: usize, tree: &mut CompressedTree) {
421 // Special case: 11 reps would need code 17 with extra=8, but max extra is 7
422 if repetitions == 11 {
423 tree.codes.push(0);
424 tree.extra_bits.push(0);
425 repetitions -= 1;
426 }
427
428 if repetitions < 3 {
429 // Write individual zeros
430 for _ in 0..repetitions {
431 tree.codes.push(0);
432 tree.extra_bits.push(0);
433 }
434 } else {
435 // Use code 17: repeat zeros with 3 extra bits (3-10 reps)
436 repetitions -= 3;
437 let start = tree.codes.len();
438 loop {
439 tree.codes.push(17);
440 tree.extra_bits.push((repetitions & 0x7) as u8);
441 repetitions >>= 3;
442 if repetitions == 0 {
443 break;
444 }
445 repetitions -= 1;
446 }
447 // Reverse the codes we just wrote
448 let end = tree.codes.len();
449 reverse_slice(&mut tree.codes, start, end);
450 reverse_slice(&mut tree.extra_bits, start, end);
451 }
452}
453
454/// Write RLE-encoded repetitions of non-zero values.
455///
456/// ```cpp
457/// // libjxl: enc_huffman_tree.cc
458/// void WriteHuffmanTreeRepetitions(const uint8_t previous_value,
459/// const uint8_t value, size_t repetitions,
460/// size_t* tree_size, uint8_t* tree,
461/// uint8_t* extra_bits_data) {
462/// JXL_DASSERT(repetitions > 0);
463/// if (previous_value != value) {
464/// tree[*tree_size] = value;
465/// extra_bits_data[*tree_size] = 0;
466/// ++(*tree_size);
467/// --repetitions;
468/// }
469/// if (repetitions == 7) {
470/// tree[*tree_size] = value;
471/// extra_bits_data[*tree_size] = 0;
472/// ++(*tree_size);
473/// --repetitions;
474/// }
475/// if (repetitions < 3) {
476/// for (size_t i = 0; i < repetitions; ++i) {
477/// tree[*tree_size] = value;
478/// extra_bits_data[*tree_size] = 0;
479/// ++(*tree_size);
480/// }
481/// } else {
482/// repetitions -= 3;
483/// size_t start = *tree_size;
484/// while (true) {
485/// tree[*tree_size] = 16;
486/// extra_bits_data[*tree_size] = repetitions & 0x3;
487/// ++(*tree_size);
488/// repetitions >>= 2;
489/// if (repetitions == 0) {
490/// break;
491/// }
492/// --repetitions;
493/// }
494/// Reverse(tree, start, *tree_size);
495/// Reverse(extra_bits_data, start, *tree_size);
496/// }
497/// }
498/// ```
499fn write_repetitions_nonzero(
500 previous_value: u8,
501 value: u8,
502 mut repetitions: usize,
503 tree: &mut CompressedTree,
504) {
505 debug_assert!(repetitions > 0);
506
507 // If value changed, write it literally first
508 if previous_value != value {
509 tree.codes.push(value);
510 tree.extra_bits.push(0);
511 repetitions -= 1;
512 }
513
514 // Special case: 7 reps would need code 16 with extra=4, but max extra is 3
515 if repetitions == 7 {
516 tree.codes.push(value);
517 tree.extra_bits.push(0);
518 repetitions -= 1;
519 }
520
521 if repetitions < 3 {
522 // Write individual values
523 for _ in 0..repetitions {
524 tree.codes.push(value);
525 tree.extra_bits.push(0);
526 }
527 } else {
528 // Use code 16: repeat previous with 2 extra bits (3-6 reps)
529 repetitions -= 3;
530 let start = tree.codes.len();
531 loop {
532 tree.codes.push(16);
533 tree.extra_bits.push((repetitions & 0x3) as u8);
534 repetitions >>= 2;
535 if repetitions == 0 {
536 break;
537 }
538 repetitions -= 1;
539 }
540 // Reverse the codes we just wrote
541 let end = tree.codes.len();
542 reverse_slice(&mut tree.codes, start, end);
543 reverse_slice(&mut tree.extra_bits, start, end);
544 }
545}
546
547/// Decide whether to use RLE for zeros and non-zeros.
548///
549/// ```cpp
550/// // libjxl: enc_huffman_tree.cc
551/// static void DecideOverRleUse(const uint8_t* depth, const size_t length,
552/// bool* use_rle_for_non_zero, bool* use_rle_for_zero) {
553/// size_t total_reps_zero = 0;
554/// size_t total_reps_non_zero = 0;
555/// size_t count_reps_zero = 1;
556/// size_t count_reps_non_zero = 1;
557/// for (size_t i = 0; i < length;) {
558/// const uint8_t value = depth[i];
559/// size_t reps = 1;
560/// for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
561/// ++reps;
562/// }
563/// if (reps >= 3 && value == 0) {
564/// total_reps_zero += reps;
565/// ++count_reps_zero;
566/// }
567/// if (reps >= 4 && value != 0) {
568/// total_reps_non_zero += reps;
569/// ++count_reps_non_zero;
570/// }
571/// i += reps;
572/// }
573/// *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
574/// *use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
575/// }
576/// ```
577fn decide_rle_use(depth: &[u8]) -> (bool, bool) {
578 let mut total_reps_zero = 0usize;
579 let mut total_reps_non_zero = 0usize;
580 let mut count_reps_zero = 1usize;
581 let mut count_reps_non_zero = 1usize;
582
583 let mut i = 0;
584 while i < depth.len() {
585 let value = depth[i];
586 let mut reps = 1usize;
587 while i + reps < depth.len() && depth[i + reps] == value {
588 reps += 1;
589 }
590 if reps >= 3 && value == 0 {
591 total_reps_zero += reps;
592 count_reps_zero += 1;
593 }
594 if reps >= 4 && value != 0 {
595 total_reps_non_zero += reps;
596 count_reps_non_zero += 1;
597 }
598 i += reps;
599 }
600
601 let use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
602 let use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
603 (use_rle_for_non_zero, use_rle_for_zero)
604}
605
606/// Compresses a depth array into RLE-encoded code length codes.
607///
608/// ```cpp
609/// // libjxl: enc_huffman_tree.cc
610/// void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
611/// uint8_t* tree, uint8_t* extra_bits_data) {
612/// uint8_t previous_value = 8;
613/// // Throw away trailing zeros.
614/// size_t new_length = length;
615/// for (size_t i = 0; i < length; ++i) {
616/// if (depth[length - i - 1] == 0) {
617/// --new_length;
618/// } else {
619/// break;
620/// }
621/// }
622/// // First gather statistics on if it is a good idea to do rle.
623/// bool use_rle_for_non_zero = false;
624/// bool use_rle_for_zero = false;
625/// if (length > 50) {
626/// DecideOverRleUse(depth, new_length, &use_rle_for_non_zero, &use_rle_for_zero);
627/// }
628/// // Actual rle coding.
629/// for (size_t i = 0; i < new_length;) {
630/// const uint8_t value = depth[i];
631/// size_t reps = 1;
632/// if ((value != 0 && use_rle_for_non_zero) || (value == 0 && use_rle_for_zero)) {
633/// for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
634/// ++reps;
635/// }
636/// }
637/// if (value == 0) {
638/// WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
639/// } else {
640/// WriteHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree, extra_bits_data);
641/// previous_value = value;
642/// }
643/// i += reps;
644/// }
645/// }
646/// ```
647pub fn write_huffman_tree(depth: &[u8]) -> CompressedTree {
648 let mut tree = CompressedTree {
649 codes: Vec::new(),
650 extra_bits: Vec::new(),
651 };
652
653 // Throw away trailing zeros
654 let mut new_length = depth.len();
655 while new_length > 0 && depth[new_length - 1] == 0 {
656 new_length -= 1;
657 }
658
659 if new_length == 0 {
660 return tree;
661 }
662
663 // Decide whether to use RLE (only for longer sequences)
664 let (use_rle_for_non_zero, use_rle_for_zero) = if depth.len() > 50 {
665 decide_rle_use(&depth[..new_length])
666 } else {
667 (false, false)
668 };
669
670 // Encode with RLE
671 let mut previous_value = 8u8; // Initial "previous" value (won't match any real depth)
672 let mut i = 0;
673 while i < new_length {
674 let value = depth[i];
675 let mut reps = 1usize;
676
677 // Count repetitions if RLE is enabled for this value type
678 if (value != 0 && use_rle_for_non_zero) || (value == 0 && use_rle_for_zero) {
679 while i + reps < new_length && depth[i + reps] == value {
680 reps += 1;
681 }
682 }
683
684 if value == 0 {
685 write_repetitions_zeros(reps, &mut tree);
686 } else {
687 write_repetitions_nonzero(previous_value, value, reps, &mut tree);
688 previous_value = value;
689 }
690 i += reps;
691 }
692
693 tree
694}
695
696// ============================================================================
697// Bitstream Encoding
698// ============================================================================
699
700/// Writes the meta-Huffman tree (tree of tree) to the bitstream.
701///
702/// This encodes which code length codes (0-17) are used and their depths.
703///
704/// ```cpp
705/// // libjxl: enc_huffman.cc
706/// void StoreHuffmanTreeOfHuffmanTreeToBitMask(const int num_codes,
707/// const uint8_t* code_length_bitdepth,
708/// BitWriter* writer) {
709/// // Throw away trailing zeros:
710/// size_t codes_to_store = kCodeLengthCodes;
711/// if (num_codes > 1) {
712/// for (; codes_to_store > 0; --codes_to_store) {
713/// if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
714/// break;
715/// }
716/// }
717/// }
718/// size_t skip_some = 0;
719/// if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
720/// code_length_bitdepth[kStorageOrder[1]] == 0) {
721/// skip_some = 2;
722/// if (code_length_bitdepth[kStorageOrder[2]] == 0) {
723/// skip_some = 3;
724/// }
725/// }
726/// writer->Write(2, skip_some);
727/// for (size_t i = skip_some; i < codes_to_store; ++i) {
728/// size_t l = code_length_bitdepth[kStorageOrder[i]];
729/// writer->Write(kHuffmanBitLengthHuffmanCodeBitLengths[l],
730/// kHuffmanBitLengthHuffmanCodeSymbols[l]);
731/// }
732/// }
733/// ```
734pub fn store_meta_huffman_tree(
735 num_codes: usize,
736 code_length_depths: &[u8; CODE_LENGTH_CODES],
737 writer: &mut BitWriter,
738) -> Result<()> {
739 // Find how many codes to store (trim trailing zeros in storage order)
740 let mut codes_to_store = CODE_LENGTH_CODES;
741 if num_codes > 1 {
742 while codes_to_store > 0 && code_length_depths[STORAGE_ORDER[codes_to_store - 1]] == 0 {
743 codes_to_store -= 1;
744 }
745 }
746
747 // Determine skip count (how many leading zeros in storage order)
748 let mut skip_some = 0usize;
749 if code_length_depths[STORAGE_ORDER[0]] == 0 && code_length_depths[STORAGE_ORDER[1]] == 0 {
750 skip_some = 2;
751 if code_length_depths[STORAGE_ORDER[2]] == 0 {
752 skip_some = 3;
753 }
754 }
755
756 crate::trace::debug_eprintln!(
757 "STORE_META: codes_to_store={}, skip_some={}",
758 codes_to_store,
759 skip_some
760 );
761
762 // Write skip count (2 bits) - this is the simple_code_or_skip field
763 // Values 0, 2, 3 indicate full tree with skip; value 1 would be simple code
764 let _bit_pos_start = writer.bits_written();
765 writer.write(2, skip_some as u64)?;
766 crate::trace::debug_eprintln!(
767 " META: wrote hskip={} at bit {}",
768 skip_some,
769 _bit_pos_start
770 );
771
772 // Write code length depths using static Huffman code
773 // The decoder reads code lengths for symbols in storage order
774 let mut _bitacc = 0usize;
775 #[allow(clippy::unused_enumerate_index)]
776 for (_idx, &symbol) in STORAGE_ORDER[skip_some..codes_to_store].iter().enumerate() {
777 let depth_value = code_length_depths[symbol] as usize;
778 debug_assert!(depth_value <= 5, "Code length depth must be 0-5");
779 let bits = DEPTH_CODE_BIT_LENGTHS[depth_value] as usize;
780 let code = DEPTH_CODE_SYMBOLS[depth_value] as u64;
781 writer.write(bits, code)?;
782 if depth_value > 0 {
783 _bitacc += 32 >> depth_value;
784 }
785 crate::trace::debug_eprintln!(
786 " META[{}]: symbol={}, depth={}, code=0b{:0width$b} ({} bits), bitacc={}",
787 skip_some + _idx,
788 symbol,
789 depth_value,
790 code,
791 bits,
792 _bitacc,
793 width = bits
794 );
795 }
796 crate::trace::debug_eprintln!(" META: final bitacc={} (should be 32)", _bitacc);
797
798 Ok(())
799}
800
801/// Writes the RLE-compressed Huffman tree to the bitstream.
802///
803/// ```cpp
804/// // libjxl: enc_huffman.cc
805/// Status StoreHuffmanTreeToBitMask(const size_t huffman_tree_size,
806/// const uint8_t* huffman_tree,
807/// const uint8_t* huffman_tree_extra_bits,
808/// const uint8_t* code_length_bitdepth,
809/// const uint16_t* code_length_bitdepth_symbols,
810/// BitWriter* writer) {
811/// for (size_t i = 0; i < huffman_tree_size; ++i) {
812/// size_t ix = huffman_tree[i];
813/// writer->Write(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix]);
814/// switch (ix) {
815/// case 16: writer->Write(2, huffman_tree_extra_bits[i]); break;
816/// case 17: writer->Write(3, huffman_tree_extra_bits[i]); break;
817/// }
818/// }
819/// return true;
820/// }
821/// ```
822pub fn store_compressed_tree(
823 tree: &CompressedTree,
824 code_length_depths: &[u8; CODE_LENGTH_CODES],
825 code_length_codes: &[u16; CODE_LENGTH_CODES],
826 writer: &mut BitWriter,
827) -> Result<()> {
828 crate::trace::debug_eprintln!(" TREE: meta_codes = {:?}", &code_length_codes[..5]);
829 crate::trace::debug_eprintln!(" TREE: meta_depths = {:?}", &code_length_depths[..5]);
830
831 for i in 0..tree.codes.len().min(10) {
832 let ix = tree.codes[i] as usize;
833 debug_assert!(ix < CODE_LENGTH_CODES);
834
835 // Write the code for this code length
836 let depth = code_length_depths[ix] as usize;
837 let code = code_length_codes[ix] as u64;
838 let _bit_pos = writer.bits_written();
839 if depth > 0 {
840 writer.write(depth, code)?;
841 }
842 crate::trace::debug_eprintln!(
843 " TREE[{}]: code_len_code={}, meta_depth={}, meta_code=0b{:b}, extra_bits={}",
844 i,
845 ix,
846 depth,
847 code,
848 tree.extra_bits[i]
849 );
850
851 // Write extra bits for RLE codes
852 match ix {
853 16 => writer.write(2, tree.extra_bits[i] as u64)?,
854 17 => writer.write(3, tree.extra_bits[i] as u64)?,
855 _ => {}
856 }
857 }
858
859 // Write remaining codes silently
860 for i in 10..tree.codes.len() {
861 let ix = tree.codes[i] as usize;
862 let depth = code_length_depths[ix] as usize;
863 let code = code_length_codes[ix] as u64;
864 if depth > 0 {
865 writer.write(depth, code)?;
866 }
867 match ix {
868 16 => writer.write(2, tree.extra_bits[i] as u64)?,
869 17 => writer.write(3, tree.extra_bits[i] as u64)?,
870 _ => {}
871 }
872 }
873
874 Ok(())
875}
876
877/// Stores a full Huffman tree to the bitstream.
878///
879/// This is used when there are more than 4 unique symbols.
880///
881/// ```cpp
882/// // libjxl: enc_huffman.cc
883/// Status StoreHuffmanTree(const uint8_t* depths, size_t num, BitWriter* writer) {
884/// // Write the Huffman tree into the compact representation.
885/// auto arena = ...;
886/// uint8_t* huffman_tree = arena.data();
887/// uint8_t* huffman_tree_extra_bits = arena.data() + num;
888/// size_t huffman_tree_size = 0;
889/// WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree, huffman_tree_extra_bits);
890///
891/// // Calculate the statistics of the Huffman tree.
892/// uint32_t huffman_tree_histogram[kCodeLengthCodes] = {0};
893/// for (size_t i = 0; i < huffman_tree_size; ++i) {
894/// ++huffman_tree_histogram[huffman_tree[i]];
895/// }
896///
897/// int num_codes = 0;
898/// int code = 0;
899/// for (int i = 0; i < kCodeLengthCodes; ++i) {
900/// if (huffman_tree_histogram[i]) {
901/// if (num_codes == 0) { code = i; num_codes = 1; }
902/// else if (num_codes == 1) { num_codes = 2; break; }
903/// }
904/// }
905///
906/// // Calculate another Huffman tree for compressing the first.
907/// uint8_t code_length_bitdepth[kCodeLengthCodes] = {0};
908/// uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = {0};
909/// CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, 5, &code_length_bitdepth[0]);
910/// ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes, &code_length_bitdepth_symbols[0]);
911///
912/// StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth, writer);
913///
914/// if (num_codes == 1) {
915/// code_length_bitdepth[code] = 0;
916/// }
917///
918/// StoreHuffmanTreeToBitMask(...);
919/// return true;
920/// }
921/// ```
922pub fn store_huffman_tree(depths: &[u8], writer: &mut BitWriter) -> Result<()> {
923 // Debug: show raw depths for first and last few elements
924 let _first_10: Vec<u8> = depths.iter().take(10).copied().collect();
925 let _last_10: Vec<u8> = depths.iter().rev().take(10).rev().copied().collect();
926 crate::trace::debug_eprintln!(
927 "STORE_HUFF: depths len={}, first_10={:?}, last_10={:?}",
928 depths.len(),
929 _first_10,
930 _last_10
931 );
932
933 // RLE-compress the depth array
934 let compressed = write_huffman_tree(depths);
935 crate::trace::debug_eprintln!(
936 "STORE_HUFF: compressed codes={:?}, extra_bits={:?}",
937 compressed.codes,
938 compressed.extra_bits
939 );
940
941 // Build histogram of code length codes
942 let mut histogram = [0u32; CODE_LENGTH_CODES];
943 for &code in &compressed.codes {
944 histogram[code as usize] += 1;
945 }
946 crate::trace::debug_eprintln!("STORE_HUFF: code_length_histogram={:?}", histogram);
947
948 // Count how many distinct code length codes are used
949 let mut num_codes = 0;
950 let mut single_code = 0;
951 for (i, &count) in histogram.iter().enumerate() {
952 if count > 0 {
953 if num_codes == 0 {
954 single_code = i;
955 num_codes = 1;
956 } else if num_codes == 1 {
957 num_codes = 2;
958 break;
959 }
960 }
961 }
962
963 // Build Huffman tree for the code length codes (meta-Huffman)
964 let code_length_depths = create_huffman_tree(&histogram, MAX_CODE_LENGTH_DEPTH);
965 let mut code_length_depths_arr = [0u8; CODE_LENGTH_CODES];
966 code_length_depths_arr.copy_from_slice(&code_length_depths);
967
968 let code_length_codes_vec = convert_bit_depths_to_symbols(&code_length_depths);
969 let mut code_length_codes_arr = [0u16; CODE_LENGTH_CODES];
970 code_length_codes_arr.copy_from_slice(&code_length_codes_vec);
971
972 crate::trace::debug_eprintln!(
973 "STORE_HUFF: num_codes={}, meta_depths={:?}",
974 num_codes,
975 code_length_depths_arr
976 );
977
978 // Write meta-Huffman tree
979 store_meta_huffman_tree(num_codes, &code_length_depths_arr, writer)?;
980
981 // Special case: single code length code - set its depth to 0 for implicit encoding
982 if num_codes == 1 {
983 code_length_depths_arr[single_code] = 0;
984 }
985
986 // Write the compressed tree
987 store_compressed_tree(
988 &compressed,
989 &code_length_depths_arr,
990 &code_length_codes_arr,
991 writer,
992 )?;
993
994 Ok(())
995}
996
997// ============================================================================
998// Main Entry Point
999// ============================================================================
1000
1001/// Result of building a Huffman table.
1002pub struct HuffmanTable {
1003 /// Code depth (bit length) for each symbol.
1004 pub depths: Vec<u8>,
1005 /// Bit code for each symbol.
1006 pub codes: Vec<u16>,
1007}
1008
1009/// Builds and stores a Huffman table from a histogram.
1010///
1011/// This is the main entry point. It handles:
1012/// - Single symbol (special 4-bit encoding)
1013/// - 2-4 symbols (simple Huffman code)
1014/// - 5+ symbols (full code length table)
1015///
1016/// ```cpp
1017/// // libjxl: enc_huffman.cc
1018/// Status BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length,
1019/// uint8_t* depth, uint16_t* bits, BitWriter* writer) {
1020/// size_t count = 0;
1021/// size_t s4[4] = {0};
1022/// for (size_t i = 0; i < length; i++) {
1023/// if (histogram[i]) {
1024/// if (count < 4) { s4[count] = i; }
1025/// else if (count > 4) { break; }
1026/// count++;
1027/// }
1028/// }
1029///
1030/// size_t max_bits = 0;
1031/// size_t max_bits_counter = length - 1;
1032/// while (max_bits_counter) { max_bits_counter >>= 1; ++max_bits; }
1033///
1034/// if (count <= 1) {
1035/// writer->Write(4, 1);
1036/// writer->Write(max_bits, s4[0]);
1037/// return true;
1038/// }
1039///
1040/// CreateHuffmanTree(histogram, length, 15, depth);
1041/// ConvertBitDepthsToSymbols(depth, length, bits);
1042///
1043/// if (count <= 4) {
1044/// StoreSimpleHuffmanTree(depth, s4, count, max_bits, writer);
1045/// } else {
1046/// JXL_RETURN_IF_ERROR(StoreHuffmanTree(depth, length, writer));
1047/// }
1048/// return true;
1049/// }
1050/// ```
1051pub fn build_and_store_huffman_tree(
1052 histogram: &[u32],
1053 writer: &mut BitWriter,
1054) -> Result<HuffmanTable> {
1055 let length = histogram.len();
1056
1057 // Debug: print non-zero symbols
1058 let nonzero: Vec<(usize, u32)> = histogram
1059 .iter()
1060 .enumerate()
1061 .filter(|&(_, h)| *h > 0)
1062 .map(|(i, h)| (i, *h))
1063 .collect();
1064 crate::trace::debug_eprintln!(
1065 "HUFFMAN_BUILD: alphabet_size={}, nonzero_symbols={:?}",
1066 length,
1067 nonzero
1068 );
1069
1070 // Count non-zero symbols and track first 4
1071 let mut count = 0usize;
1072 let mut s4 = [0usize; 4];
1073 for (i, &h) in histogram.iter().enumerate() {
1074 if h > 0 {
1075 if count < 4 {
1076 s4[count] = i;
1077 }
1078 count += 1;
1079 if count > 4 {
1080 break; // We know it's >4, that's all we need
1081 }
1082 }
1083 }
1084
1085 // Calculate max_bits = ceil(log2(length))
1086 let max_bits = if length <= 1 {
1087 0
1088 } else {
1089 (usize::BITS - (length - 1).leading_zeros()) as usize
1090 };
1091
1092 // Single symbol case
1093 if count <= 1 {
1094 let mut depths = vec![0u8; length];
1095 let codes = vec![0u16; length];
1096 // Write: 4 bits = 0001, then max_bits for symbol
1097 writer.write(4, 1)?;
1098 writer.write(max_bits, s4[0] as u64)?;
1099 // Single symbol has depth 0 (implicit, no bits needed to encode)
1100 if count == 1 {
1101 depths[s4[0]] = 0;
1102 }
1103 return Ok(HuffmanTable { depths, codes });
1104 }
1105
1106 // Build optimal Huffman tree
1107 let depths = create_huffman_tree(histogram, MAX_CODE_DEPTH);
1108 let codes = convert_bit_depths_to_symbols(&depths);
1109
1110 // Debug: print depths for non-zero symbols
1111 let _depths_info: Vec<(usize, u8, u16)> = nonzero
1112 .iter()
1113 .map(|(i, _)| (*i, depths[*i], codes[*i]))
1114 .collect();
1115 crate::trace::debug_eprintln!(
1116 "HUFFMAN_BUILD: depths/codes for used symbols: {:?}",
1117 _depths_info
1118 );
1119
1120 if count <= 4 {
1121 // Simple Huffman code
1122 crate::trace::debug_eprintln!("HUFFMAN_BUILD: using simple code for {} symbols", count);
1123 store_simple_huffman_tree(&depths, &mut s4, count, max_bits, writer)?;
1124 } else {
1125 // Full code length table
1126 crate::trace::debug_eprintln!("HUFFMAN_BUILD: using full table for {} symbols", count);
1127 store_huffman_tree(&depths, writer)?;
1128 }
1129
1130 Ok(HuffmanTable { depths, codes })
1131}
1132
1133/// Stores a simple Huffman tree (1-4 symbols).
1134///
1135/// ```cpp
1136/// // libjxl: enc_huffman.cc
1137/// void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4],
1138/// size_t num_symbols, size_t max_bits, BitWriter* writer) {
1139/// writer->Write(2, 1); // simple code marker
1140/// writer->Write(2, num_symbols - 1); // NSYM - 1
1141/// // Sort by depth
1142/// for (size_t i = 0; i < num_symbols; i++) {
1143/// for (size_t j = i + 1; j < num_symbols; j++) {
1144/// if (depths[symbols[j]] < depths[symbols[i]]) {
1145/// std::swap(symbols[j], symbols[i]);
1146/// }
1147/// }
1148/// }
1149/// // Write symbols
1150/// for (size_t i = 0; i < num_symbols; i++) {
1151/// writer->Write(max_bits, symbols[i]);
1152/// }
1153/// // tree_select for 4 symbols
1154/// if (num_symbols == 4) {
1155/// writer->Write(1, depths[symbols[0]] == 1 ? 1 : 0);
1156/// }
1157/// }
1158/// ```
1159fn store_simple_huffman_tree(
1160 depths: &[u8],
1161 symbols: &mut [usize; 4],
1162 num_symbols: usize,
1163 max_bits: usize,
1164 writer: &mut BitWriter,
1165) -> Result<()> {
1166 // Write simple code marker
1167 writer.write(2, 1)?;
1168 writer.write(2, (num_symbols - 1) as u64)?;
1169
1170 // Sort symbols by depth (ascending)
1171 for i in 0..num_symbols {
1172 for j in i + 1..num_symbols {
1173 if depths[symbols[j]] < depths[symbols[i]] {
1174 symbols.swap(i, j);
1175 }
1176 }
1177 }
1178
1179 // Write symbol indices
1180 for &sym in symbols.iter().take(num_symbols) {
1181 writer.write(max_bits, sym as u64)?;
1182 }
1183
1184 // tree_select for 4 symbols
1185 if num_symbols == 4 {
1186 let tree_select = if depths[symbols[0]] == 1 { 1 } else { 0 };
1187 writer.write(1, tree_select)?;
1188 }
1189
1190 Ok(())
1191}
1192
1193// ============================================================================
1194// Tests
1195// ============================================================================
1196
1197#[cfg(test)]
1198mod tests {
1199 use super::*;
1200
1201 #[test]
1202 fn test_create_huffman_tree_single_symbol() {
1203 let histogram = [0, 0, 100, 0, 0];
1204 let depths = create_huffman_tree(&histogram, 15);
1205 assert_eq!(depths, vec![0, 0, 1, 0, 0]);
1206 }
1207
1208 #[test]
1209 fn test_create_huffman_tree_two_symbols() {
1210 let histogram = [50, 0, 50, 0, 0];
1211 let depths = create_huffman_tree(&histogram, 15);
1212 // Both symbols should have depth 1
1213 assert_eq!(depths[0], 1);
1214 assert_eq!(depths[2], 1);
1215 }
1216
1217 #[test]
1218 fn test_create_huffman_tree_four_symbols() {
1219 let histogram = [10, 20, 30, 40, 0];
1220 let depths = create_huffman_tree(&histogram, 15);
1221 // Most frequent (40) should have shortest depth
1222 assert!(depths[3] <= depths[2]);
1223 assert!(depths[2] <= depths[1]);
1224 assert!(depths[1] <= depths[0]);
1225 }
1226
1227 #[test]
1228 fn test_create_huffman_tree_many_symbols() {
1229 // 10 symbols with varying frequencies
1230 let histogram = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512];
1231 let depths = create_huffman_tree(&histogram, 15);
1232
1233 // All non-zero symbols should have non-zero depth
1234 for (i, &d) in depths.iter().enumerate() {
1235 assert!(d > 0, "Symbol {} should have non-zero depth", i);
1236 }
1237
1238 // More frequent symbols should have shorter or equal depth
1239 for i in 1..10 {
1240 assert!(
1241 depths[i] <= depths[i - 1],
1242 "Symbol {} (freq {}) should have depth <= symbol {} (freq {})",
1243 i,
1244 histogram[i],
1245 i - 1,
1246 histogram[i - 1]
1247 );
1248 }
1249 }
1250
1251 #[test]
1252 fn test_create_huffman_tree_depth_limit() {
1253 // Many symbols with same frequency - could create deep tree
1254 let histogram = vec![1u32; 100];
1255 let depths = create_huffman_tree(&histogram, 15);
1256
1257 // All depths should be <= 15
1258 for &d in &depths {
1259 assert!(d <= 15, "Depth {} exceeds limit of 15", d);
1260 }
1261 }
1262
1263 #[test]
1264 fn test_reverse_bits() {
1265 assert_eq!(reverse_bits(1, 0), 0);
1266 assert_eq!(reverse_bits(1, 1), 1);
1267 assert_eq!(reverse_bits(2, 0b01), 0b10);
1268 assert_eq!(reverse_bits(2, 0b10), 0b01);
1269 assert_eq!(reverse_bits(3, 0b001), 0b100);
1270 assert_eq!(reverse_bits(4, 0b0001), 0b1000);
1271 assert_eq!(reverse_bits(8, 0b10101010), 0b01010101);
1272 }
1273
1274 #[test]
1275 fn test_convert_bit_depths_to_symbols() {
1276 // Two symbols at depth 1
1277 let depths = [1, 1, 0];
1278 let codes = convert_bit_depths_to_symbols(&depths);
1279 // Should get codes 0 and 1 (reversed)
1280 assert_eq!(codes[0], 0);
1281 assert_eq!(codes[1], 1);
1282 }
1283
1284 #[test]
1285 fn test_convert_bit_depths_three_symbols() {
1286 // Three symbols: depths 1, 2, 2
1287 let depths = [1, 2, 2, 0];
1288 let codes = convert_bit_depths_to_symbols(&depths);
1289 // Symbol 0: depth 1, code 0
1290 // Symbol 1: depth 2, code 10 -> reversed = 01
1291 // Symbol 2: depth 2, code 11 -> reversed = 11
1292 assert_eq!(codes[0], 0b0);
1293 assert_eq!(codes[1], 0b01);
1294 assert_eq!(codes[2], 0b11);
1295 }
1296
1297 #[test]
1298 fn test_write_huffman_tree_simple() {
1299 // Simple depth array
1300 let depths = [2, 2, 0, 0];
1301 let tree = write_huffman_tree(&depths);
1302 // Should just have two 2s
1303 assert_eq!(tree.codes, vec![2, 2]);
1304 assert!(tree.extra_bits.iter().all(|&x| x == 0));
1305 }
1306
1307 #[test]
1308 fn test_write_huffman_tree_with_zeros() {
1309 let depths = [3, 0, 0, 0, 3];
1310 let tree = write_huffman_tree(&depths);
1311 // Should encode 3, zeros (maybe RLE), 3
1312 // Note: trailing zeros are stripped, so we only go to index 4
1313 assert!(!tree.codes.is_empty());
1314 }
1315
1316 #[test]
1317 fn test_write_huffman_tree_long_zeros() {
1318 // Many zeros in the middle - should trigger RLE
1319 let mut depths = vec![0u8; 100];
1320 depths[0] = 3;
1321 depths[99] = 3;
1322 let tree = write_huffman_tree(&depths);
1323 // Should use code 17 for zero runs
1324 assert!(tree.codes.contains(&17));
1325 }
1326
1327 #[test]
1328 fn test_build_and_store_single_symbol() {
1329 let histogram = [0, 0, 100, 0, 0];
1330 let mut writer = BitWriter::new();
1331 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1332
1333 // Single symbol should have depth 0
1334 assert_eq!(table.depths[2], 0);
1335
1336 // Check output format: 4 bits = 0001, then max_bits for symbol
1337 let bytes = writer.finish_with_padding();
1338 assert!(!bytes.is_empty());
1339 }
1340
1341 #[test]
1342 fn test_build_and_store_two_symbols() {
1343 let histogram = [10, 0, 20, 0, 0];
1344 let mut writer = BitWriter::new();
1345 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1346
1347 // Both should have depth 1
1348 assert_eq!(table.depths[0], 1);
1349 assert_eq!(table.depths[2], 1);
1350
1351 let bytes = writer.finish_with_padding();
1352 // Should start with simple code marker (bits: 01)
1353 assert!(!bytes.is_empty());
1354 }
1355
1356 #[test]
1357 fn test_build_and_store_many_symbols() {
1358 // 10 symbols - requires full code length table
1359 let histogram: Vec<u32> = (1..=10).map(|x| x * 10).collect();
1360 let mut writer = BitWriter::new();
1361 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1362
1363 // All symbols should have non-zero depth
1364 for i in 0..10 {
1365 assert!(table.depths[i] > 0);
1366 }
1367
1368 let bytes = writer.finish_with_padding();
1369 assert!(!bytes.is_empty());
1370 }
1371
1372 #[test]
1373 fn test_build_and_store_256_symbols() {
1374 // Full byte alphabet with varying frequencies
1375 let mut histogram = vec![0u32; 256];
1376 for (i, h) in histogram.iter_mut().enumerate() {
1377 *h = (i as u32 + 1) * 2;
1378 }
1379
1380 let mut writer = BitWriter::new();
1381 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1382
1383 // All symbols should have valid depth
1384 for i in 0..256 {
1385 assert!(table.depths[i] > 0 && table.depths[i] <= 15);
1386 }
1387
1388 let bytes = writer.finish_with_padding();
1389 assert!(!bytes.is_empty());
1390 }
1391
1392 #[test]
1393 fn test_canonical_codes_are_prefix_free() {
1394 let histogram: Vec<u32> = (1..=20).map(|x| x * 5).collect();
1395 let depths = create_huffman_tree(&histogram, 15);
1396 let codes = convert_bit_depths_to_symbols(&depths);
1397
1398 // Verify no code is a prefix of another
1399 for i in 0..20 {
1400 if depths[i] == 0 {
1401 continue;
1402 }
1403 for j in i + 1..20 {
1404 if depths[j] == 0 {
1405 continue;
1406 }
1407 let (shorter, longer) = if depths[i] <= depths[j] {
1408 ((codes[i], depths[i]), (codes[j], depths[j]))
1409 } else {
1410 ((codes[j], depths[j]), (codes[i], depths[i]))
1411 };
1412
1413 // Extract prefix of longer code
1414 let prefix_mask = (1u16 << shorter.1) - 1;
1415 let longer_prefix = longer.0 & prefix_mask;
1416
1417 assert_ne!(
1418 shorter.0, longer_prefix,
1419 "Code {} (depth {}) is prefix of code {} (depth {})",
1420 shorter.0, shorter.1, longer.0, longer.1
1421 );
1422 }
1423 }
1424 }
1425}