jxl_encoder/entropy_coding/huffman_tree.rs
1// Copyright (c) Imazen LLC and the JPEG XL Project Authors.
2// Algorithms and constants derived from libjxl (BSD-3-Clause).
3// Licensed under AGPL-3.0-or-later. Commercial licenses at https://www.imazen.io/pricing
4
5//! Full Huffman tree encoder supporting arbitrary alphabet sizes.
6//!
7//! This module ports libjxl's `enc_huffman_tree.cc` and parts of `enc_huffman.cc`.
8//! It supports encoding Huffman tables with more than 4 symbols using the
9//! code length table format with RLE compression.
10//!
11//! # Architecture
12//! 1. `create_huffman_tree` - builds optimal tree, outputs depth array
13//! 2. `convert_bit_depths_to_symbols` - converts depths to canonical codes
14//! 3. `write_huffman_tree` - RLE-compresses depth array for storage
15//! 4. `store_huffman_tree` - writes compressed tree to bitstream
16
17use crate::bit_writer::BitWriter;
18use crate::error::Result;
19
20/// Number of code length codes (0-15 for depths, 16-17 for RLE).
21const CODE_LENGTH_CODES: usize = 18;
22
23/// Maximum code depth for regular Huffman codes.
24const MAX_CODE_DEPTH: u8 = 15;
25
26/// Maximum code depth for code length codes (meta-Huffman).
27const MAX_CODE_LENGTH_DEPTH: u8 = 5;
28
29/// Order in which code length codes are stored.
30/// This ordering groups commonly-used codes together to allow truncation.
31///
32/// ```cpp
33/// // libjxl: enc_huffman.cc
34/// static const uint8_t kStorageOrder[kCodeLengthCodes] = {
35/// 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15};
36/// ```
37const STORAGE_ORDER: [usize; CODE_LENGTH_CODES] =
38 [1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15];
39
40/// Static Huffman code for encoding code length code depths.
41/// Maps depth values 0-5 to their codes.
42///
43/// ```cpp
44/// // libjxl: enc_huffman.cc
45/// // Symbol Code
46/// // ------ ----
47/// // 0 00
48/// // 1 1110
49/// // 2 110
50/// // 3 01
51/// // 4 10
52/// // 5 1111
53/// static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {0, 7, 3, 2, 1, 15};
54/// static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {2, 4, 3, 2, 2, 4};
55/// ```
56const DEPTH_CODE_SYMBOLS: [u8; 6] = [0, 7, 3, 2, 1, 15];
57const DEPTH_CODE_BIT_LENGTHS: [u8; 6] = [2, 4, 3, 2, 2, 4];
58
59// ============================================================================
60// Huffman Tree Building
61// ============================================================================
62
63/// Node in the Huffman tree during construction.
64///
65/// ```cpp
66/// // libjxl: enc_huffman_tree.h
67/// struct HuffmanTree {
68/// uint32_t total_count;
69/// int16_t index_left;
70/// int16_t index_right_or_value;
71/// };
72/// ```
73#[derive(Clone, Copy, Debug)]
74struct HuffmanNode {
75 /// Total count (frequency) of this subtree.
76 total_count: u32,
77 /// Index of left child, or -1 if leaf.
78 index_left: i32,
79 /// Index of right child, or symbol value if leaf.
80 /// Note: libjxl uses i16, but we need i32 for alphabets > 32K symbols.
81 index_right_or_value: i32,
82}
83
84impl HuffmanNode {
85 fn leaf(count: u32, symbol: usize) -> Self {
86 Self {
87 total_count: count,
88 index_left: -1,
89 index_right_or_value: symbol as i32,
90 }
91 }
92
93 fn sentinel() -> Self {
94 Self {
95 total_count: u32::MAX,
96 index_left: -1,
97 index_right_or_value: -1,
98 }
99 }
100
101 #[allow(dead_code)]
102 fn is_leaf(&self) -> bool {
103 self.index_left < 0
104 }
105}
106
107/// Recursively set depth for all symbols in a subtree.
108///
109/// ```cpp
110/// // libjxl: enc_huffman_tree.cc
111/// void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth, uint8_t level) {
112/// if (p.index_left >= 0) {
113/// ++level;
114/// SetDepth(pool[p.index_left], pool, depth, level);
115/// SetDepth(pool[p.index_right_or_value], pool, depth, level);
116/// } else {
117/// depth[p.index_right_or_value] = level;
118/// }
119/// }
120/// ```
121fn set_depth(pool: &[HuffmanNode], root_idx: usize, depth: &mut [u8]) {
122 // Iterative DFS to avoid stack overflow on deep/degenerate Huffman trees.
123 // A degenerate tree with N leaves can have depth N-1, which overflows the
124 // call stack for N > ~10K symbols (common with patch position histograms).
125 let mut stack: Vec<(usize, u8)> = Vec::with_capacity(64);
126 stack.push((root_idx, 0));
127
128 while let Some((idx, level)) = stack.pop() {
129 let node = &pool[idx];
130 if node.index_left >= 0 {
131 let next_level = level + 1;
132 stack.push((node.index_left as usize, next_level));
133 stack.push((node.index_right_or_value as usize, next_level));
134 } else {
135 depth[node.index_right_or_value as usize] = level;
136 }
137 }
138}
139
140/// Creates an optimal Huffman tree from symbol frequencies.
141///
142/// Returns the depth (code length) for each symbol. Symbols with zero
143/// frequency get depth 0 (meaning they don't appear in the tree).
144///
145/// The tree depth is limited to `tree_limit` bits. If the natural tree
146/// would exceed this, the algorithm retries with inflated minimum counts.
147///
148/// ```cpp
149/// // libjxl: enc_huffman_tree.cc
150/// void CreateHuffmanTree(const uint32_t* data, const size_t length,
151/// const int tree_limit, uint8_t* depth) {
152/// for (uint32_t count_limit = 1;; count_limit *= 2) {
153/// std::vector<HuffmanTree> tree;
154/// tree.reserve(2 * length + 1);
155/// // ... build tree ...
156/// if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
157/// break;
158/// }
159/// }
160/// }
161/// ```
162pub fn create_huffman_tree(histogram: &[u32], tree_limit: u8) -> Vec<u8> {
163 let length = histogram.len();
164 let mut depth = vec![0u8; length];
165
166 // Retry with increasing count_limit until tree fits in tree_limit bits
167 let mut count_limit: u32 = 1;
168 loop {
169 // Build list of leaf nodes for non-zero symbols
170 // Process in reverse order so that ties favor lower symbol indices
171 let mut tree: Vec<HuffmanNode> = Vec::with_capacity(2 * length + 1);
172 for i in (0..length).rev() {
173 if histogram[i] > 0 {
174 // Clamp count to at least count_limit - 1 to limit tree depth
175 let count = histogram[i].max(count_limit.saturating_sub(1));
176 tree.push(HuffmanNode::leaf(count, i));
177 }
178 }
179
180 let n = tree.len();
181 if n == 0 {
182 // No symbols - return all zeros
183 return depth;
184 }
185 if n == 1 {
186 // Single symbol - give it depth 1 (will be corrected by caller)
187 depth[tree[0].index_right_or_value as usize] = 1;
188 return depth;
189 }
190
191 // Sort by count (ascending), then by symbol index (descending for ties)
192 tree.sort_by(|a, b| {
193 if a.total_count != b.total_count {
194 a.total_count.cmp(&b.total_count)
195 } else {
196 // Higher index first for ties (reverse order)
197 b.index_right_or_value.cmp(&a.index_right_or_value)
198 }
199 });
200
201 // Add sentinel nodes
202 tree.push(HuffmanNode::sentinel());
203 tree.push(HuffmanNode::sentinel());
204
205 // Build tree using two-queue algorithm
206 // i: next leaf node, j: next internal node
207 let mut i = 0;
208 let mut j = n + 1;
209
210 for _ in 0..n - 1 {
211 // Pick two smallest nodes
212 let left = if tree[i].total_count <= tree[j].total_count {
213 let idx = i;
214 i += 1;
215 idx
216 } else {
217 let idx = j;
218 j += 1;
219 idx
220 };
221
222 let right = if tree[i].total_count <= tree[j].total_count {
223 let idx = i;
224 i += 1;
225 idx
226 } else {
227 let idx = j;
228 j += 1;
229 idx
230 };
231
232 // Create parent node at the sentinel position
233 let j_end = tree.len() - 1;
234 tree[j_end].total_count = tree[left].total_count + tree[right].total_count;
235 tree[j_end].index_left = left as i32;
236 tree[j_end].index_right_or_value = right as i32;
237
238 // Add new sentinel
239 tree.push(HuffmanNode::sentinel());
240 }
241
242 // Extract depths from tree
243 depth.fill(0);
244 set_depth(&tree, 2 * n - 1, &mut depth);
245
246 // Check if tree fits in limit
247 if depth.iter().copied().max().unwrap_or(0) <= tree_limit {
248 break;
249 }
250
251 // Tree too deep - retry with higher count_limit
252 count_limit *= 2;
253 }
254
255 depth
256}
257
258// ============================================================================
259// Bit Code Generation
260// ============================================================================
261
262/// Reverses bits in a value.
263///
264/// ```cpp
265/// // libjxl: enc_huffman_tree.cc
266/// uint16_t ReverseBits(int num_bits, uint16_t bits) {
267/// static const size_t kLut[16] = {
268/// 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
269/// 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf};
270/// size_t retval = kLut[bits & 0xf];
271/// for (int i = 4; i < num_bits; i += 4) {
272/// retval <<= 4;
273/// bits = static_cast<uint16_t>(bits >> 4);
274/// retval |= kLut[bits & 0xf];
275/// }
276/// retval >>= (-num_bits & 0x3);
277/// return static_cast<uint16_t>(retval);
278/// }
279/// ```
280fn reverse_bits(num_bits: u8, bits: u16) -> u16 {
281 const LUT: [u16; 16] = [
282 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
283 ];
284
285 let mut retval = LUT[(bits & 0xf) as usize];
286 let mut bits = bits;
287 let mut i = 4i32;
288 while i < num_bits as i32 {
289 retval <<= 4;
290 bits >>= 4;
291 retval |= LUT[(bits & 0xf) as usize];
292 i += 4;
293 }
294 retval >>= (-(num_bits as i32) & 0x3) as u32;
295 retval
296}
297
298/// Converts code depths to canonical Huffman codes.
299///
300/// Returns (codes, depths) where `codes[i]` is the bit pattern for symbol `i`.
301///
302/// ```cpp
303/// // libjxl: enc_huffman_tree.cc
304/// void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, uint16_t* bits) {
305/// const int kMaxBits = 16;
306/// uint16_t bl_count[kMaxBits] = {0};
307/// for (size_t i = 0; i < len; ++i) {
308/// ++bl_count[depth[i]];
309/// }
310/// bl_count[0] = 0;
311/// uint16_t next_code[kMaxBits];
312/// next_code[0] = 0;
313/// int code = 0;
314/// for (size_t i = 1; i < kMaxBits; ++i) {
315/// code = (code + bl_count[i - 1]) << 1;
316/// next_code[i] = static_cast<uint16_t>(code);
317/// }
318/// for (size_t i = 0; i < len; ++i) {
319/// if (depth[i]) {
320/// bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
321/// }
322/// }
323/// }
324/// ```
325pub fn convert_bit_depths_to_symbols(depth: &[u8]) -> Vec<u16> {
326 const MAX_BITS: usize = 16;
327 let len = depth.len();
328 let mut bits = vec![0u16; len];
329
330 // Count symbols at each depth
331 let mut bl_count = [0u16; MAX_BITS];
332 for &d in depth {
333 bl_count[d as usize] += 1;
334 }
335 bl_count[0] = 0; // Depth 0 means symbol doesn't exist
336
337 // Compute first code at each depth
338 let mut next_code = [0u16; MAX_BITS];
339 let mut code = 0i32;
340 for i in 1..MAX_BITS {
341 code = (code + bl_count[i - 1] as i32) << 1;
342 next_code[i] = code as u16;
343 }
344
345 // Assign codes to symbols
346 for i in 0..len {
347 let d = depth[i];
348 if d > 0 {
349 bits[i] = reverse_bits(d, next_code[d as usize]);
350 next_code[d as usize] += 1;
351 }
352 }
353
354 bits
355}
356
357// ============================================================================
358// RLE Compression of Code Lengths
359// ============================================================================
360
361/// RLE-compressed representation of a Huffman tree.
362#[derive(Debug, Clone)]
363pub struct CompressedTree {
364 /// Code length codes (0-17)
365 pub codes: Vec<u8>,
366 /// Extra bits for each code (used by codes 16 and 17)
367 pub extra_bits: Vec<u8>,
368}
369
370/// Reverse a slice in place.
371fn reverse_slice(v: &mut [u8], start: usize, end: usize) {
372 let mut s = start;
373 let mut e = end - 1;
374 while s < e {
375 v.swap(s, e);
376 s += 1;
377 e -= 1;
378 }
379}
380
381/// Write RLE-encoded repetitions of zeros.
382///
383/// ```cpp
384/// // libjxl: enc_huffman_tree.cc
385/// void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size,
386/// uint8_t* tree, uint8_t* extra_bits_data) {
387/// if (repetitions == 11) {
388/// tree[*tree_size] = 0;
389/// extra_bits_data[*tree_size] = 0;
390/// ++(*tree_size);
391/// --repetitions;
392/// }
393/// if (repetitions < 3) {
394/// for (size_t i = 0; i < repetitions; ++i) {
395/// tree[*tree_size] = 0;
396/// extra_bits_data[*tree_size] = 0;
397/// ++(*tree_size);
398/// }
399/// } else {
400/// repetitions -= 3;
401/// size_t start = *tree_size;
402/// while (true) {
403/// tree[*tree_size] = 17;
404/// extra_bits_data[*tree_size] = repetitions & 0x7;
405/// ++(*tree_size);
406/// repetitions >>= 3;
407/// if (repetitions == 0) {
408/// break;
409/// }
410/// --repetitions;
411/// }
412/// Reverse(tree, start, *tree_size);
413/// Reverse(extra_bits_data, start, *tree_size);
414/// }
415/// }
416/// ```
417fn write_repetitions_zeros(mut repetitions: usize, tree: &mut CompressedTree) {
418 // Special case: 11 reps would need code 17 with extra=8, but max extra is 7
419 if repetitions == 11 {
420 tree.codes.push(0);
421 tree.extra_bits.push(0);
422 repetitions -= 1;
423 }
424
425 if repetitions < 3 {
426 // Write individual zeros
427 for _ in 0..repetitions {
428 tree.codes.push(0);
429 tree.extra_bits.push(0);
430 }
431 } else {
432 // Use code 17: repeat zeros with 3 extra bits (3-10 reps)
433 repetitions -= 3;
434 let start = tree.codes.len();
435 loop {
436 tree.codes.push(17);
437 tree.extra_bits.push((repetitions & 0x7) as u8);
438 repetitions >>= 3;
439 if repetitions == 0 {
440 break;
441 }
442 repetitions -= 1;
443 }
444 // Reverse the codes we just wrote
445 let end = tree.codes.len();
446 reverse_slice(&mut tree.codes, start, end);
447 reverse_slice(&mut tree.extra_bits, start, end);
448 }
449}
450
451/// Write RLE-encoded repetitions of non-zero values.
452///
453/// ```cpp
454/// // libjxl: enc_huffman_tree.cc
455/// void WriteHuffmanTreeRepetitions(const uint8_t previous_value,
456/// const uint8_t value, size_t repetitions,
457/// size_t* tree_size, uint8_t* tree,
458/// uint8_t* extra_bits_data) {
459/// JXL_DASSERT(repetitions > 0);
460/// if (previous_value != value) {
461/// tree[*tree_size] = value;
462/// extra_bits_data[*tree_size] = 0;
463/// ++(*tree_size);
464/// --repetitions;
465/// }
466/// if (repetitions == 7) {
467/// tree[*tree_size] = value;
468/// extra_bits_data[*tree_size] = 0;
469/// ++(*tree_size);
470/// --repetitions;
471/// }
472/// if (repetitions < 3) {
473/// for (size_t i = 0; i < repetitions; ++i) {
474/// tree[*tree_size] = value;
475/// extra_bits_data[*tree_size] = 0;
476/// ++(*tree_size);
477/// }
478/// } else {
479/// repetitions -= 3;
480/// size_t start = *tree_size;
481/// while (true) {
482/// tree[*tree_size] = 16;
483/// extra_bits_data[*tree_size] = repetitions & 0x3;
484/// ++(*tree_size);
485/// repetitions >>= 2;
486/// if (repetitions == 0) {
487/// break;
488/// }
489/// --repetitions;
490/// }
491/// Reverse(tree, start, *tree_size);
492/// Reverse(extra_bits_data, start, *tree_size);
493/// }
494/// }
495/// ```
496fn write_repetitions_nonzero(
497 previous_value: u8,
498 value: u8,
499 mut repetitions: usize,
500 tree: &mut CompressedTree,
501) {
502 debug_assert!(repetitions > 0);
503
504 // If value changed, write it literally first
505 if previous_value != value {
506 tree.codes.push(value);
507 tree.extra_bits.push(0);
508 repetitions -= 1;
509 }
510
511 // Special case: 7 reps would need code 16 with extra=4, but max extra is 3
512 if repetitions == 7 {
513 tree.codes.push(value);
514 tree.extra_bits.push(0);
515 repetitions -= 1;
516 }
517
518 if repetitions < 3 {
519 // Write individual values
520 for _ in 0..repetitions {
521 tree.codes.push(value);
522 tree.extra_bits.push(0);
523 }
524 } else {
525 // Use code 16: repeat previous with 2 extra bits (3-6 reps)
526 repetitions -= 3;
527 let start = tree.codes.len();
528 loop {
529 tree.codes.push(16);
530 tree.extra_bits.push((repetitions & 0x3) as u8);
531 repetitions >>= 2;
532 if repetitions == 0 {
533 break;
534 }
535 repetitions -= 1;
536 }
537 // Reverse the codes we just wrote
538 let end = tree.codes.len();
539 reverse_slice(&mut tree.codes, start, end);
540 reverse_slice(&mut tree.extra_bits, start, end);
541 }
542}
543
544/// Decide whether to use RLE for zeros and non-zeros.
545///
546/// ```cpp
547/// // libjxl: enc_huffman_tree.cc
548/// static void DecideOverRleUse(const uint8_t* depth, const size_t length,
549/// bool* use_rle_for_non_zero, bool* use_rle_for_zero) {
550/// size_t total_reps_zero = 0;
551/// size_t total_reps_non_zero = 0;
552/// size_t count_reps_zero = 1;
553/// size_t count_reps_non_zero = 1;
554/// for (size_t i = 0; i < length;) {
555/// const uint8_t value = depth[i];
556/// size_t reps = 1;
557/// for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
558/// ++reps;
559/// }
560/// if (reps >= 3 && value == 0) {
561/// total_reps_zero += reps;
562/// ++count_reps_zero;
563/// }
564/// if (reps >= 4 && value != 0) {
565/// total_reps_non_zero += reps;
566/// ++count_reps_non_zero;
567/// }
568/// i += reps;
569/// }
570/// *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
571/// *use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
572/// }
573/// ```
574fn decide_rle_use(depth: &[u8]) -> (bool, bool) {
575 let mut total_reps_zero = 0usize;
576 let mut total_reps_non_zero = 0usize;
577 let mut count_reps_zero = 1usize;
578 let mut count_reps_non_zero = 1usize;
579
580 let mut i = 0;
581 while i < depth.len() {
582 let value = depth[i];
583 let mut reps = 1usize;
584 while i + reps < depth.len() && depth[i + reps] == value {
585 reps += 1;
586 }
587 if reps >= 3 && value == 0 {
588 total_reps_zero += reps;
589 count_reps_zero += 1;
590 }
591 if reps >= 4 && value != 0 {
592 total_reps_non_zero += reps;
593 count_reps_non_zero += 1;
594 }
595 i += reps;
596 }
597
598 let use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
599 let use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
600 (use_rle_for_non_zero, use_rle_for_zero)
601}
602
603/// Compresses a depth array into RLE-encoded code length codes.
604///
605/// ```cpp
606/// // libjxl: enc_huffman_tree.cc
607/// void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
608/// uint8_t* tree, uint8_t* extra_bits_data) {
609/// uint8_t previous_value = 8;
610/// // Throw away trailing zeros.
611/// size_t new_length = length;
612/// for (size_t i = 0; i < length; ++i) {
613/// if (depth[length - i - 1] == 0) {
614/// --new_length;
615/// } else {
616/// break;
617/// }
618/// }
619/// // First gather statistics on if it is a good idea to do rle.
620/// bool use_rle_for_non_zero = false;
621/// bool use_rle_for_zero = false;
622/// if (length > 50) {
623/// DecideOverRleUse(depth, new_length, &use_rle_for_non_zero, &use_rle_for_zero);
624/// }
625/// // Actual rle coding.
626/// for (size_t i = 0; i < new_length;) {
627/// const uint8_t value = depth[i];
628/// size_t reps = 1;
629/// if ((value != 0 && use_rle_for_non_zero) || (value == 0 && use_rle_for_zero)) {
630/// for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
631/// ++reps;
632/// }
633/// }
634/// if (value == 0) {
635/// WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
636/// } else {
637/// WriteHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree, extra_bits_data);
638/// previous_value = value;
639/// }
640/// i += reps;
641/// }
642/// }
643/// ```
644pub fn write_huffman_tree(depth: &[u8]) -> CompressedTree {
645 let mut tree = CompressedTree {
646 codes: Vec::new(),
647 extra_bits: Vec::new(),
648 };
649
650 // Throw away trailing zeros
651 let mut new_length = depth.len();
652 while new_length > 0 && depth[new_length - 1] == 0 {
653 new_length -= 1;
654 }
655
656 if new_length == 0 {
657 return tree;
658 }
659
660 // Decide whether to use RLE (only for longer sequences)
661 let (use_rle_for_non_zero, use_rle_for_zero) = if depth.len() > 50 {
662 decide_rle_use(&depth[..new_length])
663 } else {
664 (false, false)
665 };
666
667 // Encode with RLE
668 let mut previous_value = 8u8; // Initial "previous" value (won't match any real depth)
669 let mut i = 0;
670 while i < new_length {
671 let value = depth[i];
672 let mut reps = 1usize;
673
674 // Count repetitions if RLE is enabled for this value type
675 if (value != 0 && use_rle_for_non_zero) || (value == 0 && use_rle_for_zero) {
676 while i + reps < new_length && depth[i + reps] == value {
677 reps += 1;
678 }
679 }
680
681 if value == 0 {
682 write_repetitions_zeros(reps, &mut tree);
683 } else {
684 write_repetitions_nonzero(previous_value, value, reps, &mut tree);
685 previous_value = value;
686 }
687 i += reps;
688 }
689
690 tree
691}
692
693// ============================================================================
694// Bitstream Encoding
695// ============================================================================
696
697/// Writes the meta-Huffman tree (tree of tree) to the bitstream.
698///
699/// This encodes which code length codes (0-17) are used and their depths.
700///
701/// ```cpp
702/// // libjxl: enc_huffman.cc
703/// void StoreHuffmanTreeOfHuffmanTreeToBitMask(const int num_codes,
704/// const uint8_t* code_length_bitdepth,
705/// BitWriter* writer) {
706/// // Throw away trailing zeros:
707/// size_t codes_to_store = kCodeLengthCodes;
708/// if (num_codes > 1) {
709/// for (; codes_to_store > 0; --codes_to_store) {
710/// if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
711/// break;
712/// }
713/// }
714/// }
715/// size_t skip_some = 0;
716/// if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
717/// code_length_bitdepth[kStorageOrder[1]] == 0) {
718/// skip_some = 2;
719/// if (code_length_bitdepth[kStorageOrder[2]] == 0) {
720/// skip_some = 3;
721/// }
722/// }
723/// writer->Write(2, skip_some);
724/// for (size_t i = skip_some; i < codes_to_store; ++i) {
725/// size_t l = code_length_bitdepth[kStorageOrder[i]];
726/// writer->Write(kHuffmanBitLengthHuffmanCodeBitLengths[l],
727/// kHuffmanBitLengthHuffmanCodeSymbols[l]);
728/// }
729/// }
730/// ```
731pub fn store_meta_huffman_tree(
732 num_codes: usize,
733 code_length_depths: &[u8; CODE_LENGTH_CODES],
734 writer: &mut BitWriter,
735) -> Result<()> {
736 // Find how many codes to store (trim trailing zeros in storage order)
737 let mut codes_to_store = CODE_LENGTH_CODES;
738 if num_codes > 1 {
739 while codes_to_store > 0 && code_length_depths[STORAGE_ORDER[codes_to_store - 1]] == 0 {
740 codes_to_store -= 1;
741 }
742 }
743
744 // Determine skip count (how many leading zeros in storage order)
745 let mut skip_some = 0usize;
746 if code_length_depths[STORAGE_ORDER[0]] == 0 && code_length_depths[STORAGE_ORDER[1]] == 0 {
747 skip_some = 2;
748 if code_length_depths[STORAGE_ORDER[2]] == 0 {
749 skip_some = 3;
750 }
751 }
752
753 crate::trace::debug_eprintln!(
754 "STORE_META: codes_to_store={}, skip_some={}",
755 codes_to_store,
756 skip_some
757 );
758
759 // Write skip count (2 bits) - this is the simple_code_or_skip field
760 // Values 0, 2, 3 indicate full tree with skip; value 1 would be simple code
761 let _bit_pos_start = writer.bits_written();
762 writer.write(2, skip_some as u64)?;
763 crate::trace::debug_eprintln!(
764 " META: wrote hskip={} at bit {}",
765 skip_some,
766 _bit_pos_start
767 );
768
769 // Write code length depths using static Huffman code
770 // The decoder reads code lengths for symbols in storage order
771 let mut _bitacc = 0usize;
772 #[allow(clippy::unused_enumerate_index)]
773 for (_idx, &symbol) in STORAGE_ORDER[skip_some..codes_to_store].iter().enumerate() {
774 let depth_value = code_length_depths[symbol] as usize;
775 debug_assert!(depth_value <= 5, "Code length depth must be 0-5");
776 let bits = DEPTH_CODE_BIT_LENGTHS[depth_value] as usize;
777 let code = DEPTH_CODE_SYMBOLS[depth_value] as u64;
778 writer.write(bits, code)?;
779 if depth_value > 0 {
780 _bitacc += 32 >> depth_value;
781 }
782 crate::trace::debug_eprintln!(
783 " META[{}]: symbol={}, depth={}, code=0b{:0width$b} ({} bits), bitacc={}",
784 skip_some + _idx,
785 symbol,
786 depth_value,
787 code,
788 bits,
789 _bitacc,
790 width = bits
791 );
792 }
793 crate::trace::debug_eprintln!(" META: final bitacc={} (should be 32)", _bitacc);
794
795 Ok(())
796}
797
798/// Writes the RLE-compressed Huffman tree to the bitstream.
799///
800/// ```cpp
801/// // libjxl: enc_huffman.cc
802/// Status StoreHuffmanTreeToBitMask(const size_t huffman_tree_size,
803/// const uint8_t* huffman_tree,
804/// const uint8_t* huffman_tree_extra_bits,
805/// const uint8_t* code_length_bitdepth,
806/// const uint16_t* code_length_bitdepth_symbols,
807/// BitWriter* writer) {
808/// for (size_t i = 0; i < huffman_tree_size; ++i) {
809/// size_t ix = huffman_tree[i];
810/// writer->Write(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix]);
811/// switch (ix) {
812/// case 16: writer->Write(2, huffman_tree_extra_bits[i]); break;
813/// case 17: writer->Write(3, huffman_tree_extra_bits[i]); break;
814/// }
815/// }
816/// return true;
817/// }
818/// ```
819pub fn store_compressed_tree(
820 tree: &CompressedTree,
821 code_length_depths: &[u8; CODE_LENGTH_CODES],
822 code_length_codes: &[u16; CODE_LENGTH_CODES],
823 writer: &mut BitWriter,
824) -> Result<()> {
825 crate::trace::debug_eprintln!(" TREE: meta_codes = {:?}", &code_length_codes[..5]);
826 crate::trace::debug_eprintln!(" TREE: meta_depths = {:?}", &code_length_depths[..5]);
827
828 for i in 0..tree.codes.len().min(10) {
829 let ix = tree.codes[i] as usize;
830 debug_assert!(ix < CODE_LENGTH_CODES);
831
832 // Write the code for this code length
833 let depth = code_length_depths[ix] as usize;
834 let code = code_length_codes[ix] as u64;
835 let _bit_pos = writer.bits_written();
836 if depth > 0 {
837 writer.write(depth, code)?;
838 }
839 crate::trace::debug_eprintln!(
840 " TREE[{}]: code_len_code={}, meta_depth={}, meta_code=0b{:b}, extra_bits={}",
841 i,
842 ix,
843 depth,
844 code,
845 tree.extra_bits[i]
846 );
847
848 // Write extra bits for RLE codes
849 match ix {
850 16 => writer.write(2, tree.extra_bits[i] as u64)?,
851 17 => writer.write(3, tree.extra_bits[i] as u64)?,
852 _ => {}
853 }
854 }
855
856 // Write remaining codes silently
857 for i in 10..tree.codes.len() {
858 let ix = tree.codes[i] as usize;
859 let depth = code_length_depths[ix] as usize;
860 let code = code_length_codes[ix] as u64;
861 if depth > 0 {
862 writer.write(depth, code)?;
863 }
864 match ix {
865 16 => writer.write(2, tree.extra_bits[i] as u64)?,
866 17 => writer.write(3, tree.extra_bits[i] as u64)?,
867 _ => {}
868 }
869 }
870
871 Ok(())
872}
873
874/// Stores a full Huffman tree to the bitstream.
875///
876/// This is used when there are more than 4 unique symbols.
877///
878/// ```cpp
879/// // libjxl: enc_huffman.cc
880/// Status StoreHuffmanTree(const uint8_t* depths, size_t num, BitWriter* writer) {
881/// // Write the Huffman tree into the compact representation.
882/// auto arena = ...;
883/// uint8_t* huffman_tree = arena.data();
884/// uint8_t* huffman_tree_extra_bits = arena.data() + num;
885/// size_t huffman_tree_size = 0;
886/// WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree, huffman_tree_extra_bits);
887///
888/// // Calculate the statistics of the Huffman tree.
889/// uint32_t huffman_tree_histogram[kCodeLengthCodes] = {0};
890/// for (size_t i = 0; i < huffman_tree_size; ++i) {
891/// ++huffman_tree_histogram[huffman_tree[i]];
892/// }
893///
894/// int num_codes = 0;
895/// int code = 0;
896/// for (int i = 0; i < kCodeLengthCodes; ++i) {
897/// if (huffman_tree_histogram[i]) {
898/// if (num_codes == 0) { code = i; num_codes = 1; }
899/// else if (num_codes == 1) { num_codes = 2; break; }
900/// }
901/// }
902///
903/// // Calculate another Huffman tree for compressing the first.
904/// uint8_t code_length_bitdepth[kCodeLengthCodes] = {0};
905/// uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = {0};
906/// CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, 5, &code_length_bitdepth[0]);
907/// ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes, &code_length_bitdepth_symbols[0]);
908///
909/// StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth, writer);
910///
911/// if (num_codes == 1) {
912/// code_length_bitdepth[code] = 0;
913/// }
914///
915/// StoreHuffmanTreeToBitMask(...);
916/// return true;
917/// }
918/// ```
919pub fn store_huffman_tree(depths: &[u8], writer: &mut BitWriter) -> Result<()> {
920 // Debug: show raw depths for first and last few elements
921 let _first_10: Vec<u8> = depths.iter().take(10).copied().collect();
922 let _last_10: Vec<u8> = depths.iter().rev().take(10).rev().copied().collect();
923 crate::trace::debug_eprintln!(
924 "STORE_HUFF: depths len={}, first_10={:?}, last_10={:?}",
925 depths.len(),
926 _first_10,
927 _last_10
928 );
929
930 // RLE-compress the depth array
931 let compressed = write_huffman_tree(depths);
932 crate::trace::debug_eprintln!(
933 "STORE_HUFF: compressed codes={:?}, extra_bits={:?}",
934 compressed.codes,
935 compressed.extra_bits
936 );
937
938 // Build histogram of code length codes
939 let mut histogram = [0u32; CODE_LENGTH_CODES];
940 for &code in &compressed.codes {
941 histogram[code as usize] += 1;
942 }
943 crate::trace::debug_eprintln!("STORE_HUFF: code_length_histogram={:?}", histogram);
944
945 // Count how many distinct code length codes are used
946 let mut num_codes = 0;
947 let mut single_code = 0;
948 for (i, &count) in histogram.iter().enumerate() {
949 if count > 0 {
950 if num_codes == 0 {
951 single_code = i;
952 num_codes = 1;
953 } else if num_codes == 1 {
954 num_codes = 2;
955 break;
956 }
957 }
958 }
959
960 // Build Huffman tree for the code length codes (meta-Huffman)
961 let code_length_depths = create_huffman_tree(&histogram, MAX_CODE_LENGTH_DEPTH);
962 let mut code_length_depths_arr = [0u8; CODE_LENGTH_CODES];
963 code_length_depths_arr.copy_from_slice(&code_length_depths);
964
965 let code_length_codes_vec = convert_bit_depths_to_symbols(&code_length_depths);
966 let mut code_length_codes_arr = [0u16; CODE_LENGTH_CODES];
967 code_length_codes_arr.copy_from_slice(&code_length_codes_vec);
968
969 crate::trace::debug_eprintln!(
970 "STORE_HUFF: num_codes={}, meta_depths={:?}",
971 num_codes,
972 code_length_depths_arr
973 );
974
975 // Write meta-Huffman tree
976 store_meta_huffman_tree(num_codes, &code_length_depths_arr, writer)?;
977
978 // Special case: single code length code - set its depth to 0 for implicit encoding
979 if num_codes == 1 {
980 code_length_depths_arr[single_code] = 0;
981 }
982
983 // Write the compressed tree
984 store_compressed_tree(
985 &compressed,
986 &code_length_depths_arr,
987 &code_length_codes_arr,
988 writer,
989 )?;
990
991 Ok(())
992}
993
994// ============================================================================
995// Main Entry Point
996// ============================================================================
997
998/// Result of building a Huffman table.
999pub struct HuffmanTable {
1000 /// Code depth (bit length) for each symbol.
1001 pub depths: Vec<u8>,
1002 /// Bit code for each symbol.
1003 pub codes: Vec<u16>,
1004}
1005
1006/// Builds and stores a Huffman table from a histogram.
1007///
1008/// This is the main entry point. It handles:
1009/// - Single symbol (special 4-bit encoding)
1010/// - 2-4 symbols (simple Huffman code)
1011/// - 5+ symbols (full code length table)
1012///
1013/// ```cpp
1014/// // libjxl: enc_huffman.cc
1015/// Status BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length,
1016/// uint8_t* depth, uint16_t* bits, BitWriter* writer) {
1017/// size_t count = 0;
1018/// size_t s4[4] = {0};
1019/// for (size_t i = 0; i < length; i++) {
1020/// if (histogram[i]) {
1021/// if (count < 4) { s4[count] = i; }
1022/// else if (count > 4) { break; }
1023/// count++;
1024/// }
1025/// }
1026///
1027/// size_t max_bits = 0;
1028/// size_t max_bits_counter = length - 1;
1029/// while (max_bits_counter) { max_bits_counter >>= 1; ++max_bits; }
1030///
1031/// if (count <= 1) {
1032/// writer->Write(4, 1);
1033/// writer->Write(max_bits, s4[0]);
1034/// return true;
1035/// }
1036///
1037/// CreateHuffmanTree(histogram, length, 15, depth);
1038/// ConvertBitDepthsToSymbols(depth, length, bits);
1039///
1040/// if (count <= 4) {
1041/// StoreSimpleHuffmanTree(depth, s4, count, max_bits, writer);
1042/// } else {
1043/// JXL_RETURN_IF_ERROR(StoreHuffmanTree(depth, length, writer));
1044/// }
1045/// return true;
1046/// }
1047/// ```
1048pub fn build_and_store_huffman_tree(
1049 histogram: &[u32],
1050 writer: &mut BitWriter,
1051) -> Result<HuffmanTable> {
1052 let length = histogram.len();
1053
1054 // Debug: print non-zero symbols
1055 let nonzero: Vec<(usize, u32)> = histogram
1056 .iter()
1057 .enumerate()
1058 .filter(|&(_, h)| *h > 0)
1059 .map(|(i, h)| (i, *h))
1060 .collect();
1061 crate::trace::debug_eprintln!(
1062 "HUFFMAN_BUILD: alphabet_size={}, nonzero_symbols={:?}",
1063 length,
1064 nonzero
1065 );
1066
1067 // Count non-zero symbols and track first 4
1068 let mut count = 0usize;
1069 let mut s4 = [0usize; 4];
1070 for (i, &h) in histogram.iter().enumerate() {
1071 if h > 0 {
1072 if count < 4 {
1073 s4[count] = i;
1074 }
1075 count += 1;
1076 if count > 4 {
1077 break; // We know it's >4, that's all we need
1078 }
1079 }
1080 }
1081
1082 // Calculate max_bits = ceil(log2(length))
1083 let max_bits = if length <= 1 {
1084 0
1085 } else {
1086 (usize::BITS - (length - 1).leading_zeros()) as usize
1087 };
1088
1089 // Single symbol case
1090 if count <= 1 {
1091 let mut depths = vec![0u8; length];
1092 let codes = vec![0u16; length];
1093 // Write: 4 bits = 0001, then max_bits for symbol
1094 writer.write(4, 1)?;
1095 writer.write(max_bits, s4[0] as u64)?;
1096 // Single symbol has depth 0 (implicit, no bits needed to encode)
1097 if count == 1 {
1098 depths[s4[0]] = 0;
1099 }
1100 return Ok(HuffmanTable { depths, codes });
1101 }
1102
1103 // Build optimal Huffman tree
1104 let depths = create_huffman_tree(histogram, MAX_CODE_DEPTH);
1105 let codes = convert_bit_depths_to_symbols(&depths);
1106
1107 // Debug: print depths for non-zero symbols
1108 let _depths_info: Vec<(usize, u8, u16)> = nonzero
1109 .iter()
1110 .map(|(i, _)| (*i, depths[*i], codes[*i]))
1111 .collect();
1112 crate::trace::debug_eprintln!(
1113 "HUFFMAN_BUILD: depths/codes for used symbols: {:?}",
1114 _depths_info
1115 );
1116
1117 if count <= 4 {
1118 // Simple Huffman code
1119 crate::trace::debug_eprintln!("HUFFMAN_BUILD: using simple code for {} symbols", count);
1120 store_simple_huffman_tree(&depths, &mut s4, count, max_bits, writer)?;
1121 } else {
1122 // Full code length table
1123 crate::trace::debug_eprintln!("HUFFMAN_BUILD: using full table for {} symbols", count);
1124 store_huffman_tree(&depths, writer)?;
1125 }
1126
1127 Ok(HuffmanTable { depths, codes })
1128}
1129
1130/// Stores a simple Huffman tree (1-4 symbols).
1131///
1132/// ```cpp
1133/// // libjxl: enc_huffman.cc
1134/// void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4],
1135/// size_t num_symbols, size_t max_bits, BitWriter* writer) {
1136/// writer->Write(2, 1); // simple code marker
1137/// writer->Write(2, num_symbols - 1); // NSYM - 1
1138/// // Sort by depth
1139/// for (size_t i = 0; i < num_symbols; i++) {
1140/// for (size_t j = i + 1; j < num_symbols; j++) {
1141/// if (depths[symbols[j]] < depths[symbols[i]]) {
1142/// std::swap(symbols[j], symbols[i]);
1143/// }
1144/// }
1145/// }
1146/// // Write symbols
1147/// for (size_t i = 0; i < num_symbols; i++) {
1148/// writer->Write(max_bits, symbols[i]);
1149/// }
1150/// // tree_select for 4 symbols
1151/// if (num_symbols == 4) {
1152/// writer->Write(1, depths[symbols[0]] == 1 ? 1 : 0);
1153/// }
1154/// }
1155/// ```
1156fn store_simple_huffman_tree(
1157 depths: &[u8],
1158 symbols: &mut [usize; 4],
1159 num_symbols: usize,
1160 max_bits: usize,
1161 writer: &mut BitWriter,
1162) -> Result<()> {
1163 // Write simple code marker
1164 writer.write(2, 1)?;
1165 writer.write(2, (num_symbols - 1) as u64)?;
1166
1167 // Sort symbols by depth (ascending)
1168 for i in 0..num_symbols {
1169 for j in i + 1..num_symbols {
1170 if depths[symbols[j]] < depths[symbols[i]] {
1171 symbols.swap(i, j);
1172 }
1173 }
1174 }
1175
1176 // Write symbol indices
1177 for &sym in symbols.iter().take(num_symbols) {
1178 writer.write(max_bits, sym as u64)?;
1179 }
1180
1181 // tree_select for 4 symbols
1182 if num_symbols == 4 {
1183 let tree_select = if depths[symbols[0]] == 1 { 1 } else { 0 };
1184 writer.write(1, tree_select)?;
1185 }
1186
1187 Ok(())
1188}
1189
1190// ============================================================================
1191// Tests
1192// ============================================================================
1193
1194#[cfg(test)]
1195mod tests {
1196 use super::*;
1197
1198 #[test]
1199 fn test_create_huffman_tree_single_symbol() {
1200 let histogram = [0, 0, 100, 0, 0];
1201 let depths = create_huffman_tree(&histogram, 15);
1202 assert_eq!(depths, vec![0, 0, 1, 0, 0]);
1203 }
1204
1205 #[test]
1206 fn test_create_huffman_tree_two_symbols() {
1207 let histogram = [50, 0, 50, 0, 0];
1208 let depths = create_huffman_tree(&histogram, 15);
1209 // Both symbols should have depth 1
1210 assert_eq!(depths[0], 1);
1211 assert_eq!(depths[2], 1);
1212 }
1213
1214 #[test]
1215 fn test_create_huffman_tree_four_symbols() {
1216 let histogram = [10, 20, 30, 40, 0];
1217 let depths = create_huffman_tree(&histogram, 15);
1218 // Most frequent (40) should have shortest depth
1219 assert!(depths[3] <= depths[2]);
1220 assert!(depths[2] <= depths[1]);
1221 assert!(depths[1] <= depths[0]);
1222 }
1223
1224 #[test]
1225 fn test_create_huffman_tree_many_symbols() {
1226 // 10 symbols with varying frequencies
1227 let histogram = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512];
1228 let depths = create_huffman_tree(&histogram, 15);
1229
1230 // All non-zero symbols should have non-zero depth
1231 for (i, &d) in depths.iter().enumerate() {
1232 assert!(d > 0, "Symbol {} should have non-zero depth", i);
1233 }
1234
1235 // More frequent symbols should have shorter or equal depth
1236 for i in 1..10 {
1237 assert!(
1238 depths[i] <= depths[i - 1],
1239 "Symbol {} (freq {}) should have depth <= symbol {} (freq {})",
1240 i,
1241 histogram[i],
1242 i - 1,
1243 histogram[i - 1]
1244 );
1245 }
1246 }
1247
1248 #[test]
1249 fn test_create_huffman_tree_depth_limit() {
1250 // Many symbols with same frequency - could create deep tree
1251 let histogram = vec![1u32; 100];
1252 let depths = create_huffman_tree(&histogram, 15);
1253
1254 // All depths should be <= 15
1255 for &d in &depths {
1256 assert!(d <= 15, "Depth {} exceeds limit of 15", d);
1257 }
1258 }
1259
1260 #[test]
1261 fn test_reverse_bits() {
1262 assert_eq!(reverse_bits(1, 0), 0);
1263 assert_eq!(reverse_bits(1, 1), 1);
1264 assert_eq!(reverse_bits(2, 0b01), 0b10);
1265 assert_eq!(reverse_bits(2, 0b10), 0b01);
1266 assert_eq!(reverse_bits(3, 0b001), 0b100);
1267 assert_eq!(reverse_bits(4, 0b0001), 0b1000);
1268 assert_eq!(reverse_bits(8, 0b10101010), 0b01010101);
1269 }
1270
1271 #[test]
1272 fn test_convert_bit_depths_to_symbols() {
1273 // Two symbols at depth 1
1274 let depths = [1, 1, 0];
1275 let codes = convert_bit_depths_to_symbols(&depths);
1276 // Should get codes 0 and 1 (reversed)
1277 assert_eq!(codes[0], 0);
1278 assert_eq!(codes[1], 1);
1279 }
1280
1281 #[test]
1282 fn test_convert_bit_depths_three_symbols() {
1283 // Three symbols: depths 1, 2, 2
1284 let depths = [1, 2, 2, 0];
1285 let codes = convert_bit_depths_to_symbols(&depths);
1286 // Symbol 0: depth 1, code 0
1287 // Symbol 1: depth 2, code 10 -> reversed = 01
1288 // Symbol 2: depth 2, code 11 -> reversed = 11
1289 assert_eq!(codes[0], 0b0);
1290 assert_eq!(codes[1], 0b01);
1291 assert_eq!(codes[2], 0b11);
1292 }
1293
1294 #[test]
1295 fn test_write_huffman_tree_simple() {
1296 // Simple depth array
1297 let depths = [2, 2, 0, 0];
1298 let tree = write_huffman_tree(&depths);
1299 // Should just have two 2s
1300 assert_eq!(tree.codes, vec![2, 2]);
1301 assert!(tree.extra_bits.iter().all(|&x| x == 0));
1302 }
1303
1304 #[test]
1305 fn test_write_huffman_tree_with_zeros() {
1306 let depths = [3, 0, 0, 0, 3];
1307 let tree = write_huffman_tree(&depths);
1308 // Should encode 3, zeros (maybe RLE), 3
1309 // Note: trailing zeros are stripped, so we only go to index 4
1310 assert!(!tree.codes.is_empty());
1311 }
1312
1313 #[test]
1314 fn test_write_huffman_tree_long_zeros() {
1315 // Many zeros in the middle - should trigger RLE
1316 let mut depths = vec![0u8; 100];
1317 depths[0] = 3;
1318 depths[99] = 3;
1319 let tree = write_huffman_tree(&depths);
1320 // Should use code 17 for zero runs
1321 assert!(tree.codes.contains(&17));
1322 }
1323
1324 #[test]
1325 fn test_build_and_store_single_symbol() {
1326 let histogram = [0, 0, 100, 0, 0];
1327 let mut writer = BitWriter::new();
1328 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1329
1330 // Single symbol should have depth 0
1331 assert_eq!(table.depths[2], 0);
1332
1333 // Check output format: 4 bits = 0001, then max_bits for symbol
1334 let bytes = writer.finish_with_padding();
1335 assert!(!bytes.is_empty());
1336 }
1337
1338 #[test]
1339 fn test_build_and_store_two_symbols() {
1340 let histogram = [10, 0, 20, 0, 0];
1341 let mut writer = BitWriter::new();
1342 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1343
1344 // Both should have depth 1
1345 assert_eq!(table.depths[0], 1);
1346 assert_eq!(table.depths[2], 1);
1347
1348 let bytes = writer.finish_with_padding();
1349 // Should start with simple code marker (bits: 01)
1350 assert!(!bytes.is_empty());
1351 }
1352
1353 #[test]
1354 fn test_build_and_store_many_symbols() {
1355 // 10 symbols - requires full code length table
1356 let histogram: Vec<u32> = (1..=10).map(|x| x * 10).collect();
1357 let mut writer = BitWriter::new();
1358 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1359
1360 // All symbols should have non-zero depth
1361 for i in 0..10 {
1362 assert!(table.depths[i] > 0);
1363 }
1364
1365 let bytes = writer.finish_with_padding();
1366 assert!(!bytes.is_empty());
1367 }
1368
1369 #[test]
1370 fn test_build_and_store_256_symbols() {
1371 // Full byte alphabet with varying frequencies
1372 let mut histogram = vec![0u32; 256];
1373 for (i, h) in histogram.iter_mut().enumerate() {
1374 *h = (i as u32 + 1) * 2;
1375 }
1376
1377 let mut writer = BitWriter::new();
1378 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1379
1380 // All symbols should have valid depth
1381 for i in 0..256 {
1382 assert!(table.depths[i] > 0 && table.depths[i] <= 15);
1383 }
1384
1385 let bytes = writer.finish_with_padding();
1386 assert!(!bytes.is_empty());
1387 }
1388
1389 #[test]
1390 fn test_canonical_codes_are_prefix_free() {
1391 let histogram: Vec<u32> = (1..=20).map(|x| x * 5).collect();
1392 let depths = create_huffman_tree(&histogram, 15);
1393 let codes = convert_bit_depths_to_symbols(&depths);
1394
1395 // Verify no code is a prefix of another
1396 for i in 0..20 {
1397 if depths[i] == 0 {
1398 continue;
1399 }
1400 for j in i + 1..20 {
1401 if depths[j] == 0 {
1402 continue;
1403 }
1404 let (shorter, longer) = if depths[i] <= depths[j] {
1405 ((codes[i], depths[i]), (codes[j], depths[j]))
1406 } else {
1407 ((codes[j], depths[j]), (codes[i], depths[i]))
1408 };
1409
1410 // Extract prefix of longer code
1411 let prefix_mask = (1u16 << shorter.1) - 1;
1412 let longer_prefix = longer.0 & prefix_mask;
1413
1414 assert_ne!(
1415 shorter.0, longer_prefix,
1416 "Code {} (depth {}) is prefix of code {} (depth {})",
1417 shorter.0, shorter.1, longer.0, longer.1
1418 );
1419 }
1420 }
1421 }
1422}