jxl_encoder/entropy_coding/huffman_tree.rs
1// Copyright (c) Imazen LLC and the JPEG XL Project Authors.
2// Algorithms and constants derived from libjxl (BSD-3-Clause).
3// Licensed under AGPL-3.0-or-later. Commercial licenses at https://www.imazen.io/pricing
4
5//! Full Huffman tree encoder supporting arbitrary alphabet sizes.
6//!
7//! This module ports libjxl's `enc_huffman_tree.cc` and parts of `enc_huffman.cc`.
8//! It supports encoding Huffman tables with more than 4 symbols using the
9//! code length table format with RLE compression.
10//!
11//! # Architecture
12//! 1. `create_huffman_tree` - builds optimal tree, outputs depth array
13//! 2. `convert_bit_depths_to_symbols` - converts depths to canonical codes
14//! 3. `write_huffman_tree` - RLE-compresses depth array for storage
15//! 4. `store_huffman_tree` - writes compressed tree to bitstream
16
17use crate::bit_writer::BitWriter;
18use crate::error::Result;
19
20/// Number of code length codes (0-15 for depths, 16-17 for RLE).
21const CODE_LENGTH_CODES: usize = 18;
22
23/// Maximum code depth for regular Huffman codes.
24const MAX_CODE_DEPTH: u8 = 15;
25
26/// Maximum code depth for code length codes (meta-Huffman).
27const MAX_CODE_LENGTH_DEPTH: u8 = 5;
28
29/// Order in which code length codes are stored.
30/// This ordering groups commonly-used codes together to allow truncation.
31///
32/// ```cpp
33/// // libjxl: enc_huffman.cc
34/// static const uint8_t kStorageOrder[kCodeLengthCodes] = {
35/// 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15};
36/// ```
37const STORAGE_ORDER: [usize; CODE_LENGTH_CODES] =
38 [1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15];
39
40/// Static Huffman code for encoding code length code depths.
41/// Maps depth values 0-5 to their codes.
42///
43/// ```cpp
44/// // libjxl: enc_huffman.cc
45/// // Symbol Code
46/// // ------ ----
47/// // 0 00
48/// // 1 1110
49/// // 2 110
50/// // 3 01
51/// // 4 10
52/// // 5 1111
53/// static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {0, 7, 3, 2, 1, 15};
54/// static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {2, 4, 3, 2, 2, 4};
55/// ```
56const DEPTH_CODE_SYMBOLS: [u8; 6] = [0, 7, 3, 2, 1, 15];
57const DEPTH_CODE_BIT_LENGTHS: [u8; 6] = [2, 4, 3, 2, 2, 4];
58
59// ============================================================================
60// Huffman Tree Building
61// ============================================================================
62
63/// Node in the Huffman tree during construction.
64///
65/// ```cpp
66/// // libjxl: enc_huffman_tree.h
67/// struct HuffmanTree {
68/// uint32_t total_count;
69/// int16_t index_left;
70/// int16_t index_right_or_value;
71/// };
72/// ```
73#[derive(Clone, Copy, Debug)]
74struct HuffmanNode {
75 /// Total count (frequency) of this subtree.
76 total_count: u32,
77 /// Index of left child, or -1 if leaf.
78 index_left: i32,
79 /// Index of right child, or symbol value if leaf.
80 /// Note: libjxl uses i16, but we need i32 for alphabets > 32K symbols.
81 index_right_or_value: i32,
82}
83
84impl HuffmanNode {
85 fn leaf(count: u32, symbol: usize) -> Self {
86 Self {
87 total_count: count,
88 index_left: -1,
89 index_right_or_value: symbol as i32,
90 }
91 }
92
93 fn sentinel() -> Self {
94 Self {
95 total_count: u32::MAX,
96 index_left: -1,
97 index_right_or_value: -1,
98 }
99 }
100
101 #[allow(dead_code)]
102 fn is_leaf(&self) -> bool {
103 self.index_left < 0
104 }
105}
106
107/// Recursively set depth for all symbols in a subtree.
108///
109/// ```cpp
110/// // libjxl: enc_huffman_tree.cc
111/// void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth, uint8_t level) {
112/// if (p.index_left >= 0) {
113/// ++level;
114/// SetDepth(pool[p.index_left], pool, depth, level);
115/// SetDepth(pool[p.index_right_or_value], pool, depth, level);
116/// } else {
117/// depth[p.index_right_or_value] = level;
118/// }
119/// }
120/// ```
121fn set_depth(node: &HuffmanNode, pool: &[HuffmanNode], depth: &mut [u8], level: u8) {
122 if node.index_left >= 0 {
123 let next_level = level + 1;
124 set_depth(&pool[node.index_left as usize], pool, depth, next_level);
125 set_depth(
126 &pool[node.index_right_or_value as usize],
127 pool,
128 depth,
129 next_level,
130 );
131 } else {
132 depth[node.index_right_or_value as usize] = level;
133 }
134}
135
136/// Creates an optimal Huffman tree from symbol frequencies.
137///
138/// Returns the depth (code length) for each symbol. Symbols with zero
139/// frequency get depth 0 (meaning they don't appear in the tree).
140///
141/// The tree depth is limited to `tree_limit` bits. If the natural tree
142/// would exceed this, the algorithm retries with inflated minimum counts.
143///
144/// ```cpp
145/// // libjxl: enc_huffman_tree.cc
146/// void CreateHuffmanTree(const uint32_t* data, const size_t length,
147/// const int tree_limit, uint8_t* depth) {
148/// for (uint32_t count_limit = 1;; count_limit *= 2) {
149/// std::vector<HuffmanTree> tree;
150/// tree.reserve(2 * length + 1);
151/// // ... build tree ...
152/// if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
153/// break;
154/// }
155/// }
156/// }
157/// ```
158pub fn create_huffman_tree(histogram: &[u32], tree_limit: u8) -> Vec<u8> {
159 let length = histogram.len();
160 let mut depth = vec![0u8; length];
161
162 // Retry with increasing count_limit until tree fits in tree_limit bits
163 let mut count_limit: u32 = 1;
164 loop {
165 // Build list of leaf nodes for non-zero symbols
166 // Process in reverse order so that ties favor lower symbol indices
167 let mut tree: Vec<HuffmanNode> = Vec::with_capacity(2 * length + 1);
168 for i in (0..length).rev() {
169 if histogram[i] > 0 {
170 // Clamp count to at least count_limit - 1 to limit tree depth
171 let count = histogram[i].max(count_limit.saturating_sub(1));
172 tree.push(HuffmanNode::leaf(count, i));
173 }
174 }
175
176 let n = tree.len();
177 if n == 0 {
178 // No symbols - return all zeros
179 return depth;
180 }
181 if n == 1 {
182 // Single symbol - give it depth 1 (will be corrected by caller)
183 depth[tree[0].index_right_or_value as usize] = 1;
184 return depth;
185 }
186
187 // Sort by count (ascending), then by symbol index (descending for ties)
188 tree.sort_by(|a, b| {
189 if a.total_count != b.total_count {
190 a.total_count.cmp(&b.total_count)
191 } else {
192 // Higher index first for ties (reverse order)
193 b.index_right_or_value.cmp(&a.index_right_or_value)
194 }
195 });
196
197 // Add sentinel nodes
198 tree.push(HuffmanNode::sentinel());
199 tree.push(HuffmanNode::sentinel());
200
201 // Build tree using two-queue algorithm
202 // i: next leaf node, j: next internal node
203 let mut i = 0;
204 let mut j = n + 1;
205
206 for _ in 0..n - 1 {
207 // Pick two smallest nodes
208 let left = if tree[i].total_count <= tree[j].total_count {
209 let idx = i;
210 i += 1;
211 idx
212 } else {
213 let idx = j;
214 j += 1;
215 idx
216 };
217
218 let right = if tree[i].total_count <= tree[j].total_count {
219 let idx = i;
220 i += 1;
221 idx
222 } else {
223 let idx = j;
224 j += 1;
225 idx
226 };
227
228 // Create parent node at the sentinel position
229 let j_end = tree.len() - 1;
230 tree[j_end].total_count = tree[left].total_count + tree[right].total_count;
231 tree[j_end].index_left = left as i32;
232 tree[j_end].index_right_or_value = right as i32;
233
234 // Add new sentinel
235 tree.push(HuffmanNode::sentinel());
236 }
237
238 // Extract depths from tree
239 depth.fill(0);
240 set_depth(&tree[2 * n - 1], &tree, &mut depth, 0);
241
242 // Check if tree fits in limit
243 if depth.iter().copied().max().unwrap_or(0) <= tree_limit {
244 break;
245 }
246
247 // Tree too deep - retry with higher count_limit
248 count_limit *= 2;
249 }
250
251 depth
252}
253
254// ============================================================================
255// Bit Code Generation
256// ============================================================================
257
258/// Reverses bits in a value.
259///
260/// ```cpp
261/// // libjxl: enc_huffman_tree.cc
262/// uint16_t ReverseBits(int num_bits, uint16_t bits) {
263/// static const size_t kLut[16] = {
264/// 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
265/// 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf};
266/// size_t retval = kLut[bits & 0xf];
267/// for (int i = 4; i < num_bits; i += 4) {
268/// retval <<= 4;
269/// bits = static_cast<uint16_t>(bits >> 4);
270/// retval |= kLut[bits & 0xf];
271/// }
272/// retval >>= (-num_bits & 0x3);
273/// return static_cast<uint16_t>(retval);
274/// }
275/// ```
276fn reverse_bits(num_bits: u8, bits: u16) -> u16 {
277 const LUT: [u16; 16] = [
278 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
279 ];
280
281 let mut retval = LUT[(bits & 0xf) as usize];
282 let mut bits = bits;
283 let mut i = 4i32;
284 while i < num_bits as i32 {
285 retval <<= 4;
286 bits >>= 4;
287 retval |= LUT[(bits & 0xf) as usize];
288 i += 4;
289 }
290 retval >>= (-(num_bits as i32) & 0x3) as u32;
291 retval
292}
293
294/// Converts code depths to canonical Huffman codes.
295///
296/// Returns (codes, depths) where codes[i] is the bit pattern for symbol i.
297///
298/// ```cpp
299/// // libjxl: enc_huffman_tree.cc
300/// void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, uint16_t* bits) {
301/// const int kMaxBits = 16;
302/// uint16_t bl_count[kMaxBits] = {0};
303/// for (size_t i = 0; i < len; ++i) {
304/// ++bl_count[depth[i]];
305/// }
306/// bl_count[0] = 0;
307/// uint16_t next_code[kMaxBits];
308/// next_code[0] = 0;
309/// int code = 0;
310/// for (size_t i = 1; i < kMaxBits; ++i) {
311/// code = (code + bl_count[i - 1]) << 1;
312/// next_code[i] = static_cast<uint16_t>(code);
313/// }
314/// for (size_t i = 0; i < len; ++i) {
315/// if (depth[i]) {
316/// bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
317/// }
318/// }
319/// }
320/// ```
321pub fn convert_bit_depths_to_symbols(depth: &[u8]) -> Vec<u16> {
322 const MAX_BITS: usize = 16;
323 let len = depth.len();
324 let mut bits = vec![0u16; len];
325
326 // Count symbols at each depth
327 let mut bl_count = [0u16; MAX_BITS];
328 for &d in depth {
329 bl_count[d as usize] += 1;
330 }
331 bl_count[0] = 0; // Depth 0 means symbol doesn't exist
332
333 // Compute first code at each depth
334 let mut next_code = [0u16; MAX_BITS];
335 let mut code = 0i32;
336 for i in 1..MAX_BITS {
337 code = (code + bl_count[i - 1] as i32) << 1;
338 next_code[i] = code as u16;
339 }
340
341 // Assign codes to symbols
342 for i in 0..len {
343 let d = depth[i];
344 if d > 0 {
345 bits[i] = reverse_bits(d, next_code[d as usize]);
346 next_code[d as usize] += 1;
347 }
348 }
349
350 bits
351}
352
353// ============================================================================
354// RLE Compression of Code Lengths
355// ============================================================================
356
357/// RLE-compressed representation of a Huffman tree.
358#[derive(Debug, Clone)]
359pub struct CompressedTree {
360 /// Code length codes (0-17)
361 pub codes: Vec<u8>,
362 /// Extra bits for each code (used by codes 16 and 17)
363 pub extra_bits: Vec<u8>,
364}
365
366/// Reverse a slice in place.
367fn reverse_slice(v: &mut [u8], start: usize, end: usize) {
368 let mut s = start;
369 let mut e = end - 1;
370 while s < e {
371 v.swap(s, e);
372 s += 1;
373 e -= 1;
374 }
375}
376
377/// Write RLE-encoded repetitions of zeros.
378///
379/// ```cpp
380/// // libjxl: enc_huffman_tree.cc
381/// void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size,
382/// uint8_t* tree, uint8_t* extra_bits_data) {
383/// if (repetitions == 11) {
384/// tree[*tree_size] = 0;
385/// extra_bits_data[*tree_size] = 0;
386/// ++(*tree_size);
387/// --repetitions;
388/// }
389/// if (repetitions < 3) {
390/// for (size_t i = 0; i < repetitions; ++i) {
391/// tree[*tree_size] = 0;
392/// extra_bits_data[*tree_size] = 0;
393/// ++(*tree_size);
394/// }
395/// } else {
396/// repetitions -= 3;
397/// size_t start = *tree_size;
398/// while (true) {
399/// tree[*tree_size] = 17;
400/// extra_bits_data[*tree_size] = repetitions & 0x7;
401/// ++(*tree_size);
402/// repetitions >>= 3;
403/// if (repetitions == 0) {
404/// break;
405/// }
406/// --repetitions;
407/// }
408/// Reverse(tree, start, *tree_size);
409/// Reverse(extra_bits_data, start, *tree_size);
410/// }
411/// }
412/// ```
413fn write_repetitions_zeros(mut repetitions: usize, tree: &mut CompressedTree) {
414 // Special case: 11 reps would need code 17 with extra=8, but max extra is 7
415 if repetitions == 11 {
416 tree.codes.push(0);
417 tree.extra_bits.push(0);
418 repetitions -= 1;
419 }
420
421 if repetitions < 3 {
422 // Write individual zeros
423 for _ in 0..repetitions {
424 tree.codes.push(0);
425 tree.extra_bits.push(0);
426 }
427 } else {
428 // Use code 17: repeat zeros with 3 extra bits (3-10 reps)
429 repetitions -= 3;
430 let start = tree.codes.len();
431 loop {
432 tree.codes.push(17);
433 tree.extra_bits.push((repetitions & 0x7) as u8);
434 repetitions >>= 3;
435 if repetitions == 0 {
436 break;
437 }
438 repetitions -= 1;
439 }
440 // Reverse the codes we just wrote
441 let end = tree.codes.len();
442 reverse_slice(&mut tree.codes, start, end);
443 reverse_slice(&mut tree.extra_bits, start, end);
444 }
445}
446
447/// Write RLE-encoded repetitions of non-zero values.
448///
449/// ```cpp
450/// // libjxl: enc_huffman_tree.cc
451/// void WriteHuffmanTreeRepetitions(const uint8_t previous_value,
452/// const uint8_t value, size_t repetitions,
453/// size_t* tree_size, uint8_t* tree,
454/// uint8_t* extra_bits_data) {
455/// JXL_DASSERT(repetitions > 0);
456/// if (previous_value != value) {
457/// tree[*tree_size] = value;
458/// extra_bits_data[*tree_size] = 0;
459/// ++(*tree_size);
460/// --repetitions;
461/// }
462/// if (repetitions == 7) {
463/// tree[*tree_size] = value;
464/// extra_bits_data[*tree_size] = 0;
465/// ++(*tree_size);
466/// --repetitions;
467/// }
468/// if (repetitions < 3) {
469/// for (size_t i = 0; i < repetitions; ++i) {
470/// tree[*tree_size] = value;
471/// extra_bits_data[*tree_size] = 0;
472/// ++(*tree_size);
473/// }
474/// } else {
475/// repetitions -= 3;
476/// size_t start = *tree_size;
477/// while (true) {
478/// tree[*tree_size] = 16;
479/// extra_bits_data[*tree_size] = repetitions & 0x3;
480/// ++(*tree_size);
481/// repetitions >>= 2;
482/// if (repetitions == 0) {
483/// break;
484/// }
485/// --repetitions;
486/// }
487/// Reverse(tree, start, *tree_size);
488/// Reverse(extra_bits_data, start, *tree_size);
489/// }
490/// }
491/// ```
492fn write_repetitions_nonzero(
493 previous_value: u8,
494 value: u8,
495 mut repetitions: usize,
496 tree: &mut CompressedTree,
497) {
498 debug_assert!(repetitions > 0);
499
500 // If value changed, write it literally first
501 if previous_value != value {
502 tree.codes.push(value);
503 tree.extra_bits.push(0);
504 repetitions -= 1;
505 }
506
507 // Special case: 7 reps would need code 16 with extra=4, but max extra is 3
508 if repetitions == 7 {
509 tree.codes.push(value);
510 tree.extra_bits.push(0);
511 repetitions -= 1;
512 }
513
514 if repetitions < 3 {
515 // Write individual values
516 for _ in 0..repetitions {
517 tree.codes.push(value);
518 tree.extra_bits.push(0);
519 }
520 } else {
521 // Use code 16: repeat previous with 2 extra bits (3-6 reps)
522 repetitions -= 3;
523 let start = tree.codes.len();
524 loop {
525 tree.codes.push(16);
526 tree.extra_bits.push((repetitions & 0x3) as u8);
527 repetitions >>= 2;
528 if repetitions == 0 {
529 break;
530 }
531 repetitions -= 1;
532 }
533 // Reverse the codes we just wrote
534 let end = tree.codes.len();
535 reverse_slice(&mut tree.codes, start, end);
536 reverse_slice(&mut tree.extra_bits, start, end);
537 }
538}
539
540/// Decide whether to use RLE for zeros and non-zeros.
541///
542/// ```cpp
543/// // libjxl: enc_huffman_tree.cc
544/// static void DecideOverRleUse(const uint8_t* depth, const size_t length,
545/// bool* use_rle_for_non_zero, bool* use_rle_for_zero) {
546/// size_t total_reps_zero = 0;
547/// size_t total_reps_non_zero = 0;
548/// size_t count_reps_zero = 1;
549/// size_t count_reps_non_zero = 1;
550/// for (size_t i = 0; i < length;) {
551/// const uint8_t value = depth[i];
552/// size_t reps = 1;
553/// for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
554/// ++reps;
555/// }
556/// if (reps >= 3 && value == 0) {
557/// total_reps_zero += reps;
558/// ++count_reps_zero;
559/// }
560/// if (reps >= 4 && value != 0) {
561/// total_reps_non_zero += reps;
562/// ++count_reps_non_zero;
563/// }
564/// i += reps;
565/// }
566/// *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
567/// *use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
568/// }
569/// ```
570fn decide_rle_use(depth: &[u8]) -> (bool, bool) {
571 let mut total_reps_zero = 0usize;
572 let mut total_reps_non_zero = 0usize;
573 let mut count_reps_zero = 1usize;
574 let mut count_reps_non_zero = 1usize;
575
576 let mut i = 0;
577 while i < depth.len() {
578 let value = depth[i];
579 let mut reps = 1usize;
580 while i + reps < depth.len() && depth[i + reps] == value {
581 reps += 1;
582 }
583 if reps >= 3 && value == 0 {
584 total_reps_zero += reps;
585 count_reps_zero += 1;
586 }
587 if reps >= 4 && value != 0 {
588 total_reps_non_zero += reps;
589 count_reps_non_zero += 1;
590 }
591 i += reps;
592 }
593
594 let use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
595 let use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
596 (use_rle_for_non_zero, use_rle_for_zero)
597}
598
599/// Compresses a depth array into RLE-encoded code length codes.
600///
601/// ```cpp
602/// // libjxl: enc_huffman_tree.cc
603/// void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
604/// uint8_t* tree, uint8_t* extra_bits_data) {
605/// uint8_t previous_value = 8;
606/// // Throw away trailing zeros.
607/// size_t new_length = length;
608/// for (size_t i = 0; i < length; ++i) {
609/// if (depth[length - i - 1] == 0) {
610/// --new_length;
611/// } else {
612/// break;
613/// }
614/// }
615/// // First gather statistics on if it is a good idea to do rle.
616/// bool use_rle_for_non_zero = false;
617/// bool use_rle_for_zero = false;
618/// if (length > 50) {
619/// DecideOverRleUse(depth, new_length, &use_rle_for_non_zero, &use_rle_for_zero);
620/// }
621/// // Actual rle coding.
622/// for (size_t i = 0; i < new_length;) {
623/// const uint8_t value = depth[i];
624/// size_t reps = 1;
625/// if ((value != 0 && use_rle_for_non_zero) || (value == 0 && use_rle_for_zero)) {
626/// for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
627/// ++reps;
628/// }
629/// }
630/// if (value == 0) {
631/// WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
632/// } else {
633/// WriteHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree, extra_bits_data);
634/// previous_value = value;
635/// }
636/// i += reps;
637/// }
638/// }
639/// ```
640pub fn write_huffman_tree(depth: &[u8]) -> CompressedTree {
641 let mut tree = CompressedTree {
642 codes: Vec::new(),
643 extra_bits: Vec::new(),
644 };
645
646 // Throw away trailing zeros
647 let mut new_length = depth.len();
648 while new_length > 0 && depth[new_length - 1] == 0 {
649 new_length -= 1;
650 }
651
652 if new_length == 0 {
653 return tree;
654 }
655
656 // Decide whether to use RLE (only for longer sequences)
657 let (use_rle_for_non_zero, use_rle_for_zero) = if depth.len() > 50 {
658 decide_rle_use(&depth[..new_length])
659 } else {
660 (false, false)
661 };
662
663 // Encode with RLE
664 let mut previous_value = 8u8; // Initial "previous" value (won't match any real depth)
665 let mut i = 0;
666 while i < new_length {
667 let value = depth[i];
668 let mut reps = 1usize;
669
670 // Count repetitions if RLE is enabled for this value type
671 if (value != 0 && use_rle_for_non_zero) || (value == 0 && use_rle_for_zero) {
672 while i + reps < new_length && depth[i + reps] == value {
673 reps += 1;
674 }
675 }
676
677 if value == 0 {
678 write_repetitions_zeros(reps, &mut tree);
679 } else {
680 write_repetitions_nonzero(previous_value, value, reps, &mut tree);
681 previous_value = value;
682 }
683 i += reps;
684 }
685
686 tree
687}
688
689// ============================================================================
690// Bitstream Encoding
691// ============================================================================
692
693/// Writes the meta-Huffman tree (tree of tree) to the bitstream.
694///
695/// This encodes which code length codes (0-17) are used and their depths.
696///
697/// ```cpp
698/// // libjxl: enc_huffman.cc
699/// void StoreHuffmanTreeOfHuffmanTreeToBitMask(const int num_codes,
700/// const uint8_t* code_length_bitdepth,
701/// BitWriter* writer) {
702/// // Throw away trailing zeros:
703/// size_t codes_to_store = kCodeLengthCodes;
704/// if (num_codes > 1) {
705/// for (; codes_to_store > 0; --codes_to_store) {
706/// if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
707/// break;
708/// }
709/// }
710/// }
711/// size_t skip_some = 0;
712/// if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
713/// code_length_bitdepth[kStorageOrder[1]] == 0) {
714/// skip_some = 2;
715/// if (code_length_bitdepth[kStorageOrder[2]] == 0) {
716/// skip_some = 3;
717/// }
718/// }
719/// writer->Write(2, skip_some);
720/// for (size_t i = skip_some; i < codes_to_store; ++i) {
721/// size_t l = code_length_bitdepth[kStorageOrder[i]];
722/// writer->Write(kHuffmanBitLengthHuffmanCodeBitLengths[l],
723/// kHuffmanBitLengthHuffmanCodeSymbols[l]);
724/// }
725/// }
726/// ```
727pub fn store_meta_huffman_tree(
728 num_codes: usize,
729 code_length_depths: &[u8; CODE_LENGTH_CODES],
730 writer: &mut BitWriter,
731) -> Result<()> {
732 // Find how many codes to store (trim trailing zeros in storage order)
733 let mut codes_to_store = CODE_LENGTH_CODES;
734 if num_codes > 1 {
735 while codes_to_store > 0 && code_length_depths[STORAGE_ORDER[codes_to_store - 1]] == 0 {
736 codes_to_store -= 1;
737 }
738 }
739
740 // Determine skip count (how many leading zeros in storage order)
741 let mut skip_some = 0usize;
742 if code_length_depths[STORAGE_ORDER[0]] == 0 && code_length_depths[STORAGE_ORDER[1]] == 0 {
743 skip_some = 2;
744 if code_length_depths[STORAGE_ORDER[2]] == 0 {
745 skip_some = 3;
746 }
747 }
748
749 crate::trace::debug_eprintln!(
750 "STORE_META: codes_to_store={}, skip_some={}",
751 codes_to_store,
752 skip_some
753 );
754
755 // Write skip count (2 bits) - this is the simple_code_or_skip field
756 // Values 0, 2, 3 indicate full tree with skip; value 1 would be simple code
757 let _bit_pos_start = writer.bits_written();
758 writer.write(2, skip_some as u64)?;
759 crate::trace::debug_eprintln!(
760 " META: wrote hskip={} at bit {}",
761 skip_some,
762 _bit_pos_start
763 );
764
765 // Write code length depths using static Huffman code
766 // The decoder reads code lengths for symbols in storage order
767 let mut _bitacc = 0usize;
768 #[allow(clippy::unused_enumerate_index)]
769 for (_idx, &symbol) in STORAGE_ORDER[skip_some..codes_to_store].iter().enumerate() {
770 let depth_value = code_length_depths[symbol] as usize;
771 debug_assert!(depth_value <= 5, "Code length depth must be 0-5");
772 let bits = DEPTH_CODE_BIT_LENGTHS[depth_value] as usize;
773 let code = DEPTH_CODE_SYMBOLS[depth_value] as u64;
774 writer.write(bits, code)?;
775 if depth_value > 0 {
776 _bitacc += 32 >> depth_value;
777 }
778 crate::trace::debug_eprintln!(
779 " META[{}]: symbol={}, depth={}, code=0b{:0width$b} ({} bits), bitacc={}",
780 skip_some + _idx,
781 symbol,
782 depth_value,
783 code,
784 bits,
785 _bitacc,
786 width = bits
787 );
788 }
789 crate::trace::debug_eprintln!(" META: final bitacc={} (should be 32)", _bitacc);
790
791 Ok(())
792}
793
794/// Writes the RLE-compressed Huffman tree to the bitstream.
795///
796/// ```cpp
797/// // libjxl: enc_huffman.cc
798/// Status StoreHuffmanTreeToBitMask(const size_t huffman_tree_size,
799/// const uint8_t* huffman_tree,
800/// const uint8_t* huffman_tree_extra_bits,
801/// const uint8_t* code_length_bitdepth,
802/// const uint16_t* code_length_bitdepth_symbols,
803/// BitWriter* writer) {
804/// for (size_t i = 0; i < huffman_tree_size; ++i) {
805/// size_t ix = huffman_tree[i];
806/// writer->Write(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix]);
807/// switch (ix) {
808/// case 16: writer->Write(2, huffman_tree_extra_bits[i]); break;
809/// case 17: writer->Write(3, huffman_tree_extra_bits[i]); break;
810/// }
811/// }
812/// return true;
813/// }
814/// ```
815pub fn store_compressed_tree(
816 tree: &CompressedTree,
817 code_length_depths: &[u8; CODE_LENGTH_CODES],
818 code_length_codes: &[u16; CODE_LENGTH_CODES],
819 writer: &mut BitWriter,
820) -> Result<()> {
821 crate::trace::debug_eprintln!(" TREE: meta_codes = {:?}", &code_length_codes[..5]);
822 crate::trace::debug_eprintln!(" TREE: meta_depths = {:?}", &code_length_depths[..5]);
823
824 for i in 0..tree.codes.len().min(10) {
825 let ix = tree.codes[i] as usize;
826 debug_assert!(ix < CODE_LENGTH_CODES);
827
828 // Write the code for this code length
829 let depth = code_length_depths[ix] as usize;
830 let code = code_length_codes[ix] as u64;
831 let _bit_pos = writer.bits_written();
832 if depth > 0 {
833 writer.write(depth, code)?;
834 }
835 crate::trace::debug_eprintln!(
836 " TREE[{}]: code_len_code={}, meta_depth={}, meta_code=0b{:b}, extra_bits={}",
837 i,
838 ix,
839 depth,
840 code,
841 tree.extra_bits[i]
842 );
843
844 // Write extra bits for RLE codes
845 match ix {
846 16 => writer.write(2, tree.extra_bits[i] as u64)?,
847 17 => writer.write(3, tree.extra_bits[i] as u64)?,
848 _ => {}
849 }
850 }
851
852 // Write remaining codes silently
853 for i in 10..tree.codes.len() {
854 let ix = tree.codes[i] as usize;
855 let depth = code_length_depths[ix] as usize;
856 let code = code_length_codes[ix] as u64;
857 if depth > 0 {
858 writer.write(depth, code)?;
859 }
860 match ix {
861 16 => writer.write(2, tree.extra_bits[i] as u64)?,
862 17 => writer.write(3, tree.extra_bits[i] as u64)?,
863 _ => {}
864 }
865 }
866
867 Ok(())
868}
869
870/// Stores a full Huffman tree to the bitstream.
871///
872/// This is used when there are more than 4 unique symbols.
873///
874/// ```cpp
875/// // libjxl: enc_huffman.cc
876/// Status StoreHuffmanTree(const uint8_t* depths, size_t num, BitWriter* writer) {
877/// // Write the Huffman tree into the compact representation.
878/// auto arena = ...;
879/// uint8_t* huffman_tree = arena.data();
880/// uint8_t* huffman_tree_extra_bits = arena.data() + num;
881/// size_t huffman_tree_size = 0;
882/// WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree, huffman_tree_extra_bits);
883///
884/// // Calculate the statistics of the Huffman tree.
885/// uint32_t huffman_tree_histogram[kCodeLengthCodes] = {0};
886/// for (size_t i = 0; i < huffman_tree_size; ++i) {
887/// ++huffman_tree_histogram[huffman_tree[i]];
888/// }
889///
890/// int num_codes = 0;
891/// int code = 0;
892/// for (int i = 0; i < kCodeLengthCodes; ++i) {
893/// if (huffman_tree_histogram[i]) {
894/// if (num_codes == 0) { code = i; num_codes = 1; }
895/// else if (num_codes == 1) { num_codes = 2; break; }
896/// }
897/// }
898///
899/// // Calculate another Huffman tree for compressing the first.
900/// uint8_t code_length_bitdepth[kCodeLengthCodes] = {0};
901/// uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = {0};
902/// CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, 5, &code_length_bitdepth[0]);
903/// ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes, &code_length_bitdepth_symbols[0]);
904///
905/// StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth, writer);
906///
907/// if (num_codes == 1) {
908/// code_length_bitdepth[code] = 0;
909/// }
910///
911/// StoreHuffmanTreeToBitMask(...);
912/// return true;
913/// }
914/// ```
915pub fn store_huffman_tree(depths: &[u8], writer: &mut BitWriter) -> Result<()> {
916 // Debug: show raw depths for first and last few elements
917 let _first_10: Vec<u8> = depths.iter().take(10).copied().collect();
918 let _last_10: Vec<u8> = depths.iter().rev().take(10).rev().copied().collect();
919 crate::trace::debug_eprintln!(
920 "STORE_HUFF: depths len={}, first_10={:?}, last_10={:?}",
921 depths.len(),
922 _first_10,
923 _last_10
924 );
925
926 // RLE-compress the depth array
927 let compressed = write_huffman_tree(depths);
928 crate::trace::debug_eprintln!(
929 "STORE_HUFF: compressed codes={:?}, extra_bits={:?}",
930 compressed.codes,
931 compressed.extra_bits
932 );
933
934 // Build histogram of code length codes
935 let mut histogram = [0u32; CODE_LENGTH_CODES];
936 for &code in &compressed.codes {
937 histogram[code as usize] += 1;
938 }
939 crate::trace::debug_eprintln!("STORE_HUFF: code_length_histogram={:?}", histogram);
940
941 // Count how many distinct code length codes are used
942 let mut num_codes = 0;
943 let mut single_code = 0;
944 for (i, &count) in histogram.iter().enumerate() {
945 if count > 0 {
946 if num_codes == 0 {
947 single_code = i;
948 num_codes = 1;
949 } else if num_codes == 1 {
950 num_codes = 2;
951 break;
952 }
953 }
954 }
955
956 // Build Huffman tree for the code length codes (meta-Huffman)
957 let code_length_depths = create_huffman_tree(&histogram, MAX_CODE_LENGTH_DEPTH);
958 let mut code_length_depths_arr = [0u8; CODE_LENGTH_CODES];
959 code_length_depths_arr.copy_from_slice(&code_length_depths);
960
961 let code_length_codes_vec = convert_bit_depths_to_symbols(&code_length_depths);
962 let mut code_length_codes_arr = [0u16; CODE_LENGTH_CODES];
963 code_length_codes_arr.copy_from_slice(&code_length_codes_vec);
964
965 crate::trace::debug_eprintln!(
966 "STORE_HUFF: num_codes={}, meta_depths={:?}",
967 num_codes,
968 code_length_depths_arr
969 );
970
971 // Write meta-Huffman tree
972 store_meta_huffman_tree(num_codes, &code_length_depths_arr, writer)?;
973
974 // Special case: single code length code - set its depth to 0 for implicit encoding
975 if num_codes == 1 {
976 code_length_depths_arr[single_code] = 0;
977 }
978
979 // Write the compressed tree
980 store_compressed_tree(
981 &compressed,
982 &code_length_depths_arr,
983 &code_length_codes_arr,
984 writer,
985 )?;
986
987 Ok(())
988}
989
990// ============================================================================
991// Main Entry Point
992// ============================================================================
993
994/// Result of building a Huffman table.
995pub struct HuffmanTable {
996 /// Code depth (bit length) for each symbol.
997 pub depths: Vec<u8>,
998 /// Bit code for each symbol.
999 pub codes: Vec<u16>,
1000}
1001
1002/// Builds and stores a Huffman table from a histogram.
1003///
1004/// This is the main entry point. It handles:
1005/// - Single symbol (special 4-bit encoding)
1006/// - 2-4 symbols (simple Huffman code)
1007/// - 5+ symbols (full code length table)
1008///
1009/// ```cpp
1010/// // libjxl: enc_huffman.cc
1011/// Status BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length,
1012/// uint8_t* depth, uint16_t* bits, BitWriter* writer) {
1013/// size_t count = 0;
1014/// size_t s4[4] = {0};
1015/// for (size_t i = 0; i < length; i++) {
1016/// if (histogram[i]) {
1017/// if (count < 4) { s4[count] = i; }
1018/// else if (count > 4) { break; }
1019/// count++;
1020/// }
1021/// }
1022///
1023/// size_t max_bits = 0;
1024/// size_t max_bits_counter = length - 1;
1025/// while (max_bits_counter) { max_bits_counter >>= 1; ++max_bits; }
1026///
1027/// if (count <= 1) {
1028/// writer->Write(4, 1);
1029/// writer->Write(max_bits, s4[0]);
1030/// return true;
1031/// }
1032///
1033/// CreateHuffmanTree(histogram, length, 15, depth);
1034/// ConvertBitDepthsToSymbols(depth, length, bits);
1035///
1036/// if (count <= 4) {
1037/// StoreSimpleHuffmanTree(depth, s4, count, max_bits, writer);
1038/// } else {
1039/// JXL_RETURN_IF_ERROR(StoreHuffmanTree(depth, length, writer));
1040/// }
1041/// return true;
1042/// }
1043/// ```
1044pub fn build_and_store_huffman_tree(
1045 histogram: &[u32],
1046 writer: &mut BitWriter,
1047) -> Result<HuffmanTable> {
1048 let length = histogram.len();
1049
1050 // Debug: print non-zero symbols
1051 let nonzero: Vec<(usize, u32)> = histogram
1052 .iter()
1053 .enumerate()
1054 .filter(|&(_, h)| *h > 0)
1055 .map(|(i, h)| (i, *h))
1056 .collect();
1057 crate::trace::debug_eprintln!(
1058 "HUFFMAN_BUILD: alphabet_size={}, nonzero_symbols={:?}",
1059 length,
1060 nonzero
1061 );
1062
1063 // Count non-zero symbols and track first 4
1064 let mut count = 0usize;
1065 let mut s4 = [0usize; 4];
1066 for (i, &h) in histogram.iter().enumerate() {
1067 if h > 0 {
1068 if count < 4 {
1069 s4[count] = i;
1070 }
1071 count += 1;
1072 if count > 4 {
1073 break; // We know it's >4, that's all we need
1074 }
1075 }
1076 }
1077
1078 // Calculate max_bits = ceil(log2(length))
1079 let max_bits = if length <= 1 {
1080 0
1081 } else {
1082 (usize::BITS - (length - 1).leading_zeros()) as usize
1083 };
1084
1085 // Single symbol case
1086 if count <= 1 {
1087 let mut depths = vec![0u8; length];
1088 let codes = vec![0u16; length];
1089 // Write: 4 bits = 0001, then max_bits for symbol
1090 writer.write(4, 1)?;
1091 writer.write(max_bits, s4[0] as u64)?;
1092 // Single symbol has depth 0 (implicit, no bits needed to encode)
1093 if count == 1 {
1094 depths[s4[0]] = 0;
1095 }
1096 return Ok(HuffmanTable { depths, codes });
1097 }
1098
1099 // Build optimal Huffman tree
1100 let depths = create_huffman_tree(histogram, MAX_CODE_DEPTH);
1101 let codes = convert_bit_depths_to_symbols(&depths);
1102
1103 // Debug: print depths for non-zero symbols
1104 let _depths_info: Vec<(usize, u8, u16)> = nonzero
1105 .iter()
1106 .map(|(i, _)| (*i, depths[*i], codes[*i]))
1107 .collect();
1108 crate::trace::debug_eprintln!(
1109 "HUFFMAN_BUILD: depths/codes for used symbols: {:?}",
1110 _depths_info
1111 );
1112
1113 if count <= 4 {
1114 // Simple Huffman code
1115 crate::trace::debug_eprintln!("HUFFMAN_BUILD: using simple code for {} symbols", count);
1116 store_simple_huffman_tree(&depths, &mut s4, count, max_bits, writer)?;
1117 } else {
1118 // Full code length table
1119 crate::trace::debug_eprintln!("HUFFMAN_BUILD: using full table for {} symbols", count);
1120 store_huffman_tree(&depths, writer)?;
1121 }
1122
1123 Ok(HuffmanTable { depths, codes })
1124}
1125
1126/// Stores a simple Huffman tree (1-4 symbols).
1127///
1128/// ```cpp
1129/// // libjxl: enc_huffman.cc
1130/// void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4],
1131/// size_t num_symbols, size_t max_bits, BitWriter* writer) {
1132/// writer->Write(2, 1); // simple code marker
1133/// writer->Write(2, num_symbols - 1); // NSYM - 1
1134/// // Sort by depth
1135/// for (size_t i = 0; i < num_symbols; i++) {
1136/// for (size_t j = i + 1; j < num_symbols; j++) {
1137/// if (depths[symbols[j]] < depths[symbols[i]]) {
1138/// std::swap(symbols[j], symbols[i]);
1139/// }
1140/// }
1141/// }
1142/// // Write symbols
1143/// for (size_t i = 0; i < num_symbols; i++) {
1144/// writer->Write(max_bits, symbols[i]);
1145/// }
1146/// // tree_select for 4 symbols
1147/// if (num_symbols == 4) {
1148/// writer->Write(1, depths[symbols[0]] == 1 ? 1 : 0);
1149/// }
1150/// }
1151/// ```
1152fn store_simple_huffman_tree(
1153 depths: &[u8],
1154 symbols: &mut [usize; 4],
1155 num_symbols: usize,
1156 max_bits: usize,
1157 writer: &mut BitWriter,
1158) -> Result<()> {
1159 // Write simple code marker
1160 writer.write(2, 1)?;
1161 writer.write(2, (num_symbols - 1) as u64)?;
1162
1163 // Sort symbols by depth (ascending)
1164 for i in 0..num_symbols {
1165 for j in i + 1..num_symbols {
1166 if depths[symbols[j]] < depths[symbols[i]] {
1167 symbols.swap(i, j);
1168 }
1169 }
1170 }
1171
1172 // Write symbol indices
1173 for &sym in symbols.iter().take(num_symbols) {
1174 writer.write(max_bits, sym as u64)?;
1175 }
1176
1177 // tree_select for 4 symbols
1178 if num_symbols == 4 {
1179 let tree_select = if depths[symbols[0]] == 1 { 1 } else { 0 };
1180 writer.write(1, tree_select)?;
1181 }
1182
1183 Ok(())
1184}
1185
1186// ============================================================================
1187// Tests
1188// ============================================================================
1189
1190#[cfg(test)]
1191mod tests {
1192 use super::*;
1193
1194 #[test]
1195 fn test_create_huffman_tree_single_symbol() {
1196 let histogram = [0, 0, 100, 0, 0];
1197 let depths = create_huffman_tree(&histogram, 15);
1198 assert_eq!(depths, vec![0, 0, 1, 0, 0]);
1199 }
1200
1201 #[test]
1202 fn test_create_huffman_tree_two_symbols() {
1203 let histogram = [50, 0, 50, 0, 0];
1204 let depths = create_huffman_tree(&histogram, 15);
1205 // Both symbols should have depth 1
1206 assert_eq!(depths[0], 1);
1207 assert_eq!(depths[2], 1);
1208 }
1209
1210 #[test]
1211 fn test_create_huffman_tree_four_symbols() {
1212 let histogram = [10, 20, 30, 40, 0];
1213 let depths = create_huffman_tree(&histogram, 15);
1214 // Most frequent (40) should have shortest depth
1215 assert!(depths[3] <= depths[2]);
1216 assert!(depths[2] <= depths[1]);
1217 assert!(depths[1] <= depths[0]);
1218 }
1219
1220 #[test]
1221 fn test_create_huffman_tree_many_symbols() {
1222 // 10 symbols with varying frequencies
1223 let histogram = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512];
1224 let depths = create_huffman_tree(&histogram, 15);
1225
1226 // All non-zero symbols should have non-zero depth
1227 for (i, &d) in depths.iter().enumerate() {
1228 assert!(d > 0, "Symbol {} should have non-zero depth", i);
1229 }
1230
1231 // More frequent symbols should have shorter or equal depth
1232 for i in 1..10 {
1233 assert!(
1234 depths[i] <= depths[i - 1],
1235 "Symbol {} (freq {}) should have depth <= symbol {} (freq {})",
1236 i,
1237 histogram[i],
1238 i - 1,
1239 histogram[i - 1]
1240 );
1241 }
1242 }
1243
1244 #[test]
1245 fn test_create_huffman_tree_depth_limit() {
1246 // Many symbols with same frequency - could create deep tree
1247 let histogram = vec![1u32; 100];
1248 let depths = create_huffman_tree(&histogram, 15);
1249
1250 // All depths should be <= 15
1251 for &d in &depths {
1252 assert!(d <= 15, "Depth {} exceeds limit of 15", d);
1253 }
1254 }
1255
1256 #[test]
1257 fn test_reverse_bits() {
1258 assert_eq!(reverse_bits(1, 0), 0);
1259 assert_eq!(reverse_bits(1, 1), 1);
1260 assert_eq!(reverse_bits(2, 0b01), 0b10);
1261 assert_eq!(reverse_bits(2, 0b10), 0b01);
1262 assert_eq!(reverse_bits(3, 0b001), 0b100);
1263 assert_eq!(reverse_bits(4, 0b0001), 0b1000);
1264 assert_eq!(reverse_bits(8, 0b10101010), 0b01010101);
1265 }
1266
1267 #[test]
1268 fn test_convert_bit_depths_to_symbols() {
1269 // Two symbols at depth 1
1270 let depths = [1, 1, 0];
1271 let codes = convert_bit_depths_to_symbols(&depths);
1272 // Should get codes 0 and 1 (reversed)
1273 assert_eq!(codes[0], 0);
1274 assert_eq!(codes[1], 1);
1275 }
1276
1277 #[test]
1278 fn test_convert_bit_depths_three_symbols() {
1279 // Three symbols: depths 1, 2, 2
1280 let depths = [1, 2, 2, 0];
1281 let codes = convert_bit_depths_to_symbols(&depths);
1282 // Symbol 0: depth 1, code 0
1283 // Symbol 1: depth 2, code 10 -> reversed = 01
1284 // Symbol 2: depth 2, code 11 -> reversed = 11
1285 assert_eq!(codes[0], 0b0);
1286 assert_eq!(codes[1], 0b01);
1287 assert_eq!(codes[2], 0b11);
1288 }
1289
1290 #[test]
1291 fn test_write_huffman_tree_simple() {
1292 // Simple depth array
1293 let depths = [2, 2, 0, 0];
1294 let tree = write_huffman_tree(&depths);
1295 // Should just have two 2s
1296 assert_eq!(tree.codes, vec![2, 2]);
1297 assert!(tree.extra_bits.iter().all(|&x| x == 0));
1298 }
1299
1300 #[test]
1301 fn test_write_huffman_tree_with_zeros() {
1302 let depths = [3, 0, 0, 0, 3];
1303 let tree = write_huffman_tree(&depths);
1304 // Should encode 3, zeros (maybe RLE), 3
1305 // Note: trailing zeros are stripped, so we only go to index 4
1306 assert!(!tree.codes.is_empty());
1307 }
1308
1309 #[test]
1310 fn test_write_huffman_tree_long_zeros() {
1311 // Many zeros in the middle - should trigger RLE
1312 let mut depths = vec![0u8; 100];
1313 depths[0] = 3;
1314 depths[99] = 3;
1315 let tree = write_huffman_tree(&depths);
1316 // Should use code 17 for zero runs
1317 assert!(tree.codes.contains(&17));
1318 }
1319
1320 #[test]
1321 fn test_build_and_store_single_symbol() {
1322 let histogram = [0, 0, 100, 0, 0];
1323 let mut writer = BitWriter::new();
1324 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1325
1326 // Single symbol should have depth 0
1327 assert_eq!(table.depths[2], 0);
1328
1329 // Check output format: 4 bits = 0001, then max_bits for symbol
1330 let bytes = writer.finish_with_padding();
1331 assert!(!bytes.is_empty());
1332 }
1333
1334 #[test]
1335 fn test_build_and_store_two_symbols() {
1336 let histogram = [10, 0, 20, 0, 0];
1337 let mut writer = BitWriter::new();
1338 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1339
1340 // Both should have depth 1
1341 assert_eq!(table.depths[0], 1);
1342 assert_eq!(table.depths[2], 1);
1343
1344 let bytes = writer.finish_with_padding();
1345 // Should start with simple code marker (bits: 01)
1346 assert!(!bytes.is_empty());
1347 }
1348
1349 #[test]
1350 fn test_build_and_store_many_symbols() {
1351 // 10 symbols - requires full code length table
1352 let histogram: Vec<u32> = (1..=10).map(|x| x * 10).collect();
1353 let mut writer = BitWriter::new();
1354 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1355
1356 // All symbols should have non-zero depth
1357 for i in 0..10 {
1358 assert!(table.depths[i] > 0);
1359 }
1360
1361 let bytes = writer.finish_with_padding();
1362 assert!(!bytes.is_empty());
1363 }
1364
1365 #[test]
1366 fn test_build_and_store_256_symbols() {
1367 // Full byte alphabet with varying frequencies
1368 let mut histogram = vec![0u32; 256];
1369 for (i, h) in histogram.iter_mut().enumerate() {
1370 *h = (i as u32 + 1) * 2;
1371 }
1372
1373 let mut writer = BitWriter::new();
1374 let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1375
1376 // All symbols should have valid depth
1377 for i in 0..256 {
1378 assert!(table.depths[i] > 0 && table.depths[i] <= 15);
1379 }
1380
1381 let bytes = writer.finish_with_padding();
1382 assert!(!bytes.is_empty());
1383 }
1384
1385 #[test]
1386 fn test_canonical_codes_are_prefix_free() {
1387 let histogram: Vec<u32> = (1..=20).map(|x| x * 5).collect();
1388 let depths = create_huffman_tree(&histogram, 15);
1389 let codes = convert_bit_depths_to_symbols(&depths);
1390
1391 // Verify no code is a prefix of another
1392 for i in 0..20 {
1393 if depths[i] == 0 {
1394 continue;
1395 }
1396 for j in i + 1..20 {
1397 if depths[j] == 0 {
1398 continue;
1399 }
1400 let (shorter, longer) = if depths[i] <= depths[j] {
1401 ((codes[i], depths[i]), (codes[j], depths[j]))
1402 } else {
1403 ((codes[j], depths[j]), (codes[i], depths[i]))
1404 };
1405
1406 // Extract prefix of longer code
1407 let prefix_mask = (1u16 << shorter.1) - 1;
1408 let longer_prefix = longer.0 & prefix_mask;
1409
1410 assert_ne!(
1411 shorter.0, longer_prefix,
1412 "Code {} (depth {}) is prefix of code {} (depth {})",
1413 shorter.0, shorter.1, longer.0, longer.1
1414 );
1415 }
1416 }
1417 }
1418}