Skip to main content

jxl_encoder/entropy_coding/
huffman_tree.rs

1// Copyright (c) Imazen LLC and the JPEG XL Project Authors.
2// Algorithms and constants derived from libjxl (BSD-3-Clause).
3// Licensed under AGPL-3.0-or-later. Commercial licenses at https://www.imazen.io/pricing
4
5//! Full Huffman tree encoder supporting arbitrary alphabet sizes.
6//!
7//! This module ports libjxl's `enc_huffman_tree.cc` and parts of `enc_huffman.cc`.
8//! It supports encoding Huffman tables with more than 4 symbols using the
9//! code length table format with RLE compression.
10//!
11//! # Architecture
12//! 1. `create_huffman_tree` - builds optimal tree, outputs depth array
13//! 2. `convert_bit_depths_to_symbols` - converts depths to canonical codes
14//! 3. `write_huffman_tree` - RLE-compresses depth array for storage
15//! 4. `store_huffman_tree` - writes compressed tree to bitstream
16
17use crate::bit_writer::BitWriter;
18use crate::error::Result;
19
20/// Number of code length codes (0-15 for depths, 16-17 for RLE).
21const CODE_LENGTH_CODES: usize = 18;
22
23/// Maximum code depth for regular Huffman codes.
24const MAX_CODE_DEPTH: u8 = 15;
25
26/// Maximum code depth for code length codes (meta-Huffman).
27const MAX_CODE_LENGTH_DEPTH: u8 = 5;
28
29/// Order in which code length codes are stored.
30/// This ordering groups commonly-used codes together to allow truncation.
31///
32/// ```cpp
33/// // libjxl: enc_huffman.cc
34/// static const uint8_t kStorageOrder[kCodeLengthCodes] = {
35///     1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15};
36/// ```
37const STORAGE_ORDER: [usize; CODE_LENGTH_CODES] =
38    [1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15];
39
40/// Static Huffman code for encoding code length code depths.
41/// Maps depth values 0-5 to their codes.
42///
43/// ```cpp
44/// // libjxl: enc_huffman.cc
45/// // Symbol   Code
46/// // ------   ----
47/// // 0          00
48/// // 1        1110
49/// // 2         110
50/// // 3          01
51/// // 4          10
52/// // 5        1111
53/// static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {0, 7, 3, 2, 1, 15};
54/// static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {2, 4, 3, 2, 2, 4};
55/// ```
56const DEPTH_CODE_SYMBOLS: [u8; 6] = [0, 7, 3, 2, 1, 15];
57const DEPTH_CODE_BIT_LENGTHS: [u8; 6] = [2, 4, 3, 2, 2, 4];
58
59// ============================================================================
60// Huffman Tree Building
61// ============================================================================
62
63/// Node in the Huffman tree during construction.
64///
65/// ```cpp
66/// // libjxl: enc_huffman_tree.h
67/// struct HuffmanTree {
68///   uint32_t total_count;
69///   int16_t index_left;
70///   int16_t index_right_or_value;
71/// };
72/// ```
73#[derive(Clone, Copy, Debug)]
74struct HuffmanNode {
75    /// Total count (frequency) of this subtree.
76    total_count: u32,
77    /// Index of left child, or -1 if leaf.
78    index_left: i32,
79    /// Index of right child, or symbol value if leaf.
80    /// Note: libjxl uses i16, but we need i32 for alphabets > 32K symbols.
81    index_right_or_value: i32,
82}
83
84impl HuffmanNode {
85    fn leaf(count: u32, symbol: usize) -> Self {
86        Self {
87            total_count: count,
88            index_left: -1,
89            index_right_or_value: symbol as i32,
90        }
91    }
92
93    fn sentinel() -> Self {
94        Self {
95            total_count: u32::MAX,
96            index_left: -1,
97            index_right_or_value: -1,
98        }
99    }
100
101    #[allow(dead_code)]
102    fn is_leaf(&self) -> bool {
103        self.index_left < 0
104    }
105}
106
107/// Recursively set depth for all symbols in a subtree.
108///
109/// ```cpp
110/// // libjxl: enc_huffman_tree.cc
111/// void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth, uint8_t level) {
112///   if (p.index_left >= 0) {
113///     ++level;
114///     SetDepth(pool[p.index_left], pool, depth, level);
115///     SetDepth(pool[p.index_right_or_value], pool, depth, level);
116///   } else {
117///     depth[p.index_right_or_value] = level;
118///   }
119/// }
120/// ```
121fn set_depth(pool: &[HuffmanNode], root_idx: usize, depth: &mut [u8]) {
122    // Iterative DFS to avoid stack overflow on deep/degenerate Huffman trees.
123    // A degenerate tree with N leaves can have depth N-1, which overflows the
124    // call stack for N > ~10K symbols (common with patch position histograms).
125    let mut stack: Vec<(usize, u8)> = Vec::with_capacity(64);
126    stack.push((root_idx, 0));
127
128    while let Some((idx, level)) = stack.pop() {
129        let node = &pool[idx];
130        if node.index_left >= 0 {
131            let next_level = level + 1;
132            stack.push((node.index_left as usize, next_level));
133            stack.push((node.index_right_or_value as usize, next_level));
134        } else {
135            depth[node.index_right_or_value as usize] = level;
136        }
137    }
138}
139
140/// Creates an optimal Huffman tree from symbol frequencies.
141///
142/// Returns the depth (code length) for each symbol. Symbols with zero
143/// frequency get depth 0 (meaning they don't appear in the tree).
144///
145/// The tree depth is limited to `tree_limit` bits. If the natural tree
146/// would exceed this, the algorithm retries with inflated minimum counts.
147///
148/// ```cpp
149/// // libjxl: enc_huffman_tree.cc
150/// void CreateHuffmanTree(const uint32_t* data, const size_t length,
151///                        const int tree_limit, uint8_t* depth) {
152///   for (uint32_t count_limit = 1;; count_limit *= 2) {
153///     std::vector<HuffmanTree> tree;
154///     tree.reserve(2 * length + 1);
155///     // ... build tree ...
156///     if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
157///       break;
158///     }
159///   }
160/// }
161/// ```
162pub fn create_huffman_tree(histogram: &[u32], tree_limit: u8) -> Vec<u8> {
163    let length = histogram.len();
164    let mut depth = vec![0u8; length];
165
166    // Retry with increasing count_limit until tree fits in tree_limit bits
167    let mut count_limit: u32 = 1;
168    loop {
169        // Build list of leaf nodes for non-zero symbols
170        // Process in reverse order so that ties favor lower symbol indices
171        let mut tree: Vec<HuffmanNode> = Vec::with_capacity(2 * length + 1);
172        for i in (0..length).rev() {
173            if histogram[i] > 0 {
174                // Clamp count to at least count_limit - 1 to limit tree depth
175                let count = histogram[i].max(count_limit.saturating_sub(1));
176                tree.push(HuffmanNode::leaf(count, i));
177            }
178        }
179
180        let n = tree.len();
181        if n == 0 {
182            // No symbols - return all zeros
183            return depth;
184        }
185        if n == 1 {
186            // Single symbol - give it depth 1 (will be corrected by caller)
187            depth[tree[0].index_right_or_value as usize] = 1;
188            return depth;
189        }
190
191        // Sort by count (ascending), then by symbol index (descending for ties)
192        tree.sort_by(|a, b| {
193            if a.total_count != b.total_count {
194                a.total_count.cmp(&b.total_count)
195            } else {
196                // Higher index first for ties (reverse order)
197                b.index_right_or_value.cmp(&a.index_right_or_value)
198            }
199        });
200
201        // Add sentinel nodes
202        tree.push(HuffmanNode::sentinel());
203        tree.push(HuffmanNode::sentinel());
204
205        // Build tree using two-queue algorithm
206        // i: next leaf node, j: next internal node
207        let mut i = 0;
208        let mut j = n + 1;
209
210        for _ in 0..n - 1 {
211            // Pick two smallest nodes
212            let left = if tree[i].total_count <= tree[j].total_count {
213                let idx = i;
214                i += 1;
215                idx
216            } else {
217                let idx = j;
218                j += 1;
219                idx
220            };
221
222            let right = if tree[i].total_count <= tree[j].total_count {
223                let idx = i;
224                i += 1;
225                idx
226            } else {
227                let idx = j;
228                j += 1;
229                idx
230            };
231
232            // Create parent node at the sentinel position
233            let j_end = tree.len() - 1;
234            tree[j_end].total_count = tree[left].total_count + tree[right].total_count;
235            tree[j_end].index_left = left as i32;
236            tree[j_end].index_right_or_value = right as i32;
237
238            // Add new sentinel
239            tree.push(HuffmanNode::sentinel());
240        }
241
242        // Extract depths from tree
243        depth.fill(0);
244        set_depth(&tree, 2 * n - 1, &mut depth);
245
246        // Check if tree fits in limit
247        if depth.iter().copied().max().unwrap_or(0) <= tree_limit {
248            break;
249        }
250
251        // Tree too deep - retry with higher count_limit
252        count_limit = match count_limit.checked_mul(2) {
253            Some(v) => v,
254            None => break, // u32 overflow — stop retrying, use best depth so far
255        };
256    }
257
258    depth
259}
260
261// ============================================================================
262// Bit Code Generation
263// ============================================================================
264
265/// Reverses bits in a value.
266///
267/// ```cpp
268/// // libjxl: enc_huffman_tree.cc
269/// uint16_t ReverseBits(int num_bits, uint16_t bits) {
270///   static const size_t kLut[16] = {
271///       0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
272///       0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf};
273///   size_t retval = kLut[bits & 0xf];
274///   for (int i = 4; i < num_bits; i += 4) {
275///     retval <<= 4;
276///     bits = static_cast<uint16_t>(bits >> 4);
277///     retval |= kLut[bits & 0xf];
278///   }
279///   retval >>= (-num_bits & 0x3);
280///   return static_cast<uint16_t>(retval);
281/// }
282/// ```
283fn reverse_bits(num_bits: u8, bits: u16) -> u16 {
284    const LUT: [u16; 16] = [
285        0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
286    ];
287
288    let mut retval = LUT[(bits & 0xf) as usize];
289    let mut bits = bits;
290    let mut i = 4i32;
291    while i < num_bits as i32 {
292        retval <<= 4;
293        bits >>= 4;
294        retval |= LUT[(bits & 0xf) as usize];
295        i += 4;
296    }
297    retval >>= (-(num_bits as i32) & 0x3) as u32;
298    retval
299}
300
301/// Converts code depths to canonical Huffman codes.
302///
303/// Returns (codes, depths) where `codes[i]` is the bit pattern for symbol `i`.
304///
305/// ```cpp
306/// // libjxl: enc_huffman_tree.cc
307/// void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, uint16_t* bits) {
308///   const int kMaxBits = 16;
309///   uint16_t bl_count[kMaxBits] = {0};
310///   for (size_t i = 0; i < len; ++i) {
311///     ++bl_count[depth[i]];
312///   }
313///   bl_count[0] = 0;
314///   uint16_t next_code[kMaxBits];
315///   next_code[0] = 0;
316///   int code = 0;
317///   for (size_t i = 1; i < kMaxBits; ++i) {
318///     code = (code + bl_count[i - 1]) << 1;
319///     next_code[i] = static_cast<uint16_t>(code);
320///   }
321///   for (size_t i = 0; i < len; ++i) {
322///     if (depth[i]) {
323///       bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
324///     }
325///   }
326/// }
327/// ```
328pub fn convert_bit_depths_to_symbols(depth: &[u8]) -> Vec<u16> {
329    const MAX_BITS: usize = 16;
330    let len = depth.len();
331    let mut bits = vec![0u16; len];
332
333    // Count symbols at each depth
334    let mut bl_count = [0u16; MAX_BITS];
335    for &d in depth {
336        bl_count[d as usize] += 1;
337    }
338    bl_count[0] = 0; // Depth 0 means symbol doesn't exist
339
340    // Compute first code at each depth
341    let mut next_code = [0u16; MAX_BITS];
342    let mut code = 0i32;
343    for i in 1..MAX_BITS {
344        code = (code + bl_count[i - 1] as i32) << 1;
345        next_code[i] = code as u16;
346    }
347
348    // Assign codes to symbols
349    for i in 0..len {
350        let d = depth[i];
351        if d > 0 {
352            bits[i] = reverse_bits(d, next_code[d as usize]);
353            next_code[d as usize] += 1;
354        }
355    }
356
357    bits
358}
359
360// ============================================================================
361// RLE Compression of Code Lengths
362// ============================================================================
363
364/// RLE-compressed representation of a Huffman tree.
365#[derive(Debug, Clone)]
366pub struct CompressedTree {
367    /// Code length codes (0-17)
368    pub codes: Vec<u8>,
369    /// Extra bits for each code (used by codes 16 and 17)
370    pub extra_bits: Vec<u8>,
371}
372
373/// Reverse a slice in place.
374fn reverse_slice(v: &mut [u8], start: usize, end: usize) {
375    let mut s = start;
376    let mut e = end - 1;
377    while s < e {
378        v.swap(s, e);
379        s += 1;
380        e -= 1;
381    }
382}
383
384/// Write RLE-encoded repetitions of zeros.
385///
386/// ```cpp
387/// // libjxl: enc_huffman_tree.cc
388/// void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size,
389///                                       uint8_t* tree, uint8_t* extra_bits_data) {
390///   if (repetitions == 11) {
391///     tree[*tree_size] = 0;
392///     extra_bits_data[*tree_size] = 0;
393///     ++(*tree_size);
394///     --repetitions;
395///   }
396///   if (repetitions < 3) {
397///     for (size_t i = 0; i < repetitions; ++i) {
398///       tree[*tree_size] = 0;
399///       extra_bits_data[*tree_size] = 0;
400///       ++(*tree_size);
401///     }
402///   } else {
403///     repetitions -= 3;
404///     size_t start = *tree_size;
405///     while (true) {
406///       tree[*tree_size] = 17;
407///       extra_bits_data[*tree_size] = repetitions & 0x7;
408///       ++(*tree_size);
409///       repetitions >>= 3;
410///       if (repetitions == 0) {
411///         break;
412///       }
413///       --repetitions;
414///     }
415///     Reverse(tree, start, *tree_size);
416///     Reverse(extra_bits_data, start, *tree_size);
417///   }
418/// }
419/// ```
420fn write_repetitions_zeros(mut repetitions: usize, tree: &mut CompressedTree) {
421    // Special case: 11 reps would need code 17 with extra=8, but max extra is 7
422    if repetitions == 11 {
423        tree.codes.push(0);
424        tree.extra_bits.push(0);
425        repetitions -= 1;
426    }
427
428    if repetitions < 3 {
429        // Write individual zeros
430        for _ in 0..repetitions {
431            tree.codes.push(0);
432            tree.extra_bits.push(0);
433        }
434    } else {
435        // Use code 17: repeat zeros with 3 extra bits (3-10 reps)
436        repetitions -= 3;
437        let start = tree.codes.len();
438        loop {
439            tree.codes.push(17);
440            tree.extra_bits.push((repetitions & 0x7) as u8);
441            repetitions >>= 3;
442            if repetitions == 0 {
443                break;
444            }
445            repetitions -= 1;
446        }
447        // Reverse the codes we just wrote
448        let end = tree.codes.len();
449        reverse_slice(&mut tree.codes, start, end);
450        reverse_slice(&mut tree.extra_bits, start, end);
451    }
452}
453
454/// Write RLE-encoded repetitions of non-zero values.
455///
456/// ```cpp
457/// // libjxl: enc_huffman_tree.cc
458/// void WriteHuffmanTreeRepetitions(const uint8_t previous_value,
459///                                  const uint8_t value, size_t repetitions,
460///                                  size_t* tree_size, uint8_t* tree,
461///                                  uint8_t* extra_bits_data) {
462///   JXL_DASSERT(repetitions > 0);
463///   if (previous_value != value) {
464///     tree[*tree_size] = value;
465///     extra_bits_data[*tree_size] = 0;
466///     ++(*tree_size);
467///     --repetitions;
468///   }
469///   if (repetitions == 7) {
470///     tree[*tree_size] = value;
471///     extra_bits_data[*tree_size] = 0;
472///     ++(*tree_size);
473///     --repetitions;
474///   }
475///   if (repetitions < 3) {
476///     for (size_t i = 0; i < repetitions; ++i) {
477///       tree[*tree_size] = value;
478///       extra_bits_data[*tree_size] = 0;
479///       ++(*tree_size);
480///     }
481///   } else {
482///     repetitions -= 3;
483///     size_t start = *tree_size;
484///     while (true) {
485///       tree[*tree_size] = 16;
486///       extra_bits_data[*tree_size] = repetitions & 0x3;
487///       ++(*tree_size);
488///       repetitions >>= 2;
489///       if (repetitions == 0) {
490///         break;
491///       }
492///       --repetitions;
493///     }
494///     Reverse(tree, start, *tree_size);
495///     Reverse(extra_bits_data, start, *tree_size);
496///   }
497/// }
498/// ```
499fn write_repetitions_nonzero(
500    previous_value: u8,
501    value: u8,
502    mut repetitions: usize,
503    tree: &mut CompressedTree,
504) {
505    debug_assert!(repetitions > 0);
506
507    // If value changed, write it literally first
508    if previous_value != value {
509        tree.codes.push(value);
510        tree.extra_bits.push(0);
511        repetitions -= 1;
512    }
513
514    // Special case: 7 reps would need code 16 with extra=4, but max extra is 3
515    if repetitions == 7 {
516        tree.codes.push(value);
517        tree.extra_bits.push(0);
518        repetitions -= 1;
519    }
520
521    if repetitions < 3 {
522        // Write individual values
523        for _ in 0..repetitions {
524            tree.codes.push(value);
525            tree.extra_bits.push(0);
526        }
527    } else {
528        // Use code 16: repeat previous with 2 extra bits (3-6 reps)
529        repetitions -= 3;
530        let start = tree.codes.len();
531        loop {
532            tree.codes.push(16);
533            tree.extra_bits.push((repetitions & 0x3) as u8);
534            repetitions >>= 2;
535            if repetitions == 0 {
536                break;
537            }
538            repetitions -= 1;
539        }
540        // Reverse the codes we just wrote
541        let end = tree.codes.len();
542        reverse_slice(&mut tree.codes, start, end);
543        reverse_slice(&mut tree.extra_bits, start, end);
544    }
545}
546
547/// Decide whether to use RLE for zeros and non-zeros.
548///
549/// ```cpp
550/// // libjxl: enc_huffman_tree.cc
551/// static void DecideOverRleUse(const uint8_t* depth, const size_t length,
552///                              bool* use_rle_for_non_zero, bool* use_rle_for_zero) {
553///   size_t total_reps_zero = 0;
554///   size_t total_reps_non_zero = 0;
555///   size_t count_reps_zero = 1;
556///   size_t count_reps_non_zero = 1;
557///   for (size_t i = 0; i < length;) {
558///     const uint8_t value = depth[i];
559///     size_t reps = 1;
560///     for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
561///       ++reps;
562///     }
563///     if (reps >= 3 && value == 0) {
564///       total_reps_zero += reps;
565///       ++count_reps_zero;
566///     }
567///     if (reps >= 4 && value != 0) {
568///       total_reps_non_zero += reps;
569///       ++count_reps_non_zero;
570///     }
571///     i += reps;
572///   }
573///   *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
574///   *use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
575/// }
576/// ```
577fn decide_rle_use(depth: &[u8]) -> (bool, bool) {
578    let mut total_reps_zero = 0usize;
579    let mut total_reps_non_zero = 0usize;
580    let mut count_reps_zero = 1usize;
581    let mut count_reps_non_zero = 1usize;
582
583    let mut i = 0;
584    while i < depth.len() {
585        let value = depth[i];
586        let mut reps = 1usize;
587        while i + reps < depth.len() && depth[i + reps] == value {
588            reps += 1;
589        }
590        if reps >= 3 && value == 0 {
591            total_reps_zero += reps;
592            count_reps_zero += 1;
593        }
594        if reps >= 4 && value != 0 {
595            total_reps_non_zero += reps;
596            count_reps_non_zero += 1;
597        }
598        i += reps;
599    }
600
601    let use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
602    let use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
603    (use_rle_for_non_zero, use_rle_for_zero)
604}
605
606/// Compresses a depth array into RLE-encoded code length codes.
607///
608/// ```cpp
609/// // libjxl: enc_huffman_tree.cc
610/// void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
611///                       uint8_t* tree, uint8_t* extra_bits_data) {
612///   uint8_t previous_value = 8;
613///   // Throw away trailing zeros.
614///   size_t new_length = length;
615///   for (size_t i = 0; i < length; ++i) {
616///     if (depth[length - i - 1] == 0) {
617///       --new_length;
618///     } else {
619///       break;
620///     }
621///   }
622///   // First gather statistics on if it is a good idea to do rle.
623///   bool use_rle_for_non_zero = false;
624///   bool use_rle_for_zero = false;
625///   if (length > 50) {
626///     DecideOverRleUse(depth, new_length, &use_rle_for_non_zero, &use_rle_for_zero);
627///   }
628///   // Actual rle coding.
629///   for (size_t i = 0; i < new_length;) {
630///     const uint8_t value = depth[i];
631///     size_t reps = 1;
632///     if ((value != 0 && use_rle_for_non_zero) || (value == 0 && use_rle_for_zero)) {
633///       for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
634///         ++reps;
635///       }
636///     }
637///     if (value == 0) {
638///       WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
639///     } else {
640///       WriteHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree, extra_bits_data);
641///       previous_value = value;
642///     }
643///     i += reps;
644///   }
645/// }
646/// ```
647pub fn write_huffman_tree(depth: &[u8]) -> CompressedTree {
648    let mut tree = CompressedTree {
649        codes: Vec::new(),
650        extra_bits: Vec::new(),
651    };
652
653    // Throw away trailing zeros
654    let mut new_length = depth.len();
655    while new_length > 0 && depth[new_length - 1] == 0 {
656        new_length -= 1;
657    }
658
659    if new_length == 0 {
660        return tree;
661    }
662
663    // Decide whether to use RLE (only for longer sequences)
664    let (use_rle_for_non_zero, use_rle_for_zero) = if depth.len() > 50 {
665        decide_rle_use(&depth[..new_length])
666    } else {
667        (false, false)
668    };
669
670    // Encode with RLE
671    let mut previous_value = 8u8; // Initial "previous" value (won't match any real depth)
672    let mut i = 0;
673    while i < new_length {
674        let value = depth[i];
675        let mut reps = 1usize;
676
677        // Count repetitions if RLE is enabled for this value type
678        if (value != 0 && use_rle_for_non_zero) || (value == 0 && use_rle_for_zero) {
679            while i + reps < new_length && depth[i + reps] == value {
680                reps += 1;
681            }
682        }
683
684        if value == 0 {
685            write_repetitions_zeros(reps, &mut tree);
686        } else {
687            write_repetitions_nonzero(previous_value, value, reps, &mut tree);
688            previous_value = value;
689        }
690        i += reps;
691    }
692
693    tree
694}
695
696// ============================================================================
697// Bitstream Encoding
698// ============================================================================
699
700/// Writes the meta-Huffman tree (tree of tree) to the bitstream.
701///
702/// This encodes which code length codes (0-17) are used and their depths.
703///
704/// ```cpp
705/// // libjxl: enc_huffman.cc
706/// void StoreHuffmanTreeOfHuffmanTreeToBitMask(const int num_codes,
707///                                             const uint8_t* code_length_bitdepth,
708///                                             BitWriter* writer) {
709///   // Throw away trailing zeros:
710///   size_t codes_to_store = kCodeLengthCodes;
711///   if (num_codes > 1) {
712///     for (; codes_to_store > 0; --codes_to_store) {
713///       if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
714///         break;
715///       }
716///     }
717///   }
718///   size_t skip_some = 0;
719///   if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
720///       code_length_bitdepth[kStorageOrder[1]] == 0) {
721///     skip_some = 2;
722///     if (code_length_bitdepth[kStorageOrder[2]] == 0) {
723///       skip_some = 3;
724///     }
725///   }
726///   writer->Write(2, skip_some);
727///   for (size_t i = skip_some; i < codes_to_store; ++i) {
728///     size_t l = code_length_bitdepth[kStorageOrder[i]];
729///     writer->Write(kHuffmanBitLengthHuffmanCodeBitLengths[l],
730///                   kHuffmanBitLengthHuffmanCodeSymbols[l]);
731///   }
732/// }
733/// ```
734pub fn store_meta_huffman_tree(
735    num_codes: usize,
736    code_length_depths: &[u8; CODE_LENGTH_CODES],
737    writer: &mut BitWriter,
738) -> Result<()> {
739    // Find how many codes to store (trim trailing zeros in storage order)
740    let mut codes_to_store = CODE_LENGTH_CODES;
741    if num_codes > 1 {
742        while codes_to_store > 0 && code_length_depths[STORAGE_ORDER[codes_to_store - 1]] == 0 {
743            codes_to_store -= 1;
744        }
745    }
746
747    // Determine skip count (how many leading zeros in storage order)
748    let mut skip_some = 0usize;
749    if code_length_depths[STORAGE_ORDER[0]] == 0 && code_length_depths[STORAGE_ORDER[1]] == 0 {
750        skip_some = 2;
751        if code_length_depths[STORAGE_ORDER[2]] == 0 {
752            skip_some = 3;
753        }
754    }
755
756    crate::trace::debug_eprintln!(
757        "STORE_META: codes_to_store={}, skip_some={}",
758        codes_to_store,
759        skip_some
760    );
761
762    // Write skip count (2 bits) - this is the simple_code_or_skip field
763    // Values 0, 2, 3 indicate full tree with skip; value 1 would be simple code
764    let _bit_pos_start = writer.bits_written();
765    writer.write(2, skip_some as u64)?;
766    crate::trace::debug_eprintln!(
767        "  META: wrote hskip={} at bit {}",
768        skip_some,
769        _bit_pos_start
770    );
771
772    // Write code length depths using static Huffman code
773    // The decoder reads code lengths for symbols in storage order
774    let mut _bitacc = 0usize;
775    #[allow(clippy::unused_enumerate_index)]
776    for (_idx, &symbol) in STORAGE_ORDER[skip_some..codes_to_store].iter().enumerate() {
777        let depth_value = code_length_depths[symbol] as usize;
778        debug_assert!(depth_value <= 5, "Code length depth must be 0-5");
779        let bits = DEPTH_CODE_BIT_LENGTHS[depth_value] as usize;
780        let code = DEPTH_CODE_SYMBOLS[depth_value] as u64;
781        writer.write(bits, code)?;
782        if depth_value > 0 {
783            _bitacc += 32 >> depth_value;
784        }
785        crate::trace::debug_eprintln!(
786            "  META[{}]: symbol={}, depth={}, code=0b{:0width$b} ({} bits), bitacc={}",
787            skip_some + _idx,
788            symbol,
789            depth_value,
790            code,
791            bits,
792            _bitacc,
793            width = bits
794        );
795    }
796    crate::trace::debug_eprintln!("  META: final bitacc={} (should be 32)", _bitacc);
797
798    Ok(())
799}
800
801/// Writes the RLE-compressed Huffman tree to the bitstream.
802///
803/// ```cpp
804/// // libjxl: enc_huffman.cc
805/// Status StoreHuffmanTreeToBitMask(const size_t huffman_tree_size,
806///                                  const uint8_t* huffman_tree,
807///                                  const uint8_t* huffman_tree_extra_bits,
808///                                  const uint8_t* code_length_bitdepth,
809///                                  const uint16_t* code_length_bitdepth_symbols,
810///                                  BitWriter* writer) {
811///   for (size_t i = 0; i < huffman_tree_size; ++i) {
812///     size_t ix = huffman_tree[i];
813///     writer->Write(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix]);
814///     switch (ix) {
815///       case 16: writer->Write(2, huffman_tree_extra_bits[i]); break;
816///       case 17: writer->Write(3, huffman_tree_extra_bits[i]); break;
817///     }
818///   }
819///   return true;
820/// }
821/// ```
822pub fn store_compressed_tree(
823    tree: &CompressedTree,
824    code_length_depths: &[u8; CODE_LENGTH_CODES],
825    code_length_codes: &[u16; CODE_LENGTH_CODES],
826    writer: &mut BitWriter,
827) -> Result<()> {
828    crate::trace::debug_eprintln!("  TREE: meta_codes = {:?}", &code_length_codes[..5]);
829    crate::trace::debug_eprintln!("  TREE: meta_depths = {:?}", &code_length_depths[..5]);
830
831    for i in 0..tree.codes.len().min(10) {
832        let ix = tree.codes[i] as usize;
833        debug_assert!(ix < CODE_LENGTH_CODES);
834
835        // Write the code for this code length
836        let depth = code_length_depths[ix] as usize;
837        let code = code_length_codes[ix] as u64;
838        let _bit_pos = writer.bits_written();
839        if depth > 0 {
840            writer.write(depth, code)?;
841        }
842        crate::trace::debug_eprintln!(
843            "  TREE[{}]: code_len_code={}, meta_depth={}, meta_code=0b{:b}, extra_bits={}",
844            i,
845            ix,
846            depth,
847            code,
848            tree.extra_bits[i]
849        );
850
851        // Write extra bits for RLE codes
852        match ix {
853            16 => writer.write(2, tree.extra_bits[i] as u64)?,
854            17 => writer.write(3, tree.extra_bits[i] as u64)?,
855            _ => {}
856        }
857    }
858
859    // Write remaining codes silently
860    for i in 10..tree.codes.len() {
861        let ix = tree.codes[i] as usize;
862        let depth = code_length_depths[ix] as usize;
863        let code = code_length_codes[ix] as u64;
864        if depth > 0 {
865            writer.write(depth, code)?;
866        }
867        match ix {
868            16 => writer.write(2, tree.extra_bits[i] as u64)?,
869            17 => writer.write(3, tree.extra_bits[i] as u64)?,
870            _ => {}
871        }
872    }
873
874    Ok(())
875}
876
877/// Stores a full Huffman tree to the bitstream.
878///
879/// This is used when there are more than 4 unique symbols.
880///
881/// ```cpp
882/// // libjxl: enc_huffman.cc
883/// Status StoreHuffmanTree(const uint8_t* depths, size_t num, BitWriter* writer) {
884///   // Write the Huffman tree into the compact representation.
885///   auto arena = ...;
886///   uint8_t* huffman_tree = arena.data();
887///   uint8_t* huffman_tree_extra_bits = arena.data() + num;
888///   size_t huffman_tree_size = 0;
889///   WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree, huffman_tree_extra_bits);
890///
891///   // Calculate the statistics of the Huffman tree.
892///   uint32_t huffman_tree_histogram[kCodeLengthCodes] = {0};
893///   for (size_t i = 0; i < huffman_tree_size; ++i) {
894///     ++huffman_tree_histogram[huffman_tree[i]];
895///   }
896///
897///   int num_codes = 0;
898///   int code = 0;
899///   for (int i = 0; i < kCodeLengthCodes; ++i) {
900///     if (huffman_tree_histogram[i]) {
901///       if (num_codes == 0) { code = i; num_codes = 1; }
902///       else if (num_codes == 1) { num_codes = 2; break; }
903///     }
904///   }
905///
906///   // Calculate another Huffman tree for compressing the first.
907///   uint8_t code_length_bitdepth[kCodeLengthCodes] = {0};
908///   uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = {0};
909///   CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, 5, &code_length_bitdepth[0]);
910///   ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes, &code_length_bitdepth_symbols[0]);
911///
912///   StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth, writer);
913///
914///   if (num_codes == 1) {
915///     code_length_bitdepth[code] = 0;
916///   }
917///
918///   StoreHuffmanTreeToBitMask(...);
919///   return true;
920/// }
921/// ```
922pub fn store_huffman_tree(depths: &[u8], writer: &mut BitWriter) -> Result<()> {
923    // Debug: show raw depths for first and last few elements
924    let _first_10: Vec<u8> = depths.iter().take(10).copied().collect();
925    let _last_10: Vec<u8> = depths.iter().rev().take(10).rev().copied().collect();
926    crate::trace::debug_eprintln!(
927        "STORE_HUFF: depths len={}, first_10={:?}, last_10={:?}",
928        depths.len(),
929        _first_10,
930        _last_10
931    );
932
933    // RLE-compress the depth array
934    let compressed = write_huffman_tree(depths);
935    crate::trace::debug_eprintln!(
936        "STORE_HUFF: compressed codes={:?}, extra_bits={:?}",
937        compressed.codes,
938        compressed.extra_bits
939    );
940
941    // Build histogram of code length codes
942    let mut histogram = [0u32; CODE_LENGTH_CODES];
943    for &code in &compressed.codes {
944        histogram[code as usize] += 1;
945    }
946    crate::trace::debug_eprintln!("STORE_HUFF: code_length_histogram={:?}", histogram);
947
948    // Count how many distinct code length codes are used
949    let mut num_codes = 0;
950    let mut single_code = 0;
951    for (i, &count) in histogram.iter().enumerate() {
952        if count > 0 {
953            if num_codes == 0 {
954                single_code = i;
955                num_codes = 1;
956            } else if num_codes == 1 {
957                num_codes = 2;
958                break;
959            }
960        }
961    }
962
963    // Build Huffman tree for the code length codes (meta-Huffman)
964    let code_length_depths = create_huffman_tree(&histogram, MAX_CODE_LENGTH_DEPTH);
965    let mut code_length_depths_arr = [0u8; CODE_LENGTH_CODES];
966    code_length_depths_arr.copy_from_slice(&code_length_depths);
967
968    let code_length_codes_vec = convert_bit_depths_to_symbols(&code_length_depths);
969    let mut code_length_codes_arr = [0u16; CODE_LENGTH_CODES];
970    code_length_codes_arr.copy_from_slice(&code_length_codes_vec);
971
972    crate::trace::debug_eprintln!(
973        "STORE_HUFF: num_codes={}, meta_depths={:?}",
974        num_codes,
975        code_length_depths_arr
976    );
977
978    // Write meta-Huffman tree
979    store_meta_huffman_tree(num_codes, &code_length_depths_arr, writer)?;
980
981    // Special case: single code length code - set its depth to 0 for implicit encoding
982    if num_codes == 1 {
983        code_length_depths_arr[single_code] = 0;
984    }
985
986    // Write the compressed tree
987    store_compressed_tree(
988        &compressed,
989        &code_length_depths_arr,
990        &code_length_codes_arr,
991        writer,
992    )?;
993
994    Ok(())
995}
996
997// ============================================================================
998// Main Entry Point
999// ============================================================================
1000
1001/// Result of building a Huffman table.
1002pub struct HuffmanTable {
1003    /// Code depth (bit length) for each symbol.
1004    pub depths: Vec<u8>,
1005    /// Bit code for each symbol.
1006    pub codes: Vec<u16>,
1007}
1008
1009/// Builds and stores a Huffman table from a histogram.
1010///
1011/// This is the main entry point. It handles:
1012/// - Single symbol (special 4-bit encoding)
1013/// - 2-4 symbols (simple Huffman code)
1014/// - 5+ symbols (full code length table)
1015///
1016/// ```cpp
1017/// // libjxl: enc_huffman.cc
1018/// Status BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length,
1019///                                 uint8_t* depth, uint16_t* bits, BitWriter* writer) {
1020///   size_t count = 0;
1021///   size_t s4[4] = {0};
1022///   for (size_t i = 0; i < length; i++) {
1023///     if (histogram[i]) {
1024///       if (count < 4) { s4[count] = i; }
1025///       else if (count > 4) { break; }
1026///       count++;
1027///     }
1028///   }
1029///
1030///   size_t max_bits = 0;
1031///   size_t max_bits_counter = length - 1;
1032///   while (max_bits_counter) { max_bits_counter >>= 1; ++max_bits; }
1033///
1034///   if (count <= 1) {
1035///     writer->Write(4, 1);
1036///     writer->Write(max_bits, s4[0]);
1037///     return true;
1038///   }
1039///
1040///   CreateHuffmanTree(histogram, length, 15, depth);
1041///   ConvertBitDepthsToSymbols(depth, length, bits);
1042///
1043///   if (count <= 4) {
1044///     StoreSimpleHuffmanTree(depth, s4, count, max_bits, writer);
1045///   } else {
1046///     JXL_RETURN_IF_ERROR(StoreHuffmanTree(depth, length, writer));
1047///   }
1048///   return true;
1049/// }
1050/// ```
1051pub fn build_and_store_huffman_tree(
1052    histogram: &[u32],
1053    writer: &mut BitWriter,
1054) -> Result<HuffmanTable> {
1055    let length = histogram.len();
1056
1057    // Debug: print non-zero symbols
1058    let nonzero: Vec<(usize, u32)> = histogram
1059        .iter()
1060        .enumerate()
1061        .filter(|&(_, h)| *h > 0)
1062        .map(|(i, h)| (i, *h))
1063        .collect();
1064    crate::trace::debug_eprintln!(
1065        "HUFFMAN_BUILD: alphabet_size={}, nonzero_symbols={:?}",
1066        length,
1067        nonzero
1068    );
1069
1070    // Count non-zero symbols and track first 4
1071    let mut count = 0usize;
1072    let mut s4 = [0usize; 4];
1073    for (i, &h) in histogram.iter().enumerate() {
1074        if h > 0 {
1075            if count < 4 {
1076                s4[count] = i;
1077            }
1078            count += 1;
1079            if count > 4 {
1080                break; // We know it's >4, that's all we need
1081            }
1082        }
1083    }
1084
1085    // Calculate max_bits = ceil(log2(length))
1086    let max_bits = if length <= 1 {
1087        0
1088    } else {
1089        (usize::BITS - (length - 1).leading_zeros()) as usize
1090    };
1091
1092    // Single symbol case
1093    if count <= 1 {
1094        let mut depths = vec![0u8; length];
1095        let codes = vec![0u16; length];
1096        // Write: 4 bits = 0001, then max_bits for symbol
1097        writer.write(4, 1)?;
1098        writer.write(max_bits, s4[0] as u64)?;
1099        // Single symbol has depth 0 (implicit, no bits needed to encode)
1100        if count == 1 {
1101            depths[s4[0]] = 0;
1102        }
1103        return Ok(HuffmanTable { depths, codes });
1104    }
1105
1106    // Build optimal Huffman tree
1107    let depths = create_huffman_tree(histogram, MAX_CODE_DEPTH);
1108    let codes = convert_bit_depths_to_symbols(&depths);
1109
1110    // Debug: print depths for non-zero symbols
1111    let _depths_info: Vec<(usize, u8, u16)> = nonzero
1112        .iter()
1113        .map(|(i, _)| (*i, depths[*i], codes[*i]))
1114        .collect();
1115    crate::trace::debug_eprintln!(
1116        "HUFFMAN_BUILD: depths/codes for used symbols: {:?}",
1117        _depths_info
1118    );
1119
1120    if count <= 4 {
1121        // Simple Huffman code
1122        crate::trace::debug_eprintln!("HUFFMAN_BUILD: using simple code for {} symbols", count);
1123        store_simple_huffman_tree(&depths, &mut s4, count, max_bits, writer)?;
1124    } else {
1125        // Full code length table
1126        crate::trace::debug_eprintln!("HUFFMAN_BUILD: using full table for {} symbols", count);
1127        store_huffman_tree(&depths, writer)?;
1128    }
1129
1130    Ok(HuffmanTable { depths, codes })
1131}
1132
1133/// Stores a simple Huffman tree (1-4 symbols).
1134///
1135/// ```cpp
1136/// // libjxl: enc_huffman.cc
1137/// void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4],
1138///                             size_t num_symbols, size_t max_bits, BitWriter* writer) {
1139///   writer->Write(2, 1);  // simple code marker
1140///   writer->Write(2, num_symbols - 1);  // NSYM - 1
1141///   // Sort by depth
1142///   for (size_t i = 0; i < num_symbols; i++) {
1143///     for (size_t j = i + 1; j < num_symbols; j++) {
1144///       if (depths[symbols[j]] < depths[symbols[i]]) {
1145///         std::swap(symbols[j], symbols[i]);
1146///       }
1147///     }
1148///   }
1149///   // Write symbols
1150///   for (size_t i = 0; i < num_symbols; i++) {
1151///     writer->Write(max_bits, symbols[i]);
1152///   }
1153///   // tree_select for 4 symbols
1154///   if (num_symbols == 4) {
1155///     writer->Write(1, depths[symbols[0]] == 1 ? 1 : 0);
1156///   }
1157/// }
1158/// ```
1159fn store_simple_huffman_tree(
1160    depths: &[u8],
1161    symbols: &mut [usize; 4],
1162    num_symbols: usize,
1163    max_bits: usize,
1164    writer: &mut BitWriter,
1165) -> Result<()> {
1166    // Write simple code marker
1167    writer.write(2, 1)?;
1168    writer.write(2, (num_symbols - 1) as u64)?;
1169
1170    // Sort symbols by depth (ascending)
1171    for i in 0..num_symbols {
1172        for j in i + 1..num_symbols {
1173            if depths[symbols[j]] < depths[symbols[i]] {
1174                symbols.swap(i, j);
1175            }
1176        }
1177    }
1178
1179    // Write symbol indices
1180    for &sym in symbols.iter().take(num_symbols) {
1181        writer.write(max_bits, sym as u64)?;
1182    }
1183
1184    // tree_select for 4 symbols
1185    if num_symbols == 4 {
1186        let tree_select = if depths[symbols[0]] == 1 { 1 } else { 0 };
1187        writer.write(1, tree_select)?;
1188    }
1189
1190    Ok(())
1191}
1192
1193// ============================================================================
1194// Tests
1195// ============================================================================
1196
1197#[cfg(test)]
1198mod tests {
1199    use super::*;
1200
1201    #[test]
1202    fn test_create_huffman_tree_single_symbol() {
1203        let histogram = [0, 0, 100, 0, 0];
1204        let depths = create_huffman_tree(&histogram, 15);
1205        assert_eq!(depths, vec![0, 0, 1, 0, 0]);
1206    }
1207
1208    #[test]
1209    fn test_create_huffman_tree_two_symbols() {
1210        let histogram = [50, 0, 50, 0, 0];
1211        let depths = create_huffman_tree(&histogram, 15);
1212        // Both symbols should have depth 1
1213        assert_eq!(depths[0], 1);
1214        assert_eq!(depths[2], 1);
1215    }
1216
1217    #[test]
1218    fn test_create_huffman_tree_four_symbols() {
1219        let histogram = [10, 20, 30, 40, 0];
1220        let depths = create_huffman_tree(&histogram, 15);
1221        // Most frequent (40) should have shortest depth
1222        assert!(depths[3] <= depths[2]);
1223        assert!(depths[2] <= depths[1]);
1224        assert!(depths[1] <= depths[0]);
1225    }
1226
1227    #[test]
1228    fn test_create_huffman_tree_many_symbols() {
1229        // 10 symbols with varying frequencies
1230        let histogram = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512];
1231        let depths = create_huffman_tree(&histogram, 15);
1232
1233        // All non-zero symbols should have non-zero depth
1234        for (i, &d) in depths.iter().enumerate() {
1235            assert!(d > 0, "Symbol {} should have non-zero depth", i);
1236        }
1237
1238        // More frequent symbols should have shorter or equal depth
1239        for i in 1..10 {
1240            assert!(
1241                depths[i] <= depths[i - 1],
1242                "Symbol {} (freq {}) should have depth <= symbol {} (freq {})",
1243                i,
1244                histogram[i],
1245                i - 1,
1246                histogram[i - 1]
1247            );
1248        }
1249    }
1250
1251    #[test]
1252    fn test_create_huffman_tree_depth_limit() {
1253        // Many symbols with same frequency - could create deep tree
1254        let histogram = vec![1u32; 100];
1255        let depths = create_huffman_tree(&histogram, 15);
1256
1257        // All depths should be <= 15
1258        for &d in &depths {
1259            assert!(d <= 15, "Depth {} exceeds limit of 15", d);
1260        }
1261    }
1262
1263    #[test]
1264    fn test_reverse_bits() {
1265        assert_eq!(reverse_bits(1, 0), 0);
1266        assert_eq!(reverse_bits(1, 1), 1);
1267        assert_eq!(reverse_bits(2, 0b01), 0b10);
1268        assert_eq!(reverse_bits(2, 0b10), 0b01);
1269        assert_eq!(reverse_bits(3, 0b001), 0b100);
1270        assert_eq!(reverse_bits(4, 0b0001), 0b1000);
1271        assert_eq!(reverse_bits(8, 0b10101010), 0b01010101);
1272    }
1273
1274    #[test]
1275    fn test_convert_bit_depths_to_symbols() {
1276        // Two symbols at depth 1
1277        let depths = [1, 1, 0];
1278        let codes = convert_bit_depths_to_symbols(&depths);
1279        // Should get codes 0 and 1 (reversed)
1280        assert_eq!(codes[0], 0);
1281        assert_eq!(codes[1], 1);
1282    }
1283
1284    #[test]
1285    fn test_convert_bit_depths_three_symbols() {
1286        // Three symbols: depths 1, 2, 2
1287        let depths = [1, 2, 2, 0];
1288        let codes = convert_bit_depths_to_symbols(&depths);
1289        // Symbol 0: depth 1, code 0
1290        // Symbol 1: depth 2, code 10 -> reversed = 01
1291        // Symbol 2: depth 2, code 11 -> reversed = 11
1292        assert_eq!(codes[0], 0b0);
1293        assert_eq!(codes[1], 0b01);
1294        assert_eq!(codes[2], 0b11);
1295    }
1296
1297    #[test]
1298    fn test_write_huffman_tree_simple() {
1299        // Simple depth array
1300        let depths = [2, 2, 0, 0];
1301        let tree = write_huffman_tree(&depths);
1302        // Should just have two 2s
1303        assert_eq!(tree.codes, vec![2, 2]);
1304        assert!(tree.extra_bits.iter().all(|&x| x == 0));
1305    }
1306
1307    #[test]
1308    fn test_write_huffman_tree_with_zeros() {
1309        let depths = [3, 0, 0, 0, 3];
1310        let tree = write_huffman_tree(&depths);
1311        // Should encode 3, zeros (maybe RLE), 3
1312        // Note: trailing zeros are stripped, so we only go to index 4
1313        assert!(!tree.codes.is_empty());
1314    }
1315
1316    #[test]
1317    fn test_write_huffman_tree_long_zeros() {
1318        // Many zeros in the middle - should trigger RLE
1319        let mut depths = vec![0u8; 100];
1320        depths[0] = 3;
1321        depths[99] = 3;
1322        let tree = write_huffman_tree(&depths);
1323        // Should use code 17 for zero runs
1324        assert!(tree.codes.contains(&17));
1325    }
1326
1327    #[test]
1328    fn test_build_and_store_single_symbol() {
1329        let histogram = [0, 0, 100, 0, 0];
1330        let mut writer = BitWriter::new();
1331        let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1332
1333        // Single symbol should have depth 0
1334        assert_eq!(table.depths[2], 0);
1335
1336        // Check output format: 4 bits = 0001, then max_bits for symbol
1337        let bytes = writer.finish_with_padding();
1338        assert!(!bytes.is_empty());
1339    }
1340
1341    #[test]
1342    fn test_build_and_store_two_symbols() {
1343        let histogram = [10, 0, 20, 0, 0];
1344        let mut writer = BitWriter::new();
1345        let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1346
1347        // Both should have depth 1
1348        assert_eq!(table.depths[0], 1);
1349        assert_eq!(table.depths[2], 1);
1350
1351        let bytes = writer.finish_with_padding();
1352        // Should start with simple code marker (bits: 01)
1353        assert!(!bytes.is_empty());
1354    }
1355
1356    #[test]
1357    fn test_build_and_store_many_symbols() {
1358        // 10 symbols - requires full code length table
1359        let histogram: Vec<u32> = (1..=10).map(|x| x * 10).collect();
1360        let mut writer = BitWriter::new();
1361        let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1362
1363        // All symbols should have non-zero depth
1364        for i in 0..10 {
1365            assert!(table.depths[i] > 0);
1366        }
1367
1368        let bytes = writer.finish_with_padding();
1369        assert!(!bytes.is_empty());
1370    }
1371
1372    #[test]
1373    fn test_build_and_store_256_symbols() {
1374        // Full byte alphabet with varying frequencies
1375        let mut histogram = vec![0u32; 256];
1376        for (i, h) in histogram.iter_mut().enumerate() {
1377            *h = (i as u32 + 1) * 2;
1378        }
1379
1380        let mut writer = BitWriter::new();
1381        let table = build_and_store_huffman_tree(&histogram, &mut writer).unwrap();
1382
1383        // All symbols should have valid depth
1384        for i in 0..256 {
1385            assert!(table.depths[i] > 0 && table.depths[i] <= 15);
1386        }
1387
1388        let bytes = writer.finish_with_padding();
1389        assert!(!bytes.is_empty());
1390    }
1391
1392    #[test]
1393    fn test_canonical_codes_are_prefix_free() {
1394        let histogram: Vec<u32> = (1..=20).map(|x| x * 5).collect();
1395        let depths = create_huffman_tree(&histogram, 15);
1396        let codes = convert_bit_depths_to_symbols(&depths);
1397
1398        // Verify no code is a prefix of another
1399        for i in 0..20 {
1400            if depths[i] == 0 {
1401                continue;
1402            }
1403            for j in i + 1..20 {
1404                if depths[j] == 0 {
1405                    continue;
1406                }
1407                let (shorter, longer) = if depths[i] <= depths[j] {
1408                    ((codes[i], depths[i]), (codes[j], depths[j]))
1409                } else {
1410                    ((codes[j], depths[j]), (codes[i], depths[i]))
1411                };
1412
1413                // Extract prefix of longer code
1414                let prefix_mask = (1u16 << shorter.1) - 1;
1415                let longer_prefix = longer.0 & prefix_mask;
1416
1417                assert_ne!(
1418                    shorter.0, longer_prefix,
1419                    "Code {} (depth {}) is prefix of code {} (depth {})",
1420                    shorter.0, shorter.1, longer.0, longer.1
1421                );
1422            }
1423        }
1424    }
1425}