Skip to main content

gamut_webp/vp8l/
prefix.rs

1//! VP8L canonical prefix (Huffman) codes (RFC 9649 §3.7).
2//!
3//! VP8L entropy-codes symbols with canonical prefix codes built from per-symbol code lengths. A
4//! prefix-code group bundles five codes (green+length+cache, red, blue, alpha, distance), and meta
5//! prefix codes select a group per block via an entropy image (§3.7.1-§3.7.3).
6//!
7//! # Bit order
8//!
9//! VP8L canonical codes are assigned in the usual increasing-by-(length, symbol) manner — the same
10//! convention as DEFLATE — but written into an **LSB-first** stream. The encoder emits each code
11//! *bit-reversed* to its length (see [`reverse_bits`]) so that the first bit on the wire is the
12//! code's most-significant bit; the decoder ([`PrefixCode::read_symbol`]) reads bit by bit,
13//! rebuilding the code MSB-first (the canonical "puff" decode), so no reversal is needed on read.
14//!
15//! # Single-symbol codes
16//!
17//! A code with a single used symbol is a complete tree that **consumes no bits** (RFC 9649 §3.7.2):
18//! the symbol is implicit. Both [`PrefixEncoder::write_symbol`] (writes nothing) and
19//! [`PrefixCode::read_symbol`] (returns the symbol without reading) honor this, so they stay in
20//! lock-step. An empty alphabet is coded as a single symbol `0`.
21
22use gamut_core::{Error, Result};
23
24use crate::vp8l::bit_io::{BitReader, BitWriter};
25
26/// Maximum prefix-code length in bits (RFC 9649 §3.7.2).
27pub const MAX_CODE_LENGTH: usize = 15;
28/// Number of literal symbols per channel (a full 8-bit byte).
29pub const NUM_LITERAL_CODES: usize = 256;
30/// Number of LZ77 length prefix codes packed into the green alphabet (§5.2.2).
31pub const NUM_LENGTH_CODES: usize = 24;
32/// Number of distance prefix codes (§5.2.2).
33pub const NUM_DISTANCE_CODES: usize = 40;
34/// Number of code-length code symbols (literals 0..=15 plus repeat codes 16/17/18) (§3.7.2).
35pub const CODE_LENGTH_CODES: usize = 19;
36
37/// The order in which code-length code lengths appear on the wire (RFC 9649 §3.7.2).
38pub const CODE_LENGTH_CODE_ORDER: [usize; CODE_LENGTH_CODES] = [
39    17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
40];
41
42/// Default code length assumed by repeat-code 16 before any nonzero length is seen (§3.7.2).
43const DEFAULT_CODE_LENGTH: u8 = 8;
44
45/// Size of the green/length/cache alphabet for a given color-cache size (0 if the cache is off).
46#[must_use]
47pub fn green_alphabet_size(color_cache_size: usize) -> usize {
48    NUM_LITERAL_CODES + NUM_LENGTH_CODES + color_cache_size
49}
50
51/// Reverses the low `num_bits` of `value` (used to emit canonical codes MSB-first into the
52/// LSB-first stream).
53#[must_use]
54pub fn reverse_bits(value: u16, num_bits: u8) -> u16 {
55    let mut v = value;
56    let mut r = 0u16;
57    for _ in 0..num_bits {
58        r = (r << 1) | (v & 1);
59        v >>= 1;
60    }
61    r
62}
63
64/// A canonical prefix (Huffman) decoder built from per-symbol code lengths (RFC 9649 §3.7.2).
65///
66/// Decoding uses the classic canonical algorithm (no large lookup table): bits are read one at a
67/// time and accumulated MSB-first until they identify a symbol. A single-symbol code returns its
68/// symbol without consuming any bits.
69#[derive(Debug, Clone)]
70pub struct PrefixCode {
71    /// `counts[len]` = number of symbols coded with length `len`.
72    counts: [u16; MAX_CODE_LENGTH + 1],
73    /// Symbols sorted by `(length, symbol)`.
74    symbols: Vec<u16>,
75    /// Set for a single-symbol code (consumes 0 bits, always returns this symbol).
76    single: Option<u16>,
77}
78
79impl PrefixCode {
80    /// Builds a decoder from `code_lengths` (one entry per symbol; `0` = unused).
81    ///
82    /// # Errors
83    ///
84    /// Returns [`Error::InvalidInput`] if a length exceeds [`MAX_CODE_LENGTH`], if no symbol is
85    /// used, or if the lengths do not form a complete tree (the single-symbol leaf is the one
86    /// permitted incomplete tree, per §3.7.2).
87    pub fn from_code_lengths(code_lengths: &[u8]) -> Result<Self> {
88        let mut counts = [0u16; MAX_CODE_LENGTH + 1];
89        let mut n_used = 0usize;
90        let mut last_used = 0u16;
91        for (sym, &len) in code_lengths.iter().enumerate() {
92            if len as usize > MAX_CODE_LENGTH {
93                return Err(Error::InvalidInput("VP8L: prefix code length too large"));
94            }
95            if len > 0 {
96                counts[len as usize] += 1;
97                n_used += 1;
98                last_used = sym as u16;
99            }
100        }
101        if n_used == 0 {
102            return Err(Error::InvalidInput("VP8L: empty prefix code"));
103        }
104        if n_used == 1 {
105            return Ok(Self {
106                counts,
107                symbols: Vec::new(),
108                single: Some(last_used),
109            });
110        }
111        // Completeness check (Kraft equality), over-subscription detected as a negative remainder.
112        let mut left = 1i32;
113        for &count in counts.iter().take(MAX_CODE_LENGTH + 1).skip(1) {
114            left <<= 1;
115            left -= i32::from(count);
116            if left < 0 {
117                return Err(Error::InvalidInput("VP8L: over-subscribed prefix code"));
118            }
119        }
120        if left != 0 {
121            return Err(Error::InvalidInput("VP8L: incomplete prefix code"));
122        }
123        // Sort symbols by (length, symbol) into a flat table.
124        let mut offsets = [0usize; MAX_CODE_LENGTH + 2];
125        for len in 1..=MAX_CODE_LENGTH {
126            offsets[len + 1] = offsets[len] + usize::from(counts[len]);
127        }
128        let mut symbols = vec![0u16; n_used];
129        for (sym, &len) in code_lengths.iter().enumerate() {
130            if len > 0 {
131                let slot = &mut offsets[len as usize];
132                symbols[*slot] = sym as u16;
133                *slot += 1;
134            }
135        }
136        Ok(Self {
137            counts,
138            symbols,
139            single: None,
140        })
141    }
142
143    /// Reads one symbol from `r`.
144    ///
145    /// # Errors
146    ///
147    /// Returns [`Error::InvalidInput`] on truncation or if the bits do not match any code.
148    pub fn read_symbol(&self, r: &mut BitReader<'_>) -> Result<u16> {
149        if let Some(sym) = self.single {
150            return Ok(sym);
151        }
152        let mut code: i32 = 0;
153        let mut first: i32 = 0;
154        let mut index: usize = 0;
155        for len in 1..=MAX_CODE_LENGTH {
156            code |= r.read_bit()? as i32;
157            let count = i32::from(self.counts[len]);
158            if code - first < count {
159                let pos = index + (code - first) as usize;
160                return self
161                    .symbols
162                    .get(pos)
163                    .copied()
164                    .ok_or(Error::InvalidInput("VP8L: prefix code index out of range"));
165            }
166            index += count as usize;
167            first += count;
168            first <<= 1;
169            code <<= 1;
170        }
171        Err(Error::InvalidInput("VP8L: invalid prefix code"))
172    }
173}
174
175/// Reads a single prefix code's lengths from the bitstream (simple or normal variant) and builds it
176/// (RFC 9649 §3.7.2). `alphabet_size` bounds the symbols and `max_symbol`.
177///
178/// # Errors
179///
180/// Returns [`Error::InvalidInput`] on any malformed code-length coding or truncation.
181pub fn read_prefix_code(r: &mut BitReader<'_>, alphabet_size: usize) -> Result<PrefixCode> {
182    if r.read_bit()? == 1 {
183        read_simple_prefix_code(r, alphabet_size)
184    } else {
185        read_normal_prefix_code(r, alphabet_size)
186    }
187}
188
189/// Reads the *simple code length code* variant: 1 or 2 symbols, each with code length 1 (§3.7.2).
190fn read_simple_prefix_code(r: &mut BitReader<'_>, alphabet_size: usize) -> Result<PrefixCode> {
191    let num_symbols = r.read_bit()? + 1; // 1 or 2
192    let is_first_8bits = r.read_bit()?;
193    let mut lengths = vec![0u8; alphabet_size];
194    let symbol0 = r.read_bits(1 + 7 * is_first_8bits)? as usize;
195    if symbol0 >= alphabet_size {
196        return Err(Error::InvalidInput(
197            "VP8L: simple prefix symbol out of range",
198        ));
199    }
200    lengths[symbol0] = 1;
201    if num_symbols == 2 {
202        let symbol1 = r.read_bits(8)? as usize;
203        if symbol1 >= alphabet_size {
204            return Err(Error::InvalidInput(
205                "VP8L: simple prefix symbol out of range",
206            ));
207        }
208        lengths[symbol1] = 1;
209    }
210    PrefixCode::from_code_lengths(&lengths)
211}
212
213/// Reads the *normal code length code* variant (§3.7.2): a meta code over `code_length_code_lengths`
214/// drives literal lengths plus the repeat codes 16/17/18.
215fn read_normal_prefix_code(r: &mut BitReader<'_>, alphabet_size: usize) -> Result<PrefixCode> {
216    let num_code_lengths = 4 + r.read_bits(4)? as usize;
217    if num_code_lengths > CODE_LENGTH_CODES {
218        return Err(Error::InvalidInput("VP8L: too many code-length codes"));
219    }
220    let mut cl_lengths = [0u8; CODE_LENGTH_CODES];
221    for &order in CODE_LENGTH_CODE_ORDER.iter().take(num_code_lengths) {
222        cl_lengths[order] = r.read_bits(3)? as u8;
223    }
224    let cl_code = PrefixCode::from_code_lengths(&cl_lengths)?;
225
226    let mut max_symbol = if r.read_bit()? != 0 {
227        let length_nbits = 2 + 2 * r.read_bits(3)?;
228        2 + r.read_bits(length_nbits)? as usize
229    } else {
230        alphabet_size
231    };
232    if max_symbol > alphabet_size {
233        return Err(Error::InvalidInput("VP8L: max_symbol exceeds alphabet"));
234    }
235
236    let mut lengths = vec![0u8; alphabet_size];
237    let mut prev_len = DEFAULT_CODE_LENGTH;
238    let mut symbol = 0usize;
239    while symbol < alphabet_size {
240        if max_symbol == 0 {
241            break;
242        }
243        max_symbol -= 1;
244        let code = cl_code.read_symbol(r)?;
245        if code < 16 {
246            lengths[symbol] = code as u8;
247            symbol += 1;
248            if code != 0 {
249                prev_len = code as u8;
250            }
251        } else {
252            let (extra_bits, repeat_offset, value) = match code {
253                16 => (2u32, 3usize, prev_len),
254                17 => (3, 3, 0),
255                18 => (7, 11, 0),
256                _ => return Err(Error::InvalidInput("VP8L: invalid code-length symbol")),
257            };
258            let repeat = repeat_offset + r.read_bits(extra_bits)? as usize;
259            if symbol + repeat > alphabet_size {
260                return Err(Error::InvalidInput(
261                    "VP8L: code-length repeat overruns alphabet",
262                ));
263            }
264            for _ in 0..repeat {
265                lengths[symbol] = value;
266                symbol += 1;
267            }
268        }
269    }
270    PrefixCode::from_code_lengths(&lengths)
271}
272
273/// The five canonical codes used to decode a pixel (RFC 9649 §3.7.1).
274#[derive(Debug, Clone)]
275pub struct PrefixCodeGroup {
276    /// Green channel, LZ77 lengths, and color-cache indices.
277    pub green: PrefixCode,
278    /// Red channel.
279    pub red: PrefixCode,
280    /// Blue channel.
281    pub blue: PrefixCode,
282    /// Alpha channel.
283    pub alpha: PrefixCode,
284    /// LZ77 distance codes.
285    pub distance: PrefixCode,
286}
287
288/// Reads a [`PrefixCodeGroup`] (five codes in bitstream order); the green alphabet grows by
289/// `color_cache_size` (0 when the cache is off).
290///
291/// # Errors
292///
293/// Returns [`Error::InvalidInput`] on any malformed code or truncation.
294pub fn read_prefix_code_group(
295    r: &mut BitReader<'_>,
296    color_cache_size: usize,
297) -> Result<PrefixCodeGroup> {
298    Ok(PrefixCodeGroup {
299        green: read_prefix_code(r, green_alphabet_size(color_cache_size))?,
300        red: read_prefix_code(r, NUM_LITERAL_CODES)?,
301        blue: read_prefix_code(r, NUM_LITERAL_CODES)?,
302        alpha: read_prefix_code(r, NUM_LITERAL_CODES)?,
303        distance: read_prefix_code(r, NUM_DISTANCE_CODES)?,
304    })
305}
306
307// --- Encoder side ---------------------------------------------------------------------------------
308
309/// Derives the canonical (bit-reversed, ready-to-emit) codes for each symbol from its `lengths`.
310///
311/// Each returned code is reversed to its length so it can be written LSB-first with
312/// [`BitWriter::write_bits`]; unused symbols (length 0) get code 0.
313#[must_use]
314pub fn canonical_codes(lengths: &[u8]) -> Vec<u16> {
315    let mut bl_count = [0u32; MAX_CODE_LENGTH + 1];
316    for &len in lengths {
317        if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
318            bl_count[len as usize] += 1;
319        }
320    }
321    let mut next_code = [0u32; MAX_CODE_LENGTH + 1];
322    let mut code = 0u32;
323    for bits in 1..=MAX_CODE_LENGTH {
324        code = (code + bl_count[bits - 1]) << 1;
325        next_code[bits] = code;
326    }
327    let mut codes = vec![0u16; lengths.len()];
328    for (sym, &len) in lengths.iter().enumerate() {
329        if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
330            let c = next_code[len as usize];
331            next_code[len as usize] += 1;
332            codes[sym] = reverse_bits(c as u16, len);
333        }
334    }
335    codes
336}
337
338/// Builds length-limited (`<= max_len`) canonical Huffman code lengths from a symbol `histogram`.
339///
340/// Returns one length per symbol (`0` = unused). An empty histogram yields all-zero lengths (the
341/// caller codes that as a single symbol `0`); a single nonzero symbol gets length 1. To bound the
342/// maximum length, the histogram counts are raised toward a common floor and the tree rebuilt until
343/// it fits (libwebp's approach) — this yields *a* valid code, not necessarily the optimal one
344/// (density tuning is deferred to issue #31).
345#[must_use]
346pub fn build_length_limited_lengths(histogram: &[u32], max_len: u8) -> Vec<u8> {
347    let n = histogram.len();
348    let used = histogram.iter().filter(|&&h| h > 0).count();
349    if used == 0 {
350        return vec![0u8; n];
351    }
352    if used == 1 {
353        let mut lengths = vec![0u8; n];
354        if let Some(sym) = (0..n).find(|&i| histogram[i] > 0) {
355            lengths[sym] = 1;
356        }
357        return lengths;
358    }
359    let mut count_min = 1u32;
360    loop {
361        let depths = huffman_pass(histogram, count_min);
362        let max_depth = depths.iter().copied().max().unwrap_or(0);
363        if max_depth <= u32::from(max_len) {
364            return depths.iter().map(|&d| d as u8).collect();
365        }
366        count_min = count_min.saturating_mul(2);
367    }
368}
369
370/// One Huffman construction pass; returns per-symbol depths (code lengths) with each present
371/// symbol's weight floored to `count_min` (raising the floor flattens the tree, capping its depth).
372fn huffman_pass(histogram: &[u32], count_min: u32) -> Vec<u32> {
373    use std::cmp::Reverse;
374    use std::collections::BinaryHeap;
375
376    /// A node in the Huffman tree (`sym >= 0` marks a leaf).
377    struct Node {
378        left: i32,
379        right: i32,
380        sym: i32,
381    }
382
383    let n = histogram.len();
384    let mut lengths = vec![0u32; n];
385    let mut nodes: Vec<Node> = Vec::new();
386    // Min-heap keyed by (weight, tie-break index) for deterministic output.
387    let mut heap: BinaryHeap<Reverse<(u64, usize)>> = BinaryHeap::new();
388    for (sym, &count) in histogram.iter().enumerate() {
389        if count > 0 {
390            let idx = nodes.len();
391            nodes.push(Node {
392                left: -1,
393                right: -1,
394                sym: sym as i32,
395            });
396            heap.push(Reverse((u64::from(count.max(count_min)), idx)));
397        }
398    }
399    if nodes.len() == 1 {
400        if let Some(sym) = (0..n).find(|&i| histogram[i] > 0) {
401            lengths[sym] = 1;
402        }
403        return lengths;
404    }
405    while heap.len() > 1 {
406        let (Some(Reverse((wa, a))), Some(Reverse((wb, b)))) = (heap.pop(), heap.pop()) else {
407            break;
408        };
409        let idx = nodes.len();
410        nodes.push(Node {
411            left: a as i32,
412            right: b as i32,
413            sym: -1,
414        });
415        heap.push(Reverse((wa + wb, idx)));
416    }
417    let Some(Reverse((_, root))) = heap.pop() else {
418        return lengths;
419    };
420    // Assign depths with an explicit stack (the tree can be up to `used` deep).
421    let mut stack = vec![(root, 0u32)];
422    while let Some((idx, depth)) = stack.pop() {
423        let Some(node) = nodes.get(idx) else { continue };
424        if node.sym >= 0 {
425            if let Some(slot) = lengths.get_mut(node.sym as usize) {
426                *slot = depth;
427            }
428        } else {
429            stack.push((node.left as usize, depth + 1));
430            stack.push((node.right as usize, depth + 1));
431        }
432    }
433    lengths
434}
435
436/// An encoder-side prefix code: per-symbol emit patterns + lengths, with the single-symbol
437/// (0-bit) special case tracked so emission stays in lock-step with [`PrefixCode::read_symbol`].
438#[derive(Debug, Clone)]
439pub struct PrefixEncoder {
440    lengths: Vec<u8>,
441    codes: Vec<u16>,
442    single: bool,
443}
444
445impl PrefixEncoder {
446    /// Builds an encoder from per-symbol `lengths`.
447    #[must_use]
448    pub fn from_lengths(lengths: &[u8]) -> Self {
449        let codes = canonical_codes(lengths);
450        let single = lengths.iter().filter(|&&l| l > 0).count() <= 1;
451        Self {
452            lengths: lengths.to_vec(),
453            codes,
454            single,
455        }
456    }
457
458    /// Per-symbol code lengths (one entry per symbol; `0` = unused).
459    #[must_use]
460    pub fn lengths(&self) -> &[u8] {
461        &self.lengths
462    }
463
464    /// Writes `symbol` to `w`. A single-symbol code writes nothing (0 bits).
465    pub fn write_symbol(&self, w: &mut BitWriter, symbol: usize) {
466        if self.single {
467            return;
468        }
469        if let (Some(&code), Some(&len)) = (self.codes.get(symbol), self.lengths.get(symbol)) {
470            w.write_bits(u32::from(code), u32::from(len));
471        }
472    }
473}
474
475/// Writes a prefix code described by `lengths` using the *normal code length code* (RFC 9649
476/// §3.7.2).
477///
478/// The code lengths are themselves prefix-coded; for simplicity each length is emitted literally
479/// (no 16/17/18 run compression — that density win is deferred to issue #31), `max_symbol` is left
480/// at the alphabet default, and the meta code is itself length-limited to the 3-bit field range.
481pub fn write_normal_prefix_code(w: &mut BitWriter, lengths: &[u8]) {
482    w.write_bits(0, 1); // 0 = normal (not simple) code length code.
483
484    // Histogram the literal code-length symbols (only values 0..=15 occur in this literal scheme).
485    let mut cl_hist = [0u32; CODE_LENGTH_CODES];
486    for &len in lengths {
487        if (len as usize) < CODE_LENGTH_CODES {
488            cl_hist[len as usize] += 1;
489        }
490    }
491    // The meta code's lengths are emitted in 3-bit fields, so they must fit in 7 bits.
492    let cl_lengths = build_length_limited_lengths(&cl_hist, 7);
493    let cl_encoder = PrefixEncoder::from_lengths(&cl_lengths);
494
495    // Emit the meta code lengths in CODE_LENGTH_CODE_ORDER, trimming trailing zeros (min 4).
496    let mut num_code_lengths = CODE_LENGTH_CODES;
497    while num_code_lengths > 4 && cl_lengths[CODE_LENGTH_CODE_ORDER[num_code_lengths - 1]] == 0 {
498        num_code_lengths -= 1;
499    }
500    w.write_bits((num_code_lengths - 4) as u32, 4);
501    for &order in CODE_LENGTH_CODE_ORDER.iter().take(num_code_lengths) {
502        w.write_bits(u32::from(cl_lengths[order]), 3);
503    }
504
505    w.write_bits(0, 1); // max_symbol uses the alphabet default.
506    for &len in lengths {
507        cl_encoder.write_symbol(w, len as usize);
508    }
509}
510
511/// Writes a prefix code for 1 or 2 symbols using the *simple code length code* (RFC 9649 §3.7.2).
512/// Each listed symbol is given code length 1. `symbols` must hold 1 or 2 entries; extra entries are
513/// ignored.
514pub fn write_simple_prefix_code(w: &mut BitWriter, symbols: &[u16]) {
515    w.write_bits(1, 1); // 1 = simple code length code.
516    let num_symbols = symbols.len().clamp(1, 2);
517    w.write_bits((num_symbols - 1) as u32, 1);
518    let symbol0 = symbols.first().copied().unwrap_or(0);
519    let is_first_8bits = u32::from(symbol0 > 1);
520    w.write_bits(is_first_8bits, 1);
521    w.write_bits(u32::from(symbol0), 1 + 7 * is_first_8bits);
522    if num_symbols == 2 {
523        let symbol1 = symbols.get(1).copied().unwrap_or(0);
524        w.write_bits(u32::from(symbol1), 8);
525    }
526}
527
528#[cfg(test)]
529mod tests {
530    use super::*;
531
532    #[test]
533    fn reverse_bits_matches_manual() {
534        assert_eq!(reverse_bits(0b1, 1), 0b1);
535        assert_eq!(reverse_bits(0b10, 2), 0b01);
536        assert_eq!(reverse_bits(0b1011, 4), 0b1101);
537        assert_eq!(reverse_bits(0b0000_0001, 8), 0b1000_0000);
538        // Reversing twice is the identity for the given width.
539        for v in 0u16..256 {
540            assert_eq!(reverse_bits(reverse_bits(v, 8), 8), v);
541        }
542    }
543
544    /// Round-trips a symbol stream through an encoder built from a histogram and a decoder built
545    /// from the same lengths, exercising `build_length_limited_lengths` + `canonical_codes`.
546    fn assert_code_round_trips(histogram: &[u32], stream: &[usize], max_len: u8) {
547        let lengths = build_length_limited_lengths(histogram, max_len);
548        assert!(lengths.iter().all(|&l| l <= max_len), "length exceeds cap");
549        let encoder = PrefixEncoder::from_lengths(&lengths);
550        let mut w = BitWriter::new();
551        for &s in stream {
552            encoder.write_symbol(&mut w, s);
553        }
554        let bytes = w.finish();
555        let decoder = PrefixCode::from_code_lengths(&lengths).expect("valid lengths");
556        let mut r = BitReader::new(&bytes);
557        for &s in stream {
558            assert_eq!(decoder.read_symbol(&mut r).unwrap() as usize, s);
559        }
560    }
561
562    #[test]
563    fn round_trips_varied_histograms() {
564        // Uniform, skewed, two-symbol, and a single-symbol alphabet.
565        let uniform: Vec<u32> = vec![1; 16];
566        assert_code_round_trips(&uniform, &[0, 5, 15, 3, 8, 8, 0], 15);
567
568        let mut skewed = vec![1u32; 32];
569        skewed[7] = 10_000;
570        skewed[19] = 2_000;
571        assert_code_round_trips(&skewed, &[7, 7, 19, 0, 31, 7], 15);
572
573        let mut two = vec![0u32; 256];
574        two[10] = 5;
575        two[200] = 9;
576        assert_code_round_trips(&two, &[10, 200, 10, 10, 200], 15);
577
578        let mut single = vec![0u32; 40];
579        single[12] = 99;
580        // Single-symbol code consumes no bits, so the stream decodes regardless of length.
581        assert_code_round_trips(&single, &[12, 12, 12], 15);
582    }
583
584    #[test]
585    fn forces_and_caps_15_bit_lengths() {
586        // A Fibonacci-like distribution drives natural Huffman lengths well past 15; the limiter
587        // must still cap them.
588        let mut hist = vec![0u32; 64];
589        let (mut a, mut b) = (1u32, 1u32);
590        for h in hist.iter_mut() {
591            *h = a;
592            let next = a.saturating_add(b);
593            a = b;
594            b = next;
595        }
596        let lengths = build_length_limited_lengths(&hist, 15);
597        assert!(lengths.iter().all(|&l| l <= 15));
598        // Still a usable, complete code.
599        let stream: Vec<usize> = (0..64).collect();
600        assert_code_round_trips(&hist, &stream, 15);
601    }
602
603    #[test]
604    fn normal_code_length_coding_round_trips() {
605        // Build a code, serialize it with write_normal_prefix_code, read it back, and confirm the
606        // reconstructed decoder agrees on a symbol stream.
607        let mut hist = vec![0u32; 256];
608        for (i, h) in hist.iter_mut().enumerate() {
609            *h = (i as u32 % 7) + 1;
610        }
611        let lengths = build_length_limited_lengths(&hist, 15);
612        let encoder = PrefixEncoder::from_lengths(&lengths);
613
614        let stream: Vec<usize> = vec![0, 1, 2, 100, 255, 17, 42, 42, 7];
615        let mut w = BitWriter::new();
616        write_normal_prefix_code(&mut w, &lengths);
617        for &s in &stream {
618            encoder.write_symbol(&mut w, s);
619        }
620        let bytes = w.finish();
621
622        let mut r = BitReader::new(&bytes);
623        let decoder = read_prefix_code(&mut r, 256).expect("valid code description");
624        for &s in &stream {
625            assert_eq!(decoder.read_symbol(&mut r).unwrap() as usize, s);
626        }
627    }
628
629    #[test]
630    fn simple_code_length_coding_round_trips() {
631        for symbols in [
632            vec![0u16],
633            vec![1u16],
634            vec![5u16],
635            vec![3u16, 200],
636            vec![0u16, 1],
637        ] {
638            let mut lengths = vec![0u8; 256];
639            for &s in &symbols {
640                lengths[s as usize] = 1;
641            }
642            let encoder = PrefixEncoder::from_lengths(&lengths);
643            let stream: Vec<usize> = symbols.iter().map(|&s| s as usize).collect();
644
645            let mut w = BitWriter::new();
646            write_simple_prefix_code(&mut w, &symbols);
647            for &s in &stream {
648                encoder.write_symbol(&mut w, s);
649            }
650            let bytes = w.finish();
651
652            let mut r = BitReader::new(&bytes);
653            let decoder = read_prefix_code(&mut r, 256).expect("valid simple code");
654            for &s in &stream {
655                assert_eq!(decoder.read_symbol(&mut r).unwrap() as usize, s);
656            }
657        }
658    }
659
660    #[test]
661    fn rejects_malformed_lengths() {
662        // Over-subscribed: three length-1 codes (Kraft sum > 1).
663        assert!(matches!(
664            PrefixCode::from_code_lengths(&[1, 1, 1]),
665            Err(Error::InvalidInput(_))
666        ));
667        // Incomplete: a length-1 and a length-2 code leave the tree under-filled.
668        assert!(matches!(
669            PrefixCode::from_code_lengths(&[1, 2]),
670            Err(Error::InvalidInput(_))
671        ));
672        // Length beyond the 15-bit cap.
673        assert!(matches!(
674            PrefixCode::from_code_lengths(&[16, 0]),
675            Err(Error::InvalidInput(_))
676        ));
677        // Empty alphabet.
678        assert!(matches!(
679            PrefixCode::from_code_lengths(&[0, 0, 0]),
680            Err(Error::InvalidInput(_))
681        ));
682    }
683
684    #[test]
685    fn single_symbol_consumes_no_bits() {
686        let code = PrefixCode::from_code_lengths(&[0, 0, 3, 0]).expect("single leaf");
687        let mut r = BitReader::new(&[]); // no data at all
688        assert_eq!(code.read_symbol(&mut r).unwrap(), 2);
689        assert_eq!(code.read_symbol(&mut r).unwrap(), 2);
690    }
691
692    #[test]
693    fn green_alphabet_size_includes_cache() {
694        assert_eq!(green_alphabet_size(0), 280);
695        assert_eq!(green_alphabet_size(1024), 280 + 1024);
696    }
697
698    #[test]
699    fn reads_prefix_code_group() {
700        // Emit five trivial single-symbol codes (each consumes no data) and read them as a group.
701        let mut w = BitWriter::new();
702        for _ in 0..5 {
703            write_simple_prefix_code(&mut w, &[0]);
704        }
705        let bytes = w.finish();
706        let mut r = BitReader::new(&bytes);
707        let group = read_prefix_code_group(&mut r, 0).expect("group");
708        let mut rr = BitReader::new(&[]);
709        assert_eq!(group.green.read_symbol(&mut rr).unwrap(), 0);
710        assert_eq!(group.distance.read_symbol(&mut rr).unwrap(), 0);
711    }
712}