Skip to main content

zerodds_hpack/
huffman.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 ZeroDDS Contributors
3
4//! RFC 7541 Appendix B Static Huffman Code.
5//!
6//! Wir implementieren die Wire-Form-Compatible-Variante: das voll-
7//! staendige Code-Buch ist embedded als `(code, bit_length)`-Array
8//! ueber die 256 Octets + EOS (Index 256 — wird nur fuer Padding
9//! verwendet).
10
11use alloc::vec::Vec;
12
13/// Huffman-Code-Tabelle aus RFC 7541 Appendix B.
14/// Tupel `(code_bits, bit_length)`. Index 0..=255 = Octet, 256 = EOS.
15const TABLE: [(u32, u8); 257] = [
16    (0x1ff8, 13),
17    (0x7fffd8, 23),
18    (0xfffffe2, 28),
19    (0xfffffe3, 28),
20    (0xfffffe4, 28),
21    (0xfffffe5, 28),
22    (0xfffffe6, 28),
23    (0xfffffe7, 28),
24    (0xfffffe8, 28),
25    (0xffffea, 24),
26    (0x3ffffffc, 30),
27    (0xfffffe9, 28),
28    (0xfffffea, 28),
29    (0x3ffffffd, 30),
30    (0xfffffeb, 28),
31    (0xfffffec, 28),
32    (0xfffffed, 28),
33    (0xfffffee, 28),
34    (0xfffffef, 28),
35    (0xffffff0, 28),
36    (0xffffff1, 28),
37    (0xffffff2, 28),
38    (0x3ffffffe, 30),
39    (0xffffff3, 28),
40    (0xffffff4, 28),
41    (0xffffff5, 28),
42    (0xffffff6, 28),
43    (0xffffff7, 28),
44    (0xffffff8, 28),
45    (0xffffff9, 28),
46    (0xffffffa, 28),
47    (0xffffffb, 28),
48    (0x14, 6),
49    (0x3f8, 10),
50    (0x3f9, 10),
51    (0xffa, 12),
52    (0x1ff9, 13),
53    (0x15, 6),
54    (0xf8, 8),
55    (0x7fa, 11),
56    (0x3fa, 10),
57    (0x3fb, 10),
58    (0xf9, 8),
59    (0x7fb, 11),
60    (0xfa, 8),
61    (0x16, 6),
62    (0x17, 6),
63    (0x18, 6),
64    (0x0, 5),
65    (0x1, 5),
66    (0x2, 5),
67    (0x19, 6),
68    (0x1a, 6),
69    (0x1b, 6),
70    (0x1c, 6),
71    (0x1d, 6),
72    (0x1e, 6),
73    (0x1f, 6),
74    (0x5c, 7),
75    (0xfb, 8),
76    (0x7ffc, 15),
77    (0x20, 6),
78    (0xffb, 12),
79    (0x3fc, 10),
80    (0x1ffa, 13),
81    (0x21, 6),
82    (0x5d, 7),
83    (0x5e, 7),
84    (0x5f, 7),
85    (0x60, 7),
86    (0x61, 7),
87    (0x62, 7),
88    (0x63, 7),
89    (0x64, 7),
90    (0x65, 7),
91    (0x66, 7),
92    (0x67, 7),
93    (0x68, 7),
94    (0x69, 7),
95    (0x6a, 7),
96    (0x6b, 7),
97    (0x6c, 7),
98    (0x6d, 7),
99    (0x6e, 7),
100    (0x6f, 7),
101    (0x70, 7),
102    (0x71, 7),
103    (0x72, 7),
104    (0xfc, 8),
105    (0x73, 7),
106    (0xfd, 8),
107    (0x1ffb, 13),
108    (0x7fff0, 19),
109    (0x1ffc, 13),
110    (0x3ffc, 14),
111    (0x22, 6),
112    (0x7ffd, 15),
113    (0x3, 5),
114    (0x23, 6),
115    (0x4, 5),
116    (0x24, 6),
117    (0x5, 5),
118    (0x25, 6),
119    (0x26, 6),
120    (0x27, 6),
121    (0x6, 5),
122    (0x74, 7),
123    (0x75, 7),
124    (0x28, 6),
125    (0x29, 6),
126    (0x2a, 6),
127    (0x7, 5),
128    (0x2b, 6),
129    (0x76, 7),
130    (0x2c, 6),
131    (0x8, 5),
132    (0x9, 5),
133    (0x2d, 6),
134    (0x77, 7),
135    (0x78, 7),
136    (0x79, 7),
137    (0x7a, 7),
138    (0x7b, 7),
139    (0x7ffe, 15),
140    (0x7fc, 11),
141    (0x3ffd, 14),
142    (0x1ffd, 13),
143    (0xffffffc, 28),
144    (0xfffe6, 20),
145    (0x3fffd2, 22),
146    (0xfffe7, 20),
147    (0xfffe8, 20),
148    (0x3fffd3, 22),
149    (0x3fffd4, 22),
150    (0x3fffd5, 22),
151    (0x7fffd9, 23),
152    (0x3fffd6, 22),
153    (0x7fffda, 23),
154    (0x7fffdb, 23),
155    (0x7fffdc, 23),
156    (0x7fffdd, 23),
157    (0x7fffde, 23),
158    (0xffffeb, 24),
159    (0x7fffdf, 23),
160    (0xffffec, 24),
161    (0xffffed, 24),
162    (0x3fffd7, 22),
163    (0x7fffe0, 23),
164    (0xffffee, 24),
165    (0x7fffe1, 23),
166    (0x7fffe2, 23),
167    (0x7fffe3, 23),
168    (0x7fffe4, 23),
169    (0x1fffdc, 21),
170    (0x3fffd8, 22),
171    (0x7fffe5, 23),
172    (0x3fffd9, 22),
173    (0x7fffe6, 23),
174    (0x7fffe7, 23),
175    (0xffffef, 24),
176    (0x3fffda, 22),
177    (0x1fffdd, 21),
178    (0xfffe9, 20),
179    (0x3fffdb, 22),
180    (0x3fffdc, 22),
181    (0x7fffe8, 23),
182    (0x7fffe9, 23),
183    (0x1fffde, 21),
184    (0x7fffea, 23),
185    (0x3fffdd, 22),
186    (0x3fffde, 22),
187    (0xfffff0, 24),
188    (0x1fffdf, 21),
189    (0x3fffdf, 22),
190    (0x7fffeb, 23),
191    (0x7fffec, 23),
192    (0x1fffe0, 21),
193    (0x1fffe1, 21),
194    (0x3fffe0, 22),
195    (0x1fffe2, 21),
196    (0x7fffed, 23),
197    (0x3fffe1, 22),
198    (0x7fffee, 23),
199    (0x7fffef, 23),
200    (0xfffea, 20),
201    (0x3fffe2, 22),
202    (0x3fffe3, 22),
203    (0x3fffe4, 22),
204    (0x7ffff0, 23),
205    (0x3fffe5, 22),
206    (0x3fffe6, 22),
207    (0x7ffff1, 23),
208    (0x3ffffe0, 26),
209    (0x3ffffe1, 26),
210    (0xfffeb, 20),
211    (0x7fff1, 19),
212    (0x3fffe7, 22),
213    (0x7ffff2, 23),
214    (0x3fffe8, 22),
215    (0x1ffffec, 25),
216    (0x3ffffe2, 26),
217    (0x3ffffe3, 26),
218    (0x3ffffe4, 26),
219    (0x7ffffde, 27),
220    (0x7ffffdf, 27),
221    (0x3ffffe5, 26),
222    (0xfffff1, 24),
223    (0x1ffffed, 25),
224    (0x7fff2, 19),
225    (0x1fffe3, 21),
226    (0x3ffffe6, 26),
227    (0x7ffffe0, 27),
228    (0x7ffffe1, 27),
229    (0x3ffffe7, 26),
230    (0x7ffffe2, 27),
231    (0xfffff2, 24),
232    (0x1fffe4, 21),
233    (0x1fffe5, 21),
234    (0x3ffffe8, 26),
235    (0x3ffffe9, 26),
236    (0xffffffd, 28),
237    (0x7ffffe3, 27),
238    (0x7ffffe4, 27),
239    (0x7ffffe5, 27),
240    (0xfffec, 20),
241    (0xfffff3, 24),
242    (0xfffed, 20),
243    (0x1fffe6, 21),
244    (0x3fffe9, 22),
245    (0x1fffe7, 21),
246    (0x1fffe8, 21),
247    (0x7ffff3, 23),
248    (0x3fffea, 22),
249    (0x3fffeb, 22),
250    (0x1ffffee, 25),
251    (0x1ffffef, 25),
252    (0xfffff4, 24),
253    (0xfffff5, 24),
254    (0x3ffffea, 26),
255    (0x7ffff4, 23),
256    (0x3ffffeb, 26),
257    (0x7ffffe6, 27),
258    (0x3ffffec, 26),
259    (0x3ffffed, 26),
260    (0x7ffffe7, 27),
261    (0x7ffffe8, 27),
262    (0x7ffffe9, 27),
263    (0x7ffffea, 27),
264    (0x7ffffeb, 27),
265    (0xffffffe, 28),
266    (0x7ffffec, 27),
267    (0x7ffffed, 27),
268    (0x7ffffee, 27),
269    (0x7ffffef, 27),
270    (0x7fffff0, 27),
271    (0x3ffffee, 26),
272    (0x3fffffff, 30),
273];
274
275/// Huffman-Decode-Fehler.
276#[derive(Debug, Clone, Copy, PartialEq, Eq)]
277pub struct HuffmanError;
278
279/// Encode `bytes` per Huffman.
280#[must_use]
281pub fn encode(bytes: &[u8]) -> Vec<u8> {
282    let mut out = Vec::with_capacity(bytes.len());
283    let mut acc: u64 = 0;
284    let mut acc_bits: u8 = 0;
285    for &b in bytes {
286        let (code, len) = TABLE[b as usize];
287        acc = (acc << len) | u64::from(code);
288        acc_bits += len;
289        while acc_bits >= 8 {
290            acc_bits -= 8;
291            out.push((acc >> acc_bits) as u8);
292        }
293    }
294    // Pad with EOS-prefix (1-bits) per RFC 7541 §5.2.
295    if acc_bits > 0 {
296        let pad = 8 - acc_bits;
297        acc = (acc << pad) | ((1u64 << pad) - 1);
298        out.push(acc as u8);
299    }
300    out
301}
302
303/// Decode Huffman-codiertes Input.
304///
305/// # Errors
306/// `HuffmanError` wenn der Code-Stream invalid ist (Spec §5.2: pad-
307/// bits mit Nullen oder Sequenz dekodiert nicht).
308pub fn decode(bytes: &[u8]) -> Result<Vec<u8>, HuffmanError> {
309    let mut out = Vec::new();
310    let mut acc: u64 = 0;
311    let mut acc_bits: u8 = 0;
312    for &b in bytes {
313        acc = (acc << 8) | u64::from(b);
314        acc_bits += 8;
315        // Try to consume codes from MSB.
316        while acc_bits > 0 {
317            let mut found = None;
318            for (sym, (code, len)) in TABLE.iter().enumerate().take(256) {
319                if *len <= acc_bits {
320                    let shift = acc_bits - len;
321                    let candidate = (acc >> shift) & ((1u64 << len) - 1);
322                    if candidate == u64::from(*code) {
323                        found = Some((sym as u8, *len));
324                        break;
325                    }
326                }
327            }
328            if let Some((sym, len)) = found {
329                out.push(sym);
330                acc_bits -= len;
331                acc &= (1u64 << acc_bits) - 1;
332            } else {
333                break;
334            }
335        }
336    }
337    // Pad bits must be 1 (EOS-prefix) per RFC 7541.
338    if acc_bits > 0 {
339        let pad_mask = (1u64 << acc_bits) - 1;
340        if (acc & pad_mask) != pad_mask {
341            return Err(HuffmanError);
342        }
343        if acc_bits >= 8 {
344            return Err(HuffmanError);
345        }
346    }
347    Ok(out)
348}
349
350#[cfg(test)]
351#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
352mod tests {
353    use super::*;
354
355    #[test]
356    fn round_trip_ascii() {
357        for s in ["www.example.com", "no-cache", "custom-key", "/index.html"] {
358            let enc = encode(s.as_bytes());
359            let dec = decode(&enc).unwrap();
360            assert_eq!(dec, s.as_bytes());
361        }
362    }
363
364    #[test]
365    fn round_trip_empty() {
366        let enc = encode(b"");
367        assert!(enc.is_empty());
368        let dec = decode(&enc).unwrap();
369        assert!(dec.is_empty());
370    }
371
372    #[test]
373    fn round_trip_single_char() {
374        for c in [b'a', b'A', b'0', b'!', b' '] {
375            let enc = encode(&[c]);
376            let dec = decode(&enc).unwrap();
377            assert_eq!(dec, alloc::vec![c]);
378        }
379    }
380
381    #[test]
382    fn rfc_appendix_c_4_1_www_example_com_compresses() {
383        // Spec §C.4.1: "www.example.com" → 12 bytes Huffman-encoded
384        // (`f1e3 c2e5 f23a 6ba0 ab90 f4ff`).
385        let enc = encode(b"www.example.com");
386        let expected = [
387            0xf1, 0xe3, 0xc2, 0xe5, 0xf2, 0x3a, 0x6b, 0xa0, 0xab, 0x90, 0xf4, 0xff,
388        ];
389        assert_eq!(enc, expected);
390    }
391
392    #[test]
393    fn rfc_appendix_c_4_2_no_cache_compresses() {
394        let enc = encode(b"no-cache");
395        let expected = [0xa8, 0xeb, 0x10, 0x64, 0x9c, 0xbf];
396        assert_eq!(enc, expected);
397    }
398
399    #[test]
400    fn invalid_pad_with_zeros_rejected() {
401        // Encode normally then flip pad bits to 0.
402        let mut enc = encode(b"a");
403        // Force pad bits to 0 by clearing low bits.
404        if let Some(last) = enc.last_mut() {
405            // 'a' is 6 bits (0x21), so pad is 2 bits → mask 0xfc.
406            *last &= 0xfc;
407        }
408        assert_eq!(decode(&enc), Err(HuffmanError));
409    }
410}