fluke_hpack/
huffman.rs

1//! A module exposing utilities for encoding and decoding Huffman-coded octet
2//! strings, under the Huffman code defined by HPACK.
3//! (HPACK-draft-10, Appendix B)
4
5use std::collections::HashMap;
6
7/// Represents a symbol that can be inserted into a Huffman-encoded octet
8/// string.
9enum HuffmanCodeSymbol {
10    /// Any octet is a valid symbol
11    Symbol(u8),
12    /// A special symbol represents the end of the string
13    EndOfString,
14}
15
16impl HuffmanCodeSymbol {
17    pub fn new(symbol: usize) -> HuffmanCodeSymbol {
18        if symbol == 256 {
19            HuffmanCodeSymbol::EndOfString
20        } else {
21            // It is safe to downcast since now we know that the value
22            // is in the half-open interval [0, 256)
23            HuffmanCodeSymbol::Symbol(symbol as u8)
24        }
25    }
26}
27
28/// Represents the error variants that the `HuffmanDecoder` can return.
29#[derive(PartialEq, Copy, Clone, Debug)]
30pub enum HuffmanDecoderError {
31    /// Any padding strictly larger than 7 bits MUST be interpreted as an error
32    PaddingTooLarge,
33    /// Any padding that does not correspond to the most significant bits of
34    /// EOS MUST be interpreted as an error.
35    InvalidPadding,
36    /// If EOS is ever found in the string, it causes an error.
37    EOSInString,
38}
39
40/// The type that represents the result of the `decode` method of the
41/// `HuffmanDecoder`.
42pub type HuffmanDecoderResult = Result<Vec<u8>, HuffmanDecoderError>;
43
44/// A simple implementation of a Huffman code decoder.
45pub struct HuffmanDecoder {
46    table: HashMap<u8, HashMap<u32, HuffmanCodeSymbol>>,
47    // The representation of the EOS: the left-aligned code representation and
48    // the actual length of the codepoint, as a tuple.
49    eos_codepoint: (u32, u8),
50}
51
52impl HuffmanDecoder {
53    /// Constructs a new `HuffmanDecoder` using the given table of
54    /// (code point, code length) tuples to represent the Huffman code.
55    fn from_table(table: &[(u32, u8)]) -> HuffmanDecoder {
56        if table.len() != 257 {
57            panic!("Invalid Huffman code table. It must define exactly 257 symbols.");
58        }
59
60        let mut decoder_table: HashMap<u8, HashMap<u32, HuffmanCodeSymbol>> = HashMap::new();
61        let mut eos_codepoint: Option<(u32, u8)> = None;
62
63        for (symbol, &(code, code_len)) in table.iter().enumerate() {
64            decoder_table.entry(code_len).or_default();
65            let subtable = decoder_table.get_mut(&code_len).unwrap();
66            let huff_symbol = HuffmanCodeSymbol::new(symbol);
67            if let HuffmanCodeSymbol::EndOfString = huff_symbol {
68                // We also remember the code point of the EOS for easier
69                // reference later on.
70                eos_codepoint = Some((code, code_len));
71            };
72            subtable.insert(code, huff_symbol);
73        }
74
75        HuffmanDecoder {
76            table: decoder_table,
77            eos_codepoint: eos_codepoint.unwrap(),
78        }
79    }
80
81    /// Constructs a new HuffmanDecoder with the default Huffman code table, as
82    /// defined in the HPACK-draft-10, Appendix B.
83    pub fn new() -> HuffmanDecoder {
84        HuffmanDecoder::from_table(HUFFMAN_CODE_TABLE)
85    }
86
87    /// Decodes the buffer `buf` into a newly allocated `Vec`.
88    ///
89    /// It assumes that the entire buffer should be considered as the Huffman
90    /// encoding of an octet string and handles the padding rules
91    /// accordingly.
92    pub fn decode(&mut self, buf: &[u8]) -> HuffmanDecoderResult {
93        let mut current: u32 = 0;
94        let mut current_len: u8 = 0;
95        let mut result: Vec<u8> = Vec::new();
96
97        for b in BitIterator::new(buf.iter()) {
98            current_len += 1;
99            current <<= 1;
100            if b {
101                current |= 1;
102            }
103
104            if self.table.contains_key(&current_len) {
105                let length_table = self.table.get(&current_len).unwrap();
106                if length_table.contains_key(&current) {
107                    let decoded_symbol = match length_table.get(&current).unwrap() {
108                        HuffmanCodeSymbol::Symbol(symbol) => *symbol,
109                        HuffmanCodeSymbol::EndOfString => {
110                            // If the EOS symbol is detected within the stream,
111                            // we need to consider it an error.
112                            return Err(HuffmanDecoderError::EOSInString);
113                        }
114                    };
115                    result.push(decoded_symbol);
116                    current = 0;
117                    current_len = 0;
118                }
119            }
120        }
121
122        // Now we need to verify that the padding is correct.
123        // The spec mandates that the padding must not be strictly longer than
124        // 7 bits and that it must represent the most significant bits of the
125        // EOS symbol's code.
126
127        // First: the check for the length of the padding
128        if current_len > 7 {
129            return Err(HuffmanDecoderError::PaddingTooLarge);
130        }
131
132        // Second: the padding corresponds to the most-significant bits of the
133        // EOS symbol.
134        // Align both of them to have their most significant bit as the most
135        // significant bit of a u32.
136        let right_align_current = if current_len == 0 {
137            0
138        } else {
139            current << (32 - current_len)
140        };
141        let right_align_eos = self.eos_codepoint.0 << (32 - self.eos_codepoint.1);
142        // Now take only the necessary amount of most significant bit of EOS.
143        // The mask defines a bit pattern of `current_len` leading set bits,
144        // followed by the rest of the bits 0.
145        let mask = if current_len == 0 {
146            0
147        } else {
148            ((1 << current_len) - 1) << (32 - current_len)
149        };
150        // The mask is now used to strip the unwanted bits of the EOS
151        let eos_mask = right_align_eos & mask;
152
153        if eos_mask != right_align_current {
154            return Err(HuffmanDecoderError::InvalidPadding);
155        }
156
157        Ok(result)
158    }
159}
160
161/// A helper struct that represents an iterator over individual bits of all
162/// bytes found in a wrapped Iterator over bytes.
163/// Bits are represented as `bool`s, where `true` corresponds to a set bit and
164/// `false` to a 0 bit.
165///
166/// Bits are yielded in order of significance, starting from the
167/// most-significant bit.
168struct BitIterator<'a, I: Iterator> {
169    buffer_iterator: I,
170    current_byte: Option<&'a u8>,
171    /// The bit-position within the current byte
172    pos: u8,
173}
174
175impl<'a, I: Iterator> BitIterator<'a, I>
176where
177    I: Iterator<Item = &'a u8>,
178{
179    pub fn new(iterator: I) -> BitIterator<'a, I> {
180        BitIterator::<'a, I> {
181            buffer_iterator: iterator,
182            current_byte: None,
183            pos: 7,
184        }
185    }
186}
187
188impl<'a, I> Iterator for BitIterator<'a, I>
189where
190    I: Iterator<Item = &'a u8>,
191{
192    type Item = bool;
193
194    fn next(&mut self) -> Option<bool> {
195        if self.current_byte.is_none() {
196            self.current_byte = self.buffer_iterator.next();
197            self.pos = 7;
198        }
199
200        // If we still have `None`, it means the buffer has been exhausted
201        self.current_byte?;
202
203        let b = *self.current_byte.unwrap();
204
205        let is_set = (b & (1 << self.pos)) == (1 << self.pos);
206        if self.pos == 0 {
207            // We have exhausted all bits from the current byte -- try to get
208            // a new one on the next pass.
209            self.current_byte = None;
210        } else {
211            // Still more bits left here...
212            self.pos -= 1;
213        }
214
215        Some(is_set)
216    }
217}
218
219static HUFFMAN_CODE_TABLE: &[(u32, u8)] = &[
220    (0x1ff8, 13),
221    (0x7fffd8, 23),
222    (0xfffffe2, 28),
223    (0xfffffe3, 28),
224    (0xfffffe4, 28),
225    (0xfffffe5, 28),
226    (0xfffffe6, 28),
227    (0xfffffe7, 28),
228    (0xfffffe8, 28),
229    (0xffffea, 24),
230    (0x3ffffffc, 30),
231    (0xfffffe9, 28),
232    (0xfffffea, 28),
233    (0x3ffffffd, 30),
234    (0xfffffeb, 28),
235    (0xfffffec, 28),
236    (0xfffffed, 28),
237    (0xfffffee, 28),
238    (0xfffffef, 28),
239    (0xffffff0, 28),
240    (0xffffff1, 28),
241    (0xffffff2, 28),
242    (0x3ffffffe, 30),
243    (0xffffff3, 28),
244    (0xffffff4, 28),
245    (0xffffff5, 28),
246    (0xffffff6, 28),
247    (0xffffff7, 28),
248    (0xffffff8, 28),
249    (0xffffff9, 28),
250    (0xffffffa, 28),
251    (0xffffffb, 28),
252    (0x14, 6),
253    (0x3f8, 10),
254    (0x3f9, 10),
255    (0xffa, 12),
256    (0x1ff9, 13),
257    (0x15, 6),
258    (0xf8, 8),
259    (0x7fa, 11),
260    (0x3fa, 10),
261    (0x3fb, 10),
262    (0xf9, 8),
263    (0x7fb, 11),
264    (0xfa, 8),
265    (0x16, 6),
266    (0x17, 6),
267    (0x18, 6),
268    (0x0, 5),
269    (0x1, 5),
270    (0x2, 5),
271    (0x19, 6),
272    (0x1a, 6),
273    (0x1b, 6),
274    (0x1c, 6),
275    (0x1d, 6),
276    (0x1e, 6),
277    (0x1f, 6),
278    (0x5c, 7),
279    (0xfb, 8),
280    (0x7ffc, 15),
281    (0x20, 6),
282    (0xffb, 12),
283    (0x3fc, 10),
284    (0x1ffa, 13),
285    (0x21, 6),
286    (0x5d, 7),
287    (0x5e, 7),
288    (0x5f, 7),
289    (0x60, 7),
290    (0x61, 7),
291    (0x62, 7),
292    (0x63, 7),
293    (0x64, 7),
294    (0x65, 7),
295    (0x66, 7),
296    (0x67, 7),
297    (0x68, 7),
298    (0x69, 7),
299    (0x6a, 7),
300    (0x6b, 7),
301    (0x6c, 7),
302    (0x6d, 7),
303    (0x6e, 7),
304    (0x6f, 7),
305    (0x70, 7),
306    (0x71, 7),
307    (0x72, 7),
308    (0xfc, 8),
309    (0x73, 7),
310    (0xfd, 8),
311    (0x1ffb, 13),
312    (0x7fff0, 19),
313    (0x1ffc, 13),
314    (0x3ffc, 14),
315    (0x22, 6),
316    (0x7ffd, 15),
317    (0x3, 5),
318    (0x23, 6),
319    (0x4, 5),
320    (0x24, 6),
321    (0x5, 5),
322    (0x25, 6),
323    (0x26, 6),
324    (0x27, 6),
325    (0x6, 5),
326    (0x74, 7),
327    (0x75, 7),
328    (0x28, 6),
329    (0x29, 6),
330    (0x2a, 6),
331    (0x7, 5),
332    (0x2b, 6),
333    (0x76, 7),
334    (0x2c, 6),
335    (0x8, 5),
336    (0x9, 5),
337    (0x2d, 6),
338    (0x77, 7),
339    (0x78, 7),
340    (0x79, 7),
341    (0x7a, 7),
342    (0x7b, 7),
343    (0x7ffe, 15),
344    (0x7fc, 11),
345    (0x3ffd, 14),
346    (0x1ffd, 13),
347    (0xffffffc, 28),
348    (0xfffe6, 20),
349    (0x3fffd2, 22),
350    (0xfffe7, 20),
351    (0xfffe8, 20),
352    (0x3fffd3, 22),
353    (0x3fffd4, 22),
354    (0x3fffd5, 22),
355    (0x7fffd9, 23),
356    (0x3fffd6, 22),
357    (0x7fffda, 23),
358    (0x7fffdb, 23),
359    (0x7fffdc, 23),
360    (0x7fffdd, 23),
361    (0x7fffde, 23),
362    (0xffffeb, 24),
363    (0x7fffdf, 23),
364    (0xffffec, 24),
365    (0xffffed, 24),
366    (0x3fffd7, 22),
367    (0x7fffe0, 23),
368    (0xffffee, 24),
369    (0x7fffe1, 23),
370    (0x7fffe2, 23),
371    (0x7fffe3, 23),
372    (0x7fffe4, 23),
373    (0x1fffdc, 21),
374    (0x3fffd8, 22),
375    (0x7fffe5, 23),
376    (0x3fffd9, 22),
377    (0x7fffe6, 23),
378    (0x7fffe7, 23),
379    (0xffffef, 24),
380    (0x3fffda, 22),
381    (0x1fffdd, 21),
382    (0xfffe9, 20),
383    (0x3fffdb, 22),
384    (0x3fffdc, 22),
385    (0x7fffe8, 23),
386    (0x7fffe9, 23),
387    (0x1fffde, 21),
388    (0x7fffea, 23),
389    (0x3fffdd, 22),
390    (0x3fffde, 22),
391    (0xfffff0, 24),
392    (0x1fffdf, 21),
393    (0x3fffdf, 22),
394    (0x7fffeb, 23),
395    (0x7fffec, 23),
396    (0x1fffe0, 21),
397    (0x1fffe1, 21),
398    (0x3fffe0, 22),
399    (0x1fffe2, 21),
400    (0x7fffed, 23),
401    (0x3fffe1, 22),
402    (0x7fffee, 23),
403    (0x7fffef, 23),
404    (0xfffea, 20),
405    (0x3fffe2, 22),
406    (0x3fffe3, 22),
407    (0x3fffe4, 22),
408    (0x7ffff0, 23),
409    (0x3fffe5, 22),
410    (0x3fffe6, 22),
411    (0x7ffff1, 23),
412    (0x3ffffe0, 26),
413    (0x3ffffe1, 26),
414    (0xfffeb, 20),
415    (0x7fff1, 19),
416    (0x3fffe7, 22),
417    (0x7ffff2, 23),
418    (0x3fffe8, 22),
419    (0x1ffffec, 25),
420    (0x3ffffe2, 26),
421    (0x3ffffe3, 26),
422    (0x3ffffe4, 26),
423    (0x7ffffde, 27),
424    (0x7ffffdf, 27),
425    (0x3ffffe5, 26),
426    (0xfffff1, 24),
427    (0x1ffffed, 25),
428    (0x7fff2, 19),
429    (0x1fffe3, 21),
430    (0x3ffffe6, 26),
431    (0x7ffffe0, 27),
432    (0x7ffffe1, 27),
433    (0x3ffffe7, 26),
434    (0x7ffffe2, 27),
435    (0xfffff2, 24),
436    (0x1fffe4, 21),
437    (0x1fffe5, 21),
438    (0x3ffffe8, 26),
439    (0x3ffffe9, 26),
440    (0xffffffd, 28),
441    (0x7ffffe3, 27),
442    (0x7ffffe4, 27),
443    (0x7ffffe5, 27),
444    (0xfffec, 20),
445    (0xfffff3, 24),
446    (0xfffed, 20),
447    (0x1fffe6, 21),
448    (0x3fffe9, 22),
449    (0x1fffe7, 21),
450    (0x1fffe8, 21),
451    (0x7ffff3, 23),
452    (0x3fffea, 22),
453    (0x3fffeb, 22),
454    (0x1ffffee, 25),
455    (0x1ffffef, 25),
456    (0xfffff4, 24),
457    (0xfffff5, 24),
458    (0x3ffffea, 26),
459    (0x7ffff4, 23),
460    (0x3ffffeb, 26),
461    (0x7ffffe6, 27),
462    (0x3ffffec, 26),
463    (0x3ffffed, 26),
464    (0x7ffffe7, 27),
465    (0x7ffffe8, 27),
466    (0x7ffffe9, 27),
467    (0x7ffffea, 27),
468    (0x7ffffeb, 27),
469    (0xffffffe, 28),
470    (0x7ffffec, 27),
471    (0x7ffffed, 27),
472    (0x7ffffee, 27),
473    (0x7ffffef, 27),
474    (0x7fffff0, 27),
475    (0x3ffffee, 26),
476    (0x3fffffff, 30),
477];
478
479#[cfg(test)]
480mod tests {
481    use super::BitIterator;
482    use super::HuffmanDecoder;
483    use super::HuffmanDecoderError;
484
485    /// A helper function that converts the given slice containing values `1`
486    /// and `0` to a `Vec` of `bool`s, according to the number.
487    fn to_expected_bit_result(numbers: &[u8]) -> Vec<bool> {
488        numbers.iter().map(|b| -> bool { *b == 1 }).collect()
489    }
490
491    #[test]
492    fn test_bit_iterator_single_byte() {
493        let expected_result = to_expected_bit_result(&[0, 0, 0, 0, 1, 0, 1, 0]);
494
495        let mut res: Vec<bool> = Vec::new();
496        for b in BitIterator::new([10u8].iter()) {
497            res.push(b);
498        }
499
500        assert_eq!(res, expected_result);
501    }
502
503    #[test]
504    fn test_bit_iterator_multiple_bytes() {
505        let expected_result = to_expected_bit_result(&[
506            0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507            0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0,
508        ]);
509
510        let mut res: Vec<bool> = Vec::new();
511        for b in BitIterator::new([10u8, 255, 128, 1, 0, 170].iter()) {
512            res.push(b);
513        }
514
515        assert_eq!(res, expected_result);
516    }
517
518    /// Simple tests for the Huffman decoder -- whether it can decode a single
519    /// character code represented additionally with only a single byte.
520    #[test]
521    fn test_huffman_code_single_byte() {
522        let mut decoder = HuffmanDecoder::new();
523        // (The + (2^n - 1) at the final byte is to add the correct expected
524        // padding: 1s)
525        {
526            // We need to shift it by 3, since we need the top-order bytes to
527            // start the code point.
528            let hex_buffer = [(0x7 << 3) + 7];
529            let expected_result = vec![b'o'];
530
531            let result = decoder.decode(&hex_buffer).ok().unwrap();
532
533            assert_eq!(result, expected_result);
534        }
535        {
536            #[allow(clippy::identity_op)] // for communicating intent
537            let hex_buffer = [0x0 + 7];
538            let expected_result = vec![b'0'];
539
540            let result = decoder.decode(&hex_buffer).ok().unwrap();
541
542            assert_eq!(result, expected_result);
543        }
544        {
545            // The length of the codepoint is 6, so we shift by two
546            let hex_buffer = [(0x21 << 2) + 3];
547            let expected_result = vec![b'A'];
548
549            let result = decoder.decode(&hex_buffer).ok().unwrap();
550
551            assert_eq!(result, expected_result);
552        }
553    }
554
555    /// Tests that the Huffman decoder can decode a single character made of
556    /// multiple bytes.
557    #[test]
558    fn test_huffman_code_single_char_multiple_byte() {
559        let mut decoder = HuffmanDecoder::new();
560        // (The + (2^n - 1) at the final byte is to add the correct expected
561        // padding: 1s)
562        {
563            let hex_buffer = [255, 160 + 15];
564            let expected_result = vec![b'#'];
565
566            let result = decoder.decode(&hex_buffer).ok().unwrap();
567
568            assert_eq!(result, expected_result);
569        }
570        {
571            let hex_buffer = [255, 200 + 7];
572            let expected_result = vec![b'$'];
573
574            let result = decoder.decode(&hex_buffer).ok().unwrap();
575
576            assert_eq!(result, expected_result);
577        }
578        {
579            let hex_buffer = [255, 255, 255, 240 + 3];
580            let expected_result = vec![10];
581
582            let result = decoder.decode(&hex_buffer).ok().unwrap();
583
584            assert_eq!(result, expected_result);
585        }
586    }
587
588    #[test]
589    fn test_huffman_code_multiple_chars() {
590        let mut decoder = HuffmanDecoder::new();
591        {
592            let hex_buffer = [254, 1];
593            let expected_result = vec![b'!', b'0'];
594
595            let result = decoder.decode(&hex_buffer).ok().unwrap();
596
597            assert_eq!(result, expected_result);
598        }
599        {
600            let hex_buffer = [(0x14 << 2) | 0x3, 248];
601            let expected_result = vec![b' ', b'!'];
602
603            let result = decoder.decode(&hex_buffer).ok().unwrap();
604
605            assert_eq!(result, expected_result);
606        }
607    }
608
609    /// Tests that if we find the EOS symbol in the stream, we consider it an
610    /// error.
611    #[test]
612    fn test_eos_is_error() {
613        let mut decoder = HuffmanDecoder::new();
614        {
615            // The EOS is an all-ones pattern of length 30
616            let hex_buffer = [0xFF, 0xFF, 0xFF, 0xFF];
617
618            let result = decoder.decode(&hex_buffer);
619
620            assert_eq!(
621                HuffmanDecoderError::EOSInString,
622                match result {
623                    Err(e) => e,
624                    _ => panic!("Expected error due to EOS symbol in string"),
625                }
626            );
627        }
628        {
629            // Full EOS after a valid character.
630            let hex_buffer = [0x3F, 0xFF, 0xFF, 0xFF, 0xFF];
631
632            let result = decoder.decode(&hex_buffer);
633
634            assert_eq!(
635                HuffmanDecoderError::EOSInString,
636                match result {
637                    Err(e) => e,
638                    _ => panic!("Expected error due to EOS symbol in string"),
639                }
640            );
641        }
642    }
643
644    /// Tests that when there are 7 or less padding bits, the string is
645    /// correctly decoded.
646    #[test]
647    fn test_short_padding_okay() {
648        let mut decoder = HuffmanDecoder::new();
649        let hex_buffer = [0x3F];
650
651        let result = decoder.decode(&hex_buffer);
652
653        assert_eq!(b"o".to_vec(), result.ok().unwrap());
654    }
655
656    /// Tests that when there are more than 7 padding bits, we get an error.
657    #[test]
658    fn test_padding_too_long() {
659        let mut decoder = HuffmanDecoder::new();
660        let hex_buffer = [0x3F, 0xFF];
661
662        let result = decoder.decode(&hex_buffer);
663
664        assert_eq!(
665            HuffmanDecoderError::PaddingTooLarge,
666            match result {
667                Err(e) => e,
668                _ => panic!("Expected `PaddingTooLarge` error"),
669            }
670        );
671    }
672
673    /// Tests that when there is a certain number of padding bits that deviate
674    /// from the most significant bits of the EOS symbol, we get an error.
675    #[test]
676    fn test_padding_invalid() {
677        let mut decoder = HuffmanDecoder::new();
678        {
679            let hex_buffer = [0x3E];
680
681            let result = decoder.decode(&hex_buffer);
682
683            assert_eq!(
684                HuffmanDecoderError::InvalidPadding,
685                match result {
686                    Err(e) => e,
687                    _ => panic!("Expected `InvalidPadding` error"),
688                }
689            );
690        }
691        {
692            let hex_buffer = [254, 0];
693
694            let result = decoder.decode(&hex_buffer);
695
696            assert_eq!(
697                HuffmanDecoderError::InvalidPadding,
698                match result {
699                    Err(e) => e,
700                    _ => panic!("Expected `InvalidPadding` error"),
701                }
702            );
703        }
704    }
705}