1use std::collections::HashMap;
6
7enum HuffmanCodeSymbol {
10 Symbol(u8),
12 EndOfString,
14}
15
16impl HuffmanCodeSymbol {
17 pub fn new(symbol: usize) -> HuffmanCodeSymbol {
18 if symbol == 256 {
19 HuffmanCodeSymbol::EndOfString
20 } else {
21 HuffmanCodeSymbol::Symbol(symbol as u8)
24 }
25 }
26}
27
28#[derive(thiserror::Error, PartialEq, Copy, Clone, Debug)]
29#[non_exhaustive]
30pub enum HuffmanDecoderError {
31 #[error("Padding too large")]
33 PaddingTooLarge,
34 #[error("Invalid padding")]
37 InvalidPadding,
38 #[error("EOS in string")]
40 EOSInString,
41}
42
43pub type HuffmanDecoderResult = Result<Vec<u8>, HuffmanDecoderError>;
46
47pub struct HuffmanDecoder {
49 table: HashMap<u8, HashMap<u32, HuffmanCodeSymbol>>,
50 eos_codepoint: (u32, u8),
53}
54
55impl HuffmanDecoder {
56 fn from_table(table: &[(u32, u8)]) -> HuffmanDecoder {
59 if table.len() != 257 {
60 panic!("Invalid Huffman code table. It must define exactly 257 symbols.");
61 }
62
63 let mut decoder_table: HashMap<u8, HashMap<u32, HuffmanCodeSymbol>> = HashMap::new();
64 let mut eos_codepoint: Option<(u32, u8)> = None;
65
66 for (symbol, &(code, code_len)) in table.iter().enumerate() {
67 decoder_table.entry(code_len).or_default();
68 let subtable = decoder_table.get_mut(&code_len).unwrap();
69 let huff_symbol = HuffmanCodeSymbol::new(symbol);
70 if let HuffmanCodeSymbol::EndOfString = huff_symbol {
71 eos_codepoint = Some((code, code_len));
74 };
75 subtable.insert(code, huff_symbol);
76 }
77
78 HuffmanDecoder {
79 table: decoder_table,
80 eos_codepoint: eos_codepoint.unwrap(),
81 }
82 }
83
84 pub fn new() -> HuffmanDecoder {
87 HuffmanDecoder::from_table(HUFFMAN_CODE_TABLE)
88 }
89
90 pub fn decode(&mut self, buf: &[u8]) -> HuffmanDecoderResult {
96 let mut current: u32 = 0;
97 let mut current_len: u8 = 0;
98 let mut result: Vec<u8> = Vec::new();
99
100 for b in BitIterator::new(buf.iter()) {
101 current_len += 1;
102 current <<= 1;
103 if b {
104 current |= 1;
105 }
106
107 if self.table.contains_key(¤t_len) {
108 let length_table = self.table.get(¤t_len).unwrap();
109 if length_table.contains_key(¤t) {
110 let decoded_symbol = match length_table.get(¤t).unwrap() {
111 HuffmanCodeSymbol::Symbol(symbol) => *symbol,
112 HuffmanCodeSymbol::EndOfString => {
113 return Err(HuffmanDecoderError::EOSInString);
116 }
117 };
118 result.push(decoded_symbol);
119 current = 0;
120 current_len = 0;
121 }
122 }
123 }
124
125 if current_len > 7 {
132 return Err(HuffmanDecoderError::PaddingTooLarge);
133 }
134
135 let right_align_current = if current_len == 0 {
140 0
141 } else {
142 current << (32 - current_len)
143 };
144 let right_align_eos = self.eos_codepoint.0 << (32 - self.eos_codepoint.1);
145 let mask = if current_len == 0 {
149 0
150 } else {
151 ((1 << current_len) - 1) << (32 - current_len)
152 };
153 let eos_mask = right_align_eos & mask;
155
156 if eos_mask != right_align_current {
157 return Err(HuffmanDecoderError::InvalidPadding);
158 }
159
160 Ok(result)
161 }
162}
163
164struct BitIterator<'a, I: Iterator> {
172 buffer_iterator: I,
173 current_byte: Option<&'a u8>,
174 pos: u8,
176}
177
178impl<'a, I: Iterator> BitIterator<'a, I>
179where
180 I: Iterator<Item = &'a u8>,
181{
182 pub fn new(iterator: I) -> BitIterator<'a, I> {
183 BitIterator::<'a, I> {
184 buffer_iterator: iterator,
185 current_byte: None,
186 pos: 7,
187 }
188 }
189}
190
191impl<'a, I> Iterator for BitIterator<'a, I>
192where
193 I: Iterator<Item = &'a u8>,
194{
195 type Item = bool;
196
197 fn next(&mut self) -> Option<bool> {
198 if self.current_byte.is_none() {
199 self.current_byte = self.buffer_iterator.next();
200 self.pos = 7;
201 }
202
203 self.current_byte?;
205
206 let b = *self.current_byte.unwrap();
207
208 let is_set = (b & (1 << self.pos)) == (1 << self.pos);
209 if self.pos == 0 {
210 self.current_byte = None;
213 } else {
214 self.pos -= 1;
216 }
217
218 Some(is_set)
219 }
220}
221
222static HUFFMAN_CODE_TABLE: &[(u32, u8)] = &[
223 (0x1ff8, 13),
224 (0x7fffd8, 23),
225 (0xfffffe2, 28),
226 (0xfffffe3, 28),
227 (0xfffffe4, 28),
228 (0xfffffe5, 28),
229 (0xfffffe6, 28),
230 (0xfffffe7, 28),
231 (0xfffffe8, 28),
232 (0xffffea, 24),
233 (0x3ffffffc, 30),
234 (0xfffffe9, 28),
235 (0xfffffea, 28),
236 (0x3ffffffd, 30),
237 (0xfffffeb, 28),
238 (0xfffffec, 28),
239 (0xfffffed, 28),
240 (0xfffffee, 28),
241 (0xfffffef, 28),
242 (0xffffff0, 28),
243 (0xffffff1, 28),
244 (0xffffff2, 28),
245 (0x3ffffffe, 30),
246 (0xffffff3, 28),
247 (0xffffff4, 28),
248 (0xffffff5, 28),
249 (0xffffff6, 28),
250 (0xffffff7, 28),
251 (0xffffff8, 28),
252 (0xffffff9, 28),
253 (0xffffffa, 28),
254 (0xffffffb, 28),
255 (0x14, 6),
256 (0x3f8, 10),
257 (0x3f9, 10),
258 (0xffa, 12),
259 (0x1ff9, 13),
260 (0x15, 6),
261 (0xf8, 8),
262 (0x7fa, 11),
263 (0x3fa, 10),
264 (0x3fb, 10),
265 (0xf9, 8),
266 (0x7fb, 11),
267 (0xfa, 8),
268 (0x16, 6),
269 (0x17, 6),
270 (0x18, 6),
271 (0x0, 5),
272 (0x1, 5),
273 (0x2, 5),
274 (0x19, 6),
275 (0x1a, 6),
276 (0x1b, 6),
277 (0x1c, 6),
278 (0x1d, 6),
279 (0x1e, 6),
280 (0x1f, 6),
281 (0x5c, 7),
282 (0xfb, 8),
283 (0x7ffc, 15),
284 (0x20, 6),
285 (0xffb, 12),
286 (0x3fc, 10),
287 (0x1ffa, 13),
288 (0x21, 6),
289 (0x5d, 7),
290 (0x5e, 7),
291 (0x5f, 7),
292 (0x60, 7),
293 (0x61, 7),
294 (0x62, 7),
295 (0x63, 7),
296 (0x64, 7),
297 (0x65, 7),
298 (0x66, 7),
299 (0x67, 7),
300 (0x68, 7),
301 (0x69, 7),
302 (0x6a, 7),
303 (0x6b, 7),
304 (0x6c, 7),
305 (0x6d, 7),
306 (0x6e, 7),
307 (0x6f, 7),
308 (0x70, 7),
309 (0x71, 7),
310 (0x72, 7),
311 (0xfc, 8),
312 (0x73, 7),
313 (0xfd, 8),
314 (0x1ffb, 13),
315 (0x7fff0, 19),
316 (0x1ffc, 13),
317 (0x3ffc, 14),
318 (0x22, 6),
319 (0x7ffd, 15),
320 (0x3, 5),
321 (0x23, 6),
322 (0x4, 5),
323 (0x24, 6),
324 (0x5, 5),
325 (0x25, 6),
326 (0x26, 6),
327 (0x27, 6),
328 (0x6, 5),
329 (0x74, 7),
330 (0x75, 7),
331 (0x28, 6),
332 (0x29, 6),
333 (0x2a, 6),
334 (0x7, 5),
335 (0x2b, 6),
336 (0x76, 7),
337 (0x2c, 6),
338 (0x8, 5),
339 (0x9, 5),
340 (0x2d, 6),
341 (0x77, 7),
342 (0x78, 7),
343 (0x79, 7),
344 (0x7a, 7),
345 (0x7b, 7),
346 (0x7ffe, 15),
347 (0x7fc, 11),
348 (0x3ffd, 14),
349 (0x1ffd, 13),
350 (0xffffffc, 28),
351 (0xfffe6, 20),
352 (0x3fffd2, 22),
353 (0xfffe7, 20),
354 (0xfffe8, 20),
355 (0x3fffd3, 22),
356 (0x3fffd4, 22),
357 (0x3fffd5, 22),
358 (0x7fffd9, 23),
359 (0x3fffd6, 22),
360 (0x7fffda, 23),
361 (0x7fffdb, 23),
362 (0x7fffdc, 23),
363 (0x7fffdd, 23),
364 (0x7fffde, 23),
365 (0xffffeb, 24),
366 (0x7fffdf, 23),
367 (0xffffec, 24),
368 (0xffffed, 24),
369 (0x3fffd7, 22),
370 (0x7fffe0, 23),
371 (0xffffee, 24),
372 (0x7fffe1, 23),
373 (0x7fffe2, 23),
374 (0x7fffe3, 23),
375 (0x7fffe4, 23),
376 (0x1fffdc, 21),
377 (0x3fffd8, 22),
378 (0x7fffe5, 23),
379 (0x3fffd9, 22),
380 (0x7fffe6, 23),
381 (0x7fffe7, 23),
382 (0xffffef, 24),
383 (0x3fffda, 22),
384 (0x1fffdd, 21),
385 (0xfffe9, 20),
386 (0x3fffdb, 22),
387 (0x3fffdc, 22),
388 (0x7fffe8, 23),
389 (0x7fffe9, 23),
390 (0x1fffde, 21),
391 (0x7fffea, 23),
392 (0x3fffdd, 22),
393 (0x3fffde, 22),
394 (0xfffff0, 24),
395 (0x1fffdf, 21),
396 (0x3fffdf, 22),
397 (0x7fffeb, 23),
398 (0x7fffec, 23),
399 (0x1fffe0, 21),
400 (0x1fffe1, 21),
401 (0x3fffe0, 22),
402 (0x1fffe2, 21),
403 (0x7fffed, 23),
404 (0x3fffe1, 22),
405 (0x7fffee, 23),
406 (0x7fffef, 23),
407 (0xfffea, 20),
408 (0x3fffe2, 22),
409 (0x3fffe3, 22),
410 (0x3fffe4, 22),
411 (0x7ffff0, 23),
412 (0x3fffe5, 22),
413 (0x3fffe6, 22),
414 (0x7ffff1, 23),
415 (0x3ffffe0, 26),
416 (0x3ffffe1, 26),
417 (0xfffeb, 20),
418 (0x7fff1, 19),
419 (0x3fffe7, 22),
420 (0x7ffff2, 23),
421 (0x3fffe8, 22),
422 (0x1ffffec, 25),
423 (0x3ffffe2, 26),
424 (0x3ffffe3, 26),
425 (0x3ffffe4, 26),
426 (0x7ffffde, 27),
427 (0x7ffffdf, 27),
428 (0x3ffffe5, 26),
429 (0xfffff1, 24),
430 (0x1ffffed, 25),
431 (0x7fff2, 19),
432 (0x1fffe3, 21),
433 (0x3ffffe6, 26),
434 (0x7ffffe0, 27),
435 (0x7ffffe1, 27),
436 (0x3ffffe7, 26),
437 (0x7ffffe2, 27),
438 (0xfffff2, 24),
439 (0x1fffe4, 21),
440 (0x1fffe5, 21),
441 (0x3ffffe8, 26),
442 (0x3ffffe9, 26),
443 (0xffffffd, 28),
444 (0x7ffffe3, 27),
445 (0x7ffffe4, 27),
446 (0x7ffffe5, 27),
447 (0xfffec, 20),
448 (0xfffff3, 24),
449 (0xfffed, 20),
450 (0x1fffe6, 21),
451 (0x3fffe9, 22),
452 (0x1fffe7, 21),
453 (0x1fffe8, 21),
454 (0x7ffff3, 23),
455 (0x3fffea, 22),
456 (0x3fffeb, 22),
457 (0x1ffffee, 25),
458 (0x1ffffef, 25),
459 (0xfffff4, 24),
460 (0xfffff5, 24),
461 (0x3ffffea, 26),
462 (0x7ffff4, 23),
463 (0x3ffffeb, 26),
464 (0x7ffffe6, 27),
465 (0x3ffffec, 26),
466 (0x3ffffed, 26),
467 (0x7ffffe7, 27),
468 (0x7ffffe8, 27),
469 (0x7ffffe9, 27),
470 (0x7ffffea, 27),
471 (0x7ffffeb, 27),
472 (0xffffffe, 28),
473 (0x7ffffec, 27),
474 (0x7ffffed, 27),
475 (0x7ffffee, 27),
476 (0x7ffffef, 27),
477 (0x7fffff0, 27),
478 (0x3ffffee, 26),
479 (0x3fffffff, 30),
480];
481
482#[cfg(test)]
483mod tests {
484 use super::BitIterator;
485 use super::HuffmanDecoder;
486 use super::HuffmanDecoderError;
487
488 fn to_expected_bit_result(numbers: &[u8]) -> Vec<bool> {
491 numbers.iter().map(|b| -> bool { *b == 1 }).collect()
492 }
493
494 #[test]
495 fn test_bit_iterator_single_byte() {
496 let expected_result = to_expected_bit_result(&[0, 0, 0, 0, 1, 0, 1, 0]);
497
498 let mut res: Vec<bool> = Vec::new();
499 for b in BitIterator::new([10u8].iter()) {
500 res.push(b);
501 }
502
503 assert_eq!(res, expected_result);
504 }
505
506 #[test]
507 fn test_bit_iterator_multiple_bytes() {
508 let expected_result = to_expected_bit_result(&[
509 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0,
511 ]);
512
513 let mut res: Vec<bool> = Vec::new();
514 for b in BitIterator::new([10u8, 255, 128, 1, 0, 170].iter()) {
515 res.push(b);
516 }
517
518 assert_eq!(res, expected_result);
519 }
520
521 #[test]
524 fn test_huffman_code_single_byte() {
525 let mut decoder = HuffmanDecoder::new();
526 {
529 let hex_buffer = [(0x7 << 3) + 7];
532 let expected_result = vec![b'o'];
533
534 let result = decoder.decode(&hex_buffer).ok().unwrap();
535
536 assert_eq!(result, expected_result);
537 }
538 {
539 #[allow(clippy::identity_op)] let hex_buffer = [0x0 + 7];
541 let expected_result = vec![b'0'];
542
543 let result = decoder.decode(&hex_buffer).ok().unwrap();
544
545 assert_eq!(result, expected_result);
546 }
547 {
548 let hex_buffer = [(0x21 << 2) + 3];
550 let expected_result = vec![b'A'];
551
552 let result = decoder.decode(&hex_buffer).ok().unwrap();
553
554 assert_eq!(result, expected_result);
555 }
556 }
557
558 #[test]
561 fn test_huffman_code_single_char_multiple_byte() {
562 let mut decoder = HuffmanDecoder::new();
563 {
566 let hex_buffer = [255, 160 + 15];
567 let expected_result = vec![b'#'];
568
569 let result = decoder.decode(&hex_buffer).ok().unwrap();
570
571 assert_eq!(result, expected_result);
572 }
573 {
574 let hex_buffer = [255, 200 + 7];
575 let expected_result = vec![b'$'];
576
577 let result = decoder.decode(&hex_buffer).ok().unwrap();
578
579 assert_eq!(result, expected_result);
580 }
581 {
582 let hex_buffer = [255, 255, 255, 240 + 3];
583 let expected_result = vec![10];
584
585 let result = decoder.decode(&hex_buffer).ok().unwrap();
586
587 assert_eq!(result, expected_result);
588 }
589 }
590
591 #[test]
592 fn test_huffman_code_multiple_chars() {
593 let mut decoder = HuffmanDecoder::new();
594 {
595 let hex_buffer = [254, 1];
596 let expected_result = vec![b'!', b'0'];
597
598 let result = decoder.decode(&hex_buffer).ok().unwrap();
599
600 assert_eq!(result, expected_result);
601 }
602 {
603 let hex_buffer = [(0x14 << 2) | 0x3, 248];
604 let expected_result = vec![b' ', b'!'];
605
606 let result = decoder.decode(&hex_buffer).ok().unwrap();
607
608 assert_eq!(result, expected_result);
609 }
610 }
611
612 #[test]
615 fn test_eos_is_error() {
616 let mut decoder = HuffmanDecoder::new();
617 {
618 let hex_buffer = [0xFF, 0xFF, 0xFF, 0xFF];
620
621 let result = decoder.decode(&hex_buffer);
622
623 assert_eq!(
624 HuffmanDecoderError::EOSInString,
625 match result {
626 Err(e) => e,
627 _ => panic!("Expected error due to EOS symbol in string"),
628 }
629 );
630 }
631 {
632 let hex_buffer = [0x3F, 0xFF, 0xFF, 0xFF, 0xFF];
634
635 let result = decoder.decode(&hex_buffer);
636
637 assert_eq!(
638 HuffmanDecoderError::EOSInString,
639 match result {
640 Err(e) => e,
641 _ => panic!("Expected error due to EOS symbol in string"),
642 }
643 );
644 }
645 }
646
647 #[test]
650 fn test_short_padding_okay() {
651 let mut decoder = HuffmanDecoder::new();
652 let hex_buffer = [0x3F];
653
654 let result = decoder.decode(&hex_buffer);
655
656 assert_eq!(b"o".to_vec(), result.ok().unwrap());
657 }
658
659 #[test]
661 fn test_padding_too_long() {
662 let mut decoder = HuffmanDecoder::new();
663 let hex_buffer = [0x3F, 0xFF];
664
665 let result = decoder.decode(&hex_buffer);
666
667 assert_eq!(
668 HuffmanDecoderError::PaddingTooLarge,
669 match result {
670 Err(e) => e,
671 _ => panic!("Expected `PaddingTooLarge` error"),
672 }
673 );
674 }
675
676 #[test]
679 fn test_padding_invalid() {
680 let mut decoder = HuffmanDecoder::new();
681 {
682 let hex_buffer = [0x3E];
683
684 let result = decoder.decode(&hex_buffer);
685
686 assert_eq!(
687 HuffmanDecoderError::InvalidPadding,
688 match result {
689 Err(e) => e,
690 _ => panic!("Expected `InvalidPadding` error"),
691 }
692 );
693 }
694 {
695 let hex_buffer = [254, 0];
696
697 let result = decoder.decode(&hex_buffer);
698
699 assert_eq!(
700 HuffmanDecoderError::InvalidPadding,
701 match result {
702 Err(e) => e,
703 _ => panic!("Expected `InvalidPadding` error"),
704 }
705 );
706 }
707 }
708}