1use std::collections::HashMap;
6
7enum HuffmanCodeSymbol {
10 Symbol(u8),
12 EndOfString,
14}
15
16impl HuffmanCodeSymbol {
17 pub fn new(symbol: usize) -> HuffmanCodeSymbol {
18 if symbol == 256 {
19 HuffmanCodeSymbol::EndOfString
20 } else {
21 HuffmanCodeSymbol::Symbol(symbol as u8)
24 }
25 }
26}
27
28#[derive(PartialEq, Copy, Clone, Debug)]
30pub enum HuffmanDecoderError {
31 PaddingTooLarge,
33 InvalidPadding,
36 EOSInString,
38}
39
40pub type HuffmanDecoderResult = Result<Vec<u8>, HuffmanDecoderError>;
43
44pub struct HuffmanDecoder {
46 table: HashMap<u8, HashMap<u32, HuffmanCodeSymbol>>,
47 eos_codepoint: (u32, u8),
50}
51
52impl HuffmanDecoder {
53 fn from_table(table: &[(u32, u8)]) -> HuffmanDecoder {
56 if table.len() != 257 {
57 panic!("Invalid Huffman code table. It must define exactly 257 symbols.");
58 }
59
60 let mut decoder_table: HashMap<u8, HashMap<u32, HuffmanCodeSymbol>> = HashMap::new();
61 let mut eos_codepoint: Option<(u32, u8)> = None;
62
63 for (symbol, &(code, code_len)) in table.iter().enumerate() {
64 decoder_table.entry(code_len).or_default();
65 let subtable = decoder_table.get_mut(&code_len).unwrap();
66 let huff_symbol = HuffmanCodeSymbol::new(symbol);
67 if let HuffmanCodeSymbol::EndOfString = huff_symbol {
68 eos_codepoint = Some((code, code_len));
71 };
72 subtable.insert(code, huff_symbol);
73 }
74
75 HuffmanDecoder {
76 table: decoder_table,
77 eos_codepoint: eos_codepoint.unwrap(),
78 }
79 }
80
81 pub fn new() -> HuffmanDecoder {
84 HuffmanDecoder::from_table(HUFFMAN_CODE_TABLE)
85 }
86
87 pub fn decode(&mut self, buf: &[u8]) -> HuffmanDecoderResult {
93 let mut current: u32 = 0;
94 let mut current_len: u8 = 0;
95 let mut result: Vec<u8> = Vec::new();
96
97 for b in BitIterator::new(buf.iter()) {
98 current_len += 1;
99 current <<= 1;
100 if b {
101 current |= 1;
102 }
103
104 if self.table.contains_key(¤t_len) {
105 let length_table = self.table.get(¤t_len).unwrap();
106 if length_table.contains_key(¤t) {
107 let decoded_symbol = match length_table.get(¤t).unwrap() {
108 HuffmanCodeSymbol::Symbol(symbol) => *symbol,
109 HuffmanCodeSymbol::EndOfString => {
110 return Err(HuffmanDecoderError::EOSInString);
113 }
114 };
115 result.push(decoded_symbol);
116 current = 0;
117 current_len = 0;
118 }
119 }
120 }
121
122 if current_len > 7 {
129 return Err(HuffmanDecoderError::PaddingTooLarge);
130 }
131
132 let right_align_current = if current_len == 0 {
137 0
138 } else {
139 current << (32 - current_len)
140 };
141 let right_align_eos = self.eos_codepoint.0 << (32 - self.eos_codepoint.1);
142 let mask = if current_len == 0 {
146 0
147 } else {
148 ((1 << current_len) - 1) << (32 - current_len)
149 };
150 let eos_mask = right_align_eos & mask;
152
153 if eos_mask != right_align_current {
154 return Err(HuffmanDecoderError::InvalidPadding);
155 }
156
157 Ok(result)
158 }
159}
160
161struct BitIterator<'a, I: Iterator> {
169 buffer_iterator: I,
170 current_byte: Option<&'a u8>,
171 pos: u8,
173}
174
175impl<'a, I: Iterator> BitIterator<'a, I>
176where
177 I: Iterator<Item = &'a u8>,
178{
179 pub fn new(iterator: I) -> BitIterator<'a, I> {
180 BitIterator::<'a, I> {
181 buffer_iterator: iterator,
182 current_byte: None,
183 pos: 7,
184 }
185 }
186}
187
188impl<'a, I> Iterator for BitIterator<'a, I>
189where
190 I: Iterator<Item = &'a u8>,
191{
192 type Item = bool;
193
194 fn next(&mut self) -> Option<bool> {
195 if self.current_byte.is_none() {
196 self.current_byte = self.buffer_iterator.next();
197 self.pos = 7;
198 }
199
200 self.current_byte?;
202
203 let b = *self.current_byte.unwrap();
204
205 let is_set = (b & (1 << self.pos)) == (1 << self.pos);
206 if self.pos == 0 {
207 self.current_byte = None;
210 } else {
211 self.pos -= 1;
213 }
214
215 Some(is_set)
216 }
217}
218
219static HUFFMAN_CODE_TABLE: &[(u32, u8)] = &[
220 (0x1ff8, 13),
221 (0x7fffd8, 23),
222 (0xfffffe2, 28),
223 (0xfffffe3, 28),
224 (0xfffffe4, 28),
225 (0xfffffe5, 28),
226 (0xfffffe6, 28),
227 (0xfffffe7, 28),
228 (0xfffffe8, 28),
229 (0xffffea, 24),
230 (0x3ffffffc, 30),
231 (0xfffffe9, 28),
232 (0xfffffea, 28),
233 (0x3ffffffd, 30),
234 (0xfffffeb, 28),
235 (0xfffffec, 28),
236 (0xfffffed, 28),
237 (0xfffffee, 28),
238 (0xfffffef, 28),
239 (0xffffff0, 28),
240 (0xffffff1, 28),
241 (0xffffff2, 28),
242 (0x3ffffffe, 30),
243 (0xffffff3, 28),
244 (0xffffff4, 28),
245 (0xffffff5, 28),
246 (0xffffff6, 28),
247 (0xffffff7, 28),
248 (0xffffff8, 28),
249 (0xffffff9, 28),
250 (0xffffffa, 28),
251 (0xffffffb, 28),
252 (0x14, 6),
253 (0x3f8, 10),
254 (0x3f9, 10),
255 (0xffa, 12),
256 (0x1ff9, 13),
257 (0x15, 6),
258 (0xf8, 8),
259 (0x7fa, 11),
260 (0x3fa, 10),
261 (0x3fb, 10),
262 (0xf9, 8),
263 (0x7fb, 11),
264 (0xfa, 8),
265 (0x16, 6),
266 (0x17, 6),
267 (0x18, 6),
268 (0x0, 5),
269 (0x1, 5),
270 (0x2, 5),
271 (0x19, 6),
272 (0x1a, 6),
273 (0x1b, 6),
274 (0x1c, 6),
275 (0x1d, 6),
276 (0x1e, 6),
277 (0x1f, 6),
278 (0x5c, 7),
279 (0xfb, 8),
280 (0x7ffc, 15),
281 (0x20, 6),
282 (0xffb, 12),
283 (0x3fc, 10),
284 (0x1ffa, 13),
285 (0x21, 6),
286 (0x5d, 7),
287 (0x5e, 7),
288 (0x5f, 7),
289 (0x60, 7),
290 (0x61, 7),
291 (0x62, 7),
292 (0x63, 7),
293 (0x64, 7),
294 (0x65, 7),
295 (0x66, 7),
296 (0x67, 7),
297 (0x68, 7),
298 (0x69, 7),
299 (0x6a, 7),
300 (0x6b, 7),
301 (0x6c, 7),
302 (0x6d, 7),
303 (0x6e, 7),
304 (0x6f, 7),
305 (0x70, 7),
306 (0x71, 7),
307 (0x72, 7),
308 (0xfc, 8),
309 (0x73, 7),
310 (0xfd, 8),
311 (0x1ffb, 13),
312 (0x7fff0, 19),
313 (0x1ffc, 13),
314 (0x3ffc, 14),
315 (0x22, 6),
316 (0x7ffd, 15),
317 (0x3, 5),
318 (0x23, 6),
319 (0x4, 5),
320 (0x24, 6),
321 (0x5, 5),
322 (0x25, 6),
323 (0x26, 6),
324 (0x27, 6),
325 (0x6, 5),
326 (0x74, 7),
327 (0x75, 7),
328 (0x28, 6),
329 (0x29, 6),
330 (0x2a, 6),
331 (0x7, 5),
332 (0x2b, 6),
333 (0x76, 7),
334 (0x2c, 6),
335 (0x8, 5),
336 (0x9, 5),
337 (0x2d, 6),
338 (0x77, 7),
339 (0x78, 7),
340 (0x79, 7),
341 (0x7a, 7),
342 (0x7b, 7),
343 (0x7ffe, 15),
344 (0x7fc, 11),
345 (0x3ffd, 14),
346 (0x1ffd, 13),
347 (0xffffffc, 28),
348 (0xfffe6, 20),
349 (0x3fffd2, 22),
350 (0xfffe7, 20),
351 (0xfffe8, 20),
352 (0x3fffd3, 22),
353 (0x3fffd4, 22),
354 (0x3fffd5, 22),
355 (0x7fffd9, 23),
356 (0x3fffd6, 22),
357 (0x7fffda, 23),
358 (0x7fffdb, 23),
359 (0x7fffdc, 23),
360 (0x7fffdd, 23),
361 (0x7fffde, 23),
362 (0xffffeb, 24),
363 (0x7fffdf, 23),
364 (0xffffec, 24),
365 (0xffffed, 24),
366 (0x3fffd7, 22),
367 (0x7fffe0, 23),
368 (0xffffee, 24),
369 (0x7fffe1, 23),
370 (0x7fffe2, 23),
371 (0x7fffe3, 23),
372 (0x7fffe4, 23),
373 (0x1fffdc, 21),
374 (0x3fffd8, 22),
375 (0x7fffe5, 23),
376 (0x3fffd9, 22),
377 (0x7fffe6, 23),
378 (0x7fffe7, 23),
379 (0xffffef, 24),
380 (0x3fffda, 22),
381 (0x1fffdd, 21),
382 (0xfffe9, 20),
383 (0x3fffdb, 22),
384 (0x3fffdc, 22),
385 (0x7fffe8, 23),
386 (0x7fffe9, 23),
387 (0x1fffde, 21),
388 (0x7fffea, 23),
389 (0x3fffdd, 22),
390 (0x3fffde, 22),
391 (0xfffff0, 24),
392 (0x1fffdf, 21),
393 (0x3fffdf, 22),
394 (0x7fffeb, 23),
395 (0x7fffec, 23),
396 (0x1fffe0, 21),
397 (0x1fffe1, 21),
398 (0x3fffe0, 22),
399 (0x1fffe2, 21),
400 (0x7fffed, 23),
401 (0x3fffe1, 22),
402 (0x7fffee, 23),
403 (0x7fffef, 23),
404 (0xfffea, 20),
405 (0x3fffe2, 22),
406 (0x3fffe3, 22),
407 (0x3fffe4, 22),
408 (0x7ffff0, 23),
409 (0x3fffe5, 22),
410 (0x3fffe6, 22),
411 (0x7ffff1, 23),
412 (0x3ffffe0, 26),
413 (0x3ffffe1, 26),
414 (0xfffeb, 20),
415 (0x7fff1, 19),
416 (0x3fffe7, 22),
417 (0x7ffff2, 23),
418 (0x3fffe8, 22),
419 (0x1ffffec, 25),
420 (0x3ffffe2, 26),
421 (0x3ffffe3, 26),
422 (0x3ffffe4, 26),
423 (0x7ffffde, 27),
424 (0x7ffffdf, 27),
425 (0x3ffffe5, 26),
426 (0xfffff1, 24),
427 (0x1ffffed, 25),
428 (0x7fff2, 19),
429 (0x1fffe3, 21),
430 (0x3ffffe6, 26),
431 (0x7ffffe0, 27),
432 (0x7ffffe1, 27),
433 (0x3ffffe7, 26),
434 (0x7ffffe2, 27),
435 (0xfffff2, 24),
436 (0x1fffe4, 21),
437 (0x1fffe5, 21),
438 (0x3ffffe8, 26),
439 (0x3ffffe9, 26),
440 (0xffffffd, 28),
441 (0x7ffffe3, 27),
442 (0x7ffffe4, 27),
443 (0x7ffffe5, 27),
444 (0xfffec, 20),
445 (0xfffff3, 24),
446 (0xfffed, 20),
447 (0x1fffe6, 21),
448 (0x3fffe9, 22),
449 (0x1fffe7, 21),
450 (0x1fffe8, 21),
451 (0x7ffff3, 23),
452 (0x3fffea, 22),
453 (0x3fffeb, 22),
454 (0x1ffffee, 25),
455 (0x1ffffef, 25),
456 (0xfffff4, 24),
457 (0xfffff5, 24),
458 (0x3ffffea, 26),
459 (0x7ffff4, 23),
460 (0x3ffffeb, 26),
461 (0x7ffffe6, 27),
462 (0x3ffffec, 26),
463 (0x3ffffed, 26),
464 (0x7ffffe7, 27),
465 (0x7ffffe8, 27),
466 (0x7ffffe9, 27),
467 (0x7ffffea, 27),
468 (0x7ffffeb, 27),
469 (0xffffffe, 28),
470 (0x7ffffec, 27),
471 (0x7ffffed, 27),
472 (0x7ffffee, 27),
473 (0x7ffffef, 27),
474 (0x7fffff0, 27),
475 (0x3ffffee, 26),
476 (0x3fffffff, 30),
477];
478
479#[cfg(test)]
480mod tests {
481 use super::BitIterator;
482 use super::HuffmanDecoder;
483 use super::HuffmanDecoderError;
484
485 fn to_expected_bit_result(numbers: &[u8]) -> Vec<bool> {
488 numbers.iter().map(|b| -> bool { *b == 1 }).collect()
489 }
490
491 #[test]
492 fn test_bit_iterator_single_byte() {
493 let expected_result = to_expected_bit_result(&[0, 0, 0, 0, 1, 0, 1, 0]);
494
495 let mut res: Vec<bool> = Vec::new();
496 for b in BitIterator::new([10u8].iter()) {
497 res.push(b);
498 }
499
500 assert_eq!(res, expected_result);
501 }
502
503 #[test]
504 fn test_bit_iterator_multiple_bytes() {
505 let expected_result = to_expected_bit_result(&[
506 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0,
508 ]);
509
510 let mut res: Vec<bool> = Vec::new();
511 for b in BitIterator::new([10u8, 255, 128, 1, 0, 170].iter()) {
512 res.push(b);
513 }
514
515 assert_eq!(res, expected_result);
516 }
517
518 #[test]
521 fn test_huffman_code_single_byte() {
522 let mut decoder = HuffmanDecoder::new();
523 {
526 let hex_buffer = [(0x7 << 3) + 7];
529 let expected_result = vec![b'o'];
530
531 let result = decoder.decode(&hex_buffer).ok().unwrap();
532
533 assert_eq!(result, expected_result);
534 }
535 {
536 #[allow(clippy::identity_op)] let hex_buffer = [0x0 + 7];
538 let expected_result = vec![b'0'];
539
540 let result = decoder.decode(&hex_buffer).ok().unwrap();
541
542 assert_eq!(result, expected_result);
543 }
544 {
545 let hex_buffer = [(0x21 << 2) + 3];
547 let expected_result = vec![b'A'];
548
549 let result = decoder.decode(&hex_buffer).ok().unwrap();
550
551 assert_eq!(result, expected_result);
552 }
553 }
554
555 #[test]
558 fn test_huffman_code_single_char_multiple_byte() {
559 let mut decoder = HuffmanDecoder::new();
560 {
563 let hex_buffer = [255, 160 + 15];
564 let expected_result = vec![b'#'];
565
566 let result = decoder.decode(&hex_buffer).ok().unwrap();
567
568 assert_eq!(result, expected_result);
569 }
570 {
571 let hex_buffer = [255, 200 + 7];
572 let expected_result = vec![b'$'];
573
574 let result = decoder.decode(&hex_buffer).ok().unwrap();
575
576 assert_eq!(result, expected_result);
577 }
578 {
579 let hex_buffer = [255, 255, 255, 240 + 3];
580 let expected_result = vec![10];
581
582 let result = decoder.decode(&hex_buffer).ok().unwrap();
583
584 assert_eq!(result, expected_result);
585 }
586 }
587
588 #[test]
589 fn test_huffman_code_multiple_chars() {
590 let mut decoder = HuffmanDecoder::new();
591 {
592 let hex_buffer = [254, 1];
593 let expected_result = vec![b'!', b'0'];
594
595 let result = decoder.decode(&hex_buffer).ok().unwrap();
596
597 assert_eq!(result, expected_result);
598 }
599 {
600 let hex_buffer = [(0x14 << 2) | 0x3, 248];
601 let expected_result = vec![b' ', b'!'];
602
603 let result = decoder.decode(&hex_buffer).ok().unwrap();
604
605 assert_eq!(result, expected_result);
606 }
607 }
608
609 #[test]
612 fn test_eos_is_error() {
613 let mut decoder = HuffmanDecoder::new();
614 {
615 let hex_buffer = [0xFF, 0xFF, 0xFF, 0xFF];
617
618 let result = decoder.decode(&hex_buffer);
619
620 assert_eq!(
621 HuffmanDecoderError::EOSInString,
622 match result {
623 Err(e) => e,
624 _ => panic!("Expected error due to EOS symbol in string"),
625 }
626 );
627 }
628 {
629 let hex_buffer = [0x3F, 0xFF, 0xFF, 0xFF, 0xFF];
631
632 let result = decoder.decode(&hex_buffer);
633
634 assert_eq!(
635 HuffmanDecoderError::EOSInString,
636 match result {
637 Err(e) => e,
638 _ => panic!("Expected error due to EOS symbol in string"),
639 }
640 );
641 }
642 }
643
644 #[test]
647 fn test_short_padding_okay() {
648 let mut decoder = HuffmanDecoder::new();
649 let hex_buffer = [0x3F];
650
651 let result = decoder.decode(&hex_buffer);
652
653 assert_eq!(b"o".to_vec(), result.ok().unwrap());
654 }
655
656 #[test]
658 fn test_padding_too_long() {
659 let mut decoder = HuffmanDecoder::new();
660 let hex_buffer = [0x3F, 0xFF];
661
662 let result = decoder.decode(&hex_buffer);
663
664 assert_eq!(
665 HuffmanDecoderError::PaddingTooLarge,
666 match result {
667 Err(e) => e,
668 _ => panic!("Expected `PaddingTooLarge` error"),
669 }
670 );
671 }
672
673 #[test]
676 fn test_padding_invalid() {
677 let mut decoder = HuffmanDecoder::new();
678 {
679 let hex_buffer = [0x3E];
680
681 let result = decoder.decode(&hex_buffer);
682
683 assert_eq!(
684 HuffmanDecoderError::InvalidPadding,
685 match result {
686 Err(e) => e,
687 _ => panic!("Expected `InvalidPadding` error"),
688 }
689 );
690 }
691 {
692 let hex_buffer = [254, 0];
693
694 let result = decoder.decode(&hex_buffer);
695
696 assert_eq!(
697 HuffmanDecoderError::InvalidPadding,
698 match result {
699 Err(e) => e,
700 _ => panic!("Expected `InvalidPadding` error"),
701 }
702 );
703 }
704 }
705}