1use std::io::{self, Seek as _};
2
3use bitstream_io::Endianness;
4use bitstream_io::{BigEndian, BitRead, BitReader};
5
6const FIXED_CODE_LENGTHS: [usize; 257] = [
7 3, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
9 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
10 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 9, 9, 10, 10, 10, 10, 10, 10,
11 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
12 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
13 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
14 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
15 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
16 11, 11, 11, 11, 11, 11, 11, 11, 11, 13, 13, 12,
17];
18
19#[derive(thiserror::Error, Debug)]
20pub enum Error {
21 #[error("Invalid huffman tree")]
22 InvalidTree,
23
24 #[error("The input stream ended too soon")]
25 InsufficentSize,
26
27 #[error("Invalid stream")]
28 InvalidStream,
29
30 #[error(transparent)]
31 Io(#[from] io::Error),
32}
33
34impl From<Error> for io::Error {
35 fn from(val: Error) -> Self {
36 match val {
37 Error::Io(error) => error,
38 me => io::Error::other(me),
39 }
40 }
41}
42
43pub struct HuffmanReader<R: io::Read + io::Seek> {
44 inner: bitstream_io::BitReader<R, BigEndian>,
45 uncompressed_size: u64,
46 offset: u64,
47
48 tree: FixedHuffmanDecoder,
49 translations: [u8; 257],
50
51 current_block_size: usize,
52 current_block_offset: usize,
53
54 packbits_operation: packbits_rle::Operation,
55 packbits_cursor: io::Cursor<Vec<u8>>,
56}
57
58impl<R: io::Read + io::Seek> HuffmanReader<R> {
59 pub fn try_from(inner: R, uncompressed_size: u64) -> Result<Self, Error> {
60 let inner = bitstream_io::BitReader::new(inner);
61 let tree = Self::build_decoder();
62
63 Ok(Self {
64 inner,
65 uncompressed_size,
66 offset: 0,
67 tree,
68 translations: [0u8; 257],
69
70 current_block_size: 0,
71 current_block_offset: 0,
72
73 packbits_operation: Default::default(),
74 packbits_cursor: io::Cursor::new(Vec::new()),
75 })
76 }
77
78 fn build_decoder() -> FixedHuffmanDecoder {
79 let mut tree = FixedHuffmanDecoder::new();
80
81 let mut last_length = 0;
82 let mut last_code = 0;
83
84 FIXED_CODE_LENGTHS
85 .iter()
86 .enumerate()
87 .for_each(|(i, length)| {
88 let mut thiscode;
89
90 if last_length == 0 {
91 thiscode = 0;
92 } else if *length < last_length {
93 thiscode = (last_code >> (last_length - length)) + 1;
94 } else {
95 thiscode = last_code + 1;
96 if *length > last_length {
97 thiscode <<= length - last_length;
98 }
99 }
100
101 last_length = *length;
102 last_code = thiscode;
103
104 tree.add_value(i as i32, thiscode as u32, *length, *length)
105 .unwrap();
106 });
107
108 tree.make_table(false);
109 tree
110 }
111
112 pub fn open_block(&mut self) -> io::Result<()> {
113 assert!(
114 self.inner.position_in_bits()? % 8 == 0,
115 "Should be opening at byte boundary",
116 );
117 let mut block_size: i32 = self.inner.read_var(32)?;
118 if block_size > 0 {
119 if block_size < 4 {
120 return Err(Error::InvalidStream)?;
121 }
122 block_size -= 4;
123
124 let uncompressed_block_size: u32 = self.inner.read_var(32)?;
125 self.current_block_offset = 0;
126 self.current_block_size = uncompressed_block_size as usize;
127
128 let bytes_read = self.read_huffman_symbols_for_block()?;
129 let mut chunk = vec![0u8; block_size as usize - bytes_read - 4];
130 self.inner.read_bytes(&mut chunk)?;
131 let chunk_reader = io::Cursor::new(chunk);
132 let mut bitstream: bitstream_io::BitReader<_, BigEndian> =
133 bitstream_io::BitReader::new(chunk_reader);
134 let mut buffer = vec![0u8; self.current_block_size + 1];
135 for (idx, byte) in buffer.iter_mut().enumerate() {
136 match self.tree.next_symbol(&mut bitstream)? {
137 v if (0..256).contains(&v) => {
138 self.current_block_offset += 1;
139 *byte = self.translations[v as usize];
140 }
141 _ => {
142 log::info!(
143 "Stop code at byte {idx} (expected {})",
144 self.current_block_size
145 );
146 buffer.truncate(idx);
147 break;
148 }
149 }
150 }
151 self.packbits_cursor = io::Cursor::new(buffer);
152
153 Ok(())
154 } else {
155 self.current_block_offset = 0;
157 self.current_block_size = (-block_size) as usize;
158 if self.current_block_size < 4 {
159 return Err(Error::InvalidStream)?;
160 }
161 self.current_block_size -= 4;
162
163 let mut buffer = vec![0u8; self.current_block_size];
164 self.inner.read_bytes(&mut buffer)?;
165 self.packbits_cursor = io::Cursor::new(buffer);
166
167 Ok(())
168 }
169 }
170
171 fn next_byte(&mut self) -> io::Result<Option<u8>> {
172 if self.stream_position()? >= self.stream_len()? {
173 return Ok(None);
174 }
175
176 match self.packbits_operation.advance(&mut self.packbits_cursor) {
177 Ok((byte, next_state)) => {
178 self.packbits_operation = next_state;
179 Ok(Some(byte))
180 }
181 Err(packbits_rle::OperationError::InsufficientInput(command)) => {
182 self.open_block()?;
183 let (byte, next_state) = command.execute(&mut self.packbits_cursor)?;
184 self.packbits_operation = next_state;
185 Ok(Some(byte))
186 }
187 Err(packbits_rle::OperationError::UnexpectedEof) => {
188 self.open_block()?;
189 let (byte, next_state) =
190 packbits_rle::Operation::default().advance(&mut self.packbits_cursor)?;
191 self.packbits_operation = next_state;
192 Ok(Some(byte))
193 }
194 Err(e) => Err(e)?,
195 }
196 }
197
198 fn read_huffman_symbols_for_block(&mut self) -> io::Result<usize> {
199 assert!(
200 self.inner.position_in_bits()? % 8 == 0,
201 "Should be reading symbols at byte boundary",
202 );
203 let symbol_count: u16 = self.inner.read_var(16)?;
204 if symbol_count >= 256 {
205 return Err(Error::InvalidStream)?;
206 }
207
208 self.inner
212 .read_bytes(&mut self.translations[0..(symbol_count as usize)])?;
213
214 Ok(symbol_count as usize + 2)
215 }
216
217 pub fn into_inner(self) -> R {
218 self.inner.into_reader()
219 }
220}
221
222impl<R: io::Read + io::Seek> io::Read for HuffmanReader<R> {
223 #[inline]
224 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
225 for (idx, byte) in buf.iter_mut().enumerate() {
226 match self.next_byte()? {
227 Some(value) => {
228 self.offset += 1;
229 *byte = value;
230 }
231 None => return Ok(idx),
232 }
233 }
234
235 Ok(buf.len())
236 }
237}
238
239impl<R: io::Read + io::Seek> io::Seek for HuffmanReader<R> {
240 fn seek(&mut self, _: io::SeekFrom) -> io::Result<u64> {
241 todo!()
242 }
243
244 #[inline]
245 fn stream_len(&mut self) -> io::Result<u64> {
246 Ok(self.uncompressed_size)
247 }
248
249 #[inline]
250 fn stream_position(&mut self) -> io::Result<u64> {
251 Ok(self.offset)
252 }
253}
254
255#[derive(thiserror::Error, Debug)]
256pub enum InitializationError {
257 #[error("Prefix already exists")]
258 DuplicatedPrefix,
259 #[error("Invalid repeat position")]
260 InvalidRepeatPosition,
261 #[error("Invalid repeating code")]
262 InvaildRepeatingCode,
263}
264
265impl From<InitializationError> for io::Error {
266 fn from(value: InitializationError) -> Self {
267 io::Error::other(value)
268 }
269}
270
271#[derive(thiserror::Error, Debug)]
272pub enum DecompressionError {
273 #[error("Invalid prefix code when getting next symbol [length]")]
274 InvalidPrefixCodeLength,
275 #[error("Invalid prefix code when getting next symbol [code]")]
276 InvalidPrefixCodeCode,
277}
278
279impl From<DecompressionError> for io::Error {
280 fn from(value: DecompressionError) -> Self {
281 io::Error::other(value)
282 }
283}
284
285#[derive(Clone, Debug)]
286struct HuffmanTreeNode {
287 left: i32,
288 right: i32,
289}
290
291#[derive(Default, Clone)]
292struct HuffmanTableRow {
293 length: i32,
294 value: i32,
295}
296
297#[derive(Clone)]
298pub struct FixedHuffmanDecoder {
299 min_length: usize,
300 max_length: usize,
301
302 tree: Vec<HuffmanTreeNode>,
303
304 table: Option<Vec<HuffmanTableRow>>,
305 table_size: usize,
306}
307
308impl Default for FixedHuffmanDecoder {
309 fn default() -> Self {
310 Self::new()
311 }
312}
313
314impl FixedHuffmanDecoder {
315 pub fn new() -> Self {
316 let mut me = Self {
317 min_length: usize::MAX,
318 max_length: usize::MIN,
319 tree: Vec::new(),
320 table: None,
321 table_size: 0,
322 };
323 me.new_node();
324 me
325 }
326
327 pub fn initialize(
328 lengths: &[isize],
329 max_code_length: isize,
330 zeros: bool,
331 ) -> Result<Self, InitializationError> {
332 let mut me = Self::new();
333 let mut code = 0;
334 let mut unhandled_symbols = lengths.len();
335
336 for length in 1..=max_code_length {
337 for (i, cur_len) in lengths.iter().enumerate() {
338 if *cur_len != length {
339 continue;
340 }
341
342 me.add_value(
343 i as i32,
344 if zeros { code } else { !code },
345 length as usize,
346 length as usize,
347 )?;
348
349 code += 1;
350
351 unhandled_symbols -= 1;
352 if unhandled_symbols == 0 {
353 break;
354 }
355 }
356
357 code <<= 1;
358 }
359
360 Ok(me)
361 }
362
363 pub fn add_value(
364 &mut self,
365 value: i32,
366 code: u32,
367 length: usize,
368 repeat_pos: usize,
369 ) -> Result<(), InitializationError> {
370 self.max_length = self.max_length.max(length);
371 self.min_length = self.min_length.min(length);
372
373 let repeat_pos = length as isize - 1 - repeat_pos as isize;
374 let mut last_node = 0;
375
376 let codest = (((repeat_pos - 1) as u32) >> 1) & 3;
377 if repeat_pos == 0 || (repeat_pos >= 0 && (codest == 0 || codest == 3)) {
378 return Err(InitializationError::InvalidRepeatPosition);
379 }
380
381 let mut bitpos = length as isize - 1;
382 loop {
383 if bitpos < 0 {
384 break;
385 }
386
387 let bit = ((code >> bitpos) & 1) != 0;
388 if self.is_leaf_node(last_node) {
389 return Err(InitializationError::DuplicatedPrefix);
390 };
391
392 if bitpos == repeat_pos {
393 if !self.is_open_branch(last_node, bit) {
394 return Err(InitializationError::InvaildRepeatingCode);
395 };
396
397 let repeat_node = self.new_node();
398 let next_node = self.new_node();
399
400 self.set_branch(last_node, bit, repeat_node);
401 self.set_branch(repeat_node, bit, repeat_node);
402 self.set_branch(repeat_node, !bit, next_node);
403
404 last_node = next_node;
405
406 bitpos += 1;
407 } else {
408 if self.is_open_branch(last_node, bit) {
409 let new_node = self.new_node();
410 self.set_branch(last_node, bit, new_node);
411 }
412 last_node = self.branch(last_node, bit);
413 }
414
415 bitpos -= 1;
416 }
417
418 if !self.is_empty_node(last_node) {
419 return Err(InitializationError::DuplicatedPrefix);
420 }
421
422 self.set_leaf_value(last_node, value);
423
424 Ok(())
425 }
426
427 pub(crate) fn new_node(&mut self) -> i32 {
428 self.tree.push(HuffmanTreeNode {
429 left: -1,
430 right: -2,
431 });
432 self.tree.len() as i32 - 1
433 }
434
435 pub(crate) fn set_leaf_value(&mut self, node: i32, value: i32) {
436 self.set_branch(node, false, value);
437 self.set_branch(node, true, value);
438 }
439
440 fn leaf_value(&self, node: i32) -> i32 {
441 assert!(self.branch(node, false) == self.branch(node, true));
442 self.branch(node, false)
443 }
444
445 fn is_empty_node(&self, node: i32) -> bool {
446 self.branch(node, false) == -1 && self.branch(node, true) == -2
447 }
448
449 fn is_leaf_node(&self, node: i32) -> bool {
450 self.branch(node, false) == self.branch(node, true)
451 }
452
453 fn is_open_branch(&self, node: i32, right_branch: bool) -> bool {
454 if right_branch {
455 self.tree[node as usize].right < 0
456 } else {
457 self.tree[node as usize].left < 0
458 }
459 }
460
461 pub(crate) fn set_branch(&mut self, node: i32, right_branch: bool, value: i32) {
462 if right_branch {
463 self.tree[node as usize].right = value;
464 } else {
465 self.tree[node as usize].left = value;
466 }
467 }
468
469 fn branch(&self, node: i32, right_branch: bool) -> i32 {
470 if right_branch {
471 self.tree[node as usize].right
472 } else {
473 self.tree[node as usize].left
474 }
475 }
476
477 fn is_invalid_node(&self, node: i32) -> bool {
478 node < 0
479 }
480
481 pub fn add_value_lf(
482 &mut self,
483 value: i32,
484 code: u32,
485 length: usize,
486 repeat_pos: usize,
487 ) -> Result<(), InitializationError> {
488 self.add_value(value, reverse_n(code, length), length, repeat_pos)
489 }
490
491 #[inline]
492 pub fn next_symbol<R: io::Read + io::Seek, E: Endianness>(
493 &mut self,
494 input: &mut bitstream_io::BitReader<R, E>,
495 ) -> io::Result<i32> {
496 let Some(_) = self.table.as_ref() else {
497 panic!("Huffman: search table not built");
498 };
499
500 let mut node = 0;
501 loop {
502 if self.is_leaf_node(node) {
503 break;
504 }
505 let bit = input.read_bit()?;
506 if self.is_open_branch(node, bit) {
507 return Err(DecompressionError::InvalidPrefixCodeCode)?;
508 }
509 node = self.branch(node, bit);
510 }
511
512 Ok(self.leaf_value(node))
513 }
514
515 fn make_table_recursive_le(&mut self, node: i32, table: &mut [HuffmanTableRow], depth: i32) {
516 let curr_table_size = 1 << (self.table_size as i32 - depth);
517 let curr_stride = 1 << depth;
518
519 if self.is_invalid_node(node) {
520 for i in 0..curr_table_size {
521 table[i * curr_stride].length = -1;
522 }
523 } else if self.is_leaf_node(node) {
524 for i in 0..curr_table_size {
525 table[i * curr_stride].length = depth;
526 table[i * curr_stride].value = self.leaf_value(node);
527 }
528 } else if depth == self.table_size as i32 {
529 table[0].length = self.table_size as i32 + 1;
530 table[0].value = node;
531 } else {
532 self.make_table_recursive_le(self.branch(node, false), table, depth + 1);
533 let size = table.len();
534 self.make_table_recursive_le(
535 self.branch(node, true),
536 &mut table[curr_stride..size],
537 depth + 1,
538 );
539 }
540 }
541
542 fn make_table_recursive_be(&mut self, node: i32, table: &mut [HuffmanTableRow], depth: i32) {
543 let curr_table_size = 1 << (self.table_size as i32 - depth);
544
545 if self.is_invalid_node(node) {
546 table
547 .iter_mut()
548 .take(curr_table_size)
549 .for_each(|i| i.length = -1);
550 } else if self.is_leaf_node(node) {
551 table.iter_mut().take(curr_table_size).for_each(|i| {
552 i.length = depth;
553 i.value = self.leaf_value(node);
554 });
555 } else if depth == self.table_size as i32 {
556 table[0].length = self.table_size as i32 + 1;
557 table[0].value = node;
558 } else {
559 self.make_table_recursive_be(self.branch(node, false), table, depth + 1);
560 let size = table.len();
561 self.make_table_recursive_be(
562 self.branch(node, true),
563 &mut table[(curr_table_size / 2)..size],
564 depth + 1,
565 );
566 }
567 }
568
569 pub fn make_table(&mut self, little_endian: bool) {
570 self.table_size = if self.max_length < self.min_length || self.max_length >= 10 {
571 10
572 } else {
573 self.max_length
574 };
575
576 let mut table = vec![Default::default(); 1 << self.table_size];
577 if little_endian {
578 self.make_table_recursive_le(0, &mut table, 0)
579 } else {
580 self.make_table_recursive_be(0, &mut table, 0)
581 }
582 self.table = Some(table);
583 }
584}
585
586#[inline]
587fn reverse(value: u32) -> u32 {
588 let mut val = value;
589 val = ((val >> 1) & 0x55555555) | ((val & 0x55555555) << 1);
590 val = ((val >> 2) & 0x33333333) | ((val & 0x33333333) << 2);
591 val = ((val >> 4) & 0x0F0F0F0F) | ((val & 0x0F0F0F0F) << 4);
592 val = ((val >> 8) & 0x00FF00FF) | ((val & 0x00FF00FF) << 8);
593 val.rotate_left(16)
594}
595
596#[inline]
597fn reverse_n(value: u32, length: usize) -> u32 {
598 reverse(value) >> (32 - length)
599}
600
601pub trait ReadWord {
602 fn peek_word(&mut self, bits: u32) -> io::Result<u32>;
603 fn read_word(&mut self, bits: u32) -> io::Result<u32>;
604}
605
606impl<R: io::Read + io::Seek, E: Endianness> ReadWord for BitReader<R, E> {
607 fn peek_word(&mut self, bits: u32) -> io::Result<u32> {
608 let start = self.position_in_bits().unwrap();
609 match self.read_var::<u32>(bits) {
610 Ok(result) => {
611 self.seek_bits(io::SeekFrom::Current(-(bits as i64)))?;
612 Ok(result)
613 }
614 Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
615 let end = self.seek_bits(io::SeekFrom::End(0)).unwrap();
616 self.seek_bits(io::SeekFrom::Start(start)).unwrap();
617
618 if start < end {
619 let available_bits = end - start;
620 let result = self.read_var::<u32>(available_bits as u32)?
621 << (bits as u64 - available_bits);
622 self.seek_bits(io::SeekFrom::Current(-(available_bits as i64)))?;
623 return Ok(result);
624 }
625 Err(e)
626 }
627 Err(e) => Err(e),
628 }
629 }
630
631 fn read_word(&mut self, bits: u32) -> io::Result<u32> {
632 let start = self.position_in_bits().unwrap();
633
634 match self.read_var::<u32>(bits) {
635 Ok(result) => Ok(result),
636 Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => {
637 let end = self.seek_bits(io::SeekFrom::End(0)).unwrap();
638 if start < end {
639 let available_bits = end - start;
640 self.seek_bits(io::SeekFrom::Start(start)).unwrap();
641 return Ok(self.read_var::<u32>(available_bits as u32)?
642 << (bits as u64 - available_bits));
643 }
644
645 Err(e)
646 }
647 Err(e) => Err(e),
648 }
649 }
650}
651
652#[cfg(test)]
653mod test {
654 use super::*;
655
656 #[test]
657 fn successful_initialization() {
658 assert!(
659 FixedHuffmanDecoder::initialize(
660 &[
661 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7,
662 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
663 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
664 ],
665 8,
666 true,
667 )
668 .is_ok()
669 )
670 }
671}