1#![cfg_attr(test, allow(clippy::logic_bug))]
8
9use super::{
10 bit_reader::BitReader,
11 huffman::HuffmanDecoder,
12 lzss::LzssDecoder,
13 ppm::{PpmModel, RangeCoder},
14 vm::RarVM,
15 DecompressError, Result,
16};
17
/// Size of the main symbol alphabet. In `decode_block` symbols 0..=255 are literals,
/// 256..=270 are control / repeat / short-match symbols, and 271.. index the 28 length
/// slots (271 + 28 = 299). Not referenced by the code in this part of the file.
#[allow(dead_code)]
const MAIN_CODES: usize = 299;

/// Number of distance slots; equals the length of `DIST_BASE` / `DIST_EXTRA`.
/// Not referenced by the code in this part of the file.
#[allow(dead_code)]
const DIST_CODES: usize = 60;

/// Presumably the size of the low-distance alphabet (values 0..=15 plus the repeat
/// symbol 16 handled in `decode_block`) — TODO confirm against the Huffman table reader.
#[allow(dead_code)]
const LOW_DIST_CODES: usize = 17;

/// Number of length slots; equals the length of `LENGTH_BASE` / `LENGTH_EXTRA`.
/// Not referenced by the code in this part of the file.
#[allow(dead_code)]
const LEN_CODES: usize = 28;

/// Presumably the longest match the format can encode — unused here, TODO confirm.
#[allow(dead_code)]
const MAX_MATCH_LEN: u32 = 258;

/// Base distances for the eight short-match symbols (263..=270). The decoded distance is
/// `SHORT_BASES[i] + extra + 1`, where `extra` is read from `SHORT_BITS[i]` bits.
const SHORT_BASES: [u32; 8] = [0, 4, 8, 16, 32, 64, 128, 192];

/// Number of extra bits that follow each short-match symbol; parallel to `SHORT_BASES`.
const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];

/// Base value of each length slot. Callers add the extra-bit value from `LENGTH_EXTRA`
/// plus a fixed offset (3 for main-table matches, 2 for repeated-distance matches).
const LENGTH_BASE: [u32; 28] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
    160, 192, 224,
];

/// Number of extra bits to read for each length slot; parallel to `LENGTH_BASE`.
const LENGTH_EXTRA: [u8; 28] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
];

/// Base value of each distance slot. The decoded distance is
/// `DIST_BASE[slot] + extra + 1`, with `extra` assembled according to `DIST_EXTRA[slot]`.
const DIST_BASE: [u32; 60] = [
    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
    2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
    1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 2883584, 3145728, 3407872,
    3670016, 3932160,
];

/// Number of extra distance bits for each distance slot; parallel to `DIST_BASE`.
/// For slots above 9 the low four bits come from the low-distance Huffman table and only
/// the remaining `extra - 4` bits are read directly from the bit stream.
const DIST_EXTRA: [u8; 60] = [
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
    13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18,
];
70
/// Decoder state for the RAR 2.9/3.x compression method handled by this module.
///
/// It combines an LZSS window, Huffman tables, an optional PPM model with its range coder,
/// and the RarVM filter machinery, plus the match history that must survive between blocks.
pub struct Rar29Decoder {
    /// Sliding window and output sink; receives literals, matches and filtered data.
    lzss: LzssDecoder,
    /// Holds the main, distance, low-distance and length Huffman tables.
    huffman: HuffmanDecoder,
    /// Stores VM filter code and executes filters once their position is reached.
    vm: RarVM,
    /// PPM model, created lazily the first time a PPM block header is read.
    ppm: Option<PpmModel>,
    /// Range coder returned by `PpmModel::init`; `None` until a PPM block was initialised.
    ppm_coder: Option<RangeCoder>,
    /// Escape character returned by `PpmModel::init`; `-1` while unset.
    ppm_esc_char: i32,
    /// The four most recently used match distances, most recent first.
    old_dist: [u32; 4],
    /// Only ever written with 0 in this file — appears vestigial, TODO confirm.
    old_dist_ptr: usize,
    /// Distance of the last match, reused by symbol 258.
    last_dist: u32,
    /// Length of the last match, reused by symbol 258; 0 means "no previous match".
    last_len: u32,
    /// `true` while the current block is PPM-coded rather than Huffman/LZ-coded.
    ppm_mode: bool,
    /// `true` once `read_tables` has succeeded; lets `decompress` skip re-reading them.
    tables_read: bool,
    /// Last low-distance value decoded; reused while a repeat run is active.
    prev_low_offset: u32,
    /// Remaining number of matches that reuse `prev_low_offset`.
    low_offset_repeat_count: u32,
    /// Output position at which pending filters are next examined; `u64::MAX` if none.
    next_filter_check: u64,
}
104
105impl Rar29Decoder {
106 pub fn new() -> Self {
108 Self::with_window_size(0x400000) }
110
111 pub fn with_window_size(window_size: usize) -> Self {
114 Self {
115 lzss: LzssDecoder::new(window_size),
116 huffman: HuffmanDecoder::new(),
117 vm: RarVM::new(),
118 ppm: None,
119 ppm_coder: None,
120 ppm_esc_char: -1,
121 old_dist: [0; 4],
122 old_dist_ptr: 0,
123 last_dist: 0,
124 last_len: 0,
125 ppm_mode: false,
126 tables_read: false,
127 prev_low_offset: 0,
128 low_offset_repeat_count: 0,
129 next_filter_check: u64::MAX,
130 }
131 }
132
133 #[cfg(test)]
135 pub fn get_output(&self) -> Vec<u8> {
136 self.lzss.output().to_vec()
137 }
138
139 pub fn decompress(&mut self, data: &[u8], unpacked_size: u64) -> Result<Vec<u8>> {
142 let mut reader = BitReader::new(data);
143
144 self.lzss.enable_output(unpacked_size as usize);
146
147 if !self.tables_read {
149 self.read_tables(&mut reader)?;
150 }
151
152 while self.lzss.total_written() < unpacked_size {
154 if reader.is_eof() {
155 break;
156 }
157
158 self.decode_block(&mut reader, unpacked_size)?;
159 }
160
161 let total_written = self.lzss.total_written();
163 let window_mask = self.lzss.window_mask() as usize;
164
165 loop {
167 let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
169 Some((idx, pos)) => (idx, pos),
170 None => break,
171 };
172
173 let flushed = self.lzss.flushed_pos();
175 if flushed < next_pos {
176 self.lzss.flush_to_output(next_pos);
177 }
178
179 let window = self.lzss.window();
180 if let Some((_filter_end, filtered_data)) =
181 self.vm
182 .execute_filter_at_index(filter_idx, window, window_mask, total_written)
183 {
184 self.lzss.write_filtered_to_output(filtered_data, next_pos);
186 } else {
187 break;
188 }
189 }
190
191 self.lzss.flush_to_output(total_written);
193
194 Ok(self.lzss.take_output())
196 }
197
198 fn read_tables(&mut self, reader: &mut BitReader) -> Result<()> {
200 #[cfg(test)]
201 {
202 let byte_pos = reader.bit_position() / 8;
203 eprintln!(
204 "read_tables ENTRY: bit_pos={}, byte_pos={}",
205 reader.bit_position(),
206 byte_pos
207 );
208 eprintln!(" raw bytes at pos: {:02x?}", reader.peek_bytes(8));
209 }
210 reader.align_to_byte();
212 #[cfg(test)]
213 {
214 let byte_pos = reader.bit_position() / 8;
215 eprintln!(
216 "read_tables AFTER align: bit_pos={}, byte_pos={}",
217 reader.bit_position(),
218 byte_pos
219 );
220 eprintln!(" raw bytes at pos: {:02x?}", reader.peek_bytes(8));
221 };
222
223 let ppm_flag = reader.peek_bits(1) != 0;
226
227 self.ppm_mode = ppm_flag;
228
229 if self.ppm_mode {
230 let ppm = self.ppm.get_or_insert_with(PpmModel::new);
233 match ppm.init(reader) {
234 Ok((coder, esc_char)) => {
235 self.ppm_coder = Some(coder);
236 self.ppm_esc_char = esc_char;
237 #[cfg(test)]
238 println!("PPMd initialized: esc_char={}", esc_char);
239 }
240 Err(e) => {
241 #[cfg(test)]
242 println!("PPMd init failed: {}", e);
243 #[cfg(not(test))]
244 let _ = e;
245 return Err(DecompressError::UnsupportedMethod(0x33));
246 }
247 }
248 } else {
249 self.prev_low_offset = 0;
251 self.low_offset_repeat_count = 0;
252
253 let reset_tables = reader.peek_bits(2) & 1 == 0; reader.advance_bits(2);
257
258 if reset_tables {
259 self.huffman.reset_tables();
260 }
261
262 self.huffman.read_tables_after_header(reader)?;
264 }
265
266 self.tables_read = true;
267 Ok(())
268 }
269
270 fn decode_block(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
272 if self.ppm_mode {
273 return self.decode_block_ppm(reader, max_size);
274 }
275
276 if self.huffman.main_table.is_none() || self.huffman.dist_table.is_none() {
278 return Err(DecompressError::InvalidHuffmanCode);
279 }
280
281 #[cfg(test)]
282 let mut symbol_count = 0;
283
284 while self.lzss.total_written() < max_size && !reader.is_eof() {
285 self.maybe_execute_filters();
287
288 #[cfg(test)]
290 let bit_pos_main_start = reader.bit_position();
291 #[cfg(test)]
292 let peek_bits = reader.peek_bits(16);
293
294 let symbol = unsafe {
296 self.huffman
297 .main_table
298 .as_ref()
299 .unwrap_unchecked()
300 .decode(reader)?
301 };
302
303 #[cfg(test)]
304 {
305 let pos = self.lzss.total_written();
306 if pos >= 1498580 && pos <= 1498610 {
307 let bit_pos_after = reader.bit_position();
308 eprintln!(
309 "MAIN sym={} at pos={}, bits {}->{} peek={:016b}",
310 symbol, pos, bit_pos_main_start, bit_pos_after, peek_bits
311 );
312 }
313 }
314
315 if symbol < 256 {
316 #[cfg(test)]
318 {
319 let pos = self.lzss.total_written();
320 if pos >= 1498595 && pos <= 1498610 {
321 eprintln!("WRITING literal 0x{:02x} at output pos {}", symbol, pos);
322 }
323 }
324 self.lzss.write_literal(symbol as u8);
325 } else if symbol == 256 {
326 #[cfg(test)]
332 eprintln!(
333 "\n=== SYMBOL 256 (end of block) at output pos {}, bit_pos {} ===",
334 self.lzss.total_written(),
335 reader.bit_position()
336 );
337 if !reader.is_eof() {
338 let first_bit = reader.read_bit()?;
339 #[cfg(test)]
340 eprintln!(
341 " first_bit={}, bit_pos after={}",
342 first_bit,
343 reader.bit_position()
344 );
345 if first_bit {
346 self.prev_low_offset = 0;
349 self.low_offset_repeat_count = 0;
350 self.read_tables(reader)?;
352 #[cfg(test)]
353 {
354 eprintln!(
355 "After new tables: bit_pos={}, next 16 bits={:016b}",
356 reader.bit_position(),
357 reader.peek_bits(16)
358 );
359 eprintln!(" About to decode first symbol after table read");
360 }
361 continue;
363 }
364 let _second_bit = reader.read_bit()?; }
368 break;
369 } else if symbol == 257 {
370 #[cfg(test)]
372 eprintln!(
373 "\n=== SYMBOL 257 (VM code) at output pos {} ===",
374 self.lzss.total_written()
375 );
376 self.read_vm_code(reader)?;
377 } else if symbol == 258 {
378 if self.last_len > 0 {
380 #[cfg(test)]
381 {
382 let pos = self.lzss.total_written();
383 let end = pos + self.last_len as u64;
384 if pos <= 1498598 && end > 1498598 {
385 eprintln!(
386 "!!! AT 1498598: symbol 258 repeat, last_dist={}, last_len={}",
387 self.last_dist, self.last_len
388 );
389 }
390 }
391 self.lzss.copy_match(self.last_dist, self.last_len)?;
392 }
393 } else if symbol < 263 {
394 let idx = (symbol - 259) as usize;
396 let distance = self.old_dist[idx];
397
398 let length = self.decode_length_from_table(reader)?;
400
401 #[cfg(test)]
402 {
403 let written = self.lzss.total_written();
404 let end = written + length as u64;
405 if written <= 1498598 && end > 1498598 {
406 eprintln!(
407 "!!! AT 1498598: old idx={},len={},dist={}",
408 idx, length, distance
409 );
410 }
411 }
412
413 self.lzss.copy_match(distance, length)?;
414
415 for i in (1..=idx).rev() {
417 self.old_dist[i] = self.old_dist[i - 1];
418 }
419 self.old_dist[0] = distance;
420 self.last_dist = distance;
421 self.last_len = length;
422 } else if symbol <= 270 {
423 let idx = (symbol - 263) as usize;
425 let base = SHORT_BASES[idx];
426 let bits = SHORT_BITS[idx];
427 let extra = if bits > 0 {
428 reader.read_bits(bits as u32)?
429 } else {
430 0
431 };
432 let distance = base + extra + 1;
433 let length = 2u32;
434
435 #[cfg(test)]
436 {
437 let written = self.lzss.total_written();
438 let end = written + length as u64;
439 if written <= 1498598 && end > 1498598 {
440 eprintln!(
441 "!!! AT 1498598: short sym={}, idx={}, base={}, bits={}, extra={}, dist={}",
442 symbol, idx, base, bits, extra, distance
443 );
444 }
445 }
446
447 self.lzss.copy_match(distance, length)?;
448
449 for i in (1..4).rev() {
451 self.old_dist[i] = self.old_dist[i - 1];
452 }
453 self.old_dist[0] = distance;
454 self.old_dist_ptr = 0;
455 self.last_dist = distance;
456 self.last_len = length;
457 } else {
458 #[cfg(test)]
460 let bit_before_len = reader.bit_position();
461
462 let len_idx = (symbol - 271) as usize;
463 let length = if len_idx < LENGTH_BASE.len() {
464 let base = LENGTH_BASE[len_idx];
465 let extra = LENGTH_EXTRA[len_idx];
466 let extra_val = if extra > 0 {
467 reader.read_bits(extra as u32)?
468 } else {
469 0
470 };
471 #[cfg(test)]
472 {
473 let written = self.lzss.total_written();
474 if written >= 1498595 && written <= 1498602 {
475 let bit_after_len = reader.bit_position();
476 eprintln!(
477 "!!! LONG DECODE at {}: sym={}, len_idx={}, len={}, bits {}->{}]",
478 written,
479 symbol,
480 len_idx,
481 base + extra_val + 3,
482 bit_before_len,
483 bit_after_len
484 );
485 }
486 }
487 base + extra_val + 3 } else {
489 #[cfg(test)]
490 eprintln!(
491 "\nlen_idx {} out of range at written={}",
492 len_idx,
493 self.lzss.total_written()
494 );
495 return Err(DecompressError::InvalidHuffmanCode);
496 };
497
498 let dist_symbol = {
500 #[cfg(test)]
501 let bit_pos_before = reader.bit_position();
502
503 let dist_table = unsafe { self.huffman.dist_table.as_ref().unwrap_unchecked() };
505 match dist_table.decode(reader) {
506 Ok(s) => {
507 #[cfg(test)]
508 {
509 let written = self.lzss.total_written();
510 if written >= 1498595 && written <= 1498610 {
511 let bit_pos_after = reader.bit_position();
512 eprintln!(
513 " dist_symbol={} at pos {} (bits {}->{})",
514 s, written, bit_pos_before, bit_pos_after
515 );
516 }
517 }
518 s
519 }
520 Err(e) => {
521 #[cfg(test)]
522 eprintln!(
523 "\nOffset decode failed at written={}, len={}",
524 self.lzss.total_written(),
525 length
526 );
527 return Err(e);
528 }
529 }
530 };
531
532 let dist_code = dist_symbol as usize;
533 let distance = if dist_code < DIST_BASE.len() {
534 let base = DIST_BASE[dist_code];
535 let extra = DIST_EXTRA[dist_code];
536
537 let extra_val = if extra > 0 {
538 if dist_code > 9 {
539 let high = if extra > 4 {
542 #[cfg(test)]
543 let high_bit_pos = reader.bit_position();
544 let h = reader.read_bits((extra - 4) as u32)?;
545 #[cfg(test)]
546 {
547 let written = self.lzss.total_written();
548 if (written >= 1498595 && written <= 1498610)
549 || (written >= 2176060 && written <= 2176080)
550 {
551 eprintln!(
552 " high bits at {}: {} bits = {} (0b{:016b}), pos {}->{}",
553 written,
554 extra - 4,
555 h, h,
556 high_bit_pos,
557 reader.bit_position()
558 );
559 }
560 }
561 h << 4
562 } else {
563 0
564 };
565 let low = if self.low_offset_repeat_count > 0 {
567 self.low_offset_repeat_count -= 1;
568 #[cfg(test)]
569 {
570 let written = self.lzss.total_written();
571 if written >= 1498550 && written <= 1498610 {
572 eprintln!(
573 "!!! low_offset REPEAT at {}: prev={}",
574 written, self.prev_low_offset
575 );
576 }
577 }
578 self.prev_low_offset
579 } else {
580 #[cfg(test)]
581 let bit_pos_before = reader.bit_position();
582 #[cfg(test)]
583 let raw_bits_16 = reader.peek_bits(16);
584 let low_table = unsafe {
586 self.huffman.low_dist_table.as_ref().unwrap_unchecked()
587 };
588 #[cfg(test)]
589 {
590 let written = self.lzss.total_written();
591 if written == 1498598 {
592 eprintln!(
594 "!!! LOW_TABLE at 1498598 decode_len: {:?}",
595 low_table.dump_decode_len()
596 );
597 eprintln!(
598 "!!! LOW_TABLE at 1498598 symbols: {:?}",
599 low_table.dump_symbols()
600 );
601 }
602 }
603 let sym = low_table.decode(reader)? as u32;
604 #[cfg(test)]
605 {
606 let written = self.lzss.total_written();
607 if written >= 1498550 && written <= 1498610 {
608 let bit_pos_after = reader.bit_position();
609 eprintln!("!!! low_offset at {}: sym={} (bits {}->{}), raw peek = {:016b}",
610 written, sym, bit_pos_before, bit_pos_after, raw_bits_16);
611 }
612 }
613
614 if sym == 16 {
615 self.low_offset_repeat_count = 16 - 1; self.prev_low_offset
619 } else {
620 self.prev_low_offset = sym;
621 sym
622 }
623 };
624 #[cfg(test)]
625 {
626 let written = self.lzss.total_written();
627 if written >= 2176060 && written <= 2176080 {
628 if self.low_offset_repeat_count > 0 {
629 eprintln!(
630 " low_offset REPEAT at {}: prev={}, remaining={}",
631 written,
632 self.prev_low_offset,
633 self.low_offset_repeat_count
634 );
635 } else {
636 eprintln!(" low_offset at {}: dist_code={}, base={}, extra={}, high={}, low={}, dist={}",
637 written, dist_code, base, extra, high, low, base + high + low + 1);
638 }
639 }
640 }
641 high + low
642 } else {
643 #[cfg(test)]
645 let peek = reader.peek_bits(extra as u32);
646 let val = reader.read_bits(extra as u32)?;
647 #[cfg(test)]
648 {
649 let written = self.lzss.total_written();
650 if written >= 0 && written < 0 {
651 eprintln!(" direct: dist_code={}, base={}, extra_bits={}, peek={:04b}, extra_val={}, distance={}",
652 dist_code, base, extra, peek, val, base + val + 1);
653 }
654 }
655 val
656 }
657 } else {
658 0
659 };
660 base + extra_val + 1
661 } else {
662 #[cfg(test)]
663 eprintln!(
664 "\ndist_code {} out of range at written={}",
665 dist_code,
666 self.lzss.total_written()
667 );
668 return Err(DecompressError::InvalidHuffmanCode);
669 };
670
671 let length = if distance >= 0x2000 {
674 if distance >= 0x40000 {
675 length + 2
676 } else {
677 length + 1
678 }
679 } else {
680 length
681 };
682
683 #[cfg(test)]
684 {
685 let written = self.lzss.total_written();
686 let end = written + length as u64;
687 if written <= 1498598 && end > 1498598 {
688 eprintln!(
689 "!!! AT 1498598: long match dist={}, len={}",
690 distance, length
691 );
692 let src_pos = (written as u32).wrapping_sub(distance) as usize;
694 let mask = self.lzss.window_mask() as usize;
695 let window = self.lzss.window();
696 eprintln!(
697 " window src[{}..{}]: {:02x?}",
698 src_pos,
699 src_pos + length as usize,
700 &window[src_pos..src_pos + length as usize]
701 );
702 }
703 if written >= 1498595 && written <= 1498602 {
704 eprintln!(
705 "LONG MATCH at {}: dist={}, len={}",
706 written, distance, length
707 );
708 }
709 }
710
711 self.lzss.copy_match(distance, length)?;
712
713 for i in (1..4).rev() {
715 self.old_dist[i] = self.old_dist[i - 1];
716 }
717 self.old_dist[0] = distance;
718 self.old_dist_ptr = 0;
719 self.last_dist = distance;
720 self.last_len = length;
721 }
722 }
723
724 Ok(())
725 }
726
727 fn decode_length_from_table(&mut self, reader: &mut BitReader) -> Result<u32> {
729 let symbol = {
730 let len_table = self
731 .huffman
732 .len_table
733 .as_ref()
734 .ok_or(DecompressError::InvalidHuffmanCode)?;
735 len_table.decode(reader)?
736 };
737
738 let sym = symbol as usize;
739 if sym < LENGTH_BASE.len() {
740 let base = LENGTH_BASE[sym];
741 let extra = LENGTH_EXTRA[sym];
742 let extra_val = if extra > 0 {
743 reader.read_bits(extra as u32)?
744 } else {
745 0
746 };
747 Ok(base + extra_val + 2)
748 } else {
749 Err(DecompressError::InvalidHuffmanCode)
750 }
751 }
752
753 #[cold]
756 fn read_vm_code(&mut self, reader: &mut BitReader) -> Result<()> {
757 #[cfg(test)]
758 let bit_pos_start = reader.bit_position();
759
760 let first_byte = reader.read_bits(8)? as u8;
762
763 let length = {
768 let base = (first_byte & 7) + 1;
769 match base {
770 7 => {
771 let next = reader.read_bits(8)? as u32;
773 next + 7
774 }
775 8 => {
776 reader.read_bits(16)?
778 }
779 _ => base as u32,
780 }
781 };
782
783 #[cfg(test)]
784 eprintln!(
785 " read_vm_code: first_byte=0x{:02x}, length={}, bit_pos_start={}",
786 first_byte, length, bit_pos_start
787 );
788
789 if length == 0 {
790 return Ok(());
791 }
792
793 let mut vm_code = vec![0u8; length as usize];
795 for i in 0..length as usize {
796 vm_code[i] = reader.read_bits(8)? as u8;
797 }
798
799 #[cfg(test)]
800 eprintln!(" vm_code end bit_pos={}", reader.bit_position());
801
802 let total_written = self.lzss.total_written();
804 let window_mask = self.lzss.window_mask();
805
806 #[cfg(test)]
807 eprintln!(
808 " add_code: total_written={}, window_mask={:x}",
809 total_written, window_mask
810 );
811
812 #[cfg(test)]
813 {
814 let had_pending_before = self.vm.has_pending_filters();
815 let result = self
816 .vm
817 .add_code(first_byte, &vm_code, total_written, window_mask);
818 let has_pending_after = self.vm.has_pending_filters();
819 if let Some(next_pos) = self.vm.next_filter_pos() {
820 eprintln!(
821 " vm.add_code: added={}, pending={}->{}, next_pos={}",
822 result, had_pending_before, has_pending_after, next_pos
823 );
824 } else {
825 eprintln!(
826 " vm.add_code: added={}, pending={}->{}, next_pos=NONE",
827 result, had_pending_before, has_pending_after
828 );
829 }
830 }
831 #[cfg(not(test))]
832 self.vm
833 .add_code(first_byte, &vm_code, total_written, window_mask);
834
835 if let Some(end) = self.vm.next_filter_end() {
837 self.next_filter_check = self.next_filter_check.min(end);
838 }
839
840 Ok(())
841 }
842
843 #[inline]
846 fn maybe_execute_filters(&mut self) {
847 let total_written = self.lzss.total_written();
848
849 if total_written < self.next_filter_check {
851 return;
852 }
853
854 let window_mask = self.lzss.window_mask() as usize;
855
856 loop {
858 let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
860 Some((idx, pos)) => (idx, pos),
861 None => break,
862 };
863
864 let flushed = self.lzss.flushed_pos();
866 if flushed < next_pos {
867 self.lzss.flush_to_output(next_pos);
868 }
869
870 let window = self.lzss.window();
872 if let Some((filter_end, filtered_data)) =
873 self.vm
874 .execute_filter_at_index(filter_idx, window, window_mask, total_written)
875 {
876 self.lzss.write_filtered_to_output(filtered_data, next_pos);
878 self.next_filter_check = filter_end;
880 } else {
881 break;
882 }
883 }
884
885 self.next_filter_check = self.vm.next_filter_end().unwrap_or(u64::MAX);
887 }
888
889 fn decode_block_ppm(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
891 let ppm = self
892 .ppm
893 .as_mut()
894 .ok_or(DecompressError::UnsupportedMethod(0x33))?;
895 let coder = self
896 .ppm_coder
897 .as_mut()
898 .ok_or(DecompressError::UnsupportedMethod(0x33))?;
899 let esc_char = self.ppm_esc_char;
900
901 while self.lzss.total_written() < max_size && !reader.is_eof() {
902 let ch = ppm.decode_char(coder, reader).map_err(|e| {
903 #[cfg(test)]
904 eprintln!(
905 "PPM decode_char failed at pos {}: {}",
906 self.lzss.total_written(),
907 e
908 );
909 #[cfg(not(test))]
910 let _ = e;
911 DecompressError::InvalidHuffmanCode
912 })?;
913
914 if ch < 0 {
915 #[cfg(test)]
917 eprintln!("PPM decode_char returned negative: {}", ch);
918 return Err(DecompressError::InvalidHuffmanCode);
919 }
920
921 #[cfg(test)]
922 {
923 if self.lzss.total_written() < 20 {
924 eprint!("[{}:{}] ", self.lzss.total_written(), ch);
925 }
926 }
927
928 if ch != esc_char {
929 self.lzss.write_literal(ch as u8);
931 } else {
932 let ctrl = ppm
934 .decode_char(coder, reader)
935 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
936
937 if ctrl < 0 {
938 return Err(DecompressError::InvalidHuffmanCode);
939 }
940
941 match ctrl {
942 0 => {
943 break;
945 }
946 1 => {
947 self.lzss.write_literal(esc_char as u8);
949 }
950 2 => {
951 break;
953 }
954 3 => {
955 let first_byte = ppm
957 .decode_char(coder, reader)
958 .map_err(|_| DecompressError::InvalidHuffmanCode)?
959 as u8;
960
961 let mut length = ((first_byte & 7) + 1) as u32;
963 if length == 7 {
964 let b1 = ppm
965 .decode_char(coder, reader)
966 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
967 length = (b1 as u32) + 7;
968 } else if length == 8 {
969 let b1 = ppm
970 .decode_char(coder, reader)
971 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
972 let b2 = ppm
973 .decode_char(coder, reader)
974 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
975 length = (b1 as u32) * 256 + (b2 as u32);
976 }
977
978 if length == 0 {
979 continue;
980 }
981
982 let mut vm_code = vec![0u8; length as usize];
984 for i in 0..length as usize {
985 let ch = ppm
986 .decode_char(coder, reader)
987 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
988 vm_code[i] = ch as u8;
989 }
990
991 let total_written = self.lzss.total_written();
993 let window_mask = self.lzss.window_mask();
994 self.vm
995 .add_code(first_byte, &vm_code, total_written, window_mask);
996
997 if let Some(end) = self.vm.next_filter_end() {
999 self.next_filter_check = self.next_filter_check.min(end);
1000 }
1001 }
1002 4 => {
1003 let mut distance: u32 = 0;
1005 for _ in 0..3 {
1006 let ch = ppm
1007 .decode_char(coder, reader)
1008 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1009 distance = (distance << 8) + (ch as u32);
1010 }
1011 let len = ppm
1012 .decode_char(coder, reader)
1013 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1014
1015 let distance = distance + 2;
1017 let length = (len as u32) + 32;
1018
1019 self.lzss.copy_match(distance, length)?;
1020 self.last_dist = distance;
1021 self.last_len = length;
1022 }
1023 5 => {
1024 let len = ppm
1026 .decode_char(coder, reader)
1027 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1028
1029 let length = (len as u32) + 4;
1031
1032 self.lzss.copy_match(1, length)?;
1033 self.last_dist = 1;
1034 self.last_len = length;
1035 }
1036 _ => {
1037 #[cfg(test)]
1039 eprintln!("Unknown PPM control code: {}", ctrl);
1040 return Err(DecompressError::InvalidHuffmanCode);
1041 }
1042 }
1043 }
1044 }
1045
1046 Ok(())
1047 }
1048
1049 pub fn reset(&mut self) {
1051 self.lzss.reset();
1052 self.vm.reset();
1053 self.ppm_coder = None;
1055 self.ppm_esc_char = -1;
1056 self.old_dist = [0; 4];
1057 self.old_dist_ptr = 0;
1058 self.last_dist = 0;
1059 self.last_len = 0;
1060 self.ppm_mode = false;
1061 self.tables_read = false;
1062 self.prev_low_offset = 0;
1063 self.low_offset_repeat_count = 0;
1064 self.next_filter_check = u64::MAX;
1065 }
1066
1067 pub fn bytes_written(&self) -> u64 {
1069 self.lzss.total_written()
1070 }
1071}
1072
1073impl Default for Rar29Decoder {
1074 fn default() -> Self {
1075 Self::new()
1076 }
1077}
1078
/// Wrapper around [`Rar29Decoder`] that accumulates input across several `feed` calls.
#[allow(dead_code)]
pub struct Rar29StreamDecoder {
    /// The underlying block decoder.
    decoder: Rar29Decoder,
    /// All input bytes received so far.
    input_buffer: Vec<u8>,
    /// Offset into `input_buffer` from which decoding starts.
    /// NOTE(review): initialised to 0 and never advanced in this file — confirm.
    input_pos: usize,
    /// Expected size of the decompressed data, passed to every `decompress` call.
    unpacked_size: u64,
}
1092
1093#[allow(dead_code)]
1094impl Rar29StreamDecoder {
1095 pub fn new(unpacked_size: u64) -> Self {
1097 Self {
1098 decoder: Rar29Decoder::new(),
1099 input_buffer: Vec::new(),
1100 input_pos: 0,
1101 unpacked_size,
1102 }
1103 }
1104
1105 pub fn feed(&mut self, data: &[u8]) -> Result<Vec<u8>> {
1108 self.input_buffer.extend_from_slice(data);
1109
1110 let result = self
1112 .decoder
1113 .decompress(&self.input_buffer[self.input_pos..], self.unpacked_size)?;
1114
1115 Ok(result)
1116 }
1117
1118 pub fn is_complete(&self) -> bool {
1120 self.decoder.bytes_written() >= self.unpacked_size
1121 }
1122
1123 pub fn bytes_written(&self) -> u64 {
1125 self.decoder.bytes_written()
1126 }
1127}
1128
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed decoder has written nothing and has not read any tables.
    #[test]
    fn test_decoder_creation() {
        let fresh = Rar29Decoder::new();
        assert!(!fresh.tables_read);
        assert_eq!(fresh.bytes_written(), 0);
    }
}