1#![cfg_attr(test, allow(clippy::logic_bug))]
8
9use super::{
10 bit_reader::BitReader,
11 huffman::HuffmanDecoder,
12 lzss::LzssDecoder,
13 ppm::{PpmModel, RangeCoder},
14 vm::RarVM,
15 DecompressError, Result,
16};
17
/// Size of the main alphabet: 256 literals, 15 control/short-match symbols
/// (256..=270) and 28 length slots (271..=298).
#[allow(dead_code)]
const MAIN_CODES: usize = 299;

/// Number of distance slots; equals the length of [`DIST_BASE`].
#[allow(dead_code)]
const DIST_CODES: usize = 60;

/// Size of the low-distance alphabet: values 0..=15 plus the repeat code 16.
#[allow(dead_code)]
const LOW_DIST_CODES: usize = 17;

/// Number of length slots; equals the length of [`LENGTH_BASE`].
#[allow(dead_code)]
const LEN_CODES: usize = 28;

/// Longest length a main-alphabet length slot can encode before the
/// distance-dependent bonus is added (224 + 31 + 3).
#[allow(dead_code)]
const MAX_MATCH_LEN: u32 = 258;

/// Base distances (before the `+ 1` bias) of the length-2 short matches,
/// indexed by `symbol - 263`.
const SHORT_BASES: [u32; 8] = [0, 4, 8, 16, 32, 64, 128, 192];

/// Number of extra distance bits read for each entry of [`SHORT_BASES`].
const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];

/// Base value of every length slot; the decoder adds the extra bits and a
/// fixed bias (2 or 3 depending on the symbol class).
const LENGTH_BASE: [u32; 28] = [
    0, 1, 2, 3, 4, 5, 6, 7, // no extra bits
    8, 10, 12, 14, // 1 extra bit
    16, 20, 24, 28, // 2 extra bits
    32, 40, 48, 56, // 3 extra bits
    64, 80, 96, 112, // 4 extra bits
    128, 160, 192, 224, // 5 extra bits
];

/// Number of extra bits read for each entry of [`LENGTH_BASE`].
const LENGTH_EXTRA: [u8; 28] = [
    0, 0, 0, 0, 0, 0, 0, 0, //
    1, 1, 1, 1, //
    2, 2, 2, 2, //
    3, 3, 3, 3, //
    4, 4, 4, 4, //
    5, 5, 5, 5, //
];

/// Base value of every distance slot; each base is the previous base plus
/// `1 << DIST_EXTRA[previous]`.
const DIST_BASE: [u32; 60] = [
    // 0 extra bits
    0, 1, 2, 3, //
    // 1..=6 extra bits, two slots each
    4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, //
    // 7..=10 extra bits, two slots each
    256, 384, 512, 768, 1024, 1536, 2048, 3072, //
    // 11..=14 extra bits, two slots each
    4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, //
    // 15 extra bits (two slots), then the first two 16-bit slots
    65536, 98304, 131072, 196608, //
    // remaining twelve 16-bit slots
    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504,
    983040, //
    // twelve 18-bit slots
    1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 2883584, 3145728, 3407872,
    3670016, 3932160,
];

/// Number of extra bits read for each entry of [`DIST_BASE`].
const DIST_EXTRA: [u8; 60] = [
    0, 0, 0, 0, //
    1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, //
    7, 7, 8, 8, 9, 9, 10, 10, //
    11, 11, 12, 12, 13, 13, 14, 14, //
    15, 15, 16, 16, //
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, //
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
];
70
71pub struct Rar29Decoder {
85 lzss: LzssDecoder,
87 huffman: HuffmanDecoder,
89 vm: RarVM,
91 ppm: Option<PpmModel>,
93 ppm_coder: Option<RangeCoder>,
95 ppm_esc_char: i32,
97 old_dist: [u32; 4],
99 old_dist_ptr: usize,
101 last_dist: u32,
103 last_len: u32,
105 ppm_mode: bool,
107 tables_read: bool,
109 prev_low_offset: u32,
111 low_offset_repeat_count: u32,
113 next_filter_check: u64,
115}
116
117impl Rar29Decoder {
118 pub fn new() -> Self {
120 Self::with_window_size(0x400000) }
122
123 pub fn with_window_size(window_size: usize) -> Self {
126 Self {
127 lzss: LzssDecoder::new(window_size),
128 huffman: HuffmanDecoder::new(),
129 vm: RarVM::new(),
130 ppm: None,
131 ppm_coder: None,
132 ppm_esc_char: -1,
133 old_dist: [0; 4],
134 old_dist_ptr: 0,
135 last_dist: 0,
136 last_len: 0,
137 ppm_mode: false,
138 tables_read: false,
139 prev_low_offset: 0,
140 low_offset_repeat_count: 0,
141 next_filter_check: u64::MAX,
142 }
143 }
144
145 #[cfg(test)]
147 pub fn get_output(&self) -> Vec<u8> {
148 self.lzss.output().to_vec()
149 }
150
151 pub fn decompress(&mut self, data: &[u8], unpacked_size: u64) -> Result<Vec<u8>> {
154 let mut reader = BitReader::new(data);
155
156 self.lzss.enable_output(unpacked_size as usize);
158
159 if !self.tables_read {
161 self.read_tables(&mut reader)?;
162 }
163
164 while self.lzss.total_written() < unpacked_size {
166 if reader.is_eof() {
167 break;
168 }
169
170 self.decode_block(&mut reader, unpacked_size)?;
171 }
172
173 let total_written = self.lzss.total_written();
175 let window_mask = self.lzss.window_mask() as usize;
176
177 loop {
179 let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
181 Some((idx, pos)) => (idx, pos),
182 None => break,
183 };
184
185 let flushed = self.lzss.flushed_pos();
187 if flushed < next_pos {
188 self.lzss.flush_to_output(next_pos);
189 }
190
191 let window = self.lzss.window();
192 if let Some((_filter_end, filtered_data)) =
193 self.vm
194 .execute_filter_at_index(filter_idx, window, window_mask, total_written)
195 {
196 self.lzss.write_filtered_to_output(filtered_data, next_pos);
198 } else {
199 break;
200 }
201 }
202
203 self.lzss.flush_to_output(total_written);
205
206 Ok(self.lzss.take_output())
208 }
209
210 fn read_tables(&mut self, reader: &mut BitReader) -> Result<()> {
212 #[cfg(test)]
213 {
214 let byte_pos = reader.bit_position() / 8;
215 eprintln!(
216 "read_tables ENTRY: bit_pos={}, byte_pos={}",
217 reader.bit_position(),
218 byte_pos
219 );
220 eprintln!(" raw bytes at pos: {:02x?}", reader.peek_bytes(8));
221 }
222 reader.align_to_byte();
224 #[cfg(test)]
225 {
226 let byte_pos = reader.bit_position() / 8;
227 eprintln!(
228 "read_tables AFTER align: bit_pos={}, byte_pos={}",
229 reader.bit_position(),
230 byte_pos
231 );
232 eprintln!(" raw bytes at pos: {:02x?}", reader.peek_bytes(8));
233 };
234
235 let ppm_flag = reader.peek_bits(1) != 0;
238
239 self.ppm_mode = ppm_flag;
240
241 if self.ppm_mode {
242 let ppm = self.ppm.get_or_insert_with(PpmModel::new);
245 match ppm.init(reader) {
246 Ok((coder, esc_char)) => {
247 self.ppm_coder = Some(coder);
248 self.ppm_esc_char = esc_char;
249 #[cfg(test)]
250 println!("PPMd initialized: esc_char={}", esc_char);
251 }
252 Err(e) => {
253 #[cfg(test)]
254 println!("PPMd init failed: {}", e);
255 #[cfg(not(test))]
256 let _ = e;
257 return Err(DecompressError::UnsupportedMethod(0x33));
258 }
259 }
260 } else {
261 self.prev_low_offset = 0;
263 self.low_offset_repeat_count = 0;
264
265 let reset_tables = reader.peek_bits(2) & 1 == 0; reader.advance_bits(2);
269
270 if reset_tables {
271 self.huffman.reset_tables();
272 }
273
274 self.huffman.read_tables_after_header(reader)?;
276 }
277
278 self.tables_read = true;
279 Ok(())
280 }
281
282 fn decode_block(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
284 if self.ppm_mode {
285 return self.decode_block_ppm(reader, max_size);
286 }
287
288 if self.huffman.main_table.is_none() || self.huffman.dist_table.is_none() {
290 return Err(DecompressError::InvalidHuffmanCode);
291 }
292
293 #[cfg(test)]
294 let mut symbol_count = 0;
295
296 while self.lzss.total_written() < max_size && !reader.is_eof() {
297 self.maybe_execute_filters();
299
300 #[cfg(test)]
302 let bit_pos_main_start = reader.bit_position();
303 #[cfg(test)]
304 let peek_bits = reader.peek_bits(16);
305
306 let symbol = unsafe {
308 self.huffman
309 .main_table
310 .as_ref()
311 .unwrap_unchecked()
312 .decode(reader)?
313 };
314
315 #[cfg(test)]
316 {
317 let pos = self.lzss.total_written();
318 if pos >= 1498580 && pos <= 1498610 {
319 let bit_pos_after = reader.bit_position();
320 eprintln!(
321 "MAIN sym={} at pos={}, bits {}->{} peek={:016b}",
322 symbol, pos, bit_pos_main_start, bit_pos_after, peek_bits
323 );
324 }
325 }
326
327 if symbol < 256 {
328 #[cfg(test)]
330 {
331 let pos = self.lzss.total_written();
332 if pos >= 1498595 && pos <= 1498610 {
333 eprintln!("WRITING literal 0x{:02x} at output pos {}", symbol, pos);
334 }
335 }
336 self.lzss.write_literal(symbol as u8);
337 } else if symbol == 256 {
338 #[cfg(test)]
344 eprintln!(
345 "\n=== SYMBOL 256 (end of block) at output pos {}, bit_pos {} ===",
346 self.lzss.total_written(),
347 reader.bit_position()
348 );
349 if !reader.is_eof() {
350 let first_bit = reader.read_bit()?;
351 #[cfg(test)]
352 eprintln!(
353 " first_bit={}, bit_pos after={}",
354 first_bit,
355 reader.bit_position()
356 );
357 if first_bit {
358 self.prev_low_offset = 0;
361 self.low_offset_repeat_count = 0;
362 self.read_tables(reader)?;
364 #[cfg(test)]
365 {
366 eprintln!(
367 "After new tables: bit_pos={}, next 16 bits={:016b}",
368 reader.bit_position(),
369 reader.peek_bits(16)
370 );
371 eprintln!(" About to decode first symbol after table read");
372 }
373 continue;
375 }
376 let _second_bit = reader.read_bit()?; }
380 break;
381 } else if symbol == 257 {
382 #[cfg(test)]
384 eprintln!(
385 "\n=== SYMBOL 257 (VM code) at output pos {} ===",
386 self.lzss.total_written()
387 );
388 self.read_vm_code(reader)?;
389 } else if symbol == 258 {
390 if self.last_len > 0 {
392 #[cfg(test)]
393 {
394 let pos = self.lzss.total_written();
395 let end = pos + self.last_len as u64;
396 if pos <= 1498598 && end > 1498598 {
397 eprintln!(
398 "!!! AT 1498598: symbol 258 repeat, last_dist={}, last_len={}",
399 self.last_dist, self.last_len
400 );
401 }
402 }
403 self.lzss.copy_match(self.last_dist, self.last_len)?;
404 }
405 } else if symbol < 263 {
406 let idx = (symbol - 259) as usize;
408 let distance = self.old_dist[idx];
409
410 let length = self.decode_length_from_table(reader)?;
412
413 #[cfg(test)]
414 {
415 let written = self.lzss.total_written();
416 let end = written + length as u64;
417 if written <= 1498598 && end > 1498598 {
418 eprintln!(
419 "!!! AT 1498598: old idx={},len={},dist={}",
420 idx, length, distance
421 );
422 }
423 }
424
425 self.lzss.copy_match(distance, length)?;
426
427 for i in (1..=idx).rev() {
429 self.old_dist[i] = self.old_dist[i - 1];
430 }
431 self.old_dist[0] = distance;
432 self.last_dist = distance;
433 self.last_len = length;
434 } else if symbol <= 270 {
435 let idx = (symbol - 263) as usize;
437 let base = SHORT_BASES[idx];
438 let bits = SHORT_BITS[idx];
439 let extra = if bits > 0 {
440 reader.read_bits(bits as u32)?
441 } else {
442 0
443 };
444 let distance = base + extra + 1;
445 let length = 2u32;
446
447 #[cfg(test)]
448 {
449 let written = self.lzss.total_written();
450 let end = written + length as u64;
451 if written <= 1498598 && end > 1498598 {
452 eprintln!(
453 "!!! AT 1498598: short sym={}, idx={}, base={}, bits={}, extra={}, dist={}",
454 symbol, idx, base, bits, extra, distance
455 );
456 }
457 }
458
459 self.lzss.copy_match(distance, length)?;
460
461 for i in (1..4).rev() {
463 self.old_dist[i] = self.old_dist[i - 1];
464 }
465 self.old_dist[0] = distance;
466 self.old_dist_ptr = 0;
467 self.last_dist = distance;
468 self.last_len = length;
469 } else {
470 #[cfg(test)]
472 let bit_before_len = reader.bit_position();
473
474 let len_idx = (symbol - 271) as usize;
475 let length = if len_idx < LENGTH_BASE.len() {
476 let base = LENGTH_BASE[len_idx];
477 let extra = LENGTH_EXTRA[len_idx];
478 let extra_val = if extra > 0 {
479 reader.read_bits(extra as u32)?
480 } else {
481 0
482 };
483 #[cfg(test)]
484 {
485 let written = self.lzss.total_written();
486 if written >= 1498595 && written <= 1498602 {
487 let bit_after_len = reader.bit_position();
488 eprintln!(
489 "!!! LONG DECODE at {}: sym={}, len_idx={}, len={}, bits {}->{}]",
490 written,
491 symbol,
492 len_idx,
493 base + extra_val + 3,
494 bit_before_len,
495 bit_after_len
496 );
497 }
498 }
499 base + extra_val + 3 } else {
501 #[cfg(test)]
502 eprintln!(
503 "\nlen_idx {} out of range at written={}",
504 len_idx,
505 self.lzss.total_written()
506 );
507 return Err(DecompressError::InvalidHuffmanCode);
508 };
509
510 let dist_symbol = {
512 #[cfg(test)]
513 let bit_pos_before = reader.bit_position();
514
515 let dist_table = unsafe { self.huffman.dist_table.as_ref().unwrap_unchecked() };
517 match dist_table.decode(reader) {
518 Ok(s) => {
519 #[cfg(test)]
520 {
521 let written = self.lzss.total_written();
522 if written >= 1498595 && written <= 1498610 {
523 let bit_pos_after = reader.bit_position();
524 eprintln!(
525 " dist_symbol={} at pos {} (bits {}->{})",
526 s, written, bit_pos_before, bit_pos_after
527 );
528 }
529 }
530 s
531 }
532 Err(e) => {
533 #[cfg(test)]
534 eprintln!(
535 "\nOffset decode failed at written={}, len={}",
536 self.lzss.total_written(),
537 length
538 );
539 return Err(e);
540 }
541 }
542 };
543
544 let dist_code = dist_symbol as usize;
545 let distance = if dist_code < DIST_BASE.len() {
546 let base = DIST_BASE[dist_code];
547 let extra = DIST_EXTRA[dist_code];
548
549 let extra_val = if extra > 0 {
550 if dist_code > 9 {
551 let high = if extra > 4 {
554 #[cfg(test)]
555 let high_bit_pos = reader.bit_position();
556 let h = reader.read_bits((extra - 4) as u32)?;
557 #[cfg(test)]
558 {
559 let written = self.lzss.total_written();
560 if (written >= 1498595 && written <= 1498610)
561 || (written >= 2176060 && written <= 2176080)
562 {
563 eprintln!(
564 " high bits at {}: {} bits = {} (0b{:016b}), pos {}->{}",
565 written,
566 extra - 4,
567 h, h,
568 high_bit_pos,
569 reader.bit_position()
570 );
571 }
572 }
573 h << 4
574 } else {
575 0
576 };
577 let low = if self.low_offset_repeat_count > 0 {
579 self.low_offset_repeat_count -= 1;
580 #[cfg(test)]
581 {
582 let written = self.lzss.total_written();
583 if written >= 1498550 && written <= 1498610 {
584 eprintln!(
585 "!!! low_offset REPEAT at {}: prev={}",
586 written, self.prev_low_offset
587 );
588 }
589 }
590 self.prev_low_offset
591 } else {
592 #[cfg(test)]
593 let bit_pos_before = reader.bit_position();
594 #[cfg(test)]
595 let raw_bits_16 = reader.peek_bits(16);
596 let low_table = unsafe {
598 self.huffman.low_dist_table.as_ref().unwrap_unchecked()
599 };
600 #[cfg(test)]
601 {
602 let written = self.lzss.total_written();
603 if written == 1498598 {
604 eprintln!(
606 "!!! LOW_TABLE at 1498598 decode_len: {:?}",
607 low_table.dump_decode_len()
608 );
609 eprintln!(
610 "!!! LOW_TABLE at 1498598 symbols: {:?}",
611 low_table.dump_symbols()
612 );
613 }
614 }
615 let sym = low_table.decode(reader)? as u32;
616 #[cfg(test)]
617 {
618 let written = self.lzss.total_written();
619 if written >= 1498550 && written <= 1498610 {
620 let bit_pos_after = reader.bit_position();
621 eprintln!("!!! low_offset at {}: sym={} (bits {}->{}), raw peek = {:016b}",
622 written, sym, bit_pos_before, bit_pos_after, raw_bits_16);
623 }
624 }
625
626 if sym == 16 {
627 self.low_offset_repeat_count = 16 - 1; self.prev_low_offset
631 } else {
632 self.prev_low_offset = sym;
633 sym
634 }
635 };
636 #[cfg(test)]
637 {
638 let written = self.lzss.total_written();
639 if written >= 2176060 && written <= 2176080 {
640 if self.low_offset_repeat_count > 0 {
641 eprintln!(
642 " low_offset REPEAT at {}: prev={}, remaining={}",
643 written,
644 self.prev_low_offset,
645 self.low_offset_repeat_count
646 );
647 } else {
648 eprintln!(" low_offset at {}: dist_code={}, base={}, extra={}, high={}, low={}, dist={}",
649 written, dist_code, base, extra, high, low, base + high + low + 1);
650 }
651 }
652 }
653 high + low
654 } else {
655 #[cfg(test)]
657 let peek = reader.peek_bits(extra as u32);
658 let val = reader.read_bits(extra as u32)?;
659 #[cfg(test)]
660 {
661 let written = self.lzss.total_written();
662 if written >= 0 && written < 0 {
663 eprintln!(" direct: dist_code={}, base={}, extra_bits={}, peek={:04b}, extra_val={}, distance={}",
664 dist_code, base, extra, peek, val, base + val + 1);
665 }
666 }
667 val
668 }
669 } else {
670 0
671 };
672 base + extra_val + 1
673 } else {
674 #[cfg(test)]
675 eprintln!(
676 "\ndist_code {} out of range at written={}",
677 dist_code,
678 self.lzss.total_written()
679 );
680 return Err(DecompressError::InvalidHuffmanCode);
681 };
682
683 let length = if distance >= 0x2000 {
686 if distance >= 0x40000 {
687 length + 2
688 } else {
689 length + 1
690 }
691 } else {
692 length
693 };
694
695 #[cfg(test)]
696 {
697 let written = self.lzss.total_written();
698 let end = written + length as u64;
699 if written <= 1498598 && end > 1498598 {
700 eprintln!(
701 "!!! AT 1498598: long match dist={}, len={}",
702 distance, length
703 );
704 let src_pos = (written as u32).wrapping_sub(distance) as usize;
706 let mask = self.lzss.window_mask() as usize;
707 let window = self.lzss.window();
708 eprintln!(
709 " window src[{}..{}]: {:02x?}",
710 src_pos,
711 src_pos + length as usize,
712 &window[src_pos..src_pos + length as usize]
713 );
714 }
715 if written >= 1498595 && written <= 1498602 {
716 eprintln!(
717 "LONG MATCH at {}: dist={}, len={}",
718 written, distance, length
719 );
720 }
721 }
722
723 self.lzss.copy_match(distance, length)?;
724
725 for i in (1..4).rev() {
727 self.old_dist[i] = self.old_dist[i - 1];
728 }
729 self.old_dist[0] = distance;
730 self.old_dist_ptr = 0;
731 self.last_dist = distance;
732 self.last_len = length;
733 }
734 }
735
736 Ok(())
737 }
738
739 fn decode_length_from_table(&mut self, reader: &mut BitReader) -> Result<u32> {
741 let symbol = {
742 let len_table = self
743 .huffman
744 .len_table
745 .as_ref()
746 .ok_or(DecompressError::InvalidHuffmanCode)?;
747 len_table.decode(reader)?
748 };
749
750 let sym = symbol as usize;
751 if sym < LENGTH_BASE.len() {
752 let base = LENGTH_BASE[sym];
753 let extra = LENGTH_EXTRA[sym];
754 let extra_val = if extra > 0 {
755 reader.read_bits(extra as u32)?
756 } else {
757 0
758 };
759 Ok(base + extra_val + 2)
760 } else {
761 Err(DecompressError::InvalidHuffmanCode)
762 }
763 }
764
765 #[cold]
768 fn read_vm_code(&mut self, reader: &mut BitReader) -> Result<()> {
769 #[cfg(test)]
770 let bit_pos_start = reader.bit_position();
771
772 let first_byte = reader.read_bits(8)? as u8;
774
775 let length = {
780 let base = (first_byte & 7) + 1;
781 match base {
782 7 => {
783 let next = reader.read_bits(8)? as u32;
785 next + 7
786 }
787 8 => {
788 reader.read_bits(16)?
790 }
791 _ => base as u32,
792 }
793 };
794
795 #[cfg(test)]
796 eprintln!(
797 " read_vm_code: first_byte=0x{:02x}, length={}, bit_pos_start={}",
798 first_byte, length, bit_pos_start
799 );
800
801 if length == 0 {
802 return Ok(());
803 }
804
805 let mut vm_code = vec![0u8; length as usize];
807 for i in 0..length as usize {
808 vm_code[i] = reader.read_bits(8)? as u8;
809 }
810
811 #[cfg(test)]
812 eprintln!(" vm_code end bit_pos={}", reader.bit_position());
813
814 let total_written = self.lzss.total_written();
816 let window_mask = self.lzss.window_mask();
817
818 #[cfg(test)]
819 eprintln!(
820 " add_code: total_written={}, window_mask={:x}",
821 total_written, window_mask
822 );
823
824 #[cfg(test)]
825 {
826 let had_pending_before = self.vm.has_pending_filters();
827 let result = self
828 .vm
829 .add_code(first_byte, &vm_code, total_written, window_mask);
830 let has_pending_after = self.vm.has_pending_filters();
831 if let Some(next_pos) = self.vm.next_filter_pos() {
832 eprintln!(
833 " vm.add_code: added={}, pending={}->{}, next_pos={}",
834 result, had_pending_before, has_pending_after, next_pos
835 );
836 } else {
837 eprintln!(
838 " vm.add_code: added={}, pending={}->{}, next_pos=NONE",
839 result, had_pending_before, has_pending_after
840 );
841 }
842 }
843 #[cfg(not(test))]
844 self.vm
845 .add_code(first_byte, &vm_code, total_written, window_mask);
846
847 if let Some(end) = self.vm.next_filter_end() {
849 self.next_filter_check = self.next_filter_check.min(end);
850 }
851
852 Ok(())
853 }
854
855 #[inline]
858 fn maybe_execute_filters(&mut self) {
859 let total_written = self.lzss.total_written();
860
861 if total_written < self.next_filter_check {
863 return;
864 }
865
866 let window_mask = self.lzss.window_mask() as usize;
867
868 loop {
870 let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
872 Some((idx, pos)) => (idx, pos),
873 None => break,
874 };
875
876 let flushed = self.lzss.flushed_pos();
878 if flushed < next_pos {
879 self.lzss.flush_to_output(next_pos);
880 }
881
882 let window = self.lzss.window();
884 if let Some((filter_end, filtered_data)) =
885 self.vm
886 .execute_filter_at_index(filter_idx, window, window_mask, total_written)
887 {
888 self.lzss.write_filtered_to_output(filtered_data, next_pos);
890 self.next_filter_check = filter_end;
892 } else {
893 break;
894 }
895 }
896
897 self.next_filter_check = self.vm.next_filter_end().unwrap_or(u64::MAX);
899 }
900
901 fn decode_block_ppm(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
903 let ppm = self
904 .ppm
905 .as_mut()
906 .ok_or(DecompressError::UnsupportedMethod(0x33))?;
907 let coder = self
908 .ppm_coder
909 .as_mut()
910 .ok_or(DecompressError::UnsupportedMethod(0x33))?;
911 let esc_char = self.ppm_esc_char;
912
913 while self.lzss.total_written() < max_size && !reader.is_eof() {
914 let ch = ppm.decode_char(coder, reader).map_err(|e| {
915 #[cfg(test)]
916 eprintln!(
917 "PPM decode_char failed at pos {}: {}",
918 self.lzss.total_written(),
919 e
920 );
921 #[cfg(not(test))]
922 let _ = e;
923 DecompressError::InvalidHuffmanCode
924 })?;
925
926 if ch < 0 {
927 #[cfg(test)]
929 eprintln!("PPM decode_char returned negative: {}", ch);
930 return Err(DecompressError::InvalidHuffmanCode);
931 }
932
933 #[cfg(test)]
934 {
935 if self.lzss.total_written() < 20 {
936 eprint!("[{}:{}] ", self.lzss.total_written(), ch);
937 }
938 }
939
940 if ch != esc_char {
941 self.lzss.write_literal(ch as u8);
943 } else {
944 let ctrl = ppm
946 .decode_char(coder, reader)
947 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
948
949 if ctrl < 0 {
950 return Err(DecompressError::InvalidHuffmanCode);
951 }
952
953 match ctrl {
954 0 => {
955 break;
957 }
958 1 => {
959 self.lzss.write_literal(esc_char as u8);
961 }
962 2 => {
963 break;
965 }
966 3 => {
967 let first_byte = ppm
969 .decode_char(coder, reader)
970 .map_err(|_| DecompressError::InvalidHuffmanCode)?
971 as u8;
972
973 let mut length = ((first_byte & 7) + 1) as u32;
975 if length == 7 {
976 let b1 = ppm
977 .decode_char(coder, reader)
978 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
979 length = (b1 as u32) + 7;
980 } else if length == 8 {
981 let b1 = ppm
982 .decode_char(coder, reader)
983 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
984 let b2 = ppm
985 .decode_char(coder, reader)
986 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
987 length = (b1 as u32) * 256 + (b2 as u32);
988 }
989
990 if length == 0 {
991 continue;
992 }
993
994 let mut vm_code = vec![0u8; length as usize];
996 for i in 0..length as usize {
997 let ch = ppm
998 .decode_char(coder, reader)
999 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1000 vm_code[i] = ch as u8;
1001 }
1002
1003 let total_written = self.lzss.total_written();
1005 let window_mask = self.lzss.window_mask();
1006 self.vm
1007 .add_code(first_byte, &vm_code, total_written, window_mask);
1008
1009 if let Some(end) = self.vm.next_filter_end() {
1011 self.next_filter_check = self.next_filter_check.min(end);
1012 }
1013 }
1014 4 => {
1015 let mut distance: u32 = 0;
1017 for _ in 0..3 {
1018 let ch = ppm
1019 .decode_char(coder, reader)
1020 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1021 distance = (distance << 8) + (ch as u32);
1022 }
1023 let len = ppm
1024 .decode_char(coder, reader)
1025 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1026
1027 let distance = distance + 2;
1029 let length = (len as u32) + 32;
1030
1031 self.lzss.copy_match(distance, length)?;
1032 self.last_dist = distance;
1033 self.last_len = length;
1034 }
1035 5 => {
1036 let len = ppm
1038 .decode_char(coder, reader)
1039 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1040
1041 let length = (len as u32) + 4;
1043
1044 self.lzss.copy_match(1, length)?;
1045 self.last_dist = 1;
1046 self.last_len = length;
1047 }
1048 _ => {
1049 #[cfg(test)]
1051 eprintln!("Unknown PPM control code: {}", ctrl);
1052 return Err(DecompressError::InvalidHuffmanCode);
1053 }
1054 }
1055 }
1056 }
1057
1058 Ok(())
1059 }
1060
1061 pub fn reset(&mut self) {
1063 self.lzss.reset();
1064 self.vm.reset();
1065 self.ppm_coder = None;
1067 self.ppm_esc_char = -1;
1068 self.old_dist = [0; 4];
1069 self.old_dist_ptr = 0;
1070 self.last_dist = 0;
1071 self.last_len = 0;
1072 self.ppm_mode = false;
1073 self.tables_read = false;
1074 self.prev_low_offset = 0;
1075 self.low_offset_repeat_count = 0;
1076 self.next_filter_check = u64::MAX;
1077 }
1078
1079 pub fn bytes_written(&self) -> u64 {
1081 self.lzss.total_written()
1082 }
1083}
1084
1085impl Default for Rar29Decoder {
1086 fn default() -> Self {
1087 Self::new()
1088 }
1089}
1090
1091#[allow(dead_code)]
1095pub struct Rar29StreamDecoder {
1096 decoder: Rar29Decoder,
1097 input_buffer: Vec<u8>,
1099 input_pos: usize,
1101 unpacked_size: u64,
1103}
1104
1105#[allow(dead_code)]
1106impl Rar29StreamDecoder {
1107 pub fn new(unpacked_size: u64) -> Self {
1109 Self {
1110 decoder: Rar29Decoder::new(),
1111 input_buffer: Vec::new(),
1112 input_pos: 0,
1113 unpacked_size,
1114 }
1115 }
1116
1117 pub fn feed(&mut self, data: &[u8]) -> Result<Vec<u8>> {
1120 self.input_buffer.extend_from_slice(data);
1121
1122 let result = self
1124 .decoder
1125 .decompress(&self.input_buffer[self.input_pos..], self.unpacked_size)?;
1126
1127 Ok(result)
1128 }
1129
1130 pub fn is_complete(&self) -> bool {
1132 self.decoder.bytes_written() >= self.unpacked_size
1133 }
1134
1135 pub fn bytes_written(&self) -> u64 {
1137 self.decoder.bytes_written()
1138 }
1139}
1140
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built decoder has produced nothing and has read no tables.
    #[test]
    fn test_decoder_creation() {
        let fresh = Rar29Decoder::new();

        let written = fresh.bytes_written();
        assert_eq!(written, 0);

        let tables_read = fresh.tables_read;
        assert!(!tables_read);
    }
}