1#![cfg_attr(test, allow(clippy::logic_bug))]
8
9use super::{
10 bit_reader::BitReader,
11 huffman::HuffmanDecoder,
12 lzss::LzssDecoder,
13 ppm::{PpmModel, RangeCoder},
14 vm::RarVM,
15 DecompressError, Result,
16};
17
// The `*_CODES` constants and `MAX_MATCH_LEN` below are not referenced by the
// code in this part of the file, hence `#[allow(dead_code)]`; they record the
// alphabet sizes that the slot tables and `decode_block` imply.

/// Size of the main alphabet: symbols `0..=270` (literals, control symbols,
/// repeat/old-distance/short matches) plus one symbol per `LENGTH_BASE` slot
/// (271 + 28).
#[allow(dead_code)]
const MAIN_CODES: usize = 299;

/// Number of distance slots; equals the length of `DIST_BASE` / `DIST_EXTRA`.
#[allow(dead_code)]
const DIST_CODES: usize = 60;

/// Size of the low-offset alphabet. The decoder treats symbol 16 as a
/// "repeat previous low offset" marker; presumably 0..=15 are the plain
/// 4-bit values.
#[allow(dead_code)]
const LOW_DIST_CODES: usize = 17;

/// Number of length slots; equals the length of `LENGTH_BASE` / `LENGTH_EXTRA`.
#[allow(dead_code)]
const LEN_CODES: usize = 28;

/// Largest long-match length before the distance-dependent bonus:
/// `LENGTH_BASE[27] + (2^5 - 1) + 3 = 224 + 31 + 3`.
#[allow(dead_code)]
const MAX_MATCH_LEN: u32 = 258;

/// Base distance (before the `+ 1` bias) for the short-match symbols
/// `263..=270`, indexed by `symbol - 263`.
const SHORT_BASES: [u32; 8] = [0, 4, 8, 16, 32, 64, 128, 192];

/// Number of extra distance bits read for each short-match symbol.
const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];

/// Base length for each length slot; the caller adds the extra bits and a
/// bias (`+ 3` for long matches, `+ 2` for old-distance matches).
const LENGTH_BASE: [u32; 28] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
    160, 192, 224,
];

/// Number of extra bits that follow each length slot.
const LENGTH_EXTRA: [u8; 28] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
];

/// Base distance (before the `+ 1` bias) for each distance slot.
const DIST_BASE: [u32; 60] = [
    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
    2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
    1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 2883584, 3145728, 3407872,
    3670016, 3932160,
];

/// Number of extra bits for each distance slot. For slots above 9 the low
/// four of these bits are not read directly but come from the low-offset
/// Huffman table (see `decode_block`).
const DIST_EXTRA: [u8; 60] = [
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
    13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18,
];
70
/// Decoder state for RAR 2.9-style compressed data: an LZSS window fed either
/// by Huffman-coded symbols or by a PPM model, plus a VM for post-processing
/// filters.
pub struct Rar29Decoder {
    /// Sliding window and output buffer; receives literals and match copies.
    lzss: LzssDecoder,
    /// Holds the main, distance, low-distance and length Huffman tables.
    huffman: HuffmanDecoder,
    /// Stores filter programs read from the stream and runs them on the window.
    vm: RarVM,
    /// PPM model; created on first use when a block header selects PPM mode.
    ppm: Option<PpmModel>,
    /// Range coder returned by `PpmModel::init`; `None` until PPM is initialised.
    ppm_coder: Option<RangeCoder>,
    /// Escape value returned by `PpmModel::init`; `-1` until then.
    ppm_esc_char: i32,
    /// The four most recently used match distances, newest first.
    old_dist: [u32; 4],
    /// Only ever reset to 0 in the visible code; never read here.
    old_dist_ptr: usize,
    /// Distance of the most recent match (reused by main symbol 258).
    last_dist: u32,
    /// Length of the most recent match; 0 means there is nothing to repeat.
    last_len: u32,
    /// `true` while the current block is PPM-coded rather than Huffman/LZ-coded.
    ppm_mode: bool,
    /// Set once `read_tables` has succeeded; `decompress` skips the initial
    /// table read when it is already `true`.
    tables_read: bool,
    /// Last low-offset value decoded from the low-distance table.
    prev_low_offset: u32,
    /// Number of upcoming matches that reuse `prev_low_offset` without
    /// decoding a new low-offset symbol.
    low_offset_repeat_count: u32,
    /// Output position below which `maybe_execute_filters` returns
    /// immediately; `u64::MAX` when no filter is pending.
    next_filter_check: u64,
}
116
117impl Rar29Decoder {
118 pub fn new() -> Self {
120 Self::with_window_size(0x400000) }
122
123 pub fn with_window_size(window_size: usize) -> Self {
126 Self {
127 lzss: LzssDecoder::new(window_size),
128 huffman: HuffmanDecoder::new(),
129 vm: RarVM::new(),
130 ppm: None,
131 ppm_coder: None,
132 ppm_esc_char: -1,
133 old_dist: [0; 4],
134 old_dist_ptr: 0,
135 last_dist: 0,
136 last_len: 0,
137 ppm_mode: false,
138 tables_read: false,
139 prev_low_offset: 0,
140 low_offset_repeat_count: 0,
141 next_filter_check: u64::MAX,
142 }
143 }
144
    /// Test-only helper: returns an owned copy of whatever
    /// `self.lzss.output()` currently exposes.
    #[cfg(test)]
    pub fn get_output(&self) -> Vec<u8> {
        self.lzss.output().to_vec()
    }
150
151 pub fn decompress(&mut self, data: &[u8], unpacked_size: u64) -> Result<Vec<u8>> {
154 let mut reader = BitReader::new(data);
155
156 self.lzss.enable_output(unpacked_size as usize);
158
159 if !self.tables_read {
161 self.read_tables(&mut reader)?;
162 }
163
164 while self.lzss.total_written() < unpacked_size {
166 if reader.is_eof() {
167 break;
168 }
169
170 self.decode_block(&mut reader, unpacked_size)?;
171 }
172
173 let total_written = self.lzss.total_written();
175 let window_mask = self.lzss.window_mask() as usize;
176
177 loop {
179 let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
181 Some((idx, pos)) => (idx, pos),
182 None => break,
183 };
184
185 let flushed = self.lzss.flushed_pos();
187 if flushed < next_pos {
188 self.lzss.flush_to_output(next_pos);
189 }
190
191 let window = self.lzss.window();
192 if let Some((_filter_end, filtered_data)) =
193 self.vm
194 .execute_filter_at_index(filter_idx, window, window_mask, total_written)
195 {
196 self.lzss.write_filtered_to_output(filtered_data, next_pos);
198 } else {
199 break;
200 }
201 }
202
203 self.lzss.flush_to_output(total_written);
205
206 Ok(self.lzss.take_output())
208 }
209
210 fn read_tables(&mut self, reader: &mut BitReader) -> Result<()> {
212 #[cfg(test)]
213 {
214 let byte_pos = reader.bit_position() / 8;
215 eprintln!(
216 "read_tables ENTRY: bit_pos={}, byte_pos={}",
217 reader.bit_position(),
218 byte_pos
219 );
220 eprintln!(" raw bytes at pos: {:02x?}", reader.peek_bytes(8));
221 }
222 reader.align_to_byte();
224 #[cfg(test)]
225 {
226 let byte_pos = reader.bit_position() / 8;
227 eprintln!(
228 "read_tables AFTER align: bit_pos={}, byte_pos={}",
229 reader.bit_position(),
230 byte_pos
231 );
232 eprintln!(" raw bytes at pos: {:02x?}", reader.peek_bytes(8));
233 };
234
235 let ppm_flag = reader.peek_bits(1) != 0;
238
239 self.ppm_mode = ppm_flag;
240
241 if self.ppm_mode {
242 let ppm = self.ppm.get_or_insert_with(PpmModel::new);
245 match ppm.init(reader) {
246 Ok((coder, esc_char)) => {
247 self.ppm_coder = Some(coder);
248 self.ppm_esc_char = esc_char;
249 #[cfg(test)]
250 println!("PPMd initialized: esc_char={}", esc_char);
251 }
252 Err(e) => {
253 #[cfg(test)]
254 println!("PPMd init failed: {}", e);
255 #[cfg(not(test))]
256 let _ = e;
257 return Err(DecompressError::UnsupportedMethod(0x33));
258 }
259 }
260 } else {
261 self.prev_low_offset = 0;
263 self.low_offset_repeat_count = 0;
264
265 let reset_tables = reader.peek_bits(2) & 1 == 0; reader.advance_bits(2);
269
270 if reset_tables {
271 self.huffman.reset_tables();
272 }
273
274 self.huffman.read_tables_after_header(reader)?;
276 }
277
278 self.tables_read = true;
279 Ok(())
280 }
281
282 fn decode_block(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
284 if self.ppm_mode {
285 return self.decode_block_ppm(reader, max_size);
286 }
287
288 if self.huffman.main_table.is_none() || self.huffman.dist_table.is_none() {
290 return Err(DecompressError::InvalidHuffmanCode);
291 }
292
293 while self.lzss.total_written() < max_size && !reader.is_eof() {
294 self.maybe_execute_filters();
296
297 #[cfg(test)]
299 let bit_pos_main_start = reader.bit_position();
300 #[cfg(test)]
301 let peek_bits = reader.peek_bits(16);
302
303 let symbol = unsafe {
305 self.huffman
306 .main_table
307 .as_ref()
308 .unwrap_unchecked()
309 .decode(reader)?
310 };
311
312 #[cfg(test)]
313 {
314 let pos = self.lzss.total_written();
315 if pos >= 1498580 && pos <= 1498610 {
316 let bit_pos_after = reader.bit_position();
317 eprintln!(
318 "MAIN sym={} at pos={}, bits {}->{} peek={:016b}",
319 symbol, pos, bit_pos_main_start, bit_pos_after, peek_bits
320 );
321 }
322 }
323
324 if symbol < 256 {
325 #[cfg(test)]
327 {
328 let pos = self.lzss.total_written();
329 if pos >= 1498595 && pos <= 1498610 {
330 eprintln!("WRITING literal 0x{:02x} at output pos {}", symbol, pos);
331 }
332 }
333 self.lzss.write_literal(symbol as u8);
334 } else if symbol == 256 {
335 #[cfg(test)]
341 eprintln!(
342 "\n=== SYMBOL 256 (end of block) at output pos {}, bit_pos {} ===",
343 self.lzss.total_written(),
344 reader.bit_position()
345 );
346 if !reader.is_eof() {
347 let first_bit = reader.read_bit()?;
348 #[cfg(test)]
349 eprintln!(
350 " first_bit={}, bit_pos after={}",
351 first_bit,
352 reader.bit_position()
353 );
354 if first_bit {
355 self.prev_low_offset = 0;
358 self.low_offset_repeat_count = 0;
359 self.read_tables(reader)?;
361 #[cfg(test)]
362 {
363 eprintln!(
364 "After new tables: bit_pos={}, next 16 bits={:016b}",
365 reader.bit_position(),
366 reader.peek_bits(16)
367 );
368 eprintln!(" About to decode first symbol after table read");
369 }
370 continue;
372 }
373 let _second_bit = reader.read_bit()?; }
377 break;
378 } else if symbol == 257 {
379 #[cfg(test)]
381 eprintln!(
382 "\n=== SYMBOL 257 (VM code) at output pos {} ===",
383 self.lzss.total_written()
384 );
385 self.read_vm_code(reader)?;
386 } else if symbol == 258 {
387 if self.last_len > 0 {
389 #[cfg(test)]
390 {
391 let pos = self.lzss.total_written();
392 let end = pos + self.last_len as u64;
393 if pos <= 1498598 && end > 1498598 {
394 eprintln!(
395 "!!! AT 1498598: symbol 258 repeat, last_dist={}, last_len={}",
396 self.last_dist, self.last_len
397 );
398 }
399 }
400 self.lzss.copy_match(self.last_dist, self.last_len)?;
401 }
402 } else if symbol < 263 {
403 let idx = (symbol - 259) as usize;
405 let distance = self.old_dist[idx];
406
407 let length = self.decode_length_from_table(reader)?;
409
410 #[cfg(test)]
411 {
412 let written = self.lzss.total_written();
413 let end = written + length as u64;
414 if written <= 1498598 && end > 1498598 {
415 eprintln!(
416 "!!! AT 1498598: old idx={},len={},dist={}",
417 idx, length, distance
418 );
419 }
420 }
421
422 self.lzss.copy_match(distance, length)?;
423
424 for i in (1..=idx).rev() {
426 self.old_dist[i] = self.old_dist[i - 1];
427 }
428 self.old_dist[0] = distance;
429 self.last_dist = distance;
430 self.last_len = length;
431 } else if symbol <= 270 {
432 let idx = (symbol - 263) as usize;
434 let base = SHORT_BASES[idx];
435 let bits = SHORT_BITS[idx];
436 let extra = if bits > 0 {
437 reader.read_bits(bits as u32)?
438 } else {
439 0
440 };
441 let distance = base + extra + 1;
442 let length = 2u32;
443
444 #[cfg(test)]
445 {
446 let written = self.lzss.total_written();
447 let end = written + length as u64;
448 if written <= 1498598 && end > 1498598 {
449 eprintln!(
450 "!!! AT 1498598: short sym={}, idx={}, base={}, bits={}, extra={}, dist={}",
451 symbol, idx, base, bits, extra, distance
452 );
453 }
454 }
455
456 self.lzss.copy_match(distance, length)?;
457
458 for i in (1..4).rev() {
460 self.old_dist[i] = self.old_dist[i - 1];
461 }
462 self.old_dist[0] = distance;
463 self.old_dist_ptr = 0;
464 self.last_dist = distance;
465 self.last_len = length;
466 } else {
467 #[cfg(test)]
469 let bit_before_len = reader.bit_position();
470
471 let len_idx = (symbol - 271) as usize;
472 let length = if len_idx < LENGTH_BASE.len() {
473 let base = LENGTH_BASE[len_idx];
474 let extra = LENGTH_EXTRA[len_idx];
475 let extra_val = if extra > 0 {
476 reader.read_bits(extra as u32)?
477 } else {
478 0
479 };
480 #[cfg(test)]
481 {
482 let written = self.lzss.total_written();
483 if written >= 1498595 && written <= 1498602 {
484 let bit_after_len = reader.bit_position();
485 eprintln!(
486 "!!! LONG DECODE at {}: sym={}, len_idx={}, len={}, bits {}->{}]",
487 written,
488 symbol,
489 len_idx,
490 base + extra_val + 3,
491 bit_before_len,
492 bit_after_len
493 );
494 }
495 }
496 base + extra_val + 3 } else {
498 #[cfg(test)]
499 eprintln!(
500 "\nlen_idx {} out of range at written={}",
501 len_idx,
502 self.lzss.total_written()
503 );
504 return Err(DecompressError::InvalidHuffmanCode);
505 };
506
507 let dist_symbol = {
509 #[cfg(test)]
510 let bit_pos_before = reader.bit_position();
511
512 let dist_table = unsafe { self.huffman.dist_table.as_ref().unwrap_unchecked() };
514 match dist_table.decode(reader) {
515 Ok(s) => {
516 #[cfg(test)]
517 {
518 let written = self.lzss.total_written();
519 if written >= 1498595 && written <= 1498610 {
520 let bit_pos_after = reader.bit_position();
521 eprintln!(
522 " dist_symbol={} at pos {} (bits {}->{})",
523 s, written, bit_pos_before, bit_pos_after
524 );
525 }
526 }
527 s
528 }
529 Err(e) => {
530 #[cfg(test)]
531 eprintln!(
532 "\nOffset decode failed at written={}, len={}",
533 self.lzss.total_written(),
534 length
535 );
536 return Err(e);
537 }
538 }
539 };
540
541 let dist_code = dist_symbol as usize;
542 let distance = if dist_code < DIST_BASE.len() {
543 let base = DIST_BASE[dist_code];
544 let extra = DIST_EXTRA[dist_code];
545
546 let extra_val = if extra > 0 {
547 if dist_code > 9 {
548 let high = if extra > 4 {
551 #[cfg(test)]
552 let high_bit_pos = reader.bit_position();
553 let h = reader.read_bits((extra - 4) as u32)?;
554 #[cfg(test)]
555 {
556 let written = self.lzss.total_written();
557 if (written >= 1498595 && written <= 1498610)
558 || (written >= 2176060 && written <= 2176080)
559 {
560 eprintln!(
561 " high bits at {}: {} bits = {} (0b{:016b}), pos {}->{}",
562 written,
563 extra - 4,
564 h, h,
565 high_bit_pos,
566 reader.bit_position()
567 );
568 }
569 }
570 h << 4
571 } else {
572 0
573 };
574 let low = if self.low_offset_repeat_count > 0 {
576 self.low_offset_repeat_count -= 1;
577 #[cfg(test)]
578 {
579 let written = self.lzss.total_written();
580 if written >= 1498550 && written <= 1498610 {
581 eprintln!(
582 "!!! low_offset REPEAT at {}: prev={}",
583 written, self.prev_low_offset
584 );
585 }
586 }
587 self.prev_low_offset
588 } else {
589 #[cfg(test)]
590 let bit_pos_before = reader.bit_position();
591 #[cfg(test)]
592 let raw_bits_16 = reader.peek_bits(16);
593 let low_table = unsafe {
595 self.huffman.low_dist_table.as_ref().unwrap_unchecked()
596 };
597 #[cfg(test)]
598 {
599 let written = self.lzss.total_written();
600 if written == 1498598 {
601 eprintln!(
603 "!!! LOW_TABLE at 1498598 decode_len: {:?}",
604 low_table.dump_decode_len()
605 );
606 eprintln!(
607 "!!! LOW_TABLE at 1498598 symbols: {:?}",
608 low_table.dump_symbols()
609 );
610 }
611 }
612 let sym = low_table.decode(reader)? as u32;
613 #[cfg(test)]
614 {
615 let written = self.lzss.total_written();
616 if written >= 1498550 && written <= 1498610 {
617 let bit_pos_after = reader.bit_position();
618 eprintln!("!!! low_offset at {}: sym={} (bits {}->{}), raw peek = {:016b}",
619 written, sym, bit_pos_before, bit_pos_after, raw_bits_16);
620 }
621 }
622
623 if sym == 16 {
624 self.low_offset_repeat_count = 16 - 1; self.prev_low_offset
628 } else {
629 self.prev_low_offset = sym;
630 sym
631 }
632 };
633 #[cfg(test)]
634 {
635 let written = self.lzss.total_written();
636 if written >= 2176060 && written <= 2176080 {
637 if self.low_offset_repeat_count > 0 {
638 eprintln!(
639 " low_offset REPEAT at {}: prev={}, remaining={}",
640 written,
641 self.prev_low_offset,
642 self.low_offset_repeat_count
643 );
644 } else {
645 eprintln!(" low_offset at {}: dist_code={}, base={}, extra={}, high={}, low={}, dist={}",
646 written, dist_code, base, extra, high, low, base + high + low + 1);
647 }
648 }
649 }
650 high + low
651 } else {
652 reader.read_bits(extra as u32)?
654 }
655 } else {
656 0
657 };
658 base + extra_val + 1
659 } else {
660 #[cfg(test)]
661 eprintln!(
662 "\ndist_code {} out of range at written={}",
663 dist_code,
664 self.lzss.total_written()
665 );
666 return Err(DecompressError::InvalidHuffmanCode);
667 };
668
669 let length = if distance >= 0x2000 {
672 if distance >= 0x40000 {
673 length + 2
674 } else {
675 length + 1
676 }
677 } else {
678 length
679 };
680
681 #[cfg(test)]
682 {
683 let written = self.lzss.total_written();
684 let end = written + length as u64;
685 if written <= 1498598 && end > 1498598 {
686 eprintln!(
687 "!!! AT 1498598: long match dist={}, len={}",
688 distance, length
689 );
690 let src_pos = (written as u32).wrapping_sub(distance) as usize;
692 let _mask = self.lzss.window_mask() as usize;
693 let window = self.lzss.window();
694 eprintln!(
695 " window src[{}..{}]: {:02x?}",
696 src_pos,
697 src_pos + length as usize,
698 &window[src_pos..src_pos + length as usize]
699 );
700 }
701 if written >= 1498595 && written <= 1498602 {
702 eprintln!(
703 "LONG MATCH at {}: dist={}, len={}",
704 written, distance, length
705 );
706 }
707 }
708
709 self.lzss.copy_match(distance, length)?;
710
711 for i in (1..4).rev() {
713 self.old_dist[i] = self.old_dist[i - 1];
714 }
715 self.old_dist[0] = distance;
716 self.old_dist_ptr = 0;
717 self.last_dist = distance;
718 self.last_len = length;
719 }
720 }
721
722 Ok(())
723 }
724
725 fn decode_length_from_table(&mut self, reader: &mut BitReader) -> Result<u32> {
727 let symbol = {
728 let len_table = self
729 .huffman
730 .len_table
731 .as_ref()
732 .ok_or(DecompressError::InvalidHuffmanCode)?;
733 len_table.decode(reader)?
734 };
735
736 let sym = symbol as usize;
737 if sym < LENGTH_BASE.len() {
738 let base = LENGTH_BASE[sym];
739 let extra = LENGTH_EXTRA[sym];
740 let extra_val = if extra > 0 {
741 reader.read_bits(extra as u32)?
742 } else {
743 0
744 };
745 Ok(base + extra_val + 2)
746 } else {
747 Err(DecompressError::InvalidHuffmanCode)
748 }
749 }
750
751 #[cold]
754 fn read_vm_code(&mut self, reader: &mut BitReader) -> Result<()> {
755 #[cfg(test)]
756 let bit_pos_start = reader.bit_position();
757
758 let first_byte = reader.read_bits(8)? as u8;
760
761 let length = {
766 let base = (first_byte & 7) + 1;
767 match base {
768 7 => {
769 let next = reader.read_bits(8)? as u32;
771 next + 7
772 }
773 8 => {
774 reader.read_bits(16)?
776 }
777 _ => base as u32,
778 }
779 };
780
781 #[cfg(test)]
782 eprintln!(
783 " read_vm_code: first_byte=0x{:02x}, length={}, bit_pos_start={}",
784 first_byte, length, bit_pos_start
785 );
786
787 if length == 0 {
788 return Ok(());
789 }
790
791 let mut vm_code = vec![0u8; length as usize];
793 for i in 0..length as usize {
794 vm_code[i] = reader.read_bits(8)? as u8;
795 }
796
797 #[cfg(test)]
798 eprintln!(" vm_code end bit_pos={}", reader.bit_position());
799
800 let total_written = self.lzss.total_written();
802 let window_mask = self.lzss.window_mask();
803
804 #[cfg(test)]
805 eprintln!(
806 " add_code: total_written={}, window_mask={:x}",
807 total_written, window_mask
808 );
809
810 #[cfg(test)]
811 {
812 let had_pending_before = self.vm.has_pending_filters();
813 let result = self
814 .vm
815 .add_code(first_byte, &vm_code, total_written, window_mask);
816 let has_pending_after = self.vm.has_pending_filters();
817 if let Some(next_pos) = self.vm.next_filter_pos() {
818 eprintln!(
819 " vm.add_code: added={}, pending={}->{}, next_pos={}",
820 result, had_pending_before, has_pending_after, next_pos
821 );
822 } else {
823 eprintln!(
824 " vm.add_code: added={}, pending={}->{}, next_pos=NONE",
825 result, had_pending_before, has_pending_after
826 );
827 }
828 }
829 #[cfg(not(test))]
830 self.vm
831 .add_code(first_byte, &vm_code, total_written, window_mask);
832
833 if let Some(end) = self.vm.next_filter_end() {
835 self.next_filter_check = self.next_filter_check.min(end);
836 }
837
838 Ok(())
839 }
840
841 #[inline]
844 fn maybe_execute_filters(&mut self) {
845 let total_written = self.lzss.total_written();
846
847 if total_written < self.next_filter_check {
849 return;
850 }
851
852 let window_mask = self.lzss.window_mask() as usize;
853
854 loop {
856 let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
858 Some((idx, pos)) => (idx, pos),
859 None => break,
860 };
861
862 let flushed = self.lzss.flushed_pos();
864 if flushed < next_pos {
865 self.lzss.flush_to_output(next_pos);
866 }
867
868 let window = self.lzss.window();
870 if let Some((filter_end, filtered_data)) =
871 self.vm
872 .execute_filter_at_index(filter_idx, window, window_mask, total_written)
873 {
874 self.lzss.write_filtered_to_output(filtered_data, next_pos);
876 self.next_filter_check = filter_end;
878 } else {
879 break;
880 }
881 }
882
883 self.next_filter_check = self.vm.next_filter_end().unwrap_or(u64::MAX);
885 }
886
887 fn decode_block_ppm(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
889 let ppm = self
890 .ppm
891 .as_mut()
892 .ok_or(DecompressError::UnsupportedMethod(0x33))?;
893 let coder = self
894 .ppm_coder
895 .as_mut()
896 .ok_or(DecompressError::UnsupportedMethod(0x33))?;
897 let esc_char = self.ppm_esc_char;
898
899 while self.lzss.total_written() < max_size && !reader.is_eof() {
900 let ch = ppm.decode_char(coder, reader).map_err(|e| {
901 #[cfg(test)]
902 eprintln!(
903 "PPM decode_char failed at pos {}: {}",
904 self.lzss.total_written(),
905 e
906 );
907 #[cfg(not(test))]
908 let _ = e;
909 DecompressError::InvalidHuffmanCode
910 })?;
911
912 if ch < 0 {
913 #[cfg(test)]
915 eprintln!("PPM decode_char returned negative: {}", ch);
916 return Err(DecompressError::InvalidHuffmanCode);
917 }
918
919 #[cfg(test)]
920 {
921 if self.lzss.total_written() < 20 {
922 eprint!("[{}:{}] ", self.lzss.total_written(), ch);
923 }
924 }
925
926 if ch != esc_char {
927 self.lzss.write_literal(ch as u8);
929 } else {
930 let ctrl = ppm
932 .decode_char(coder, reader)
933 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
934
935 if ctrl < 0 {
936 return Err(DecompressError::InvalidHuffmanCode);
937 }
938
939 match ctrl {
940 0 => {
941 break;
943 }
944 1 => {
945 self.lzss.write_literal(esc_char as u8);
947 }
948 2 => {
949 break;
951 }
952 3 => {
953 let first_byte = ppm
955 .decode_char(coder, reader)
956 .map_err(|_| DecompressError::InvalidHuffmanCode)?
957 as u8;
958
959 let mut length = ((first_byte & 7) + 1) as u32;
961 if length == 7 {
962 let b1 = ppm
963 .decode_char(coder, reader)
964 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
965 length = (b1 as u32) + 7;
966 } else if length == 8 {
967 let b1 = ppm
968 .decode_char(coder, reader)
969 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
970 let b2 = ppm
971 .decode_char(coder, reader)
972 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
973 length = (b1 as u32) * 256 + (b2 as u32);
974 }
975
976 if length == 0 {
977 continue;
978 }
979
980 let mut vm_code = vec![0u8; length as usize];
982 for i in 0..length as usize {
983 let ch = ppm
984 .decode_char(coder, reader)
985 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
986 vm_code[i] = ch as u8;
987 }
988
989 let total_written = self.lzss.total_written();
991 let window_mask = self.lzss.window_mask();
992 self.vm
993 .add_code(first_byte, &vm_code, total_written, window_mask);
994
995 if let Some(end) = self.vm.next_filter_end() {
997 self.next_filter_check = self.next_filter_check.min(end);
998 }
999 }
1000 4 => {
1001 let mut distance: u32 = 0;
1003 for _ in 0..3 {
1004 let ch = ppm
1005 .decode_char(coder, reader)
1006 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1007 distance = (distance << 8) + (ch as u32);
1008 }
1009 let len = ppm
1010 .decode_char(coder, reader)
1011 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1012
1013 let distance = distance + 2;
1015 let length = (len as u32) + 32;
1016
1017 self.lzss.copy_match(distance, length)?;
1018 self.last_dist = distance;
1019 self.last_len = length;
1020 }
1021 5 => {
1022 let len = ppm
1024 .decode_char(coder, reader)
1025 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1026
1027 let length = (len as u32) + 4;
1029
1030 self.lzss.copy_match(1, length)?;
1031 self.last_dist = 1;
1032 self.last_len = length;
1033 }
1034 _ => {
1035 #[cfg(test)]
1037 eprintln!("Unknown PPM control code: {}", ctrl);
1038 return Err(DecompressError::InvalidHuffmanCode);
1039 }
1040 }
1041 }
1042 }
1043
1044 Ok(())
1045 }
1046
1047 pub fn reset(&mut self) {
1049 self.lzss.reset();
1050 self.vm.reset();
1051 self.ppm_coder = None;
1053 self.ppm_esc_char = -1;
1054 self.old_dist = [0; 4];
1055 self.old_dist_ptr = 0;
1056 self.last_dist = 0;
1057 self.last_len = 0;
1058 self.ppm_mode = false;
1059 self.tables_read = false;
1060 self.prev_low_offset = 0;
1061 self.low_offset_repeat_count = 0;
1062 self.next_filter_check = u64::MAX;
1063 }
1064
    /// Returns the total number of bytes the LZSS stage reports as written.
    pub fn bytes_written(&self) -> u64 {
        self.lzss.total_written()
    }
1069}
1070
/// `Default` is equivalent to [`Rar29Decoder::new`].
impl Default for Rar29Decoder {
    fn default() -> Self {
        Self::new()
    }
}
1076
/// Wrapper that accumulates input chunks and runs a [`Rar29Decoder`] over
/// them.
// Not referenced by the code visible in this file, hence `dead_code` is allowed.
#[allow(dead_code)]
pub struct Rar29StreamDecoder {
    /// The underlying block decoder.
    decoder: Rar29Decoder,
    /// All input bytes received so far.
    input_buffer: Vec<u8>,
    /// Offset into `input_buffer` from which decoding starts; never advanced
    /// by the visible code.
    input_pos: usize,
    /// Expected size of the decompressed output.
    unpacked_size: u64,
}
1090
1091#[allow(dead_code)]
1092impl Rar29StreamDecoder {
1093 pub fn new(unpacked_size: u64) -> Self {
1095 Self {
1096 decoder: Rar29Decoder::new(),
1097 input_buffer: Vec::new(),
1098 input_pos: 0,
1099 unpacked_size,
1100 }
1101 }
1102
1103 pub fn feed(&mut self, data: &[u8]) -> Result<Vec<u8>> {
1106 self.input_buffer.extend_from_slice(data);
1107
1108 let result = self
1110 .decoder
1111 .decompress(&self.input_buffer[self.input_pos..], self.unpacked_size)?;
1112
1113 Ok(result)
1114 }
1115
1116 pub fn is_complete(&self) -> bool {
1118 self.decoder.bytes_written() >= self.unpacked_size
1119 }
1120
1121 pub fn bytes_written(&self) -> u64 {
1123 self.decoder.bytes_written()
1124 }
1125}
1126
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed decoder has written nothing and has not read
    /// any tables yet.
    #[test]
    fn test_decoder_creation() {
        let decoder = Rar29Decoder::new();
        assert_eq!(decoder.bytes_written(), 0);
        assert!(!decoder.tables_read);
    }
}