1#![cfg_attr(test, allow(clippy::logic_bug))]
8
9use super::{
10 bit_reader::BitReader,
11 huffman::HuffmanDecoder,
12 lzss::LzssDecoder,
13 ppm::{PpmModel, RangeCoder},
14 vm::RarVM,
15 DecompressError, Result,
16};
17
/// Size of the main alphabet: 256 literals, 15 control/short-match symbols
/// (256..=270) and one symbol per length slot (271..).
const MAIN_CODES: usize = 299;

/// Number of distance slots; the length of [`DIST_BASE`] and [`DIST_EXTRA`].
const DIST_CODES: usize = 60;

/// Size of the low-distance alphabet: values 0..=15 plus the repeat symbol 16.
const LOW_DIST_CODES: usize = 17;

/// Number of length slots; the length of [`LENGTH_BASE`] and [`LENGTH_EXTRA`].
const LEN_CODES: usize = 28;

/// Upper bound on a match length.
// NOTE(review): not referenced by the decoder in this file — confirm it is still needed.
const MAX_MATCH_LEN: u32 = 258;

/// Base offsets of the eight two-byte "short" matches (main symbols 263..=270).
const SHORT_BASES: [u32; 8] = [0, 4, 8, 16, 32, 64, 128, 192];

/// Number of extra bits that follow each short-match symbol.
const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];

/// Base value of every length slot; the decoder adds the extra bits and a
/// per-symbol-class bias (2 or 3).
const LENGTH_BASE: [u32; LEN_CODES] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
    160, 192, 224,
];

/// Number of extra bits read after each length slot.
const LENGTH_EXTRA: [u8; LEN_CODES] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
];

/// Base value of every distance slot; the decoder adds the extra bits plus one.
const DIST_BASE: [u32; DIST_CODES] = [
    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
    2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
    1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 2883584, 3145728, 3407872,
    3670016, 3932160,
];

/// Number of extra bits belonging to each distance slot. For slots above 9 the
/// lowest four of these bits come from the low-distance table instead of the
/// raw bit stream.
const DIST_EXTRA: [u8; DIST_CODES] = [
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
    13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18,
];
65
66pub struct Rar29Decoder {
68 lzss: LzssDecoder,
70 huffman: HuffmanDecoder,
72 vm: RarVM,
74 ppm: Option<PpmModel>,
76 ppm_coder: Option<RangeCoder>,
78 ppm_esc_char: i32,
80 old_dist: [u32; 4],
82 old_dist_ptr: usize,
84 last_dist: u32,
86 last_len: u32,
88 ppm_mode: bool,
90 tables_read: bool,
92 prev_low_offset: u32,
94 low_offset_repeat_count: u32,
96 next_filter_check: u64,
98}
99
100impl Rar29Decoder {
101 pub fn new() -> Self {
103 Self::with_window_size(0x400000) }
105
106 pub fn with_window_size(window_size: usize) -> Self {
109 Self {
110 lzss: LzssDecoder::new(window_size),
111 huffman: HuffmanDecoder::new(),
112 vm: RarVM::new(),
113 ppm: None,
114 ppm_coder: None,
115 ppm_esc_char: -1,
116 old_dist: [0; 4],
117 old_dist_ptr: 0,
118 last_dist: 0,
119 last_len: 0,
120 ppm_mode: false,
121 tables_read: false,
122 prev_low_offset: 0,
123 low_offset_repeat_count: 0,
124 next_filter_check: u64::MAX,
125 }
126 }
127
/// Test-only helper: returns a copy of whatever the LZSS stage currently
/// exposes as output.
#[cfg(test)]
pub fn get_output(&self) -> Vec<u8> {
    let produced = self.lzss.output();
    produced.to_vec()
}
133
134 pub fn decompress(&mut self, data: &[u8], unpacked_size: u64) -> Result<Vec<u8>> {
137 let mut reader = BitReader::new(data);
138
139 self.lzss.enable_output(unpacked_size as usize);
141
142 if !self.tables_read {
144 self.read_tables(&mut reader)?;
145 }
146
147 while self.lzss.total_written() < unpacked_size {
149 if reader.is_eof() {
150 break;
151 }
152
153 self.decode_block(&mut reader, unpacked_size)?;
154 }
155
156 let total_written = self.lzss.total_written();
158 let window_mask = self.lzss.window_mask() as usize;
159
160 loop {
162 let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
164 Some((idx, pos)) => (idx, pos),
165 None => break,
166 };
167
168 let flushed = self.lzss.flushed_pos();
170 if flushed < next_pos {
171 self.lzss.flush_to_output(next_pos);
172 }
173
174 let window = self.lzss.window();
175 if let Some((_filter_end, filtered_data)) =
176 self.vm
177 .execute_filter_at_index(filter_idx, window, window_mask, total_written)
178 {
179 self.lzss.write_filtered_to_output(&filtered_data, next_pos);
181 } else {
182 break;
183 }
184 }
185
186 self.lzss.flush_to_output(total_written);
188
189 Ok(self.lzss.take_output())
191 }
192
193 fn read_tables(&mut self, reader: &mut BitReader) -> Result<()> {
195 #[cfg(test)]
196 {
197 let byte_pos = reader.bit_position() / 8;
198 eprintln!(
199 "read_tables ENTRY: bit_pos={}, byte_pos={}",
200 reader.bit_position(),
201 byte_pos
202 );
203 eprintln!(" raw bytes at pos: {:02x?}", reader.peek_bytes(8));
204 }
205 reader.align_to_byte();
207 #[cfg(test)]
208 {
209 let byte_pos = reader.bit_position() / 8;
210 eprintln!(
211 "read_tables AFTER align: bit_pos={}, byte_pos={}",
212 reader.bit_position(),
213 byte_pos
214 );
215 eprintln!(" raw bytes at pos: {:02x?}", reader.peek_bytes(8));
216 };
217
218 let ppm_flag = reader.peek_bits(1) != 0;
221
222 self.ppm_mode = ppm_flag;
223
224 if self.ppm_mode {
225 let ppm = self.ppm.get_or_insert_with(PpmModel::new);
228 match ppm.init(reader) {
229 Ok((coder, esc_char)) => {
230 self.ppm_coder = Some(coder);
231 self.ppm_esc_char = esc_char;
232 #[cfg(test)]
233 println!("PPMd initialized: esc_char={}", esc_char);
234 }
235 Err(e) => {
236 #[cfg(test)]
237 println!("PPMd init failed: {}", e);
238 #[cfg(not(test))]
239 let _ = e;
240 return Err(DecompressError::UnsupportedMethod(0x33));
241 }
242 }
243 } else {
244 self.prev_low_offset = 0;
246 self.low_offset_repeat_count = 0;
247
248 let reset_tables = reader.peek_bits(2) & 1 == 0; reader.advance_bits(2);
252
253 if reset_tables {
254 self.huffman.reset_tables();
255 }
256
257 self.huffman.read_tables_after_header(reader)?;
259 }
260
261 self.tables_read = true;
262 Ok(())
263 }
264
265 fn decode_block(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
267 if self.ppm_mode {
268 return self.decode_block_ppm(reader, max_size);
269 }
270
271 if self.huffman.main_table.is_none() || self.huffman.dist_table.is_none() {
273 return Err(DecompressError::InvalidHuffmanCode);
274 }
275
276 #[cfg(test)]
277 let mut symbol_count = 0;
278
279 while self.lzss.total_written() < max_size && !reader.is_eof() {
280 self.maybe_execute_filters();
282
283 #[cfg(test)]
285 let bit_pos_main_start = reader.bit_position();
286 #[cfg(test)]
287 let peek_bits = reader.peek_bits(16);
288
289 let symbol = unsafe {
291 self.huffman
292 .main_table
293 .as_ref()
294 .unwrap_unchecked()
295 .decode(reader)?
296 };
297
298 #[cfg(test)]
299 {
300 let pos = self.lzss.total_written();
301 if pos >= 1498580 && pos <= 1498610 {
302 let bit_pos_after = reader.bit_position();
303 eprintln!(
304 "MAIN sym={} at pos={}, bits {}->{} peek={:016b}",
305 symbol, pos, bit_pos_main_start, bit_pos_after, peek_bits
306 );
307 }
308 }
309
310 if symbol < 256 {
311 #[cfg(test)]
313 {
314 let pos = self.lzss.total_written();
315 if pos >= 1498595 && pos <= 1498610 {
316 eprintln!("WRITING literal 0x{:02x} at output pos {}", symbol, pos);
317 }
318 }
319 self.lzss.write_literal(symbol as u8);
320 } else if symbol == 256 {
321 #[cfg(test)]
327 eprintln!(
328 "\n=== SYMBOL 256 (end of block) at output pos {}, bit_pos {} ===",
329 self.lzss.total_written(),
330 reader.bit_position()
331 );
332 if !reader.is_eof() {
333 let first_bit = reader.read_bit()?;
334 #[cfg(test)]
335 eprintln!(
336 " first_bit={}, bit_pos after={}",
337 first_bit,
338 reader.bit_position()
339 );
340 if first_bit {
341 self.prev_low_offset = 0;
344 self.low_offset_repeat_count = 0;
345 self.read_tables(reader)?;
347 #[cfg(test)]
348 {
349 eprintln!(
350 "After new tables: bit_pos={}, next 16 bits={:016b}",
351 reader.bit_position(),
352 reader.peek_bits(16)
353 );
354 eprintln!(" About to decode first symbol after table read");
355 }
356 continue;
358 }
359 let _second_bit = reader.read_bit()?; }
363 break;
364 } else if symbol == 257 {
365 #[cfg(test)]
367 eprintln!(
368 "\n=== SYMBOL 257 (VM code) at output pos {} ===",
369 self.lzss.total_written()
370 );
371 self.read_vm_code(reader)?;
372 } else if symbol == 258 {
373 if self.last_len > 0 {
375 #[cfg(test)]
376 {
377 let pos = self.lzss.total_written();
378 let end = pos + self.last_len as u64;
379 if pos <= 1498598 && end > 1498598 {
380 eprintln!(
381 "!!! AT 1498598: symbol 258 repeat, last_dist={}, last_len={}",
382 self.last_dist, self.last_len
383 );
384 }
385 }
386 self.lzss.copy_match(self.last_dist, self.last_len)?;
387 }
388 } else if symbol < 263 {
389 let idx = (symbol - 259) as usize;
391 let distance = self.old_dist[idx];
392
393 let length = self.decode_length_from_table(reader)?;
395
396 #[cfg(test)]
397 {
398 let written = self.lzss.total_written();
399 let end = written + length as u64;
400 if written <= 1498598 && end > 1498598 {
401 eprintln!(
402 "!!! AT 1498598: old idx={},len={},dist={}",
403 idx, length, distance
404 );
405 }
406 }
407
408 self.lzss.copy_match(distance, length)?;
409
410 for i in (1..=idx).rev() {
412 self.old_dist[i] = self.old_dist[i - 1];
413 }
414 self.old_dist[0] = distance;
415 self.last_dist = distance;
416 self.last_len = length;
417 } else if symbol <= 270 {
418 let idx = (symbol - 263) as usize;
420 let base = SHORT_BASES[idx];
421 let bits = SHORT_BITS[idx];
422 let extra = if bits > 0 {
423 reader.read_bits(bits as u32)?
424 } else {
425 0
426 };
427 let distance = base + extra + 1;
428 let length = 2u32;
429
430 #[cfg(test)]
431 {
432 let written = self.lzss.total_written();
433 let end = written + length as u64;
434 if written <= 1498598 && end > 1498598 {
435 eprintln!(
436 "!!! AT 1498598: short sym={}, idx={}, base={}, bits={}, extra={}, dist={}",
437 symbol, idx, base, bits, extra, distance
438 );
439 }
440 }
441
442 self.lzss.copy_match(distance, length)?;
443
444 for i in (1..4).rev() {
446 self.old_dist[i] = self.old_dist[i - 1];
447 }
448 self.old_dist[0] = distance;
449 self.old_dist_ptr = 0;
450 self.last_dist = distance;
451 self.last_len = length;
452 } else {
453 #[cfg(test)]
455 let bit_before_len = reader.bit_position();
456
457 let len_idx = (symbol - 271) as usize;
458 let length = if len_idx < LENGTH_BASE.len() {
459 let base = LENGTH_BASE[len_idx];
460 let extra = LENGTH_EXTRA[len_idx];
461 let extra_val = if extra > 0 {
462 reader.read_bits(extra as u32)?
463 } else {
464 0
465 };
466 #[cfg(test)]
467 {
468 let written = self.lzss.total_written();
469 if written >= 1498595 && written <= 1498602 {
470 let bit_after_len = reader.bit_position();
471 eprintln!(
472 "!!! LONG DECODE at {}: sym={}, len_idx={}, len={}, bits {}->{}]",
473 written,
474 symbol,
475 len_idx,
476 base + extra_val + 3,
477 bit_before_len,
478 bit_after_len
479 );
480 }
481 }
482 base + extra_val + 3 } else {
484 #[cfg(test)]
485 eprintln!(
486 "\nlen_idx {} out of range at written={}",
487 len_idx,
488 self.lzss.total_written()
489 );
490 return Err(DecompressError::InvalidHuffmanCode);
491 };
492
493 let dist_symbol = {
495 #[cfg(test)]
496 let bit_pos_before = reader.bit_position();
497
498 let dist_table = unsafe { self.huffman.dist_table.as_ref().unwrap_unchecked() };
500 match dist_table.decode(reader) {
501 Ok(s) => {
502 #[cfg(test)]
503 {
504 let written = self.lzss.total_written();
505 if written >= 1498595 && written <= 1498610 {
506 let bit_pos_after = reader.bit_position();
507 eprintln!(
508 " dist_symbol={} at pos {} (bits {}->{})",
509 s, written, bit_pos_before, bit_pos_after
510 );
511 }
512 }
513 s
514 }
515 Err(e) => {
516 #[cfg(test)]
517 eprintln!(
518 "\nOffset decode failed at written={}, len={}",
519 self.lzss.total_written(),
520 length
521 );
522 return Err(e);
523 }
524 }
525 };
526
527 let dist_code = dist_symbol as usize;
528 let distance = if dist_code < DIST_BASE.len() {
529 let base = DIST_BASE[dist_code];
530 let extra = DIST_EXTRA[dist_code];
531
532 let extra_val = if extra > 0 {
533 if dist_code > 9 {
534 let high = if extra > 4 {
537 #[cfg(test)]
538 let high_bit_pos = reader.bit_position();
539 let h = reader.read_bits((extra - 4) as u32)?;
540 #[cfg(test)]
541 {
542 let written = self.lzss.total_written();
543 if (written >= 1498595 && written <= 1498610)
544 || (written >= 2176060 && written <= 2176080)
545 {
546 eprintln!(
547 " high bits at {}: {} bits = {} (0b{:016b}), pos {}->{}",
548 written,
549 extra - 4,
550 h, h,
551 high_bit_pos,
552 reader.bit_position()
553 );
554 }
555 }
556 h << 4
557 } else {
558 0
559 };
560 let low = if self.low_offset_repeat_count > 0 {
562 self.low_offset_repeat_count -= 1;
563 #[cfg(test)]
564 {
565 let written = self.lzss.total_written();
566 if written >= 1498550 && written <= 1498610 {
567 eprintln!(
568 "!!! low_offset REPEAT at {}: prev={}",
569 written, self.prev_low_offset
570 );
571 }
572 }
573 self.prev_low_offset
574 } else {
575 #[cfg(test)]
576 let bit_pos_before = reader.bit_position();
577 #[cfg(test)]
578 let raw_bits_16 = reader.peek_bits(16);
579 let low_table = unsafe {
581 self.huffman.low_dist_table.as_ref().unwrap_unchecked()
582 };
583 #[cfg(test)]
584 {
585 let written = self.lzss.total_written();
586 if written == 1498598 {
587 eprintln!(
589 "!!! LOW_TABLE at 1498598 decode_len: {:?}",
590 low_table.dump_decode_len()
591 );
592 eprintln!(
593 "!!! LOW_TABLE at 1498598 symbols: {:?}",
594 low_table.dump_symbols()
595 );
596 }
597 }
598 let sym = low_table.decode(reader)? as u32;
599 #[cfg(test)]
600 {
601 let written = self.lzss.total_written();
602 if written >= 1498550 && written <= 1498610 {
603 let bit_pos_after = reader.bit_position();
604 eprintln!("!!! low_offset at {}: sym={} (bits {}->{}), raw peek = {:016b}",
605 written, sym, bit_pos_before, bit_pos_after, raw_bits_16);
606 }
607 }
608
609 if sym == 16 {
610 self.low_offset_repeat_count = 16 - 1; self.prev_low_offset
614 } else {
615 self.prev_low_offset = sym;
616 sym
617 }
618 };
619 #[cfg(test)]
620 {
621 let written = self.lzss.total_written();
622 if written >= 2176060 && written <= 2176080 {
623 if self.low_offset_repeat_count > 0 {
624 eprintln!(
625 " low_offset REPEAT at {}: prev={}, remaining={}",
626 written,
627 self.prev_low_offset,
628 self.low_offset_repeat_count
629 );
630 } else {
631 eprintln!(" low_offset at {}: dist_code={}, base={}, extra={}, high={}, low={}, dist={}",
632 written, dist_code, base, extra, high, low, base + high + low + 1);
633 }
634 }
635 }
636 high + low
637 } else {
638 #[cfg(test)]
640 let peek = reader.peek_bits(extra as u32);
641 let val = reader.read_bits(extra as u32)?;
642 #[cfg(test)]
643 {
644 let written = self.lzss.total_written();
645 if written >= 0 && written < 0 {
646 eprintln!(" direct: dist_code={}, base={}, extra_bits={}, peek={:04b}, extra_val={}, distance={}",
647 dist_code, base, extra, peek, val, base + val + 1);
648 }
649 }
650 val
651 }
652 } else {
653 0
654 };
655 base + extra_val + 1
656 } else {
657 #[cfg(test)]
658 eprintln!(
659 "\ndist_code {} out of range at written={}",
660 dist_code,
661 self.lzss.total_written()
662 );
663 return Err(DecompressError::InvalidHuffmanCode);
664 };
665
666 let length = if distance >= 0x2000 {
669 if distance >= 0x40000 {
670 length + 2
671 } else {
672 length + 1
673 }
674 } else {
675 length
676 };
677
678 #[cfg(test)]
679 {
680 let written = self.lzss.total_written();
681 let end = written + length as u64;
682 if written <= 1498598 && end > 1498598 {
683 eprintln!(
684 "!!! AT 1498598: long match dist={}, len={}",
685 distance, length
686 );
687 let src_pos = (written as u32).wrapping_sub(distance) as usize;
689 let mask = self.lzss.window_mask() as usize;
690 let window = self.lzss.window();
691 eprintln!(
692 " window src[{}..{}]: {:02x?}",
693 src_pos,
694 src_pos + length as usize,
695 &window[src_pos..src_pos + length as usize]
696 );
697 }
698 if written >= 1498595 && written <= 1498602 {
699 eprintln!(
700 "LONG MATCH at {}: dist={}, len={}",
701 written, distance, length
702 );
703 }
704 }
705
706 self.lzss.copy_match(distance, length)?;
707
708 for i in (1..4).rev() {
710 self.old_dist[i] = self.old_dist[i - 1];
711 }
712 self.old_dist[0] = distance;
713 self.old_dist_ptr = 0;
714 self.last_dist = distance;
715 self.last_len = length;
716 }
717 }
718
719 Ok(())
720 }
721
722 fn decode_length_from_table(&mut self, reader: &mut BitReader) -> Result<u32> {
724 let symbol = {
725 let len_table = self
726 .huffman
727 .len_table
728 .as_ref()
729 .ok_or(DecompressError::InvalidHuffmanCode)?;
730 len_table.decode(reader)?
731 };
732
733 let sym = symbol as usize;
734 if sym < LENGTH_BASE.len() {
735 let base = LENGTH_BASE[sym];
736 let extra = LENGTH_EXTRA[sym];
737 let extra_val = if extra > 0 {
738 reader.read_bits(extra as u32)?
739 } else {
740 0
741 };
742 Ok(base + extra_val + 2)
743 } else {
744 Err(DecompressError::InvalidHuffmanCode)
745 }
746 }
747
748 fn read_vm_code(&mut self, reader: &mut BitReader) -> Result<()> {
751 #[cfg(test)]
752 let bit_pos_start = reader.bit_position();
753
754 let first_byte = reader.read_bits(8)? as u8;
756
757 let length = {
762 let base = (first_byte & 7) + 1;
763 match base {
764 7 => {
765 let next = reader.read_bits(8)? as u32;
767 next + 7
768 }
769 8 => {
770 reader.read_bits(16)?
772 }
773 _ => base as u32,
774 }
775 };
776
777 #[cfg(test)]
778 eprintln!(
779 " read_vm_code: first_byte=0x{:02x}, length={}, bit_pos_start={}",
780 first_byte, length, bit_pos_start
781 );
782
783 if length == 0 {
784 return Ok(());
785 }
786
787 let mut vm_code = vec![0u8; length as usize];
789 for i in 0..length as usize {
790 vm_code[i] = reader.read_bits(8)? as u8;
791 }
792
793 #[cfg(test)]
794 eprintln!(" vm_code end bit_pos={}", reader.bit_position());
795
796 let total_written = self.lzss.total_written();
798 let window_mask = self.lzss.window_mask();
799
800 #[cfg(test)]
801 eprintln!(
802 " add_code: total_written={}, window_mask={:x}",
803 total_written, window_mask
804 );
805
806 #[cfg(test)]
807 {
808 let had_pending_before = self.vm.has_pending_filters();
809 let result = self
810 .vm
811 .add_code(first_byte, &vm_code, total_written, window_mask);
812 let has_pending_after = self.vm.has_pending_filters();
813 if let Some(next_pos) = self.vm.next_filter_pos() {
814 eprintln!(
815 " vm.add_code: added={}, pending={}->{}, next_pos={}",
816 result, had_pending_before, has_pending_after, next_pos
817 );
818 } else {
819 eprintln!(
820 " vm.add_code: added={}, pending={}->{}, next_pos=NONE",
821 result, had_pending_before, has_pending_after
822 );
823 }
824 }
825 #[cfg(not(test))]
826 self.vm
827 .add_code(first_byte, &vm_code, total_written, window_mask);
828
829 if let Some(end) = self.vm.next_filter_end() {
831 self.next_filter_check = self.next_filter_check.min(end);
832 }
833
834 Ok(())
835 }
836
837 fn maybe_execute_filters(&mut self) {
840 let total_written = self.lzss.total_written();
841
842 if total_written < self.next_filter_check {
844 return;
845 }
846
847 let window_mask = self.lzss.window_mask() as usize;
848
849 loop {
851 let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
853 Some((idx, pos)) => (idx, pos),
854 None => break,
855 };
856
857 let flushed = self.lzss.flushed_pos();
859 if flushed < next_pos {
860 self.lzss.flush_to_output(next_pos);
861 }
862
863 let window = self.lzss.window();
865 if let Some((filter_end, filtered_data)) =
866 self.vm
867 .execute_filter_at_index(filter_idx, window, window_mask, total_written)
868 {
869 self.lzss.write_filtered_to_output(&filtered_data, next_pos);
871 self.next_filter_check = filter_end;
873 } else {
874 break;
875 }
876 }
877
878 self.next_filter_check = self.vm.next_filter_end().unwrap_or(u64::MAX);
880 }
881
882 fn decode_block_ppm(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
884 let ppm = self
885 .ppm
886 .as_mut()
887 .ok_or(DecompressError::UnsupportedMethod(0x33))?;
888 let coder = self
889 .ppm_coder
890 .as_mut()
891 .ok_or(DecompressError::UnsupportedMethod(0x33))?;
892 let esc_char = self.ppm_esc_char;
893
894 while self.lzss.total_written() < max_size && !reader.is_eof() {
895 let ch = ppm.decode_char(coder, reader).map_err(|e| {
896 #[cfg(test)]
897 eprintln!(
898 "PPM decode_char failed at pos {}: {}",
899 self.lzss.total_written(),
900 e
901 );
902 #[cfg(not(test))]
903 let _ = e;
904 DecompressError::InvalidHuffmanCode
905 })?;
906
907 if ch < 0 {
908 #[cfg(test)]
910 eprintln!("PPM decode_char returned negative: {}", ch);
911 return Err(DecompressError::InvalidHuffmanCode);
912 }
913
914 #[cfg(test)]
915 {
916 if self.lzss.total_written() < 20 {
917 eprint!("[{}:{}] ", self.lzss.total_written(), ch);
918 }
919 }
920
921 if ch != esc_char {
922 self.lzss.write_literal(ch as u8);
924 } else {
925 let ctrl = ppm
927 .decode_char(coder, reader)
928 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
929
930 if ctrl < 0 {
931 return Err(DecompressError::InvalidHuffmanCode);
932 }
933
934 match ctrl {
935 0 => {
936 break;
938 }
939 1 => {
940 self.lzss.write_literal(esc_char as u8);
942 }
943 2 => {
944 break;
946 }
947 3 => {
948 let first_byte = ppm
950 .decode_char(coder, reader)
951 .map_err(|_| DecompressError::InvalidHuffmanCode)?
952 as u8;
953
954 let mut length = ((first_byte & 7) + 1) as u32;
956 if length == 7 {
957 let b1 = ppm
958 .decode_char(coder, reader)
959 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
960 length = (b1 as u32) + 7;
961 } else if length == 8 {
962 let b1 = ppm
963 .decode_char(coder, reader)
964 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
965 let b2 = ppm
966 .decode_char(coder, reader)
967 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
968 length = (b1 as u32) * 256 + (b2 as u32);
969 }
970
971 if length == 0 {
972 continue;
973 }
974
975 let mut vm_code = vec![0u8; length as usize];
977 for i in 0..length as usize {
978 let ch = ppm
979 .decode_char(coder, reader)
980 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
981 vm_code[i] = ch as u8;
982 }
983
984 let total_written = self.lzss.total_written();
986 let window_mask = self.lzss.window_mask();
987 self.vm
988 .add_code(first_byte, &vm_code, total_written, window_mask);
989
990 if let Some(end) = self.vm.next_filter_end() {
992 self.next_filter_check = self.next_filter_check.min(end);
993 }
994 }
995 4 => {
996 let mut distance: u32 = 0;
998 for _ in 0..3 {
999 let ch = ppm
1000 .decode_char(coder, reader)
1001 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1002 distance = (distance << 8) + (ch as u32);
1003 }
1004 let len = ppm
1005 .decode_char(coder, reader)
1006 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1007
1008 let distance = distance + 2;
1010 let length = (len as u32) + 32;
1011
1012 self.lzss.copy_match(distance, length)?;
1013 self.last_dist = distance;
1014 self.last_len = length;
1015 }
1016 5 => {
1017 let len = ppm
1019 .decode_char(coder, reader)
1020 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1021
1022 let length = (len as u32) + 4;
1024
1025 self.lzss.copy_match(1, length)?;
1026 self.last_dist = 1;
1027 self.last_len = length;
1028 }
1029 _ => {
1030 #[cfg(test)]
1032 eprintln!("Unknown PPM control code: {}", ctrl);
1033 return Err(DecompressError::InvalidHuffmanCode);
1034 }
1035 }
1036 }
1037 }
1038
1039 Ok(())
1040 }
1041
1042 pub fn reset(&mut self) {
1044 self.lzss.reset();
1045 self.vm.reset();
1046 self.ppm_coder = None;
1048 self.ppm_esc_char = -1;
1049 self.old_dist = [0; 4];
1050 self.old_dist_ptr = 0;
1051 self.last_dist = 0;
1052 self.last_len = 0;
1053 self.ppm_mode = false;
1054 self.tables_read = false;
1055 self.prev_low_offset = 0;
1056 self.low_offset_repeat_count = 0;
1057 self.next_filter_check = u64::MAX;
1058 }
1059
1060 pub fn bytes_written(&self) -> u64 {
1062 self.lzss.total_written()
1063 }
1064}
1065
1066impl Default for Rar29Decoder {
1067 fn default() -> Self {
1068 Self::new()
1069 }
1070}
1071
1072pub struct Rar29StreamDecoder {
1075 decoder: Rar29Decoder,
1076 input_buffer: Vec<u8>,
1078 input_pos: usize,
1080 unpacked_size: u64,
1082}
1083
1084impl Rar29StreamDecoder {
1085 pub fn new(unpacked_size: u64) -> Self {
1087 Self {
1088 decoder: Rar29Decoder::new(),
1089 input_buffer: Vec::new(),
1090 input_pos: 0,
1091 unpacked_size,
1092 }
1093 }
1094
1095 pub fn feed(&mut self, data: &[u8]) -> Result<Vec<u8>> {
1098 self.input_buffer.extend_from_slice(data);
1099
1100 let result = self
1102 .decoder
1103 .decompress(&self.input_buffer[self.input_pos..], self.unpacked_size)?;
1104
1105 Ok(result)
1106 }
1107
1108 pub fn is_complete(&self) -> bool {
1110 self.decoder.bytes_written() >= self.unpacked_size
1111 }
1112
1113 pub fn bytes_written(&self) -> u64 {
1115 self.decoder.bytes_written()
1116 }
1117}
1118
#[cfg(test)]
mod tests {
    use super::*;

    /// A new decoder has produced no output and has not parsed any tables.
    #[test]
    fn test_decoder_creation() {
        let fresh = Rar29Decoder::new();
        assert!(!fresh.tables_read);
        assert_eq!(fresh.bytes_written(), 0);
    }
}