1#![cfg_attr(test, allow(clippy::logic_bug))]
8
9use super::{
10 bit_reader::BitReader,
11 huffman::HuffmanDecoder,
12 lzss::LzssDecoder,
13 ppm::{PpmModel, RangeCoder},
14 vm::RarVM,
15 DecompressError, Result,
16};
17
/// Size of the main alphabet: 256 literals plus the control and match symbols
/// 256..=298 handled in `decode_block`.
#[allow(dead_code)]
const MAIN_CODES: usize = 299;

/// Number of distance slots (the length of `DIST_BASE` / `DIST_EXTRA`).
#[allow(dead_code)]
const DIST_CODES: usize = 60;

/// Size of the low-distance alphabet: values 0..=15 plus the repeat marker 16.
#[allow(dead_code)]
const LOW_DIST_CODES: usize = 17;

/// Number of length slots (the length of `LENGTH_BASE` / `LENGTH_EXTRA`).
#[allow(dead_code)]
const LEN_CODES: usize = 28;

/// Presumably the longest match the format can express; not referenced in this file.
#[allow(dead_code)]
const MAX_MATCH_LEN: u32 = 258;

/// Base distance (before the implicit `+ 1`) for the short two-byte matches
/// selected by main symbols 263..=270.
const SHORT_BASES: [u32; 8] = [0, 4, 8, 16, 32, 64, 128, 192];

/// Number of raw bits read after each short-match symbol.
const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];

/// Base match length for every length slot.
const LENGTH_BASE: [u32; 28] = [
    0, 1, 2, 3, 4, 5, 6, 7, //
    8, 10, 12, 14, //
    16, 20, 24, 28, //
    32, 40, 48, 56, //
    64, 80, 96, 112, //
    128, 160, 192, 224,
];

/// Number of raw bits read after each length slot.
const LENGTH_EXTRA: [u8; 28] = [
    0, 0, 0, 0, 0, 0, 0, 0, //
    1, 1, 1, 1, //
    2, 2, 2, 2, //
    3, 3, 3, 3, //
    4, 4, 4, 4, //
    5, 5, 5, 5,
];

/// Run lengths of the distance tables: entry `w` is the number of consecutive
/// distance slots that carry `w` extra bits. The runs add up to 60 slots.
const DIST_SLOTS_PER_WIDTH: [usize; 19] = [
    4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 14, 0, 12,
];

/// Expands `DIST_SLOTS_PER_WIDTH` into the per-slot base distance and
/// extra-bit width. Each slot covers `1 << width` distances, so the next
/// slot's base is the previous base plus that span.
const fn build_dist_tables() -> ([u32; 60], [u8; 60]) {
    let mut bases = [0u32; 60];
    let mut widths = [0u8; 60];
    let mut slot = 0usize;
    let mut next_base = 0u32;
    let mut width = 0usize;
    while width < DIST_SLOTS_PER_WIDTH.len() {
        let mut remaining = DIST_SLOTS_PER_WIDTH[width];
        while remaining > 0 {
            bases[slot] = next_base;
            widths[slot] = width as u8;
            next_base += 1u32 << (width as u32);
            slot += 1;
            remaining -= 1;
        }
        width += 1;
    }
    (bases, widths)
}

/// Both distance tables, evaluated once at compile time.
const DIST_TABLES: ([u32; 60], [u8; 60]) = build_dist_tables();

/// Base distance (before the implicit `+ 1`) for every distance slot.
const DIST_BASE: [u32; 60] = DIST_TABLES.0;

/// Number of extra bits that follow every distance slot.
const DIST_EXTRA: [u8; 60] = DIST_TABLES.1;
70
71pub struct Rar29Decoder {
85 lzss: LzssDecoder,
87 huffman: HuffmanDecoder,
89 vm: RarVM,
91 ppm: Option<PpmModel>,
93 ppm_coder: Option<RangeCoder>,
95 ppm_esc_char: i32,
97 old_dist: [u32; 4],
99 old_dist_ptr: usize,
101 last_dist: u32,
103 last_len: u32,
105 ppm_mode: bool,
107 tables_read: bool,
109 prev_low_offset: u32,
111 low_offset_repeat_count: u32,
113 next_filter_check: u64,
115}
116
117impl Rar29Decoder {
118 pub fn new() -> Self {
120 Self::with_window_size(0x400000) }
122
123 pub fn with_window_size(window_size: usize) -> Self {
126 Self {
127 lzss: LzssDecoder::new(window_size),
128 huffman: HuffmanDecoder::new(),
129 vm: RarVM::new(),
130 ppm: None,
131 ppm_coder: None,
132 ppm_esc_char: -1,
133 old_dist: [0; 4],
134 old_dist_ptr: 0,
135 last_dist: 0,
136 last_len: 0,
137 ppm_mode: false,
138 tables_read: false,
139 prev_low_offset: 0,
140 low_offset_repeat_count: 0,
141 next_filter_check: u64::MAX,
142 }
143 }
144
145 #[cfg(test)]
147 pub fn get_output(&self) -> Vec<u8> {
148 self.lzss.output().to_vec()
149 }
150
151 pub fn decompress(&mut self, data: &[u8], unpacked_size: u64) -> Result<Vec<u8>> {
154 let mut reader = BitReader::new(data);
155
156 self.lzss.enable_output(unpacked_size as usize);
158
159 if !self.tables_read {
161 self.read_tables(&mut reader)?;
162 }
163
164 while self.lzss.total_written() < unpacked_size {
166 if reader.is_eof() {
167 break;
168 }
169
170 self.decode_block(&mut reader, unpacked_size)?;
171 }
172
173 let total_written = self.lzss.total_written();
175 let window_mask = self.lzss.window_mask() as usize;
176
177 loop {
179 let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
181 Some((idx, pos)) => (idx, pos),
182 None => break,
183 };
184
185 let flushed = self.lzss.flushed_pos();
187 if flushed < next_pos {
188 self.lzss.flush_to_output(next_pos);
189 }
190
191 let window = self.lzss.window();
192 if let Some((_filter_end, filtered_data)) =
193 self.vm
194 .execute_filter_at_index(filter_idx, window, window_mask, total_written)
195 {
196 self.lzss.write_filtered_to_output(filtered_data, next_pos);
198 } else {
199 break;
200 }
201 }
202
203 self.lzss.flush_to_output(total_written);
205
206 Ok(self.lzss.take_output())
208 }
209
210 fn read_tables(&mut self, reader: &mut BitReader) -> Result<()> {
212 #[cfg(test)]
213 {
214 let byte_pos = reader.bit_position() / 8;
215 eprintln!(
216 "read_tables ENTRY: bit_pos={}, byte_pos={}",
217 reader.bit_position(),
218 byte_pos
219 );
220 eprintln!(" raw bytes at pos: {:02x?}", reader.peek_bytes(8));
221 }
222 reader.align_to_byte();
224 #[cfg(test)]
225 {
226 let byte_pos = reader.bit_position() / 8;
227 eprintln!(
228 "read_tables AFTER align: bit_pos={}, byte_pos={}",
229 reader.bit_position(),
230 byte_pos
231 );
232 eprintln!(" raw bytes at pos: {:02x?}", reader.peek_bytes(8));
233 };
234
235 let ppm_flag = reader.peek_bits(1) != 0;
238
239 self.ppm_mode = ppm_flag;
240
241 if self.ppm_mode {
242 let ppm = self.ppm.get_or_insert_with(PpmModel::new);
245 match ppm.init(reader) {
246 Ok((coder, esc_char)) => {
247 self.ppm_coder = Some(coder);
248 self.ppm_esc_char = esc_char;
249 #[cfg(test)]
250 println!("PPMd initialized: esc_char={}", esc_char);
251 }
252 Err(e) => {
253 #[cfg(test)]
254 println!("PPMd init failed: {}", e);
255 #[cfg(not(test))]
256 let _ = e;
257 return Err(DecompressError::UnsupportedMethod(0x33));
258 }
259 }
260 } else {
261 self.prev_low_offset = 0;
263 self.low_offset_repeat_count = 0;
264
265 let reset_tables = reader.peek_bits(2) & 1 == 0; reader.advance_bits(2);
269
270 if reset_tables {
271 self.huffman.reset_tables();
272 }
273
274 self.huffman.read_tables_after_header(reader)?;
276 }
277
278 self.tables_read = true;
279 Ok(())
280 }
281
282 fn decode_block(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
284 if self.ppm_mode {
285 return self.decode_block_ppm(reader, max_size);
286 }
287
288 if self.huffman.main_table.is_none() || self.huffman.dist_table.is_none() {
290 return Err(DecompressError::InvalidHuffmanCode);
291 }
292
293 while self.lzss.total_written() < max_size && !reader.is_eof() {
294 self.maybe_flush_window();
296 self.maybe_execute_filters();
298
299 #[cfg(test)]
301 let bit_pos_main_start = reader.bit_position();
302 #[cfg(test)]
303 let peek_bits = reader.peek_bits(16);
304
305 let symbol = unsafe {
307 self.huffman
308 .main_table
309 .as_ref()
310 .unwrap_unchecked()
311 .decode(reader)?
312 };
313
314 #[cfg(test)]
315 {
316 let pos = self.lzss.total_written();
317 if pos >= 1498580 && pos <= 1498610 {
318 let bit_pos_after = reader.bit_position();
319 eprintln!(
320 "MAIN sym={} at pos={}, bits {}->{} peek={:016b}",
321 symbol, pos, bit_pos_main_start, bit_pos_after, peek_bits
322 );
323 }
324 }
325
326 if symbol < 256 {
327 #[cfg(test)]
329 {
330 let pos = self.lzss.total_written();
331 if pos >= 1498595 && pos <= 1498610 {
332 eprintln!("WRITING literal 0x{:02x} at output pos {}", symbol, pos);
333 }
334 }
335 self.lzss.write_literal(symbol as u8);
336 } else if symbol == 256 {
337 #[cfg(test)]
343 eprintln!(
344 "\n=== SYMBOL 256 (end of block) at output pos {}, bit_pos {} ===",
345 self.lzss.total_written(),
346 reader.bit_position()
347 );
348 if !reader.is_eof() {
349 let first_bit = reader.read_bit()?;
350 #[cfg(test)]
351 eprintln!(
352 " first_bit={}, bit_pos after={}",
353 first_bit,
354 reader.bit_position()
355 );
356 if first_bit {
357 self.prev_low_offset = 0;
360 self.low_offset_repeat_count = 0;
361 self.read_tables(reader)?;
363 #[cfg(test)]
364 {
365 eprintln!(
366 "After new tables: bit_pos={}, next 16 bits={:016b}",
367 reader.bit_position(),
368 reader.peek_bits(16)
369 );
370 eprintln!(" About to decode first symbol after table read");
371 }
372 continue;
374 }
375 let _second_bit = reader.read_bit()?; }
379 break;
380 } else if symbol == 257 {
381 #[cfg(test)]
383 eprintln!(
384 "\n=== SYMBOL 257 (VM code) at output pos {} ===",
385 self.lzss.total_written()
386 );
387 self.read_vm_code(reader)?;
388 } else if symbol == 258 {
389 if self.last_len > 0 {
391 #[cfg(test)]
392 {
393 let pos = self.lzss.total_written();
394 let end = pos + self.last_len as u64;
395 if pos <= 1498598 && end > 1498598 {
396 eprintln!(
397 "!!! AT 1498598: symbol 258 repeat, last_dist={}, last_len={}",
398 self.last_dist, self.last_len
399 );
400 }
401 }
402 self.lzss.copy_match(self.last_dist, self.last_len)?;
403 }
404 } else if symbol < 263 {
405 let idx = (symbol - 259) as usize;
407 let distance = self.old_dist[idx];
408
409 let length = self.decode_length_from_table(reader)?;
411
412 #[cfg(test)]
413 {
414 let written = self.lzss.total_written();
415 let end = written + length as u64;
416 if written <= 1498598 && end > 1498598 {
417 eprintln!(
418 "!!! AT 1498598: old idx={},len={},dist={}",
419 idx, length, distance
420 );
421 }
422 }
423
424 self.lzss.copy_match(distance, length)?;
425
426 for i in (1..=idx).rev() {
428 self.old_dist[i] = self.old_dist[i - 1];
429 }
430 self.old_dist[0] = distance;
431 self.last_dist = distance;
432 self.last_len = length;
433 } else if symbol <= 270 {
434 let idx = (symbol - 263) as usize;
436 let base = SHORT_BASES[idx];
437 let bits = SHORT_BITS[idx];
438 let extra = if bits > 0 {
439 reader.read_bits(bits as u32)?
440 } else {
441 0
442 };
443 let distance = base + extra + 1;
444 let length = 2u32;
445
446 #[cfg(test)]
447 {
448 let written = self.lzss.total_written();
449 let end = written + length as u64;
450 if written <= 1498598 && end > 1498598 {
451 eprintln!(
452 "!!! AT 1498598: short sym={}, idx={}, base={}, bits={}, extra={}, dist={}",
453 symbol, idx, base, bits, extra, distance
454 );
455 }
456 }
457
458 self.lzss.copy_match(distance, length)?;
459
460 for i in (1..4).rev() {
462 self.old_dist[i] = self.old_dist[i - 1];
463 }
464 self.old_dist[0] = distance;
465 self.old_dist_ptr = 0;
466 self.last_dist = distance;
467 self.last_len = length;
468 } else {
469 #[cfg(test)]
471 let bit_before_len = reader.bit_position();
472
473 let len_idx = (symbol - 271) as usize;
474 let length = if len_idx < LENGTH_BASE.len() {
475 let base = LENGTH_BASE[len_idx];
476 let extra = LENGTH_EXTRA[len_idx];
477 let extra_val = if extra > 0 {
478 reader.read_bits(extra as u32)?
479 } else {
480 0
481 };
482 #[cfg(test)]
483 {
484 let written = self.lzss.total_written();
485 if written >= 1498595 && written <= 1498602 {
486 let bit_after_len = reader.bit_position();
487 eprintln!(
488 "!!! LONG DECODE at {}: sym={}, len_idx={}, len={}, bits {}->{}]",
489 written,
490 symbol,
491 len_idx,
492 base + extra_val + 3,
493 bit_before_len,
494 bit_after_len
495 );
496 }
497 }
498 base + extra_val + 3 } else {
500 #[cfg(test)]
501 eprintln!(
502 "\nlen_idx {} out of range at written={}",
503 len_idx,
504 self.lzss.total_written()
505 );
506 return Err(DecompressError::InvalidHuffmanCode);
507 };
508
509 let dist_symbol = {
511 #[cfg(test)]
512 let bit_pos_before = reader.bit_position();
513
514 let dist_table = unsafe { self.huffman.dist_table.as_ref().unwrap_unchecked() };
516 match dist_table.decode(reader) {
517 Ok(s) => {
518 #[cfg(test)]
519 {
520 let written = self.lzss.total_written();
521 if written >= 1498595 && written <= 1498610 {
522 let bit_pos_after = reader.bit_position();
523 eprintln!(
524 " dist_symbol={} at pos {} (bits {}->{})",
525 s, written, bit_pos_before, bit_pos_after
526 );
527 }
528 }
529 s
530 }
531 Err(e) => {
532 #[cfg(test)]
533 eprintln!(
534 "\nOffset decode failed at written={}, len={}",
535 self.lzss.total_written(),
536 length
537 );
538 return Err(e);
539 }
540 }
541 };
542
543 let dist_code = dist_symbol as usize;
544 let distance = if dist_code < DIST_BASE.len() {
545 let base = DIST_BASE[dist_code];
546 let extra = DIST_EXTRA[dist_code];
547
548 let extra_val = if extra > 0 {
549 if dist_code > 9 {
550 let high = if extra > 4 {
553 #[cfg(test)]
554 let high_bit_pos = reader.bit_position();
555 let h = reader.read_bits((extra - 4) as u32)?;
556 #[cfg(test)]
557 {
558 let written = self.lzss.total_written();
559 if (written >= 1498595 && written <= 1498610)
560 || (written >= 2176060 && written <= 2176080)
561 {
562 eprintln!(
563 " high bits at {}: {} bits = {} (0b{:016b}), pos {}->{}",
564 written,
565 extra - 4,
566 h, h,
567 high_bit_pos,
568 reader.bit_position()
569 );
570 }
571 }
572 h << 4
573 } else {
574 0
575 };
576 let low = if self.low_offset_repeat_count > 0 {
578 self.low_offset_repeat_count -= 1;
579 #[cfg(test)]
580 {
581 let written = self.lzss.total_written();
582 if written >= 1498550 && written <= 1498610 {
583 eprintln!(
584 "!!! low_offset REPEAT at {}: prev={}",
585 written, self.prev_low_offset
586 );
587 }
588 }
589 self.prev_low_offset
590 } else {
591 #[cfg(test)]
592 let bit_pos_before = reader.bit_position();
593 #[cfg(test)]
594 let raw_bits_16 = reader.peek_bits(16);
595 let low_table = unsafe {
597 self.huffman.low_dist_table.as_ref().unwrap_unchecked()
598 };
599 #[cfg(test)]
600 {
601 let written = self.lzss.total_written();
602 if written == 1498598 {
603 eprintln!(
605 "!!! LOW_TABLE at 1498598 decode_len: {:?}",
606 low_table.dump_decode_len()
607 );
608 eprintln!(
609 "!!! LOW_TABLE at 1498598 symbols: {:?}",
610 low_table.dump_symbols()
611 );
612 }
613 }
614 let sym = low_table.decode(reader)? as u32;
615 #[cfg(test)]
616 {
617 let written = self.lzss.total_written();
618 if written >= 1498550 && written <= 1498610 {
619 let bit_pos_after = reader.bit_position();
620 eprintln!("!!! low_offset at {}: sym={} (bits {}->{}), raw peek = {:016b}",
621 written, sym, bit_pos_before, bit_pos_after, raw_bits_16);
622 }
623 }
624
625 if sym == 16 {
626 self.low_offset_repeat_count = 16 - 1; self.prev_low_offset
630 } else {
631 self.prev_low_offset = sym;
632 sym
633 }
634 };
635 #[cfg(test)]
636 {
637 let written = self.lzss.total_written();
638 if written >= 2176060 && written <= 2176080 {
639 if self.low_offset_repeat_count > 0 {
640 eprintln!(
641 " low_offset REPEAT at {}: prev={}, remaining={}",
642 written,
643 self.prev_low_offset,
644 self.low_offset_repeat_count
645 );
646 } else {
647 eprintln!(" low_offset at {}: dist_code={}, base={}, extra={}, high={}, low={}, dist={}",
648 written, dist_code, base, extra, high, low, base + high + low + 1);
649 }
650 }
651 }
652 high + low
653 } else {
654 reader.read_bits(extra as u32)?
656 }
657 } else {
658 0
659 };
660 base + extra_val + 1
661 } else {
662 #[cfg(test)]
663 eprintln!(
664 "\ndist_code {} out of range at written={}",
665 dist_code,
666 self.lzss.total_written()
667 );
668 return Err(DecompressError::InvalidHuffmanCode);
669 };
670
671 let length = if distance >= 0x2000 {
674 if distance >= 0x40000 {
675 length + 2
676 } else {
677 length + 1
678 }
679 } else {
680 length
681 };
682
683 #[cfg(test)]
684 {
685 let written = self.lzss.total_written();
686 let end = written + length as u64;
687 if written <= 1498598 && end > 1498598 {
688 eprintln!(
689 "!!! AT 1498598: long match dist={}, len={}",
690 distance, length
691 );
692 let src_pos = (written as u32).wrapping_sub(distance) as usize;
694 let _mask = self.lzss.window_mask() as usize;
695 let window = self.lzss.window();
696 eprintln!(
697 " window src[{}..{}]: {:02x?}",
698 src_pos,
699 src_pos + length as usize,
700 &window[src_pos..src_pos + length as usize]
701 );
702 }
703 if written >= 1498595 && written <= 1498602 {
704 eprintln!(
705 "LONG MATCH at {}: dist={}, len={}",
706 written, distance, length
707 );
708 }
709 }
710
711 self.lzss.copy_match(distance, length)?;
712
713 for i in (1..4).rev() {
715 self.old_dist[i] = self.old_dist[i - 1];
716 }
717 self.old_dist[0] = distance;
718 self.old_dist_ptr = 0;
719 self.last_dist = distance;
720 self.last_len = length;
721 }
722 }
723
724 Ok(())
725 }
726
727 fn decode_length_from_table(&mut self, reader: &mut BitReader) -> Result<u32> {
729 let symbol = {
730 let len_table = self
731 .huffman
732 .len_table
733 .as_ref()
734 .ok_or(DecompressError::InvalidHuffmanCode)?;
735 len_table.decode(reader)?
736 };
737
738 let sym = symbol as usize;
739 if sym < LENGTH_BASE.len() {
740 let base = LENGTH_BASE[sym];
741 let extra = LENGTH_EXTRA[sym];
742 let extra_val = if extra > 0 {
743 reader.read_bits(extra as u32)?
744 } else {
745 0
746 };
747 Ok(base + extra_val + 2)
748 } else {
749 Err(DecompressError::InvalidHuffmanCode)
750 }
751 }
752
753 #[cold]
756 fn read_vm_code(&mut self, reader: &mut BitReader) -> Result<()> {
757 #[cfg(test)]
758 let bit_pos_start = reader.bit_position();
759
760 let first_byte = reader.read_bits(8)? as u8;
762
763 let length = {
768 let base = (first_byte & 7) + 1;
769 match base {
770 7 => {
771 let next = reader.read_bits(8)? as u32;
773 next + 7
774 }
775 8 => {
776 reader.read_bits(16)?
778 }
779 _ => base as u32,
780 }
781 };
782
783 #[cfg(test)]
784 eprintln!(
785 " read_vm_code: first_byte=0x{:02x}, length={}, bit_pos_start={}",
786 first_byte, length, bit_pos_start
787 );
788
789 if length == 0 {
790 return Ok(());
791 }
792
793 let mut vm_code = vec![0u8; length as usize];
795 for i in 0..length as usize {
796 vm_code[i] = reader.read_bits(8)? as u8;
797 }
798
799 #[cfg(test)]
800 eprintln!(" vm_code end bit_pos={}", reader.bit_position());
801
802 let total_written = self.lzss.total_written();
804 let window_mask = self.lzss.window_mask();
805
806 #[cfg(test)]
807 eprintln!(
808 " add_code: total_written={}, window_mask={:x}",
809 total_written, window_mask
810 );
811
812 #[cfg(test)]
813 {
814 let had_pending_before = self.vm.has_pending_filters();
815 let result = self
816 .vm
817 .add_code(first_byte, &vm_code, total_written, window_mask);
818 let has_pending_after = self.vm.has_pending_filters();
819 if let Some(next_pos) = self.vm.next_filter_pos() {
820 eprintln!(
821 " vm.add_code: added={}, pending={}->{}, next_pos={}",
822 result, had_pending_before, has_pending_after, next_pos
823 );
824 } else {
825 eprintln!(
826 " vm.add_code: added={}, pending={}->{}, next_pos=NONE",
827 result, had_pending_before, has_pending_after
828 );
829 }
830 }
831 #[cfg(not(test))]
832 self.vm
833 .add_code(first_byte, &vm_code, total_written, window_mask);
834
835 if let Some(end) = self.vm.next_filter_end() {
837 self.next_filter_check = self.next_filter_check.min(end);
838 }
839
840 Ok(())
841 }
842
843 #[inline]
846 fn maybe_flush_window(&mut self) {
847 let total_written = self.lzss.total_written();
848 let flushed = self.lzss.flushed_pos();
849 let window_size = self.lzss.window().len() as u64;
850
851 if total_written - flushed < window_size / 2 {
853 return;
854 }
855
856 let safe_pos = match self.vm.next_filter_pos() {
858 Some(filter_start) if filter_start > flushed => filter_start,
859 Some(_) => return, None => total_written,
861 };
862
863 if safe_pos > flushed {
864 self.lzss.flush_to_output(safe_pos);
865 }
866 }
867
868 #[inline]
871 fn maybe_execute_filters(&mut self) {
872 let total_written = self.lzss.total_written();
873
874 if total_written < self.next_filter_check {
876 return;
877 }
878
879 let window_mask = self.lzss.window_mask() as usize;
880
881 loop {
883 let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
885 Some((idx, pos)) => (idx, pos),
886 None => break,
887 };
888
889 let flushed = self.lzss.flushed_pos();
891 if flushed < next_pos {
892 self.lzss.flush_to_output(next_pos);
893 }
894
895 let window = self.lzss.window();
897 if let Some((filter_end, filtered_data)) =
898 self.vm
899 .execute_filter_at_index(filter_idx, window, window_mask, total_written)
900 {
901 self.lzss.write_filtered_to_output(filtered_data, next_pos);
903 self.next_filter_check = filter_end;
905 } else {
906 break;
907 }
908 }
909
910 self.next_filter_check = self.vm.next_filter_end().unwrap_or(u64::MAX);
912 }
913
914 fn decode_block_ppm(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
916 let ppm = self
917 .ppm
918 .as_mut()
919 .ok_or(DecompressError::UnsupportedMethod(0x33))?;
920 let coder = self
921 .ppm_coder
922 .as_mut()
923 .ok_or(DecompressError::UnsupportedMethod(0x33))?;
924 let esc_char = self.ppm_esc_char;
925
926 while self.lzss.total_written() < max_size && !reader.is_eof() {
927 {
930 let total_written = self.lzss.total_written();
931 let flushed = self.lzss.flushed_pos();
932 let window_size = self.lzss.window().len() as u64;
933 if total_written - flushed >= window_size / 2 {
934 let safe_pos = match self.vm.next_filter_pos() {
935 Some(fs) if fs > flushed => fs,
936 Some(_) => total_written, None => total_written,
938 };
939 if safe_pos > flushed {
940 self.lzss.flush_to_output(safe_pos);
941 }
942 }
943 }
944
945 let ch = ppm.decode_char(coder, reader).map_err(|e| {
946 #[cfg(test)]
947 eprintln!(
948 "PPM decode_char failed at pos {}: {}",
949 self.lzss.total_written(),
950 e
951 );
952 #[cfg(not(test))]
953 let _ = e;
954 DecompressError::InvalidHuffmanCode
955 })?;
956
957 if ch < 0 {
958 #[cfg(test)]
960 eprintln!("PPM decode_char returned negative: {}", ch);
961 return Err(DecompressError::InvalidHuffmanCode);
962 }
963
964 #[cfg(test)]
965 {
966 if self.lzss.total_written() < 20 {
967 eprint!("[{}:{}] ", self.lzss.total_written(), ch);
968 }
969 }
970
971 if ch != esc_char {
972 self.lzss.write_literal(ch as u8);
974 } else {
975 let ctrl = ppm
977 .decode_char(coder, reader)
978 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
979
980 if ctrl < 0 {
981 return Err(DecompressError::InvalidHuffmanCode);
982 }
983
984 match ctrl {
985 0 => {
986 break;
988 }
989 1 => {
990 self.lzss.write_literal(esc_char as u8);
992 }
993 2 => {
994 break;
996 }
997 3 => {
998 let first_byte = ppm
1000 .decode_char(coder, reader)
1001 .map_err(|_| DecompressError::InvalidHuffmanCode)?
1002 as u8;
1003
1004 let mut length = ((first_byte & 7) + 1) as u32;
1006 if length == 7 {
1007 let b1 = ppm
1008 .decode_char(coder, reader)
1009 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1010 length = (b1 as u32) + 7;
1011 } else if length == 8 {
1012 let b1 = ppm
1013 .decode_char(coder, reader)
1014 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1015 let b2 = ppm
1016 .decode_char(coder, reader)
1017 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1018 length = (b1 as u32) * 256 + (b2 as u32);
1019 }
1020
1021 if length == 0 {
1022 continue;
1023 }
1024
1025 let mut vm_code = vec![0u8; length as usize];
1027 for i in 0..length as usize {
1028 let ch = ppm
1029 .decode_char(coder, reader)
1030 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1031 vm_code[i] = ch as u8;
1032 }
1033
1034 let total_written = self.lzss.total_written();
1036 let window_mask = self.lzss.window_mask();
1037 self.vm
1038 .add_code(first_byte, &vm_code, total_written, window_mask);
1039
1040 if let Some(end) = self.vm.next_filter_end() {
1042 self.next_filter_check = self.next_filter_check.min(end);
1043 }
1044 }
1045 4 => {
1046 let mut distance: u32 = 0;
1048 for _ in 0..3 {
1049 let ch = ppm
1050 .decode_char(coder, reader)
1051 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1052 distance = (distance << 8) + (ch as u32);
1053 }
1054 let len = ppm
1055 .decode_char(coder, reader)
1056 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1057
1058 let distance = distance + 2;
1060 let length = (len as u32) + 32;
1061
1062 self.lzss.copy_match(distance, length)?;
1063 self.last_dist = distance;
1064 self.last_len = length;
1065 }
1066 5 => {
1067 let len = ppm
1069 .decode_char(coder, reader)
1070 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1071
1072 let length = (len as u32) + 4;
1074
1075 self.lzss.copy_match(1, length)?;
1076 self.last_dist = 1;
1077 self.last_len = length;
1078 }
1079 _ => {
1080 #[cfg(test)]
1082 eprintln!("Unknown PPM control code: {}", ctrl);
1083 return Err(DecompressError::InvalidHuffmanCode);
1084 }
1085 }
1086 }
1087 }
1088
1089 Ok(())
1090 }
1091
1092 pub fn reset(&mut self) {
1094 self.lzss.reset();
1095 self.vm.reset();
1096 self.ppm_coder = None;
1098 self.ppm_esc_char = -1;
1099 self.old_dist = [0; 4];
1100 self.old_dist_ptr = 0;
1101 self.last_dist = 0;
1102 self.last_len = 0;
1103 self.ppm_mode = false;
1104 self.tables_read = false;
1105 self.prev_low_offset = 0;
1106 self.low_offset_repeat_count = 0;
1107 self.next_filter_check = u64::MAX;
1108 }
1109
1110 pub fn bytes_written(&self) -> u64 {
1112 self.lzss.total_written()
1113 }
1114}
1115
1116impl Default for Rar29Decoder {
1117 fn default() -> Self {
1118 Self::new()
1119 }
1120}
1121
1122#[allow(dead_code)]
1126pub struct Rar29StreamDecoder {
1127 decoder: Rar29Decoder,
1128 input_buffer: Vec<u8>,
1130 input_pos: usize,
1132 unpacked_size: u64,
1134}
1135
1136#[allow(dead_code)]
1137impl Rar29StreamDecoder {
1138 pub fn new(unpacked_size: u64) -> Self {
1140 Self {
1141 decoder: Rar29Decoder::new(),
1142 input_buffer: Vec::new(),
1143 input_pos: 0,
1144 unpacked_size,
1145 }
1146 }
1147
1148 pub fn feed(&mut self, data: &[u8]) -> Result<Vec<u8>> {
1151 self.input_buffer.extend_from_slice(data);
1152
1153 let result = self
1155 .decoder
1156 .decompress(&self.input_buffer[self.input_pos..], self.unpacked_size)?;
1157
1158 Ok(result)
1159 }
1160
1161 pub fn is_complete(&self) -> bool {
1163 self.decoder.bytes_written() >= self.unpacked_size
1164 }
1165
1166 pub fn bytes_written(&self) -> u64 {
1168 self.decoder.bytes_written()
1169 }
1170}
1171
#[cfg(test)]
mod tests {
    use super::Rar29Decoder;

    /// A freshly built decoder has written nothing and has not read tables.
    #[test]
    fn test_decoder_creation() {
        let fresh = Rar29Decoder::new();
        assert!(!fresh.tables_read);
        assert_eq!(fresh.bytes_written(), 0);
    }
}
1183 }