1use super::{
7 bit_reader::BitReader,
8 huffman::HuffmanDecoder,
9 lzss::LzssDecoder,
10 ppm::{PpmModel, RangeCoder},
11 vm::RarVM,
12 DecompressError, Result,
13};
14
// Alphabet-size constants. Only `LEN_CODES` is referenced in this file (as the
// length of the two length tables below); the others are presumably the symbol
// counts of the corresponding Huffman tables -- TODO confirm against the
// Huffman module.
const MAIN_CODES: usize = 299;
const DIST_CODES: usize = 60;
const LOW_DIST_CODES: usize = 17;
const LEN_CODES: usize = 28;

// Not referenced in this file; presumably the longest match the format can
// encode -- TODO confirm.
const MAX_MATCH_LEN: u32 = 258;

/// Base distance (before the `+ 1` bias applied by the decoder) for each of
/// the eight short-match symbols; indexed by `symbol - 263`.
const SHORT_BASES: [u32; 8] = [
    0, 4, 8, 16,
    32, 64, 128, 192,
];

/// Number of raw bits that follow each short-match symbol and are added to the
/// matching entry of `SHORT_BASES`.
const SHORT_BITS: [u8; 8] = [
    2, 2, 3, 4,
    5, 6, 6, 6,
];

/// Base match length for each length slot. The decoder adds the slot's extra
/// bits plus a fixed bias (2 or 3 depending on the symbol class).
const LENGTH_BASE: [u32; LEN_CODES] = [
    0, 1, 2, 3, 4, 5, 6, 7,
    8, 10, 12, 14,
    16, 20, 24, 28,
    32, 40, 48, 56,
    64, 80, 96, 112,
    128, 160, 192, 224,
];

/// Number of raw bits that follow each length slot; slot `i` spans
/// `1 << LENGTH_EXTRA[i]` consecutive lengths starting at `LENGTH_BASE[i]`.
const LENGTH_EXTRA: [u8; LEN_CODES] = [
    0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
    4, 4, 4, 4,
    5, 5, 5, 5,
];
47
/// Base distance (before the decoder's `+ 1` bias) for each of the 60 distance
/// slots.
///
/// Each slot `i` covers `1 << DIST_EXTRA[i]` consecutive distances, so
/// `DIST_BASE[i + 1] == DIST_BASE[i] + (1 << DIST_EXTRA[i])`. The table used to
/// stop after slot 47, which made every match further back than 1 MiB fail
/// with an "out of range" error; slots 48..=59 (18 extra bits each) extend the
/// reach to the full 4 MiB.
const DIST_BASE: [u32; 60] = [
    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48,
    64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072,
    4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304,
    131072, 196608, 262144, 327680, 393216, 458752, 524288, 589824,
    655360, 720896, 786432, 851968, 917504, 983040,
    1048576, 1310720, 1572864, 1835008, 2097152, 2359296,
    2621440, 2883584, 3145728, 3407872, 3670016, 3932160,
];

/// Number of extra distance bits for each slot of `DIST_BASE`.
///
/// The bit-length histogram is: 4 slots with 0 bits, 2 slots each for 1..=15
/// bits, 14 slots with 16 bits and 12 slots with 18 bits.
const DIST_EXTRA: [u8; 60] = [
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
    5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10,
    11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
];
64
65pub struct Rar29Decoder {
67 lzss: LzssDecoder,
69 huffman: HuffmanDecoder,
71 vm: RarVM,
73 ppm: Option<PpmModel>,
75 ppm_coder: Option<RangeCoder>,
77 ppm_esc_char: i32,
79 old_dist: [u32; 4],
81 old_dist_ptr: usize,
83 last_dist: u32,
85 last_len: u32,
87 ppm_mode: bool,
89 tables_read: bool,
91 prev_low_offset: u32,
93 low_offset_repeat_count: u32,
95}
96
97impl Rar29Decoder {
98 pub fn new() -> Self {
100 Self {
101 lzss: LzssDecoder::rar29(),
102 huffman: HuffmanDecoder::new(),
103 vm: RarVM::new(),
104 ppm: None,
105 ppm_coder: None,
106 ppm_esc_char: -1,
107 old_dist: [0; 4],
108 old_dist_ptr: 0,
109 last_dist: 0,
110 last_len: 0,
111 ppm_mode: false,
112 tables_read: false,
113 prev_low_offset: 0,
114 low_offset_repeat_count: 0,
115 }
116 }
117
118 pub fn decompress(&mut self, data: &[u8], unpacked_size: u64) -> Result<Vec<u8>> {
121 let mut reader = BitReader::new(data);
122
123 if !self.tables_read {
125 self.read_tables(&mut reader)?;
126 }
127
128 while self.lzss.total_written() < unpacked_size {
130 if reader.is_eof() {
131 break;
132 }
133
134 self.decode_block(&mut reader, unpacked_size)?;
135 }
136
137 let len = unpacked_size as usize;
139 Ok(self.lzss.get_output(0, len))
140 }
141
142 fn read_tables(&mut self, reader: &mut BitReader) -> Result<()> {
144 reader.align_to_byte();
146
147 let ppm_flag = reader.peek_bits(1) != 0;
150
151 self.ppm_mode = ppm_flag;
152
153 if self.ppm_mode {
154 let mut ppm = PpmModel::new();
157 match ppm.init(reader) {
158 Ok((coder, esc_char)) => {
159 self.ppm = Some(ppm);
160 self.ppm_coder = Some(coder);
161 self.ppm_esc_char = esc_char;
162 #[cfg(test)]
163 println!("PPMd initialized: esc_char={}", esc_char);
164 }
165 Err(_e) => {
166 #[cfg(test)]
167 println!("PPMd init failed: {}", _e);
168 return Err(DecompressError::UnsupportedMethod(0x33));
169 }
170 }
171 } else {
172 let reset_tables = reader.peek_bits(2) & 1 == 0; reader.advance_bits(2);
176
177 if reset_tables {
178 self.huffman.reset_tables();
179 }
180
181 self.huffman.read_tables_after_header(reader)?;
183 }
184
185 self.tables_read = true;
186 Ok(())
187 }
188
189 fn decode_block(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
191 if self.ppm_mode {
192 return self.decode_block_ppm(reader, max_size);
193 }
194
195 if self.huffman.main_table.is_none() || self.huffman.dist_table.is_none() {
197 return Err(DecompressError::InvalidHuffmanCode);
198 }
199
200 #[cfg(test)]
201 let mut symbol_count = 0;
202
203 while self.lzss.total_written() < max_size && !reader.is_eof() {
204 #[cfg(test)]
206 let bit_pos_main_start = reader.bit_position();
207
208 let symbol = {
209 let main_table = self.huffman.main_table.as_ref().unwrap();
210 main_table.decode(reader)?
211 };
212
213 #[cfg(test)]
214 {
215 let written = self.lzss.total_written();
216 if written >= 0 && written < 0 {
217 let bit_pos_main_end = reader.bit_position();
218 eprintln!("\nmain decode at pos {}: sym={} (bits {}->{})",
219 written, symbol, bit_pos_main_start, bit_pos_main_end);
220 }
221 }
222
223 #[cfg(test)]
224 {
225 if symbol_count < 0 {
226 eprint!("main[{}]={} ", symbol_count, symbol);
227 symbol_count += 1;
228 }
229 }
230
231 if symbol < 256 {
232 #[cfg(test)]
234 {
235 let written = self.lzss.total_written();
236 if written >= 0 && written < 0 {
237 eprint!("[{}:{}='{}'] ", written, symbol, symbol as u8 as char);
238 }
239 }
240 self.lzss.write_literal(symbol as u8);
241 } else if symbol == 256 {
242 if !reader.is_eof() {
244 let new_tables = reader.read_bit()?;
246 if new_tables {
247 self.huffman.read_tables(reader)?;
248 }
249 }
250 break;
251 } else if symbol == 257 {
252 break;
254 } else if symbol == 258 {
255 if self.last_len > 0 {
257 self.lzss.copy_match(self.last_dist, self.last_len)?;
258 }
259 } else if symbol < 263 {
260 let idx = (symbol - 259) as usize;
262 let distance = self.old_dist[idx];
263
264 let length = self.decode_length_from_table(reader)?;
266
267 #[cfg(test)]
268 {
269 let written = self.lzss.total_written();
270 if written >= 0 && written < 0 {
271 eprintln!("[{}:old idx={},len={},dist={}]", written, idx, length, distance);
272 }
273 }
274
275 self.lzss.copy_match(distance, length)?;
276
277 for i in (1..=idx).rev() {
279 self.old_dist[i] = self.old_dist[i - 1];
280 }
281 self.old_dist[0] = distance;
282 self.last_dist = distance;
283 self.last_len = length;
284 } else if symbol <= 270 {
285 let idx = (symbol - 263) as usize;
287 let base = SHORT_BASES[idx];
288 let bits = SHORT_BITS[idx];
289 let extra = if bits > 0 {
290 reader.read_bits(bits as u32)?
291 } else {
292 0
293 };
294 let distance = base + extra + 1;
295 let length = 2u32;
296
297 #[cfg(test)]
298 {
299 let written = self.lzss.total_written();
300 if written >= 0 && written < 0 {
301 eprintln!("[{}:short sym={}, idx={}, base={}, bits={}, extra={}, dist={}]",
302 written, symbol, idx, base, bits, extra, distance);
303 }
304 }
305
306 self.lzss.copy_match(distance, length)?;
307
308 for i in (1..4).rev() {
310 self.old_dist[i] = self.old_dist[i - 1];
311 }
312 self.old_dist[0] = distance;
313 self.old_dist_ptr = 0;
314 self.last_dist = distance;
315 self.last_len = length;
316 } else {
317 #[cfg(test)]
319 let bit_before_len = reader.bit_position();
320
321 let len_idx = (symbol - 271) as usize;
322 let length = if len_idx < LENGTH_BASE.len() {
323 let base = LENGTH_BASE[len_idx];
324 let extra = LENGTH_EXTRA[len_idx];
325 let extra_val = if extra > 0 {
326 reader.read_bits(extra as u32)?
327 } else {
328 0
329 };
330 #[cfg(test)]
331 {
332 let written = self.lzss.total_written();
333 if written >= 0 && written < 0 {
334 let bit_after_len = reader.bit_position();
335 eprintln!("[{}:long sym={}, len_idx={}, base={}, extra_bits={}, extra_val={}, len={}, bits {}->{}]",
336 written, symbol, len_idx, base, extra, extra_val, base + extra_val + 3,
337 bit_before_len, bit_after_len);
338 }
339 }
340 base + extra_val + 3 } else {
342 #[cfg(test)]
343 eprintln!("\nlen_idx {} out of range at written={}", len_idx, self.lzss.total_written());
344 return Err(DecompressError::InvalidHuffmanCode);
345 };
346
347 let dist_symbol = {
349 #[cfg(test)]
350 let bit_pos_before = reader.bit_position();
351
352 let dist_table = self.huffman.dist_table.as_ref().unwrap();
353 match dist_table.decode(reader) {
354 Ok(s) => {
355 #[cfg(test)]
356 {
357 let written = self.lzss.total_written();
358 if written >= 0 && written < 0 {
359 let bit_pos_after = reader.bit_position();
360 eprintln!(" decoded dist_symbol={} (bits {}->{})", s, bit_pos_before, bit_pos_after);
361 }
362 }
363 s
364 }
365 Err(e) => {
366 #[cfg(test)]
367 eprintln!("\nOffset decode failed at written={}, len={}", self.lzss.total_written(), length);
368 return Err(e);
369 }
370 }
371 };
372
373 let dist_code = dist_symbol as usize;
374 let distance = if dist_code < DIST_BASE.len() {
375 let base = DIST_BASE[dist_code];
376 let extra = DIST_EXTRA[dist_code];
377
378 let extra_val = if extra > 0 {
379 if dist_code > 9 {
380 let high = if extra > 4 {
383 #[cfg(test)]
384 let high_bit_pos = reader.bit_position();
385 let h = reader.read_bits((extra - 4) as u32)?;
386 #[cfg(test)]
387 {
388 let written = self.lzss.total_written();
389 if written >= 0 && written < 0 {
390 eprintln!(" high bits: {} bits = {}, pos {}->{}",
391 extra - 4, h, high_bit_pos, reader.bit_position());
392 }
393 }
394 h << 4
395 } else {
396 0
397 };
398 let low = if self.low_offset_repeat_count > 0 {
400 self.low_offset_repeat_count -= 1;
401 #[cfg(test)]
402 {
403 let written = self.lzss.total_written();
404 if written >= 0 && written < 0 {
405 eprintln!(" low_offset repeat: prev={}", self.prev_low_offset);
406 }
407 }
408 self.prev_low_offset
409 } else {
410 #[cfg(test)]
411 let bit_pos_before = reader.bit_position();
412 #[cfg(test)]
413 let raw_bits_16 = reader.peek_bits(16);
414 let low_table = self.huffman.low_dist_table.as_ref().unwrap();
415 let sym = low_table.decode(reader)? as u32;
416 #[cfg(test)]
417 {
418 let written = self.lzss.total_written();
419 if written >= 0 && written < 0 {
420 let bit_pos_after = reader.bit_position();
421 eprintln!(" low_offset decode: sym={} (bits {}->{}), raw peek = {:016b}",
422 sym, bit_pos_before, bit_pos_after, raw_bits_16);
423 }
424 }
425
426 if sym == 16 {
427 self.low_offset_repeat_count = 15 - 1; self.prev_low_offset
430 } else {
431 self.prev_low_offset = sym;
432 sym
433 }
434 };
435 #[cfg(test)]
436 {
437 let written = self.lzss.total_written();
438 if written >= 0 && written < 0 {
439 eprintln!(" low_offset: dist_code={}, base={}, extra={}, high={}, low={}, dist={}",
440 dist_code, base, extra, high, low, base + high + low + 1);
441 }
442 }
443 high + low
444 } else {
445 #[cfg(test)]
447 let peek = reader.peek_bits(extra as u32);
448 let val = reader.read_bits(extra as u32)?;
449 #[cfg(test)]
450 {
451 let written = self.lzss.total_written();
452 if written >= 0 && written < 0 {
453 eprintln!(" direct: dist_code={}, base={}, extra_bits={}, peek={:04b}, extra_val={}, distance={}",
454 dist_code, base, extra, peek, val, base + val + 1);
455 }
456 }
457 val
458 }
459 } else {
460 0
461 };
462 base + extra_val + 1
463 } else {
464 #[cfg(test)]
465 eprintln!("\ndist_code {} out of range at written={}", dist_code, self.lzss.total_written());
466 return Err(DecompressError::InvalidHuffmanCode);
467 };
468
469 #[cfg(test)]
470 {
471 let written = self.lzss.total_written();
472 if written >= 0 && written < 0 {
473 eprintln!("[{}:long len={},dist={}]", written, length, distance);
474 }
475 }
476
477 self.lzss.copy_match(distance, length)?;
478
479 for i in (1..4).rev() {
481 self.old_dist[i] = self.old_dist[i - 1];
482 }
483 self.old_dist[0] = distance;
484 self.old_dist_ptr = 0;
485 self.last_dist = distance;
486 self.last_len = length;
487 }
488 }
489
490 Ok(())
491 }
492
493 fn decode_length_from_table(&mut self, reader: &mut BitReader) -> Result<u32> {
495 let symbol = {
496 let len_table = self.huffman.len_table.as_ref()
497 .ok_or(DecompressError::InvalidHuffmanCode)?;
498 len_table.decode(reader)?
499 };
500
501 let sym = symbol as usize;
502 if sym < LENGTH_BASE.len() {
503 let base = LENGTH_BASE[sym];
504 let extra = LENGTH_EXTRA[sym];
505 let extra_val = if extra > 0 {
506 reader.read_bits(extra as u32)?
507 } else {
508 0
509 };
510 Ok(base + extra_val + 2)
511 } else {
512 Err(DecompressError::InvalidHuffmanCode)
513 }
514 }
515
516 fn decode_block_ppm(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
518 let ppm = self.ppm.as_mut().ok_or(DecompressError::UnsupportedMethod(0x33))?;
519 let coder = self.ppm_coder.as_mut().ok_or(DecompressError::UnsupportedMethod(0x33))?;
520 let esc_char = self.ppm_esc_char;
521
522 while self.lzss.total_written() < max_size && !reader.is_eof() {
523 let ch = ppm.decode_char(coder, reader)
524 .map_err(|_e| {
525 #[cfg(test)]
526 eprintln!("PPM decode_char failed at pos {}: {}", self.lzss.total_written(), _e);
527 DecompressError::InvalidHuffmanCode
528 })?;
529
530 if ch < 0 {
531 #[cfg(test)]
533 eprintln!("PPM decode_char returned negative: {}", ch);
534 return Err(DecompressError::InvalidHuffmanCode);
535 }
536
537 #[cfg(test)]
538 {
539 if self.lzss.total_written() < 20 {
540 eprint!("[{}:{}] ", self.lzss.total_written(), ch);
541 }
542 }
543
544 if ch != esc_char {
545 self.lzss.write_literal(ch as u8);
547 } else {
548 let ctrl = ppm.decode_char(coder, reader)
550 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
551
552 if ctrl < 0 {
553 return Err(DecompressError::InvalidHuffmanCode);
554 }
555
556 match ctrl {
557 0 => {
558 break;
560 }
561 1 => {
562 self.lzss.write_literal(esc_char as u8);
564 }
565 2 => {
566 break;
568 }
569 3 => {
570 let first_byte = ppm.decode_char(coder, reader)
572 .map_err(|_| DecompressError::InvalidHuffmanCode)? as u8;
573
574 let mut length = ((first_byte & 7) + 1) as u32;
576 if length == 7 {
577 let b1 = ppm.decode_char(coder, reader)
578 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
579 length = (b1 as u32) + 7;
580 } else if length == 8 {
581 let b1 = ppm.decode_char(coder, reader)
582 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
583 let b2 = ppm.decode_char(coder, reader)
584 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
585 length = (b1 as u32) * 256 + (b2 as u32);
586 }
587
588 if length == 0 {
589 continue;
590 }
591
592 let mut vm_code = vec![0u8; length as usize];
594 for i in 0..length as usize {
595 let ch = ppm.decode_char(coder, reader)
596 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
597 vm_code[i] = ch as u8;
598 }
599
600 self.vm.add_code(first_byte, &vm_code);
602 }
603 4 => {
604 let mut distance: u32 = 0;
606 for _ in 0..3 {
607 let ch = ppm.decode_char(coder, reader)
608 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
609 distance = (distance << 8) + (ch as u32);
610 }
611 let len = ppm.decode_char(coder, reader)
612 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
613
614 let distance = distance + 2;
616 let length = (len as u32) + 32;
617
618 self.lzss.copy_match(distance, length)?;
619 self.last_dist = distance;
620 self.last_len = length;
621 }
622 5 => {
623 let len = ppm.decode_char(coder, reader)
625 .map_err(|_| DecompressError::InvalidHuffmanCode)?;
626
627 let length = (len as u32) + 4;
629
630 self.lzss.copy_match(1, length)?;
631 self.last_dist = 1;
632 self.last_len = length;
633 }
634 _ => {
635 #[cfg(test)]
637 eprintln!("Unknown PPM control code: {}", ctrl);
638 return Err(DecompressError::InvalidHuffmanCode);
639 }
640 }
641 }
642 }
643
644 Ok(())
645 }
646
647 pub fn reset(&mut self) {
649 self.lzss.reset();
650 self.vm.reset();
651 self.ppm = None;
652 self.ppm_coder = None;
653 self.ppm_esc_char = -1;
654 self.old_dist = [0; 4];
655 self.old_dist_ptr = 0;
656 self.last_dist = 0;
657 self.last_len = 0;
658 self.ppm_mode = false;
659 self.tables_read = false;
660 self.prev_low_offset = 0;
661 self.low_offset_repeat_count = 0;
662 }
663
664 pub fn bytes_written(&self) -> u64 {
666 self.lzss.total_written()
667 }
668}
669
670impl Default for Rar29Decoder {
671 fn default() -> Self {
672 Self::new()
673 }
674}
675
676pub struct Rar29StreamDecoder {
679 decoder: Rar29Decoder,
680 input_buffer: Vec<u8>,
682 input_pos: usize,
684 unpacked_size: u64,
686}
687
688impl Rar29StreamDecoder {
689 pub fn new(unpacked_size: u64) -> Self {
691 Self {
692 decoder: Rar29Decoder::new(),
693 input_buffer: Vec::new(),
694 input_pos: 0,
695 unpacked_size,
696 }
697 }
698
699 pub fn feed(&mut self, data: &[u8]) -> Result<Vec<u8>> {
702 self.input_buffer.extend_from_slice(data);
703
704 let result = self.decoder.decompress(
706 &self.input_buffer[self.input_pos..],
707 self.unpacked_size,
708 )?;
709
710 Ok(result)
711 }
712
713 pub fn is_complete(&self) -> bool {
715 self.decoder.bytes_written() >= self.unpacked_size
716 }
717
718 pub fn bytes_written(&self) -> u64 {
720 self.decoder.bytes_written()
721 }
722}
723
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed decoder has produced no output and has not read
    /// any table header yet.
    #[test]
    fn test_decoder_creation() {
        let fresh = Rar29Decoder::new();

        assert!(!fresh.tables_read);
        assert_eq!(fresh.bytes_written(), 0);
    }
}