Skip to main content

rar_stream/decompress/
rar29.rs

1//! RAR 2.9 (RAR4) decompression.
2//!
3//! Implements the LZSS + Huffman decompression used in RAR versions 2.x-4.x.
4//! This is the most common format for scene releases.
5
6// Allow disabled debug blocks in test code (written >= 0 && written < 0 is intentionally false)
7#![cfg_attr(test, allow(clippy::logic_bug))]
8
9use super::{
10    bit_reader::BitReader,
11    huffman::HuffmanDecoder,
12    lzss::LzssDecoder,
13    ppm::{PpmModel, RangeCoder},
14    vm::RarVM,
15    DecompressError, Result,
16};
17
/// Number of main codes (literals + length symbols).
// Nothing visible in this part of the file references this value, hence the allowance.
#[allow(dead_code)]
const MAIN_CODES: usize = 299;

/// Number of distance codes; also the length of [`DIST_BASE`] and [`DIST_EXTRA`].
const DIST_CODES: usize = 60;

/// Number of low distance codes.
// Nothing visible in this part of the file references this value, hence the allowance.
#[allow(dead_code)]
const LOW_DIST_CODES: usize = 17;

/// Number of length codes; also the length of [`LENGTH_BASE`] and [`LENGTH_EXTRA`].
const LEN_CODES: usize = 28;

/// Maximum match length.
// Nothing visible in this part of the file references this value, hence the allowance.
#[allow(dead_code)]
const MAX_MATCH_LEN: u32 = 258;

/// Short distance bases for symbols 263-270.
const SHORT_BASES: [u32; 8] = [0, 4, 8, 16, 32, 64, 128, 192];

/// Short distance extra bits for symbols 263-270.
const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];

/// Base lengths for length codes.
///
/// The decoder adds the extra-bit value plus a minimum match length on top of
/// these bases (3 for long matches, 2 for repeated-distance matches).
const LENGTH_BASE: [u32; LEN_CODES] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
    160, 192, 224,
];

/// Extra bits for length codes.
const LENGTH_EXTRA: [u8; LEN_CODES] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
];

/// Base distances for distance codes (60 entries for RAR3).
///
/// Each entry equals the previous base plus `1 << DIST_EXTRA[previous]`, so the
/// codes tile the distance range without gaps.
const DIST_BASE: [u32; DIST_CODES] = [
    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
    2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
    1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 2883584, 3145728, 3407872,
    3670016, 3932160,
];

/// Extra bits for distance codes (60 entries for RAR3).
const DIST_EXTRA: [u8; DIST_CODES] = [
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
    13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18,
];
70
/// RAR 2.9 decoder state.
///
/// Holds everything that must survive between calls to `decompress`: the
/// sliding window, the Huffman tables, pending VM filters, the optional PPMd
/// state and the match history used by the "repeat" symbols.
pub struct Rar29Decoder {
    /// LZSS sliding window; also accumulates the output returned by `decompress`.
    lzss: LzssDecoder,
    /// Huffman decoder holding the main, distance, low-distance and length tables.
    huffman: HuffmanDecoder,
    /// VM for filter execution
    vm: RarVM,
    /// PPMd model (used when ppm_mode is true); created lazily the first time
    /// a PPMd block header is read.
    ppm: Option<PpmModel>,
    /// PPMd range coder (used when ppm_mode is true); replaced on every
    /// successful PPMd initialisation.
    ppm_coder: Option<RangeCoder>,
    /// PPMd escape character as reported by the model initialisation.
    ppm_esc_char: i32,
    /// Previous distances for repeat matches, most recent first (index 0).
    old_dist: [u32; 4],
    /// Current distance history index.
    // NOTE(review): the code visible here only ever resets this to 0 and never
    // reads it; confirm whether it is still needed.
    old_dist_ptr: usize,
    /// Last distance used (replayed by symbol 258).
    last_dist: u32,
    /// Last length used (replayed by symbol 258); 0 means "no previous match".
    last_len: u32,
    /// PPMd mode flag, taken from the top bit of the most recent block header.
    ppm_mode: bool,
    /// True once a block header/tables have been read successfully; while it is
    /// false, `decompress` reads the tables before decoding.
    tables_read: bool,
    /// Previous low offset value for repeat
    prev_low_offset: u32,
    /// Number of upcoming matches that reuse `prev_low_offset` without decoding
    /// a new low-offset symbol.
    low_offset_repeat_count: u32,
    /// Next position where we need to check filters (optimization to avoid O(n) scan)
    next_filter_check: u64,
}
104
105impl Rar29Decoder {
106    /// Create a new RAR29 decoder with default window size (4MB).
107    pub fn new() -> Self {
108        Self::with_window_size(0x400000) // 4MB default (max common size)
109    }
110
111    /// Create a new RAR29 decoder with specified window size.
112    /// Window size must be a power of 2.
113    pub fn with_window_size(window_size: usize) -> Self {
114        Self {
115            lzss: LzssDecoder::new(window_size),
116            huffman: HuffmanDecoder::new(),
117            vm: RarVM::new(),
118            ppm: None,
119            ppm_coder: None,
120            ppm_esc_char: -1,
121            old_dist: [0; 4],
122            old_dist_ptr: 0,
123            last_dist: 0,
124            last_len: 0,
125            ppm_mode: false,
126            tables_read: false,
127            prev_low_offset: 0,
128            low_offset_repeat_count: 0,
129            next_filter_check: u64::MAX,
130        }
131    }
132
133    /// Get partial output (for debugging failed decompression)
134    #[cfg(test)]
135    pub fn get_output(&self) -> Vec<u8> {
136        self.lzss.output().to_vec()
137    }
138
139    /// Decompress a block of data.
140    /// Returns the decompressed data.
141    pub fn decompress(&mut self, data: &[u8], unpacked_size: u64) -> Result<Vec<u8>> {
142        let mut reader = BitReader::new(data);
143
144        // Enable output accumulation for files (especially those larger than window)
145        self.lzss.enable_output(unpacked_size as usize);
146
147        // Read tables if needed
148        if !self.tables_read {
149            self.read_tables(&mut reader)?;
150        }
151
152        // Decompress until we have enough data
153        while self.lzss.total_written() < unpacked_size {
154            if reader.is_eof() {
155                break;
156            }
157
158            self.decode_block(&mut reader, unpacked_size)?;
159        }
160
161        // Execute any remaining pending VM filters
162        let total_written = self.lzss.total_written();
163        let window_mask = self.lzss.window_mask() as usize;
164
165        // Execute filters in order of their block_start position
166        loop {
167            // Find the earliest filter that is ready
168            let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
169                Some((idx, pos)) => (idx, pos),
170                None => break,
171            };
172
173            // Flush up to filter start
174            let flushed = self.lzss.flushed_pos();
175            if flushed < next_pos {
176                self.lzss.flush_to_output(next_pos);
177            }
178
179            let window = self.lzss.window();
180            if let Some((_filter_end, filtered_data)) =
181                self.vm
182                    .execute_filter_at_index(filter_idx, window, window_mask, total_written)
183            {
184                // Write filtered data directly to output
185                self.lzss.write_filtered_to_output(filtered_data, next_pos);
186            } else {
187                break;
188            }
189        }
190
191        // Flush any remaining data to output
192        self.lzss.flush_to_output(total_written);
193
194        // Extract the decompressed data
195        Ok(self.lzss.take_output())
196    }
197
198    /// Read Huffman tables from the bit stream.
199    fn read_tables(&mut self, reader: &mut BitReader) -> Result<()> {
200        #[cfg(test)]
201        {
202            let byte_pos = reader.bit_position() / 8;
203            eprintln!(
204                "read_tables ENTRY: bit_pos={}, byte_pos={}",
205                reader.bit_position(),
206                byte_pos
207            );
208            eprintln!("  raw bytes at pos: {:02x?}", reader.peek_bytes(8));
209        }
210        // Align to byte boundary (like unrar)
211        reader.align_to_byte();
212        #[cfg(test)]
213        {
214            let byte_pos = reader.bit_position() / 8;
215            eprintln!(
216                "read_tables AFTER align: bit_pos={}, byte_pos={}",
217                reader.bit_position(),
218                byte_pos
219            );
220            eprintln!("  raw bytes at pos: {:02x?}", reader.peek_bytes(8));
221        };
222
223        // Peek at the high bit to check for PPM mode
224        // In unrar, this is done by peeking 16 bits and checking bit 15
225        let ppm_flag = reader.peek_bits(1) != 0;
226
227        self.ppm_mode = ppm_flag;
228
229        if self.ppm_mode {
230            // DON'T consume the PPM flag bit - it's part of the MaxOrder byte
231            // Initialize or reuse PPMd model
232            let ppm = self.ppm.get_or_insert_with(PpmModel::new);
233            match ppm.init(reader) {
234                Ok((coder, esc_char)) => {
235                    self.ppm_coder = Some(coder);
236                    self.ppm_esc_char = esc_char;
237                    #[cfg(test)]
238                    println!("PPMd initialized: esc_char={}", esc_char);
239                }
240                Err(e) => {
241                    #[cfg(test)]
242                    println!("PPMd init failed: {}", e);
243                    #[cfg(not(test))]
244                    let _ = e;
245                    return Err(DecompressError::UnsupportedMethod(0x33));
246                }
247            }
248        } else {
249            // LZ mode - reset low dist state (per unrar ReadTables30)
250            self.prev_low_offset = 0;
251            self.low_offset_repeat_count = 0;
252
253            // Check bit 1 (0x4000) for reset tables
254            let reset_tables = reader.peek_bits(2) & 1 == 0; // Bit 14 inverted (0 means reset)
255                                                             // Consume the 2 header bits (PPM flag + reset flag)
256            reader.advance_bits(2);
257
258            if reset_tables {
259                self.huffman.reset_tables();
260            }
261
262            // Read Huffman tables
263            self.huffman.read_tables_after_header(reader)?;
264        }
265
266        self.tables_read = true;
267        Ok(())
268    }
269
270    /// Decode a block of data.
271    fn decode_block(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
272        if self.ppm_mode {
273            return self.decode_block_ppm(reader, max_size);
274        }
275
276        // Validate tables exist
277        if self.huffman.main_table.is_none() || self.huffman.dist_table.is_none() {
278            return Err(DecompressError::InvalidHuffmanCode);
279        }
280
281        #[cfg(test)]
282        let mut symbol_count = 0;
283
284        while self.lzss.total_written() < max_size && !reader.is_eof() {
285            // Check if we need to execute pending VM filters
286            self.maybe_execute_filters();
287
288            // Decode main symbol
289            #[cfg(test)]
290            let bit_pos_main_start = reader.bit_position();
291            #[cfg(test)]
292            let peek_bits = reader.peek_bits(16);
293
294            // SAFETY: We validated main_table.is_some() above
295            let symbol = unsafe {
296                self.huffman
297                    .main_table
298                    .as_ref()
299                    .unwrap_unchecked()
300                    .decode(reader)?
301            };
302
303            #[cfg(test)]
304            {
305                let pos = self.lzss.total_written();
306                if pos >= 1498580 && pos <= 1498610 {
307                    let bit_pos_after = reader.bit_position();
308                    eprintln!(
309                        "MAIN sym={} at pos={}, bits {}->{}  peek={:016b}",
310                        symbol, pos, bit_pos_main_start, bit_pos_after, peek_bits
311                    );
312                }
313            }
314
315            if symbol < 256 {
316                // Literal byte — most common case, skip rest of dispatch
317                #[cfg(test)]
318                {
319                    let pos = self.lzss.total_written();
320                    if pos >= 1498595 && pos <= 1498610 {
321                        eprintln!("WRITING literal 0x{:02x} at output pos {}", symbol, pos);
322                    }
323                }
324                self.lzss.write_literal(symbol as u8);
325            } else if symbol == 256 {
326                // End of block / new tables
327                // From unrar ReadEndOfBlock:
328                // "1"  - no new file, new table just here.
329                // "00" - new file,    no new table.
330                // "01" - new file,    new table (in beginning of next file).
331                #[cfg(test)]
332                eprintln!(
333                    "\n=== SYMBOL 256 (end of block) at output pos {}, bit_pos {} ===",
334                    self.lzss.total_written(),
335                    reader.bit_position()
336                );
337                if !reader.is_eof() {
338                    let first_bit = reader.read_bit()?;
339                    #[cfg(test)]
340                    eprintln!(
341                        "  first_bit={}, bit_pos after={}",
342                        first_bit,
343                        reader.bit_position()
344                    );
345                    if first_bit {
346                        // "1" = new tables, continue decompression
347                        // Reset low dist state when reading new tables
348                        self.prev_low_offset = 0;
349                        self.low_offset_repeat_count = 0;
350                        // Call full read_tables which aligns to byte and reads header
351                        self.read_tables(reader)?;
352                        #[cfg(test)]
353                        {
354                            eprintln!(
355                                "After new tables: bit_pos={}, next 16 bits={:016b}",
356                                reader.bit_position(),
357                                reader.peek_bits(16)
358                            );
359                            eprintln!("  About to decode first symbol after table read");
360                        }
361                        // Continue decompressing - don't break!
362                        continue;
363                    }
364                    // "0x" = new file (end of this file's data)
365                    let _second_bit = reader.read_bit()?; // consume the second bit
366                                                          // Break out - we're done with this file
367                }
368                break;
369            } else if symbol == 257 {
370                // VM filter code - read and skip it
371                #[cfg(test)]
372                eprintln!(
373                    "\n=== SYMBOL 257 (VM code) at output pos {} ===",
374                    self.lzss.total_written()
375                );
376                self.read_vm_code(reader)?;
377            } else if symbol == 258 {
378                // Repeat last match
379                if self.last_len > 0 {
380                    #[cfg(test)]
381                    {
382                        let pos = self.lzss.total_written();
383                        let end = pos + self.last_len as u64;
384                        if pos <= 1498598 && end > 1498598 {
385                            eprintln!(
386                                "!!! AT 1498598: symbol 258 repeat, last_dist={}, last_len={}",
387                                self.last_dist, self.last_len
388                            );
389                        }
390                    }
391                    self.lzss.copy_match(self.last_dist, self.last_len)?;
392                }
393            } else if symbol < 263 {
394                // Use one of the old distances (symbols 259-262 = indices 0-3)
395                let idx = (symbol - 259) as usize;
396                let distance = self.old_dist[idx];
397
398                // Decode length using the length table
399                let length = self.decode_length_from_table(reader)?;
400
401                #[cfg(test)]
402                {
403                    let written = self.lzss.total_written();
404                    let end = written + length as u64;
405                    if written <= 1498598 && end > 1498598 {
406                        eprintln!(
407                            "!!! AT 1498598: old idx={},len={},dist={}",
408                            idx, length, distance
409                        );
410                    }
411                }
412
413                self.lzss.copy_match(distance, length)?;
414
415                // Shift old distances: move entries 0..idx up by 1, put this at 0
416                for i in (1..=idx).rev() {
417                    self.old_dist[i] = self.old_dist[i - 1];
418                }
419                self.old_dist[0] = distance;
420                self.last_dist = distance;
421                self.last_len = length;
422            } else if symbol <= 270 {
423                // Short match (symbols 263-270): fixed length=2, short distance
424                let idx = (symbol - 263) as usize;
425                let base = SHORT_BASES[idx];
426                let bits = SHORT_BITS[idx];
427                let extra = if bits > 0 {
428                    reader.read_bits(bits as u32)?
429                } else {
430                    0
431                };
432                let distance = base + extra + 1;
433                let length = 2u32;
434
435                #[cfg(test)]
436                {
437                    let written = self.lzss.total_written();
438                    let end = written + length as u64;
439                    if written <= 1498598 && end > 1498598 {
440                        eprintln!(
441                            "!!! AT 1498598: short sym={}, idx={}, base={}, bits={}, extra={}, dist={}",
442                            symbol, idx, base, bits, extra, distance
443                        );
444                    }
445                }
446
447                self.lzss.copy_match(distance, length)?;
448
449                // Shift old distances
450                for i in (1..4).rev() {
451                    self.old_dist[i] = self.old_dist[i - 1];
452                }
453                self.old_dist[0] = distance;
454                self.old_dist_ptr = 0;
455                self.last_dist = distance;
456                self.last_len = length;
457            } else {
458                // Long match (symbols 271-298): length from main symbol, distance from offset table
459                #[cfg(test)]
460                let bit_before_len = reader.bit_position();
461
462                let len_idx = (symbol - 271) as usize;
463                let length = if len_idx < LENGTH_BASE.len() {
464                    let base = LENGTH_BASE[len_idx];
465                    let extra = LENGTH_EXTRA[len_idx];
466                    let extra_val = if extra > 0 {
467                        reader.read_bits(extra as u32)?
468                    } else {
469                        0
470                    };
471                    #[cfg(test)]
472                    {
473                        let written = self.lzss.total_written();
474                        if written >= 1498595 && written <= 1498602 {
475                            let bit_after_len = reader.bit_position();
476                            eprintln!(
477                                "!!! LONG DECODE at {}: sym={}, len_idx={}, len={}, bits {}->{}]",
478                                written,
479                                symbol,
480                                len_idx,
481                                base + extra_val + 3,
482                                bit_before_len,
483                                bit_after_len
484                            );
485                        }
486                    }
487                    base + extra_val + 3 // +3 because minimum match length for long matches is 3
488                } else {
489                    #[cfg(test)]
490                    eprintln!(
491                        "\nlen_idx {} out of range at written={}",
492                        len_idx,
493                        self.lzss.total_written()
494                    );
495                    return Err(DecompressError::InvalidHuffmanCode);
496                };
497
498                // Decode distance from offset table
499                let dist_symbol = {
500                    #[cfg(test)]
501                    let bit_pos_before = reader.bit_position();
502
503                    // SAFETY: We validated dist_table.is_some() at function start
504                    let dist_table = unsafe { self.huffman.dist_table.as_ref().unwrap_unchecked() };
505                    match dist_table.decode(reader) {
506                        Ok(s) => {
507                            #[cfg(test)]
508                            {
509                                let written = self.lzss.total_written();
510                                if written >= 1498595 && written <= 1498610 {
511                                    let bit_pos_after = reader.bit_position();
512                                    eprintln!(
513                                        "  dist_symbol={} at pos {} (bits {}->{})",
514                                        s, written, bit_pos_before, bit_pos_after
515                                    );
516                                }
517                            }
518                            s
519                        }
520                        Err(e) => {
521                            #[cfg(test)]
522                            eprintln!(
523                                "\nOffset decode failed at written={}, len={}",
524                                self.lzss.total_written(),
525                                length
526                            );
527                            return Err(e);
528                        }
529                    }
530                };
531
532                let dist_code = dist_symbol as usize;
533                let distance = if dist_code < DIST_BASE.len() {
534                    let base = DIST_BASE[dist_code];
535                    let extra = DIST_EXTRA[dist_code];
536
537                    let extra_val = if extra > 0 {
538                        if dist_code > 9 {
539                            // For dist_code > 9, use low offset table
540                            // First read high bits if extra > 4
541                            let high = if extra > 4 {
542                                #[cfg(test)]
543                                let high_bit_pos = reader.bit_position();
544                                let h = reader.read_bits((extra - 4) as u32)?;
545                                #[cfg(test)]
546                                {
547                                    let written = self.lzss.total_written();
548                                    if (written >= 1498595 && written <= 1498610)
549                                        || (written >= 2176060 && written <= 2176080)
550                                    {
551                                        eprintln!(
552                                            "    high bits at {}: {} bits = {} (0b{:016b}), pos {}->{}",
553                                            written,
554                                            extra - 4,
555                                            h, h,
556                                            high_bit_pos,
557                                            reader.bit_position()
558                                        );
559                                    }
560                                }
561                                h << 4
562                            } else {
563                                0
564                            };
565                            // Then decode low offset (0-15 or 16 for repeat)
566                            let low = if self.low_offset_repeat_count > 0 {
567                                self.low_offset_repeat_count -= 1;
568                                #[cfg(test)]
569                                {
570                                    let written = self.lzss.total_written();
571                                    if written >= 1498550 && written <= 1498610 {
572                                        eprintln!(
573                                            "!!! low_offset REPEAT at {}: prev={}",
574                                            written, self.prev_low_offset
575                                        );
576                                    }
577                                }
578                                self.prev_low_offset
579                            } else {
580                                #[cfg(test)]
581                                let bit_pos_before = reader.bit_position();
582                                #[cfg(test)]
583                                let raw_bits_16 = reader.peek_bits(16);
584                                // SAFETY: low_dist_table is always initialized when we reach here
585                                let low_table = unsafe {
586                                    self.huffman.low_dist_table.as_ref().unwrap_unchecked()
587                                };
588                                #[cfg(test)]
589                                {
590                                    let written = self.lzss.total_written();
591                                    if written == 1498598 {
592                                        // Dump the decode_len array and symbols
593                                        eprintln!(
594                                            "!!! LOW_TABLE at 1498598 decode_len: {:?}",
595                                            low_table.dump_decode_len()
596                                        );
597                                        eprintln!(
598                                            "!!! LOW_TABLE at 1498598 symbols: {:?}",
599                                            low_table.dump_symbols()
600                                        );
601                                    }
602                                }
603                                let sym = low_table.decode(reader)? as u32;
604                                #[cfg(test)]
605                                {
606                                    let written = self.lzss.total_written();
607                                    if written >= 1498550 && written <= 1498610 {
608                                        let bit_pos_after = reader.bit_position();
609                                        eprintln!("!!! low_offset at {}: sym={} (bits {}->{}), raw peek = {:016b}", 
610                                            written, sym, bit_pos_before, bit_pos_after, raw_bits_16);
611                                    }
612                                }
613
614                                if sym == 16 {
615                                    // Repeat previous low offset - total 16 uses (this one + 15 more)
616                                    // unrar: LowDistRepCount=LOW_DIST_REP_COUNT-1 where LOW_DIST_REP_COUNT=16
617                                    self.low_offset_repeat_count = 16 - 1; // 15 more uses after this one
618                                    self.prev_low_offset
619                                } else {
620                                    self.prev_low_offset = sym;
621                                    sym
622                                }
623                            };
624                            #[cfg(test)]
625                            {
626                                let written = self.lzss.total_written();
627                                if written >= 2176060 && written <= 2176080 {
628                                    if self.low_offset_repeat_count > 0 {
629                                        eprintln!(
630                                            "  low_offset REPEAT at {}: prev={}, remaining={}",
631                                            written,
632                                            self.prev_low_offset,
633                                            self.low_offset_repeat_count
634                                        );
635                                    } else {
636                                        eprintln!("  low_offset at {}: dist_code={}, base={}, extra={}, high={}, low={}, dist={}", 
637                                            written, dist_code, base, extra, high, low, base + high + low + 1);
638                                    }
639                                }
640                            }
641                            high + low
642                        } else {
643                            // For dist_code <= 9, read extra bits directly
644                            #[cfg(test)]
645                            let peek = reader.peek_bits(extra as u32);
646                            let val = reader.read_bits(extra as u32)?;
647                            #[cfg(test)]
648                            {
649                                let written = self.lzss.total_written();
650                                if written >= 0 && written < 0 {
651                                    eprintln!("  direct: dist_code={}, base={}, extra_bits={}, peek={:04b}, extra_val={}, distance={}", 
652                                        dist_code, base, extra, peek, val, base + val + 1);
653                                }
654                            }
655                            val
656                        }
657                    } else {
658                        0
659                    };
660                    base + extra_val + 1
661                } else {
662                    #[cfg(test)]
663                    eprintln!(
664                        "\ndist_code {} out of range at written={}",
665                        dist_code,
666                        self.lzss.total_written()
667                    );
668                    return Err(DecompressError::InvalidHuffmanCode);
669                };
670
671                // Length bonus for long distances (RAR3 specific)
672                // Per unrar: if (Distance>=0x2000) { Length++; if (Distance>=0x40000) Length++; }
673                let length = if distance >= 0x2000 {
674                    if distance >= 0x40000 {
675                        length + 2
676                    } else {
677                        length + 1
678                    }
679                } else {
680                    length
681                };
682
683                #[cfg(test)]
684                {
685                    let written = self.lzss.total_written();
686                    let end = written + length as u64;
687                    if written <= 1498598 && end > 1498598 {
688                        eprintln!(
689                            "!!! AT 1498598: long match dist={}, len={}",
690                            distance, length
691                        );
692                        // Check what's in the window at source position
693                        let src_pos = (written as u32).wrapping_sub(distance) as usize;
694                        let mask = self.lzss.window_mask() as usize;
695                        let window = self.lzss.window();
696                        eprintln!(
697                            "  window src[{}..{}]: {:02x?}",
698                            src_pos,
699                            src_pos + length as usize,
700                            &window[src_pos..src_pos + length as usize]
701                        );
702                    }
703                    if written >= 1498595 && written <= 1498602 {
704                        eprintln!(
705                            "LONG MATCH at {}: dist={}, len={}",
706                            written, distance, length
707                        );
708                    }
709                }
710
711                self.lzss.copy_match(distance, length)?;
712
713                // Shift old distances
714                for i in (1..4).rev() {
715                    self.old_dist[i] = self.old_dist[i - 1];
716                }
717                self.old_dist[0] = distance;
718                self.old_dist_ptr = 0;
719                self.last_dist = distance;
720                self.last_len = length;
721            }
722        }
723
724        Ok(())
725    }
726
727    /// Decode a length value using the length table.
728    fn decode_length_from_table(&mut self, reader: &mut BitReader) -> Result<u32> {
729        let symbol = {
730            let len_table = self
731                .huffman
732                .len_table
733                .as_ref()
734                .ok_or(DecompressError::InvalidHuffmanCode)?;
735            len_table.decode(reader)?
736        };
737
738        let sym = symbol as usize;
739        if sym < LENGTH_BASE.len() {
740            let base = LENGTH_BASE[sym];
741            let extra = LENGTH_EXTRA[sym];
742            let extra_val = if extra > 0 {
743                reader.read_bits(extra as u32)?
744            } else {
745                0
746            };
747            Ok(base + extra_val + 2)
748        } else {
749            Err(DecompressError::InvalidHuffmanCode)
750        }
751    }
752
753    /// Read VM filter code from bit stream (for LZ mode, symbol 257).
754    /// We read the VM code and register it with the VM for later execution.
755    #[cold]
756    fn read_vm_code(&mut self, reader: &mut BitReader) -> Result<()> {
757        #[cfg(test)]
758        let bit_pos_start = reader.bit_position();
759
760        // Read first byte
761        let first_byte = reader.read_bits(8)? as u8;
762
763        // Calculate length based on unrar's ReadVMCode logic:
764        // Length = (FirstByte & 7) + 1
765        // if Length == 7, read another byte and add 7
766        // if Length == 8, read 16 bits as length
767        let length = {
768            let base = (first_byte & 7) + 1;
769            match base {
770                7 => {
771                    // Read one more byte, add 7
772                    let next = reader.read_bits(8)? as u32;
773                    next + 7
774                }
775                8 => {
776                    // Read 16 bits as length
777                    reader.read_bits(16)?
778                }
779                _ => base as u32,
780            }
781        };
782
783        #[cfg(test)]
784        eprintln!(
785            "  read_vm_code: first_byte=0x{:02x}, length={}, bit_pos_start={}",
786            first_byte, length, bit_pos_start
787        );
788
789        if length == 0 {
790            return Ok(());
791        }
792
793        // Read VM code bytes
794        let mut vm_code = vec![0u8; length as usize];
795        for i in 0..length as usize {
796            vm_code[i] = reader.read_bits(8)? as u8;
797        }
798
799        #[cfg(test)]
800        eprintln!("    vm_code end bit_pos={}", reader.bit_position());
801
802        // Add to VM for later execution - use absolute total_written, not wrapped window position
803        let total_written = self.lzss.total_written();
804        let window_mask = self.lzss.window_mask();
805
806        #[cfg(test)]
807        eprintln!(
808            "    add_code: total_written={}, window_mask={:x}",
809            total_written, window_mask
810        );
811
812        #[cfg(test)]
813        {
814            let had_pending_before = self.vm.has_pending_filters();
815            let result = self
816                .vm
817                .add_code(first_byte, &vm_code, total_written, window_mask);
818            let has_pending_after = self.vm.has_pending_filters();
819            if let Some(next_pos) = self.vm.next_filter_pos() {
820                eprintln!(
821                    "    vm.add_code: added={}, pending={}->{}, next_pos={}",
822                    result, had_pending_before, has_pending_after, next_pos
823                );
824            } else {
825                eprintln!(
826                    "    vm.add_code: added={}, pending={}->{}, next_pos=NONE",
827                    result, had_pending_before, has_pending_after
828                );
829            }
830        }
831        #[cfg(not(test))]
832        self.vm
833            .add_code(first_byte, &vm_code, total_written, window_mask);
834
835        // Update next_filter_check when a filter is added
836        if let Some(end) = self.vm.next_filter_end() {
837            self.next_filter_check = self.next_filter_check.min(end);
838        }
839
840        Ok(())
841    }
842
843    /// Execute pending VM filters if we've reached their block_start position.
844    /// Applies filters to window data, writes filtered output directly to output buffer.
845    #[inline]
846    fn maybe_execute_filters(&mut self) {
847        let total_written = self.lzss.total_written();
848
849        // Fast path: skip if we haven't reached the next filter check position
850        if total_written < self.next_filter_check {
851            return;
852        }
853
854        let window_mask = self.lzss.window_mask() as usize;
855
856        // Execute filters that are ready, in order of their block_start position
857        loop {
858            // Find the earliest filter that is ready to execute
859            let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
860                Some((idx, pos)) => (idx, pos),
861                None => break,
862            };
863
864            // Flush up to filter start first (unfiltered data before this filter)
865            let flushed = self.lzss.flushed_pos();
866            if flushed < next_pos {
867                self.lzss.flush_to_output(next_pos);
868            }
869
870            // Execute the filter on the window (read-only) and get filtered output
871            let window = self.lzss.window();
872            if let Some((filter_end, filtered_data)) =
873                self.vm
874                    .execute_filter_at_index(filter_idx, window, window_mask, total_written)
875            {
876                // Write filtered data directly to output (bypasses window)
877                self.lzss.write_filtered_to_output(filtered_data, next_pos);
878                // Update next check to after this filter
879                self.next_filter_check = filter_end;
880            } else {
881                break;
882            }
883        }
884
885        // Update next_filter_check based on remaining filters
886        self.next_filter_check = self.vm.next_filter_end().unwrap_or(u64::MAX);
887    }
888
889    /// Decode a block using PPMd.
890    fn decode_block_ppm(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
891        let ppm = self
892            .ppm
893            .as_mut()
894            .ok_or(DecompressError::UnsupportedMethod(0x33))?;
895        let coder = self
896            .ppm_coder
897            .as_mut()
898            .ok_or(DecompressError::UnsupportedMethod(0x33))?;
899        let esc_char = self.ppm_esc_char;
900
901        while self.lzss.total_written() < max_size && !reader.is_eof() {
902            let ch = ppm.decode_char(coder, reader).map_err(|e| {
903                #[cfg(test)]
904                eprintln!(
905                    "PPM decode_char failed at pos {}: {}",
906                    self.lzss.total_written(),
907                    e
908                );
909                #[cfg(not(test))]
910                let _ = e;
911                DecompressError::InvalidHuffmanCode
912            })?;
913
914            if ch < 0 {
915                // Decode error
916                #[cfg(test)]
917                eprintln!("PPM decode_char returned negative: {}", ch);
918                return Err(DecompressError::InvalidHuffmanCode);
919            }
920
921            #[cfg(test)]
922            {
923                if self.lzss.total_written() < 20 {
924                    eprint!("[{}:{}] ", self.lzss.total_written(), ch);
925                }
926            }
927
928            if ch != esc_char {
929                // Regular character
930                self.lzss.write_literal(ch as u8);
931            } else {
932                // Escape sequence - decode control code
933                let ctrl = ppm
934                    .decode_char(coder, reader)
935                    .map_err(|_| DecompressError::InvalidHuffmanCode)?;
936
937                if ctrl < 0 {
938                    return Err(DecompressError::InvalidHuffmanCode);
939                }
940
941                match ctrl {
942                    0 => {
943                        // Should not happen (NextCh starts at 0)
944                        break;
945                    }
946                    1 => {
947                        // Write escape character itself
948                        self.lzss.write_literal(esc_char as u8);
949                    }
950                    2 => {
951                        // End of PPM block
952                        break;
953                    }
954                    3 => {
955                        // VM code - read and add to VM
956                        let first_byte = ppm
957                            .decode_char(coder, reader)
958                            .map_err(|_| DecompressError::InvalidHuffmanCode)?
959                            as u8;
960
961                        // Decode length from first byte
962                        let mut length = ((first_byte & 7) + 1) as u32;
963                        if length == 7 {
964                            let b1 = ppm
965                                .decode_char(coder, reader)
966                                .map_err(|_| DecompressError::InvalidHuffmanCode)?;
967                            length = (b1 as u32) + 7;
968                        } else if length == 8 {
969                            let b1 = ppm
970                                .decode_char(coder, reader)
971                                .map_err(|_| DecompressError::InvalidHuffmanCode)?;
972                            let b2 = ppm
973                                .decode_char(coder, reader)
974                                .map_err(|_| DecompressError::InvalidHuffmanCode)?;
975                            length = (b1 as u32) * 256 + (b2 as u32);
976                        }
977
978                        if length == 0 {
979                            continue;
980                        }
981
982                        // Read VM code bytes
983                        let mut vm_code = vec![0u8; length as usize];
984                        for i in 0..length as usize {
985                            let ch = ppm
986                                .decode_char(coder, reader)
987                                .map_err(|_| DecompressError::InvalidHuffmanCode)?;
988                            vm_code[i] = ch as u8;
989                        }
990
991                        // Add to VM
992                        let total_written = self.lzss.total_written();
993                        let window_mask = self.lzss.window_mask();
994                        self.vm
995                            .add_code(first_byte, &vm_code, total_written, window_mask);
996
997                        // Update next_filter_check when a filter is added
998                        if let Some(end) = self.vm.next_filter_end() {
999                            self.next_filter_check = self.next_filter_check.min(end);
1000                        }
1001                    }
1002                    4 => {
1003                        // LZ match: 3 bytes distance (MSB first), 1 byte length
1004                        let mut distance: u32 = 0;
1005                        for _ in 0..3 {
1006                            let ch = ppm
1007                                .decode_char(coder, reader)
1008                                .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1009                            distance = (distance << 8) + (ch as u32);
1010                        }
1011                        let len = ppm
1012                            .decode_char(coder, reader)
1013                            .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1014
1015                        // Distance+2, Length+32
1016                        let distance = distance + 2;
1017                        let length = (len as u32) + 32;
1018
1019                        self.lzss.copy_match(distance, length)?;
1020                        self.last_dist = distance;
1021                        self.last_len = length;
1022                    }
1023                    5 => {
1024                        // RLE match: 1 byte length, distance = 1
1025                        let len = ppm
1026                            .decode_char(coder, reader)
1027                            .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1028
1029                        // Length+4, Distance=1
1030                        let length = (len as u32) + 4;
1031
1032                        self.lzss.copy_match(1, length)?;
1033                        self.last_dist = 1;
1034                        self.last_len = length;
1035                    }
1036                    _ => {
1037                        // Unknown control code - likely corruption
1038                        #[cfg(test)]
1039                        eprintln!("Unknown PPM control code: {}", ctrl);
1040                        return Err(DecompressError::InvalidHuffmanCode);
1041                    }
1042                }
1043            }
1044        }
1045
1046        Ok(())
1047    }
1048
1049    /// Reset the decoder state for a new file.
1050    pub fn reset(&mut self) {
1051        self.lzss.reset();
1052        self.vm.reset();
1053        // Keep ppm model for reuse (SubAllocator reuses buffer if same size)
1054        self.ppm_coder = None;
1055        self.ppm_esc_char = -1;
1056        self.old_dist = [0; 4];
1057        self.old_dist_ptr = 0;
1058        self.last_dist = 0;
1059        self.last_len = 0;
1060        self.ppm_mode = false;
1061        self.tables_read = false;
1062        self.prev_low_offset = 0;
1063        self.low_offset_repeat_count = 0;
1064        self.next_filter_check = u64::MAX;
1065    }
1066
1067    /// Get total bytes decompressed.
1068    pub fn bytes_written(&self) -> u64 {
1069        self.lzss.total_written()
1070    }
1071}
1072
1073impl Default for Rar29Decoder {
1074    fn default() -> Self {
1075        Self::new()
1076    }
1077}
1078
1079// WIP: streaming decoder
1080/// Streaming decompressor for RAR29.
1081/// Allows decompressing chunks at a time.
1082#[allow(dead_code)]
1083pub struct Rar29StreamDecoder {
1084    decoder: Rar29Decoder,
1085    /// Accumulated compressed data
1086    input_buffer: Vec<u8>,
1087    /// Current position in input buffer
1088    input_pos: usize,
1089    /// Total expected unpacked size
1090    unpacked_size: u64,
1091}
1092
1093#[allow(dead_code)]
1094impl Rar29StreamDecoder {
1095    /// Create a new streaming decoder.
1096    pub fn new(unpacked_size: u64) -> Self {
1097        Self {
1098            decoder: Rar29Decoder::new(),
1099            input_buffer: Vec::new(),
1100            input_pos: 0,
1101            unpacked_size,
1102        }
1103    }
1104
1105    /// Feed compressed data to the decoder.
1106    /// Returns decompressed data available so far.
1107    pub fn feed(&mut self, data: &[u8]) -> Result<Vec<u8>> {
1108        self.input_buffer.extend_from_slice(data);
1109
1110        // Try to decompress with available data
1111        let result = self
1112            .decoder
1113            .decompress(&self.input_buffer[self.input_pos..], self.unpacked_size)?;
1114
1115        Ok(result)
1116    }
1117
1118    /// Check if decompression is complete.
1119    pub fn is_complete(&self) -> bool {
1120        self.decoder.bytes_written() >= self.unpacked_size
1121    }
1122
1123    /// Get total bytes decompressed.
1124    pub fn bytes_written(&self) -> u64 {
1125        self.decoder.bytes_written()
1126    }
1127}
1128
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed decoder has produced no output and has not read
    /// its tables yet.
    #[test]
    fn test_decoder_creation() {
        let fresh = Rar29Decoder::new();
        assert_eq!(fresh.bytes_written(), 0);
        assert!(!fresh.tables_read);
    }

    // Further coverage needs real RAR-compressed fixtures.
}