Skip to main content

rar_stream/decompress/
rar29.rs

1//! RAR 2.9 (RAR4) decompression.
2//!
3//! Implements the LZSS + Huffman decompression used in RAR versions 2.x-4.x.
4//! This is the most common format for scene releases.
5
6// Allow disabled debug blocks in test code (written >= 0 && written < 0 is intentionally false)
7#![cfg_attr(test, allow(clippy::logic_bug))]
8
9use super::{
10    bit_reader::BitReader,
11    huffman::HuffmanDecoder,
12    lzss::LzssDecoder,
13    ppm::{PpmModel, RangeCoder},
14    vm::RarVM,
15    DecompressError, Result,
16};
17
// Alphabet sizes for the RAR 2.9 Huffman tables. These are not referenced by
// the decoding code in this part of the file (hence the `dead_code`
// allowances); NOTE(review): presumably kept as documentation of the format.
#[allow(dead_code)]
/// Number of main codes: literal bytes 0-255, control symbols 256-258,
/// repeat-distance symbols 259-262, short matches 263-270 and long matches
/// 271-298.
const MAIN_CODES: usize = 299;

#[allow(dead_code)]
/// Number of distance codes (matches the length of `DIST_BASE`/`DIST_EXTRA`).
const DIST_CODES: usize = 60;

#[allow(dead_code)]
/// Number of low distance codes: low-offset values 0-15 plus symbol 16, which
/// the decoder treats as "repeat the previous low offset".
const LOW_DIST_CODES: usize = 17;

#[allow(dead_code)]
/// Number of length codes (matches the length of `LENGTH_BASE`/`LENGTH_EXTRA`).
const LEN_CODES: usize = 28;

#[allow(dead_code)]
/// Maximum match length.
const MAX_MATCH_LEN: u32 = 258;

/// Short distance bases for symbols 263-270.
///
/// The decoded distance is `base + extra + 1`, where `extra` is read using the
/// bit count from `SHORT_BITS`; the match length is always 2.
const SHORT_BASES: [u32; 8] = [0, 4, 8, 16, 32, 64, 128, 192];

/// Short distance extra bits for symbols 263-270 (parallel to `SHORT_BASES`).
const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];

/// Base lengths for length codes.
///
/// The decoder adds the extra-bit value and a minimum match length on top of
/// the base: 3 for long matches (main symbols 271-298) and 2 for lengths read
/// through the length table.
const LENGTH_BASE: [u32; 28] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
    160, 192, 224,
];

/// Extra bits for length codes (parallel to `LENGTH_BASE`).
const LENGTH_EXTRA: [u8; 28] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
];

/// Base distances for distance codes (60 entries).
///
/// The decoded distance is `base + extra + 1`.
const DIST_BASE: [u32; 60] = [
    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
    2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
    1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 2883584, 3145728, 3407872,
    3670016, 3932160,
];

/// Extra bits for distance codes (60 entries, parallel to `DIST_BASE`).
///
/// For distance codes above 9 the lowest four of these bits are not read
/// directly: they come from the low-offset Huffman table, and only the
/// remaining `extra - 4` high bits are read raw from the stream.
const DIST_EXTRA: [u8; 60] = [
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
    13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18,
];
70
/// RAR 2.9 (RAR4) decoder state.
///
/// Handles LZSS + Huffman decompression with PPMd fallback and VM-based
/// filters for RAR 1.5–4.x archives.
///
/// The state persists between calls, so Huffman tables, the distance history
/// and the sliding window carry over from one `decompress` call to the next.
///
/// # Example
///
/// ```
/// use rar_stream::Rar29Decoder;
///
/// let mut decoder = Rar29Decoder::new();
/// // decoder.decompress(&compressed_data, expected_size) to decompress
/// ```
pub struct Rar29Decoder {
    /// LZSS sliding window (also accumulates the decompressed output)
    lzss: LzssDecoder,
    /// Huffman decoder holding the main, distance, low-distance and length tables
    huffman: HuffmanDecoder,
    /// VM for filter execution
    vm: RarVM,
    /// PPMd model (used when ppm_mode is true); created lazily on the first PPMd block
    ppm: Option<PpmModel>,
    /// PPMd range coder (used when ppm_mode is true)
    ppm_coder: Option<RangeCoder>,
    /// PPMd escape character (-1 until a PPMd block has been initialized)
    ppm_esc_char: i32,
    /// Previous distances for repeat matches, most recently used first
    old_dist: [u32; 4],
    /// Current distance history index (the LZ decode path only ever resets it to 0)
    old_dist_ptr: usize,
    /// Last distance used (replayed by main symbol 258)
    last_dist: u32,
    /// Last length used (replayed by main symbol 258; 0 means no match yet)
    last_len: u32,
    /// PPMd mode flag, set from the first header bit each time tables are read
    ppm_mode: bool,
    /// True once the tables have been read; `decompress` reads them first
    /// whenever this is false
    tables_read: bool,
    /// Previous low offset value for repeat
    prev_low_offset: u32,
    /// Low offset repeat counter: number of upcoming long matches that reuse
    /// `prev_low_offset` instead of decoding a new low offset
    low_offset_repeat_count: u32,
    /// Next position where we need to check filters (optimization to avoid O(n) scan);
    /// starts at `u64::MAX`
    next_filter_check: u64,
}
116
117impl Rar29Decoder {
118    /// Create a new RAR29 decoder with default window size (4MB).
119    pub fn new() -> Self {
120        Self::with_window_size(0x400000) // 4MB default (max common size)
121    }
122
123    /// Create a new RAR29 decoder with specified window size.
124    /// Window size must be a power of 2.
125    pub fn with_window_size(window_size: usize) -> Self {
126        Self {
127            lzss: LzssDecoder::new(window_size),
128            huffman: HuffmanDecoder::new(),
129            vm: RarVM::new(),
130            ppm: None,
131            ppm_coder: None,
132            ppm_esc_char: -1,
133            old_dist: [0; 4],
134            old_dist_ptr: 0,
135            last_dist: 0,
136            last_len: 0,
137            ppm_mode: false,
138            tables_read: false,
139            prev_low_offset: 0,
140            low_offset_repeat_count: 0,
141            next_filter_check: u64::MAX,
142        }
143    }
144
145    /// Get partial output (for debugging failed decompression)
146    #[cfg(test)]
147    pub fn get_output(&self) -> Vec<u8> {
148        self.lzss.output().to_vec()
149    }
150
151    /// Decompress a block of data.
152    /// Returns the decompressed data.
153    pub fn decompress(&mut self, data: &[u8], unpacked_size: u64) -> Result<Vec<u8>> {
154        let mut reader = BitReader::new(data);
155
156        // Enable output accumulation for files (especially those larger than window)
157        self.lzss.enable_output(unpacked_size as usize);
158
159        // Read tables if needed
160        if !self.tables_read {
161            self.read_tables(&mut reader)?;
162        }
163
164        // Decompress until we have enough data
165        while self.lzss.total_written() < unpacked_size {
166            if reader.is_eof() {
167                break;
168            }
169
170            self.decode_block(&mut reader, unpacked_size)?;
171        }
172
173        // Execute any remaining pending VM filters
174        let total_written = self.lzss.total_written();
175        let window_mask = self.lzss.window_mask() as usize;
176
177        // Execute filters in order of their block_start position
178        loop {
179            // Find the earliest filter that is ready
180            let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
181                Some((idx, pos)) => (idx, pos),
182                None => break,
183            };
184
185            // Flush up to filter start
186            let flushed = self.lzss.flushed_pos();
187            if flushed < next_pos {
188                self.lzss.flush_to_output(next_pos);
189            }
190
191            let window = self.lzss.window();
192            if let Some((_filter_end, filtered_data)) =
193                self.vm
194                    .execute_filter_at_index(filter_idx, window, window_mask, total_written)
195            {
196                // Write filtered data directly to output
197                self.lzss.write_filtered_to_output(filtered_data, next_pos);
198            } else {
199                break;
200            }
201        }
202
203        // Flush any remaining data to output
204        self.lzss.flush_to_output(total_written);
205
206        // Extract the decompressed data
207        Ok(self.lzss.take_output())
208    }
209
210    /// Read Huffman tables from the bit stream.
211    fn read_tables(&mut self, reader: &mut BitReader) -> Result<()> {
212        #[cfg(test)]
213        {
214            let byte_pos = reader.bit_position() / 8;
215            eprintln!(
216                "read_tables ENTRY: bit_pos={}, byte_pos={}",
217                reader.bit_position(),
218                byte_pos
219            );
220            eprintln!("  raw bytes at pos: {:02x?}", reader.peek_bytes(8));
221        }
222        // Align to byte boundary (like unrar)
223        reader.align_to_byte();
224        #[cfg(test)]
225        {
226            let byte_pos = reader.bit_position() / 8;
227            eprintln!(
228                "read_tables AFTER align: bit_pos={}, byte_pos={}",
229                reader.bit_position(),
230                byte_pos
231            );
232            eprintln!("  raw bytes at pos: {:02x?}", reader.peek_bytes(8));
233        };
234
235        // Peek at the high bit to check for PPM mode
236        // In unrar, this is done by peeking 16 bits and checking bit 15
237        let ppm_flag = reader.peek_bits(1) != 0;
238
239        self.ppm_mode = ppm_flag;
240
241        if self.ppm_mode {
242            // DON'T consume the PPM flag bit - it's part of the MaxOrder byte
243            // Initialize or reuse PPMd model
244            let ppm = self.ppm.get_or_insert_with(PpmModel::new);
245            match ppm.init(reader) {
246                Ok((coder, esc_char)) => {
247                    self.ppm_coder = Some(coder);
248                    self.ppm_esc_char = esc_char;
249                    #[cfg(test)]
250                    println!("PPMd initialized: esc_char={}", esc_char);
251                }
252                Err(e) => {
253                    #[cfg(test)]
254                    println!("PPMd init failed: {}", e);
255                    #[cfg(not(test))]
256                    let _ = e;
257                    return Err(DecompressError::UnsupportedMethod(0x33));
258                }
259            }
260        } else {
261            // LZ mode - reset low dist state (per unrar ReadTables30)
262            self.prev_low_offset = 0;
263            self.low_offset_repeat_count = 0;
264
265            // Check bit 1 (0x4000) for reset tables
266            let reset_tables = reader.peek_bits(2) & 1 == 0; // Bit 14 inverted (0 means reset)
267                                                             // Consume the 2 header bits (PPM flag + reset flag)
268            reader.advance_bits(2);
269
270            if reset_tables {
271                self.huffman.reset_tables();
272            }
273
274            // Read Huffman tables
275            self.huffman.read_tables_after_header(reader)?;
276        }
277
278        self.tables_read = true;
279        Ok(())
280    }
281
282    /// Decode a block of data.
283    fn decode_block(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
284        if self.ppm_mode {
285            return self.decode_block_ppm(reader, max_size);
286        }
287
288        // Validate tables exist
289        if self.huffman.main_table.is_none() || self.huffman.dist_table.is_none() {
290            return Err(DecompressError::InvalidHuffmanCode);
291        }
292
293        while self.lzss.total_written() < max_size && !reader.is_eof() {
294            // Check if we need to execute pending VM filters
295            self.maybe_execute_filters();
296
297            // Decode main symbol
298            #[cfg(test)]
299            let bit_pos_main_start = reader.bit_position();
300            #[cfg(test)]
301            let peek_bits = reader.peek_bits(16);
302
303            // SAFETY: We validated main_table.is_some() above
304            let symbol = unsafe {
305                self.huffman
306                    .main_table
307                    .as_ref()
308                    .unwrap_unchecked()
309                    .decode(reader)?
310            };
311
312            #[cfg(test)]
313            {
314                let pos = self.lzss.total_written();
315                if pos >= 1498580 && pos <= 1498610 {
316                    let bit_pos_after = reader.bit_position();
317                    eprintln!(
318                        "MAIN sym={} at pos={}, bits {}->{}  peek={:016b}",
319                        symbol, pos, bit_pos_main_start, bit_pos_after, peek_bits
320                    );
321                }
322            }
323
324            if symbol < 256 {
325                // Literal byte — most common case, skip rest of dispatch
326                #[cfg(test)]
327                {
328                    let pos = self.lzss.total_written();
329                    if pos >= 1498595 && pos <= 1498610 {
330                        eprintln!("WRITING literal 0x{:02x} at output pos {}", symbol, pos);
331                    }
332                }
333                self.lzss.write_literal(symbol as u8);
334            } else if symbol == 256 {
335                // End of block / new tables
336                // From unrar ReadEndOfBlock:
337                // "1"  - no new file, new table just here.
338                // "00" - new file,    no new table.
339                // "01" - new file,    new table (in beginning of next file).
340                #[cfg(test)]
341                eprintln!(
342                    "\n=== SYMBOL 256 (end of block) at output pos {}, bit_pos {} ===",
343                    self.lzss.total_written(),
344                    reader.bit_position()
345                );
346                if !reader.is_eof() {
347                    let first_bit = reader.read_bit()?;
348                    #[cfg(test)]
349                    eprintln!(
350                        "  first_bit={}, bit_pos after={}",
351                        first_bit,
352                        reader.bit_position()
353                    );
354                    if first_bit {
355                        // "1" = new tables, continue decompression
356                        // Reset low dist state when reading new tables
357                        self.prev_low_offset = 0;
358                        self.low_offset_repeat_count = 0;
359                        // Call full read_tables which aligns to byte and reads header
360                        self.read_tables(reader)?;
361                        #[cfg(test)]
362                        {
363                            eprintln!(
364                                "After new tables: bit_pos={}, next 16 bits={:016b}",
365                                reader.bit_position(),
366                                reader.peek_bits(16)
367                            );
368                            eprintln!("  About to decode first symbol after table read");
369                        }
370                        // Continue decompressing - don't break!
371                        continue;
372                    }
373                    // "0x" = new file (end of this file's data)
374                    let _second_bit = reader.read_bit()?; // consume the second bit
375                                                          // Break out - we're done with this file
376                }
377                break;
378            } else if symbol == 257 {
379                // VM filter code - read and skip it
380                #[cfg(test)]
381                eprintln!(
382                    "\n=== SYMBOL 257 (VM code) at output pos {} ===",
383                    self.lzss.total_written()
384                );
385                self.read_vm_code(reader)?;
386            } else if symbol == 258 {
387                // Repeat last match
388                if self.last_len > 0 {
389                    #[cfg(test)]
390                    {
391                        let pos = self.lzss.total_written();
392                        let end = pos + self.last_len as u64;
393                        if pos <= 1498598 && end > 1498598 {
394                            eprintln!(
395                                "!!! AT 1498598: symbol 258 repeat, last_dist={}, last_len={}",
396                                self.last_dist, self.last_len
397                            );
398                        }
399                    }
400                    self.lzss.copy_match(self.last_dist, self.last_len)?;
401                }
402            } else if symbol < 263 {
403                // Use one of the old distances (symbols 259-262 = indices 0-3)
404                let idx = (symbol - 259) as usize;
405                let distance = self.old_dist[idx];
406
407                // Decode length using the length table
408                let length = self.decode_length_from_table(reader)?;
409
410                #[cfg(test)]
411                {
412                    let written = self.lzss.total_written();
413                    let end = written + length as u64;
414                    if written <= 1498598 && end > 1498598 {
415                        eprintln!(
416                            "!!! AT 1498598: old idx={},len={},dist={}",
417                            idx, length, distance
418                        );
419                    }
420                }
421
422                self.lzss.copy_match(distance, length)?;
423
424                // Shift old distances: move entries 0..idx up by 1, put this at 0
425                for i in (1..=idx).rev() {
426                    self.old_dist[i] = self.old_dist[i - 1];
427                }
428                self.old_dist[0] = distance;
429                self.last_dist = distance;
430                self.last_len = length;
431            } else if symbol <= 270 {
432                // Short match (symbols 263-270): fixed length=2, short distance
433                let idx = (symbol - 263) as usize;
434                let base = SHORT_BASES[idx];
435                let bits = SHORT_BITS[idx];
436                let extra = if bits > 0 {
437                    reader.read_bits(bits as u32)?
438                } else {
439                    0
440                };
441                let distance = base + extra + 1;
442                let length = 2u32;
443
444                #[cfg(test)]
445                {
446                    let written = self.lzss.total_written();
447                    let end = written + length as u64;
448                    if written <= 1498598 && end > 1498598 {
449                        eprintln!(
450                            "!!! AT 1498598: short sym={}, idx={}, base={}, bits={}, extra={}, dist={}",
451                            symbol, idx, base, bits, extra, distance
452                        );
453                    }
454                }
455
456                self.lzss.copy_match(distance, length)?;
457
458                // Shift old distances
459                for i in (1..4).rev() {
460                    self.old_dist[i] = self.old_dist[i - 1];
461                }
462                self.old_dist[0] = distance;
463                self.old_dist_ptr = 0;
464                self.last_dist = distance;
465                self.last_len = length;
466            } else {
467                // Long match (symbols 271-298): length from main symbol, distance from offset table
468                #[cfg(test)]
469                let bit_before_len = reader.bit_position();
470
471                let len_idx = (symbol - 271) as usize;
472                let length = if len_idx < LENGTH_BASE.len() {
473                    let base = LENGTH_BASE[len_idx];
474                    let extra = LENGTH_EXTRA[len_idx];
475                    let extra_val = if extra > 0 {
476                        reader.read_bits(extra as u32)?
477                    } else {
478                        0
479                    };
480                    #[cfg(test)]
481                    {
482                        let written = self.lzss.total_written();
483                        if written >= 1498595 && written <= 1498602 {
484                            let bit_after_len = reader.bit_position();
485                            eprintln!(
486                                "!!! LONG DECODE at {}: sym={}, len_idx={}, len={}, bits {}->{}]",
487                                written,
488                                symbol,
489                                len_idx,
490                                base + extra_val + 3,
491                                bit_before_len,
492                                bit_after_len
493                            );
494                        }
495                    }
496                    base + extra_val + 3 // +3 because minimum match length for long matches is 3
497                } else {
498                    #[cfg(test)]
499                    eprintln!(
500                        "\nlen_idx {} out of range at written={}",
501                        len_idx,
502                        self.lzss.total_written()
503                    );
504                    return Err(DecompressError::InvalidHuffmanCode);
505                };
506
507                // Decode distance from offset table
508                let dist_symbol = {
509                    #[cfg(test)]
510                    let bit_pos_before = reader.bit_position();
511
512                    // SAFETY: We validated dist_table.is_some() at function start
513                    let dist_table = unsafe { self.huffman.dist_table.as_ref().unwrap_unchecked() };
514                    match dist_table.decode(reader) {
515                        Ok(s) => {
516                            #[cfg(test)]
517                            {
518                                let written = self.lzss.total_written();
519                                if written >= 1498595 && written <= 1498610 {
520                                    let bit_pos_after = reader.bit_position();
521                                    eprintln!(
522                                        "  dist_symbol={} at pos {} (bits {}->{})",
523                                        s, written, bit_pos_before, bit_pos_after
524                                    );
525                                }
526                            }
527                            s
528                        }
529                        Err(e) => {
530                            #[cfg(test)]
531                            eprintln!(
532                                "\nOffset decode failed at written={}, len={}",
533                                self.lzss.total_written(),
534                                length
535                            );
536                            return Err(e);
537                        }
538                    }
539                };
540
541                let dist_code = dist_symbol as usize;
542                let distance = if dist_code < DIST_BASE.len() {
543                    let base = DIST_BASE[dist_code];
544                    let extra = DIST_EXTRA[dist_code];
545
546                    let extra_val = if extra > 0 {
547                        if dist_code > 9 {
548                            // For dist_code > 9, use low offset table
549                            // First read high bits if extra > 4
550                            let high = if extra > 4 {
551                                #[cfg(test)]
552                                let high_bit_pos = reader.bit_position();
553                                let h = reader.read_bits((extra - 4) as u32)?;
554                                #[cfg(test)]
555                                {
556                                    let written = self.lzss.total_written();
557                                    if (written >= 1498595 && written <= 1498610)
558                                        || (written >= 2176060 && written <= 2176080)
559                                    {
560                                        eprintln!(
561                                            "    high bits at {}: {} bits = {} (0b{:016b}), pos {}->{}",
562                                            written,
563                                            extra - 4,
564                                            h, h,
565                                            high_bit_pos,
566                                            reader.bit_position()
567                                        );
568                                    }
569                                }
570                                h << 4
571                            } else {
572                                0
573                            };
574                            // Then decode low offset (0-15 or 16 for repeat)
575                            let low = if self.low_offset_repeat_count > 0 {
576                                self.low_offset_repeat_count -= 1;
577                                #[cfg(test)]
578                                {
579                                    let written = self.lzss.total_written();
580                                    if written >= 1498550 && written <= 1498610 {
581                                        eprintln!(
582                                            "!!! low_offset REPEAT at {}: prev={}",
583                                            written, self.prev_low_offset
584                                        );
585                                    }
586                                }
587                                self.prev_low_offset
588                            } else {
589                                #[cfg(test)]
590                                let bit_pos_before = reader.bit_position();
591                                #[cfg(test)]
592                                let raw_bits_16 = reader.peek_bits(16);
593                                // SAFETY: low_dist_table is always initialized when we reach here
594                                let low_table = unsafe {
595                                    self.huffman.low_dist_table.as_ref().unwrap_unchecked()
596                                };
597                                #[cfg(test)]
598                                {
599                                    let written = self.lzss.total_written();
600                                    if written == 1498598 {
601                                        // Dump the decode_len array and symbols
602                                        eprintln!(
603                                            "!!! LOW_TABLE at 1498598 decode_len: {:?}",
604                                            low_table.dump_decode_len()
605                                        );
606                                        eprintln!(
607                                            "!!! LOW_TABLE at 1498598 symbols: {:?}",
608                                            low_table.dump_symbols()
609                                        );
610                                    }
611                                }
612                                let sym = low_table.decode(reader)? as u32;
613                                #[cfg(test)]
614                                {
615                                    let written = self.lzss.total_written();
616                                    if written >= 1498550 && written <= 1498610 {
617                                        let bit_pos_after = reader.bit_position();
618                                        eprintln!("!!! low_offset at {}: sym={} (bits {}->{}), raw peek = {:016b}", 
619                                            written, sym, bit_pos_before, bit_pos_after, raw_bits_16);
620                                    }
621                                }
622
623                                if sym == 16 {
624                                    // Repeat previous low offset - total 16 uses (this one + 15 more)
625                                    // unrar: LowDistRepCount=LOW_DIST_REP_COUNT-1 where LOW_DIST_REP_COUNT=16
626                                    self.low_offset_repeat_count = 16 - 1; // 15 more uses after this one
627                                    self.prev_low_offset
628                                } else {
629                                    self.prev_low_offset = sym;
630                                    sym
631                                }
632                            };
633                            #[cfg(test)]
634                            {
635                                let written = self.lzss.total_written();
636                                if written >= 2176060 && written <= 2176080 {
637                                    if self.low_offset_repeat_count > 0 {
638                                        eprintln!(
639                                            "  low_offset REPEAT at {}: prev={}, remaining={}",
640                                            written,
641                                            self.prev_low_offset,
642                                            self.low_offset_repeat_count
643                                        );
644                                    } else {
645                                        eprintln!("  low_offset at {}: dist_code={}, base={}, extra={}, high={}, low={}, dist={}", 
646                                            written, dist_code, base, extra, high, low, base + high + low + 1);
647                                    }
648                                }
649                            }
650                            high + low
651                        } else {
652                            // For dist_code <= 9, read extra bits directly
653                            reader.read_bits(extra as u32)?
654                        }
655                    } else {
656                        0
657                    };
658                    base + extra_val + 1
659                } else {
660                    #[cfg(test)]
661                    eprintln!(
662                        "\ndist_code {} out of range at written={}",
663                        dist_code,
664                        self.lzss.total_written()
665                    );
666                    return Err(DecompressError::InvalidHuffmanCode);
667                };
668
669                // Length bonus for long distances (RAR3 specific)
670                // Per unrar: if (Distance>=0x2000) { Length++; if (Distance>=0x40000) Length++; }
671                let length = if distance >= 0x2000 {
672                    if distance >= 0x40000 {
673                        length + 2
674                    } else {
675                        length + 1
676                    }
677                } else {
678                    length
679                };
680
681                #[cfg(test)]
682                {
683                    let written = self.lzss.total_written();
684                    let end = written + length as u64;
685                    if written <= 1498598 && end > 1498598 {
686                        eprintln!(
687                            "!!! AT 1498598: long match dist={}, len={}",
688                            distance, length
689                        );
690                        // Check what's in the window at source position
691                        let src_pos = (written as u32).wrapping_sub(distance) as usize;
692                        let _mask = self.lzss.window_mask() as usize;
693                        let window = self.lzss.window();
694                        eprintln!(
695                            "  window src[{}..{}]: {:02x?}",
696                            src_pos,
697                            src_pos + length as usize,
698                            &window[src_pos..src_pos + length as usize]
699                        );
700                    }
701                    if written >= 1498595 && written <= 1498602 {
702                        eprintln!(
703                            "LONG MATCH at {}: dist={}, len={}",
704                            written, distance, length
705                        );
706                    }
707                }
708
709                self.lzss.copy_match(distance, length)?;
710
711                // Shift old distances
712                for i in (1..4).rev() {
713                    self.old_dist[i] = self.old_dist[i - 1];
714                }
715                self.old_dist[0] = distance;
716                self.old_dist_ptr = 0;
717                self.last_dist = distance;
718                self.last_len = length;
719            }
720        }
721
722        Ok(())
723    }
724
725    /// Decode a length value using the length table.
726    fn decode_length_from_table(&mut self, reader: &mut BitReader) -> Result<u32> {
727        let symbol = {
728            let len_table = self
729                .huffman
730                .len_table
731                .as_ref()
732                .ok_or(DecompressError::InvalidHuffmanCode)?;
733            len_table.decode(reader)?
734        };
735
736        let sym = symbol as usize;
737        if sym < LENGTH_BASE.len() {
738            let base = LENGTH_BASE[sym];
739            let extra = LENGTH_EXTRA[sym];
740            let extra_val = if extra > 0 {
741                reader.read_bits(extra as u32)?
742            } else {
743                0
744            };
745            Ok(base + extra_val + 2)
746        } else {
747            Err(DecompressError::InvalidHuffmanCode)
748        }
749    }
750
751    /// Read VM filter code from bit stream (for LZ mode, symbol 257).
752    /// We read the VM code and register it with the VM for later execution.
753    #[cold]
754    fn read_vm_code(&mut self, reader: &mut BitReader) -> Result<()> {
755        #[cfg(test)]
756        let bit_pos_start = reader.bit_position();
757
758        // Read first byte
759        let first_byte = reader.read_bits(8)? as u8;
760
761        // Calculate length based on unrar's ReadVMCode logic:
762        // Length = (FirstByte & 7) + 1
763        // if Length == 7, read another byte and add 7
764        // if Length == 8, read 16 bits as length
765        let length = {
766            let base = (first_byte & 7) + 1;
767            match base {
768                7 => {
769                    // Read one more byte, add 7
770                    let next = reader.read_bits(8)? as u32;
771                    next + 7
772                }
773                8 => {
774                    // Read 16 bits as length
775                    reader.read_bits(16)?
776                }
777                _ => base as u32,
778            }
779        };
780
781        #[cfg(test)]
782        eprintln!(
783            "  read_vm_code: first_byte=0x{:02x}, length={}, bit_pos_start={}",
784            first_byte, length, bit_pos_start
785        );
786
787        if length == 0 {
788            return Ok(());
789        }
790
791        // Read VM code bytes
792        let mut vm_code = vec![0u8; length as usize];
793        for i in 0..length as usize {
794            vm_code[i] = reader.read_bits(8)? as u8;
795        }
796
797        #[cfg(test)]
798        eprintln!("    vm_code end bit_pos={}", reader.bit_position());
799
800        // Add to VM for later execution - use absolute total_written, not wrapped window position
801        let total_written = self.lzss.total_written();
802        let window_mask = self.lzss.window_mask();
803
804        #[cfg(test)]
805        eprintln!(
806            "    add_code: total_written={}, window_mask={:x}",
807            total_written, window_mask
808        );
809
810        #[cfg(test)]
811        {
812            let had_pending_before = self.vm.has_pending_filters();
813            let result = self
814                .vm
815                .add_code(first_byte, &vm_code, total_written, window_mask);
816            let has_pending_after = self.vm.has_pending_filters();
817            if let Some(next_pos) = self.vm.next_filter_pos() {
818                eprintln!(
819                    "    vm.add_code: added={}, pending={}->{}, next_pos={}",
820                    result, had_pending_before, has_pending_after, next_pos
821                );
822            } else {
823                eprintln!(
824                    "    vm.add_code: added={}, pending={}->{}, next_pos=NONE",
825                    result, had_pending_before, has_pending_after
826                );
827            }
828        }
829        #[cfg(not(test))]
830        self.vm
831            .add_code(first_byte, &vm_code, total_written, window_mask);
832
833        // Update next_filter_check when a filter is added
834        if let Some(end) = self.vm.next_filter_end() {
835            self.next_filter_check = self.next_filter_check.min(end);
836        }
837
838        Ok(())
839    }
840
841    /// Execute pending VM filters if we've reached their block_start position.
842    /// Applies filters to window data, writes filtered output directly to output buffer.
843    #[inline]
844    fn maybe_execute_filters(&mut self) {
845        let total_written = self.lzss.total_written();
846
847        // Fast path: skip if we haven't reached the next filter check position
848        if total_written < self.next_filter_check {
849            return;
850        }
851
852        let window_mask = self.lzss.window_mask() as usize;
853
854        // Execute filters that are ready, in order of their block_start position
855        loop {
856            // Find the earliest filter that is ready to execute
857            let (filter_idx, next_pos) = match self.vm.find_ready_filter(total_written) {
858                Some((idx, pos)) => (idx, pos),
859                None => break,
860            };
861
862            // Flush up to filter start first (unfiltered data before this filter)
863            let flushed = self.lzss.flushed_pos();
864            if flushed < next_pos {
865                self.lzss.flush_to_output(next_pos);
866            }
867
868            // Execute the filter on the window (read-only) and get filtered output
869            let window = self.lzss.window();
870            if let Some((filter_end, filtered_data)) =
871                self.vm
872                    .execute_filter_at_index(filter_idx, window, window_mask, total_written)
873            {
874                // Write filtered data directly to output (bypasses window)
875                self.lzss.write_filtered_to_output(filtered_data, next_pos);
876                // Update next check to after this filter
877                self.next_filter_check = filter_end;
878            } else {
879                break;
880            }
881        }
882
883        // Update next_filter_check based on remaining filters
884        self.next_filter_check = self.vm.next_filter_end().unwrap_or(u64::MAX);
885    }
886
887    /// Decode a block using PPMd.
888    fn decode_block_ppm(&mut self, reader: &mut BitReader, max_size: u64) -> Result<()> {
889        let ppm = self
890            .ppm
891            .as_mut()
892            .ok_or(DecompressError::UnsupportedMethod(0x33))?;
893        let coder = self
894            .ppm_coder
895            .as_mut()
896            .ok_or(DecompressError::UnsupportedMethod(0x33))?;
897        let esc_char = self.ppm_esc_char;
898
899        while self.lzss.total_written() < max_size && !reader.is_eof() {
900            let ch = ppm.decode_char(coder, reader).map_err(|e| {
901                #[cfg(test)]
902                eprintln!(
903                    "PPM decode_char failed at pos {}: {}",
904                    self.lzss.total_written(),
905                    e
906                );
907                #[cfg(not(test))]
908                let _ = e;
909                DecompressError::InvalidHuffmanCode
910            })?;
911
912            if ch < 0 {
913                // Decode error
914                #[cfg(test)]
915                eprintln!("PPM decode_char returned negative: {}", ch);
916                return Err(DecompressError::InvalidHuffmanCode);
917            }
918
919            #[cfg(test)]
920            {
921                if self.lzss.total_written() < 20 {
922                    eprint!("[{}:{}] ", self.lzss.total_written(), ch);
923                }
924            }
925
926            if ch != esc_char {
927                // Regular character
928                self.lzss.write_literal(ch as u8);
929            } else {
930                // Escape sequence - decode control code
931                let ctrl = ppm
932                    .decode_char(coder, reader)
933                    .map_err(|_| DecompressError::InvalidHuffmanCode)?;
934
935                if ctrl < 0 {
936                    return Err(DecompressError::InvalidHuffmanCode);
937                }
938
939                match ctrl {
940                    0 => {
941                        // Should not happen (NextCh starts at 0)
942                        break;
943                    }
944                    1 => {
945                        // Write escape character itself
946                        self.lzss.write_literal(esc_char as u8);
947                    }
948                    2 => {
949                        // End of PPM block
950                        break;
951                    }
952                    3 => {
953                        // VM code - read and add to VM
954                        let first_byte = ppm
955                            .decode_char(coder, reader)
956                            .map_err(|_| DecompressError::InvalidHuffmanCode)?
957                            as u8;
958
959                        // Decode length from first byte
960                        let mut length = ((first_byte & 7) + 1) as u32;
961                        if length == 7 {
962                            let b1 = ppm
963                                .decode_char(coder, reader)
964                                .map_err(|_| DecompressError::InvalidHuffmanCode)?;
965                            length = (b1 as u32) + 7;
966                        } else if length == 8 {
967                            let b1 = ppm
968                                .decode_char(coder, reader)
969                                .map_err(|_| DecompressError::InvalidHuffmanCode)?;
970                            let b2 = ppm
971                                .decode_char(coder, reader)
972                                .map_err(|_| DecompressError::InvalidHuffmanCode)?;
973                            length = (b1 as u32) * 256 + (b2 as u32);
974                        }
975
976                        if length == 0 {
977                            continue;
978                        }
979
980                        // Read VM code bytes
981                        let mut vm_code = vec![0u8; length as usize];
982                        for i in 0..length as usize {
983                            let ch = ppm
984                                .decode_char(coder, reader)
985                                .map_err(|_| DecompressError::InvalidHuffmanCode)?;
986                            vm_code[i] = ch as u8;
987                        }
988
989                        // Add to VM
990                        let total_written = self.lzss.total_written();
991                        let window_mask = self.lzss.window_mask();
992                        self.vm
993                            .add_code(first_byte, &vm_code, total_written, window_mask);
994
995                        // Update next_filter_check when a filter is added
996                        if let Some(end) = self.vm.next_filter_end() {
997                            self.next_filter_check = self.next_filter_check.min(end);
998                        }
999                    }
1000                    4 => {
1001                        // LZ match: 3 bytes distance (MSB first), 1 byte length
1002                        let mut distance: u32 = 0;
1003                        for _ in 0..3 {
1004                            let ch = ppm
1005                                .decode_char(coder, reader)
1006                                .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1007                            distance = (distance << 8) + (ch as u32);
1008                        }
1009                        let len = ppm
1010                            .decode_char(coder, reader)
1011                            .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1012
1013                        // Distance+2, Length+32
1014                        let distance = distance + 2;
1015                        let length = (len as u32) + 32;
1016
1017                        self.lzss.copy_match(distance, length)?;
1018                        self.last_dist = distance;
1019                        self.last_len = length;
1020                    }
1021                    5 => {
1022                        // RLE match: 1 byte length, distance = 1
1023                        let len = ppm
1024                            .decode_char(coder, reader)
1025                            .map_err(|_| DecompressError::InvalidHuffmanCode)?;
1026
1027                        // Length+4, Distance=1
1028                        let length = (len as u32) + 4;
1029
1030                        self.lzss.copy_match(1, length)?;
1031                        self.last_dist = 1;
1032                        self.last_len = length;
1033                    }
1034                    _ => {
1035                        // Unknown control code - likely corruption
1036                        #[cfg(test)]
1037                        eprintln!("Unknown PPM control code: {}", ctrl);
1038                        return Err(DecompressError::InvalidHuffmanCode);
1039                    }
1040                }
1041            }
1042        }
1043
1044        Ok(())
1045    }
1046
1047    /// Reset the decoder state for a new file.
1048    pub fn reset(&mut self) {
1049        self.lzss.reset();
1050        self.vm.reset();
1051        // Keep ppm model for reuse (SubAllocator reuses buffer if same size)
1052        self.ppm_coder = None;
1053        self.ppm_esc_char = -1;
1054        self.old_dist = [0; 4];
1055        self.old_dist_ptr = 0;
1056        self.last_dist = 0;
1057        self.last_len = 0;
1058        self.ppm_mode = false;
1059        self.tables_read = false;
1060        self.prev_low_offset = 0;
1061        self.low_offset_repeat_count = 0;
1062        self.next_filter_check = u64::MAX;
1063    }
1064
1065    /// Get total bytes decompressed.
1066    pub fn bytes_written(&self) -> u64 {
1067        self.lzss.total_written()
1068    }
1069}
1070
1071impl Default for Rar29Decoder {
1072    fn default() -> Self {
1073        Self::new()
1074    }
1075}
1076
// WIP: streaming decoder
/// Streaming decompressor for RAR29.
///
/// Wraps a [`Rar29Decoder`] and buffers compressed input so that data can be supplied in
/// chunks rather than all at once.
// Work in progress, hence the `dead_code` allowance.
#[allow(dead_code)]
pub struct Rar29StreamDecoder {
    /// Underlying decoder that performs the decompression.
    decoder: Rar29Decoder,
    /// Accumulated compressed data; every chunk that is fed in is appended here.
    input_buffer: Vec<u8>,
    /// Current position in `input_buffer`; input is handed to the decoder from this offset.
    input_pos: usize,
    /// Total expected unpacked size, in bytes.
    unpacked_size: u64,
}
1090
1091#[allow(dead_code)]
1092impl Rar29StreamDecoder {
1093    /// Create a new streaming decoder.
1094    pub fn new(unpacked_size: u64) -> Self {
1095        Self {
1096            decoder: Rar29Decoder::new(),
1097            input_buffer: Vec::new(),
1098            input_pos: 0,
1099            unpacked_size,
1100        }
1101    }
1102
1103    /// Feed compressed data to the decoder.
1104    /// Returns decompressed data available so far.
1105    pub fn feed(&mut self, data: &[u8]) -> Result<Vec<u8>> {
1106        self.input_buffer.extend_from_slice(data);
1107
1108        // Try to decompress with available data
1109        let result = self
1110            .decoder
1111            .decompress(&self.input_buffer[self.input_pos..], self.unpacked_size)?;
1112
1113        Ok(result)
1114    }
1115
1116    /// Check if decompression is complete.
1117    pub fn is_complete(&self) -> bool {
1118        self.decoder.bytes_written() >= self.unpacked_size
1119    }
1120
1121    /// Get total bytes decompressed.
1122    pub fn bytes_written(&self) -> u64 {
1123        self.decoder.bytes_written()
1124    }
1125}
1126
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed decoder has written nothing and has not read any tables.
    #[test]
    fn test_decoder_creation() {
        let fresh = Rar29Decoder::new();
        assert_eq!(fresh.bytes_written(), 0);
        assert!(!fresh.tables_read);
    }

    // Further tests need real RAR-compressed fixtures.
}