Skip to main content

rars_codec/
rar29.rs

1use crate::filters::{self, DeltaErrorMessages, FilterOp};
2use crate::huffman;
3use crate::ppmd::{PpmdByteReader, PpmdDecoder, PpmdEncoder};
4use crate::rarvm;
5use crate::{Error, Result};
6use rars_crc32::crc32;
7use std::io::{Read, Write};
8use std::ops::Range;
9
/// Number of symbols in the main Huffman table.
const MAIN_COUNT: usize = 299;
/// Number of symbols in the match-offset Huffman table.
const OFFSET_COUNT: usize = 60;
/// Number of symbols in the low-offset Huffman table.
const LOW_OFFSET_COUNT: usize = 17;
/// Number of symbols in the match-length Huffman table.
const LENGTH_COUNT: usize = 28;
// NOTE(review): presumably the size of the level (code-length) alphabet; it is
// not referenced in this part of the file — confirm against its users.
const LEVEL_COUNT: usize = 20;
/// Total number of code lengths per block: the main, offset, low-offset and
/// length tables laid out back to back.
const TABLE_COUNT: usize = MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT + LENGTH_COUNT;
/// Maximum number of previously encoded bytes the encoder keeps as history.
const MAX_HISTORY: usize = 4 * 1024 * 1024;
// NOTE(review): presumably the buffer size used by the streaming paths; not
// referenced in this part of the file — confirm.
const STREAM_CHUNK: usize = 1024 * 1024;
/// Largest block covered by one E8/E8E9/Itanium VM filter record; also the
/// upper bound accepted for an RGB scanline width.
const MAX_VM_FILTER_BLOCK_SIZE: usize = 128 * 1024;
// The standard AUDIO bytecode uses separate input/output regions inside RARVM
// memory. Keep generated blocks below the overlap boundary accepted by period
// decoders.
const MAX_VM_DELTA_FILTER_BLOCK_SIZE: usize = 120_000;
const MAX_VM_AUDIO_FILTER_BLOCK_SIZE: usize = 120_000;
// NOTE(review): the limits below are not referenced in this part of the file;
// presumably they are sanity bounds applied when reading VM filters — confirm.
const MAX_VM_GLOBAL_DATA: usize = 0x2000;
const MAX_VM_CODE_SIZE: usize = 64 * 1024;
const MAX_VM_PROGRAMS: usize = 8192;
const MAX_VM_FILTERS: usize = 8192;
28
// NOTE(review): presumably the smallest length value represented by each
// length slot; the slot's extra bits (see `LENGTH_BITS`) select the exact
// value — confirm against the slot lookup code.
const LENGTH_BASES: [usize; LENGTH_COUNT] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
    160, 192, 224,
];
/// Number of extra bits written after each length slot.
const LENGTH_BITS: [u8; LENGTH_COUNT] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
];
// NOTE(review): presumably the smallest offset value represented by each
// offset slot — confirm against the slot lookup code.
const OFFSET_BASES: [usize; OFFSET_COUNT] = [
    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
    2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
    1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 2883584, 3145728, 3407872,
    3670016, 3932160,
];
/// Number of extra bits carried by each offset slot. For slots above 9 the low
/// four bits are coded through the low-offset Huffman table rather than raw.
const OFFSET_BITS: [u8; OFFSET_COUNT] = [
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
    13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18,
];
// NOTE(review): presumably base/extra-bit tables for the short-match symbols;
// not referenced in this part of the file — confirm.
const SHORT_BASES: [usize; 8] = [0, 4, 8, 16, 32, 64, 128, 192];
const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];
/// Default value of `EncodeOptions::max_match_distance`.
const MAX_ENCODER_MATCH_OFFSET: usize = 1024 * 1024;
const MAX_ENCODER_MATCH_LENGTH: usize = 258;
const MATCH_HASH_BUCKETS: usize = 4096;
/// Default value of `EncodeOptions::max_match_candidates`.
const MAX_MATCH_CANDIDATES: usize = 256;
const MAX_PPMD_MATCH_LENGTH: usize = 255;
const MIN_PPMD_MATCH_LENGTH: usize = 32;
const MAX_PPMD_REPEAT_LENGTH: usize = 259;
57
// RAR 3.x standard filters are stored as RARVM bytecode in the compressed
// stream. RAR15_40_FORMAT_SPECIFICATION.md §20 and FILTER_TRANSFORMS.md §9
// define these blobs by byte length plus CRC32 fingerprint; keep the bytes
// verbatim so writer output and reader recognition use the same wire identity.

/// Bytecode emitted for `Rar29FilterKind::E8`.
const RAR3_E8_FILTER_BYTECODE: &[u8] = &[
    0x97, 0x1b, 0x01, 0x28, 0x07, 0x06, 0x98, 0x08, 0x00, 0x00, 0x00, 0xd1, 0x3a, 0x10, 0x15, 0x92,
    0xec, 0x50, 0xcb, 0x99, 0x20, 0xb9, 0x25, 0xf0, 0x29, 0x19, 0x15, 0x53, 0x03, 0x12, 0xae, 0x51,
    0x10, 0x35, 0x59, 0x2b, 0x60, 0x04, 0x15, 0x6d, 0x40, 0x66, 0xab, 0x02, 0x34, 0x49, 0x04, 0x36,
    0x02, 0x52, 0x3e, 0x97, 0x00,
];
/// Bytecode emitted for `Rar29FilterKind::E8E9`.
const RAR3_E8E9_FILTER_BYTECODE: &[u8] = &[
    0x84, 0x1b, 0x01, 0x28, 0x11, 0x10, 0x69, 0x80, 0x80, 0x00, 0x00, 0x0d, 0x13, 0xa1, 0x01, 0xc6,
    0x89, 0xd2, 0x80, 0xac, 0x97, 0x62, 0x85, 0x5c, 0xc9, 0x05, 0xc9, 0x2f, 0x81, 0x48, 0xc8, 0xaa,
    0x98, 0x18, 0x95, 0x72, 0x88, 0x81, 0xaa, 0xc9, 0x5b, 0x00, 0x20, 0xab, 0x6a, 0x03, 0x35, 0x58,
    0x11, 0xa2, 0x48, 0x21, 0xb0, 0x12, 0x91, 0xf4, 0xb8,
];
/// Bytecode emitted for `Rar29FilterKind::Delta`.
const RAR3_DELTA_FILTER_BYTECODE: &[u8] = &[
    0x2f, 0x01, 0x9a, 0x41, 0x80, 0xec, 0x27, 0x48, 0x2f, 0x09, 0x76, 0x6d, 0xd3, 0xea, 0x41, 0x5b,
    0x59, 0x44, 0xe8, 0x17, 0x5c, 0xe1, 0x6c, 0x91, 0x4c, 0x4e, 0x3f, 0x77, 0x00,
];
/// Bytecode emitted for `Rar29FilterKind::Itanium`.
const RAR3_ITANIUM_FILTER_BYTECODE: &[u8] = &[
    0x46, 0x9e, 0x08, 0x08, 0x0c, 0x0c, 0x00, 0x00, 0x0e, 0x0e, 0x08, 0x08, 0x00, 0x00, 0x08, 0x08,
    0x00, 0x00, 0x6c, 0x11, 0x5a, 0x04, 0xac, 0x0c, 0xc4, 0xcc, 0x5c, 0x08, 0x18, 0x46, 0x24, 0x08,
    0xf9, 0xa0, 0x44, 0x25, 0x12, 0x12, 0x45, 0x85, 0x99, 0x0c, 0x14, 0x00, 0x26, 0x25, 0x58, 0x99,
    0x90, 0x03, 0x38, 0x1a, 0x08, 0xdc, 0x02, 0x30, 0x0c, 0x4e, 0xd1, 0x1d, 0x89, 0xa1, 0xe2, 0xd0,
    0x55, 0x11, 0x33, 0x60, 0x8c, 0x5a, 0x23, 0x06, 0xde, 0x06, 0x18, 0x00, 0x7f, 0xff, 0xfc, 0x4d,
    0xcc, 0x19, 0x17, 0xb3, 0x06, 0xc4, 0x44, 0xb2, 0x32, 0x5a, 0x44, 0xc4, 0xa6, 0x01, 0xf4, 0x24,
    0x88, 0x83, 0x38, 0xcc, 0xc4, 0x11, 0x09, 0x87, 0xa6, 0xe0, 0x46, 0x02, 0xb2, 0x24, 0x03, 0xe2,
    0xa0, 0x32, 0x54, 0x83, 0x52, 0xc5, 0xb1, 0x70,
];
/// Bytecode emitted for `Rar29FilterKind::Rgb`.
const RAR3_RGB_FILTER_BYTECODE: &[u8] = &[
    0xc5, 0x01, 0x9a, 0x41, 0x95, 0xc9, 0xa6, 0x4d, 0xba, 0x4b, 0x14, 0x0a, 0xf4, 0x9b, 0x80, 0x4c,
    0x00, 0x15, 0xa6, 0xa8, 0x07, 0x26, 0x2a, 0xc9, 0xc4, 0x8b, 0x86, 0x62, 0x32, 0x0f, 0x86, 0x64,
    0x24, 0x06, 0x66, 0x71, 0x19, 0x98, 0xcc, 0x43, 0x33, 0x31, 0x99, 0x00, 0x66, 0x88, 0x33, 0x30,
    0xcc, 0xd1, 0x0e, 0x98, 0x0b, 0x33, 0x34, 0x40, 0x0c, 0xd1, 0x46, 0x66, 0x19, 0x9a, 0x28, 0xcc,
    0x49, 0x80, 0xb3, 0x33, 0x45, 0x00, 0xcd, 0x18, 0x66, 0x61, 0x99, 0xa3, 0x0c, 0xc8, 0x98, 0x0b,
    0x33, 0x34, 0x60, 0x4c, 0xd1, 0x06, 0x68, 0xa5, 0x20, 0x62, 0x66, 0x88, 0x33, 0x46, 0x28, 0x05,
    0x0f, 0x32, 0x0c, 0x4c, 0xd1, 0x46, 0x68, 0xc5, 0x00, 0x41, 0xe4, 0x8f, 0xc8, 0x85, 0x5e, 0x02,
    0x7c, 0xc9, 0x26, 0x81, 0x83, 0xb0, 0x9d, 0xc2, 0xde, 0x9c, 0x78, 0xac, 0xd6, 0x68, 0xb4, 0x0e,
    0x71, 0xdb, 0xb2, 0x49, 0x38, 0x6e, 0x02, 0x2a, 0x2c, 0x41, 0x2b, 0x10, 0x98, 0x82, 0x49, 0x03,
    0x14, 0xf4, 0xe1, 0x97, 0x00,
];
/// Bytecode emitted for `Rar29FilterKind::Audio`. Unlike the other programs it
/// is re-sent with every filter record instead of being reused by index (see
/// `encoded_filter_records`).
const RAR3_AUDIO_FILTER_BYTECODE: &[u8] = &[
    0x47, 0x01, 0x9a, 0x41, 0x95, 0xe5, 0x72, 0x0d, 0xc2, 0x64, 0x82, 0x74, 0x93, 0x24, 0xb1, 0x40,
    0x06, 0xd8, 0x38, 0x44, 0x00, 0xa8, 0x01, 0x34, 0x11, 0xdc, 0xa1, 0xba, 0x01, 0x99, 0x0c, 0xc4,
    0x03, 0x31, 0x19, 0xa4, 0x06, 0x66, 0x22, 0x60, 0x4d, 0x9a, 0x40, 0x0d, 0x66, 0x8e, 0x60, 0xd0,
    0x30, 0x40, 0x18, 0x26, 0xc1, 0xc8, 0xf6, 0xe6, 0x26, 0x13, 0x78, 0x92, 0x08, 0xe8, 0x50, 0xbc,
    0x5a, 0x07, 0xc6, 0xe9, 0xf5, 0x20, 0xa9, 0xa0, 0xed, 0x37, 0x33, 0x47, 0x39, 0x66, 0x90, 0x70,
    0x19, 0xa3, 0x9b, 0xcf, 0x25, 0x83, 0x80, 0xc1, 0xbd, 0x30, 0x16, 0x6e, 0x23, 0x34, 0x93, 0x81,
    0x16, 0x09, 0xb0, 0x50, 0x18, 0x3b, 0x4d, 0xc8, 0x4c, 0x05, 0x9b, 0x88, 0xc5, 0x28, 0xe0, 0x76,
    0x93, 0x90, 0x98, 0x0b, 0x37, 0x11, 0x8a, 0x59, 0xc4, 0x80, 0x42, 0x48, 0x43, 0xa9, 0x47, 0xee,
    0x43, 0x34, 0x60, 0x47, 0xd4, 0x4a, 0x0d, 0xbb, 0xd3, 0x59, 0xa4, 0x86, 0xee, 0x05, 0x09, 0x40,
    0x26, 0xc9, 0x34, 0x24, 0x76, 0xa0, 0x30, 0x6a, 0x20, 0xea, 0x02, 0x20, 0x04, 0xa0, 0x41, 0x50,
    0x9e, 0x50, 0x3f, 0xe6, 0xe1, 0x28, 0x94, 0x46, 0x01, 0xbd, 0x8b, 0x40, 0xf0, 0x68, 0x11, 0x36,
    0xc9, 0xa1, 0x92, 0x38, 0x11, 0x41, 0x9c, 0xa8, 0x95, 0x10, 0xee, 0x50, 0x66, 0x2b, 0x00, 0x20,
    0x95, 0x11, 0x04, 0x02, 0x62, 0xac, 0x66, 0x8c, 0x6a, 0xca, 0x26, 0x40, 0xb2, 0x67, 0x1b, 0x4b,
    0x26, 0xcc, 0x64, 0x8a, 0x62, 0x71, 0xa2, 0xb8,
];
116
/// Decodes a single non-solid member with a freshly created [`Unpack29`]
/// decoder.
///
/// `input` is the packed member data and `output_size` is forwarded unchanged
/// to the decoder (presumably the expected unpacked size — confirm against
/// `Unpack29::decode_non_solid_member`).
///
/// # Errors
///
/// Returns whatever error the decoder reports.
pub fn unpack29_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
    let mut decoder = Unpack29::new();
    decoder.decode_non_solid_member(input, output_size)
}
121
/// Encodes `input` as one member using the default [`EncodeOptions`] and no
/// prior history.
///
/// # Errors
///
/// Propagates any error from the underlying member encoder.
pub fn unpack29_encode_literals(input: &[u8]) -> Result<Vec<u8>> {
    encode_member(input, &[])
}
125
/// Encodes `input` as one member using the caller-supplied `options` and no
/// prior history.
///
/// # Errors
///
/// Propagates any error from the underlying member encoder.
pub fn unpack29_encode_literals_with_options(
    input: &[u8],
    options: EncodeOptions,
) -> Result<Vec<u8>> {
    encode_member_with_options(input, &[], options)
}
132
/// Encodes `input` as a PPMd member with LZ escapes disabled and no VM filter
/// records.
///
/// # Errors
///
/// Propagates any error from the PPMd encoder.
pub fn unpack29_encode_ppmd_literals(input: &[u8]) -> Result<Vec<u8>> {
    encode_ppmd_member(input, false, &[])
}
136
/// Encodes `input` as a PPMd member with LZ escapes enabled and no VM filter
/// records.
///
/// # Errors
///
/// Propagates any error from the PPMd encoder.
pub fn unpack29_encode_ppmd(input: &[u8]) -> Result<Vec<u8>> {
    encode_ppmd_member(input, true, &[])
}
140
/// Applies `filter` to `input` and encodes the result as a PPMd member with LZ
/// escapes enabled.
///
/// # Errors
///
/// Fails when the filter specification is rejected or the PPMd encoder
/// reports an error.
pub fn unpack29_encode_ppmd_with_filter(input: &[u8], filter: Rar29FilterSpec) -> Result<Vec<u8>> {
    encode_ppmd_filtered_member(input, filter, true)
}
144
/// Applies `filter` to `input` and encodes the result as a PPMd member with LZ
/// escapes disabled.
///
/// # Errors
///
/// Fails when the filter specification is rejected or the PPMd encoder
/// reports an error.
pub fn unpack29_encode_ppmd_literals_with_filter(
    input: &[u8],
    filter: Rar29FilterSpec,
) -> Result<Vec<u8>> {
    encode_ppmd_filtered_member(input, filter, false)
}
151
152fn encode_ppmd_filtered_member(
153    input: &[u8],
154    filter: Rar29FilterSpec,
155    lz_escapes: bool,
156) -> Result<Vec<u8>> {
157    let filters = split_large_filter(input.len(), filter)?;
158    let filtered = filtered_members(input, &filters)?;
159    let records = encoded_filter_records(&filtered.records)?;
160    encode_ppmd_member(&filtered.data, lz_escapes, &records)
161}
162
163fn filtered_members(input: &[u8], filters: &[Rar29FilterSpec]) -> Result<FilteredMembers> {
164    let mut data = input.to_vec();
165    let mut records = Vec::with_capacity(filters.len());
166    for filter in filters {
167        let filtered = filtered_member(input, filter)?;
168        let range = filtered.block_start..filtered.block_start + filtered.block_size;
169        data[range.clone()].copy_from_slice(&filtered.data[range]);
170        records.push(OwnedVmFilterRecord {
171            block_start: filtered.block_start,
172            block_size: filtered.block_size,
173            init_regs: filtered.init_regs,
174            code: filtered.code,
175        });
176    }
177    Ok(FilteredMembers { data, records })
178}
179
/// Result of applying a list of filters to one member.
struct FilteredMembers {
    /// Copy of the input with every filtered block replaced by its
    /// transformed bytes.
    data: Vec<u8>,
    /// One record per applied filter, in the order the filters were given.
    records: Vec<OwnedVmFilterRecord>,
}
184
185fn split_large_filter(input_len: usize, filter: Rar29FilterSpec) -> Result<Vec<Rar29FilterSpec>> {
186    let range = filter.range.clone().unwrap_or(0..input_len);
187    if range.start >= range.end || range.end > input_len {
188        return Err(Error::InvalidData("RAR 2.9 VM filter range is invalid"));
189    }
190
191    let chunk_size = match filter.kind {
192        Rar29FilterKind::Delta { channels } => {
193            if channels == 0 || channels > MAX_VM_DELTA_FILTER_BLOCK_SIZE {
194                return Err(Error::InvalidData(
195                    "RAR 2.9 VM filter channel count is invalid",
196                ));
197            }
198            MAX_VM_DELTA_FILTER_BLOCK_SIZE - (MAX_VM_DELTA_FILTER_BLOCK_SIZE % channels)
199        }
200        Rar29FilterKind::Audio { channels } => {
201            if channels == 0 || channels > MAX_VM_AUDIO_FILTER_BLOCK_SIZE {
202                return Err(Error::InvalidData(
203                    "RAR 2.9 VM filter channel count is invalid",
204                ));
205            }
206            MAX_VM_AUDIO_FILTER_BLOCK_SIZE - (MAX_VM_AUDIO_FILTER_BLOCK_SIZE % channels)
207        }
208        Rar29FilterKind::Rgb { width, .. } => {
209            if width == 0 || width > MAX_VM_FILTER_BLOCK_SIZE {
210                return Err(Error::InvalidData(
211                    "RAR 2.9 RGB filter scanline width is invalid",
212                ));
213            }
214            MAX_VM_FILTER_BLOCK_SIZE - (MAX_VM_FILTER_BLOCK_SIZE % width)
215        }
216        Rar29FilterKind::E8 | Rar29FilterKind::E8E9 | Rar29FilterKind::Itanium => {
217            MAX_VM_FILTER_BLOCK_SIZE
218        }
219    };
220    if range.len() <= chunk_size {
221        return Ok(vec![filter]);
222    }
223    if chunk_size == 0 {
224        return Err(Error::InvalidData(
225            "RAR 2.9 VM filter chunk size is invalid",
226        ));
227    }
228
229    let mut filters = Vec::new();
230    let mut start = range.start;
231    while start < range.end {
232        let end = (start + chunk_size).min(range.end);
233        filters.push(Rar29FilterSpec::range(filter.kind, start..end));
234        start = end;
235    }
236    Ok(filters)
237}
238
/// Owned description of one VM filter invocation, produced by
/// `filtered_members` and later serialized by `encoded_filter_records`.
struct OwnedVmFilterRecord {
    /// Offset of the first filtered byte within the member.
    block_start: usize,
    /// Number of bytes the filter covers.
    block_size: usize,
    /// `(register index, value)` pairs used to initialise VM registers.
    init_regs: Vec<(usize, u32)>,
    /// Bytecode of the filter program (one of the `RAR3_*_FILTER_BYTECODE`
    /// constants).
    code: &'static [u8],
}
245
/// Encodes one PPMd member; currently a direct alias for
/// `encode_ppmd_block`, i.e. the member consists of a single block.
fn encode_ppmd_member(
    input: &[u8],
    lz_escapes: bool,
    initial_filters: &[Vec<u8>],
) -> Result<Vec<u8>> {
    encode_ppmd_block(input, lz_escapes, initial_filters)
}
253
254fn encode_ppmd_block(
255    input: &[u8],
256    lz_escapes: bool,
257    initial_filters: &[Vec<u8>],
258) -> Result<Vec<u8>> {
259    const PPMD_ORDER: usize = 8;
260    const PPMD_DICTIONARY_MB: u8 = 25;
261    const PPMD_ESC: u8 = 2;
262
263    let mut out = Vec::new();
264    out.push(0x80 | 0x20 | ((PPMD_ORDER as u8) - 1));
265    out.push(PPMD_DICTIONARY_MB - 1);
266    let mut encoder = PpmdEncoder::new(PPMD_ORDER, PPMD_ESC, usize::from(PPMD_DICTIONARY_MB))?;
267    for record in initial_filters {
268        encoder.encode_vm_filter_record(record)?;
269    }
270    for token in encode_ppmd_tokens(input, lz_escapes) {
271        match token {
272            PpmdEncodeToken::Literal(byte) => encoder.encode_literal(byte)?,
273            PpmdEncodeToken::RepeatOffsetOne { length } => {
274                encoder.encode_repeat_offset_one(length)?
275            }
276            PpmdEncodeToken::Match { offset, length } => encoder.encode_match(offset, length)?,
277        }
278    }
279    out.extend_from_slice(&encoder.finish()?);
280    Ok(out)
281}
282
/// One unit of work handed to the PPMd encoder by `encode_ppmd_block`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PpmdEncodeToken {
    /// A single byte, passed to `PpmdEncoder::encode_literal`.
    Literal(u8),
    /// Passed to `PpmdEncoder::encode_repeat_offset_one`; presumably a run
    /// copying from offset 1 — confirm against the token producer.
    RepeatOffsetOne { length: usize },
    /// A back-reference, passed to `PpmdEncoder::encode_match`.
    Match { offset: usize, length: usize },
}
289
/// Request to apply one standard filter to a member (or part of it).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rar29FilterSpec {
    /// Which filter to apply.
    pub kind: Rar29FilterKind,
    /// Byte range within the member to filter; `None` means the whole member.
    pub range: Option<Range<usize>>,
}
295
impl Rar29FilterSpec {
    /// Creates a spec that filters the entire member.
    pub fn whole(kind: Rar29FilterKind) -> Self {
        Self { kind, range: None }
    }

    /// Creates a spec that filters only `range` within the member.
    ///
    /// The range is not validated here; empty or out-of-bounds ranges are
    /// rejected when the spec is applied.
    pub fn range(kind: Rar29FilterKind, range: Range<usize>) -> Self {
        Self {
            kind,
            range: Some(range),
        }
    }
}
308
/// The standard filters this encoder can emit. Each kind maps to one of the
/// `RAR3_*_FILTER_BYTECODE` programs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rar29FilterKind {
    /// Uses `FilterOp::E8` and the E8 bytecode.
    E8,
    /// Uses `FilterOp::E8E9` and the E8E9 bytecode.
    E8E9,
    /// Delta filter; `channels` is passed to the VM in register 0.
    Delta { channels: usize },
    /// Itanium filter; takes no parameters.
    Itanium,
    /// RGB filter; register 0 receives `width + 3` and register 1 receives
    /// `pos_r` when it is non-zero.
    Rgb { width: usize, pos_r: usize },
    /// Audio filter; `channels` is passed to the VM in register 0.
    Audio { channels: usize },
}
318
/// Result of applying a single filter to a member.
struct FilteredMember {
    /// Full-length copy of the input in which only the filtered block has
    /// been transformed.
    data: Vec<u8>,
    /// Offset of the first filtered byte.
    block_start: usize,
    /// Number of filtered bytes.
    block_size: usize,
    /// `(register index, value)` pairs used to initialise VM registers.
    init_regs: Vec<(usize, u32)>,
    /// Bytecode of the filter program that reverses the transform.
    code: &'static [u8],
}
326
327fn filtered_member(input: &[u8], filter: &Rar29FilterSpec) -> Result<FilteredMember> {
328    let range = filter.range.clone().unwrap_or(0..input.len());
329    if range.start >= range.end || range.end > input.len() {
330        return Err(Error::InvalidData("RAR 2.9 VM filter range is invalid"));
331    }
332    let mut filtered = input.to_vec();
333    let (init_regs, code): (Vec<(usize, u32)>, &'static [u8]) = match filter.kind {
334        Rar29FilterKind::E8 => {
335            filters::encode_in_place(
336                FilterOp::E8,
337                &mut filtered[range.clone()],
338                range.start as u32,
339                rar29_delta_messages(),
340            )?;
341            (Vec::new(), RAR3_E8_FILTER_BYTECODE)
342        }
343        Rar29FilterKind::E8E9 => {
344            filters::encode_in_place(
345                FilterOp::E8E9,
346                &mut filtered[range.clone()],
347                range.start as u32,
348                rar29_delta_messages(),
349            )?;
350            (Vec::new(), RAR3_E8E9_FILTER_BYTECODE)
351        }
352        Rar29FilterKind::Delta { channels } => {
353            filters::encode_in_place(
354                FilterOp::Delta { channels },
355                &mut filtered[range.clone()],
356                0,
357                rar29_delta_messages(),
358            )?;
359            (vec![(0, channels as u32)], RAR3_DELTA_FILTER_BYTECODE)
360        }
361        Rar29FilterKind::Itanium => {
362            itanium_encode(&mut filtered[range.clone()], range.start as u32);
363            (Vec::new(), RAR3_ITANIUM_FILTER_BYTECODE)
364        }
365        Rar29FilterKind::Rgb { width, pos_r } => {
366            filtered[range.clone()].copy_from_slice(&rgb_encode(
367                &input[range.clone()],
368                width,
369                pos_r,
370            )?);
371            let init_regs = if pos_r == 0 {
372                vec![(0, width as u32 + 3)]
373            } else {
374                vec![(0, width as u32 + 3), (1, pos_r as u32)]
375            };
376            (init_regs, RAR3_RGB_FILTER_BYTECODE)
377        }
378        Rar29FilterKind::Audio { channels } => {
379            filtered[range.clone()]
380                .copy_from_slice(&audio_encode(&input[range.clone()], channels)?);
381            (vec![(0, channels as u32)], RAR3_AUDIO_FILTER_BYTECODE)
382        }
383    };
384    Ok(FilteredMember {
385        data: filtered,
386        block_start: range.start,
387        block_size: range.end - range.start,
388        init_regs,
389        code,
390    })
391}
392
/// Builds the set of RAR 2.9-specific error messages handed to
/// `filters::encode_in_place`.
fn rar29_delta_messages() -> DeltaErrorMessages {
    DeltaErrorMessages {
        invalid_channels: "RAR 2.9 DELTA filter channel count is invalid",
        zero_channels: "RAR 2.9 DELTA filter has zero channels",
        truncated_source: "RAR 2.9 DELTA filter source is truncated",
    }
}
400
/// Tuning knobs for the LZ member encoder.
///
/// Construct with [`EncodeOptions::new`] or [`Default::default`] and adjust
/// with the `with_*` builder methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct EncodeOptions {
    /// Limit on match candidates considered by the match finder (defaults to
    /// `MAX_MATCH_CANDIDATES`).
    pub max_match_candidates: usize,
    /// Whether lazy matching is enabled (off by default).
    pub lazy_matching: bool,
    /// Lookahead used by lazy matching (defaults to 1).
    pub lazy_lookahead: usize,
    /// Maximum match distance (defaults to `MAX_ENCODER_MATCH_OFFSET`).
    pub max_match_distance: usize,
    /// When `Some(n)` with `n != 0`, inputs longer than `n` bytes are encoded
    /// as a sequence of blocks of at most `n` bytes; `None` or `Some(0)`
    /// encodes the input as a single block.
    pub block_size: Option<usize>,
}
410
411impl EncodeOptions {
412    pub const fn new(max_match_candidates: usize) -> Self {
413        Self {
414            max_match_candidates,
415            lazy_matching: false,
416            lazy_lookahead: 1,
417            max_match_distance: MAX_ENCODER_MATCH_OFFSET,
418            block_size: None,
419        }
420    }
421
422    pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
423        self.lazy_matching = enabled;
424        self
425    }
426
427    pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
428        self.lazy_lookahead = bytes;
429        self
430    }
431
432    pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
433        self.max_match_distance = distance;
434        self
435    }
436
437    pub const fn with_block_size(mut self, bytes: usize) -> Self {
438        self.block_size = Some(bytes);
439        self
440    }
441}
442
impl Default for EncodeOptions {
    /// Equivalent to `EncodeOptions::new(MAX_MATCH_CANDIDATES)`.
    fn default() -> Self {
        Self::new(MAX_MATCH_CANDIDATES)
    }
}
448
/// Stateful member encoder that carries history from one member to the next.
#[derive(Debug, Clone, Default)]
pub struct Unpack29Encoder {
    /// Tail (at most `MAX_HISTORY` bytes) of the unfiltered input of all
    /// previously encoded members.
    history: Vec<u8>,
    /// Options applied to every member encoded through this instance.
    options: EncodeOptions,
}
454
455impl Unpack29Encoder {
456    pub fn new() -> Self {
457        Self::default()
458    }
459
460    pub fn with_options(options: EncodeOptions) -> Self {
461        Self {
462            history: Vec::new(),
463            options,
464        }
465    }
466
467    pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
468        let packed = encode_member_with_options(input, &self.history, self.options)?;
469        self.remember(input);
470        Ok(packed)
471    }
472
473    pub fn encode_member_with_filter(
474        &mut self,
475        input: &[u8],
476        filter: Rar29FilterSpec,
477    ) -> Result<Vec<u8>> {
478        let filters = split_large_filter(input.len(), filter)?;
479        let filtered = filtered_members(input, &filters)?;
480        let records = encoded_filter_records(&filtered.records)?;
481        let packed = encode_member_with_initial_filters(
482            &filtered.data,
483            &self.history,
484            &records,
485            self.options,
486        )?;
487        self.remember(input);
488        Ok(packed)
489    }
490
491    pub fn encode_member_with_filters(
492        &mut self,
493        input: &[u8],
494        filters: &[Rar29FilterSpec],
495    ) -> Result<Vec<u8>> {
496        let mut split_filters = Vec::new();
497        for filter in filters {
498            split_filters.extend(split_large_filter(input.len(), filter.clone())?);
499        }
500        let filtered = filtered_members(input, &split_filters)?;
501        let records = encoded_filter_records(&filtered.records)?;
502        let packed = encode_member_with_initial_filters(
503            &filtered.data,
504            &self.history,
505            &records,
506            self.options,
507        )?;
508        self.remember(input);
509        Ok(packed)
510    }
511
512    fn remember(&mut self, input: &[u8]) {
513        self.history.extend_from_slice(input);
514        let keep_from = self.history.len().saturating_sub(MAX_HISTORY);
515        if keep_from != 0 {
516            self.history.drain(..keep_from);
517        }
518    }
519}
520
/// Encodes one member with the default [`EncodeOptions`], using `history` as
/// the data preceding `input`.
fn encode_member(input: &[u8], history: &[u8]) -> Result<Vec<u8>> {
    encode_member_with_options(input, history, EncodeOptions::default())
}
524
525fn encode_member_with_options(
526    input: &[u8],
527    history: &[u8],
528    options: EncodeOptions,
529) -> Result<Vec<u8>> {
530    if let Some(block_size) = options.block_size.filter(|&size| size != 0) {
531        if input.len() > block_size {
532            return encode_member_blocks(input, history, options, block_size);
533        }
534    }
535    encode_member_inner(input, history, &[], options)
536}
537
538fn encode_member_blocks(
539    input: &[u8],
540    history: &[u8],
541    mut options: EncodeOptions,
542    block_size: usize,
543) -> Result<Vec<u8>> {
544    options.block_size = None;
545    let mut out = Vec::new();
546    let mut local_history = history[history.len().saturating_sub(MAX_HISTORY)..].to_vec();
547    for chunk in input.chunks(block_size) {
548        out.extend_from_slice(&encode_member_inner(chunk, &local_history, &[], options)?);
549        local_history.extend_from_slice(chunk);
550        let keep_from = local_history.len().saturating_sub(MAX_HISTORY);
551        if keep_from != 0 {
552            local_history.drain(..keep_from);
553        }
554    }
555    Ok(out)
556}
557
/// Encodes one member as a single block with the given serialized VM filter
/// records emitted before any data tokens.
///
/// Unlike `encode_member_with_options`, this path does not consult
/// `options.block_size`.
fn encode_member_with_initial_filters(
    input: &[u8],
    history: &[u8],
    filters: &[Vec<u8>],
    options: EncodeOptions,
) -> Result<Vec<u8>> {
    encode_member_inner(input, history, filters, options)
}
566
/// Encodes `input` as one Huffman-coded LZ block.
///
/// The function works in two passes over the token list produced by
/// `encode_tokens`:
///
/// 1. count symbol frequencies for the main, offset, low-offset and length
///    tables and derive code lengths (at most 15 bits) from them;
/// 2. write the block header, the level-coded table of code lengths, the
///    `initial_filters` records, every token, and finally the end-of-block
///    symbol.
///
/// Both passes replay the same `EncoderMatchState` so each match is classified
/// identically (last-length repeat, repeat offset, or fresh match).
///
/// # Errors
///
/// Returns `Error::InvalidData` when a symbol that must be emitted has no
/// Huffman code, and propagates errors from match classification and code
/// construction.
fn encode_member_inner(
    input: &[u8],
    history: &[u8],
    initial_filters: &[Vec<u8>],
    options: EncodeOptions,
) -> Result<Vec<u8>> {
    let tokens = encode_tokens(input, history, options);

    // Pass 1: gather symbol frequencies.
    let mut main_frequencies = vec![0usize; MAIN_COUNT];
    let mut offset_frequencies = vec![0usize; OFFSET_COUNT];
    let mut low_offset_frequencies = vec![0usize; LOW_OFFSET_COUNT];
    let mut length_frequencies = vec![0usize; LENGTH_COUNT];
    // Main symbol 257 introduces a VM filter record; one is emitted per filter.
    main_frequencies[257] += initial_filters.len();
    let mut match_state = EncoderMatchState::default();
    for token in &tokens {
        match *token {
            EncodeToken::Literal(byte) => {
                main_frequencies[byte as usize] += 1;
            }
            EncodeToken::Match { length, offset } => {
                match match_state.encode_match(length, offset)? {
                    EncodedMatch::LastLengthRepeat => {
                        // Main symbol 258: last-length repeat.
                        main_frequencies[258] += 1;
                    }
                    EncodedMatch::RepeatOffset {
                        index, length_slot, ..
                    } => {
                        // Main symbols from 259: repeat-offset, followed by a
                        // length coded with the separate length table.
                        main_frequencies[259 + index] += 1;
                        length_frequencies[length_slot] += 1;
                    }
                    EncodedMatch::Fresh {
                        length_slot,
                        offset_slot,
                        offset_extra,
                        ..
                    } => {
                        // Main symbols from 271 carry the length slot of a
                        // fresh match.
                        main_frequencies[271 + length_slot] += 1;
                        offset_frequencies[offset_slot] += 1;
                        // Offset slots above 9 code their low four bits with
                        // the low-offset table.
                        if offset_slot > 9 {
                            low_offset_frequencies[offset_extra & 0x0f] += 1;
                        }
                    }
                }
                match_state.remember(length, offset);
            }
        }
    }
    // Main symbol 256: end of block, always emitted exactly once.
    main_frequencies[256] += 1;

    let mut table_lengths = [0u8; TABLE_COUNT];
    // Give the low-offset table one used symbol when nothing referenced it.
    if low_offset_frequencies
        .iter()
        .all(|&frequency| frequency == 0)
    {
        low_offset_frequencies[0] = 1;
    }
    let main_lengths = huffman::lengths_for_frequencies(&main_frequencies, 15);
    let offset_lengths = huffman::lengths_for_frequencies(&offset_frequencies, 15);
    let low_offset_lengths = huffman::lengths_for_frequencies(&low_offset_frequencies, 15);
    let length_lengths = huffman::lengths_for_frequencies(&length_frequencies, 15);
    // Lay the four tables out back to back: main, offset, low-offset, length.
    table_lengths[..MAIN_COUNT].copy_from_slice(&main_lengths);
    table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT].copy_from_slice(&offset_lengths);
    table_lengths[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT]
        .copy_from_slice(&low_offset_lengths);
    table_lengths[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..].copy_from_slice(&length_lengths);

    // The code lengths themselves are transmitted with a small "level" code.
    let level_tokens = encode_table_level_tokens(&table_lengths);
    let level_lengths = level_code_lengths(&level_tokens);
    let level_codes = canonical_codes(&level_lengths)?;
    let main_codes = canonical_codes(&table_lengths[..MAIN_COUNT])?;

    // Pass 2: emit the bit stream.
    let mut bits = BitWriter::default();
    bits.write_bit(false); // LZ block.
    bits.write_bit(false); // do not keep previous tables.
    // Each level code length is written as a raw 4-bit value.
    // NOTE(review): presumably `level_code_lengths` never yields 15, which
    // period decoders treat as an escape in this field — confirm.
    for &len in &level_lengths {
        bits.write_bits(len as u32, 4);
    }
    for token in level_tokens {
        let code = level_codes[token.symbol].ok_or(Error::InvalidData(
            "RAR 2.9 encoder missing level Huffman code",
        ))?;
        bits.write_bits(code.code as u32, code.len);
        if token.extra_bits != 0 {
            bits.write_bits(token.extra_value as u32, token.extra_bits);
        }
    }
    let offset_codes = canonical_codes(&table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
    let low_offset_codes = canonical_codes(
        &table_lengths[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT],
    )?;
    let length_codes =
        canonical_codes(&table_lengths[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..])?;
    // Filter records come first: symbol 257 followed by the raw record bytes.
    for filter in initial_filters {
        let code = main_codes[257].ok_or(Error::InvalidData(
            "RAR 2.9 encoder missing VM filter Huffman code",
        ))?;
        bits.write_bits(code.code as u32, code.len);
        for &byte in filter {
            bits.write_bits(u32::from(byte), 8);
        }
    }
    // Restart match classification so it mirrors the counting pass exactly.
    let mut match_state = EncoderMatchState::default();
    for token in tokens {
        match token {
            EncodeToken::Literal(byte) => {
                let code = main_codes[byte as usize].ok_or(Error::InvalidData(
                    "RAR 2.9 encoder missing literal Huffman code",
                ))?;
                bits.write_bits(code.code as u32, code.len);
            }
            EncodeToken::Match { length, offset } => {
                match match_state.encode_match(length, offset)? {
                    EncodedMatch::LastLengthRepeat => {
                        let code = main_codes[258].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing last-length repeat Huffman code",
                        ))?;
                        bits.write_bits(code.code as u32, code.len);
                    }
                    EncodedMatch::RepeatOffset {
                        index,
                        length_slot,
                        length_extra,
                    } => {
                        // Repeat-offset symbol, then length slot and its extra
                        // bits.
                        let code = main_codes[259 + index].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing repeat-offset Huffman code",
                        ))?;
                        bits.write_bits(code.code as u32, code.len);
                        let length_code = length_codes[length_slot].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing repeat length Huffman code",
                        ))?;
                        bits.write_bits(length_code.code as u32, length_code.len);
                        if LENGTH_BITS[length_slot] != 0 {
                            bits.write_bits(length_extra as u32, LENGTH_BITS[length_slot]);
                        }
                    }
                    EncodedMatch::Fresh {
                        length_slot,
                        length_extra,
                        offset_slot,
                        offset_extra,
                    } => {
                        // Length slot (in the main symbol) and its extra bits
                        // come first, then the offset slot.
                        let code = main_codes[271 + length_slot].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing match Huffman code",
                        ))?;
                        bits.write_bits(code.code as u32, code.len);
                        if LENGTH_BITS[length_slot] != 0 {
                            bits.write_bits(length_extra as u32, LENGTH_BITS[length_slot]);
                        }
                        let offset = offset_codes[offset_slot].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing offset Huffman code",
                        ))?;
                        bits.write_bits(offset.code as u32, offset.len);
                        if offset_slot > 9 {
                            // High extra bits are written raw; the low four
                            // bits go through the low-offset Huffman table.
                            let offset_bits = OFFSET_BITS[offset_slot];
                            if offset_bits > 4 {
                                bits.write_bits((offset_extra >> 4) as u32, offset_bits - 4);
                            }
                            let low_offset =
                                low_offset_codes[offset_extra & 0x0f].ok_or(Error::InvalidData(
                                    "RAR 2.9 encoder missing low-offset Huffman code",
                                ))?;
                            bits.write_bits(low_offset.code as u32, low_offset.len);
                        } else if OFFSET_BITS[offset_slot] != 0 {
                            // Small slots write all of their extra bits raw.
                            bits.write_bits(offset_extra as u32, OFFSET_BITS[offset_slot]);
                        }
                    }
                }
                match_state.remember(length, offset);
            }
        }
    }
    let end = main_codes[256].ok_or(Error::InvalidData(
        "RAR 2.9 encoder missing end-of-block Huffman code",
    ))?;
    bits.write_bits(end.code as u32, end.len);
    bits.write_bit(true); // end member, no following table.
    Ok(bits.finish())
}
744
745fn encoded_filter_records(filters: &[OwnedVmFilterRecord]) -> Result<Vec<Vec<u8>>> {
746    let mut programs: Vec<&'static [u8]> = Vec::new();
747    let mut records = Vec::with_capacity(filters.len());
748    for filter in filters {
749        let existing = (filter.code != RAR3_AUDIO_FILTER_BYTECODE)
750            .then(|| programs.iter().position(|&code| code == filter.code))
751            .flatten();
752        let (program_selector, include_code) = match existing {
753            Some(index) => (
754                u32::try_from(index + 1)
755                    .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?,
756                false,
757            ),
758            None => {
759                let selector = if programs.is_empty() {
760                    0
761                } else {
762                    u32::try_from(programs.len() + 1)
763                        .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?
764                };
765                programs.push(filter.code);
766                (selector, true)
767            }
768        };
769        records.push(encode_vm_filter_record_inner(
770            VmFilterRecord {
771                block_start: filter.block_start,
772                block_size: filter.block_size,
773                init_regs: &filter.init_regs,
774                code: filter.code,
775            },
776            program_selector,
777            include_code,
778        )?);
779    }
780    Ok(records)
781}
782
/// Borrowed description of one VM filter invocation, as consumed by
/// `encode_vm_filter_record_inner`.
#[derive(Debug, Clone, Copy)]
struct VmFilterRecord<'a> {
    /// Block start value; written to the record as an encoded `u32`.
    block_start: usize,
    /// Block size value; the record encoder rejects zero.
    block_size: usize,
    /// `(register index, value)` pairs; the record encoder rejects indices of 7 or more.
    init_regs: &'a [(usize, u32)],
    /// Filter bytecode; only emitted when the record is asked to include code.
    code: &'a [u8],
}
790
791fn encode_vm_filter_record_inner(
792    record: VmFilterRecord<'_>,
793    program_selector: u32,
794    include_code: bool,
795) -> Result<Vec<u8>> {
796    if record.block_size == 0 {
797        return Err(Error::InvalidData("RAR 2.9 VM filter block is empty"));
798    }
799    if include_code && record.code.is_empty() {
800        return Err(Error::InvalidData("RAR 2.9 VM filter bytecode is empty"));
801    }
802
803    let mut body = BitWriter::default();
804    body.write_encoded_u32(program_selector);
805    body.write_encoded_u32(
806        u32::try_from(record.block_start)
807            .map_err(|_| Error::InvalidData("RAR 2.9 VM block start overflows"))?,
808    );
809    body.write_encoded_u32(
810        u32::try_from(record.block_size)
811            .map_err(|_| Error::InvalidData("RAR 2.9 VM block size overflows"))?,
812    );
813    if !record.init_regs.is_empty() {
814        let mut mask = 0u32;
815        for &(index, _) in record.init_regs {
816            if index >= 7 {
817                return Err(Error::InvalidData(
818                    "RAR 2.9 VM init register index is invalid",
819                ));
820            }
821            mask |= 1 << index;
822        }
823        body.write_bits(mask, 7);
824        for index in 0..7 {
825            if let Some((_, value)) = record.init_regs.iter().find(|(reg, _)| *reg == index) {
826                body.write_encoded_u32(*value);
827            }
828        }
829    }
830    if include_code {
831        body.write_encoded_u32(
832            u32::try_from(record.code.len())
833                .map_err(|_| Error::InvalidData("RAR 2.9 VM code size overflows"))?,
834        );
835        for &byte in record.code {
836            body.write_bits(u32::from(byte), 8);
837        }
838    }
839    let body = body.finish();
840
841    let mut out = Vec::new();
842    let mut first = 0x80 | 0x20;
843    if !record.init_regs.is_empty() {
844        first |= 0x10;
845    }
846    match body.len() {
847        1..=6 => first |= (body.len() as u8) - 1,
848        7..=262 => {
849            first |= 6;
850            out.push((body.len() - 7) as u8);
851        }
852        263..=65535 => {
853            first |= 7;
854            out.extend_from_slice(&(body.len() as u16).to_be_bytes());
855        }
856        _ => return Err(Error::InvalidData("RAR 2.9 VM filter record is too large")),
857    }
858    out.insert(0, first);
859    out.extend_from_slice(&body);
860    Ok(out)
861}
862
863fn rgb_encode(data: &[u8], width: usize, pos_r: usize) -> Result<Vec<u8>> {
864    if data.len() < 3 || width == 0 || !width.is_multiple_of(3) || width > data.len() || pos_r > 2 {
865        return Err(Error::InvalidData(
866            "RAR 2.9 RGB filter parameters are invalid",
867        ));
868    }
869    let mut work = data.to_vec();
870    for i in (pos_r..work.len().saturating_sub(2)).step_by(3) {
871        let green = work[i + 1];
872        work[i] = work[i].wrapping_sub(green);
873        work[i + 2] = work[i + 2].wrapping_sub(green);
874    }
875
876    let mut out = Vec::with_capacity(data.len());
877    for channel in 0..3 {
878        let mut prev = 0u8;
879        let mut i = channel;
880        while i < work.len() {
881            let predicted = if i >= width + 3 {
882                rgb_predict(prev, work[i - width], work[i - width - 3])
883            } else {
884                prev
885            };
886            let byte = work[i];
887            out.push(predicted.wrapping_sub(byte));
888            prev = byte;
889            i += 3;
890        }
891    }
892    Ok(out)
893}
894
895fn audio_encode(data: &[u8], channels: usize) -> Result<Vec<u8>> {
896    if channels == 0 || channels > 32 {
897        return Err(Error::InvalidData(
898            "RAR 2.9 AUDIO filter channel count is invalid",
899        ));
900    }
901    let mut out = Vec::with_capacity(data.len());
902    for channel in 0..channels {
903        let mut prev_byte = 0u32;
904        let mut prev_delta = 0i32;
905        let mut d1 = 0i32;
906        let mut d2 = 0i32;
907        let mut k1 = 0i32;
908        let mut k2 = 0i32;
909        let mut k3 = 0i32;
910        let mut dif = [0u32; 7];
911        let mut byte_count = 0usize;
912        let mut i = channel;
913        while i < data.len() {
914            let d3 = d2;
915            d2 = prev_delta - d1;
916            d1 = prev_delta;
917            let predicted = ((8 * prev_byte as i32 + k1 * d1 + k2 * d2 + k3 * d3) >> 3) & 0xff;
918            let decoded = data[i];
919            let encoded = (predicted as u8).wrapping_sub(decoded);
920            out.push(encoded);
921            prev_delta = decoded.wrapping_sub(prev_byte as u8) as i8 as i32;
922            prev_byte = decoded as u32;
923            let d = (encoded as i8 as i32) << 3;
924            dif[0] += d.unsigned_abs();
925            dif[1] += (d - d1).unsigned_abs();
926            dif[2] += (d + d1).unsigned_abs();
927            dif[3] += (d - d2).unsigned_abs();
928            dif[4] += (d + d2).unsigned_abs();
929            dif[5] += (d - d3).unsigned_abs();
930            dif[6] += (d + d3).unsigned_abs();
931            if byte_count & 0x1f == 0 {
932                let mut min = dif[0];
933                let mut min_index = 0usize;
934                dif[0] = 0;
935                for (index, value) in dif.iter_mut().enumerate().skip(1) {
936                    if *value < min {
937                        min = *value;
938                        min_index = index;
939                    }
940                    *value = 0;
941                }
942                match min_index {
943                    1 if k1 >= -16 => k1 -= 1,
944                    2 if k1 < 16 => k1 += 1,
945                    3 if k2 >= -16 => k2 -= 1,
946                    4 if k2 < 16 => k2 += 1,
947                    5 if k3 >= -16 => k3 -= 1,
948                    6 if k3 < 16 => k3 += 1,
949                    _ => {}
950                }
951            }
952            byte_count += 1;
953            i += channels;
954        }
955    }
956    Ok(out)
957}
958
/// Applies the forward Itanium filter to `data` in place.
///
/// The buffer is walked in 16-byte bundles while more than 21 bytes remain
/// from the bundle start. The low bits of a bundle's first byte select which
/// of its three slots are inspected; for each inspected slot whose 4-bit
/// opcode field equals 5, the bundle number (`file_offset >> 4` plus the
/// bundle index) is added to the slot's 20-bit field.
///
/// Buffers of 21 bytes or fewer are left untouched.
fn itanium_encode(data: &mut [u8], file_offset: u32) {
    const FIELD_MASK: u32 = 0x000f_ffff;

    let Some(span) = data.len().checked_sub(21).filter(|&span| span != 0) else {
        return;
    };
    let first_bundle = file_offset >> 4;

    for (bundle_index, bundle_start) in (0..span).step_by(16).enumerate() {
        let bundle_number = first_bundle.wrapping_add(bundle_index as u32);
        // Lookup packed into a constant: yields 0 (nothing to patch) or the
        // shift preceding the first slot to inspect.
        let template = (0x334b_0000u32 >> (data[bundle_start] & 0x1e)) & 3;
        if template == 0 {
            continue;
        }
        for shift in (template + 1)..=4 {
            let at = bundle_start + (shift as usize * 5 - 8);
            if (data[at + 3] >> shift) & 15 != 5 {
                continue;
            }
            let word = u32::from_le_bytes([data[at], data[at + 1], data[at + 2], data[at + 3]]);
            let relocated = (word >> shift).wrapping_add(bundle_number) & FIELD_MASK;
            let patched = (word & !(FIELD_MASK << shift)) | (relocated << shift);
            data[at..at + 4].copy_from_slice(&patched.to_le_bytes());
        }
    }
}
985
/// One unit of LZ encoder output as produced by `encode_tokens`.
#[derive(Debug, Clone, Copy)]
enum EncodeToken {
    /// A single byte emitted verbatim.
    Literal(u8),
    /// A back-reference copying `length` bytes from `offset` bytes behind the
    /// current position.
    Match { length: usize, offset: usize },
}
991
/// Encoder-side memory of recently used matches; decides whether a match can
/// be coded as a repeat instead of a fresh length/offset pair.
#[derive(Debug, Clone, Copy, Default)]
struct EncoderMatchState {
    /// Most recently used offsets, newest first; zero marks an unused entry.
    old_offsets: [usize; 4],
    /// Offset of the last remembered match.
    last_offset: usize,
    /// Length of the last remembered match; zero until a match is remembered.
    last_length: usize,
}
998
/// The coding form `EncoderMatchState::encode_match` selects for a match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EncodedMatch {
    /// Same length and offset as the previously remembered match.
    LastLengthRepeat,
    /// Offset found at `index` in the recent-offset list; only the length is
    /// coded, as a slot plus extra bits.
    RepeatOffset {
        index: usize,
        length_slot: usize,
        length_extra: usize,
    },
    /// Length and offset are both coded as table slots plus extra bits.
    Fresh {
        length_slot: usize,
        length_extra: usize,
        offset_slot: usize,
        offset_extra: usize,
    },
}
1014
1015impl EncoderMatchState {
1016    fn encode_match(&self, length: usize, offset: usize) -> Result<EncodedMatch> {
1017        if offset == self.last_offset && length == self.last_length && self.last_length != 0 {
1018            return Ok(EncodedMatch::LastLengthRepeat);
1019        }
1020        if let Some(index) = self
1021            .old_offsets
1022            .iter()
1023            .position(|&old_offset| old_offset == offset && old_offset != 0)
1024        {
1025            let (length_slot, length_extra) = length_slot_for_repeat_match(length)?;
1026            return Ok(EncodedMatch::RepeatOffset {
1027                index,
1028                length_slot,
1029                length_extra,
1030            });
1031        }
1032        let encoded_length =
1033            length
1034                .checked_sub(match_length_adjustment(offset))
1035                .ok_or(Error::InvalidData(
1036                    "RAR 2.9 adjusted match length underflows",
1037                ))?;
1038        let (length_slot, length_extra) = length_slot_for_match(encoded_length)?;
1039        let (offset_slot, offset_extra) = offset_slot_for_match(offset)?;
1040        Ok(EncodedMatch::Fresh {
1041            length_slot,
1042            length_extra,
1043            offset_slot,
1044            offset_extra,
1045        })
1046    }
1047
1048    fn remember(&mut self, length: usize, offset: usize) {
1049        if offset == self.last_offset && length == self.last_length && self.last_length != 0 {
1050            return;
1051        }
1052        if let Some(index) = self
1053            .old_offsets
1054            .iter()
1055            .position(|&old_offset| old_offset == offset)
1056        {
1057            self.old_offsets[..=index].rotate_right(1);
1058        } else {
1059            self.old_offsets.rotate_right(1);
1060            self.old_offsets[0] = offset;
1061        }
1062        self.last_offset = offset;
1063        self.last_length = length;
1064    }
1065}
1066
1067fn encode_tokens(input: &[u8], history: &[u8], options: EncodeOptions) -> Vec<EncodeToken> {
1068    let mut tokens = Vec::new();
1069    let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1070    let history = &history[history.len().saturating_sub(options.max_match_distance)..];
1071    let mut combined = Vec::with_capacity(history.len() + input.len());
1072    combined.extend_from_slice(history);
1073    combined.extend_from_slice(input);
1074    for history_pos in 0..history.len().saturating_sub(2) {
1075        insert_match_position(&combined, history_pos, &mut buckets);
1076    }
1077
1078    let mut pos = history.len();
1079    let end = combined.len();
1080    let mut state = EncoderMatchState::default();
1081    while pos < end {
1082        if let Some(candidate) = best_match(&combined, pos, end, &buckets, options, &state) {
1083            if should_lazy_emit_literal(&combined, pos, end, &buckets, options, &state, candidate) {
1084                tokens.push(EncodeToken::Literal(combined[pos]));
1085                insert_match_position(&combined, pos, &mut buckets);
1086                pos += 1;
1087                continue;
1088            }
1089            let MatchCandidate { length, offset, .. } = candidate;
1090            tokens.push(EncodeToken::Match { length, offset });
1091            state.remember(length, offset);
1092            for history_pos in pos..pos + length {
1093                insert_match_position(&combined, history_pos, &mut buckets);
1094            }
1095            pos += length;
1096        } else {
1097            tokens.push(EncodeToken::Literal(combined[pos]));
1098            insert_match_position(&combined, pos, &mut buckets);
1099            pos += 1;
1100        }
1101    }
1102    tokens
1103}
1104
1105fn should_lazy_emit_literal(
1106    input: &[u8],
1107    pos: usize,
1108    end: usize,
1109    buckets: &[Vec<usize>],
1110    options: EncodeOptions,
1111    state: &EncoderMatchState,
1112    current: MatchCandidate,
1113) -> bool {
1114    if !options.lazy_matching || pos + 1 >= end {
1115        return false;
1116    }
1117    let lookahead = options.lazy_lookahead.max(1);
1118    (1..=lookahead)
1119        .take_while(|offset| pos + offset < end)
1120        .any(|offset| {
1121            best_match(input, pos + offset, end, buckets, options, state).is_some_and(|next| {
1122                let skipped_literal_score = offset as isize * 8;
1123                next.score > current.score + skipped_literal_score
1124            })
1125        })
1126}
1127
/// A potential back-reference together with its estimated benefit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct MatchCandidate {
    /// Number of matching bytes.
    length: usize,
    /// Distance back to the start of the matching bytes.
    offset: usize,
    /// `length * 8` minus the estimated coding cost; higher is better.
    score: isize,
}
1134
1135fn encode_ppmd_tokens(input: &[u8], lz_escapes: bool) -> Vec<PpmdEncodeToken> {
1136    if !lz_escapes {
1137        return input
1138            .iter()
1139            .copied()
1140            .map(PpmdEncodeToken::Literal)
1141            .collect();
1142    }
1143
1144    let mut tokens = Vec::new();
1145    let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1146    let mut pos = 0usize;
1147    while pos < input.len() {
1148        if let Some(length) = ppmd_offset_one_repeat(input, pos) {
1149            tokens.push(PpmdEncodeToken::RepeatOffsetOne { length });
1150            for history_pos in pos..pos + length {
1151                insert_match_position(input, history_pos, &mut buckets);
1152            }
1153            pos += length;
1154            continue;
1155        }
1156
1157        if let Some((length, offset)) = best_ppmd_match(input, pos, &buckets) {
1158            tokens.push(PpmdEncodeToken::Match { offset, length });
1159            for history_pos in pos..pos + length {
1160                insert_match_position(input, history_pos, &mut buckets);
1161            }
1162            pos += length;
1163            continue;
1164        }
1165
1166        tokens.push(PpmdEncodeToken::Literal(input[pos]));
1167        insert_match_position(input, pos, &mut buckets);
1168        pos += 1;
1169    }
1170    tokens
1171}
1172
1173fn ppmd_offset_one_repeat(input: &[u8], pos: usize) -> Option<usize> {
1174    if pos == 0 || input[pos] != input[pos - 1] {
1175        return None;
1176    }
1177    let mut length = 0usize;
1178    while pos + length < input.len()
1179        && input[pos + length] == input[pos - 1]
1180        && length < MAX_PPMD_REPEAT_LENGTH
1181    {
1182        length += 1;
1183    }
1184    (length >= 4).then_some(length)
1185}
1186
1187fn best_ppmd_match(input: &[u8], pos: usize, buckets: &[Vec<usize>]) -> Option<(usize, usize)> {
1188    let max_offset = pos.min(0x1000001).min(MAX_ENCODER_MATCH_OFFSET);
1189    let max_length = (input.len() - pos).min(MAX_PPMD_MATCH_LENGTH);
1190    if max_offset < 2 || max_length < MIN_PPMD_MATCH_LENGTH || pos + 2 >= input.len() {
1191        return None;
1192    }
1193    let bucket = &buckets[match_hash(input, pos)];
1194    let mut best = None;
1195    let mut checked = 0usize;
1196    for &candidate in bucket.iter().rev() {
1197        if candidate >= pos {
1198            continue;
1199        }
1200        let offset = pos - candidate;
1201        if offset > max_offset {
1202            break;
1203        }
1204        if offset < 2 {
1205            continue;
1206        }
1207        checked += 1;
1208        let mut length = 0usize;
1209        while length < max_length && input[pos + length] == input[pos + length - offset] {
1210            length += 1;
1211        }
1212        if length >= MIN_PPMD_MATCH_LENGTH
1213            && best.is_none_or(|(best_length, best_offset)| {
1214                length > best_length || (length == best_length && offset < best_offset)
1215            })
1216        {
1217            best = Some((length, offset));
1218            if length == max_length {
1219                break;
1220            }
1221        }
1222        if checked >= MAX_MATCH_CANDIDATES {
1223            break;
1224        }
1225    }
1226    best
1227}
1228
1229fn best_match(
1230    input: &[u8],
1231    pos: usize,
1232    end: usize,
1233    buckets: &[Vec<usize>],
1234    options: EncodeOptions,
1235    state: &EncoderMatchState,
1236) -> Option<MatchCandidate> {
1237    let max_offset = pos.min(options.max_match_distance);
1238    let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
1239    if options.max_match_candidates == 0
1240        || max_offset == 0
1241        || max_length < 4
1242        || pos + 2 >= input.len()
1243    {
1244        return None;
1245    }
1246    let bucket = &buckets[match_hash(input, pos)];
1247    let mut best = None;
1248    let mut checked = 0usize;
1249    for offset in state.old_offsets {
1250        if offset == 0 || offset > max_offset {
1251            continue;
1252        }
1253        let length = match_length(input, pos, offset, max_length);
1254        consider_match_candidate(&mut best, state, length, offset);
1255    }
1256    for &candidate in bucket.iter().rev() {
1257        if candidate >= pos {
1258            continue;
1259        }
1260        let offset = pos - candidate;
1261        if offset > max_offset {
1262            break;
1263        }
1264        checked += 1;
1265        let length = match_length(input, pos, offset, max_length);
1266        consider_match_candidate(&mut best, state, length, offset);
1267        if best.is_some_and(|candidate| candidate.length == max_length) {
1268            break;
1269        }
1270        if checked >= options.max_match_candidates {
1271            break;
1272        }
1273    }
1274    best
1275}
1276
/// Thin local alias that forwards all arguments unchanged to
/// `crate::fast::match_length`.
fn match_length(input: &[u8], pos: usize, offset: usize, max_length: usize) -> usize {
    crate::fast::match_length(input, pos, offset, max_length)
}
1280
1281fn consider_match_candidate(
1282    best: &mut Option<MatchCandidate>,
1283    state: &EncoderMatchState,
1284    length: usize,
1285    offset: usize,
1286) {
1287    if length < 4 {
1288        return;
1289    }
1290    let Ok(cost) = estimated_match_cost(state, length, offset) else {
1291        return;
1292    };
1293    let score = (length as isize * 8) - cost as isize;
1294    let candidate = MatchCandidate {
1295        length,
1296        offset,
1297        score,
1298    };
1299    if best.is_none_or(|best| {
1300        candidate.score > best.score
1301            || (candidate.score == best.score
1302                && (candidate.length > best.length
1303                    || (candidate.length == best.length && candidate.offset < best.offset)))
1304    }) {
1305        *best = Some(candidate);
1306    }
1307}
1308
1309fn estimated_match_cost(state: &EncoderMatchState, length: usize, offset: usize) -> Result<usize> {
1310    match state.encode_match(length, offset)? {
1311        EncodedMatch::LastLengthRepeat => Ok(2),
1312        EncodedMatch::RepeatOffset { length_slot, .. } => {
1313            Ok(5 + usize::from(LENGTH_BITS[length_slot]))
1314        }
1315        EncodedMatch::Fresh {
1316            length_slot,
1317            offset_slot,
1318            ..
1319        } => {
1320            let low_offset_cost = usize::from(offset_slot > 9) * 4;
1321            Ok(8 + usize::from(LENGTH_BITS[length_slot])
1322                + usize::from(OFFSET_BITS[offset_slot])
1323                + low_offset_cost)
1324        }
1325    }
1326}
1327
/// Amount subtracted from a fresh match's length before it is mapped to a
/// length slot: 0 for offsets below `0x2000`, 1 below `0x40000`, otherwise 2.
fn match_length_adjustment(offset: usize) -> usize {
    match offset {
        0..=0x1fff => 0,
        0x2000..=0x3ffff => 1,
        _ => 2,
    }
}
1331
1332fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
1333    if pos + 2 < input.len() {
1334        buckets[match_hash(input, pos)].push(pos);
1335    }
1336}
1337
1338fn match_hash(input: &[u8], pos: usize) -> usize {
1339    let value =
1340        ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1341    value & (MATCH_HASH_BUCKETS - 1)
1342}
1343
1344fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
1345    if length < 3 {
1346        return Err(Error::InvalidData("RAR 2.9 match length is too short"));
1347    }
1348    let adjusted = length - 3;
1349    for (slot, &base) in LENGTH_BASES.iter().enumerate() {
1350        let extra_bits = LENGTH_BITS[slot];
1351        let max = base
1352            + if extra_bits == 0 {
1353                0
1354            } else {
1355                (1usize << extra_bits) - 1
1356            };
1357        if adjusted >= base && adjusted <= max {
1358            return Ok((slot, adjusted - base));
1359        }
1360    }
1361    Err(Error::InvalidData("RAR 2.9 match length is too long"))
1362}
1363
1364fn length_slot_for_repeat_match(length: usize) -> Result<(usize, usize)> {
1365    if length < 2 {
1366        return Err(Error::InvalidData(
1367            "RAR 2.9 repeat match length is too short",
1368        ));
1369    }
1370    let adjusted = length - 2;
1371    for (slot, &base) in LENGTH_BASES.iter().enumerate() {
1372        let extra_bits = LENGTH_BITS[slot];
1373        let max = base
1374            + if extra_bits == 0 {
1375                0
1376            } else {
1377                (1usize << extra_bits) - 1
1378            };
1379        if adjusted >= base && adjusted <= max {
1380            return Ok((slot, adjusted - base));
1381        }
1382    }
1383    Err(Error::InvalidData(
1384        "RAR 2.9 repeat match length is too long",
1385    ))
1386}
1387
1388fn offset_slot_for_match(offset: usize) -> Result<(usize, usize)> {
1389    if offset == 0 {
1390        return Err(Error::InvalidData("RAR 2.9 match offset is zero"));
1391    }
1392    let adjusted = offset - 1;
1393    for (slot, &base) in OFFSET_BASES.iter().enumerate() {
1394        let extra_bits = OFFSET_BITS[slot];
1395        let max = base
1396            + if extra_bits == 0 {
1397                0
1398            } else {
1399                (1usize << extra_bits) - 1
1400            };
1401        if adjusted >= base && adjusted <= max {
1402            return Ok((slot, adjusted - base));
1403        }
1404    }
1405    Err(Error::InvalidData("RAR 2.9 match offset is too large"))
1406}
1407
/// One symbol of the run-length-compressed code-length sequence, plus the
/// extra bits that follow it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct LevelToken {
    /// Level-alphabet symbol; the constructors use 16..=19 for run codes and
    /// pass plain code lengths through unchanged.
    symbol: usize,
    /// Number of extra bits that accompany the symbol (0, 3 or 7).
    extra_bits: u8,
    /// Value carried in the extra bits: the run length minus the run's minimum.
    extra_value: u8,
}
1414
1415impl LevelToken {
1416    const fn plain(symbol: usize) -> Self {
1417        Self {
1418            symbol,
1419            extra_bits: 0,
1420            extra_value: 0,
1421        }
1422    }
1423
1424    const fn repeat_previous_short(count: usize) -> Self {
1425        Self {
1426            symbol: 16,
1427            extra_bits: 3,
1428            extra_value: (count - 3) as u8,
1429        }
1430    }
1431
1432    const fn repeat_previous_long(count: usize) -> Self {
1433        Self {
1434            symbol: 17,
1435            extra_bits: 7,
1436            extra_value: (count - 11) as u8,
1437        }
1438    }
1439
1440    const fn zero_run_short(count: usize) -> Self {
1441        Self {
1442            symbol: 18,
1443            extra_bits: 3,
1444            extra_value: (count - 3) as u8,
1445        }
1446    }
1447
1448    const fn zero_run_long(count: usize) -> Self {
1449        Self {
1450            symbol: 19,
1451            extra_bits: 7,
1452            extra_value: (count - 11) as u8,
1453        }
1454    }
1455}
1456
/// Fixed-size entry point for a full `TABLE_COUNT`-entry code-length table;
/// forwards to `encode_level_tokens`.
fn encode_table_level_tokens(lengths: &[u8; TABLE_COUNT]) -> Vec<LevelToken> {
    encode_level_tokens(lengths)
}
1460
1461fn encode_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
1462    let mut tokens = Vec::new();
1463    let mut pos = 0usize;
1464    let mut previous = None;
1465    while pos < lengths.len() {
1466        let value = lengths[pos];
1467        let mut run = 1usize;
1468        while pos + run < lengths.len() && lengths[pos + run] == value {
1469            run += 1;
1470        }
1471
1472        if value == 0 {
1473            emit_zero_level_run(&mut tokens, run);
1474            previous = Some(0);
1475            pos += run;
1476            continue;
1477        }
1478
1479        if previous == Some(value) && run >= 3 {
1480            emit_repeat_level_run(&mut tokens, run);
1481            pos += run;
1482            continue;
1483        }
1484
1485        tokens.push(LevelToken::plain(value as usize));
1486        previous = Some(value);
1487        pos += 1;
1488    }
1489    tokens
1490}
1491
1492fn emit_repeat_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1493    while run != 0 {
1494        if run >= 11 {
1495            let mut chunk = run.min(138);
1496            if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1497                chunk -= 3;
1498            }
1499            tokens.push(LevelToken::repeat_previous_long(chunk));
1500            run -= chunk;
1501        } else if run >= 3 {
1502            let chunk = run.min(10);
1503            tokens.push(LevelToken::repeat_previous_short(chunk));
1504            run -= chunk;
1505        } else {
1506            break;
1507        }
1508    }
1509}
1510
1511fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1512    while run != 0 {
1513        if run >= 11 {
1514            let mut chunk = run.min(138);
1515            if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1516                chunk -= 3;
1517            }
1518            tokens.push(LevelToken::zero_run_long(chunk));
1519            run -= chunk;
1520        } else if run >= 3 {
1521            let chunk = run.min(10);
1522            tokens.push(LevelToken::zero_run_short(chunk));
1523            run -= chunk;
1524        } else {
1525            tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
1526            break;
1527        }
1528    }
1529}
1530
1531fn level_code_lengths(tokens: &[LevelToken]) -> [u8; LEVEL_COUNT] {
1532    let mut lengths = [0u8; LEVEL_COUNT];
1533    let mut used = [false; LEVEL_COUNT];
1534    for token in tokens {
1535        used[token.symbol] = true;
1536    }
1537    let used_count = used.iter().filter(|&&used| used).count();
1538    let len = huffman::bits_for_symbol_count(used_count);
1539    for (symbol, is_used) in used.into_iter().enumerate() {
1540        if is_used {
1541            lengths[symbol] = len;
1542        }
1543    }
1544    lengths
1545}
1546
/// A single code word as assigned by `canonical_codes`.
#[derive(Debug, Clone, Copy)]
struct HuffmanCode {
    /// Code value; the encoder writes its low `len` bits.
    code: u16,
    /// Code length in bits (1..=15).
    len: u8,
}
1552
1553fn canonical_codes(lengths: &[u8]) -> Result<Vec<Option<HuffmanCode>>> {
1554    let mut count = [0u16; 16];
1555    for &len in lengths {
1556        if len > 15 {
1557            return Err(Error::InvalidData("RAR 2.9 Huffman length is too large"));
1558        }
1559        if len != 0 {
1560            count[len as usize] += 1;
1561        }
1562    }
1563    validate_huffman_counts(&count)?;
1564
1565    let mut next_code = [0u16; 16];
1566    let mut code = 0u16;
1567    for len in 1..=15 {
1568        code = (code + count[len - 1]) << 1;
1569        next_code[len] = code;
1570    }
1571
1572    let mut codes = vec![None; lengths.len()];
1573    for (symbol, &len) in lengths.iter().enumerate() {
1574        if len == 0 {
1575            continue;
1576        }
1577        let code = next_code[len as usize];
1578        next_code[len as usize] += 1;
1579        codes[symbol] = Some(HuffmanCode { code, len });
1580    }
1581    Ok(codes)
1582}
1583
/// Stateful RAR 2.9 decoder.
///
/// A fresh instance comes from `Unpack29::new`; `reset_non_solid` restores
/// that initial state before a non-solid member is decoded.
#[derive(Debug, Clone)]
pub struct Unpack29 {
    // Input bit reader; `decode_member` replaces it whenever new input is supplied.
    bits: BitReader,
    // One entry per symbol of the four code tables combined (`TABLE_COUNT`); starts zeroed.
    levels: [u8; TABLE_COUNT],
    // The four decoding tables; all start out empty.
    main: Huffman,
    offsets: Huffman,
    low_offsets: Huffman,
    lengths: Huffman,
    // NOTE(review): the next three presumably mirror the encoder-side
    // `EncoderMatchState` (recent offsets plus the last match) — confirm.
    old_offsets: [usize; 4],
    last_offset: usize,
    last_length: usize,
    last_low_offset: usize,
    low_offset_repeats: usize,
    pending_match: Option<(usize, usize)>,
    in_lz_block: bool,
    // Starts as `BlockMode::Lz`.
    block_mode: BlockMode,
    ppmd: PpmdDecoder,
    // Starts at 2. NOTE(review): presumably the PPMd escape byte — confirm.
    ppmd_esc: u8,
    filters: Vec<VmFilter>,
    programs: Vec<VmProgram>,
    last_filter: usize,
    base_offset: usize,
    output: Vec<u8>,
}
1608
/// Block coding mode tracked in `Unpack29::block_mode`; a new decoder starts
/// in `Lz`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockMode {
    Lz,
    Ppmd,
}
1614
/// Ways an LZ block can end.
///
/// NOTE(review): going by the variant names, each says whether a new file
/// starts and whether new code tables follow — confirm against the code that
/// produces and consumes this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LzBlockEnd {
    SameFileNewTable,
    NewFileKeepTables,
    NewFileNewTables,
}
1621
/// A VM filter queued in `Unpack29::filters`.
///
/// NOTE(review): the field meanings below are inferred from their names and
/// types — confirm against the filter parsing code.
#[derive(Debug, Clone)]
struct VmFilter {
    // Presumably an index into `Unpack29::programs`.
    program: usize,
    // Presumably the start position and byte length of the filtered block.
    start: usize,
    size: usize,
    // Seven register values, matching the seven registers the encoder can initialise.
    regs: [u32; 7],
    global_data: Vec<u8>,
}
1630
/// A VM program remembered in `Unpack29::programs`.
///
/// NOTE(review): `block_size`, `exec_count` and `globals` look like state
/// carried between uses of the same program — confirm against the code that
/// updates them.
#[derive(Debug, Clone)]
struct VmProgram {
    // Recognised standard filter or generic VM program.
    kind: VmProgramKind,
    block_size: usize,
    exec_count: u32,
    globals: Vec<u8>,
}
1638
/// What a `VmProgram` contains: one of the recognised `StandardFilter` kinds
/// or a generic `rarvm::Program`.
#[derive(Debug, Clone)]
enum VmProgramKind {
    Standard(StandardFilter),
    Generic(rarvm::Program),
}
1644
/// The filter kinds that `VmProgramKind::Standard` can carry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StandardFilter {
    E8,
    E8E9,
    Itanium,
    Delta,
    Rgb,
    Audio,
}
1654
1655impl Unpack29 {
    /// Creates a decoder in its initial state: empty bit reader and code
    /// tables, zeroed match history, LZ block mode, and no filters, programs
    /// or buffered output.
    pub fn new() -> Self {
        Self {
            bits: BitReader::new(),
            levels: [0; TABLE_COUNT],
            main: Huffman::empty(),
            offsets: Huffman::empty(),
            low_offsets: Huffman::empty(),
            lengths: Huffman::empty(),
            old_offsets: [0; 4],
            last_offset: 0,
            last_length: 0,
            last_low_offset: 0,
            low_offset_repeats: 0,
            pending_match: None,
            in_lz_block: false,
            block_mode: BlockMode::Lz,
            ppmd: PpmdDecoder::new(),
            ppmd_esc: 2,
            filters: Vec::new(),
            programs: Vec::new(),
            last_filter: 0,
            base_offset: 0,
            output: Vec::new(),
        }
    }
1681
    /// Discards all decoder state by replacing `self` with a freshly
    /// constructed decoder.
    pub fn reset_non_solid(&mut self) {
        *self = Self::new();
    }
1685
    /// Resets the decoder via `reset_non_solid`, then decodes `input` with
    /// `decode_member`, returning `output_size` worth of decoded data or the
    /// error it reports.
    pub fn decode_non_solid_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
        self.reset_non_solid();
        self.decode_member(input, output_size)
    }
1690
    /// Resets the decoder via `reset_non_solid`, then forwards to
    /// `decode_member_to`, which writes the decoded data to `out`.
    pub fn decode_non_solid_member_to(
        &mut self,
        input: &[u8],
        output_size: usize,
        out: &mut impl Write,
    ) -> Result<()> {
        self.reset_non_solid();
        self.decode_member_to(input, output_size, out)
    }
1700
    /// Resets the decoder via `reset_non_solid`, then forwards to
    /// `decode_member_from_reader`, which pulls compressed data from `input`
    /// and writes the decoded data to `out`.
    pub fn decode_non_solid_member_from_reader(
        &mut self,
        input: &mut impl Read,
        output_size: usize,
        out: &mut impl Write,
    ) -> Result<()> {
        self.reset_non_solid();
        self.decode_member_from_reader(input, output_size, out)
    }
1710
1711    pub fn decode_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
1712        let start = self.current_pos();
1713        let target = start
1714            .checked_add(output_size)
1715            .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
1716        if !input.is_empty() {
1717            self.bits = BitReader::new();
1718        }
1719        self.bits.append(input);
1720        self.decode_until(target).map_err(|error| match error {
1721            Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1722            error => error,
1723        })?;
1724        self.finish_member().map_err(|error| match error {
1725            Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1726            error => error,
1727        })?;
1728        let out = self.filtered_range(start, target, start)?;
1729        self.trim_history(target, target);
1730        Ok(out)
1731    }
1732
1733    pub fn decode_member_to(
1734        &mut self,
1735        input: &[u8],
1736        output_size: usize,
1737        out: &mut impl Write,
1738    ) -> Result<()> {
1739        let start = self.current_pos();
1740        let final_target = start
1741            .checked_add(output_size)
1742            .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
1743        if !input.is_empty() {
1744            self.bits = BitReader::new();
1745        }
1746        self.bits.append(input);
1747
1748        let mut flushed = start;
1749        let mut target = start.saturating_add(STREAM_CHUNK).min(final_target);
1750        while flushed < final_target {
1751            self.decode_until(target)?;
1752            let safe_end = self.safe_flush_end(flushed, target, final_target)?;
1753            if safe_end <= flushed {
1754                if target == final_target {
1755                    return Err(Error::InvalidData(
1756                        "RAR 2.9 VM filter extends beyond output",
1757                    ));
1758                }
1759                target = self
1760                    .current_pos()
1761                    .saturating_add(STREAM_CHUNK)
1762                    .min(final_target);
1763                continue;
1764            }
1765
1766            let decoded = self.filtered_range(flushed, safe_end, start)?;
1767            out.write_all(&decoded)
1768                .map_err(|_| Error::InvalidData("RAR 2.9 output write failed"))?;
1769            flushed = safe_end;
1770            self.trim_history(flushed, self.current_pos());
1771            target = self
1772                .current_pos()
1773                .saturating_add(STREAM_CHUNK)
1774                .min(final_target);
1775        }
1776        self.finish_member()?;
1777        Ok(())
1778    }
1779
1780    pub fn decode_member_from_reader(
1781        &mut self,
1782        input: &mut impl Read,
1783        output_size: usize,
1784        out: &mut impl Write,
1785    ) -> Result<()> {
1786        self.bits = BitReader::new();
1787        let start = self.current_pos();
1788        let final_target = start
1789            .checked_add(output_size)
1790            .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
1791        let mut flushed = start;
1792        let mut target = start.saturating_add(STREAM_CHUNK).min(final_target);
1793        let mut packed = Vec::new();
1794        input
1795            .read_to_end(&mut packed)
1796            .map_err(|_| Error::InvalidData("RAR 2.9 input read failed"))?;
1797        self.bits.append(&packed);
1798        // Empty members in solid mode still carry their own block init bytes
1799        // (typically the (esc, 0) end-of-block marker + 4-byte range coder
1800        // flush). When output_size is zero, decode_until skips its loop body
1801        // and never reads tables, so do the init here so finish_member can
1802        // observe the block end.
1803        if final_target == start && !self.in_lz_block && !packed.is_empty() {
1804            self.read_tables().map_err(|error| match error {
1805                Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1806                error => error,
1807            })?;
1808            self.in_lz_block = true;
1809        }
1810
1811        while flushed < final_target {
1812            self.decode_until(target).map_err(|error| match error {
1813                Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1814                error => error,
1815            })?;
1816
1817            let safe_end = self.safe_flush_end(flushed, target, final_target)?;
1818            if safe_end <= flushed {
1819                if target == final_target {
1820                    return Err(Error::InvalidData(
1821                        "RAR 2.9 VM filter extends beyond output",
1822                    ));
1823                }
1824                target = self
1825                    .current_pos()
1826                    .saturating_add(STREAM_CHUNK)
1827                    .min(final_target);
1828                continue;
1829            }
1830
1831            let decoded = self.filtered_range(flushed, safe_end, start)?;
1832            out.write_all(&decoded)
1833                .map_err(|_| Error::InvalidData("RAR 2.9 output write failed"))?;
1834            flushed = safe_end;
1835            self.trim_history(flushed, self.current_pos());
1836            target = self
1837                .current_pos()
1838                .saturating_add(STREAM_CHUNK)
1839                .min(final_target);
1840        }
1841        self.finish_member().map_err(|error| match error {
1842            Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1843            error => error,
1844        })?;
1845        Ok(())
1846    }
1847
1848    fn decode_until(&mut self, target: usize) -> Result<()> {
1849        while self.current_pos() < target {
1850            self.drain_pending_match(target)?;
1851            if self.current_pos() >= target {
1852                break;
1853            }
1854            if !self.in_lz_block {
1855                self.read_tables()?;
1856                self.in_lz_block = true;
1857            }
1858            match self.block_mode {
1859                BlockMode::Lz => self.decode_lz(target)?,
1860                BlockMode::Ppmd => self.decode_ppmd(target)?,
1861            }
1862        }
1863        Ok(())
1864    }
1865
1866    fn read_tables(&mut self) -> Result<()> {
1867        self.bits.align_byte();
1868        if self.bits.peek_bit()? != 0 {
1869            let first_byte = self.bits.read_bits(8)? as u8;
1870            self.ppmd
1871                .decode_init(first_byte, &mut self.bits, &mut self.ppmd_esc)?;
1872            self.block_mode = BlockMode::Ppmd;
1873            return Ok(());
1874        }
1875        self.bits.read_bit()?;
1876        self.block_mode = BlockMode::Lz;
1877        let keep_tables = self.bits.read_bit()? != 0;
1878        self.last_low_offset = 0;
1879        self.low_offset_repeats = 0;
1880        if !keep_tables {
1881            self.levels = [0; TABLE_COUNT];
1882        }
1883
1884        let level_lengths = Self::read_level_lengths(&mut self.bits)?;
1885        let level_decoder = Huffman::from_lengths(&level_lengths)?;
1886        let mut new_levels = [0u8; TABLE_COUNT];
1887        let mut pos = 0usize;
1888        while pos < TABLE_COUNT {
1889            let symbol = level_decoder.decode(&mut self.bits)?;
1890            match symbol {
1891                0..=15 => {
1892                    new_levels[pos] = (self.levels[pos].wrapping_add(symbol as u8)) & 0x0f;
1893                    pos += 1;
1894                }
1895                16 => {
1896                    if pos == 0 {
1897                        return Err(Error::InvalidData("RAR 2.9 table repeat at start"));
1898                    }
1899                    let count = 3 + self.bits.read_bits(3)? as usize;
1900                    let value = new_levels[pos - 1];
1901                    fill_levels(&mut new_levels, &mut pos, count, value)?;
1902                }
1903                17 => {
1904                    if pos == 0 {
1905                        return Err(Error::InvalidData("RAR 2.9 long table repeat at start"));
1906                    }
1907                    let count = 11 + self.bits.read_bits(7)? as usize;
1908                    let value = new_levels[pos - 1];
1909                    fill_levels(&mut new_levels, &mut pos, count, value)?;
1910                }
1911                18 => {
1912                    let count = 3 + self.bits.read_bits(3)? as usize;
1913                    fill_levels(&mut new_levels, &mut pos, count, 0)?;
1914                }
1915                19 => {
1916                    let count = 11 + self.bits.read_bits(7)? as usize;
1917                    fill_levels(&mut new_levels, &mut pos, count, 0)?;
1918                }
1919                _ => return Err(Error::InvalidData("RAR 2.9 invalid level symbol")),
1920            }
1921        }
1922
1923        self.levels = new_levels;
1924        self.main = Huffman::from_lengths(&self.levels[..MAIN_COUNT])?;
1925        self.offsets = Huffman::from_lengths(&self.levels[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
1926        self.low_offsets = Huffman::from_lengths(
1927            &self.levels[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT],
1928        )?;
1929        self.lengths =
1930            Huffman::from_lengths(&self.levels[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..])?;
1931        Ok(())
1932    }
1933
1934    fn read_level_lengths(bits: &mut BitReader) -> Result<[u8; LEVEL_COUNT]> {
1935        let mut lengths = [0u8; LEVEL_COUNT];
1936        let mut pos = 0usize;
1937        while pos < LEVEL_COUNT {
1938            let value = bits.read_bits(4)? as u8;
1939            if value == 15 {
1940                let zero_count = bits.read_bits(4)? as usize;
1941                if zero_count == 0 {
1942                    lengths[pos] = 15;
1943                    pos += 1;
1944                } else {
1945                    pos = pos.saturating_add(zero_count + 2).min(LEVEL_COUNT);
1946                }
1947            } else {
1948                lengths[pos] = value;
1949                pos += 1;
1950            }
1951        }
1952        Ok(lengths)
1953    }
1954
1955    fn decode_lz(&mut self, output_size: usize) -> Result<()> {
1956        while self.current_pos() < output_size {
1957            let symbol = self.main.decode(&mut self.bits)?;
1958            match symbol {
1959                0..=255 => self.output.push(symbol as u8),
1960                256 => {
1961                    self.read_end_of_block()?;
1962                    return Ok(());
1963                }
1964                257 => {
1965                    self.read_vm_code()?;
1966                }
1967                258 => {
1968                    if self.last_length != 0 {
1969                        self.copy_match(self.last_length, self.last_offset, output_size)?;
1970                    }
1971                }
1972                259..=262 => {
1973                    let index = symbol - 259;
1974                    let offset = self.old_offsets[index];
1975                    let length_slot = self.lengths.decode(&mut self.bits)?;
1976                    if length_slot >= LENGTH_COUNT {
1977                        return Err(Error::InvalidData("RAR 2.9 invalid repeat length slot"));
1978                    }
1979                    let mut length = LENGTH_BASES[length_slot] + 2;
1980                    if LENGTH_BITS[length_slot] != 0 {
1981                        length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
1982                    }
1983                    self.rotate_old_offset(index);
1984                    self.last_offset = offset;
1985                    self.last_length = length;
1986                    self.copy_match(length, offset, output_size)?;
1987                }
1988                263..=270 => {
1989                    let index = symbol - 263;
1990                    let mut offset = SHORT_BASES[index] + 1;
1991                    if SHORT_BITS[index] != 0 {
1992                        offset += self.bits.read_bits(SHORT_BITS[index])? as usize;
1993                    }
1994                    self.push_old_offset(offset);
1995                    self.last_offset = offset;
1996                    self.last_length = 2;
1997                    self.copy_match(2, offset, output_size)?;
1998                }
1999                271..=298 => {
2000                    let length_slot = symbol - 271;
2001                    let mut length = LENGTH_BASES[length_slot] + 3;
2002                    if LENGTH_BITS[length_slot] != 0 {
2003                        length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
2004                    }
2005                    let offset = self.read_offset()?;
2006                    if offset >= 0x2000 {
2007                        length += 1;
2008                    }
2009                    if offset >= 0x40000 {
2010                        length += 1;
2011                    }
2012                    self.push_old_offset(offset);
2013                    self.last_offset = offset;
2014                    self.last_length = length;
2015                    self.copy_match(length, offset, output_size)?;
2016                }
2017                _ => return Err(Error::InvalidData("RAR 2.9 invalid main symbol")),
2018            }
2019        }
2020        Ok(())
2021    }
2022
2023    fn decode_ppmd(&mut self, output_size: usize) -> Result<()> {
2024        while self.current_pos() < output_size {
2025            let Some(symbol) = self.ppmd.decode_symbol(&mut self.bits)? else {
2026                return Ok(());
2027            };
2028            if symbol != self.ppmd_esc {
2029                self.output.push(symbol);
2030                continue;
2031            }
2032
2033            let Some(next) = self.ppmd.decode_symbol(&mut self.bits)? else {
2034                return Ok(());
2035            };
2036            match next {
2037                0 => {
2038                    self.in_lz_block = false;
2039                    return Ok(());
2040                }
2041                1 | 6..=u8::MAX => self.output.push(self.ppmd_esc),
2042                2 => {
2043                    self.in_lz_block = false;
2044                    return Ok(());
2045                }
2046                3 => {
2047                    self.read_vm_code_ppmd()?;
2048                }
2049                4 => {
2050                    let mut offset = 0usize;
2051                    for _ in 0..3 {
2052                        offset = (offset << 8) | self.read_ppmd_required_byte()? as usize;
2053                    }
2054                    offset += 2;
2055                    let length = self.read_ppmd_required_byte()? as usize + 32;
2056                    self.copy_match(length, offset, output_size)?;
2057                }
2058                5 => {
2059                    let length = self.read_ppmd_required_byte()? as usize + 4;
2060                    self.copy_match(length, 1, output_size)?;
2061                }
2062            }
2063        }
2064        Ok(())
2065    }
2066
2067    fn read_ppmd_required_byte(&mut self) -> Result<u8> {
2068        self.ppmd
2069            .decode_symbol(&mut self.bits)?
2070            .ok_or(Error::InvalidData("RAR 2.9 PPMd stream ended early"))
2071    }
2072
2073    fn finish_ppmd_member(&mut self) -> Result<()> {
2074        if self.block_mode != BlockMode::Ppmd {
2075            return Ok(());
2076        }
2077        let Some(symbol) = self.ppmd.decode_symbol(&mut self.bits)? else {
2078            return Ok(());
2079        };
2080        if symbol != self.ppmd_esc {
2081            return Err(Error::InvalidData("RAR 2.9 PPMd member has trailing data"));
2082        }
2083        let Some(next) = self.ppmd.decode_symbol(&mut self.bits)? else {
2084            return Ok(());
2085        };
2086        match next {
2087            2 => {
2088                self.in_lz_block = false;
2089                Ok(())
2090            }
2091            0 => {
2092                self.in_lz_block = false;
2093                Ok(())
2094            }
2095            _ => Err(Error::InvalidData("RAR 2.9 PPMd member has trailing data")),
2096        }
2097    }
2098
2099    fn finish_member(&mut self) -> Result<()> {
2100        match self.block_mode {
2101            BlockMode::Lz => self.finish_lz_member(),
2102            BlockMode::Ppmd => self.finish_ppmd_member(),
2103        }
2104    }
2105
2106    fn finish_lz_member(&mut self) -> Result<()> {
2107        loop {
2108            if !self.in_lz_block {
2109                return Ok(());
2110            }
2111            let symbol = self.main.decode(&mut self.bits)?;
2112            if symbol != 256 {
2113                return Err(Error::InvalidData("RAR 2.9 LZ member has trailing data"));
2114            }
2115            match self.read_end_of_block()? {
2116                LzBlockEnd::SameFileNewTable => {
2117                    if self.bits.remaining_bits_are_zero() {
2118                        return Ok(());
2119                    }
2120                    if let Err(error) = self.read_tables() {
2121                        if error == Error::NeedMoreInput {
2122                            return Ok(());
2123                        }
2124                        return Err(error);
2125                    }
2126                    self.in_lz_block = true;
2127                }
2128                LzBlockEnd::NewFileKeepTables | LzBlockEnd::NewFileNewTables => return Ok(()),
2129            }
2130        }
2131    }
2132
2133    fn read_end_of_block(&mut self) -> Result<LzBlockEnd> {
2134        if self.bits.read_bit()? != 0 {
2135            self.in_lz_block = false;
2136            return Ok(LzBlockEnd::SameFileNewTable);
2137        }
2138        if self.bits.read_bit()? != 0 {
2139            self.in_lz_block = false;
2140            Ok(LzBlockEnd::NewFileNewTables)
2141        } else {
2142            self.in_lz_block = true;
2143            Ok(LzBlockEnd::NewFileKeepTables)
2144        }
2145    }
2146
2147    fn read_offset(&mut self) -> Result<usize> {
2148        let slot = self.offsets.decode(&mut self.bits)?;
2149        if slot >= OFFSET_COUNT {
2150            return Err(Error::InvalidData("RAR 2.9 invalid offset slot"));
2151        }
2152        let mut offset = OFFSET_BASES[slot] + 1;
2153        let extra_bits = OFFSET_BITS[slot];
2154        if extra_bits != 0 {
2155            if slot > 9 {
2156                if extra_bits > 4 {
2157                    offset += (self.bits.read_bits(extra_bits - 4)? as usize) << 4;
2158                }
2159                if self.low_offset_repeats > 0 {
2160                    self.low_offset_repeats -= 1;
2161                    offset += self.last_low_offset;
2162                } else {
2163                    let low = self.low_offsets.decode(&mut self.bits)?;
2164                    if low == 16 {
2165                        self.low_offset_repeats = 15;
2166                        offset += self.last_low_offset;
2167                    } else if low < 16 {
2168                        self.last_low_offset = low;
2169                        offset += low;
2170                    } else {
2171                        return Err(Error::InvalidData("RAR 2.9 invalid low offset symbol"));
2172                    }
2173                }
2174            } else {
2175                offset += self.bits.read_bits(extra_bits)? as usize;
2176            }
2177        }
2178        Ok(offset)
2179    }
2180
2181    fn read_vm_code(&mut self) -> Result<()> {
2182        let first_byte = self.bits.read_bits(8)?;
2183        let mut len = (first_byte & 7) + 1;
2184        if len == 7 {
2185            len = self.bits.read_bits(8)? + 7;
2186        } else if len == 8 {
2187            len = self.bits.read_bits(16)?;
2188        }
2189        let mut data = Vec::with_capacity(len as usize);
2190        for _ in 0..len {
2191            data.push(self.bits.read_bits(8)? as u8);
2192        }
2193
2194        self.parse_vm_code(first_byte, data)
2195    }
2196
2197    fn read_vm_code_ppmd(&mut self) -> Result<()> {
2198        let first_byte = u32::from(self.read_ppmd_required_byte()?);
2199        let mut len = (first_byte & 7) + 1;
2200        if len == 7 {
2201            len = u32::from(self.read_ppmd_required_byte()?) + 7;
2202        } else if len == 8 {
2203            len = (u32::from(self.read_ppmd_required_byte()?) << 8)
2204                | u32::from(self.read_ppmd_required_byte()?);
2205        }
2206        let mut data = Vec::with_capacity(len as usize);
2207        for _ in 0..len {
2208            data.push(self.read_ppmd_required_byte()?);
2209        }
2210
2211        self.parse_vm_code(first_byte, data)
2212    }
2213
2214    fn parse_vm_code(&mut self, first_byte: u32, data: Vec<u8>) -> Result<()> {
2215        let mut vm = BitReader::from_bytes(&data);
2216        let program_index = if first_byte & 0x80 != 0 {
2217            let value = vm.read_encoded_u32()?;
2218            if value == 0 {
2219                self.filters.clear();
2220                self.programs.clear();
2221                0
2222            } else {
2223                usize::try_from(value - 1)
2224                    .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?
2225            }
2226        } else {
2227            self.last_filter
2228        };
2229        if program_index > self.programs.len() {
2230            return Err(Error::InvalidData("RAR 2.9 VM program index is invalid"));
2231        }
2232        self.last_filter = program_index;
2233        let new_program = program_index == self.programs.len();
2234
2235        let mut block_start = vm.read_encoded_u32()? as usize;
2236        if first_byte & 0x40 != 0 {
2237            block_start += 258;
2238        }
2239        block_start = self
2240            .current_pos()
2241            .checked_add(block_start)
2242            .ok_or(Error::InvalidData("RAR 2.9 VM block start overflows"))?;
2243
2244        let mut block_size = self
2245            .programs
2246            .get(program_index)
2247            .map(|program| program.block_size)
2248            .unwrap_or(0);
2249        if first_byte & 0x20 != 0 {
2250            block_size = vm.read_encoded_u32()? as usize;
2251        }
2252
2253        let mut regs = [0u32; 7];
2254        regs[3] = 0x3c000;
2255        regs[4] = block_size as u32;
2256        if let Some(program) = self.programs.get(program_index) {
2257            regs[5] = program.exec_count;
2258        }
2259        if first_byte & 0x10 != 0 {
2260            let mask = vm.read_bits(7)?;
2261            for (index, reg) in regs.iter_mut().enumerate() {
2262                if mask & (1 << index) != 0 {
2263                    *reg = vm.read_encoded_u32()?;
2264                }
2265            }
2266        }
2267
2268        if new_program {
2269            if self.programs.len() >= MAX_VM_PROGRAMS {
2270                return Err(Error::InvalidData("RAR 2.9 VM program limit exceeded"));
2271            }
2272            let code_size = vm.read_encoded_u32()? as usize;
2273            if code_size == 0 {
2274                return Err(Error::InvalidData("RAR 2.9 VM code is empty"));
2275            }
2276            if code_size > MAX_VM_CODE_SIZE {
2277                return Err(Error::InvalidData("RAR 2.9 VM code is too large"));
2278            }
2279            let mut code = Vec::with_capacity(code_size);
2280            for _ in 0..code_size {
2281                code.push(vm.read_bits(8)? as u8);
2282            }
2283            let kind = identify_standard_filter(&code)
2284                .map(VmProgramKind::Standard)
2285                .map_or_else(
2286                    || rarvm::Program::parse(&code).map(VmProgramKind::Generic),
2287                    Ok,
2288                )?;
2289            self.programs.push(VmProgram {
2290                kind,
2291                block_size,
2292                exec_count: 0,
2293                globals: Vec::new(),
2294            });
2295        } else if let Some(program) = self.programs.get_mut(program_index) {
2296            program.exec_count = program.exec_count.wrapping_add(1);
2297            program.block_size = block_size;
2298        }
2299
2300        let mut global_data = Vec::new();
2301        if first_byte & 0x08 != 0 {
2302            let data_size = vm.read_encoded_u32()? as usize;
2303            global_data.reserve(data_size.min(MAX_VM_GLOBAL_DATA));
2304            for _ in 0..data_size {
2305                let byte = vm.read_bits(8)? as u8;
2306                if global_data.len() < MAX_VM_GLOBAL_DATA {
2307                    global_data.push(byte);
2308                }
2309            }
2310        }
2311
2312        if self.filters.len() >= MAX_VM_FILTERS {
2313            return Err(Error::InvalidData("RAR 2.9 VM filter limit exceeded"));
2314        }
2315        self.filters.push(VmFilter {
2316            program: program_index,
2317            start: block_start,
2318            size: block_size,
2319            regs,
2320            global_data,
2321        });
2322        Ok(())
2323    }
2324
2325    fn filtered_range(&mut self, start: usize, end: usize, member_start: usize) -> Result<Vec<u8>> {
2326        let mut out = Vec::with_capacity(end - start);
2327        let mut pos = start;
2328        let filters: Vec<_> = self
2329            .filters
2330            .iter()
2331            .enumerate()
2332            .filter_map(|(index, filter)| {
2333                (filter.start >= start && filter.start + filter.size <= end).then_some(index)
2334            })
2335            .collect();
2336        for filter_index in filters {
2337            let (program_index, filter_start, filter_size, regs, global_data) = {
2338                let filter = self
2339                    .filters
2340                    .get(filter_index)
2341                    .ok_or(Error::InvalidData("RAR 2.9 VM filter is missing"))?;
2342                (
2343                    filter.program,
2344                    filter.start,
2345                    filter.size,
2346                    filter.regs,
2347                    filter.global_data.clone(),
2348                )
2349            };
2350            if filter_start < pos {
2351                continue;
2352            }
2353            out.extend_from_slice(self.raw_range(pos, filter_start)?);
2354            let mut block = self
2355                .raw_range(filter_start, filter_start + filter_size)?
2356                .to_vec();
2357            let file_offset = filter_start
2358                .checked_sub(member_start)
2359                .ok_or(Error::InvalidData("RAR 2.9 VM filter starts before file"))?
2360                as u32;
2361            let program = self
2362                .programs
2363                .get_mut(program_index)
2364                .ok_or(Error::InvalidData("RAR 2.9 VM program is missing"))?;
2365            match &program.kind {
2366                VmProgramKind::Standard(standard) => {
2367                    apply_standard_filter(*standard, &mut block, file_offset, &regs)?
2368                }
2369                VmProgramKind::Generic(generic) => {
2370                    let globals = if global_data.is_empty() {
2371                        program.globals.as_slice()
2372                    } else {
2373                        global_data.as_slice()
2374                    };
2375                    let result = generic.execute(rarvm::Invocation {
2376                        input: &block,
2377                        regs,
2378                        global_data: globals,
2379                        file_offset: file_offset as u64,
2380                        exec_count: program.exec_count,
2381                    })?;
2382                    program.globals = result.globals;
2383                    block = result.output;
2384                }
2385            }
2386            out.extend_from_slice(&block);
2387            pos = filter_start + filter_size;
2388        }
2389        out.extend_from_slice(self.raw_range(pos, end)?);
2390        Ok(out)
2391    }
2392
2393    fn safe_flush_end(&self, start: usize, end: usize, final_target: usize) -> Result<usize> {
2394        let current = self.current_pos();
2395        let mut safe_end = end;
2396        for filter in &self.filters {
2397            let filter_end = filter
2398                .start
2399                .checked_add(filter.size)
2400                .ok_or(Error::InvalidData("RAR 2.9 VM filter size overflows"))?;
2401            if filter.start >= safe_end || filter_end <= start {
2402                continue;
2403            }
2404            if filter_end > final_target {
2405                return Err(Error::InvalidData(
2406                    "RAR 2.9 VM filter extends beyond output",
2407                ));
2408            }
2409            if filter_end > current {
2410                safe_end = safe_end.min(filter.start);
2411            }
2412        }
2413        Ok(safe_end)
2414    }
2415
2416    fn copy_match(&mut self, length: usize, offset: usize, output_size: usize) -> Result<()> {
2417        // The bitstream normally encodes match distances as offset+1, so zero
2418        // is not emitted for fresh matches. Keep the legacy decoder boundary
2419        // tolerant here: a zero internal offset behaves as distance one.
2420        let offset = if offset == 0 { 1 } else { offset };
2421        let current = self.current_pos();
2422        if offset > current {
2423            return Err(Error::InvalidData("RAR 2.9 match distance is out of range"));
2424        }
2425        for index in 0..length {
2426            if self.current_pos() >= output_size {
2427                self.pending_match = Some((length - index, offset));
2428                break;
2429            }
2430            let src = self.current_pos() - offset;
2431            let byte = *self
2432                .raw_byte(src)
2433                .ok_or(Error::InvalidData("RAR 2.9 match distance is out of range"))?;
2434            self.output.push(byte);
2435        }
2436        Ok(())
2437    }
2438
2439    fn drain_pending_match(&mut self, output_size: usize) -> Result<()> {
2440        let Some((length, offset)) = self.pending_match.take() else {
2441            return Ok(());
2442        };
2443        self.copy_match(length, offset, output_size)
2444    }
2445
2446    fn push_old_offset(&mut self, offset: usize) {
2447        self.old_offsets[3] = self.old_offsets[2];
2448        self.old_offsets[2] = self.old_offsets[1];
2449        self.old_offsets[1] = self.old_offsets[0];
2450        self.old_offsets[0] = offset;
2451    }
2452
2453    fn rotate_old_offset(&mut self, index: usize) {
2454        let value = self.old_offsets[index];
2455        for i in (1..=index).rev() {
2456            self.old_offsets[i] = self.old_offsets[i - 1];
2457        }
2458        self.old_offsets[0] = value;
2459    }
2460
2461    fn current_pos(&self) -> usize {
2462        self.base_offset + self.output.len()
2463    }
2464
2465    fn raw_byte(&self, position: usize) -> Option<&u8> {
2466        self.output.get(position.checked_sub(self.base_offset)?)
2467    }
2468
2469    fn raw_range(&self, start: usize, end: usize) -> Result<&[u8]> {
2470        if start < self.base_offset || end < start {
2471            return Err(Error::InvalidData(
2472                "RAR 2.9 retained history is unavailable",
2473            ));
2474        }
2475        let rel_start = start - self.base_offset;
2476        let rel_end = end - self.base_offset;
2477        self.output
2478            .get(rel_start..rel_end)
2479            .ok_or(Error::InvalidData(
2480                "RAR 2.9 retained history is unavailable",
2481            ))
2482    }
2483
2484    fn trim_history(&mut self, flushed_pos: usize, current_pos: usize) {
2485        let keep_from = current_pos.saturating_sub(MAX_HISTORY);
2486        let keep_from = keep_from.min(flushed_pos);
2487        if keep_from <= self.base_offset {
2488            return;
2489        }
2490        let drain = keep_from - self.base_offset;
2491        self.output.drain(..drain);
2492        self.base_offset = keep_from;
2493        self.filters
2494            .retain(|filter| filter.start + filter.size > self.base_offset);
2495    }
2496}
2497
2498impl Default for Unpack29 {
2499    fn default() -> Self {
2500        Self::new()
2501    }
2502}
2503
2504fn fill_levels(levels: &mut [u8], pos: &mut usize, count: usize, value: u8) -> Result<()> {
2505    let end = pos
2506        .checked_add(count)
2507        .ok_or(Error::InvalidData("RAR 2.9 table run overflows"))?;
2508    let end = end.min(levels.len());
2509    for item in &mut levels[*pos..end] {
2510        *item = value;
2511    }
2512    *pos = end;
2513    Ok(())
2514}
2515
2516#[derive(Debug, Clone)]
2517struct Huffman {
2518    symbols: Vec<HuffmanSymbol>,
2519    first_code: [u16; 16],
2520    first_index: [usize; 16],
2521    counts: [u16; 16],
2522}
2523
2524#[derive(Debug, Clone)]
2525struct HuffmanSymbol {
2526    code: u16,
2527    len: u8,
2528    symbol: usize,
2529}
2530
2531impl Huffman {
2532    fn empty() -> Self {
2533        Self {
2534            symbols: Vec::new(),
2535            first_code: [0; 16],
2536            first_index: [0; 16],
2537            counts: [0; 16],
2538        }
2539    }
2540
2541    fn from_lengths(lengths: &[u8]) -> Result<Self> {
2542        let mut count = [0u16; 16];
2543        for &len in lengths {
2544            if len > 15 {
2545                return Err(Error::InvalidData("RAR 2.9 Huffman length is too large"));
2546            }
2547            if len != 0 {
2548                count[len as usize] += 1;
2549            }
2550        }
2551        if count.iter().all(|&value| value == 0) {
2552            return Ok(Self::empty());
2553        }
2554        validate_huffman_counts(&count)?;
2555
2556        let mut first_code = [0u16; 16];
2557        let mut next_code = [0u16; 16];
2558        let mut code = 0u16;
2559        for len in 1..=15 {
2560            code = (code + count[len - 1]) << 1;
2561            first_code[len] = code;
2562            next_code[len] = code;
2563        }
2564
2565        let mut first_index = [0usize; 16];
2566        let mut index = 0usize;
2567        for len in 1..=15 {
2568            first_index[len] = index;
2569            index += usize::from(count[len]);
2570        }
2571
2572        let mut symbols = Vec::new();
2573        for (symbol, &len) in lengths.iter().enumerate() {
2574            if len == 0 {
2575                continue;
2576            }
2577            let code = next_code[len as usize];
2578            next_code[len as usize] += 1;
2579            symbols.push(HuffmanSymbol { code, len, symbol });
2580        }
2581        symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
2582        Ok(Self {
2583            symbols,
2584            first_code,
2585            first_index,
2586            counts: count,
2587        })
2588    }
2589
2590    fn decode(&self, bits: &mut BitReader) -> Result<usize> {
2591        let mut code = 0u16;
2592        if self.symbols.is_empty() {
2593            return Err(Error::InvalidData("RAR 2.9 empty Huffman table"));
2594        }
2595        for len in 1..=15 {
2596            code = (code << 1) | bits.read_bit()? as u16;
2597            let count = self.counts[len];
2598            if count != 0 {
2599                let first = self.first_code[len];
2600                let offset = code.wrapping_sub(first);
2601                if offset < count {
2602                    let index = self.first_index[len] + usize::from(offset);
2603                    return Ok(self.symbols[index].symbol);
2604                }
2605            }
2606        }
2607        Err(Error::InvalidData("RAR 2.9 invalid Huffman code"))
2608    }
2609}
2610
2611fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
2612    let mut available = 1i32;
2613    for &len_count in count.iter().skip(1) {
2614        available = (available << 1) - i32::from(len_count);
2615        if available < 0 {
2616            return Err(Error::InvalidData("RAR 2.9 oversubscribed Huffman table"));
2617        }
2618    }
2619    Ok(())
2620}
2621
2622#[derive(Debug, Clone)]
2623struct BitReader {
2624    input: Vec<u8>,
2625    bit_pos: usize,
2626}
2627
2628impl BitReader {
2629    fn new() -> Self {
2630        Self {
2631            input: Vec::new(),
2632            bit_pos: 0,
2633        }
2634    }
2635
2636    fn from_bytes(input: &[u8]) -> Self {
2637        Self {
2638            input: input.to_vec(),
2639            bit_pos: 0,
2640        }
2641    }
2642
2643    fn append(&mut self, input: &[u8]) {
2644        self.compact();
2645        self.input.extend_from_slice(input);
2646    }
2647
2648    fn compact(&mut self) {
2649        let bytes = self.bit_pos / 8;
2650        if bytes == 0 {
2651            return;
2652        }
2653        self.input.drain(..bytes);
2654        self.bit_pos -= bytes * 8;
2655    }
2656
2657    fn align_byte(&mut self) {
2658        self.bit_pos = (self.bit_pos + 7) & !7;
2659    }
2660
2661    fn peek_bit(&self) -> Result<u8> {
2662        self.peek_bits(1).map(|value| value as u8)
2663    }
2664
2665    fn read_bit(&mut self) -> Result<u8> {
2666        self.read_bits(1).map(|value| value as u8)
2667    }
2668
2669    fn read_bits(&mut self, count: u8) -> Result<u32> {
2670        let value = self.peek_bits(count)?;
2671        self.bit_pos += count as usize;
2672        Ok(value)
2673    }
2674
2675    fn remaining_bits_are_zero(&self) -> bool {
2676        let full_bytes = self.bit_pos / 8;
2677        let bit_offset = self.bit_pos % 8;
2678        let Some((&first, rest)) = self
2679            .input
2680            .get(full_bytes)
2681            .zip(self.input.get(full_bytes + 1..))
2682        else {
2683            return true;
2684        };
2685        if bit_offset != 0 && first << bit_offset != 0 {
2686            return false;
2687        }
2688        if bit_offset == 0 && first != 0 {
2689            return false;
2690        }
2691        rest.iter().all(|&byte| byte == 0)
2692    }
2693
2694    fn peek_bits(&self, count: u8) -> Result<u32> {
2695        if count > 24 {
2696            return Err(Error::InvalidData("RAR 2.9 bit read is too wide"));
2697        }
2698        let mut value = 0u32;
2699        for i in 0..count as usize {
2700            let bit_index = self.bit_pos + i;
2701            let byte = *self.input.get(bit_index / 8).ok_or(Error::NeedMoreInput)?;
2702            let bit = (byte >> (7 - (bit_index % 8))) & 1;
2703            value = (value << 1) | bit as u32;
2704        }
2705        Ok(value)
2706    }
2707
2708    fn read_encoded_u32(&mut self) -> Result<u32> {
2709        match self.read_bits(2)? {
2710            0 => self.read_bits(4),
2711            1 => {
2712                let high = self.read_bits(8)?;
2713                if high >= 16 {
2714                    Ok(high)
2715                } else {
2716                    Ok(0xffff_ff00 | (high << 4) | self.read_bits(4)?)
2717                }
2718            }
2719            2 => self.read_bits(16),
2720            _ => Ok((self.read_bits(16)? << 16) | self.read_bits(16)?),
2721        }
2722    }
2723}
2724
2725impl PpmdByteReader for BitReader {
2726    fn read_ppmd_byte(&mut self) -> Result<u8> {
2727        self.read_bits(8).map(|value| value as u8)
2728    }
2729}
2730
/// MSB-first bit sink that grows a byte vector one bit at a time.
#[derive(Default)]
struct BitWriter {
    /// Bytes produced so far; unwritten bits of the last byte are zero.
    bytes: Vec<u8>,
    /// Total number of bits written.
    bit_pos: usize,
}

impl BitWriter {
    /// Appends the low `count` bits of `value`, most significant first.
    fn write_bits(&mut self, value: u32, count: u8) {
        let mut remaining = count;
        while remaining > 0 {
            remaining -= 1;
            self.write_bit((value >> remaining) & 1 == 1);
        }
    }

    /// Writes `value` with a 2-bit width selector followed by the payload:
    /// selector 0 carries 4 bits (values below 16), selector 1 carries 8 bits
    /// (values below 256), selector 2 carries 16 bits, and selector 3 carries
    /// the full 32 bits as two 16-bit halves, high half first.
    fn write_encoded_u32(&mut self, value: u32) {
        match value {
            0..=0xf => {
                self.write_bits(0, 2);
                self.write_bits(value, 4);
            }
            0x10..=0xff => {
                self.write_bits(1, 2);
                self.write_bits(value, 8);
            }
            0x100..=0xffff => {
                self.write_bits(2, 2);
                self.write_bits(value, 16);
            }
            _ => {
                self.write_bits(3, 2);
                self.write_bits(value >> 16, 16);
                self.write_bits(value & 0xffff, 16);
            }
        }
    }

    /// Appends a single bit, starting a fresh zero byte at every byte
    /// boundary.
    fn write_bit(&mut self, bit: bool) {
        let used = self.bit_pos % 8;
        if used == 0 {
            self.bytes.push(0);
        }
        if bit {
            let last = self
                .bytes
                .last_mut()
                .expect("a byte is pushed whenever a new byte boundary is reached");
            *last |= 1 << (7 - used);
        }
        self.bit_pos += 1;
    }

    /// Consumes the writer and returns the bytes written; a trailing partial
    /// byte is padded with zero bits.
    fn finish(self) -> Vec<u8> {
        let Self { bytes, .. } = self;
        bytes
    }
}
2776
2777fn identify_standard_filter(code: &[u8]) -> Option<StandardFilter> {
2778    if code.iter().fold(0u8, |acc, &byte| acc ^ byte) != 0 {
2779        return None;
2780    }
2781    match (code.len(), crc32(code)) {
2782        (53, 0xad57_6887) => Some(StandardFilter::E8),
2783        (57, 0x3cd7_e57e) => Some(StandardFilter::E8E9),
2784        (120, 0x3769_893f) => Some(StandardFilter::Itanium),
2785        (29, 0x0e06_077d) => Some(StandardFilter::Delta),
2786        (149, 0x1c2c_5dc8) => Some(StandardFilter::Rgb),
2787        (216, 0xbc85_e701) => Some(StandardFilter::Audio),
2788        _ => None,
2789    }
2790}
2791
2792fn apply_standard_filter(
2793    filter: StandardFilter,
2794    data: &mut Vec<u8>,
2795    file_offset: u32,
2796    regs: &[u32; 7],
2797) -> Result<()> {
2798    match filter {
2799        StandardFilter::E8 => {
2800            filters::decode_in_place(FilterOp::E8, data, file_offset, rar29_delta_messages())?
2801        }
2802        StandardFilter::E8E9 => {
2803            filters::decode_in_place(FilterOp::E8E9, data, file_offset, rar29_delta_messages())?
2804        }
2805        StandardFilter::Itanium => itanium_decode(data, file_offset),
2806        StandardFilter::Delta => {
2807            let channels = regs[0] as usize;
2808            if channels == 0 {
2809                return Err(Error::InvalidData("RAR 2.9 DELTA filter has zero channels"));
2810            }
2811            filters::decode_in_place(
2812                FilterOp::Delta { channels },
2813                data,
2814                0,
2815                rar29_delta_messages(),
2816            )?;
2817        }
2818        StandardFilter::Rgb => {
2819            if regs[0] < 3 || regs[1] > 2 {
2820                return Err(Error::InvalidData(
2821                    "RAR 2.9 RGB filter parameters are invalid",
2822                ));
2823            }
2824            let width = regs[0] as usize - 3;
2825            let pos_r = regs[1] as usize;
2826            *data = rgb_decode(data, width, pos_r)?;
2827        }
2828        StandardFilter::Audio => {
2829            let channels = regs[0] as usize;
2830            if channels == 0 {
2831                return Err(Error::InvalidData("RAR 2.9 AUDIO filter has zero channels"));
2832            }
2833            *data = audio_decode(data, channels)?;
2834        }
2835    }
2836    Ok(())
2837}
2838
/// Reverses the Itanium branch filter in place.
///
/// `data` is walked in 16-byte bundles. The low bits of a bundle's first
/// byte select which instruction slots are inspected; for every inspected
/// slot whose 4-bit type field equals 5, the bundle's index
/// (`file_offset / 16` plus the bundle number, wrapping) is subtracted from
/// the slot's 20-bit field. Inputs of 21 bytes or fewer are left untouched.
fn itanium_decode(data: &mut [u8], file_offset: u32) {
    const TAIL: usize = 21;
    const BUNDLE: usize = 16;
    const FIELD_MASK: u32 = 0x000f_ffff;

    let Some(span) = data.len().checked_sub(TAIL).filter(|&span| span > 0) else {
        return;
    };
    let first_bundle = file_offset >> 4;
    // Each 16-byte Itanium bundle can inspect a 4-byte instruction field that
    // starts up to 13 bytes into the bundle. Keeping a 21-byte tail prevents
    // decoding a partial final bundle.
    for (block, pos) in (0..span).step_by(BUNDLE).enumerate() {
        let bundle_index = first_bundle.wrapping_add(block as u32);
        let selector = (0x334b_0000u32 >> (data[pos] & 0x1e)) & 3;
        if selector == 0 {
            continue;
        }
        // A selector of 1, 2 or 3 enables the slots with shifts
        // `selector + 1 ..= 4`.
        for shift in (selector + 1)..=4 {
            let field = pos + (shift as usize * 5 - 8);
            if (data[field + 3] >> shift) & 15 != 5 {
                continue;
            }
            let raw = u32::from_le_bytes([
                data[field],
                data[field + 1],
                data[field + 2],
                data[field + 3],
            ]);
            let rebased = (raw >> shift).wrapping_sub(bundle_index) & FIELD_MASK;
            let patched = (raw & !(FIELD_MASK << shift)) | (rebased << shift);
            data[field..field + 4].copy_from_slice(&patched.to_le_bytes());
        }
    }
}
2868
2869fn rgb_decode(data: &[u8], width: usize, pos_r: usize) -> Result<Vec<u8>> {
2870    if data.len() < 3 || width == 0 || !width.is_multiple_of(3) || width > data.len() || pos_r > 2 {
2871        return Err(Error::InvalidData(
2872            "RAR 2.9 RGB filter parameters are invalid",
2873        ));
2874    }
2875    let mut out = vec![0u8; data.len()];
2876    let mut src = 0usize;
2877    for channel in 0..3 {
2878        let mut prev = 0u8;
2879        let mut i = channel;
2880        while i < data.len() {
2881            let predicted = if i >= width + 3 {
2882                rgb_predict(prev, out[i - width], out[i - width - 3])
2883            } else {
2884                prev
2885            };
2886            let encoded = *data
2887                .get(src)
2888                .ok_or(Error::InvalidData("RAR 2.9 RGB filter source is truncated"))?;
2889            prev = predicted.wrapping_sub(encoded);
2890            out[i] = prev;
2891            src += 1;
2892            i += 3;
2893        }
2894    }
2895    for i in (pos_r..data.len().saturating_sub(2)).step_by(3) {
2896        let green = out[i + 1];
2897        out[i] = out[i].wrapping_add(green);
2898        out[i + 2] = out[i + 2].wrapping_add(green);
2899    }
2900    Ok(out)
2901}
2902
/// Picks whichever of the three neighbours is closest to the estimate
/// `prev + upper - upper_left`.
///
/// Ties are resolved in the order `prev`, `upper`, `upper_left`.
fn rgb_predict(prev: u8, upper: u8, upper_left: u8) -> u8 {
    let (left, up, diagonal) = (i32::from(prev), i32::from(upper), i32::from(upper_left));
    let estimate = left + up - diagonal;
    let dist_left = (estimate - left).abs();
    let dist_up = (estimate - up).abs();
    let dist_diagonal = (estimate - diagonal).abs();

    if dist_left <= dist_up && dist_left <= dist_diagonal {
        return prev;
    }
    if dist_up <= dist_diagonal {
        upper
    } else {
        upper_left
    }
}
2916
2917fn audio_decode(data: &[u8], channels: usize) -> Result<Vec<u8>> {
2918    let mut out = vec![0u8; data.len()];
2919    let mut src = 0usize;
2920    for channel in 0..channels {
2921        let mut prev_byte = 0u32;
2922        let mut prev_delta = 0i32;
2923        let mut d1 = 0i32;
2924        let mut d2 = 0i32;
2925        let mut k1 = 0i32;
2926        let mut k2 = 0i32;
2927        let mut k3 = 0i32;
2928        let mut dif = [0u32; 7];
2929        let mut byte_count = 0usize;
2930        let mut i = channel;
2931        while i < data.len() {
2932            let d3 = d2;
2933            d2 = prev_delta - d1;
2934            d1 = prev_delta;
2935            let predicted = ((8 * prev_byte as i32 + k1 * d1 + k2 * d2 + k3 * d3) >> 3) & 0xff;
2936            let encoded = *data.get(src).ok_or(Error::InvalidData(
2937                "RAR 2.9 AUDIO filter source is truncated",
2938            ))?;
2939            src += 1;
2940            let decoded = (predicted as u8).wrapping_sub(encoded);
2941            out[i] = decoded;
2942            prev_delta = decoded.wrapping_sub(prev_byte as u8) as i8 as i32;
2943            prev_byte = decoded as u32;
2944            let d = (encoded as i8 as i32) << 3;
2945            dif[0] += d.unsigned_abs();
2946            dif[1] += (d - d1).unsigned_abs();
2947            dif[2] += (d + d1).unsigned_abs();
2948            dif[3] += (d - d2).unsigned_abs();
2949            dif[4] += (d + d2).unsigned_abs();
2950            dif[5] += (d - d3).unsigned_abs();
2951            dif[6] += (d + d3).unsigned_abs();
2952            if byte_count & 0x1f == 0 {
2953                let mut min = dif[0];
2954                let mut min_index = 0usize;
2955                dif[0] = 0;
2956                for (index, value) in dif.iter_mut().enumerate().skip(1) {
2957                    if *value < min {
2958                        min = *value;
2959                        min_index = index;
2960                    }
2961                    *value = 0;
2962                }
2963                match min_index {
2964                    1 if k1 >= -16 => k1 -= 1,
2965                    2 if k1 < 16 => k1 += 1,
2966                    3 if k2 >= -16 => k2 -= 1,
2967                    4 if k2 < 16 => k2 += 1,
2968                    5 if k3 >= -16 => k3 -= 1,
2969                    6 if k3 < 16 => k3 += 1,
2970                    _ => {}
2971                }
2972            }
2973            byte_count += 1;
2974            i += channels;
2975        }
2976    }
2977    Ok(out)
2978}
2979
2980#[cfg(test)]
2981mod tests {
2982    use crate::rarvm::{Instruction, Opcode, Operand, Program};
2983    use std::ops::Range;
2984
2985    use super::{
2986        apply_standard_filter, audio_encode, best_match, encode_ppmd_tokens,
2987        encode_table_level_tokens, encode_tokens, encoded_filter_records, insert_match_position,
2988        itanium_decode, itanium_encode, should_lazy_emit_literal, split_large_filter,
2989        unpack29_decode, unpack29_encode_literals, unpack29_encode_ppmd,
2990        unpack29_encode_ppmd_literals, unpack29_encode_ppmd_with_filter, BitReader, BitWriter,
2991        EncodeOptions, EncodeToken, EncoderMatchState, Error, Huffman, LevelToken,
2992        OwnedVmFilterRecord, PpmdEncodeToken, Rar29FilterKind, Rar29FilterSpec, Result,
2993        StandardFilter, Unpack29, Unpack29Encoder, VmFilter, VmProgram, VmProgramKind, MAIN_COUNT,
2994        MATCH_HASH_BUCKETS, MAX_MATCH_CANDIDATES, MAX_VM_AUDIO_FILTER_BLOCK_SIZE,
2995        MAX_VM_DELTA_FILTER_BLOCK_SIZE, MAX_VM_FILTER_BLOCK_SIZE, RAR3_AUDIO_FILTER_BYTECODE,
2996        TABLE_COUNT,
2997    };
2998
2999    const COMPRESSED_TEXT: &[u8] = &[
3000        0x09, 0x10, 0x10, 0x93, 0xe4, 0xce, 0x7f, 0xa2, 0xba, 0x80, 0x46, 0x16, 0x82, 0x63, 0xe9,
3001        0x9a, 0x19, 0xe4, 0x10, 0xe0, 0x41, 0x3d, 0x16, 0xfc, 0x4d, 0xfa, 0x6f, 0xf2, 0x5c, 0xae,
3002        0x32, 0x86, 0xc9, 0x95, 0x9d, 0xf1, 0x04, 0xa4, 0xe8, 0x92, 0x8f, 0x12, 0xd7, 0xe7, 0xba,
3003        0xcb, 0x26, 0xf1, 0x97, 0xac, 0x7c, 0x5f, 0xfd, 0xa0, 0x00, 0x1f, 0x77, 0x50,
3004    ];
3005
3006    #[test]
3007    fn decodes_rar29_lz_member() {
3008        assert_eq!(
3009            unpack29_decode(COMPRESSED_TEXT, 2400).unwrap(),
3010            expected_text()
3011        );
3012    }
3013
3014    #[test]
3015    fn rejects_oversubscribed_rar29_huffman_tables() {
3016        assert!(matches!(
3017            Huffman::from_lengths(&[1, 1, 1]),
3018            Err(Error::InvalidData("RAR 2.9 oversubscribed Huffman table"))
3019        ));
3020    }
3021
3022    #[test]
3023    fn literal_encoder_round_trips_rar29_lz_blocks() {
3024        let input = b"literal-only RAR 2.9 baseline\nwith repeated text literal-only\n";
3025        let packed = unpack29_encode_literals(input).unwrap();
3026
3027        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3028    }
3029
3030    #[test]
3031    fn multi_block_lz_encoding_round_trips_large_repeated_documents() {
3032        let seed = b"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\n\
3033<HTML><BODY><P>RAR29 repeated document body with enough structured text to \
3034exercise LZSS block table selection.</P></BODY></HTML>\n"
3035            .repeat(96);
3036        let input = seed.repeat(180);
3037        let single =
3038            super::encode_member_with_options(&input, &[], EncodeOptions::new(96)).unwrap();
3039        let blocked = super::encode_member_with_options(
3040            &input,
3041            &[],
3042            EncodeOptions::new(96).with_block_size(1024 * 1024),
3043        )
3044        .unwrap();
3045
3046        assert_eq!(unpack29_decode(&single, input.len()).unwrap(), input);
3047        assert_eq!(unpack29_decode(&blocked, input.len()).unwrap(), input);
3048        assert!(blocked.len() < input.len());
3049    }
3050
3051    #[test]
3052    fn table_level_encoder_uses_rar29_run_symbols() {
3053        let mut lengths = [0u8; TABLE_COUNT];
3054        lengths[..4].fill(5);
3055        lengths[8..21].fill(0);
3056
3057        let tokens = encode_table_level_tokens(&lengths);
3058
3059        assert!(tokens.contains(&LevelToken::repeat_previous_short(3)));
3060        assert!(tokens.iter().any(|token| token.symbol == 19));
3061    }
3062
3063    #[test]
3064    fn lazy_lz_parser_defers_short_match_for_longer_next_match() {
3065        let input = b"abcdXbcdYYYYYYYYYYYYabcdYYYYYYYYYYYY";
3066        let greedy = encode_tokens(input, &[], EncodeOptions::new(MAX_MATCH_CANDIDATES));
3067        let lazy = encode_tokens(
3068            input,
3069            &[],
3070            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3071        );
3072        let packed = Unpack29Encoder::with_options(
3073            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3074        )
3075        .encode_member(input)
3076        .unwrap();
3077
3078        assert!(greedy
3079            .iter()
3080            .any(|token| matches!(token, EncodeToken::Match { length: 4, .. })));
3081        assert!(lazy
3082            .iter()
3083            .any(|token| matches!(token, EncodeToken::Match { length, .. } if *length > 8)));
3084        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3085    }
3086
3087    #[test]
3088    fn lazy_lz_parser_uses_match_cost_not_only_match_length() {
3089        let pos = 300_000usize;
3090        let mut input = vec![0u8; pos + 16];
3091        input[100..106].copy_from_slice(b"BCDEFG");
3092        input[106] = b'!';
3093        input[pos - 10..pos - 5].copy_from_slice(b"ABCD!");
3094        input[pos..pos + 7].copy_from_slice(b"ABCDEFG");
3095        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3096        insert_match_position(&input, 100, &mut buckets);
3097        insert_match_position(&input, pos - 10, &mut buckets);
3098
3099        let current = best_match(
3100            &input,
3101            pos,
3102            input.len(),
3103            &buckets,
3104            EncodeOptions::new(MAX_MATCH_CANDIDATES),
3105            &EncoderMatchState::default(),
3106        )
3107        .unwrap();
3108        let next = best_match(
3109            &input,
3110            pos + 1,
3111            input.len(),
3112            &buckets,
3113            EncodeOptions::new(MAX_MATCH_CANDIDATES),
3114            &EncoderMatchState::default(),
3115        )
3116        .unwrap();
3117
3118        assert_eq!(current.length, 4);
3119        assert_eq!(current.offset, 10);
3120        assert_eq!(next.length, 6);
3121        assert!(next.offset > 0x40000);
3122        assert!(!should_lazy_emit_literal(
3123            &input,
3124            pos,
3125            input.len(),
3126            &buckets,
3127            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3128            &EncoderMatchState::default(),
3129            current,
3130        ));
3131    }
3132
3133    #[test]
3134    fn lazy_lz_parser_uses_bounded_cost_lookahead() {
3135        let pos = 160;
3136        let mut input: Vec<u8> = (0..240u16)
3137            .map(|value| value.wrapping_mul(91) as u8)
3138            .collect();
3139        input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3140        input[pos - 80..pos - 64].copy_from_slice(b"CDEFGHIJKLMNOPQR");
3141        input[pos..pos + 18].copy_from_slice(b"ABCDEFGHIJKLMNOPQR");
3142
3143        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3144        for candidate in 0..pos {
3145            insert_match_position(&input, candidate, &mut buckets);
3146        }
3147        let current = best_match(
3148            &input,
3149            pos,
3150            input.len(),
3151            &buckets,
3152            EncodeOptions::default(),
3153            &EncoderMatchState::default(),
3154        )
3155        .unwrap();
3156
3157        assert_eq!((current.length, current.offset), (8, 30));
3158        assert!(!should_lazy_emit_literal(
3159            &input,
3160            pos,
3161            input.len(),
3162            &buckets,
3163            EncodeOptions::default()
3164                .with_lazy_matching(true)
3165                .with_lazy_lookahead(1),
3166            &EncoderMatchState::default(),
3167            current,
3168        ));
3169        assert!(should_lazy_emit_literal(
3170            &input,
3171            pos,
3172            input.len(),
3173            &buckets,
3174            EncodeOptions::default()
3175                .with_lazy_matching(true)
3176                .with_lazy_lookahead(2),
3177            &EncoderMatchState::default(),
3178            current,
3179        ));
3180    }
3181
3182    #[test]
3183    fn match_state_encodes_last_length_and_repeat_offset_symbols() {
3184        let mut state = EncoderMatchState::default();
3185        assert!(matches!(
3186            state.encode_match(12, 64).unwrap(),
3187            super::EncodedMatch::Fresh { .. }
3188        ));
3189        state.remember(12, 64);
3190
3191        assert_eq!(
3192            state.encode_match(12, 64).unwrap(),
3193            super::EncodedMatch::LastLengthRepeat
3194        );
3195        assert!(matches!(
3196            state.encode_match(9, 64).unwrap(),
3197            super::EncodedMatch::RepeatOffset { index: 0, .. }
3198        ));
3199    }
3200
3201    #[test]
3202    fn cost_aware_match_selection_prefers_repeat_offset_token() {
3203        let pos = 600usize;
3204        let mut input: Vec<u8> = (0..pos + 16)
3205            .map(|index| (index as u8).wrapping_mul(37))
3206            .collect();
3207        input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3208        input[pos - 512..pos - 503].copy_from_slice(b"ABCDEFGHI");
3209        input[pos..pos + 9].copy_from_slice(b"ABCDEFGHI");
3210        input[pos - 22] = 0x11;
3211        input[pos - 503] = 0x22;
3212        input[pos + 9] = 0x33;
3213        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3214        insert_match_position(&input, pos - 30, &mut buckets);
3215        insert_match_position(&input, pos - 512, &mut buckets);
3216
3217        let fresh = best_match(
3218            &input,
3219            pos,
3220            input.len(),
3221            &buckets,
3222            EncodeOptions::default(),
3223            &EncoderMatchState::default(),
3224        )
3225        .unwrap();
3226        let repeat = best_match(
3227            &input,
3228            pos,
3229            input.len(),
3230            &buckets,
3231            EncodeOptions::default(),
3232            &EncoderMatchState {
3233                old_offsets: [30, 0, 0, 0],
3234                last_offset: 0,
3235                last_length: 0,
3236            },
3237        )
3238        .unwrap();
3239
3240        assert_eq!((fresh.length, fresh.offset), (9, 512));
3241        assert_eq!((repeat.length, repeat.offset), (8, 30));
3242    }
3243
3244    #[test]
3245    fn match_finder_respects_configured_maximum_distance() {
3246        let phrase = b"rar29 bounded dictionary phrase";
3247        let mut input = Vec::new();
3248        input.extend_from_slice(phrase);
3249        input.extend(std::iter::repeat_n(0u8, 256 * 1024));
3250        input.extend_from_slice(phrase);
3251
3252        let bounded = encode_tokens(
3253            &input,
3254            &[],
3255            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_max_match_distance(128 * 1024),
3256        );
3257        let unbounded = encode_tokens(
3258            &input,
3259            &[],
3260            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_max_match_distance(1024 * 1024),
3261        );
3262
3263        assert!(!bounded.iter().any(
3264            |token| matches!(token, EncodeToken::Match { offset, .. } if *offset > 128 * 1024)
3265        ));
3266        assert!(unbounded.iter().any(
3267            |token| matches!(token, EncodeToken::Match { offset, .. } if *offset > 128 * 1024)
3268        ));
3269    }
3270
3271    #[test]
3272    fn lz_encoder_uses_weighted_rar29_huffman_tables() {
3273        let mut input = Vec::new();
3274        for byte in 0u8..120 {
3275            input.push(b'A');
3276            input.push(byte);
3277        }
3278        let packed = Unpack29Encoder::new().encode_member(&input).unwrap();
3279        let mut decoder = Unpack29::new();
3280        decoder.bits.append(&packed);
3281        decoder.read_tables().unwrap();
3282        let main_lengths = &decoder.levels[..MAIN_COUNT];
3283        let nonzero_lengths = main_lengths
3284            .iter()
3285            .copied()
3286            .filter(|&length| length != 0)
3287            .collect::<std::collections::BTreeSet<_>>();
3288
3289        assert!(nonzero_lengths.len() > 1);
3290        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3291    }
3292
3293    #[test]
3294    fn copy_match_treats_zero_offset_as_distance_one() {
3295        let mut decoder = Unpack29::new();
3296        decoder.output.push(b'Z');
3297
3298        decoder.copy_match(4, 0, 5).unwrap();
3299
3300        assert_eq!(decoder.output, b"ZZZZZ");
3301    }
3302
3303    #[test]
3304    fn ppmd_literal_encoder_round_trips_rar29_ppmd_blocks() {
3305        let mut input = b"rar29 ppmd literal text payload alpha beta gamma\n".repeat(64);
3306        input.extend_from_slice(&[2, 2, 2, b'e', b's', b'c']);
3307        let packed = unpack29_encode_ppmd_literals(&input).unwrap();
3308
3309        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3310        assert_ne!(packed.first().copied(), Some(0));
3311    }
3312
3313    #[test]
3314    fn ppmd_encoder_advertises_period_compatible_model_for_external_decoders() {
3315        let packed = unpack29_encode_ppmd(b"rar29 ppmd dictionary header").unwrap();
3316
3317        assert_eq!(packed[0], 0xa7);
3318        assert_eq!(packed[1], 24);
3319    }
3320
3321    #[test]
3322    fn ppmd_encoder_emits_offset_one_repeat_escapes() {
3323        let input = b"seed "
3324            .iter()
3325            .copied()
3326            .chain(std::iter::repeat_n(b'Z', 512))
3327            .collect::<Vec<_>>();
3328        let tokens = encode_ppmd_tokens(&input, true);
3329        let packed = unpack29_encode_ppmd(&input).unwrap();
3330
3331        assert!(tokens.iter().any(
3332            |token| matches!(token, PpmdEncodeToken::RepeatOffsetOne { length } if *length >= 4)
3333        ));
3334        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3335    }
3336
3337    #[test]
3338    fn ppmd_encoder_emits_distance_match_escapes() {
3339        let phrase = b"repeated phrase for rar29 ppmd distance escape 4 ";
3340        let mut input = Vec::new();
3341        input.extend_from_slice(phrase);
3342        input.extend_from_slice(b"middle bytes make the repeat distance greater than one ");
3343        input.extend_from_slice(phrase);
3344        input.extend_from_slice(phrase);
3345        input.extend_from_slice(b"tail");
3346        let tokens = encode_ppmd_tokens(&input, true);
3347        let packed = unpack29_encode_ppmd(&input).unwrap();
3348
3349        assert!(tokens
3350            .iter()
3351            .any(|token| matches!(token, PpmdEncodeToken::Match { offset, length } if *offset > 1 && *length >= 32)));
3352        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3353    }
3354
3355    #[test]
3356    fn ppmd_distance_match_lengths_stay_period_decoder_compatible() {
3357        let phrase = b"<html><body>RAR PPMd LZSS conversion phrase</body></html>\n";
3358        let mut input = Vec::new();
3359        for _ in 0..200 {
3360            input.extend_from_slice(phrase);
3361        }
3362        let tokens = encode_ppmd_tokens(&input, true);
3363
3364        assert!(tokens.iter().any(
3365            |token| matches!(token, PpmdEncodeToken::Match { offset, length } if *offset > 1 && *length >= 32)
3366        ));
3367        assert!(!tokens
3368            .iter()
3369            .any(|token| matches!(token, PpmdEncodeToken::Match { length, .. } if *length > 255)));
3370    }
3371
3372    #[test]
3373    fn ppmd_encoder_emits_embedded_vm_filter_escape() {
3374        let input = b"\xe8\0\0\0\0rar29 ppmd embedded e8 filter payload\n".repeat(16);
3375        let packed =
3376            unpack29_encode_ppmd_with_filter(&input, Rar29FilterSpec::whole(Rar29FilterKind::E8))
3377                .unwrap();
3378        let plain_ppmd = unpack29_encode_ppmd(&input).unwrap();
3379        let filtered_lz = Unpack29Encoder::new()
3380            .encode_member_with_filter(&input, Rar29FilterSpec::whole(Rar29FilterKind::E8))
3381            .unwrap();
3382
3383        assert!(packed.len() != plain_ppmd.len() || packed.len() != filtered_lz.len());
3384        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3385    }
3386
3387    fn encode_with_filter(input: &[u8], kind: Rar29FilterKind) -> Result<Vec<u8>> {
3388        Unpack29Encoder::new().encode_member_with_filter(input, Rar29FilterSpec::whole(kind))
3389    }
3390
3391    fn encode_with_filter_range(
3392        input: &[u8],
3393        kind: Rar29FilterKind,
3394        range: Range<usize>,
3395    ) -> Result<Vec<u8>> {
3396        Unpack29Encoder::new().encode_member_with_filter(input, Rar29FilterSpec::range(kind, range))
3397    }
3398
3399    fn encode_with_filter_ranges(
3400        input: &[u8],
3401        kind: Rar29FilterKind,
3402        ranges: Vec<Range<usize>>,
3403    ) -> Result<Vec<u8>> {
3404        let filters: Vec<_> = ranges
3405            .into_iter()
3406            .map(|range| Rar29FilterSpec::range(kind, range))
3407            .collect();
3408        Unpack29Encoder::new().encode_member_with_filters(input, &filters)
3409    }
3410
3411    #[test]
3412    fn encoder_emits_rar29_offset_one_matches_for_repeated_bytes() {
3413        let input = b"Z".repeat(1024);
3414        let packed = unpack29_encode_literals(&input).unwrap();
3415
3416        assert!(packed.len() < input.len() / 4);
3417        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3418    }
3419
3420    #[test]
3421    fn encoder_emits_rar29_dictionary_matches_for_repeated_sequences() {
3422        let input = b"abc123xyz-".repeat(128);
3423        let packed = unpack29_encode_literals(&input).unwrap();
3424
3425        assert!(packed.len() < input.len() / 2);
3426        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3427    }
3428
3429    #[test]
3430    fn encoder_finds_rar29_matches_beyond_near_offsets() {
3431        let phrase = b"long-distance repeated phrase for rar29 low-offset coding.";
3432        let mut input = Vec::new();
3433        input.extend_from_slice(phrase);
3434        input.extend(std::iter::repeat_n(0, 300 * 1024));
3435        input.extend_from_slice(phrase);
3436        input.extend_from_slice(phrase);
3437        let tokens = encode_tokens(&input, &[], EncodeOptions::default());
3438        let packed = unpack29_encode_literals(&input).unwrap();
3439
3440        assert!(tokens.iter().any(|token| matches!(
3441            token,
3442            EncodeToken::Match { offset, .. } if *offset > 0x40000
3443        )));
3444        assert!(packed.len() < input.len());
3445        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3446        assert!(
3447            decoded == input,
3448            "RAR 2.9 long-distance match round-trip failed"
3449        );
3450    }
3451
3452    #[test]
3453    fn encoder_emits_rar29_e8_vm_filter_record() {
3454        let input = b"\xe8\0\0\0\0rar29 e8 filter writer payload\n".repeat(8);
3455        let packed = encode_with_filter(&input, Rar29FilterKind::E8).unwrap();
3456        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3457
3458        assert!(
3459            decoded == input,
3460            "RAR 2.9 multi-filter E8 round-trip failed"
3461        );
3462    }
3463
3464    #[test]
3465    fn encoder_emits_rar29_e8e9_vm_filter_record() {
3466        let input = b"\xe9\0\0\0\0rar29 e8e9 filter writer payload\n".repeat(8);
3467        let packed = encode_with_filter(&input, Rar29FilterKind::E8E9).unwrap();
3468        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3469
3470        assert_eq!(decoded, input);
3471    }
3472
3473    #[test]
3474    fn encoder_emits_rar29_segmented_e8_vm_filter_record() {
3475        let mut input = b"prefix data that should not be x86 filtered ".to_vec();
3476        let start = input.len();
3477        input.extend_from_slice(b"\xe8\0\0\0\0segmented e8 filtered payload\n");
3478        let end = input.len();
3479        input.extend_from_slice(b" suffix data that should also remain raw");
3480        let packed = encode_with_filter_range(&input, Rar29FilterKind::E8, start..end).unwrap();
3481        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3482
3483        assert_eq!(decoded, input);
3484    }
3485
3486    #[test]
3487    fn encoder_emits_rar29_multiple_e8_vm_filter_records() {
3488        let mut input = vec![0x41u8; 80_000];
3489        for cluster_start in [8_000, 60_000] {
3490            for index in 0..8 {
3491                let pos = cluster_start + index * 64;
3492                input[pos] = 0xe8;
3493                input[pos + 1..pos + 5].copy_from_slice(&(0x2000u32 + index as u32).to_le_bytes());
3494            }
3495        }
3496
3497        let packed = encode_with_filter_ranges(
3498            &input,
3499            Rar29FilterKind::E8,
3500            vec![8_000..8_512, 60_000..60_512],
3501        )
3502        .unwrap();
3503        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3504
3505        assert_eq!(decoded, input);
3506    }
3507
3508    #[test]
3509    fn encoder_emits_rar29_segmented_e8e9_vm_filter_record() {
3510        let mut input = b"prefix data that should not be x86 filtered ".to_vec();
3511        let start = input.len();
3512        input.extend_from_slice(b"\xe9\0\0\0\0segmented e8e9 filtered payload\n");
3513        let end = input.len();
3514        input.extend_from_slice(b" suffix data that should also remain raw");
3515        let packed = encode_with_filter_range(&input, Rar29FilterKind::E8E9, start..end).unwrap();
3516        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3517
3518        assert_eq!(decoded, input);
3519    }
3520
3521    #[test]
3522    fn encoder_emits_rar29_delta_vm_filter_record() {
3523        let input: Vec<u8> = (0..192).map(|index| (index * 13 + 7) as u8).collect();
3524        let packed = encode_with_filter(&input, Rar29FilterKind::Delta { channels: 3 }).unwrap();
3525        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3526
3527        assert_eq!(decoded, input);
3528    }
3529
3530    #[test]
3531    fn encoder_emits_rar29_segmented_delta_vm_filter_record() {
3532        let mut input = b"prefix bytes before delta segment ".to_vec();
3533        let start = input.len();
3534        input.extend((0..192).map(|index| (index * 13 + 7) as u8));
3535        let end = input.len();
3536        input.extend_from_slice(b" suffix bytes after delta segment");
3537        let packed =
3538            encode_with_filter_range(&input, Rar29FilterKind::Delta { channels: 3 }, start..end)
3539                .unwrap();
3540        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3541
3542        assert_eq!(decoded, input);
3543    }
3544
3545    #[test]
3546    fn encoder_emits_rar29_itanium_vm_filter_record() {
3547        let mut input = vec![0u8; 48];
3548        input[16] = 22;
3549        input[21] = 20;
3550        input.extend_from_slice(b"rar29 itanium filter writer payload\n");
3551        let packed = encode_with_filter(&input, Rar29FilterKind::Itanium).unwrap();
3552        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3553
3554        assert_eq!(decoded, input);
3555    }
3556
3557    #[test]
3558    fn encoder_emits_rar29_segmented_itanium_vm_filter_record() {
3559        let mut input = b"prefix bytes before itanium segment ".to_vec();
3560        let start = input.len();
3561        input.extend_from_slice(&[0; 48]);
3562        input[start + 16] = 22;
3563        input[start + 21] = 20;
3564        input.extend_from_slice(b"rar29 segmented itanium filter writer payload\n");
3565        let end = input.len();
3566        input.extend_from_slice(b" suffix bytes after itanium segment");
3567        let packed =
3568            encode_with_filter_range(&input, Rar29FilterKind::Itanium, start..end).unwrap();
3569        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3570
3571        assert_eq!(decoded, input);
3572    }
3573
3574    #[test]
3575    fn encoder_emits_rar29_rgb_vm_filter_record() {
3576        let width = 12;
3577        let input: Vec<u8> = (0..96).map(|index| (index * 29 + 11) as u8).collect();
3578        let packed = encode_with_filter(&input, Rar29FilterKind::Rgb { width, pos_r: 0 }).unwrap();
3579        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3580
3581        assert_eq!(decoded, input);
3582    }
3583
3584    #[test]
3585    fn encoder_emits_rar29_segmented_rgb_vm_filter_record() {
3586        let width = 12;
3587        let mut input = b"prefix bytes before rgb segment ".to_vec();
3588        let start = input.len();
3589        input.extend((0..96).map(|index| (index * 29 + 11) as u8));
3590        let end = input.len();
3591        input.extend_from_slice(b" suffix bytes after rgb segment");
3592        let packed =
3593            encode_with_filter_range(&input, Rar29FilterKind::Rgb { width, pos_r: 0 }, start..end)
3594                .unwrap();
3595        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3596
3597        assert_eq!(decoded, input);
3598    }
3599
3600    #[test]
3601    fn encoder_rejects_rar29_rgb_filter_with_unaligned_scanline_width() {
3602        let input: Vec<u8> = (0..96).map(|index| (index * 29 + 11) as u8).collect();
3603        assert!(encode_with_filter(&input, Rar29FilterKind::Rgb { width: 8, pos_r: 0 }).is_err());
3604    }
3605
3606    #[test]
3607    fn encoder_emits_rar29_audio_vm_filter_record() {
3608        let input: Vec<u8> = (0..160)
3609            .map(|index| (index * 7 + index / 3) as u8)
3610            .collect();
3611        let packed = encode_with_filter(&input, Rar29FilterKind::Audio { channels: 2 }).unwrap();
3612        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3613
3614        assert_eq!(decoded, input);
3615    }
3616
3617    #[test]
3618    fn audio_filter_bytecode_matches_builtin_transform() {
3619        let channels = 2;
3620        let input: Vec<u8> = (0..MAX_VM_AUDIO_FILTER_BLOCK_SIZE)
3621            .map(|index| (index * 7 + index / channels + index / 257) as u8)
3622            .collect();
3623        let encoded = audio_encode(&input, channels).unwrap();
3624        let program = Program::parse(RAR3_AUDIO_FILTER_BYTECODE).unwrap();
3625        let result = program
3626            .execute(crate::rarvm::Invocation {
3627                input: &encoded,
3628                regs: [channels as u32, 0, 0, 0, 0, 0, 0],
3629                global_data: &[],
3630                file_offset: 0,
3631                exec_count: 0,
3632            })
3633            .unwrap();
3634
3635        assert_eq!(result.output, input);
3636    }
3637
3638    #[test]
3639    fn large_audio_filters_are_split_into_rarvm_safe_blocks() {
3640        let filters = split_large_filter(
3641            MAX_VM_FILTER_BLOCK_SIZE * 2 + 123,
3642            Rar29FilterSpec::whole(Rar29FilterKind::Audio { channels: 4 }),
3643        )
3644        .unwrap();
3645
3646        assert_eq!(filters.len(), 3);
3647        assert_eq!(filters[0].range, Some(0..MAX_VM_AUDIO_FILTER_BLOCK_SIZE));
3648        assert_eq!(
3649            filters[1].range,
3650            Some(MAX_VM_AUDIO_FILTER_BLOCK_SIZE..MAX_VM_AUDIO_FILTER_BLOCK_SIZE * 2)
3651        );
3652        assert_eq!(
3653            filters[2].range,
3654            Some(MAX_VM_AUDIO_FILTER_BLOCK_SIZE * 2..MAX_VM_FILTER_BLOCK_SIZE * 2 + 123)
3655        );
3656    }
3657
3658    #[test]
3659    fn large_delta_filters_are_split_into_rarvm_safe_blocks() {
3660        let filters = split_large_filter(
3661            MAX_VM_FILTER_BLOCK_SIZE * 2 + 123,
3662            Rar29FilterSpec::whole(Rar29FilterKind::Delta { channels: 4 }),
3663        )
3664        .unwrap();
3665
3666        assert_eq!(filters.len(), 3);
3667        assert_eq!(filters[0].range, Some(0..MAX_VM_DELTA_FILTER_BLOCK_SIZE));
3668        assert_eq!(
3669            filters[1].range,
3670            Some(MAX_VM_DELTA_FILTER_BLOCK_SIZE..MAX_VM_DELTA_FILTER_BLOCK_SIZE * 2)
3671        );
3672        assert_eq!(
3673            filters[2].range,
3674            Some(MAX_VM_DELTA_FILTER_BLOCK_SIZE * 2..MAX_VM_FILTER_BLOCK_SIZE * 2 + 123)
3675        );
3676    }
3677
3678    #[test]
3679    fn segmented_audio_filters_redeclare_program_state() {
3680        let filters = [
3681            OwnedVmFilterRecord {
3682                block_start: 0,
3683                block_size: MAX_VM_AUDIO_FILTER_BLOCK_SIZE,
3684                init_regs: vec![(0, 4)],
3685                code: RAR3_AUDIO_FILTER_BYTECODE,
3686            },
3687            OwnedVmFilterRecord {
3688                block_start: MAX_VM_AUDIO_FILTER_BLOCK_SIZE,
3689                block_size: 4096,
3690                init_regs: vec![(0, 4)],
3691                code: RAR3_AUDIO_FILTER_BYTECODE,
3692            },
3693        ];
3694        let records = encoded_filter_records(&filters).unwrap();
3695
3696        assert_vm_filter_declares_program(&records[0], 0);
3697        assert_vm_filter_declares_program(&records[1], 2);
3698    }
3699
3700    #[test]
3701    fn encoder_emits_rar29_segmented_audio_vm_filter_record() {
3702        let mut input = b"prefix bytes before audio segment ".to_vec();
3703        let start = input.len();
3704        input.extend((0..160).map(|index| (index * 7 + index / 3) as u8));
3705        let end = input.len();
3706        input.extend_from_slice(b" suffix bytes after audio segment");
3707        let packed =
3708            encode_with_filter_range(&input, Rar29FilterKind::Audio { channels: 2 }, start..end)
3709                .unwrap();
3710        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3711
3712        assert_eq!(decoded, input);
3713    }
3714
3715    #[test]
3716    fn encoder_emits_multiple_rar29_audio_vm_filter_records_for_large_ranges() {
3717        let input: Vec<u8> = (0..(MAX_VM_AUDIO_FILTER_BLOCK_SIZE * 2 + 64))
3718            .map(|index| (index * 7 + index / 3 + index / 257) as u8)
3719            .collect();
3720        let packed = encode_with_filter(&input, Rar29FilterKind::Audio { channels: 4 }).unwrap();
3721        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3722
3723        assert_eq!(decoded, input);
3724    }
3725
3726    #[test]
3727    fn encoder_emits_multiple_rar29_delta_vm_filter_records_for_large_ranges() {
3728        let input: Vec<u8> = (0..(MAX_VM_DELTA_FILTER_BLOCK_SIZE * 2 + 64))
3729            .map(|index| (index * 11 + index / 5 + index / 251) as u8)
3730            .collect();
3731        let packed = encode_with_filter(&input, Rar29FilterKind::Delta { channels: 4 }).unwrap();
3732        let decoded = unpack29_decode(&packed, input.len()).unwrap();
3733
3734        assert_eq!(decoded, input);
3735    }
3736
3737    fn assert_vm_filter_declares_program(record: &[u8], expected_selector: u32) {
3738        let first = record[0];
3739        assert_ne!(first & 0x80, 0);
3740        assert_ne!(first & 0x20, 0);
3741        assert_ne!(first & 0x10, 0);
3742        let inline_len = match first & 7 {
3743            len @ 0..=5 => len as usize + 1,
3744            6 => usize::from(record[1]) + 7,
3745            _ => u16::from_be_bytes([record[1], record[2]]) as usize,
3746        };
3747        let body_start = match first & 7 {
3748            0..=5 => 1,
3749            6 => 2,
3750            _ => 3,
3751        };
3752        let body = &record[body_start..body_start + inline_len];
3753        let mut bits = BitReader::from_bytes(body);
3754        assert_eq!(bits.read_encoded_u32().unwrap(), expected_selector);
3755        let _block_start = bits.read_encoded_u32().unwrap();
3756        let _block_size = bits.read_encoded_u32().unwrap();
3757        let mask = bits.read_bits(7).unwrap();
3758        for index in 0..7 {
3759            if mask & (1 << index) != 0 {
3760                let _ = bits.read_encoded_u32().unwrap();
3761            }
3762        }
3763        assert_eq!(
3764            bits.read_encoded_u32().unwrap() as usize,
3765            RAR3_AUDIO_FILTER_BYTECODE.len()
3766        );
3767    }
3768
3769    #[test]
3770    fn solid_encoder_emits_rar29_matches_against_previous_member_history() {
3771        let first = b"solid rar29 shared phrase alpha beta gamma ".repeat(4);
3772        let second = b"solid rar29 shared phrase alpha beta gamma ".repeat(2);
3773        let independent = unpack29_encode_literals(&second).unwrap();
3774        let mut encoder = Unpack29Encoder::new();
3775        let first_packed = encoder.encode_member(&first).unwrap();
3776        let second_packed = encoder.encode_member(&second).unwrap();
3777
3778        assert!(second_packed.len() < independent.len());
3779        let mut decoder = Unpack29::new();
3780        assert_eq!(
3781            decoder.decode_member(&first_packed, first.len()).unwrap(),
3782            first
3783        );
3784        assert_eq!(
3785            decoder.decode_member(&second_packed, second.len()).unwrap(),
3786            second
3787        );
3788    }
3789
3790    #[test]
3791    fn decode_member_from_reader_accepts_incremental_input() {
3792        struct TinyReader<'a> {
3793            input: &'a [u8],
3794        }
3795
3796        impl std::io::Read for TinyReader<'_> {
3797            fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
3798                if self.input.is_empty() {
3799                    return Ok(0);
3800                }
3801                let len = self.input.len().min(out.len()).min(3);
3802                out[..len].copy_from_slice(&self.input[..len]);
3803                self.input = &self.input[len..];
3804                Ok(len)
3805            }
3806        }
3807
3808        let mut decoder = Unpack29::new();
3809        let mut reader = TinyReader {
3810            input: COMPRESSED_TEXT,
3811        };
3812        let mut output = Vec::new();
3813        decoder
3814            .decode_member_from_reader(&mut reader, 2400, &mut output)
3815            .unwrap();
3816
3817        assert_eq!(output, expected_text());
3818    }
3819
3820    #[test]
3821    fn decode_non_solid_member_resets_reusable_decoder_state() {
3822        let mut decoder = Unpack29::new();
3823        decoder.output.extend_from_slice(b"stale history");
3824        decoder.filters.push(VmFilter {
3825            program: 0,
3826            start: 0,
3827            size: 1,
3828            regs: [0; 7],
3829            global_data: vec![1, 2, 3],
3830        });
3831
3832        let output = decoder
3833            .decode_non_solid_member(COMPRESSED_TEXT, 2400)
3834            .unwrap();
3835
3836        assert_eq!(output, expected_text());
3837        assert!(decoder.filters.is_empty());
3838    }
3839
    /// Places an E8 call 100 bytes into a member that itself starts at offset
    /// 1000 of the decoder output, encodes its operand relative to the member
    /// start, and checks that `filtered_range` restores the original operand.
    #[test]
    fn e8_filter_uses_member_relative_offset_in_solid_stream() {
        let mut decoder = Unpack29::new();
        let member_start = 1000usize;
        let filter_start = member_start + 100;
        // Zero-filled output with a single 0xe8 opcode at the filter start.
        decoder.output.resize(filter_start + 8, 0);
        decoder.output[filter_start] = 0xe8;

        let call_operand_pos = 1u32;
        let member_relative_filter_start = (filter_start - member_start) as u32;
        let decoded_addr = 0x2000u32;
        // Stored operand = expected address + member-relative filter start +
        // operand position, using wrapping arithmetic.
        let encoded_addr = decoded_addr
            .wrapping_add(member_relative_filter_start)
            .wrapping_add(call_operand_pos);
        decoder.output[filter_start + 1..filter_start + 5]
            .copy_from_slice(&encoded_addr.to_le_bytes());
        decoder.programs.push(VmProgram {
            kind: VmProgramKind::Standard(StandardFilter::E8),
            block_size: 5,
            exec_count: 0,
            globals: Vec::new(),
        });
        // One pending filter covering the opcode and its 4-byte operand.
        decoder.filters.push(VmFilter {
            program: 0,
            start: filter_start,
            size: 5,
            regs: [0; 7],
            global_data: Vec::new(),
        });

        // NOTE(review): the arguments look like (range start, range end,
        // member start) — confirm against `filtered_range`.
        let filtered = decoder
            .filtered_range(member_start, filter_start + 5, member_start)
            .unwrap();
        // The opcode sits at index 100 of the returned bytes, so the operand
        // occupies indices 101..=104.
        let operand =
            u32::from_le_bytes([filtered[101], filtered[102], filtered[103], filtered[104]]);

        assert_eq!(operand, decoded_addr);
    }
3878
    /// Registers a hand-built generic VM program (a byte-mode `Mov` of `0x44`
    /// to `Operand::Absolute(0)`, then `Ret`) as a filter over three output
    /// bytes and checks that `filtered_range` returns the first byte replaced
    /// by `0x44` with the other two unchanged.
    #[test]
    fn generic_vm_filter_executes_from_filtered_range() {
        let mut decoder = Unpack29::new();
        decoder.output.extend_from_slice(&[0x11, 0x22, 0x33]);
        decoder.programs.push(VmProgram {
            kind: VmProgramKind::Generic(Program {
                static_data: Vec::new(),
                instructions: vec![
                    // Byte-mode store of the immediate 0x44 to absolute operand 0.
                    Instruction {
                        opcode: Opcode::Mov,
                        byte_mode: true,
                        operands: vec![Operand::Absolute(0), Operand::Immediate(0x44)],
                    },
                    Instruction {
                        opcode: Opcode::Ret,
                        byte_mode: false,
                        operands: Vec::new(),
                    },
                ],
            }),
            block_size: 3,
            exec_count: 0,
            globals: Vec::new(),
        });
        // Filter referencing program 0 over all three output bytes.
        decoder.filters.push(VmFilter {
            program: 0,
            start: 0,
            size: 3,
            regs: [0; 7],
            global_data: Vec::new(),
        });

        let filtered = decoder.filtered_range(0, 3, 0).unwrap();

        assert_eq!(filtered, [0x44, 0x22, 0x33]);
    }
3915
3916    #[test]
3917    fn standard_filters_reject_malformed_delta_and_rgb_registers() {
3918        let mut delta = vec![0; 32];
3919        let mut delta_regs = [0; 7];
3920        delta_regs[0] = 33;
3921        assert_eq!(
3922            apply_standard_filter(StandardFilter::Delta, &mut delta, 0, &delta_regs),
3923            Err(Error::InvalidData(
3924                "RAR 2.9 DELTA filter channel count is invalid"
3925            ))
3926        );
3927
3928        let mut rgb = vec![0; 32];
3929        let mut rgb_regs = [0; 7];
3930        rgb_regs[0] = 2;
3931        assert_eq!(
3932            apply_standard_filter(StandardFilter::Rgb, &mut rgb, 0, &rgb_regs),
3933            Err(Error::InvalidData(
3934                "RAR 2.9 RGB filter parameters are invalid"
3935            ))
3936        );
3937        rgb_regs[0] = 15;
3938        rgb_regs[1] = 3;
3939        assert_eq!(
3940            apply_standard_filter(StandardFilter::Rgb, &mut rgb, 0, &rgb_regs),
3941            Err(Error::InvalidData(
3942                "RAR 2.9 RGB filter parameters are invalid"
3943            ))
3944        );
3945    }
3946
3947    #[test]
3948    fn vm_encoded_u32_accepts_32_bit_form() {
3949        let mut bits = super::BitReader::from_bytes(&[0xff; 5]);
3950
3951        assert_eq!(bits.read_encoded_u32().unwrap(), 0xffff_ffff);
3952    }
3953
3954    #[test]
3955    fn vm_global_data_size_does_not_reserve_untrusted_declared_size() {
3956        let mut decoder = Unpack29::new();
3957        decoder.programs.push(VmProgram {
3958            kind: VmProgramKind::Standard(StandardFilter::E8),
3959            block_size: 1,
3960            exec_count: 0,
3961            globals: Vec::new(),
3962        });
3963
3964        let mut data = BitWriter::default();
3965        data.write_encoded_u32(1);
3966        data.write_encoded_u32(0);
3967        data.write_encoded_u32(u32::MAX);
3968
3969        assert_eq!(
3970            decoder.parse_vm_code(0x80 | 0x08, data.finish()),
3971            Err(Error::NeedMoreInput)
3972        );
3973    }
3974
3975    #[test]
3976    fn vm_code_size_is_capped_before_allocation() {
3977        let mut decoder = Unpack29::new();
3978        let mut data = BitWriter::default();
3979        data.write_encoded_u32(0);
3980        data.write_encoded_u32(1);
3981        data.write_encoded_u32((super::MAX_VM_CODE_SIZE + 1) as u32);
3982
3983        assert_eq!(
3984            decoder.parse_vm_code(0x80, data.finish()),
3985            Err(Error::InvalidData("RAR 2.9 VM code is too large"))
3986        );
3987    }
3988
    /// Checks both VM table limits on one decoder: first that `parse_vm_code`
    /// returns the "program limit exceeded" error once `MAX_VM_PROGRAMS`
    /// programs exist, then that it returns the "filter limit exceeded" error
    /// once `MAX_VM_FILTERS` filters are pending.
    #[test]
    fn vm_program_and_filter_counts_are_capped() {
        let mut decoder = Unpack29::new();
        // Phase 1: fill the program table up to its limit.
        decoder
            .programs
            .resize_with(super::MAX_VM_PROGRAMS, || VmProgram {
                kind: VmProgramKind::Standard(StandardFilter::E8),
                block_size: 1,
                exec_count: 0,
                globals: Vec::new(),
            });

        // NOTE(review): the first encoded value appears to be the program
        // selector and the next two the block start and code size — confirm
        // against `parse_vm_code`.
        let mut new_program = BitWriter::default();
        new_program.write_encoded_u32((super::MAX_VM_PROGRAMS + 1) as u32);
        new_program.write_encoded_u32(1);
        new_program.write_encoded_u32(1);
        new_program.write_bits(0, 8);
        assert_eq!(
            decoder.parse_vm_code(0x80, new_program.finish()),
            Err(Error::InvalidData("RAR 2.9 VM program limit exceeded"))
        );

        // Phase 2: keep one program, reset `last_filter`, and fill the filter
        // list up to its limit.
        decoder.programs.truncate(1);
        decoder.last_filter = 0;
        decoder
            .filters
            .resize_with(super::MAX_VM_FILTERS, || VmFilter {
                program: 0,
                start: 0,
                size: 1,
                regs: [0; 7],
                global_data: Vec::new(),
            });
        let mut reused_program = BitWriter::default();
        reused_program.write_encoded_u32(0);
        assert_eq!(
            decoder.parse_vm_code(0, reused_program.finish()),
            Err(Error::InvalidData("RAR 2.9 VM filter limit exceeded"))
        );
    }
4029
4030    #[test]
4031    fn itanium_filter_round_trips_with_high_file_offset() {
4032        let mut data = vec![0u8; 64];
4033        for (index, byte) in data.iter_mut().enumerate() {
4034            *byte = index as u8;
4035        }
4036        data[0] = 0;
4037        data[7] = 5 << 3;
4038        let original = data.clone();
4039
4040        itanium_encode(&mut data, u32::MAX);
4041        itanium_decode(&mut data, u32::MAX);
4042
4043        assert_eq!(data, original);
4044    }
4045
4046    fn expected_text() -> Vec<u8> {
4047        "Hello, RAR 3.x fixture world.\n".repeat(80).into_bytes()
4048    }
4049}