Skip to main content

rars_codec/
rar29.rs

1use crate::filters::{self, DeltaErrorMessages, FilterOp};
2use crate::huffman;
3use crate::ppmd::{PpmdByteReader, PpmdDecoder, PpmdEncoder};
4use crate::rarvm;
5use crate::{Error, Result};
6use rars_crc32::crc32;
7use std::io::{Read, Write};
8use std::ops::Range;
9
// Symbol counts of the Huffman tables written by the LZ block encoder below.
// The four data tables are stored back to back, in this order, in one
// `TABLE_COUNT`-long array of code lengths.
const MAIN_COUNT: usize = 299;
const OFFSET_COUNT: usize = 60;
const LOW_OFFSET_COUNT: usize = 17;
const LENGTH_COUNT: usize = 28;
// NOTE(review): presumably the size of the level (code-length) alphabet; it is
// not referenced in this part of the file — confirm against its users.
const LEVEL_COUNT: usize = 20;
const TABLE_COUNT: usize = MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT + LENGTH_COUNT;
// Upper bound on the number of trailing bytes the encoder keeps as history
// between members.
const MAX_HISTORY: usize = 4 * 1024 * 1024;
const INPUT_CHUNK: usize = 64 * 1024;
const STREAM_CHUNK: usize = 1024 * 1024;
// Largest block a single VM filter record may cover; longer ranges are split.
const MAX_VM_FILTER_BLOCK_SIZE: usize = 128 * 1024;
// The standard AUDIO bytecode uses separate input/output regions inside RARVM
// memory. Keep generated blocks below the overlap boundary accepted by period
// decoders.
const MAX_VM_DELTA_FILTER_BLOCK_SIZE: usize = 120_000;
const MAX_VM_AUDIO_FILTER_BLOCK_SIZE: usize = 120_000;
const MAX_VM_GLOBAL_DATA: usize = 0x2000;
const MAX_VM_CODE_SIZE: usize = 64 * 1024;
const MAX_VM_PROGRAMS: usize = 1024;
const MAX_VM_FILTERS: usize = 1024;

// Slot tables: slot `i` covers the values starting at `*_BASES[i]` and carries
// `*_BITS[i]` extra bits, so consecutive bases differ by `1 << bits`.
const LENGTH_BASES: [usize; LENGTH_COUNT] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
    160, 192, 224,
];
const LENGTH_BITS: [u8; LENGTH_COUNT] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
];
const OFFSET_BASES: [usize; OFFSET_COUNT] = [
    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
    2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
    1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 2883584, 3145728, 3407872,
    3670016, 3932160,
];
const OFFSET_BITS: [u8; OFFSET_COUNT] = [
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
    13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18,
];
const SHORT_BASES: [usize; 8] = [0, 4, 8, 16, 32, 64, 128, 192];
const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];
// Encoder-side search limits. NOTE(review): the match finder that consumes
// these is outside this part of the file — confirm exact semantics there.
const MAX_ENCODER_MATCH_OFFSET: usize = 1024 * 1024;
const MAX_ENCODER_MATCH_LENGTH: usize = 258;
const MATCH_HASH_BUCKETS: usize = 4096;
const MAX_MATCH_CANDIDATES: usize = 256;
const MAX_PPMD_MATCH_LENGTH: usize = 255;
const MIN_PPMD_MATCH_LENGTH: usize = 32;
const MAX_PPMD_REPEAT_LENGTH: usize = 259;
58
59// RAR 3.x standard filters are stored as RARVM bytecode in the compressed
60// stream. RAR15_40_FORMAT_SPECIFICATION.md §20 and FILTER_TRANSFORMS.md §9
61// define these blobs by byte length plus CRC32 fingerprint; keep the bytes
62// verbatim so writer output and reader recognition use the same wire identity.
63const RAR3_E8_FILTER_BYTECODE: &[u8] = &[
64    0x97, 0x1b, 0x01, 0x28, 0x07, 0x06, 0x98, 0x08, 0x00, 0x00, 0x00, 0xd1, 0x3a, 0x10, 0x15, 0x92,
65    0xec, 0x50, 0xcb, 0x99, 0x20, 0xb9, 0x25, 0xf0, 0x29, 0x19, 0x15, 0x53, 0x03, 0x12, 0xae, 0x51,
66    0x10, 0x35, 0x59, 0x2b, 0x60, 0x04, 0x15, 0x6d, 0x40, 0x66, 0xab, 0x02, 0x34, 0x49, 0x04, 0x36,
67    0x02, 0x52, 0x3e, 0x97, 0x00,
68];
69const RAR3_E8E9_FILTER_BYTECODE: &[u8] = &[
70    0x84, 0x1b, 0x01, 0x28, 0x11, 0x10, 0x69, 0x80, 0x80, 0x00, 0x00, 0x0d, 0x13, 0xa1, 0x01, 0xc6,
71    0x89, 0xd2, 0x80, 0xac, 0x97, 0x62, 0x85, 0x5c, 0xc9, 0x05, 0xc9, 0x2f, 0x81, 0x48, 0xc8, 0xaa,
72    0x98, 0x18, 0x95, 0x72, 0x88, 0x81, 0xaa, 0xc9, 0x5b, 0x00, 0x20, 0xab, 0x6a, 0x03, 0x35, 0x58,
73    0x11, 0xa2, 0x48, 0x21, 0xb0, 0x12, 0x91, 0xf4, 0xb8,
74];
75const RAR3_DELTA_FILTER_BYTECODE: &[u8] = &[
76    0x2f, 0x01, 0x9a, 0x41, 0x80, 0xec, 0x27, 0x48, 0x2f, 0x09, 0x76, 0x6d, 0xd3, 0xea, 0x41, 0x5b,
77    0x59, 0x44, 0xe8, 0x17, 0x5c, 0xe1, 0x6c, 0x91, 0x4c, 0x4e, 0x3f, 0x77, 0x00,
78];
79const RAR3_ITANIUM_FILTER_BYTECODE: &[u8] = &[
80    0x46, 0x9e, 0x08, 0x08, 0x0c, 0x0c, 0x00, 0x00, 0x0e, 0x0e, 0x08, 0x08, 0x00, 0x00, 0x08, 0x08,
81    0x00, 0x00, 0x6c, 0x11, 0x5a, 0x04, 0xac, 0x0c, 0xc4, 0xcc, 0x5c, 0x08, 0x18, 0x46, 0x24, 0x08,
82    0xf9, 0xa0, 0x44, 0x25, 0x12, 0x12, 0x45, 0x85, 0x99, 0x0c, 0x14, 0x00, 0x26, 0x25, 0x58, 0x99,
83    0x90, 0x03, 0x38, 0x1a, 0x08, 0xdc, 0x02, 0x30, 0x0c, 0x4e, 0xd1, 0x1d, 0x89, 0xa1, 0xe2, 0xd0,
84    0x55, 0x11, 0x33, 0x60, 0x8c, 0x5a, 0x23, 0x06, 0xde, 0x06, 0x18, 0x00, 0x7f, 0xff, 0xfc, 0x4d,
85    0xcc, 0x19, 0x17, 0xb3, 0x06, 0xc4, 0x44, 0xb2, 0x32, 0x5a, 0x44, 0xc4, 0xa6, 0x01, 0xf4, 0x24,
86    0x88, 0x83, 0x38, 0xcc, 0xc4, 0x11, 0x09, 0x87, 0xa6, 0xe0, 0x46, 0x02, 0xb2, 0x24, 0x03, 0xe2,
87    0xa0, 0x32, 0x54, 0x83, 0x52, 0xc5, 0xb1, 0x70,
88];
89const RAR3_RGB_FILTER_BYTECODE: &[u8] = &[
90    0xc5, 0x01, 0x9a, 0x41, 0x95, 0xc9, 0xa6, 0x4d, 0xba, 0x4b, 0x14, 0x0a, 0xf4, 0x9b, 0x80, 0x4c,
91    0x00, 0x15, 0xa6, 0xa8, 0x07, 0x26, 0x2a, 0xc9, 0xc4, 0x8b, 0x86, 0x62, 0x32, 0x0f, 0x86, 0x64,
92    0x24, 0x06, 0x66, 0x71, 0x19, 0x98, 0xcc, 0x43, 0x33, 0x31, 0x99, 0x00, 0x66, 0x88, 0x33, 0x30,
93    0xcc, 0xd1, 0x0e, 0x98, 0x0b, 0x33, 0x34, 0x40, 0x0c, 0xd1, 0x46, 0x66, 0x19, 0x9a, 0x28, 0xcc,
94    0x49, 0x80, 0xb3, 0x33, 0x45, 0x00, 0xcd, 0x18, 0x66, 0x61, 0x99, 0xa3, 0x0c, 0xc8, 0x98, 0x0b,
95    0x33, 0x34, 0x60, 0x4c, 0xd1, 0x06, 0x68, 0xa5, 0x20, 0x62, 0x66, 0x88, 0x33, 0x46, 0x28, 0x05,
96    0x0f, 0x32, 0x0c, 0x4c, 0xd1, 0x46, 0x68, 0xc5, 0x00, 0x41, 0xe4, 0x8f, 0xc8, 0x85, 0x5e, 0x02,
97    0x7c, 0xc9, 0x26, 0x81, 0x83, 0xb0, 0x9d, 0xc2, 0xde, 0x9c, 0x78, 0xac, 0xd6, 0x68, 0xb4, 0x0e,
98    0x71, 0xdb, 0xb2, 0x49, 0x38, 0x6e, 0x02, 0x2a, 0x2c, 0x41, 0x2b, 0x10, 0x98, 0x82, 0x49, 0x03,
99    0x14, 0xf4, 0xe1, 0x97, 0x00,
100];
101const RAR3_AUDIO_FILTER_BYTECODE: &[u8] = &[
102    0x47, 0x01, 0x9a, 0x41, 0x95, 0xe5, 0x72, 0x0d, 0xc2, 0x64, 0x82, 0x74, 0x93, 0x24, 0xb1, 0x40,
103    0x06, 0xd8, 0x38, 0x44, 0x00, 0xa8, 0x01, 0x34, 0x11, 0xdc, 0xa1, 0xba, 0x01, 0x99, 0x0c, 0xc4,
104    0x03, 0x31, 0x19, 0xa4, 0x06, 0x66, 0x22, 0x60, 0x4d, 0x9a, 0x40, 0x0d, 0x66, 0x8e, 0x60, 0xd0,
105    0x30, 0x40, 0x18, 0x26, 0xc1, 0xc8, 0xf6, 0xe6, 0x26, 0x13, 0x78, 0x92, 0x08, 0xe8, 0x50, 0xbc,
106    0x5a, 0x07, 0xc6, 0xe9, 0xf5, 0x20, 0xa9, 0xa0, 0xed, 0x37, 0x33, 0x47, 0x39, 0x66, 0x90, 0x70,
107    0x19, 0xa3, 0x9b, 0xcf, 0x25, 0x83, 0x80, 0xc1, 0xbd, 0x30, 0x16, 0x6e, 0x23, 0x34, 0x93, 0x81,
108    0x16, 0x09, 0xb0, 0x50, 0x18, 0x3b, 0x4d, 0xc8, 0x4c, 0x05, 0x9b, 0x88, 0xc5, 0x28, 0xe0, 0x76,
109    0x93, 0x90, 0x98, 0x0b, 0x37, 0x11, 0x8a, 0x59, 0xc4, 0x80, 0x42, 0x48, 0x43, 0xa9, 0x47, 0xee,
110    0x43, 0x34, 0x60, 0x47, 0xd4, 0x4a, 0x0d, 0xbb, 0xd3, 0x59, 0xa4, 0x86, 0xee, 0x05, 0x09, 0x40,
111    0x26, 0xc9, 0x34, 0x24, 0x76, 0xa0, 0x30, 0x6a, 0x20, 0xea, 0x02, 0x20, 0x04, 0xa0, 0x41, 0x50,
112    0x9e, 0x50, 0x3f, 0xe6, 0xe1, 0x28, 0x94, 0x46, 0x01, 0xbd, 0x8b, 0x40, 0xf0, 0x68, 0x11, 0x36,
113    0xc9, 0xa1, 0x92, 0x38, 0x11, 0x41, 0x9c, 0xa8, 0x95, 0x10, 0xee, 0x50, 0x66, 0x2b, 0x00, 0x20,
114    0x95, 0x11, 0x04, 0x02, 0x62, 0xac, 0x66, 0x8c, 0x6a, 0xca, 0x26, 0x40, 0xb2, 0x67, 0x1b, 0x4b,
115    0x26, 0xcc, 0x64, 0x8a, 0x62, 0x71, 0xa2, 0xb8,
116];
117
118pub fn unpack29_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
119    let mut decoder = Unpack29::new();
120    decoder.decode_non_solid_member(input, output_size)
121}
122
123pub fn unpack29_encode_literals(input: &[u8]) -> Result<Vec<u8>> {
124    encode_member(input, &[])
125}
126
127pub fn unpack29_encode_literals_with_options(
128    input: &[u8],
129    options: EncodeOptions,
130) -> Result<Vec<u8>> {
131    encode_member_with_options(input, &[], options)
132}
133
134pub fn unpack29_encode_ppmd_literals(input: &[u8]) -> Result<Vec<u8>> {
135    encode_ppmd_member(input, false, &[])
136}
137
138pub fn unpack29_encode_ppmd(input: &[u8]) -> Result<Vec<u8>> {
139    encode_ppmd_member(input, true, &[])
140}
141
142pub fn unpack29_encode_ppmd_with_filter(input: &[u8], filter: Rar29FilterSpec) -> Result<Vec<u8>> {
143    encode_ppmd_filtered_member(input, filter, true)
144}
145
146pub fn unpack29_encode_ppmd_literals_with_filter(
147    input: &[u8],
148    filter: Rar29FilterSpec,
149) -> Result<Vec<u8>> {
150    encode_ppmd_filtered_member(input, filter, false)
151}
152
153fn encode_ppmd_filtered_member(
154    input: &[u8],
155    filter: Rar29FilterSpec,
156    lz_escapes: bool,
157) -> Result<Vec<u8>> {
158    let filters = split_large_filter(input.len(), filter)?;
159    let filtered = filtered_members(input, &filters)?;
160    let records = encoded_filter_records(&filtered.records)?;
161    encode_ppmd_member(&filtered.data, lz_escapes, &records)
162}
163
164fn filtered_members(input: &[u8], filters: &[Rar29FilterSpec]) -> Result<FilteredMembers> {
165    let mut data = input.to_vec();
166    let mut records = Vec::with_capacity(filters.len());
167    for filter in filters {
168        let filtered = filtered_member(input, filter)?;
169        let range = filtered.block_start..filtered.block_start + filtered.block_size;
170        data[range.clone()].copy_from_slice(&filtered.data[range]);
171        records.push(OwnedVmFilterRecord {
172            block_start: filtered.block_start,
173            block_size: filtered.block_size,
174            init_regs: filtered.init_regs,
175            code: filtered.code,
176        });
177    }
178    Ok(FilteredMembers { data, records })
179}
180
181struct FilteredMembers {
182    data: Vec<u8>,
183    records: Vec<OwnedVmFilterRecord>,
184}
185
186fn split_large_filter(input_len: usize, filter: Rar29FilterSpec) -> Result<Vec<Rar29FilterSpec>> {
187    let range = filter.range.clone().unwrap_or(0..input_len);
188    if range.start >= range.end || range.end > input_len {
189        return Err(Error::InvalidData("RAR 2.9 VM filter range is invalid"));
190    }
191
192    let chunk_size = match filter.kind {
193        Rar29FilterKind::Delta { channels } => {
194            if channels == 0 || channels > MAX_VM_DELTA_FILTER_BLOCK_SIZE {
195                return Err(Error::InvalidData(
196                    "RAR 2.9 VM filter channel count is invalid",
197                ));
198            }
199            MAX_VM_DELTA_FILTER_BLOCK_SIZE - (MAX_VM_DELTA_FILTER_BLOCK_SIZE % channels)
200        }
201        Rar29FilterKind::Audio { channels } => {
202            if channels == 0 || channels > MAX_VM_AUDIO_FILTER_BLOCK_SIZE {
203                return Err(Error::InvalidData(
204                    "RAR 2.9 VM filter channel count is invalid",
205                ));
206            }
207            MAX_VM_AUDIO_FILTER_BLOCK_SIZE - (MAX_VM_AUDIO_FILTER_BLOCK_SIZE % channels)
208        }
209        Rar29FilterKind::Rgb { width, .. } => {
210            if width == 0 || width > MAX_VM_FILTER_BLOCK_SIZE {
211                return Err(Error::InvalidData(
212                    "RAR 2.9 RGB filter scanline width is invalid",
213                ));
214            }
215            MAX_VM_FILTER_BLOCK_SIZE - (MAX_VM_FILTER_BLOCK_SIZE % width)
216        }
217        Rar29FilterKind::E8 | Rar29FilterKind::E8E9 | Rar29FilterKind::Itanium => {
218            MAX_VM_FILTER_BLOCK_SIZE
219        }
220    };
221    if range.len() <= chunk_size {
222        return Ok(vec![filter]);
223    }
224    if chunk_size == 0 {
225        return Err(Error::InvalidData(
226            "RAR 2.9 VM filter chunk size is invalid",
227        ));
228    }
229
230    let mut filters = Vec::new();
231    let mut start = range.start;
232    while start < range.end {
233        let end = (start + chunk_size).min(range.end);
234        filters.push(Rar29FilterSpec::range(filter.kind, start..end));
235        start = end;
236    }
237    Ok(filters)
238}
239
/// Owned description of one VM filter invocation, kept until it is
/// serialized into a filter record.
struct OwnedVmFilterRecord {
    /// Offset of the filtered block within the member.
    block_start: usize,
    /// Length of the filtered block in bytes.
    block_size: usize,
    /// `(register index, value)` pairs to preset before the program runs.
    init_regs: Vec<(usize, u32)>,
    /// Bytecode of the filter program.
    code: &'static [u8],
}
246
247fn encode_ppmd_member(
248    input: &[u8],
249    lz_escapes: bool,
250    initial_filters: &[Vec<u8>],
251) -> Result<Vec<u8>> {
252    encode_ppmd_block(input, lz_escapes, initial_filters)
253}
254
255fn encode_ppmd_block(
256    input: &[u8],
257    lz_escapes: bool,
258    initial_filters: &[Vec<u8>],
259) -> Result<Vec<u8>> {
260    const PPMD_ORDER: usize = 8;
261    const PPMD_DICTIONARY_MB: u8 = 25;
262    const PPMD_ESC: u8 = 2;
263
264    let mut out = Vec::new();
265    out.push(0x80 | 0x20 | ((PPMD_ORDER as u8) - 1));
266    out.push(PPMD_DICTIONARY_MB - 1);
267    let mut encoder = PpmdEncoder::new(PPMD_ORDER, PPMD_ESC, usize::from(PPMD_DICTIONARY_MB))?;
268    for record in initial_filters {
269        encoder.encode_vm_filter_record(record)?;
270    }
271    for token in encode_ppmd_tokens(input, lz_escapes) {
272        match token {
273            PpmdEncodeToken::Literal(byte) => encoder.encode_literal(byte)?,
274            PpmdEncodeToken::RepeatOffsetOne { length } => {
275                encoder.encode_repeat_offset_one(length)?
276            }
277            PpmdEncodeToken::Match { offset, length } => encoder.encode_match(offset, length)?,
278        }
279    }
280    out.extend_from_slice(&encoder.finish()?);
281    Ok(out)
282}
283
/// One unit of output handed to the PPMd encoder.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PpmdEncodeToken {
    /// A single byte coded directly.
    Literal(u8),
    /// A run encoded through the encoder's "repeat offset one" escape.
    RepeatOffsetOne { length: usize },
    /// A back-reference encoded through the encoder's match escape.
    Match { offset: usize, length: usize },
}
290
291#[derive(Debug, Clone, PartialEq, Eq)]
292pub struct Rar29FilterSpec {
293    pub kind: Rar29FilterKind,
294    pub range: Option<Range<usize>>,
295}
296
297impl Rar29FilterSpec {
298    pub fn whole(kind: Rar29FilterKind) -> Self {
299        Self { kind, range: None }
300    }
301
302    pub fn range(kind: Rar29FilterKind, range: Range<usize>) -> Self {
303        Self {
304            kind,
305            range: Some(range),
306        }
307    }
308}
309
/// The standard RAR 3.x filter transforms this encoder can emit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rar29FilterKind {
    /// E8 filter.
    E8,
    /// E8/E9 filter.
    E8E9,
    /// Delta filter over `channels` interleaved channels.
    Delta { channels: usize },
    /// Itanium filter.
    Itanium,
    /// RGB filter; `width` is the scanline width and `pos_r` is forwarded to
    /// the RGB transform and, when non-zero, to the filter's second register.
    Rgb { width: usize, pos_r: usize },
    /// Audio filter over `channels` interleaved channels.
    Audio { channels: usize },
}
319
/// Result of applying a single filter to a member.
struct FilteredMember {
    /// Full-length copy of the member with the filtered block substituted in.
    data: Vec<u8>,
    /// Offset of the filtered block within the member.
    block_start: usize,
    /// Length of the filtered block in bytes.
    block_size: usize,
    /// `(register index, value)` pairs the filter program needs preset.
    init_regs: Vec<(usize, u32)>,
    /// Bytecode of the filter program that reverses the transform.
    code: &'static [u8],
}
327
328fn filtered_member(input: &[u8], filter: &Rar29FilterSpec) -> Result<FilteredMember> {
329    let range = filter.range.clone().unwrap_or(0..input.len());
330    if range.start >= range.end || range.end > input.len() {
331        return Err(Error::InvalidData("RAR 2.9 VM filter range is invalid"));
332    }
333    let mut filtered = input.to_vec();
334    let (init_regs, code): (Vec<(usize, u32)>, &'static [u8]) = match filter.kind {
335        Rar29FilterKind::E8 => {
336            filters::encode_in_place(
337                FilterOp::E8,
338                &mut filtered[range.clone()],
339                range.start as u32,
340                rar29_delta_messages(),
341            )?;
342            (Vec::new(), RAR3_E8_FILTER_BYTECODE)
343        }
344        Rar29FilterKind::E8E9 => {
345            filters::encode_in_place(
346                FilterOp::E8E9,
347                &mut filtered[range.clone()],
348                range.start as u32,
349                rar29_delta_messages(),
350            )?;
351            (Vec::new(), RAR3_E8E9_FILTER_BYTECODE)
352        }
353        Rar29FilterKind::Delta { channels } => {
354            filters::encode_in_place(
355                FilterOp::Delta { channels },
356                &mut filtered[range.clone()],
357                0,
358                rar29_delta_messages(),
359            )?;
360            (vec![(0, channels as u32)], RAR3_DELTA_FILTER_BYTECODE)
361        }
362        Rar29FilterKind::Itanium => {
363            itanium_encode(&mut filtered[range.clone()], range.start as u32);
364            (Vec::new(), RAR3_ITANIUM_FILTER_BYTECODE)
365        }
366        Rar29FilterKind::Rgb { width, pos_r } => {
367            filtered[range.clone()].copy_from_slice(&rgb_encode(
368                &input[range.clone()],
369                width,
370                pos_r,
371            )?);
372            let init_regs = if pos_r == 0 {
373                vec![(0, width as u32 + 3)]
374            } else {
375                vec![(0, width as u32 + 3), (1, pos_r as u32)]
376            };
377            (init_regs, RAR3_RGB_FILTER_BYTECODE)
378        }
379        Rar29FilterKind::Audio { channels } => {
380            filtered[range.clone()]
381                .copy_from_slice(&audio_encode(&input[range.clone()], channels)?);
382            (vec![(0, channels as u32)], RAR3_AUDIO_FILTER_BYTECODE)
383        }
384    };
385    Ok(FilteredMember {
386        data: filtered,
387        block_start: range.start,
388        block_size: range.end - range.start,
389        init_regs,
390        code,
391    })
392}
393
394fn rar29_delta_messages() -> DeltaErrorMessages {
395    DeltaErrorMessages {
396        invalid_channels: "RAR 2.9 DELTA filter channel count is invalid",
397        zero_channels: "RAR 2.9 DELTA filter has zero channels",
398        truncated_source: "RAR 2.9 DELTA filter source is truncated",
399    }
400}
401
402#[derive(Debug, Clone, Copy, PartialEq, Eq)]
403#[non_exhaustive]
404pub struct EncodeOptions {
405    pub max_match_candidates: usize,
406    pub lazy_matching: bool,
407    pub lazy_lookahead: usize,
408    pub max_match_distance: usize,
409    pub block_size: Option<usize>,
410}
411
412impl EncodeOptions {
413    pub const fn new(max_match_candidates: usize) -> Self {
414        Self {
415            max_match_candidates,
416            lazy_matching: false,
417            lazy_lookahead: 1,
418            max_match_distance: MAX_ENCODER_MATCH_OFFSET,
419            block_size: None,
420        }
421    }
422
423    pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
424        self.lazy_matching = enabled;
425        self
426    }
427
428    pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
429        self.lazy_lookahead = bytes;
430        self
431    }
432
433    pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
434        self.max_match_distance = distance;
435        self
436    }
437
438    pub const fn with_block_size(mut self, bytes: usize) -> Self {
439        self.block_size = Some(bytes);
440        self
441    }
442}
443
444impl Default for EncodeOptions {
445    fn default() -> Self {
446        Self::new(MAX_MATCH_CANDIDATES)
447    }
448}
449
450#[derive(Debug, Clone, Default)]
451pub struct Unpack29Encoder {
452    history: Vec<u8>,
453    options: EncodeOptions,
454}
455
456impl Unpack29Encoder {
457    pub fn new() -> Self {
458        Self::default()
459    }
460
461    pub fn with_options(options: EncodeOptions) -> Self {
462        Self {
463            history: Vec::new(),
464            options,
465        }
466    }
467
468    pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
469        let packed = encode_member_with_options(input, &self.history, self.options)?;
470        self.remember(input);
471        Ok(packed)
472    }
473
474    pub fn encode_member_with_filter(
475        &mut self,
476        input: &[u8],
477        filter: Rar29FilterSpec,
478    ) -> Result<Vec<u8>> {
479        let filters = split_large_filter(input.len(), filter)?;
480        let filtered = filtered_members(input, &filters)?;
481        let records = encoded_filter_records(&filtered.records)?;
482        let packed = encode_member_with_initial_filters(
483            &filtered.data,
484            &self.history,
485            &records,
486            self.options,
487        )?;
488        self.remember(input);
489        Ok(packed)
490    }
491
492    pub fn encode_member_with_filters(
493        &mut self,
494        input: &[u8],
495        filters: &[Rar29FilterSpec],
496    ) -> Result<Vec<u8>> {
497        let mut split_filters = Vec::new();
498        for filter in filters {
499            split_filters.extend(split_large_filter(input.len(), filter.clone())?);
500        }
501        let filtered = filtered_members(input, &split_filters)?;
502        let records = encoded_filter_records(&filtered.records)?;
503        let packed = encode_member_with_initial_filters(
504            &filtered.data,
505            &self.history,
506            &records,
507            self.options,
508        )?;
509        self.remember(input);
510        Ok(packed)
511    }
512
513    fn remember(&mut self, input: &[u8]) {
514        self.history.extend_from_slice(input);
515        let keep_from = self.history.len().saturating_sub(MAX_HISTORY);
516        if keep_from != 0 {
517            self.history.drain(..keep_from);
518        }
519    }
520}
521
522fn encode_member(input: &[u8], history: &[u8]) -> Result<Vec<u8>> {
523    encode_member_with_options(input, history, EncodeOptions::default())
524}
525
526fn encode_member_with_options(
527    input: &[u8],
528    history: &[u8],
529    options: EncodeOptions,
530) -> Result<Vec<u8>> {
531    if let Some(block_size) = options.block_size.filter(|&size| size != 0) {
532        if input.len() > block_size {
533            return encode_member_blocks(input, history, options, block_size);
534        }
535    }
536    encode_member_inner(input, history, &[], options)
537}
538
539fn encode_member_blocks(
540    input: &[u8],
541    history: &[u8],
542    mut options: EncodeOptions,
543    block_size: usize,
544) -> Result<Vec<u8>> {
545    options.block_size = None;
546    let mut out = Vec::new();
547    let mut local_history = history[history.len().saturating_sub(MAX_HISTORY)..].to_vec();
548    for chunk in input.chunks(block_size) {
549        out.extend_from_slice(&encode_member_inner(chunk, &local_history, &[], options)?);
550        local_history.extend_from_slice(chunk);
551        let keep_from = local_history.len().saturating_sub(MAX_HISTORY);
552        if keep_from != 0 {
553            local_history.drain(..keep_from);
554        }
555    }
556    Ok(out)
557}
558
559fn encode_member_with_initial_filters(
560    input: &[u8],
561    history: &[u8],
562    filters: &[Vec<u8>],
563    options: EncodeOptions,
564) -> Result<Vec<u8>> {
565    encode_member_inner(input, history, filters, options)
566}
567
568fn encode_member_inner(
569    input: &[u8],
570    history: &[u8],
571    initial_filters: &[Vec<u8>],
572    options: EncodeOptions,
573) -> Result<Vec<u8>> {
574    let tokens = encode_tokens(input, history, options);
575    let mut main_frequencies = vec![0usize; MAIN_COUNT];
576    let mut offset_frequencies = vec![0usize; OFFSET_COUNT];
577    let mut low_offset_frequencies = vec![0usize; LOW_OFFSET_COUNT];
578    let mut length_frequencies = vec![0usize; LENGTH_COUNT];
579    main_frequencies[257] += initial_filters.len();
580    let mut match_state = EncoderMatchState::default();
581    for token in &tokens {
582        match *token {
583            EncodeToken::Literal(byte) => {
584                main_frequencies[byte as usize] += 1;
585            }
586            EncodeToken::Match { length, offset } => {
587                match match_state.encode_match(length, offset)? {
588                    EncodedMatch::LastLengthRepeat => {
589                        main_frequencies[258] += 1;
590                    }
591                    EncodedMatch::RepeatOffset {
592                        index, length_slot, ..
593                    } => {
594                        main_frequencies[259 + index] += 1;
595                        length_frequencies[length_slot] += 1;
596                    }
597                    EncodedMatch::Fresh {
598                        length_slot,
599                        offset_slot,
600                        offset_extra,
601                        ..
602                    } => {
603                        main_frequencies[271 + length_slot] += 1;
604                        offset_frequencies[offset_slot] += 1;
605                        if offset_slot > 9 {
606                            low_offset_frequencies[offset_extra & 0x0f] += 1;
607                        }
608                    }
609                }
610                match_state.remember(length, offset);
611            }
612        }
613    }
614    main_frequencies[256] += 1;
615
616    let mut table_lengths = [0u8; TABLE_COUNT];
617    if low_offset_frequencies
618        .iter()
619        .all(|&frequency| frequency == 0)
620    {
621        low_offset_frequencies[0] = 1;
622    }
623    let main_lengths = huffman::lengths_for_frequencies(&main_frequencies, 15);
624    let offset_lengths = huffman::lengths_for_frequencies(&offset_frequencies, 15);
625    let low_offset_lengths = huffman::lengths_for_frequencies(&low_offset_frequencies, 15);
626    let length_lengths = huffman::lengths_for_frequencies(&length_frequencies, 15);
627    table_lengths[..MAIN_COUNT].copy_from_slice(&main_lengths);
628    table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT].copy_from_slice(&offset_lengths);
629    table_lengths[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT]
630        .copy_from_slice(&low_offset_lengths);
631    table_lengths[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..].copy_from_slice(&length_lengths);
632
633    let level_tokens = encode_table_level_tokens(&table_lengths);
634    let level_lengths = level_code_lengths(&level_tokens);
635    let level_codes = canonical_codes(&level_lengths)?;
636    let main_codes = canonical_codes(&table_lengths[..MAIN_COUNT])?;
637
638    let mut bits = BitWriter::default();
639    bits.write_bit(false); // LZ block.
640    bits.write_bit(false); // do not keep previous tables.
641    for &len in &level_lengths {
642        bits.write_bits(len as u32, 4);
643    }
644    for token in level_tokens {
645        let code = level_codes[token.symbol].ok_or(Error::InvalidData(
646            "RAR 2.9 encoder missing level Huffman code",
647        ))?;
648        bits.write_bits(code.code as u32, code.len);
649        if token.extra_bits != 0 {
650            bits.write_bits(token.extra_value as u32, token.extra_bits);
651        }
652    }
653    let offset_codes = canonical_codes(&table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
654    let low_offset_codes = canonical_codes(
655        &table_lengths[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT],
656    )?;
657    let length_codes =
658        canonical_codes(&table_lengths[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..])?;
659    for filter in initial_filters {
660        let code = main_codes[257].ok_or(Error::InvalidData(
661            "RAR 2.9 encoder missing VM filter Huffman code",
662        ))?;
663        bits.write_bits(code.code as u32, code.len);
664        for &byte in filter {
665            bits.write_bits(u32::from(byte), 8);
666        }
667    }
668    let mut match_state = EncoderMatchState::default();
669    for token in tokens {
670        match token {
671            EncodeToken::Literal(byte) => {
672                let code = main_codes[byte as usize].ok_or(Error::InvalidData(
673                    "RAR 2.9 encoder missing literal Huffman code",
674                ))?;
675                bits.write_bits(code.code as u32, code.len);
676            }
677            EncodeToken::Match { length, offset } => {
678                match match_state.encode_match(length, offset)? {
679                    EncodedMatch::LastLengthRepeat => {
680                        let code = main_codes[258].ok_or(Error::InvalidData(
681                            "RAR 2.9 encoder missing last-length repeat Huffman code",
682                        ))?;
683                        bits.write_bits(code.code as u32, code.len);
684                    }
685                    EncodedMatch::RepeatOffset {
686                        index,
687                        length_slot,
688                        length_extra,
689                    } => {
690                        let code = main_codes[259 + index].ok_or(Error::InvalidData(
691                            "RAR 2.9 encoder missing repeat-offset Huffman code",
692                        ))?;
693                        bits.write_bits(code.code as u32, code.len);
694                        let length_code = length_codes[length_slot].ok_or(Error::InvalidData(
695                            "RAR 2.9 encoder missing repeat length Huffman code",
696                        ))?;
697                        bits.write_bits(length_code.code as u32, length_code.len);
698                        if LENGTH_BITS[length_slot] != 0 {
699                            bits.write_bits(length_extra as u32, LENGTH_BITS[length_slot]);
700                        }
701                    }
702                    EncodedMatch::Fresh {
703                        length_slot,
704                        length_extra,
705                        offset_slot,
706                        offset_extra,
707                    } => {
708                        let code = main_codes[271 + length_slot].ok_or(Error::InvalidData(
709                            "RAR 2.9 encoder missing match Huffman code",
710                        ))?;
711                        bits.write_bits(code.code as u32, code.len);
712                        if LENGTH_BITS[length_slot] != 0 {
713                            bits.write_bits(length_extra as u32, LENGTH_BITS[length_slot]);
714                        }
715                        let offset = offset_codes[offset_slot].ok_or(Error::InvalidData(
716                            "RAR 2.9 encoder missing offset Huffman code",
717                        ))?;
718                        bits.write_bits(offset.code as u32, offset.len);
719                        if offset_slot > 9 {
720                            let offset_bits = OFFSET_BITS[offset_slot];
721                            if offset_bits > 4 {
722                                bits.write_bits((offset_extra >> 4) as u32, offset_bits - 4);
723                            }
724                            let low_offset =
725                                low_offset_codes[offset_extra & 0x0f].ok_or(Error::InvalidData(
726                                    "RAR 2.9 encoder missing low-offset Huffman code",
727                                ))?;
728                            bits.write_bits(low_offset.code as u32, low_offset.len);
729                        } else if OFFSET_BITS[offset_slot] != 0 {
730                            bits.write_bits(offset_extra as u32, OFFSET_BITS[offset_slot]);
731                        }
732                    }
733                }
734                match_state.remember(length, offset);
735            }
736        }
737    }
738    let end = main_codes[256].ok_or(Error::InvalidData(
739        "RAR 2.9 encoder missing end-of-block Huffman code",
740    ))?;
741    bits.write_bits(end.code as u32, end.len);
742    bits.write_bit(true); // end member, no following table.
743    Ok(bits.finish())
744}
745
746fn encoded_filter_records(filters: &[OwnedVmFilterRecord]) -> Result<Vec<Vec<u8>>> {
747    let mut programs: Vec<&'static [u8]> = Vec::new();
748    let mut records = Vec::with_capacity(filters.len());
749    for filter in filters {
750        let existing = (filter.code != RAR3_AUDIO_FILTER_BYTECODE)
751            .then(|| programs.iter().position(|&code| code == filter.code))
752            .flatten();
753        let (program_selector, include_code) = match existing {
754            Some(index) => (
755                u32::try_from(index + 1)
756                    .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?,
757                false,
758            ),
759            None => {
760                let selector = if programs.is_empty() {
761                    0
762                } else {
763                    u32::try_from(programs.len() + 1)
764                        .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?
765                };
766                programs.push(filter.code);
767                (selector, true)
768            }
769        };
770        records.push(encode_vm_filter_record_inner(
771            VmFilterRecord {
772                block_start: filter.block_start,
773                block_size: filter.block_size,
774                init_regs: &filter.init_regs,
775                code: filter.code,
776            },
777            program_selector,
778            include_code,
779        )?);
780    }
781    Ok(records)
782}
783
/// Borrowed description of one VM filter invocation, as consumed by
/// `encode_vm_filter_record_inner`.
#[derive(Debug, Clone, Copy)]
struct VmFilterRecord<'a> {
    /// Start of the filtered block; written to the record as an encoded `u32`.
    block_start: usize,
    /// Length of the filtered block; must be non-zero and fit in `u32`.
    block_size: usize,
    /// `(register index, value)` pairs to preload; indices must be below 7.
    init_regs: &'a [(usize, u32)],
    /// Program bytecode; only emitted when the record is asked to include code.
    code: &'a [u8],
}
791
792fn encode_vm_filter_record_inner(
793    record: VmFilterRecord<'_>,
794    program_selector: u32,
795    include_code: bool,
796) -> Result<Vec<u8>> {
797    if record.block_size == 0 {
798        return Err(Error::InvalidData("RAR 2.9 VM filter block is empty"));
799    }
800    if include_code && record.code.is_empty() {
801        return Err(Error::InvalidData("RAR 2.9 VM filter bytecode is empty"));
802    }
803
804    let mut body = BitWriter::default();
805    body.write_encoded_u32(program_selector);
806    body.write_encoded_u32(
807        u32::try_from(record.block_start)
808            .map_err(|_| Error::InvalidData("RAR 2.9 VM block start overflows"))?,
809    );
810    body.write_encoded_u32(
811        u32::try_from(record.block_size)
812            .map_err(|_| Error::InvalidData("RAR 2.9 VM block size overflows"))?,
813    );
814    if !record.init_regs.is_empty() {
815        let mut mask = 0u32;
816        for &(index, _) in record.init_regs {
817            if index >= 7 {
818                return Err(Error::InvalidData(
819                    "RAR 2.9 VM init register index is invalid",
820                ));
821            }
822            mask |= 1 << index;
823        }
824        body.write_bits(mask, 7);
825        for index in 0..7 {
826            if let Some((_, value)) = record.init_regs.iter().find(|(reg, _)| *reg == index) {
827                body.write_encoded_u32(*value);
828            }
829        }
830    }
831    if include_code {
832        body.write_encoded_u32(
833            u32::try_from(record.code.len())
834                .map_err(|_| Error::InvalidData("RAR 2.9 VM code size overflows"))?,
835        );
836        for &byte in record.code {
837            body.write_bits(u32::from(byte), 8);
838        }
839    }
840    let body = body.finish();
841
842    let mut out = Vec::new();
843    let mut first = 0x80 | 0x20;
844    if !record.init_regs.is_empty() {
845        first |= 0x10;
846    }
847    match body.len() {
848        1..=6 => first |= (body.len() as u8) - 1,
849        7..=262 => {
850            first |= 6;
851            out.push((body.len() - 7) as u8);
852        }
853        263..=65535 => {
854            first |= 7;
855            out.extend_from_slice(&(body.len() as u16).to_be_bytes());
856        }
857        _ => return Err(Error::InvalidData("RAR 2.9 VM filter record is too large")),
858    }
859    out.insert(0, first);
860    out.extend_from_slice(&body);
861    Ok(out)
862}
863
864fn rgb_encode(data: &[u8], width: usize, pos_r: usize) -> Result<Vec<u8>> {
865    if data.len() < 3 || width == 0 || !width.is_multiple_of(3) || width > data.len() || pos_r > 2 {
866        return Err(Error::InvalidData(
867            "RAR 2.9 RGB filter parameters are invalid",
868        ));
869    }
870    let mut work = data.to_vec();
871    for i in (pos_r..work.len().saturating_sub(2)).step_by(3) {
872        let green = work[i + 1];
873        work[i] = work[i].wrapping_sub(green);
874        work[i + 2] = work[i + 2].wrapping_sub(green);
875    }
876
877    let mut out = Vec::with_capacity(data.len());
878    for channel in 0..3 {
879        let mut prev = 0u8;
880        let mut i = channel;
881        while i < work.len() {
882            let predicted = if i >= width + 3 {
883                rgb_predict(prev, work[i - width], work[i - width - 3])
884            } else {
885                prev
886            };
887            let byte = work[i];
888            out.push(predicted.wrapping_sub(byte));
889            prev = byte;
890            i += 3;
891        }
892    }
893    Ok(out)
894}
895
896fn audio_encode(data: &[u8], channels: usize) -> Result<Vec<u8>> {
897    if channels == 0 || channels > 32 {
898        return Err(Error::InvalidData(
899            "RAR 2.9 AUDIO filter channel count is invalid",
900        ));
901    }
902    let mut out = Vec::with_capacity(data.len());
903    for channel in 0..channels {
904        let mut prev_byte = 0u32;
905        let mut prev_delta = 0i32;
906        let mut d1 = 0i32;
907        let mut d2 = 0i32;
908        let mut k1 = 0i32;
909        let mut k2 = 0i32;
910        let mut k3 = 0i32;
911        let mut dif = [0u32; 7];
912        let mut byte_count = 0usize;
913        let mut i = channel;
914        while i < data.len() {
915            let d3 = d2;
916            d2 = prev_delta - d1;
917            d1 = prev_delta;
918            let predicted = ((8 * prev_byte as i32 + k1 * d1 + k2 * d2 + k3 * d3) >> 3) & 0xff;
919            let decoded = data[i];
920            let encoded = (predicted as u8).wrapping_sub(decoded);
921            out.push(encoded);
922            prev_delta = decoded.wrapping_sub(prev_byte as u8) as i8 as i32;
923            prev_byte = decoded as u32;
924            let d = (encoded as i8 as i32) << 3;
925            dif[0] += d.unsigned_abs();
926            dif[1] += (d - d1).unsigned_abs();
927            dif[2] += (d + d1).unsigned_abs();
928            dif[3] += (d - d2).unsigned_abs();
929            dif[4] += (d + d2).unsigned_abs();
930            dif[5] += (d - d3).unsigned_abs();
931            dif[6] += (d + d3).unsigned_abs();
932            if byte_count & 0x1f == 0 {
933                let mut min = dif[0];
934                let mut min_index = 0usize;
935                dif[0] = 0;
936                for (index, value) in dif.iter_mut().enumerate().skip(1) {
937                    if *value < min {
938                        min = *value;
939                        min_index = index;
940                    }
941                    *value = 0;
942                }
943                match min_index {
944                    1 if k1 >= -16 => k1 -= 1,
945                    2 if k1 < 16 => k1 += 1,
946                    3 if k2 >= -16 => k2 -= 1,
947                    4 if k2 < 16 => k2 += 1,
948                    5 if k3 >= -16 => k3 -= 1,
949                    6 if k3 < 16 => k3 += 1,
950                    _ => {}
951                }
952            }
953            byte_count += 1;
954            i += channels;
955        }
956    }
957    Ok(out)
958}
959
/// Applies the forward Itanium filter transform to `data` in place.
///
/// Inputs of 21 bytes or fewer are left untouched. Otherwise the buffer is walked in 16-byte
/// steps (`ceil((len - 21) / 16)` of them). The first byte of each step selects, through a
/// packed lookup constant, which bit-shifted 32-bit fields are candidates. A candidate whose
/// 4-bit tag equals 5 has `(file_offset >> 4) + step index` added to its 20-bit value,
/// wrapping within 20 bits.
fn itanium_encode(data: &mut [u8], file_offset: u32) {
    const FIELD_MASK: u32 = 0x000f_ffff;

    if data.len() <= 21 {
        return;
    }
    let first_offset = file_offset >> 4;
    let steps = (data.len() - 21).div_ceil(16);

    for step in 0..steps {
        let base = step * 16;
        let selector = (0x334b_0000u32 >> (data[base] & 0x1e)) & 3;
        if selector == 0 {
            continue;
        }
        let step_offset = first_offset.wrapping_add(step as u32);

        // Candidate shifts run from `selector + 1` up to 4 inclusive.
        for shift in (selector + 1)..=4 {
            let at = base + (shift as usize * 5 - 8);
            if (data[at + 3] >> shift) & 15 != 5 {
                continue;
            }
            let word = u32::from_le_bytes([data[at], data[at + 1], data[at + 2], data[at + 3]]);
            let field = (word >> shift).wrapping_add(step_offset) & FIELD_MASK;
            let patched = (word & !(FIELD_MASK << shift)) | (field << shift);
            data[at..at + 4].copy_from_slice(&patched.to_le_bytes());
        }
    }
}
986
/// One unit of LZ encoder output produced by `encode_tokens`.
#[derive(Debug, Clone, Copy)]
enum EncodeToken {
    /// A single byte emitted as-is.
    Literal(u8),
    /// A copy of `length` bytes from `offset` bytes back in the combined history + input.
    Match { length: usize, offset: usize },
}
992
/// Encoder-side record of recently used matches, consulted when choosing a match encoding.
///
/// A zero entry in any field means "nothing recorded yet".
#[derive(Debug, Clone, Copy, Default)]
struct EncoderMatchState {
    /// Most recently used distinct offsets, newest first.
    old_offsets: [usize; 4],
    /// Offset of the last remembered match.
    last_offset: usize,
    /// Length of the last remembered match.
    last_length: usize,
}
999
/// The three ways `EncoderMatchState::encode_match` can represent a match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EncodedMatch {
    /// Same offset and length as the previously remembered match.
    LastLengthRepeat,
    /// Offset equals one of the remembered offsets; only the length is coded.
    RepeatOffset {
        /// Position of the offset inside `old_offsets`.
        index: usize,
        /// Length slot, computed on `length - 2`.
        length_slot: usize,
        /// Remainder above the slot's base length.
        length_extra: usize,
    },
    /// Offset and length are both coded from scratch.
    Fresh {
        /// Length slot, computed on the offset-adjusted length minus 3.
        length_slot: usize,
        /// Remainder above the slot's base length.
        length_extra: usize,
        /// Offset slot, computed on `offset - 1`.
        offset_slot: usize,
        /// Remainder above the slot's base offset.
        offset_extra: usize,
    },
}
1015
1016impl EncoderMatchState {
1017    fn encode_match(&self, length: usize, offset: usize) -> Result<EncodedMatch> {
1018        if offset == self.last_offset && length == self.last_length && self.last_length != 0 {
1019            return Ok(EncodedMatch::LastLengthRepeat);
1020        }
1021        if let Some(index) = self
1022            .old_offsets
1023            .iter()
1024            .position(|&old_offset| old_offset == offset && old_offset != 0)
1025        {
1026            let (length_slot, length_extra) = length_slot_for_repeat_match(length)?;
1027            return Ok(EncodedMatch::RepeatOffset {
1028                index,
1029                length_slot,
1030                length_extra,
1031            });
1032        }
1033        let encoded_length =
1034            length
1035                .checked_sub(match_length_adjustment(offset))
1036                .ok_or(Error::InvalidData(
1037                    "RAR 2.9 adjusted match length underflows",
1038                ))?;
1039        let (length_slot, length_extra) = length_slot_for_match(encoded_length)?;
1040        let (offset_slot, offset_extra) = offset_slot_for_match(offset)?;
1041        Ok(EncodedMatch::Fresh {
1042            length_slot,
1043            length_extra,
1044            offset_slot,
1045            offset_extra,
1046        })
1047    }
1048
1049    fn remember(&mut self, length: usize, offset: usize) {
1050        if offset == self.last_offset && length == self.last_length && self.last_length != 0 {
1051            return;
1052        }
1053        if let Some(index) = self
1054            .old_offsets
1055            .iter()
1056            .position(|&old_offset| old_offset == offset)
1057        {
1058            self.old_offsets[..=index].rotate_right(1);
1059        } else {
1060            self.old_offsets.rotate_right(1);
1061            self.old_offsets[0] = offset;
1062        }
1063        self.last_offset = offset;
1064        self.last_length = length;
1065    }
1066}
1067
1068fn encode_tokens(input: &[u8], history: &[u8], options: EncodeOptions) -> Vec<EncodeToken> {
1069    let mut tokens = Vec::new();
1070    let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1071    let history = &history[history.len().saturating_sub(options.max_match_distance)..];
1072    let mut combined = Vec::with_capacity(history.len() + input.len());
1073    combined.extend_from_slice(history);
1074    combined.extend_from_slice(input);
1075    for history_pos in 0..history.len().saturating_sub(2) {
1076        insert_match_position(&combined, history_pos, &mut buckets);
1077    }
1078
1079    let mut pos = history.len();
1080    let end = combined.len();
1081    let mut state = EncoderMatchState::default();
1082    while pos < end {
1083        if let Some(candidate) = best_match(&combined, pos, end, &buckets, options, &state) {
1084            if should_lazy_emit_literal(&combined, pos, end, &buckets, options, &state, candidate) {
1085                tokens.push(EncodeToken::Literal(combined[pos]));
1086                insert_match_position(&combined, pos, &mut buckets);
1087                pos += 1;
1088                continue;
1089            }
1090            let MatchCandidate { length, offset, .. } = candidate;
1091            tokens.push(EncodeToken::Match { length, offset });
1092            state.remember(length, offset);
1093            for history_pos in pos..pos + length {
1094                insert_match_position(&combined, history_pos, &mut buckets);
1095            }
1096            pos += length;
1097        } else {
1098            tokens.push(EncodeToken::Literal(combined[pos]));
1099            insert_match_position(&combined, pos, &mut buckets);
1100            pos += 1;
1101        }
1102    }
1103    tokens
1104}
1105
1106fn should_lazy_emit_literal(
1107    input: &[u8],
1108    pos: usize,
1109    end: usize,
1110    buckets: &[Vec<usize>],
1111    options: EncodeOptions,
1112    state: &EncoderMatchState,
1113    current: MatchCandidate,
1114) -> bool {
1115    if !options.lazy_matching || pos + 1 >= end {
1116        return false;
1117    }
1118    let lookahead = options.lazy_lookahead.max(1);
1119    (1..=lookahead)
1120        .take_while(|offset| pos + offset < end)
1121        .any(|offset| {
1122            best_match(input, pos + offset, end, buckets, options, state).is_some_and(|next| {
1123                let skipped_literal_score = offset as isize * 8;
1124                next.score > current.score + skipped_literal_score
1125            })
1126        })
1127}
1128
/// A possible match at the current position together with its estimated benefit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct MatchCandidate {
    /// Number of bytes the match covers.
    length: usize,
    /// Distance back to the match source.
    offset: usize,
    /// `length * 8` minus the estimated cost in bits; higher is better.
    score: isize,
}
1135
1136fn encode_ppmd_tokens(input: &[u8], lz_escapes: bool) -> Vec<PpmdEncodeToken> {
1137    if !lz_escapes {
1138        return input
1139            .iter()
1140            .copied()
1141            .map(PpmdEncodeToken::Literal)
1142            .collect();
1143    }
1144
1145    let mut tokens = Vec::new();
1146    let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1147    let mut pos = 0usize;
1148    while pos < input.len() {
1149        if let Some(length) = ppmd_offset_one_repeat(input, pos) {
1150            tokens.push(PpmdEncodeToken::RepeatOffsetOne { length });
1151            for history_pos in pos..pos + length {
1152                insert_match_position(input, history_pos, &mut buckets);
1153            }
1154            pos += length;
1155            continue;
1156        }
1157
1158        if let Some((length, offset)) = best_ppmd_match(input, pos, &buckets) {
1159            tokens.push(PpmdEncodeToken::Match { offset, length });
1160            for history_pos in pos..pos + length {
1161                insert_match_position(input, history_pos, &mut buckets);
1162            }
1163            pos += length;
1164            continue;
1165        }
1166
1167        tokens.push(PpmdEncodeToken::Literal(input[pos]));
1168        insert_match_position(input, pos, &mut buckets);
1169        pos += 1;
1170    }
1171    tokens
1172}
1173
1174fn ppmd_offset_one_repeat(input: &[u8], pos: usize) -> Option<usize> {
1175    if pos == 0 || input[pos] != input[pos - 1] {
1176        return None;
1177    }
1178    let mut length = 0usize;
1179    while pos + length < input.len()
1180        && input[pos + length] == input[pos - 1]
1181        && length < MAX_PPMD_REPEAT_LENGTH
1182    {
1183        length += 1;
1184    }
1185    (length >= 4).then_some(length)
1186}
1187
1188fn best_ppmd_match(input: &[u8], pos: usize, buckets: &[Vec<usize>]) -> Option<(usize, usize)> {
1189    let max_offset = pos.min(0x1000001).min(MAX_ENCODER_MATCH_OFFSET);
1190    let max_length = (input.len() - pos).min(MAX_PPMD_MATCH_LENGTH);
1191    if max_offset < 2 || max_length < MIN_PPMD_MATCH_LENGTH || pos + 2 >= input.len() {
1192        return None;
1193    }
1194    let bucket = &buckets[match_hash(input, pos)];
1195    let mut best = None;
1196    let mut checked = 0usize;
1197    for &candidate in bucket.iter().rev() {
1198        if candidate >= pos {
1199            continue;
1200        }
1201        let offset = pos - candidate;
1202        if offset > max_offset {
1203            break;
1204        }
1205        if offset < 2 {
1206            continue;
1207        }
1208        checked += 1;
1209        let mut length = 0usize;
1210        while length < max_length && input[pos + length] == input[pos + length - offset] {
1211            length += 1;
1212        }
1213        if length >= MIN_PPMD_MATCH_LENGTH
1214            && best.is_none_or(|(best_length, best_offset)| {
1215                length > best_length || (length == best_length && offset < best_offset)
1216            })
1217        {
1218            best = Some((length, offset));
1219            if length == max_length {
1220                break;
1221            }
1222        }
1223        if checked >= MAX_MATCH_CANDIDATES {
1224            break;
1225        }
1226    }
1227    best
1228}
1229
1230fn best_match(
1231    input: &[u8],
1232    pos: usize,
1233    end: usize,
1234    buckets: &[Vec<usize>],
1235    options: EncodeOptions,
1236    state: &EncoderMatchState,
1237) -> Option<MatchCandidate> {
1238    let max_offset = pos.min(options.max_match_distance);
1239    let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
1240    if options.max_match_candidates == 0
1241        || max_offset == 0
1242        || max_length < 4
1243        || pos + 2 >= input.len()
1244    {
1245        return None;
1246    }
1247    let bucket = &buckets[match_hash(input, pos)];
1248    let mut best = None;
1249    let mut checked = 0usize;
1250    for offset in state.old_offsets {
1251        if offset == 0 || offset > max_offset {
1252            continue;
1253        }
1254        let length = match_length(input, pos, offset, max_length);
1255        consider_match_candidate(&mut best, state, length, offset);
1256    }
1257    for &candidate in bucket.iter().rev() {
1258        if candidate >= pos {
1259            continue;
1260        }
1261        let offset = pos - candidate;
1262        if offset > max_offset {
1263            break;
1264        }
1265        checked += 1;
1266        let length = match_length(input, pos, offset, max_length);
1267        consider_match_candidate(&mut best, state, length, offset);
1268        if best.is_some_and(|candidate| candidate.length == max_length) {
1269            break;
1270        }
1271        if checked >= options.max_match_candidates {
1272            break;
1273        }
1274    }
1275    best
1276}
1277
/// Counts how many bytes starting at `pos` equal the bytes `offset` positions earlier,
/// stopping at the first mismatch or after `max_length` bytes.
fn match_length(input: &[u8], pos: usize, offset: usize, max_length: usize) -> usize {
    (0..max_length)
        .take_while(|&step| input[pos + step] == input[pos + step - offset])
        .count()
}
1285
1286fn consider_match_candidate(
1287    best: &mut Option<MatchCandidate>,
1288    state: &EncoderMatchState,
1289    length: usize,
1290    offset: usize,
1291) {
1292    if length < 4 {
1293        return;
1294    }
1295    let Ok(cost) = estimated_match_cost(state, length, offset) else {
1296        return;
1297    };
1298    let score = (length as isize * 8) - cost as isize;
1299    let candidate = MatchCandidate {
1300        length,
1301        offset,
1302        score,
1303    };
1304    if best.is_none_or(|best| {
1305        candidate.score > best.score
1306            || (candidate.score == best.score
1307                && (candidate.length > best.length
1308                    || (candidate.length == best.length && candidate.offset < best.offset)))
1309    }) {
1310        *best = Some(candidate);
1311    }
1312}
1313
1314fn estimated_match_cost(state: &EncoderMatchState, length: usize, offset: usize) -> Result<usize> {
1315    match state.encode_match(length, offset)? {
1316        EncodedMatch::LastLengthRepeat => Ok(2),
1317        EncodedMatch::RepeatOffset { length_slot, .. } => {
1318            Ok(5 + usize::from(LENGTH_BITS[length_slot]))
1319        }
1320        EncodedMatch::Fresh {
1321            length_slot,
1322            offset_slot,
1323            ..
1324        } => {
1325            let low_offset_cost = usize::from(offset_slot > 9) * 4;
1326            Ok(8 + usize::from(LENGTH_BITS[length_slot])
1327                + usize::from(OFFSET_BITS[offset_slot])
1328                + low_offset_cost)
1329        }
1330    }
1331}
1332
/// Returns how much a fresh match's length is reduced before coding: 0 for offsets below
/// `0x2000`, 1 for offsets below `0x40000`, and 2 otherwise.
fn match_length_adjustment(offset: usize) -> usize {
    match offset {
        0..=0x1fff => 0,
        0x2000..=0x3ffff => 1,
        _ => 2,
    }
}
1336
1337fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
1338    if pos + 2 < input.len() {
1339        buckets[match_hash(input, pos)].push(pos);
1340    }
1341}
1342
1343fn match_hash(input: &[u8], pos: usize) -> usize {
1344    let value =
1345        ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1346    value & (MATCH_HASH_BUCKETS - 1)
1347}
1348
1349fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
1350    if length < 3 {
1351        return Err(Error::InvalidData("RAR 2.9 match length is too short"));
1352    }
1353    let adjusted = length - 3;
1354    for (slot, &base) in LENGTH_BASES.iter().enumerate() {
1355        let extra_bits = LENGTH_BITS[slot];
1356        let max = base
1357            + if extra_bits == 0 {
1358                0
1359            } else {
1360                (1usize << extra_bits) - 1
1361            };
1362        if adjusted >= base && adjusted <= max {
1363            return Ok((slot, adjusted - base));
1364        }
1365    }
1366    Err(Error::InvalidData("RAR 2.9 match length is too long"))
1367}
1368
1369fn length_slot_for_repeat_match(length: usize) -> Result<(usize, usize)> {
1370    if length < 2 {
1371        return Err(Error::InvalidData(
1372            "RAR 2.9 repeat match length is too short",
1373        ));
1374    }
1375    let adjusted = length - 2;
1376    for (slot, &base) in LENGTH_BASES.iter().enumerate() {
1377        let extra_bits = LENGTH_BITS[slot];
1378        let max = base
1379            + if extra_bits == 0 {
1380                0
1381            } else {
1382                (1usize << extra_bits) - 1
1383            };
1384        if adjusted >= base && adjusted <= max {
1385            return Ok((slot, adjusted - base));
1386        }
1387    }
1388    Err(Error::InvalidData(
1389        "RAR 2.9 repeat match length is too long",
1390    ))
1391}
1392
1393fn offset_slot_for_match(offset: usize) -> Result<(usize, usize)> {
1394    if offset == 0 {
1395        return Err(Error::InvalidData("RAR 2.9 match offset is zero"));
1396    }
1397    let adjusted = offset - 1;
1398    for (slot, &base) in OFFSET_BASES.iter().enumerate() {
1399        let extra_bits = OFFSET_BITS[slot];
1400        let max = base
1401            + if extra_bits == 0 {
1402                0
1403            } else {
1404                (1usize << extra_bits) - 1
1405            };
1406        if adjusted >= base && adjusted <= max {
1407            return Ok((slot, adjusted - base));
1408        }
1409    }
1410    Err(Error::InvalidData("RAR 2.9 match offset is too large"))
1411}
1412
/// One symbol of the run-length-coded Huffman length table, plus its optional extra bits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct LevelToken {
    /// Level alphabet symbol: a plain value, or 16–19 for the run symbols.
    symbol: usize,
    /// Number of extra bits that accompany the symbol (0, 3 or 7).
    extra_bits: u8,
    /// Value carried in those extra bits.
    extra_value: u8,
}

impl LevelToken {
    /// Shared constructor; `extra` is truncated to `u8`.
    const fn with_extra(symbol: usize, extra_bits: u8, extra: usize) -> Self {
        Self {
            symbol,
            extra_bits,
            extra_value: extra as u8,
        }
    }

    /// A symbol without extra bits.
    const fn plain(symbol: usize) -> Self {
        Self::with_extra(symbol, 0, 0)
    }

    /// Symbol 16: repeat the previous length `count` times, stored as `count - 3` in 3 bits.
    const fn repeat_previous_short(count: usize) -> Self {
        Self::with_extra(16, 3, count - 3)
    }

    /// Symbol 17: repeat the previous length `count` times, stored as `count - 11` in 7 bits.
    const fn repeat_previous_long(count: usize) -> Self {
        Self::with_extra(17, 7, count - 11)
    }

    /// Symbol 18: `count` zero lengths, stored as `count - 3` in 3 bits.
    const fn zero_run_short(count: usize) -> Self {
        Self::with_extra(18, 3, count - 3)
    }

    /// Symbol 19: `count` zero lengths, stored as `count - 11` in 7 bits.
    const fn zero_run_long(count: usize) -> Self {
        Self::with_extra(19, 7, count - 11)
    }
}
1461
1462fn encode_table_level_tokens(lengths: &[u8; TABLE_COUNT]) -> Vec<LevelToken> {
1463    encode_level_tokens(lengths)
1464}
1465
1466fn encode_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
1467    let mut tokens = Vec::new();
1468    let mut pos = 0usize;
1469    let mut previous = None;
1470    while pos < lengths.len() {
1471        let value = lengths[pos];
1472        let mut run = 1usize;
1473        while pos + run < lengths.len() && lengths[pos + run] == value {
1474            run += 1;
1475        }
1476
1477        if value == 0 {
1478            emit_zero_level_run(&mut tokens, run);
1479            previous = Some(0);
1480            pos += run;
1481            continue;
1482        }
1483
1484        if previous == Some(value) && run >= 3 {
1485            emit_repeat_level_run(&mut tokens, run);
1486            pos += run;
1487            continue;
1488        }
1489
1490        tokens.push(LevelToken::plain(value as usize));
1491        previous = Some(value);
1492        pos += 1;
1493    }
1494    tokens
1495}
1496
1497fn emit_repeat_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1498    while run != 0 {
1499        if run >= 11 {
1500            let mut chunk = run.min(138);
1501            if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1502                chunk -= 3;
1503            }
1504            tokens.push(LevelToken::repeat_previous_long(chunk));
1505            run -= chunk;
1506        } else if run >= 3 {
1507            let chunk = run.min(10);
1508            tokens.push(LevelToken::repeat_previous_short(chunk));
1509            run -= chunk;
1510        } else {
1511            break;
1512        }
1513    }
1514}
1515
1516fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1517    while run != 0 {
1518        if run >= 11 {
1519            let mut chunk = run.min(138);
1520            if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1521                chunk -= 3;
1522            }
1523            tokens.push(LevelToken::zero_run_long(chunk));
1524            run -= chunk;
1525        } else if run >= 3 {
1526            let chunk = run.min(10);
1527            tokens.push(LevelToken::zero_run_short(chunk));
1528            run -= chunk;
1529        } else {
1530            tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
1531            break;
1532        }
1533    }
1534}
1535
1536fn level_code_lengths(tokens: &[LevelToken]) -> [u8; LEVEL_COUNT] {
1537    let mut lengths = [0u8; LEVEL_COUNT];
1538    let mut used = [false; LEVEL_COUNT];
1539    for token in tokens {
1540        used[token.symbol] = true;
1541    }
1542    let used_count = used.iter().filter(|&&used| used).count();
1543    let len = huffman::bits_for_symbol_count(used_count);
1544    for (symbol, is_used) in used.into_iter().enumerate() {
1545        if is_used {
1546            lengths[symbol] = len;
1547        }
1548    }
1549    lengths
1550}
1551
/// A canonical Huffman code word as assigned by `canonical_codes`.
#[derive(Debug, Clone, Copy)]
struct HuffmanCode {
    /// Code bits, right-aligned.
    code: u16,
    /// Number of significant bits in `code` (1..=15).
    len: u8,
}
1557
1558fn canonical_codes(lengths: &[u8]) -> Result<Vec<Option<HuffmanCode>>> {
1559    let mut count = [0u16; 16];
1560    for &len in lengths {
1561        if len > 15 {
1562            return Err(Error::InvalidData("RAR 2.9 Huffman length is too large"));
1563        }
1564        if len != 0 {
1565            count[len as usize] += 1;
1566        }
1567    }
1568    validate_huffman_counts(&count)?;
1569
1570    let mut next_code = [0u16; 16];
1571    let mut code = 0u16;
1572    for len in 1..=15 {
1573        code = (code + count[len - 1]) << 1;
1574        next_code[len] = code;
1575    }
1576
1577    let mut codes = vec![None; lengths.len()];
1578    for (symbol, &len) in lengths.iter().enumerate() {
1579        if len == 0 {
1580            continue;
1581        }
1582        let code = next_code[len as usize];
1583        next_code[len as usize] += 1;
1584        codes[symbol] = Some(HuffmanCode { code, len });
1585    }
1586    Ok(codes)
1587}
1588
1589#[derive(Debug, Clone)]
1590pub struct Unpack29 {
1591    bits: BitReader,
1592    levels: [u8; TABLE_COUNT],
1593    main: Huffman,
1594    offsets: Huffman,
1595    low_offsets: Huffman,
1596    lengths: Huffman,
1597    old_offsets: [usize; 4],
1598    last_offset: usize,
1599    last_length: usize,
1600    last_low_offset: usize,
1601    low_offset_repeats: usize,
1602    pending_match: Option<(usize, usize)>,
1603    in_lz_block: bool,
1604    block_mode: BlockMode,
1605    ppmd: PpmdDecoder,
1606    ppmd_esc: u8,
1607    filters: Vec<VmFilter>,
1608    programs: Vec<VmProgram>,
1609    last_filter: usize,
1610    base_offset: usize,
1611    output: Vec<u8>,
1612}
1613
/// Which coder the current block uses.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockMode {
    /// LZ block; the initial mode of a fresh decoder.
    Lz,
    /// PPMd block.
    Ppmd,
}
1619
/// A queued VM filter invocation on the decoder side.
#[derive(Debug, Clone)]
struct VmFilter {
    /// Program index — presumably into the decoder's program list; NOTE(review): confirm.
    program: usize,
    /// Start of the block the filter applies to.
    start: usize,
    /// Size of the block the filter applies to.
    size: usize,
    /// Values for the seven initialisable VM registers.
    regs: [u32; 7],
    /// Global data bytes associated with this invocation.
    global_data: Vec<u8>,
}
1628
1629#[derive(Debug, Clone)]
1630struct VmProgram {
1631    kind: VmProgramKind,
1632    block_size: usize,
1633    exec_count: u32,
1634    globals: Vec<u8>,
1635}
1636
1637#[derive(Debug, Clone)]
1638enum VmProgramKind {
1639    Standard(StandardFilter),
1640    Generic(rarvm::Program),
1641}
1642
/// The standard filter programs the decoder distinguishes by kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StandardFilter {
    /// E8 filter.
    E8,
    /// E8/E9 filter.
    E8E9,
    /// Itanium filter.
    Itanium,
    /// Delta filter.
    Delta,
    /// RGB filter.
    Rgb,
    /// Audio filter.
    Audio,
}
1652
1653impl Unpack29 {
1654    pub fn new() -> Self {
1655        Self {
1656            bits: BitReader::new(),
1657            levels: [0; TABLE_COUNT],
1658            main: Huffman::empty(),
1659            offsets: Huffman::empty(),
1660            low_offsets: Huffman::empty(),
1661            lengths: Huffman::empty(),
1662            old_offsets: [0; 4],
1663            last_offset: 0,
1664            last_length: 0,
1665            last_low_offset: 0,
1666            low_offset_repeats: 0,
1667            pending_match: None,
1668            in_lz_block: false,
1669            block_mode: BlockMode::Lz,
1670            ppmd: PpmdDecoder::new(),
1671            ppmd_esc: 2,
1672            filters: Vec::new(),
1673            programs: Vec::new(),
1674            last_filter: 0,
1675            base_offset: 0,
1676            output: Vec::new(),
1677        }
1678    }
1679
1680    pub fn reset_non_solid(&mut self) {
1681        *self = Self::new();
1682    }
1683
1684    pub fn decode_non_solid_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
1685        self.reset_non_solid();
1686        self.decode_member(input, output_size)
1687    }
1688
1689    pub fn decode_non_solid_member_to(
1690        &mut self,
1691        input: &[u8],
1692        output_size: usize,
1693        out: &mut impl Write,
1694    ) -> Result<()> {
1695        self.reset_non_solid();
1696        self.decode_member_to(input, output_size, out)
1697    }
1698
1699    pub fn decode_non_solid_member_from_reader(
1700        &mut self,
1701        input: &mut impl Read,
1702        output_size: usize,
1703        out: &mut impl Write,
1704    ) -> Result<()> {
1705        self.reset_non_solid();
1706        self.decode_member_from_reader(input, output_size, out)
1707    }
1708
1709    pub fn decode_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
1710        let start = self.current_pos();
1711        let target = start
1712            .checked_add(output_size)
1713            .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
1714        if !input.is_empty() {
1715            self.bits = BitReader::new();
1716        }
1717        self.bits.append(input);
1718        self.decode_until(target).map_err(|error| match error {
1719            Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1720            error => error,
1721        })?;
1722        self.finish_member().map_err(|error| match error {
1723            Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1724            error => error,
1725        })?;
1726        let out = self.filtered_range(start, target, start)?;
1727        self.trim_history(target, target);
1728        Ok(out)
1729    }
1730
1731    pub fn decode_member_to(
1732        &mut self,
1733        input: &[u8],
1734        output_size: usize,
1735        out: &mut impl Write,
1736    ) -> Result<()> {
1737        let start = self.current_pos();
1738        let final_target = start
1739            .checked_add(output_size)
1740            .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
1741        if !input.is_empty() {
1742            self.bits = BitReader::new();
1743        }
1744        self.bits.append(input);
1745
1746        let mut flushed = start;
1747        let mut target = start.saturating_add(STREAM_CHUNK).min(final_target);
1748        while flushed < final_target {
1749            self.decode_until(target)?;
1750            let safe_end = self.safe_flush_end(flushed, target, final_target)?;
1751            if safe_end <= flushed {
1752                if target == final_target {
1753                    return Err(Error::InvalidData(
1754                        "RAR 2.9 VM filter extends beyond output",
1755                    ));
1756                }
1757                target = self
1758                    .current_pos()
1759                    .saturating_add(STREAM_CHUNK)
1760                    .min(final_target);
1761                continue;
1762            }
1763
1764            let decoded = self.filtered_range(flushed, safe_end, start)?;
1765            out.write_all(&decoded)
1766                .map_err(|_| Error::InvalidData("RAR 2.9 output write failed"))?;
1767            flushed = safe_end;
1768            self.trim_history(flushed, self.current_pos());
1769            target = self
1770                .current_pos()
1771                .saturating_add(STREAM_CHUNK)
1772                .min(final_target);
1773        }
1774        self.finish_member()?;
1775        Ok(())
1776    }
1777
1778    pub fn decode_member_from_reader(
1779        &mut self,
1780        input: &mut impl Read,
1781        output_size: usize,
1782        out: &mut impl Write,
1783    ) -> Result<()> {
1784        self.bits = BitReader::new();
1785        let start = self.current_pos();
1786        let final_target = start
1787            .checked_add(output_size)
1788            .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
1789        let mut flushed = start;
1790        let mut target = start.saturating_add(STREAM_CHUNK).min(final_target);
1791        let mut input_done = false;
1792        let mut buffer = [0u8; INPUT_CHUNK];
1793
1794        while flushed < final_target {
1795            loop {
1796                let checkpoint = self.clone();
1797                match self.decode_until(target) {
1798                    Ok(()) => break,
1799                    Err(Error::NeedMoreInput) if !input_done => {
1800                        *self = checkpoint;
1801                        let read = input
1802                            .read(&mut buffer)
1803                            .map_err(|_| Error::InvalidData("RAR 2.9 input read failed"))?;
1804                        if read == 0 {
1805                            input_done = true;
1806                        } else {
1807                            self.bits.append(&buffer[..read]);
1808                        }
1809                    }
1810                    Err(Error::NeedMoreInput) => {
1811                        return Err(Error::InvalidData("RAR 2.9 bitstream is truncated"));
1812                    }
1813                    Err(error) => return Err(error),
1814                }
1815            }
1816
1817            let safe_end = self.safe_flush_end(flushed, target, final_target)?;
1818            if safe_end <= flushed {
1819                if target == final_target {
1820                    return Err(Error::InvalidData(
1821                        "RAR 2.9 VM filter extends beyond output",
1822                    ));
1823                }
1824                target = self
1825                    .current_pos()
1826                    .saturating_add(STREAM_CHUNK)
1827                    .min(final_target);
1828                continue;
1829            }
1830
1831            let decoded = self.filtered_range(flushed, safe_end, start)?;
1832            out.write_all(&decoded)
1833                .map_err(|_| Error::InvalidData("RAR 2.9 output write failed"))?;
1834            flushed = safe_end;
1835            self.trim_history(flushed, self.current_pos());
1836            target = self
1837                .current_pos()
1838                .saturating_add(STREAM_CHUNK)
1839                .min(final_target);
1840        }
1841        loop {
1842            let checkpoint = self.clone();
1843            match self.finish_member() {
1844                Ok(()) => break,
1845                Err(Error::NeedMoreInput) if !input_done => {
1846                    *self = checkpoint;
1847                    let read = input
1848                        .read(&mut buffer)
1849                        .map_err(|_| Error::InvalidData("RAR 2.9 input read failed"))?;
1850                    if read == 0 {
1851                        input_done = true;
1852                    } else {
1853                        self.bits.append(&buffer[..read]);
1854                    }
1855                }
1856                Err(Error::NeedMoreInput) => {
1857                    return Err(Error::InvalidData("RAR 2.9 bitstream is truncated"));
1858                }
1859                Err(error) => return Err(error),
1860            }
1861        }
1862        if self.block_mode == BlockMode::Ppmd {
1863            loop {
1864                let read = input
1865                    .read(&mut buffer)
1866                    .map_err(|_| Error::InvalidData("RAR 2.9 input read failed"))?;
1867                if read == 0 {
1868                    break;
1869                }
1870                self.bits.append(&buffer[..read]);
1871            }
1872        }
1873        Ok(())
1874    }
1875
1876    fn decode_until(&mut self, target: usize) -> Result<()> {
1877        while self.current_pos() < target {
1878            self.drain_pending_match(target)?;
1879            if self.current_pos() >= target {
1880                break;
1881            }
1882            if !self.in_lz_block {
1883                self.read_tables()?;
1884                self.in_lz_block = true;
1885            }
1886            match self.block_mode {
1887                BlockMode::Lz => self.decode_lz(target)?,
1888                BlockMode::Ppmd => self.decode_ppmd(target)?,
1889            }
1890        }
1891        Ok(())
1892    }
1893
1894    fn read_tables(&mut self) -> Result<()> {
1895        self.bits.align_byte();
1896        if self.bits.peek_bit()? != 0 {
1897            let first_byte = self.bits.read_bits(8)? as u8;
1898            self.ppmd
1899                .decode_init(first_byte, &mut self.bits, &mut self.ppmd_esc)?;
1900            self.block_mode = BlockMode::Ppmd;
1901            return Ok(());
1902        }
1903        self.bits.read_bit()?;
1904        self.block_mode = BlockMode::Lz;
1905        let keep_tables = self.bits.read_bit()? != 0;
1906        self.last_low_offset = 0;
1907        self.low_offset_repeats = 0;
1908        if !keep_tables {
1909            self.levels = [0; TABLE_COUNT];
1910        }
1911
1912        let level_lengths = Self::read_level_lengths(&mut self.bits)?;
1913        let level_decoder = Huffman::from_lengths(&level_lengths)?;
1914        let mut new_levels = [0u8; TABLE_COUNT];
1915        let mut pos = 0usize;
1916        while pos < TABLE_COUNT {
1917            let symbol = level_decoder.decode(&mut self.bits)?;
1918            match symbol {
1919                0..=15 => {
1920                    new_levels[pos] = (self.levels[pos].wrapping_add(symbol as u8)) & 0x0f;
1921                    pos += 1;
1922                }
1923                16 => {
1924                    if pos == 0 {
1925                        return Err(Error::InvalidData("RAR 2.9 table repeat at start"));
1926                    }
1927                    let count = 3 + self.bits.read_bits(3)? as usize;
1928                    let value = new_levels[pos - 1];
1929                    fill_levels(&mut new_levels, &mut pos, count, value)?;
1930                }
1931                17 => {
1932                    if pos == 0 {
1933                        return Err(Error::InvalidData("RAR 2.9 long table repeat at start"));
1934                    }
1935                    let count = 11 + self.bits.read_bits(7)? as usize;
1936                    let value = new_levels[pos - 1];
1937                    fill_levels(&mut new_levels, &mut pos, count, value)?;
1938                }
1939                18 => {
1940                    let count = 3 + self.bits.read_bits(3)? as usize;
1941                    fill_levels(&mut new_levels, &mut pos, count, 0)?;
1942                }
1943                19 => {
1944                    let count = 11 + self.bits.read_bits(7)? as usize;
1945                    fill_levels(&mut new_levels, &mut pos, count, 0)?;
1946                }
1947                _ => return Err(Error::InvalidData("RAR 2.9 invalid level symbol")),
1948            }
1949        }
1950
1951        self.levels = new_levels;
1952        self.main = Huffman::from_lengths(&self.levels[..MAIN_COUNT])?;
1953        self.offsets = Huffman::from_lengths(&self.levels[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
1954        self.low_offsets = Huffman::from_lengths(
1955            &self.levels[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT],
1956        )?;
1957        self.lengths =
1958            Huffman::from_lengths(&self.levels[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..])?;
1959        Ok(())
1960    }
1961
1962    fn read_level_lengths(bits: &mut BitReader) -> Result<[u8; LEVEL_COUNT]> {
1963        let mut lengths = [0u8; LEVEL_COUNT];
1964        let mut pos = 0usize;
1965        while pos < LEVEL_COUNT {
1966            let value = bits.read_bits(4)? as u8;
1967            if value == 15 {
1968                let zero_count = bits.read_bits(4)? as usize;
1969                if zero_count == 0 {
1970                    lengths[pos] = 15;
1971                    pos += 1;
1972                } else {
1973                    pos = pos.saturating_add(zero_count + 2).min(LEVEL_COUNT);
1974                }
1975            } else {
1976                lengths[pos] = value;
1977                pos += 1;
1978            }
1979        }
1980        Ok(lengths)
1981    }
1982
1983    fn decode_lz(&mut self, output_size: usize) -> Result<()> {
1984        while self.current_pos() < output_size {
1985            let symbol = self.main.decode(&mut self.bits)?;
1986            match symbol {
1987                0..=255 => self.output.push(symbol as u8),
1988                256 => {
1989                    self.read_end_of_block()?;
1990                    return Ok(());
1991                }
1992                257 => {
1993                    self.read_vm_code()?;
1994                }
1995                258 => {
1996                    if self.last_length != 0 {
1997                        self.copy_match(self.last_length, self.last_offset, output_size)?;
1998                    }
1999                }
2000                259..=262 => {
2001                    let index = symbol - 259;
2002                    let offset = self.old_offsets[index];
2003                    let length_slot = self.lengths.decode(&mut self.bits)?;
2004                    if length_slot >= LENGTH_COUNT {
2005                        return Err(Error::InvalidData("RAR 2.9 invalid repeat length slot"));
2006                    }
2007                    let mut length = LENGTH_BASES[length_slot] + 2;
2008                    if LENGTH_BITS[length_slot] != 0 {
2009                        length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
2010                    }
2011                    self.rotate_old_offset(index);
2012                    self.last_offset = offset;
2013                    self.last_length = length;
2014                    self.copy_match(length, offset, output_size)?;
2015                }
2016                263..=270 => {
2017                    let index = symbol - 263;
2018                    let mut offset = SHORT_BASES[index] + 1;
2019                    if SHORT_BITS[index] != 0 {
2020                        offset += self.bits.read_bits(SHORT_BITS[index])? as usize;
2021                    }
2022                    self.push_old_offset(offset);
2023                    self.last_offset = offset;
2024                    self.last_length = 2;
2025                    self.copy_match(2, offset, output_size)?;
2026                }
2027                271..=298 => {
2028                    let length_slot = symbol - 271;
2029                    let mut length = LENGTH_BASES[length_slot] + 3;
2030                    if LENGTH_BITS[length_slot] != 0 {
2031                        length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
2032                    }
2033                    let offset = self.read_offset()?;
2034                    if offset >= 0x2000 {
2035                        length += 1;
2036                    }
2037                    if offset >= 0x40000 {
2038                        length += 1;
2039                    }
2040                    self.push_old_offset(offset);
2041                    self.last_offset = offset;
2042                    self.last_length = length;
2043                    self.copy_match(length, offset, output_size)?;
2044                }
2045                _ => return Err(Error::InvalidData("RAR 2.9 invalid main symbol")),
2046            }
2047        }
2048        Ok(())
2049    }
2050
2051    fn decode_ppmd(&mut self, output_size: usize) -> Result<()> {
2052        while self.current_pos() < output_size {
2053            let Some(symbol) = self.ppmd.decode_symbol(&mut self.bits)? else {
2054                return Ok(());
2055            };
2056            if symbol != self.ppmd_esc {
2057                self.output.push(symbol);
2058                continue;
2059            }
2060
2061            let Some(next) = self.ppmd.decode_symbol(&mut self.bits)? else {
2062                return Ok(());
2063            };
2064            match next {
2065                0 => {
2066                    self.in_lz_block = false;
2067                    return Ok(());
2068                }
2069                1 | 6..=u8::MAX => self.output.push(self.ppmd_esc),
2070                2 => return Ok(()),
2071                3 => {
2072                    self.read_vm_code_ppmd()?;
2073                }
2074                4 => {
2075                    let mut offset = 0usize;
2076                    for _ in 0..3 {
2077                        offset = (offset << 8) | self.read_ppmd_required_byte()? as usize;
2078                    }
2079                    offset += 2;
2080                    let length = self.read_ppmd_required_byte()? as usize + 32;
2081                    self.copy_match(length, offset, output_size)?;
2082                }
2083                5 => {
2084                    let length = self.read_ppmd_required_byte()? as usize + 4;
2085                    self.copy_match(length, 1, output_size)?;
2086                }
2087            }
2088        }
2089        Ok(())
2090    }
2091
2092    fn read_ppmd_required_byte(&mut self) -> Result<u8> {
2093        self.ppmd
2094            .decode_symbol(&mut self.bits)?
2095            .ok_or(Error::InvalidData("RAR 2.9 PPMd stream ended early"))
2096    }
2097
2098    fn finish_ppmd_member(&mut self) -> Result<()> {
2099        if self.block_mode != BlockMode::Ppmd {
2100            return Ok(());
2101        }
2102        let Some(symbol) = self.ppmd.decode_symbol(&mut self.bits)? else {
2103            return Ok(());
2104        };
2105        if symbol != self.ppmd_esc {
2106            return Err(Error::InvalidData("RAR 2.9 PPMd member has trailing data"));
2107        }
2108        let Some(next) = self.ppmd.decode_symbol(&mut self.bits)? else {
2109            return Ok(());
2110        };
2111        match next {
2112            2 => {
2113                self.in_lz_block = false;
2114                Ok(())
2115            }
2116            0 => {
2117                self.in_lz_block = false;
2118                Ok(())
2119            }
2120            _ => Err(Error::InvalidData("RAR 2.9 PPMd member has trailing data")),
2121        }
2122    }
2123
2124    fn finish_member(&mut self) -> Result<()> {
2125        match self.block_mode {
2126            BlockMode::Lz => self.finish_lz_member(),
2127            BlockMode::Ppmd => self.finish_ppmd_member(),
2128        }
2129    }
2130
2131    fn finish_lz_member(&mut self) -> Result<()> {
2132        if !self.in_lz_block {
2133            return Ok(());
2134        }
2135        let symbol = self.main.decode(&mut self.bits)?;
2136        if symbol != 256 {
2137            return Err(Error::InvalidData("RAR 2.9 LZ member has trailing data"));
2138        }
2139        self.read_end_of_block()
2140    }
2141
2142    fn read_end_of_block(&mut self) -> Result<()> {
2143        let new_table = if self.bits.read_bit()? != 0 {
2144            true
2145        } else {
2146            self.bits.read_bit()? != 0
2147        };
2148        self.in_lz_block = !new_table;
2149        Ok(())
2150    }
2151
2152    fn read_offset(&mut self) -> Result<usize> {
2153        let slot = self.offsets.decode(&mut self.bits)?;
2154        if slot >= OFFSET_COUNT {
2155            return Err(Error::InvalidData("RAR 2.9 invalid offset slot"));
2156        }
2157        let mut offset = OFFSET_BASES[slot] + 1;
2158        let extra_bits = OFFSET_BITS[slot];
2159        if extra_bits != 0 {
2160            if slot > 9 {
2161                if extra_bits > 4 {
2162                    offset += (self.bits.read_bits(extra_bits - 4)? as usize) << 4;
2163                }
2164                if self.low_offset_repeats > 0 {
2165                    self.low_offset_repeats -= 1;
2166                    offset += self.last_low_offset;
2167                } else {
2168                    let low = self.low_offsets.decode(&mut self.bits)?;
2169                    if low == 16 {
2170                        self.low_offset_repeats = 15;
2171                        offset += self.last_low_offset;
2172                    } else if low < 16 {
2173                        self.last_low_offset = low;
2174                        offset += low;
2175                    } else {
2176                        return Err(Error::InvalidData("RAR 2.9 invalid low offset symbol"));
2177                    }
2178                }
2179            } else {
2180                offset += self.bits.read_bits(extra_bits)? as usize;
2181            }
2182        }
2183        Ok(offset)
2184    }
2185
2186    fn read_vm_code(&mut self) -> Result<()> {
2187        let first_byte = self.bits.read_bits(8)?;
2188        let mut len = (first_byte & 7) + 1;
2189        if len == 7 {
2190            len = self.bits.read_bits(8)? + 7;
2191        } else if len == 8 {
2192            len = self.bits.read_bits(16)?;
2193        }
2194        let mut data = Vec::with_capacity(len as usize);
2195        for _ in 0..len {
2196            data.push(self.bits.read_bits(8)? as u8);
2197        }
2198
2199        self.parse_vm_code(first_byte, data)
2200    }
2201
2202    fn read_vm_code_ppmd(&mut self) -> Result<()> {
2203        let first_byte = u32::from(self.read_ppmd_required_byte()?);
2204        let mut len = (first_byte & 7) + 1;
2205        if len == 7 {
2206            len = u32::from(self.read_ppmd_required_byte()?) + 7;
2207        } else if len == 8 {
2208            len = (u32::from(self.read_ppmd_required_byte()?) << 8)
2209                | u32::from(self.read_ppmd_required_byte()?);
2210        }
2211        let mut data = Vec::with_capacity(len as usize);
2212        for _ in 0..len {
2213            data.push(self.read_ppmd_required_byte()?);
2214        }
2215
2216        self.parse_vm_code(first_byte, data)
2217    }
2218
2219    fn parse_vm_code(&mut self, first_byte: u32, data: Vec<u8>) -> Result<()> {
2220        let mut vm = BitReader::from_bytes(&data);
2221        let program_index = if first_byte & 0x80 != 0 {
2222            let value = vm.read_encoded_u32()?;
2223            if value == 0 {
2224                self.filters.clear();
2225                self.programs.clear();
2226                0
2227            } else {
2228                usize::try_from(value - 1)
2229                    .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?
2230            }
2231        } else {
2232            self.last_filter
2233        };
2234        if program_index > self.programs.len() {
2235            return Err(Error::InvalidData("RAR 2.9 VM program index is invalid"));
2236        }
2237        self.last_filter = program_index;
2238        let new_program = program_index == self.programs.len();
2239
2240        let mut block_start = vm.read_encoded_u32()? as usize;
2241        if first_byte & 0x40 != 0 {
2242            block_start += 258;
2243        }
2244        block_start = self
2245            .current_pos()
2246            .checked_add(block_start)
2247            .ok_or(Error::InvalidData("RAR 2.9 VM block start overflows"))?;
2248
2249        let mut block_size = self
2250            .programs
2251            .get(program_index)
2252            .map(|program| program.block_size)
2253            .unwrap_or(0);
2254        if first_byte & 0x20 != 0 {
2255            block_size = vm.read_encoded_u32()? as usize;
2256        }
2257
2258        let mut regs = [0u32; 7];
2259        regs[3] = 0x3c000;
2260        regs[4] = block_size as u32;
2261        if let Some(program) = self.programs.get(program_index) {
2262            regs[5] = program.exec_count;
2263        }
2264        if first_byte & 0x10 != 0 {
2265            let mask = vm.read_bits(7)?;
2266            for (index, reg) in regs.iter_mut().enumerate() {
2267                if mask & (1 << index) != 0 {
2268                    *reg = vm.read_encoded_u32()?;
2269                }
2270            }
2271        }
2272
2273        if new_program {
2274            if self.programs.len() >= MAX_VM_PROGRAMS {
2275                return Err(Error::InvalidData("RAR 2.9 VM program limit exceeded"));
2276            }
2277            let code_size = vm.read_encoded_u32()? as usize;
2278            if code_size == 0 {
2279                return Err(Error::InvalidData("RAR 2.9 VM code is empty"));
2280            }
2281            if code_size > MAX_VM_CODE_SIZE {
2282                return Err(Error::InvalidData("RAR 2.9 VM code is too large"));
2283            }
2284            let mut code = Vec::with_capacity(code_size);
2285            for _ in 0..code_size {
2286                code.push(vm.read_bits(8)? as u8);
2287            }
2288            let kind = identify_standard_filter(&code)
2289                .map(VmProgramKind::Standard)
2290                .map_or_else(
2291                    || rarvm::Program::parse(&code).map(VmProgramKind::Generic),
2292                    Ok,
2293                )?;
2294            self.programs.push(VmProgram {
2295                kind,
2296                block_size,
2297                exec_count: 0,
2298                globals: Vec::new(),
2299            });
2300        } else if let Some(program) = self.programs.get_mut(program_index) {
2301            program.exec_count = program.exec_count.wrapping_add(1);
2302            program.block_size = block_size;
2303        }
2304
2305        let mut global_data = Vec::new();
2306        if first_byte & 0x08 != 0 {
2307            let data_size = vm.read_encoded_u32()? as usize;
2308            global_data.reserve(data_size.min(MAX_VM_GLOBAL_DATA));
2309            for _ in 0..data_size {
2310                let byte = vm.read_bits(8)? as u8;
2311                if global_data.len() < MAX_VM_GLOBAL_DATA {
2312                    global_data.push(byte);
2313                }
2314            }
2315        }
2316
2317        if self.filters.len() >= MAX_VM_FILTERS {
2318            return Err(Error::InvalidData("RAR 2.9 VM filter limit exceeded"));
2319        }
2320        self.filters.push(VmFilter {
2321            program: program_index,
2322            start: block_start,
2323            size: block_size,
2324            regs,
2325            global_data,
2326        });
2327        Ok(())
2328    }
2329
2330    fn filtered_range(&mut self, start: usize, end: usize, member_start: usize) -> Result<Vec<u8>> {
2331        let mut out = Vec::with_capacity(end - start);
2332        let mut pos = start;
2333        let filters: Vec<_> = self
2334            .filters
2335            .iter()
2336            .enumerate()
2337            .filter_map(|(index, filter)| {
2338                (filter.start >= start && filter.start + filter.size <= end).then_some(index)
2339            })
2340            .collect();
2341        for filter_index in filters {
2342            let (program_index, filter_start, filter_size, regs, global_data) = {
2343                let filter = self
2344                    .filters
2345                    .get(filter_index)
2346                    .ok_or(Error::InvalidData("RAR 2.9 VM filter is missing"))?;
2347                (
2348                    filter.program,
2349                    filter.start,
2350                    filter.size,
2351                    filter.regs,
2352                    filter.global_data.clone(),
2353                )
2354            };
2355            if filter_start < pos {
2356                continue;
2357            }
2358            out.extend_from_slice(self.raw_range(pos, filter_start)?);
2359            let mut block = self
2360                .raw_range(filter_start, filter_start + filter_size)?
2361                .to_vec();
2362            let file_offset = filter_start
2363                .checked_sub(member_start)
2364                .ok_or(Error::InvalidData("RAR 2.9 VM filter starts before file"))?
2365                as u32;
2366            let program = self
2367                .programs
2368                .get_mut(program_index)
2369                .ok_or(Error::InvalidData("RAR 2.9 VM program is missing"))?;
2370            match &program.kind {
2371                VmProgramKind::Standard(standard) => {
2372                    apply_standard_filter(*standard, &mut block, file_offset, &regs)?
2373                }
2374                VmProgramKind::Generic(generic) => {
2375                    let globals = if global_data.is_empty() {
2376                        program.globals.as_slice()
2377                    } else {
2378                        global_data.as_slice()
2379                    };
2380                    let result = generic.execute(rarvm::Invocation {
2381                        input: &block,
2382                        regs,
2383                        global_data: globals,
2384                        file_offset: file_offset as u64,
2385                        exec_count: program.exec_count,
2386                    })?;
2387                    program.globals = result.globals;
2388                    block = result.output;
2389                }
2390            }
2391            out.extend_from_slice(&block);
2392            pos = filter_start + filter_size;
2393        }
2394        out.extend_from_slice(self.raw_range(pos, end)?);
2395        Ok(out)
2396    }
2397
2398    fn safe_flush_end(&self, start: usize, end: usize, final_target: usize) -> Result<usize> {
2399        let current = self.current_pos();
2400        let mut safe_end = end;
2401        for filter in &self.filters {
2402            let filter_end = filter
2403                .start
2404                .checked_add(filter.size)
2405                .ok_or(Error::InvalidData("RAR 2.9 VM filter size overflows"))?;
2406            if filter.start >= safe_end || filter_end <= start {
2407                continue;
2408            }
2409            if filter_end > final_target {
2410                return Err(Error::InvalidData(
2411                    "RAR 2.9 VM filter extends beyond output",
2412                ));
2413            }
2414            if filter_end > current {
2415                safe_end = safe_end.min(filter.start);
2416            }
2417        }
2418        Ok(safe_end)
2419    }
2420
2421    fn copy_match(&mut self, length: usize, offset: usize, output_size: usize) -> Result<()> {
2422        // The bitstream normally encodes match distances as offset+1, so zero
2423        // is not emitted for fresh matches. Keep the legacy decoder boundary
2424        // tolerant here: a zero internal offset behaves as distance one.
2425        let offset = if offset == 0 { 1 } else { offset };
2426        let current = self.current_pos();
2427        if offset > current {
2428            return Err(Error::InvalidData("RAR 2.9 match distance is out of range"));
2429        }
2430        for index in 0..length {
2431            if self.current_pos() >= output_size {
2432                self.pending_match = Some((length - index, offset));
2433                break;
2434            }
2435            let src = self.current_pos() - offset;
2436            let byte = *self
2437                .raw_byte(src)
2438                .ok_or(Error::InvalidData("RAR 2.9 match distance is out of range"))?;
2439            self.output.push(byte);
2440        }
2441        Ok(())
2442    }
2443
2444    fn drain_pending_match(&mut self, output_size: usize) -> Result<()> {
2445        let Some((length, offset)) = self.pending_match.take() else {
2446            return Ok(());
2447        };
2448        self.copy_match(length, offset, output_size)
2449    }
2450
2451    fn push_old_offset(&mut self, offset: usize) {
2452        self.old_offsets[3] = self.old_offsets[2];
2453        self.old_offsets[2] = self.old_offsets[1];
2454        self.old_offsets[1] = self.old_offsets[0];
2455        self.old_offsets[0] = offset;
2456    }
2457
2458    fn rotate_old_offset(&mut self, index: usize) {
2459        let value = self.old_offsets[index];
2460        for i in (1..=index).rev() {
2461            self.old_offsets[i] = self.old_offsets[i - 1];
2462        }
2463        self.old_offsets[0] = value;
2464    }
2465
2466    fn current_pos(&self) -> usize {
2467        self.base_offset + self.output.len()
2468    }
2469
2470    fn raw_byte(&self, position: usize) -> Option<&u8> {
2471        self.output.get(position.checked_sub(self.base_offset)?)
2472    }
2473
2474    fn raw_range(&self, start: usize, end: usize) -> Result<&[u8]> {
2475        if start < self.base_offset || end < start {
2476            return Err(Error::InvalidData(
2477                "RAR 2.9 retained history is unavailable",
2478            ));
2479        }
2480        let rel_start = start - self.base_offset;
2481        let rel_end = end - self.base_offset;
2482        self.output
2483            .get(rel_start..rel_end)
2484            .ok_or(Error::InvalidData(
2485                "RAR 2.9 retained history is unavailable",
2486            ))
2487    }
2488
2489    fn trim_history(&mut self, flushed_pos: usize, current_pos: usize) {
2490        let keep_from = current_pos.saturating_sub(MAX_HISTORY);
2491        let keep_from = keep_from.min(flushed_pos);
2492        if keep_from <= self.base_offset {
2493            return;
2494        }
2495        let drain = keep_from - self.base_offset;
2496        self.output.drain(..drain);
2497        self.base_offset = keep_from;
2498        self.filters
2499            .retain(|filter| filter.start + filter.size > self.base_offset);
2500    }
2501}
2502
2503impl Default for Unpack29 {
2504    fn default() -> Self {
2505        Self::new()
2506    }
2507}
2508
2509fn fill_levels(levels: &mut [u8], pos: &mut usize, count: usize, value: u8) -> Result<()> {
2510    let end = pos
2511        .checked_add(count)
2512        .ok_or(Error::InvalidData("RAR 2.9 table run overflows"))?;
2513    let end = end.min(levels.len());
2514    for item in &mut levels[*pos..end] {
2515        *item = value;
2516    }
2517    *pos = end;
2518    Ok(())
2519}
2520
2521#[derive(Debug, Clone)]
2522struct Huffman {
2523    symbols: Vec<HuffmanSymbol>,
2524    first_code: [u16; 16],
2525    first_index: [usize; 16],
2526    counts: [u16; 16],
2527}
2528
2529#[derive(Debug, Clone)]
2530struct HuffmanSymbol {
2531    code: u16,
2532    len: u8,
2533    symbol: usize,
2534}
2535
2536impl Huffman {
2537    fn empty() -> Self {
2538        Self {
2539            symbols: Vec::new(),
2540            first_code: [0; 16],
2541            first_index: [0; 16],
2542            counts: [0; 16],
2543        }
2544    }
2545
2546    fn from_lengths(lengths: &[u8]) -> Result<Self> {
2547        let mut count = [0u16; 16];
2548        for &len in lengths {
2549            if len > 15 {
2550                return Err(Error::InvalidData("RAR 2.9 Huffman length is too large"));
2551            }
2552            if len != 0 {
2553                count[len as usize] += 1;
2554            }
2555        }
2556        if count.iter().all(|&value| value == 0) {
2557            return Ok(Self::empty());
2558        }
2559        validate_huffman_counts(&count)?;
2560
2561        let mut first_code = [0u16; 16];
2562        let mut next_code = [0u16; 16];
2563        let mut code = 0u16;
2564        for len in 1..=15 {
2565            code = (code + count[len - 1]) << 1;
2566            first_code[len] = code;
2567            next_code[len] = code;
2568        }
2569
2570        let mut first_index = [0usize; 16];
2571        let mut index = 0usize;
2572        for len in 1..=15 {
2573            first_index[len] = index;
2574            index += usize::from(count[len]);
2575        }
2576
2577        let mut symbols = Vec::new();
2578        for (symbol, &len) in lengths.iter().enumerate() {
2579            if len == 0 {
2580                continue;
2581            }
2582            let code = next_code[len as usize];
2583            next_code[len as usize] += 1;
2584            symbols.push(HuffmanSymbol { code, len, symbol });
2585        }
2586        symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
2587        Ok(Self {
2588            symbols,
2589            first_code,
2590            first_index,
2591            counts: count,
2592        })
2593    }
2594
2595    fn decode(&self, bits: &mut BitReader) -> Result<usize> {
2596        let mut code = 0u16;
2597        if self.symbols.is_empty() {
2598            return Err(Error::InvalidData("RAR 2.9 empty Huffman table"));
2599        }
2600        for len in 1..=15 {
2601            code = (code << 1) | bits.read_bit()? as u16;
2602            let count = self.counts[len];
2603            if count != 0 {
2604                let first = self.first_code[len];
2605                let offset = code.wrapping_sub(first);
2606                if offset < count {
2607                    let index = self.first_index[len] + usize::from(offset);
2608                    return Ok(self.symbols[index].symbol);
2609                }
2610            }
2611        }
2612        Err(Error::InvalidData("RAR 2.9 invalid Huffman code"))
2613    }
2614}
2615
2616fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
2617    let mut available = 1i32;
2618    for &len_count in count.iter().skip(1) {
2619        available = (available << 1) - i32::from(len_count);
2620        if available < 0 {
2621            return Err(Error::InvalidData("RAR 2.9 oversubscribed Huffman table"));
2622        }
2623    }
2624    Ok(())
2625}
2626
2627#[derive(Debug, Clone)]
2628struct BitReader {
2629    input: Vec<u8>,
2630    bit_pos: usize,
2631}
2632
2633impl BitReader {
2634    fn new() -> Self {
2635        Self {
2636            input: Vec::new(),
2637            bit_pos: 0,
2638        }
2639    }
2640
2641    fn from_bytes(input: &[u8]) -> Self {
2642        Self {
2643            input: input.to_vec(),
2644            bit_pos: 0,
2645        }
2646    }
2647
2648    fn append(&mut self, input: &[u8]) {
2649        self.compact();
2650        self.input.extend_from_slice(input);
2651    }
2652
2653    fn compact(&mut self) {
2654        let bytes = self.bit_pos / 8;
2655        if bytes == 0 {
2656            return;
2657        }
2658        self.input.drain(..bytes);
2659        self.bit_pos -= bytes * 8;
2660    }
2661
2662    fn align_byte(&mut self) {
2663        self.bit_pos = (self.bit_pos + 7) & !7;
2664    }
2665
2666    fn peek_bit(&self) -> Result<u8> {
2667        self.peek_bits(1).map(|value| value as u8)
2668    }
2669
2670    fn read_bit(&mut self) -> Result<u8> {
2671        self.read_bits(1).map(|value| value as u8)
2672    }
2673
2674    fn read_bits(&mut self, count: u8) -> Result<u32> {
2675        let value = self.peek_bits(count)?;
2676        self.bit_pos += count as usize;
2677        Ok(value)
2678    }
2679
2680    fn peek_bits(&self, count: u8) -> Result<u32> {
2681        if count > 24 {
2682            return Err(Error::InvalidData("RAR 2.9 bit read is too wide"));
2683        }
2684        let mut value = 0u32;
2685        for i in 0..count as usize {
2686            let bit_index = self.bit_pos + i;
2687            let byte = *self.input.get(bit_index / 8).ok_or(Error::NeedMoreInput)?;
2688            let bit = (byte >> (7 - (bit_index % 8))) & 1;
2689            value = (value << 1) | bit as u32;
2690        }
2691        Ok(value)
2692    }
2693
2694    fn read_encoded_u32(&mut self) -> Result<u32> {
2695        match self.read_bits(2)? {
2696            0 => self.read_bits(4),
2697            1 => {
2698                let high = self.read_bits(8)?;
2699                if high >= 16 {
2700                    Ok(high)
2701                } else {
2702                    Ok(0xffff_ff00 | (high << 4) | self.read_bits(4)?)
2703                }
2704            }
2705            2 => self.read_bits(16),
2706            _ => Ok((self.read_bits(16)? << 16) | self.read_bits(16)?),
2707        }
2708    }
2709}
2710
2711impl PpmdByteReader for BitReader {
2712    fn read_ppmd_byte(&mut self) -> Result<u8> {
2713        self.read_bits(8).map(|value| value as u8)
2714    }
2715}
2716
/// MSB-first bit writer that accumulates its output in a byte vector.
#[derive(Default)]
struct BitWriter {
    /// Bytes produced so far; the last one may be only partially filled.
    bytes: Vec<u8>,
    /// Total number of bits written.
    bit_pos: usize,
}

impl BitWriter {
    /// Appends the low `count` bits of `value`, most significant bit first.
    fn write_bits(&mut self, value: u32, count: u8) {
        let mut remaining = count;
        while remaining > 0 {
            remaining -= 1;
            self.write_bit((value >> remaining) & 1 == 1);
        }
    }

    /// Appends `value` in the variable-length layout: a 2-bit selector
    /// followed by a 4-, 8-, 16- or 32-bit field, using the narrowest field
    /// among those that holds the value.
    fn write_encoded_u32(&mut self, value: u32) {
        match value {
            0..=0xf => {
                self.write_bits(0, 2);
                self.write_bits(value, 4);
            }
            0x10..=0xff => {
                self.write_bits(1, 2);
                self.write_bits(value, 8);
            }
            0x100..=0xffff => {
                self.write_bits(2, 2);
                self.write_bits(value, 16);
            }
            _ => {
                self.write_bits(3, 2);
                self.write_bits(value >> 16, 16);
                self.write_bits(value & 0xffff, 16);
            }
        }
    }

    /// Appends a single bit, starting a fresh zero byte at each byte boundary.
    fn write_bit(&mut self, bit: bool) {
        if self.bit_pos.is_multiple_of(8) {
            self.bytes.push(0);
        }
        if bit {
            // Bits fill each byte from its most significant end.
            let mask = 0x80u8 >> (self.bit_pos % 8);
            *self.bytes.last_mut().unwrap() |= mask;
        }
        self.bit_pos += 1;
    }

    /// Consumes the writer and returns the bytes; unused trailing bits of the
    /// last byte are zero.
    fn finish(self) -> Vec<u8> {
        self.bytes
    }
}
2762
2763fn identify_standard_filter(code: &[u8]) -> Option<StandardFilter> {
2764    if code.iter().fold(0u8, |acc, &byte| acc ^ byte) != 0 {
2765        return None;
2766    }
2767    match (code.len(), crc32(code)) {
2768        (53, 0xad57_6887) => Some(StandardFilter::E8),
2769        (57, 0x3cd7_e57e) => Some(StandardFilter::E8E9),
2770        (120, 0x3769_893f) => Some(StandardFilter::Itanium),
2771        (29, 0x0e06_077d) => Some(StandardFilter::Delta),
2772        (149, 0x1c2c_5dc8) => Some(StandardFilter::Rgb),
2773        (216, 0xbc85_e701) => Some(StandardFilter::Audio),
2774        _ => None,
2775    }
2776}
2777
2778fn apply_standard_filter(
2779    filter: StandardFilter,
2780    data: &mut Vec<u8>,
2781    file_offset: u32,
2782    regs: &[u32; 7],
2783) -> Result<()> {
2784    match filter {
2785        StandardFilter::E8 => {
2786            filters::decode_in_place(FilterOp::E8, data, file_offset, rar29_delta_messages())?
2787        }
2788        StandardFilter::E8E9 => {
2789            filters::decode_in_place(FilterOp::E8E9, data, file_offset, rar29_delta_messages())?
2790        }
2791        StandardFilter::Itanium => itanium_decode(data, file_offset),
2792        StandardFilter::Delta => {
2793            let channels = regs[0] as usize;
2794            if channels == 0 {
2795                return Err(Error::InvalidData("RAR 2.9 DELTA filter has zero channels"));
2796            }
2797            filters::decode_in_place(
2798                FilterOp::Delta { channels },
2799                data,
2800                0,
2801                rar29_delta_messages(),
2802            )?;
2803        }
2804        StandardFilter::Rgb => {
2805            if regs[0] < 3 || regs[1] > 2 {
2806                return Err(Error::InvalidData(
2807                    "RAR 2.9 RGB filter parameters are invalid",
2808                ));
2809            }
2810            let width = regs[0] as usize - 3;
2811            let pos_r = regs[1] as usize;
2812            *data = rgb_decode(data, width, pos_r)?;
2813        }
2814        StandardFilter::Audio => {
2815            let channels = regs[0] as usize;
2816            if channels == 0 {
2817                return Err(Error::InvalidData("RAR 2.9 AUDIO filter has zero channels"));
2818            }
2819            *data = audio_decode(data, channels)?;
2820        }
2821    }
2822    Ok(())
2823}
2824
/// Reverses the Itanium branch-address filter in place.
///
/// `data` is processed in 16-byte bundles. The low bits of each bundle's
/// first byte select which instruction slots are inspected; a slot whose
/// 4-bit opcode field equals 5 has its 20-bit address field reduced by the
/// bundle's number, where bundle numbers start at `file_offset >> 4` and
/// increase by one per bundle (wrapping). Buffers of 21 bytes or fewer are
/// left untouched.
fn itanium_decode(data: &mut [u8], file_offset: u32) {
    const ADDRESS_MASK: u32 = 0x000f_ffff;

    // Bundles may only start inside the first `len - 21` bytes, so a slot's
    // 4-byte field (which ends at most 16 bytes after the bundle start) stays
    // inside the buffer.
    let scan_len = match data.len().checked_sub(21) {
        Some(len) if len > 0 => len,
        _ => return,
    };
    let first_bundle = file_offset >> 4;
    for (bundle, start) in (0..scan_len).step_by(16).enumerate() {
        let selector = (0x334b_0000u32 >> (data[start] & 0x1e)) & 3;
        if selector == 0 {
            continue;
        }
        let bundle_number = first_bundle.wrapping_add(bundle as u32);
        // `shift` doubles as the slot selector (2, 3 or 4) and as the bit
        // offset of the fields inside the slot's 32-bit window.
        for shift in (selector + 1)..=4 {
            let field = start + (shift as usize * 5 - 8);
            if (data[field + 3] >> shift) & 15 != 5 {
                continue;
            }
            let word = u32::from_le_bytes([
                data[field],
                data[field + 1],
                data[field + 2],
                data[field + 3],
            ]);
            let address = (word >> shift).wrapping_sub(bundle_number) & ADDRESS_MASK;
            let patched = (word & !(ADDRESS_MASK << shift)) | (address << shift);
            data[field..field + 4].copy_from_slice(&patched.to_le_bytes());
        }
    }
}
2854
2855fn rgb_decode(data: &[u8], width: usize, pos_r: usize) -> Result<Vec<u8>> {
2856    if data.len() < 3 || width == 0 || !width.is_multiple_of(3) || width > data.len() || pos_r > 2 {
2857        return Err(Error::InvalidData(
2858            "RAR 2.9 RGB filter parameters are invalid",
2859        ));
2860    }
2861    let mut out = vec![0u8; data.len()];
2862    let mut src = 0usize;
2863    for channel in 0..3 {
2864        let mut prev = 0u8;
2865        let mut i = channel;
2866        while i < data.len() {
2867            let predicted = if i >= width + 3 {
2868                rgb_predict(prev, out[i - width], out[i - width - 3])
2869            } else {
2870                prev
2871            };
2872            let encoded = *data
2873                .get(src)
2874                .ok_or(Error::InvalidData("RAR 2.9 RGB filter source is truncated"))?;
2875            prev = predicted.wrapping_sub(encoded);
2876            out[i] = prev;
2877            src += 1;
2878            i += 3;
2879        }
2880    }
2881    for i in (pos_r..data.len().saturating_sub(2)).step_by(3) {
2882        let green = out[i + 1];
2883        out[i] = out[i].wrapping_add(green);
2884        out[i + 2] = out[i + 2].wrapping_add(green);
2885    }
2886    Ok(out)
2887}
2888
/// Picks the neighbour closest to the gradient estimate
/// `prev + upper - upper_left`.
///
/// Ties are resolved in the order `prev`, `upper`, `upper_left`.
fn rgb_predict(prev: u8, upper: u8, upper_left: u8) -> u8 {
    let (left, up, diagonal) = (
        i32::from(prev),
        i32::from(upper),
        i32::from(upper_left),
    );
    let estimate = left + up - diagonal;
    let dist_left = (estimate - left).abs();
    let dist_up = (estimate - up).abs();
    let dist_diagonal = (estimate - diagonal).abs();

    if dist_left <= dist_up && dist_left <= dist_diagonal {
        return prev;
    }
    if dist_up <= dist_diagonal {
        upper
    } else {
        upper_left
    }
}
2902
2903fn audio_decode(data: &[u8], channels: usize) -> Result<Vec<u8>> {
2904    let mut out = vec![0u8; data.len()];
2905    let mut src = 0usize;
2906    for channel in 0..channels {
2907        let mut prev_byte = 0u32;
2908        let mut prev_delta = 0i32;
2909        let mut d1 = 0i32;
2910        let mut d2 = 0i32;
2911        let mut k1 = 0i32;
2912        let mut k2 = 0i32;
2913        let mut k3 = 0i32;
2914        let mut dif = [0u32; 7];
2915        let mut byte_count = 0usize;
2916        let mut i = channel;
2917        while i < data.len() {
2918            let d3 = d2;
2919            d2 = prev_delta - d1;
2920            d1 = prev_delta;
2921            let predicted = ((8 * prev_byte as i32 + k1 * d1 + k2 * d2 + k3 * d3) >> 3) & 0xff;
2922            let encoded = *data.get(src).ok_or(Error::InvalidData(
2923                "RAR 2.9 AUDIO filter source is truncated",
2924            ))?;
2925            src += 1;
2926            let decoded = (predicted as u8).wrapping_sub(encoded);
2927            out[i] = decoded;
2928            prev_delta = decoded.wrapping_sub(prev_byte as u8) as i8 as i32;
2929            prev_byte = decoded as u32;
2930            let d = (encoded as i8 as i32) << 3;
2931            dif[0] += d.unsigned_abs();
2932            dif[1] += (d - d1).unsigned_abs();
2933            dif[2] += (d + d1).unsigned_abs();
2934            dif[3] += (d - d2).unsigned_abs();
2935            dif[4] += (d + d2).unsigned_abs();
2936            dif[5] += (d - d3).unsigned_abs();
2937            dif[6] += (d + d3).unsigned_abs();
2938            if byte_count & 0x1f == 0 {
2939                let mut min = dif[0];
2940                let mut min_index = 0usize;
2941                dif[0] = 0;
2942                for (index, value) in dif.iter_mut().enumerate().skip(1) {
2943                    if *value < min {
2944                        min = *value;
2945                        min_index = index;
2946                    }
2947                    *value = 0;
2948                }
2949                match min_index {
2950                    1 if k1 >= -16 => k1 -= 1,
2951                    2 if k1 < 16 => k1 += 1,
2952                    3 if k2 >= -16 => k2 -= 1,
2953                    4 if k2 < 16 => k2 += 1,
2954                    5 if k3 >= -16 => k3 -= 1,
2955                    6 if k3 < 16 => k3 += 1,
2956                    _ => {}
2957                }
2958            }
2959            byte_count += 1;
2960            i += channels;
2961        }
2962    }
2963    Ok(out)
2964}
2965
2966#[cfg(test)]
2967mod tests {
2968    use crate::rarvm::{Instruction, Opcode, Operand, Program};
2969    use std::ops::Range;
2970
2971    use super::{
2972        apply_standard_filter, audio_encode, best_match, encode_ppmd_tokens,
2973        encode_table_level_tokens, encode_tokens, encoded_filter_records, insert_match_position,
2974        itanium_decode, itanium_encode, should_lazy_emit_literal, split_large_filter,
2975        unpack29_decode, unpack29_encode_literals, unpack29_encode_ppmd,
2976        unpack29_encode_ppmd_literals, unpack29_encode_ppmd_with_filter, BitReader, BitWriter,
2977        EncodeOptions, EncodeToken, EncoderMatchState, Error, Huffman, LevelToken,
2978        OwnedVmFilterRecord, PpmdEncodeToken, Rar29FilterKind, Rar29FilterSpec, Result,
2979        StandardFilter, Unpack29, Unpack29Encoder, VmFilter, VmProgram, VmProgramKind, MAIN_COUNT,
2980        MATCH_HASH_BUCKETS, MAX_MATCH_CANDIDATES, MAX_VM_AUDIO_FILTER_BLOCK_SIZE,
2981        MAX_VM_DELTA_FILTER_BLOCK_SIZE, MAX_VM_FILTER_BLOCK_SIZE, RAR3_AUDIO_FILTER_BYTECODE,
2982        TABLE_COUNT,
2983    };
2984
2985    const COMPRESSED_TEXT: &[u8] = &[
2986        0x09, 0x10, 0x10, 0x93, 0xe4, 0xce, 0x7f, 0xa2, 0xba, 0x80, 0x46, 0x16, 0x82, 0x63, 0xe9,
2987        0x9a, 0x19, 0xe4, 0x10, 0xe0, 0x41, 0x3d, 0x16, 0xfc, 0x4d, 0xfa, 0x6f, 0xf2, 0x5c, 0xae,
2988        0x32, 0x86, 0xc9, 0x95, 0x9d, 0xf1, 0x04, 0xa4, 0xe8, 0x92, 0x8f, 0x12, 0xd7, 0xe7, 0xba,
2989        0xcb, 0x26, 0xf1, 0x97, 0xac, 0x7c, 0x5f, 0xfd, 0xa0, 0x00, 0x1f, 0x77, 0x50,
2990    ];
2991
2992    #[test]
2993    fn decodes_rar29_lz_member() {
2994        assert_eq!(
2995            unpack29_decode(COMPRESSED_TEXT, 2400).unwrap(),
2996            expected_text()
2997        );
2998    }
2999
3000    #[test]
3001    fn rejects_oversubscribed_rar29_huffman_tables() {
3002        assert!(matches!(
3003            Huffman::from_lengths(&[1, 1, 1]),
3004            Err(Error::InvalidData("RAR 2.9 oversubscribed Huffman table"))
3005        ));
3006    }
3007
3008    #[test]
3009    fn literal_encoder_round_trips_rar29_lz_blocks() {
3010        let input = b"literal-only RAR 2.9 baseline\nwith repeated text literal-only\n";
3011        let packed = unpack29_encode_literals(input).unwrap();
3012
3013        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3014    }
3015
3016    #[test]
3017    fn multi_block_lz_encoding_round_trips_large_repeated_documents() {
3018        let seed = b"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\n\
3019<HTML><BODY><P>RAR29 repeated document body with enough structured text to \
3020exercise LZSS block table selection.</P></BODY></HTML>\n"
3021            .repeat(96);
3022        let input = seed.repeat(180);
3023        let single =
3024            super::encode_member_with_options(&input, &[], EncodeOptions::new(96)).unwrap();
3025        let blocked = super::encode_member_with_options(
3026            &input,
3027            &[],
3028            EncodeOptions::new(96).with_block_size(1024 * 1024),
3029        )
3030        .unwrap();
3031
3032        assert_eq!(unpack29_decode(&single, input.len()).unwrap(), input);
3033        assert_eq!(unpack29_decode(&blocked, input.len()).unwrap(), input);
3034        assert!(blocked.len() < input.len());
3035    }
3036
3037    #[test]
3038    fn table_level_encoder_uses_rar29_run_symbols() {
3039        let mut lengths = [0u8; TABLE_COUNT];
3040        lengths[..4].fill(5);
3041        lengths[8..21].fill(0);
3042
3043        let tokens = encode_table_level_tokens(&lengths);
3044
3045        assert!(tokens.contains(&LevelToken::repeat_previous_short(3)));
3046        assert!(tokens.iter().any(|token| token.symbol == 19));
3047    }
3048
3049    #[test]
3050    fn lazy_lz_parser_defers_short_match_for_longer_next_match() {
3051        let input = b"abcdXbcdYYYYYYYYYYYYabcdYYYYYYYYYYYY";
3052        let greedy = encode_tokens(input, &[], EncodeOptions::new(MAX_MATCH_CANDIDATES));
3053        let lazy = encode_tokens(
3054            input,
3055            &[],
3056            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3057        );
3058        let packed = Unpack29Encoder::with_options(
3059            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3060        )
3061        .encode_member(input)
3062        .unwrap();
3063
3064        assert!(greedy
3065            .iter()
3066            .any(|token| matches!(token, EncodeToken::Match { length: 4, .. })));
3067        assert!(lazy
3068            .iter()
3069            .any(|token| matches!(token, EncodeToken::Match { length, .. } if *length > 8)));
3070        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3071    }
3072
3073    #[test]
3074    fn lazy_lz_parser_uses_match_cost_not_only_match_length() {
3075        let pos = 300_000usize;
3076        let mut input = vec![0u8; pos + 16];
3077        input[100..106].copy_from_slice(b"BCDEFG");
3078        input[106] = b'!';
3079        input[pos - 10..pos - 5].copy_from_slice(b"ABCD!");
3080        input[pos..pos + 7].copy_from_slice(b"ABCDEFG");
3081        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3082        insert_match_position(&input, 100, &mut buckets);
3083        insert_match_position(&input, pos - 10, &mut buckets);
3084
3085        let current = best_match(
3086            &input,
3087            pos,
3088            input.len(),
3089            &buckets,
3090            EncodeOptions::new(MAX_MATCH_CANDIDATES),
3091            &EncoderMatchState::default(),
3092        )
3093        .unwrap();
3094        let next = best_match(
3095            &input,
3096            pos + 1,
3097            input.len(),
3098            &buckets,
3099            EncodeOptions::new(MAX_MATCH_CANDIDATES),
3100            &EncoderMatchState::default(),
3101        )
3102        .unwrap();
3103
3104        assert_eq!(current.length, 4);
3105        assert_eq!(current.offset, 10);
3106        assert_eq!(next.length, 6);
3107        assert!(next.offset > 0x40000);
3108        assert!(!should_lazy_emit_literal(
3109            &input,
3110            pos,
3111            input.len(),
3112            &buckets,
3113            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3114            &EncoderMatchState::default(),
3115            current,
3116        ));
3117    }
3118
3119    #[test]
3120    fn lazy_lz_parser_uses_bounded_cost_lookahead() {
3121        let pos = 160;
3122        let mut input: Vec<u8> = (0..240u16)
3123            .map(|value| value.wrapping_mul(91) as u8)
3124            .collect();
3125        input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3126        input[pos - 80..pos - 64].copy_from_slice(b"CDEFGHIJKLMNOPQR");
3127        input[pos..pos + 18].copy_from_slice(b"ABCDEFGHIJKLMNOPQR");
3128
3129        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3130        for candidate in 0..pos {
3131            insert_match_position(&input, candidate, &mut buckets);
3132        }
3133        let current = best_match(
3134            &input,
3135            pos,
3136            input.len(),
3137            &buckets,
3138            EncodeOptions::default(),
3139            &EncoderMatchState::default(),
3140        )
3141        .unwrap();
3142
3143        assert_eq!((current.length, current.offset), (8, 30));
3144        assert!(!should_lazy_emit_literal(
3145            &input,
3146            pos,
3147            input.len(),
3148            &buckets,
3149            EncodeOptions::default()
3150                .with_lazy_matching(true)
3151                .with_lazy_lookahead(1),
3152            &EncoderMatchState::default(),
3153            current,
3154        ));
3155        assert!(should_lazy_emit_literal(
3156            &input,
3157            pos,
3158            input.len(),
3159            &buckets,
3160            EncodeOptions::default()
3161                .with_lazy_matching(true)
3162                .with_lazy_lookahead(2),
3163            &EncoderMatchState::default(),
3164            current,
3165        ));
3166    }
3167
3168    #[test]
3169    fn match_state_encodes_last_length_and_repeat_offset_symbols() {
3170        let mut state = EncoderMatchState::default();
3171        assert!(matches!(
3172            state.encode_match(12, 64).unwrap(),
3173            super::EncodedMatch::Fresh { .. }
3174        ));
3175        state.remember(12, 64);
3176
3177        assert_eq!(
3178            state.encode_match(12, 64).unwrap(),
3179            super::EncodedMatch::LastLengthRepeat
3180        );
3181        assert!(matches!(
3182            state.encode_match(9, 64).unwrap(),
3183            super::EncodedMatch::RepeatOffset { index: 0, .. }
3184        ));
3185    }
3186
3187    #[test]
3188    fn cost_aware_match_selection_prefers_repeat_offset_token() {
3189        let pos = 600usize;
3190        let mut input: Vec<u8> = (0..pos + 16)
3191            .map(|index| (index as u8).wrapping_mul(37))
3192            .collect();
3193        input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3194        input[pos - 512..pos - 503].copy_from_slice(b"ABCDEFGHI");
3195        input[pos..pos + 9].copy_from_slice(b"ABCDEFGHI");
3196        input[pos - 22] = 0x11;
3197        input[pos - 503] = 0x22;
3198        input[pos + 9] = 0x33;
3199        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3200        insert_match_position(&input, pos - 30, &mut buckets);
3201        insert_match_position(&input, pos - 512, &mut buckets);
3202
3203        let fresh = best_match(
3204            &input,
3205            pos,
3206            input.len(),
3207            &buckets,
3208            EncodeOptions::default(),
3209            &EncoderMatchState::default(),
3210        )
3211        .unwrap();
3212        let repeat = best_match(
3213            &input,
3214            pos,
3215            input.len(),
3216            &buckets,
3217            EncodeOptions::default(),
3218            &EncoderMatchState {
3219                old_offsets: [30, 0, 0, 0],
3220                last_offset: 0,
3221                last_length: 0,
3222            },
3223        )
3224        .unwrap();
3225
3226        assert_eq!((fresh.length, fresh.offset), (9, 512));
3227        assert_eq!((repeat.length, repeat.offset), (8, 30));
3228    }
3229
3230    #[test]
3231    fn match_finder_respects_configured_maximum_distance() {
3232        let phrase = b"rar29 bounded dictionary phrase";
3233        let mut input = Vec::new();
3234        input.extend_from_slice(phrase);
3235        input.extend(std::iter::repeat_n(0u8, 256 * 1024));
3236        input.extend_from_slice(phrase);
3237
3238        let bounded = encode_tokens(
3239            &input,
3240            &[],
3241            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_max_match_distance(128 * 1024),
3242        );
3243        let unbounded = encode_tokens(
3244            &input,
3245            &[],
3246            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_max_match_distance(1024 * 1024),
3247        );
3248
3249        assert!(!bounded.iter().any(
3250            |token| matches!(token, EncodeToken::Match { offset, .. } if *offset > 128 * 1024)
3251        ));
3252        assert!(unbounded.iter().any(
3253            |token| matches!(token, EncodeToken::Match { offset, .. } if *offset > 128 * 1024)
3254        ));
3255    }
3256
3257    #[test]
3258    fn lz_encoder_uses_weighted_rar29_huffman_tables() {
3259        let mut input = Vec::new();
3260        for byte in 0u8..120 {
3261            input.push(b'A');
3262            input.push(byte);
3263        }
3264        let packed = Unpack29Encoder::new().encode_member(&input).unwrap();
3265        let mut decoder = Unpack29::new();
3266        decoder.bits.append(&packed);
3267        decoder.read_tables().unwrap();
3268        let main_lengths = &decoder.levels[..MAIN_COUNT];
3269        let nonzero_lengths = main_lengths
3270            .iter()
3271            .copied()
3272            .filter(|&length| length != 0)
3273            .collect::<std::collections::BTreeSet<_>>();
3274
3275        assert!(nonzero_lengths.len() > 1);
3276        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3277    }
3278
3279    #[test]
3280    fn copy_match_treats_zero_offset_as_distance_one() {
3281        let mut decoder = Unpack29::new();
3282        decoder.output.push(b'Z');
3283
3284        decoder.copy_match(4, 0, 5).unwrap();
3285
3286        assert_eq!(decoder.output, b"ZZZZZ");
3287    }
3288
3289    #[test]
3290    fn ppmd_literal_encoder_round_trips_rar29_ppmd_blocks() {
3291        let mut input = b"rar29 ppmd literal text payload alpha beta gamma\n".repeat(64);
3292        input.extend_from_slice(&[2, 2, 2, b'e', b's', b'c']);
3293        let packed = unpack29_encode_ppmd_literals(&input).unwrap();
3294
3295        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3296        assert_ne!(packed.first().copied(), Some(0));
3297    }
3298
3299    #[test]
3300    fn ppmd_encoder_advertises_period_compatible_model_for_external_decoders() {
3301        let packed = unpack29_encode_ppmd(b"rar29 ppmd dictionary header").unwrap();
3302
3303        assert_eq!(packed[0], 0xa7);
3304        assert_eq!(packed[1], 24);
3305    }
3306
3307    #[test]
3308    fn ppmd_encoder_emits_offset_one_repeat_escapes() {
3309        let input = b"seed "
3310            .iter()
3311            .copied()
3312            .chain(std::iter::repeat_n(b'Z', 512))
3313            .collect::<Vec<_>>();
3314        let tokens = encode_ppmd_tokens(&input, true);
3315        let packed = unpack29_encode_ppmd(&input).unwrap();
3316
3317        assert!(tokens.iter().any(
3318            |token| matches!(token, PpmdEncodeToken::RepeatOffsetOne { length } if *length >= 4)
3319        ));
3320        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3321    }
3322
3323    #[test]
3324    fn ppmd_encoder_emits_distance_match_escapes() {
3325        let phrase = b"repeated phrase for rar29 ppmd distance escape 4 ";
3326        let mut input = Vec::new();
3327        input.extend_from_slice(phrase);
3328        input.extend_from_slice(b"middle bytes make the repeat distance greater than one ");
3329        input.extend_from_slice(phrase);
3330        input.extend_from_slice(phrase);
3331        input.extend_from_slice(b"tail");
3332        let tokens = encode_ppmd_tokens(&input, true);
3333        let packed = unpack29_encode_ppmd(&input).unwrap();
3334
3335        assert!(tokens
3336            .iter()
3337            .any(|token| matches!(token, PpmdEncodeToken::Match { offset, length } if *offset > 1 && *length >= 32)));
3338        assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3339    }
3340
/// For a highly repetitive input the PPMd tokenizer must emit a distance match
/// of length >= 32, but never a `Match` longer than 255.
#[test]
fn ppmd_distance_match_lengths_stay_period_decoder_compatible() {
    let input = b"<html><body>RAR PPMd LZSS conversion phrase</body></html>\n".repeat(200);
    let tokens = encode_ppmd_tokens(&input, true);

    // One pass over the tokens collects both facts the assertions need.
    let mut saw_distance_match = false;
    let mut saw_oversized_match = false;
    for token in tokens.iter() {
        if let PpmdEncodeToken::Match { offset, length } = token {
            saw_distance_match |= *offset > 1 && *length >= 32;
            saw_oversized_match |= *length > 255;
        }
    }

    assert!(saw_distance_match);
    assert!(!saw_oversized_match);
}
3357
/// Encoding with an E8 filter in PPMd mode must produce a stream that differs
/// from both the unfiltered PPMd stream and the filtered LZ stream, and that
/// still decodes back to the input.
#[test]
fn ppmd_encoder_emits_embedded_vm_filter_escape() {
    let input = b"\xe8\0\0\0\0rar29 ppmd embedded e8 filter payload\n".repeat(16);
    let packed =
        unpack29_encode_ppmd_with_filter(&input, Rar29FilterSpec::whole(Rar29FilterKind::E8))
            .unwrap();
    let plain_ppmd = unpack29_encode_ppmd(&input).unwrap();
    let filtered_lz = Unpack29Encoder::new()
        .encode_member_with_filter(&input, Rar29FilterSpec::whole(Rar29FilterKind::E8))
        .unwrap();

    // Check each reference stream on its own. The earlier combined
    // `len != len || len != len` form only failed when all three lengths were
    // equal, so it could not detect the filter escape being dropped.
    assert!(
        packed != plain_ppmd,
        "filtered PPMd stream is identical to the unfiltered PPMd stream"
    );
    assert!(
        packed != filtered_lz,
        "filtered PPMd stream is identical to the filtered LZ stream"
    );
    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3372
3373    fn encode_with_filter(input: &[u8], kind: Rar29FilterKind) -> Result<Vec<u8>> {
3374        Unpack29Encoder::new().encode_member_with_filter(input, Rar29FilterSpec::whole(kind))
3375    }
3376
3377    fn encode_with_filter_range(
3378        input: &[u8],
3379        kind: Rar29FilterKind,
3380        range: Range<usize>,
3381    ) -> Result<Vec<u8>> {
3382        Unpack29Encoder::new().encode_member_with_filter(input, Rar29FilterSpec::range(kind, range))
3383    }
3384
3385    fn encode_with_filter_ranges(
3386        input: &[u8],
3387        kind: Rar29FilterKind,
3388        ranges: Vec<Range<usize>>,
3389    ) -> Result<Vec<u8>> {
3390        let filters: Vec<_> = ranges
3391            .into_iter()
3392            .map(|range| Rar29FilterSpec::range(kind, range))
3393            .collect();
3394        Unpack29Encoder::new().encode_member_with_filters(input, &filters)
3395    }
3396
/// A run of 1024 identical bytes must pack to under a quarter of its size and
/// decode back unchanged.
#[test]
fn encoder_emits_rar29_offset_one_matches_for_repeated_bytes() {
    let input = vec![b'Z'; 1024];
    let packed = unpack29_encode_literals(&input).unwrap();
    let size_limit = input.len() / 4;

    assert!(packed.len() < size_limit);
    let decoded = unpack29_decode(&packed, input.len()).unwrap();
    assert_eq!(decoded, input);
}
3405
/// A ten-byte sequence repeated 128 times must pack to under half of its size
/// and decode back unchanged.
#[test]
fn encoder_emits_rar29_dictionary_matches_for_repeated_sequences() {
    let sequence = b"abc123xyz-";
    let mut input = Vec::with_capacity(sequence.len() * 128);
    for _ in 0..128 {
        input.extend_from_slice(sequence);
    }
    let packed = unpack29_encode_literals(&input).unwrap();
    let size_limit = input.len() / 2;

    assert!(packed.len() < size_limit);
    let decoded = unpack29_decode(&packed, input.len()).unwrap();
    assert_eq!(decoded, input);
}
3414
/// A phrase repeated after a 300 KiB run of zero bytes must yield a `Match`
/// token whose offset exceeds 0x40000; the packed stream must be smaller than
/// the input and decode back unchanged.
#[test]
fn encoder_finds_rar29_matches_beyond_near_offsets() {
    let phrase = b"long-distance repeated phrase for rar29 low-offset coding.";
    let zero_gap = vec![0u8; 300 * 1024];
    let input = [&phrase[..], &zero_gap[..], &phrase[..], &phrase[..]].concat();

    let tokens = encode_tokens(&input, &[], EncodeOptions::default());
    let packed = unpack29_encode_literals(&input).unwrap();

    let has_far_match = tokens
        .iter()
        .any(|token| matches!(token, EncodeToken::Match { offset, .. } if *offset > 0x40000));
    assert!(has_far_match);
    assert!(packed.len() < input.len());

    // `assert!` with a message does not print its operands, so a failure does
    // not dump two ~300 KiB buffers.
    let round_tripped = unpack29_decode(&packed, input.len()).unwrap() == input;
    assert!(
        round_tripped,
        "RAR 2.9 long-distance match round-trip failed"
    );
}
3437
/// Round-trips a small input containing E8 opcodes through a single
/// whole-input E8 VM filter.
#[test]
fn encoder_emits_rar29_e8_vm_filter_record() {
    let input = b"\xe8\0\0\0\0rar29 e8 filter writer payload\n".repeat(8);
    let packed = encode_with_filter(&input, Rar29FilterKind::E8).unwrap();
    let decoded = unpack29_decode(&packed, input.len()).unwrap();

    // The input is small, so `assert_eq!` can show the differing bytes. The
    // earlier message called this a "multi-filter" round-trip, which this
    // single-filter test is not.
    assert_eq!(decoded, input);
}
3449
/// Round-trips a small input containing E9 opcodes through a whole-input
/// E8E9 VM filter.
#[test]
fn encoder_emits_rar29_e8e9_vm_filter_record() {
    let record = b"\xe9\0\0\0\0rar29 e8e9 filter writer payload\n";
    let mut input = Vec::with_capacity(record.len() * 8);
    for _ in 0..8 {
        input.extend_from_slice(record);
    }

    let packed = encode_with_filter(&input, Rar29FilterKind::E8E9).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3458
/// Round-trips an input where only the middle segment is covered by the E8
/// filter; the prefix and suffix lie outside the filtered range.
#[test]
fn encoder_emits_rar29_segmented_e8_vm_filter_record() {
    let prefix = b"prefix data that should not be x86 filtered ";
    let segment = b"\xe8\0\0\0\0segmented e8 filtered payload\n";
    let suffix = b" suffix data that should also remain raw";
    let input = [&prefix[..], &segment[..], &suffix[..]].concat();
    let filtered = prefix.len()..prefix.len() + segment.len();

    let packed = encode_with_filter_range(&input, Rar29FilterKind::E8, filtered).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3471
/// Round-trips an 80,000-byte input carrying two clusters of E8 opcodes, each
/// cluster covered by its own E8 filter range.
#[test]
fn encoder_emits_rar29_multiple_e8_vm_filter_records() {
    let mut input = vec![0x41u8; 80_000];
    for cluster_start in [8_000usize, 60_000] {
        // Eight E8 opcodes per cluster, 64 bytes apart, each followed by a
        // distinct little-endian 32-bit operand.
        for index in 0u32..8 {
            let pos = cluster_start + index as usize * 64;
            input[pos] = 0xe8;
            input[pos + 1..pos + 5].copy_from_slice(&(0x2000 + index).to_le_bytes());
        }
    }

    let packed = encode_with_filter_ranges(
        &input,
        Rar29FilterKind::E8,
        vec![8_000..8_512, 60_000..60_512],
    )
    .unwrap();
    let decoded = unpack29_decode(&packed, input.len()).unwrap();

    // `assert!` with a message does not print its operands, so a failure does
    // not dump two 80,000-byte vectors into the test log.
    assert!(
        decoded == input,
        "RAR 2.9 multi-filter E8 round-trip failed"
    );
}
3493
/// Round-trips an input where only the middle segment is covered by the E8E9
/// filter; the prefix and suffix lie outside the filtered range.
#[test]
fn encoder_emits_rar29_segmented_e8e9_vm_filter_record() {
    let prefix = b"prefix data that should not be x86 filtered ";
    let segment = b"\xe9\0\0\0\0segmented e8e9 filtered payload\n";
    let suffix = b" suffix data that should also remain raw";
    let input = [&prefix[..], &segment[..], &suffix[..]].concat();
    let filtered = prefix.len()..prefix.len() + segment.len();

    let packed = encode_with_filter_range(&input, Rar29FilterKind::E8E9, filtered).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3506
/// Round-trips 192 generated bytes through a whole-input three-channel DELTA
/// VM filter.
#[test]
fn encoder_emits_rar29_delta_vm_filter_record() {
    let mut input = Vec::with_capacity(192);
    for step in 0u32..192 {
        // Truncation to the low byte is intentional.
        input.push((step * 13 + 7) as u8);
    }
    let kind = Rar29FilterKind::Delta { channels: 3 };

    let packed = encode_with_filter(&input, kind).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3515
/// Round-trips an input where only the generated middle segment is covered by
/// a three-channel DELTA filter.
#[test]
fn encoder_emits_rar29_segmented_delta_vm_filter_record() {
    let prefix = b"prefix bytes before delta segment ";
    // Truncation to the low byte is intentional.
    let samples: Vec<u8> = (0u32..192).map(|step| (step * 13 + 7) as u8).collect();
    let suffix = b" suffix bytes after delta segment";
    let input = [&prefix[..], &samples[..], &suffix[..]].concat();
    let filtered = prefix.len()..prefix.len() + samples.len();

    let kind = Rar29FilterKind::Delta { channels: 3 };
    let packed = encode_with_filter_range(&input, kind, filtered).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3530
/// Round-trips a 48-byte block (with two non-zero bytes) followed by text
/// through a whole-input Itanium VM filter.
#[test]
fn encoder_emits_rar29_itanium_vm_filter_record() {
    let mut block = [0u8; 48];
    block[16] = 22;
    block[21] = 20;
    let trailer = b"rar29 itanium filter writer payload\n";
    let input = [&block[..], &trailer[..]].concat();

    let packed = encode_with_filter(&input, Rar29FilterKind::Itanium).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3542
/// Round-trips an input where only the middle segment (a 48-byte block plus
/// text) is covered by the Itanium filter.
#[test]
fn encoder_emits_rar29_segmented_itanium_vm_filter_record() {
    let prefix = b"prefix bytes before itanium segment ";
    let mut block = [0u8; 48];
    block[16] = 22;
    block[21] = 20;
    let payload = b"rar29 segmented itanium filter writer payload\n";
    let suffix = b" suffix bytes after itanium segment";
    let input = [&prefix[..], &block[..], &payload[..], &suffix[..]].concat();

    let start = prefix.len();
    let end = start + block.len() + payload.len();
    let packed = encode_with_filter_range(&input, Rar29FilterKind::Itanium, start..end).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3559
/// Round-trips 96 generated bytes through a whole-input RGB VM filter with
/// `width` 12 and `pos_r` 0.
#[test]
fn encoder_emits_rar29_rgb_vm_filter_record() {
    let mut input = Vec::with_capacity(96);
    for step in 0u32..96 {
        // Truncation to the low byte is intentional.
        input.push((step * 29 + 11) as u8);
    }
    let kind = Rar29FilterKind::Rgb {
        width: 12,
        pos_r: 0,
    };

    let packed = encode_with_filter(&input, kind).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3569
/// Round-trips an input where only the generated middle segment is covered by
/// an RGB filter with `width` 12 and `pos_r` 0.
#[test]
fn encoder_emits_rar29_segmented_rgb_vm_filter_record() {
    let prefix = b"prefix bytes before rgb segment ";
    // Truncation to the low byte is intentional.
    let pixels: Vec<u8> = (0u32..96).map(|step| (step * 29 + 11) as u8).collect();
    let suffix = b" suffix bytes after rgb segment";
    let input = [&prefix[..], &pixels[..], &suffix[..]].concat();
    let filtered = prefix.len()..prefix.len() + pixels.len();

    let kind = Rar29FilterKind::Rgb {
        width: 12,
        pos_r: 0,
    };
    let packed = encode_with_filter_range(&input, kind, filtered).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3585
/// Encoding with an RGB filter of `width` 8 must return an error.
#[test]
fn encoder_rejects_rar29_rgb_filter_with_unaligned_scanline_width() {
    let input: Vec<u8> = (0u32..96).map(|step| (step * 29 + 11) as u8).collect();
    let kind = Rar29FilterKind::Rgb { width: 8, pos_r: 0 };

    let outcome = encode_with_filter(&input, kind);

    assert!(outcome.is_err());
}
3591
/// Round-trips 160 generated bytes through a whole-input two-channel AUDIO
/// VM filter.
#[test]
fn encoder_emits_rar29_audio_vm_filter_record() {
    let mut input = Vec::with_capacity(160);
    for step in 0u32..160 {
        // Truncation to the low byte is intentional.
        input.push((step * 7 + step / 3) as u8);
    }
    let kind = Rar29FilterKind::Audio { channels: 2 };

    let packed = encode_with_filter(&input, kind).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3602
/// Executing the AUDIO filter bytecode on the output of `audio_encode` must
/// reproduce the original input for a block of the maximum audio block size.
#[test]
fn audio_filter_bytecode_matches_builtin_transform() {
    let channels = 2;
    let mut input: Vec<u8> = Vec::with_capacity(MAX_VM_AUDIO_FILTER_BLOCK_SIZE);
    for index in 0..MAX_VM_AUDIO_FILTER_BLOCK_SIZE {
        // Truncation to the low byte is intentional.
        input.push((index * 7 + index / channels + index / 257) as u8);
    }
    let encoded = audio_encode(&input, channels).unwrap();

    // Register 0 carries the channel count; all other registers start at zero.
    let mut regs = [0u32; 7];
    regs[0] = channels as u32;
    let invocation = crate::rarvm::Invocation {
        input: &encoded,
        regs,
        global_data: &[],
        file_offset: 0,
        exec_count: 0,
    };
    let program = Program::parse(RAR3_AUDIO_FILTER_BYTECODE).unwrap();
    let result = program.execute(invocation).unwrap();

    assert_eq!(result.output, input);
}
3623
/// A whole-input AUDIO filter over `2 * MAX_VM_FILTER_BLOCK_SIZE + 123` bytes
/// must be split into three consecutive ranges, the first two of
/// `MAX_VM_AUDIO_FILTER_BLOCK_SIZE` bytes each.
#[test]
fn large_audio_filters_are_split_into_rarvm_safe_blocks() {
    let total = MAX_VM_FILTER_BLOCK_SIZE * 2 + 123;
    let block = MAX_VM_AUDIO_FILTER_BLOCK_SIZE;
    let spec = Rar29FilterSpec::whole(Rar29FilterKind::Audio { channels: 4 });

    let filters = split_large_filter(total, spec).unwrap();

    assert_eq!(filters.len(), 3);
    let expected = [0..block, block..block * 2, block * 2..total];
    for (filter, range) in filters.iter().zip(expected) {
        assert_eq!(filter.range, Some(range));
    }
}
3643
/// A whole-input DELTA filter over `2 * MAX_VM_FILTER_BLOCK_SIZE + 123` bytes
/// must be split into three consecutive ranges, the first two of
/// `MAX_VM_DELTA_FILTER_BLOCK_SIZE` bytes each.
#[test]
fn large_delta_filters_are_split_into_rarvm_safe_blocks() {
    let total = MAX_VM_FILTER_BLOCK_SIZE * 2 + 123;
    let block = MAX_VM_DELTA_FILTER_BLOCK_SIZE;
    let spec = Rar29FilterSpec::whole(Rar29FilterKind::Delta { channels: 4 });

    let filters = split_large_filter(total, spec).unwrap();

    assert_eq!(filters.len(), 3);
    let expected = [0..block, block..block * 2, block * 2..total];
    for (filter, range) in filters.iter().zip(expected) {
        assert_eq!(filter.range, Some(range));
    }
}
3663
/// Two back-to-back AUDIO filter records that use the same bytecode must each
/// be encoded as a program declaration: selector 0 for the first record and
/// selector 2 for the second.
#[test]
fn segmented_audio_filters_redeclare_program_state() {
    let audio_record = |block_start: usize, block_size: usize| OwnedVmFilterRecord {
        block_start,
        block_size,
        init_regs: vec![(0, 4)],
        code: RAR3_AUDIO_FILTER_BYTECODE,
    };
    let filters = [
        audio_record(0, MAX_VM_AUDIO_FILTER_BLOCK_SIZE),
        audio_record(MAX_VM_AUDIO_FILTER_BLOCK_SIZE, 4096),
    ];

    let records = encoded_filter_records(&filters).unwrap();

    assert_vm_filter_declares_program(&records[0], 0);
    assert_vm_filter_declares_program(&records[1], 2);
}
3685
/// Round-trips an input where only the generated middle segment is covered by
/// a two-channel AUDIO filter.
#[test]
fn encoder_emits_rar29_segmented_audio_vm_filter_record() {
    let prefix = b"prefix bytes before audio segment ";
    // Truncation to the low byte is intentional.
    let samples: Vec<u8> = (0u32..160).map(|step| (step * 7 + step / 3) as u8).collect();
    let suffix = b" suffix bytes after audio segment";
    let input = [&prefix[..], &samples[..], &suffix[..]].concat();
    let filtered = prefix.len()..prefix.len() + samples.len();

    let kind = Rar29FilterKind::Audio { channels: 2 };
    let packed = encode_with_filter_range(&input, kind, filtered).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3700
/// Round-trips an input slightly longer than two maximum AUDIO filter blocks
/// through a whole-input four-channel AUDIO filter.
#[test]
fn encoder_emits_multiple_rar29_audio_vm_filter_records_for_large_ranges() {
    let total = MAX_VM_AUDIO_FILTER_BLOCK_SIZE * 2 + 64;
    let mut input: Vec<u8> = Vec::with_capacity(total);
    for index in 0..total {
        // Truncation to the low byte is intentional.
        input.push((index * 7 + index / 3 + index / 257) as u8);
    }
    let kind = Rar29FilterKind::Audio { channels: 4 };

    let packed = encode_with_filter(&input, kind).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3711
/// Round-trips an input slightly longer than two maximum DELTA filter blocks
/// through a whole-input four-channel DELTA filter.
#[test]
fn encoder_emits_multiple_rar29_delta_vm_filter_records_for_large_ranges() {
    let total = MAX_VM_DELTA_FILTER_BLOCK_SIZE * 2 + 64;
    let mut input: Vec<u8> = Vec::with_capacity(total);
    for index in 0..total {
        // Truncation to the low byte is intentional.
        input.push((index * 11 + index / 5 + index / 251) as u8);
    }
    let kind = Rar29FilterKind::Delta { channels: 4 };

    let packed = encode_with_filter(&input, kind).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3722
3723    fn assert_vm_filter_declares_program(record: &[u8], expected_selector: u32) {
3724        let first = record[0];
3725        assert_ne!(first & 0x80, 0);
3726        assert_ne!(first & 0x20, 0);
3727        assert_ne!(first & 0x10, 0);
3728        let inline_len = match first & 7 {
3729            len @ 0..=5 => len as usize + 1,
3730            6 => usize::from(record[1]) + 7,
3731            _ => u16::from_be_bytes([record[1], record[2]]) as usize,
3732        };
3733        let body_start = match first & 7 {
3734            0..=5 => 1,
3735            6 => 2,
3736            _ => 3,
3737        };
3738        let body = &record[body_start..body_start + inline_len];
3739        let mut bits = BitReader::from_bytes(body);
3740        assert_eq!(bits.read_encoded_u32().unwrap(), expected_selector);
3741        let _block_start = bits.read_encoded_u32().unwrap();
3742        let _block_size = bits.read_encoded_u32().unwrap();
3743        let mask = bits.read_bits(7).unwrap();
3744        for index in 0..7 {
3745            if mask & (1 << index) != 0 {
3746                let _ = bits.read_encoded_u32().unwrap();
3747            }
3748        }
3749        assert_eq!(
3750            bits.read_encoded_u32().unwrap() as usize,
3751            RAR3_AUDIO_FILTER_BYTECODE.len()
3752        );
3753    }
3754
/// Encoding a second member with the encoder that already encoded the first
/// must give a smaller stream than encoding it independently, and both members
/// must decode in order through one decoder.
#[test]
fn solid_encoder_emits_rar29_matches_against_previous_member_history() {
    let phrase = b"solid rar29 shared phrase alpha beta gamma ";
    let first = phrase.repeat(4);
    let second = phrase.repeat(2);
    let independent = unpack29_encode_literals(&second).unwrap();

    let mut encoder = Unpack29Encoder::new();
    let first_packed = encoder.encode_member(&first).unwrap();
    let second_packed = encoder.encode_member(&second).unwrap();
    assert!(second_packed.len() < independent.len());

    let mut decoder = Unpack29::new();
    let decoded_first = decoder.decode_member(&first_packed, first.len()).unwrap();
    assert_eq!(decoded_first, first);
    let decoded_second = decoder.decode_member(&second_packed, second.len()).unwrap();
    assert_eq!(decoded_second, second);
}
3775
/// Decoding from a reader that hands out at most three bytes per `read` call
/// must still produce the full expected text.
#[test]
fn decode_member_from_reader_accepts_incremental_input() {
    /// Reader over a byte slice that yields at most three bytes per call.
    struct TinyReader<'a> {
        input: &'a [u8],
    }

    impl std::io::Read for TinyReader<'_> {
        fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
            // With no input left this is 0, which signals end of stream.
            let len = self.input.len().min(out.len()).min(3);
            let (chunk, remaining) = self.input.split_at(len);
            out[..len].copy_from_slice(chunk);
            self.input = remaining;
            Ok(len)
        }
    }

    let mut reader = TinyReader {
        input: COMPRESSED_TEXT,
    };
    let mut output = Vec::new();
    let mut decoder = Unpack29::new();
    decoder
        .decode_member_from_reader(&mut reader, 2400, &mut output)
        .unwrap();

    assert_eq!(output, expected_text());
}
3805
/// `decode_non_solid_member` must ignore output history and filters left on
/// the decoder, return the expected text, and leave the filter list empty.
#[test]
fn decode_non_solid_member_resets_reusable_decoder_state() {
    let stale_filter = VmFilter {
        program: 0,
        start: 0,
        size: 1,
        regs: [0; 7],
        global_data: vec![1, 2, 3],
    };

    let mut decoder = Unpack29::new();
    decoder.output.extend_from_slice(b"stale history");
    decoder.filters.push(stale_filter);

    let decoded = decoder.decode_non_solid_member(COMPRESSED_TEXT, 2400);
    let output = decoded.unwrap();

    assert_eq!(output, expected_text());
    assert!(decoder.filters.is_empty());
}
3825
/// With a member starting at offset 1000 of the decoder output, the E8 filter
/// must decode a call operand using the filter position relative to the member
/// start (100), not the absolute output position (1100).
#[test]
fn e8_filter_uses_member_relative_offset_in_solid_stream() {
    const MEMBER_START: usize = 1000;
    const FILTER_START: usize = MEMBER_START + 100;
    const DECODED_ADDR: u32 = 0x2000;

    // Stored operand = decoded address + member-relative filter start + 1,
    // where 1 is the operand position right after the opcode byte.
    let relative_start = (FILTER_START - MEMBER_START) as u32;
    let encoded_addr = DECODED_ADDR
        .wrapping_add(relative_start)
        .wrapping_add(1u32);

    let mut decoder = Unpack29::new();
    decoder.output.resize(FILTER_START + 8, 0);
    decoder.output[FILTER_START] = 0xe8;
    decoder.output[FILTER_START + 1..FILTER_START + 5]
        .copy_from_slice(&encoded_addr.to_le_bytes());

    let program = VmProgram {
        kind: VmProgramKind::Standard(StandardFilter::E8),
        block_size: 5,
        exec_count: 0,
        globals: Vec::new(),
    };
    let filter = VmFilter {
        program: 0,
        start: FILTER_START,
        size: 5,
        regs: [0; 7],
        global_data: Vec::new(),
    };
    decoder.programs.push(program);
    decoder.filters.push(filter);

    let filtered = decoder
        .filtered_range(MEMBER_START, FILTER_START + 5, MEMBER_START)
        .unwrap();

    // The operand sits right after the opcode, at bytes 101..105 of the range.
    let mut operand_bytes = [0u8; 4];
    operand_bytes.copy_from_slice(&filtered[101..105]);
    assert_eq!(u32::from_le_bytes(operand_bytes), DECODED_ADDR);
}
3864
/// A generic (non-standard) VM program attached to a filter must run when the
/// filtered range is requested: here a byte-mode `Mov` of 0x44 to absolute
/// address 0 followed by `Ret`, which changes only the first byte.
#[test]
fn generic_vm_filter_executes_from_filtered_range() {
    let store_first_byte = Instruction {
        opcode: Opcode::Mov,
        byte_mode: true,
        operands: vec![Operand::Absolute(0), Operand::Immediate(0x44)],
    };
    let ret = Instruction {
        opcode: Opcode::Ret,
        byte_mode: false,
        operands: Vec::new(),
    };
    let program = Program {
        static_data: Vec::new(),
        instructions: vec![store_first_byte, ret],
    };

    let mut decoder = Unpack29::new();
    decoder.output.extend_from_slice(&[0x11, 0x22, 0x33]);
    decoder.programs.push(VmProgram {
        kind: VmProgramKind::Generic(program),
        block_size: 3,
        exec_count: 0,
        globals: Vec::new(),
    });
    decoder.filters.push(VmFilter {
        program: 0,
        start: 0,
        size: 3,
        regs: [0; 7],
        global_data: Vec::new(),
    });

    let filtered = decoder.filtered_range(0, 3, 0).unwrap();

    assert_eq!(filtered, [0x44, 0x22, 0x33]);
}
3901
/// `apply_standard_filter` must return the documented `InvalidData` errors for
/// a DELTA filter with register 0 set to 33 and for RGB filters with registers
/// (2, 0) and (15, 3).
#[test]
fn standard_filters_reject_malformed_delta_and_rgb_registers() {
    let mut delta = vec![0; 32];
    let delta_regs = [33, 0, 0, 0, 0, 0, 0];
    let delta_outcome = apply_standard_filter(StandardFilter::Delta, &mut delta, 0, &delta_regs);
    assert_eq!(
        delta_outcome,
        Err(Error::InvalidData(
            "RAR 2.9 DELTA filter channel count is invalid"
        ))
    );

    // Both register settings are applied to the same buffer, in this order.
    let mut rgb = vec![0; 32];
    for (reg0, reg1) in [(2, 0), (15, 3)] {
        let rgb_regs = [reg0, reg1, 0, 0, 0, 0, 0];
        let rgb_outcome = apply_standard_filter(StandardFilter::Rgb, &mut rgb, 0, &rgb_regs);
        assert_eq!(
            rgb_outcome,
            Err(Error::InvalidData(
                "RAR 2.9 RGB filter parameters are invalid"
            ))
        );
    }
}
3932
/// Reading an encoded u32 from five 0xff bytes must yield 0xffff_ffff.
#[test]
fn vm_encoded_u32_accepts_32_bit_form() {
    let all_ones = [0xffu8; 5];
    let mut bits = super::BitReader::from_bytes(&all_ones);

    let value = bits.read_encoded_u32().unwrap();

    assert_eq!(value, 0xffff_ffff);
}
3939
/// A VM record with flags `0x80 | 0x08` whose third encoded value is
/// `u32::MAX`, with no bytes following, must fail with `NeedMoreInput`.
#[test]
fn vm_global_data_size_does_not_reserve_untrusted_declared_size() {
    let existing_program = VmProgram {
        kind: VmProgramKind::Standard(StandardFilter::E8),
        block_size: 1,
        exec_count: 0,
        globals: Vec::new(),
    };
    let mut decoder = Unpack29::new();
    decoder.programs.push(existing_program);

    let mut data = BitWriter::default();
    for field in [1, 0, u32::MAX] {
        data.write_encoded_u32(field);
    }

    let outcome = decoder.parse_vm_code(0x80 | 0x08, data.finish());

    assert_eq!(outcome, Err(Error::NeedMoreInput));
}
3960
/// A VM record that declares a code size of `MAX_VM_CODE_SIZE + 1` must be
/// rejected with the "VM code is too large" error.
#[test]
fn vm_code_size_is_capped_before_allocation() {
    let oversized = (super::MAX_VM_CODE_SIZE + 1) as u32;
    let mut data = BitWriter::default();
    for field in [0, 1, oversized] {
        data.write_encoded_u32(field);
    }

    let mut decoder = Unpack29::new();
    let outcome = decoder.parse_vm_code(0x80, data.finish());

    assert_eq!(
        outcome,
        Err(Error::InvalidData("RAR 2.9 VM code is too large"))
    );
}
3974
/// With `MAX_VM_PROGRAMS` programs already stored, declaring one more must
/// fail with the program-limit error; with `MAX_VM_FILTERS` filters already
/// stored, reusing a program for another filter must fail with the
/// filter-limit error.
#[test]
fn vm_program_and_filter_counts_are_capped() {
    let filler_program = || VmProgram {
        kind: VmProgramKind::Standard(StandardFilter::E8),
        block_size: 1,
        exec_count: 0,
        globals: Vec::new(),
    };
    let filler_filter = || VmFilter {
        program: 0,
        start: 0,
        size: 1,
        regs: [0; 7],
        global_data: Vec::new(),
    };

    // Phase 1: the program table is full and the record declares a new program.
    let mut decoder = Unpack29::new();
    decoder
        .programs
        .resize_with(super::MAX_VM_PROGRAMS, filler_program);

    let mut new_program = BitWriter::default();
    for field in [(super::MAX_VM_PROGRAMS + 1) as u32, 1, 1] {
        new_program.write_encoded_u32(field);
    }
    new_program.write_bits(0, 8);
    let program_outcome = decoder.parse_vm_code(0x80, new_program.finish());
    assert_eq!(
        program_outcome,
        Err(Error::InvalidData("RAR 2.9 VM program limit exceeded"))
    );

    // Phase 2: one program remains, the filter table is full, and the record
    // (flags 0) carries a single encoded value.
    decoder.programs.truncate(1);
    decoder.last_filter = 0;
    decoder
        .filters
        .resize_with(super::MAX_VM_FILTERS, filler_filter);

    let mut reused_program = BitWriter::default();
    reused_program.write_encoded_u32(0);
    let filter_outcome = decoder.parse_vm_code(0, reused_program.finish());
    assert_eq!(
        filter_outcome,
        Err(Error::InvalidData("RAR 2.9 VM filter limit exceeded"))
    );
}
4015
/// `itanium_encode` followed by `itanium_decode` with a file offset of
/// `u32::MAX` must restore the original 64-byte buffer.
#[test]
fn itanium_filter_round_trips_with_high_file_offset() {
    // Bytes 0..64 in ascending order, then two bytes overridden.
    let mut data: Vec<u8> = (0u8..64).collect();
    data[0] = 0;
    data[7] = 5 << 3;
    let original = data.clone();

    let file_offset = u32::MAX;
    itanium_encode(&mut data, file_offset);
    itanium_decode(&mut data, file_offset);

    assert_eq!(data, original);
}
4031
/// Returns the plain text the compressed fixture is expected to decode to:
/// the same 30-byte line repeated 80 times (2400 bytes).
fn expected_text() -> Vec<u8> {
    let line: &[u8] = b"Hello, RAR 3.x fixture world.\n";
    line.repeat(80)
}
4035}