1use crate::filters::{self, DeltaErrorMessages, FilterOp};
2use crate::huffman;
3use crate::ppmd::{PpmdByteReader, PpmdDecoder, PpmdEncoder};
4use crate::rarvm;
5use crate::{Error, Result};
6use rars_crc32::crc32;
7use std::io::{Read, Write};
8use std::ops::Range;
9
/// Size of the main Huffman alphabet: byte literals occupy 0..=255, symbols from 256 upward are
/// control and match symbols (see `encode_member_inner`), with length slots starting at 271.
const MAIN_COUNT: usize = 299;
/// Size of the offset-slot Huffman alphabet (one symbol per `OFFSET_BASES` entry).
const OFFSET_COUNT: usize = 60;
/// Size of the low-offset Huffman alphabet; the encoder here only emits symbols 0..=15.
const LOW_OFFSET_COUNT: usize = 17;
/// Size of the length-slot Huffman alphabet (one symbol per `LENGTH_BASES` entry).
const LENGTH_COUNT: usize = 28;
// Not referenced in the visible part of the file; presumably the size of the level
// (code-length) alphabet — confirm against `level_code_lengths`.
const LEVEL_COUNT: usize = 20;
/// Total number of code lengths transmitted per block: main, offset, low-offset and length tables.
const TABLE_COUNT: usize = MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT + LENGTH_COUNT;
/// Maximum number of trailing bytes kept as match history between members and blocks.
const MAX_HISTORY: usize = 4 * 1024 * 1024;
// INPUT_CHUNK and STREAM_CHUNK are not referenced in the visible part of the file.
const INPUT_CHUNK: usize = 64 * 1024;
const STREAM_CHUNK: usize = 1024 * 1024;
/// Largest block a single E8/E8E9/Itanium/RGB filter record is allowed to cover.
const MAX_VM_FILTER_BLOCK_SIZE: usize = 128 * 1024;
/// Largest block a single DELTA filter record is allowed to cover.
const MAX_VM_DELTA_FILTER_BLOCK_SIZE: usize = 120_000;
/// Largest block a single AUDIO filter record is allowed to cover.
const MAX_VM_AUDIO_FILTER_BLOCK_SIZE: usize = 120_000;
// The remaining VM limits are not referenced in the visible part of the file; presumably they
// bound decoder-side VM resources — confirm at their use sites.
const MAX_VM_GLOBAL_DATA: usize = 0x2000;
const MAX_VM_CODE_SIZE: usize = 64 * 1024;
const MAX_VM_PROGRAMS: usize = 1024;
const MAX_VM_FILTERS: usize = 1024;
29
/// Per length slot: presumably the smallest length value the slot covers (confirm against the
/// slot lookup helpers, which are outside this view).
const LENGTH_BASES: [usize; LENGTH_COUNT] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
    160, 192, 224,
];
/// Per length slot: number of extra bits written after the slot's Huffman code.
const LENGTH_BITS: [u8; LENGTH_COUNT] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
];
/// Per offset slot: presumably the smallest offset value the slot covers (confirm against the
/// slot lookup helpers, which are outside this view).
const OFFSET_BASES: [usize; OFFSET_COUNT] = [
    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
    2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
    1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 2883584, 3145728, 3407872,
    3670016, 3932160,
];
/// Per offset slot: number of extra offset bits. For slots above 9 the low four of these bits are
/// coded through the low-offset Huffman table instead of being written raw.
const OFFSET_BITS: [u8; OFFSET_COUNT] = [
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
    13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18,
];
// SHORT_BASES / SHORT_BITS are not referenced in the visible part of the file; presumably the
// base/extra-bit tables for short-distance match symbols — confirm at their use sites.
const SHORT_BASES: [usize; 8] = [0, 4, 8, 16, 32, 64, 128, 192];
const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];
/// Default value of `EncodeOptions::max_match_distance`.
const MAX_ENCODER_MATCH_OFFSET: usize = 1024 * 1024;
// The match-finder limits below are consumed outside the visible part of the file; the names
// suggest length/candidate caps for the LZ and PPMd token builders — confirm at their use sites.
const MAX_ENCODER_MATCH_LENGTH: usize = 258;
const MATCH_HASH_BUCKETS: usize = 4096;
/// Default value of `EncodeOptions::max_match_candidates` (see `EncodeOptions::default`).
const MAX_MATCH_CANDIDATES: usize = 256;
const MAX_PPMD_MATCH_LENGTH: usize = 255;
const MIN_PPMD_MATCH_LENGTH: usize = 32;
const MAX_PPMD_REPEAT_LENGTH: usize = 259;
58
/// VM program bytes attached to filter records for `Rar29FilterKind::E8` (see `filtered_member`).
const RAR3_E8_FILTER_BYTECODE: &[u8] = &[
    0x97, 0x1b, 0x01, 0x28, 0x07, 0x06, 0x98, 0x08, 0x00, 0x00, 0x00, 0xd1, 0x3a, 0x10, 0x15, 0x92,
    0xec, 0x50, 0xcb, 0x99, 0x20, 0xb9, 0x25, 0xf0, 0x29, 0x19, 0x15, 0x53, 0x03, 0x12, 0xae, 0x51,
    0x10, 0x35, 0x59, 0x2b, 0x60, 0x04, 0x15, 0x6d, 0x40, 0x66, 0xab, 0x02, 0x34, 0x49, 0x04, 0x36,
    0x02, 0x52, 0x3e, 0x97, 0x00,
];
/// VM program bytes attached to filter records for `Rar29FilterKind::E8E9` (see `filtered_member`).
const RAR3_E8E9_FILTER_BYTECODE: &[u8] = &[
    0x84, 0x1b, 0x01, 0x28, 0x11, 0x10, 0x69, 0x80, 0x80, 0x00, 0x00, 0x0d, 0x13, 0xa1, 0x01, 0xc6,
    0x89, 0xd2, 0x80, 0xac, 0x97, 0x62, 0x85, 0x5c, 0xc9, 0x05, 0xc9, 0x2f, 0x81, 0x48, 0xc8, 0xaa,
    0x98, 0x18, 0x95, 0x72, 0x88, 0x81, 0xaa, 0xc9, 0x5b, 0x00, 0x20, 0xab, 0x6a, 0x03, 0x35, 0x58,
    0x11, 0xa2, 0x48, 0x21, 0xb0, 0x12, 0x91, 0xf4, 0xb8,
];
/// VM program bytes attached to filter records for `Rar29FilterKind::Delta` (see `filtered_member`).
const RAR3_DELTA_FILTER_BYTECODE: &[u8] = &[
    0x2f, 0x01, 0x9a, 0x41, 0x80, 0xec, 0x27, 0x48, 0x2f, 0x09, 0x76, 0x6d, 0xd3, 0xea, 0x41, 0x5b,
    0x59, 0x44, 0xe8, 0x17, 0x5c, 0xe1, 0x6c, 0x91, 0x4c, 0x4e, 0x3f, 0x77, 0x00,
];
/// VM program bytes attached to filter records for `Rar29FilterKind::Itanium` (see
/// `filtered_member`).
const RAR3_ITANIUM_FILTER_BYTECODE: &[u8] = &[
    0x46, 0x9e, 0x08, 0x08, 0x0c, 0x0c, 0x00, 0x00, 0x0e, 0x0e, 0x08, 0x08, 0x00, 0x00, 0x08, 0x08,
    0x00, 0x00, 0x6c, 0x11, 0x5a, 0x04, 0xac, 0x0c, 0xc4, 0xcc, 0x5c, 0x08, 0x18, 0x46, 0x24, 0x08,
    0xf9, 0xa0, 0x44, 0x25, 0x12, 0x12, 0x45, 0x85, 0x99, 0x0c, 0x14, 0x00, 0x26, 0x25, 0x58, 0x99,
    0x90, 0x03, 0x38, 0x1a, 0x08, 0xdc, 0x02, 0x30, 0x0c, 0x4e, 0xd1, 0x1d, 0x89, 0xa1, 0xe2, 0xd0,
    0x55, 0x11, 0x33, 0x60, 0x8c, 0x5a, 0x23, 0x06, 0xde, 0x06, 0x18, 0x00, 0x7f, 0xff, 0xfc, 0x4d,
    0xcc, 0x19, 0x17, 0xb3, 0x06, 0xc4, 0x44, 0xb2, 0x32, 0x5a, 0x44, 0xc4, 0xa6, 0x01, 0xf4, 0x24,
    0x88, 0x83, 0x38, 0xcc, 0xc4, 0x11, 0x09, 0x87, 0xa6, 0xe0, 0x46, 0x02, 0xb2, 0x24, 0x03, 0xe2,
    0xa0, 0x32, 0x54, 0x83, 0x52, 0xc5, 0xb1, 0x70,
];
/// VM program bytes attached to filter records for `Rar29FilterKind::Rgb` (see `filtered_member`).
const RAR3_RGB_FILTER_BYTECODE: &[u8] = &[
    0xc5, 0x01, 0x9a, 0x41, 0x95, 0xc9, 0xa6, 0x4d, 0xba, 0x4b, 0x14, 0x0a, 0xf4, 0x9b, 0x80, 0x4c,
    0x00, 0x15, 0xa6, 0xa8, 0x07, 0x26, 0x2a, 0xc9, 0xc4, 0x8b, 0x86, 0x62, 0x32, 0x0f, 0x86, 0x64,
    0x24, 0x06, 0x66, 0x71, 0x19, 0x98, 0xcc, 0x43, 0x33, 0x31, 0x99, 0x00, 0x66, 0x88, 0x33, 0x30,
    0xcc, 0xd1, 0x0e, 0x98, 0x0b, 0x33, 0x34, 0x40, 0x0c, 0xd1, 0x46, 0x66, 0x19, 0x9a, 0x28, 0xcc,
    0x49, 0x80, 0xb3, 0x33, 0x45, 0x00, 0xcd, 0x18, 0x66, 0x61, 0x99, 0xa3, 0x0c, 0xc8, 0x98, 0x0b,
    0x33, 0x34, 0x60, 0x4c, 0xd1, 0x06, 0x68, 0xa5, 0x20, 0x62, 0x66, 0x88, 0x33, 0x46, 0x28, 0x05,
    0x0f, 0x32, 0x0c, 0x4c, 0xd1, 0x46, 0x68, 0xc5, 0x00, 0x41, 0xe4, 0x8f, 0xc8, 0x85, 0x5e, 0x02,
    0x7c, 0xc9, 0x26, 0x81, 0x83, 0xb0, 0x9d, 0xc2, 0xde, 0x9c, 0x78, 0xac, 0xd6, 0x68, 0xb4, 0x0e,
    0x71, 0xdb, 0xb2, 0x49, 0x38, 0x6e, 0x02, 0x2a, 0x2c, 0x41, 0x2b, 0x10, 0x98, 0x82, 0x49, 0x03,
    0x14, 0xf4, 0xe1, 0x97, 0x00,
];
/// VM program bytes attached to filter records for `Rar29FilterKind::Audio` (see
/// `filtered_member`). `encoded_filter_records` never reuses an earlier copy of this program.
const RAR3_AUDIO_FILTER_BYTECODE: &[u8] = &[
    0x47, 0x01, 0x9a, 0x41, 0x95, 0xe5, 0x72, 0x0d, 0xc2, 0x64, 0x82, 0x74, 0x93, 0x24, 0xb1, 0x40,
    0x06, 0xd8, 0x38, 0x44, 0x00, 0xa8, 0x01, 0x34, 0x11, 0xdc, 0xa1, 0xba, 0x01, 0x99, 0x0c, 0xc4,
    0x03, 0x31, 0x19, 0xa4, 0x06, 0x66, 0x22, 0x60, 0x4d, 0x9a, 0x40, 0x0d, 0x66, 0x8e, 0x60, 0xd0,
    0x30, 0x40, 0x18, 0x26, 0xc1, 0xc8, 0xf6, 0xe6, 0x26, 0x13, 0x78, 0x92, 0x08, 0xe8, 0x50, 0xbc,
    0x5a, 0x07, 0xc6, 0xe9, 0xf5, 0x20, 0xa9, 0xa0, 0xed, 0x37, 0x33, 0x47, 0x39, 0x66, 0x90, 0x70,
    0x19, 0xa3, 0x9b, 0xcf, 0x25, 0x83, 0x80, 0xc1, 0xbd, 0x30, 0x16, 0x6e, 0x23, 0x34, 0x93, 0x81,
    0x16, 0x09, 0xb0, 0x50, 0x18, 0x3b, 0x4d, 0xc8, 0x4c, 0x05, 0x9b, 0x88, 0xc5, 0x28, 0xe0, 0x76,
    0x93, 0x90, 0x98, 0x0b, 0x37, 0x11, 0x8a, 0x59, 0xc4, 0x80, 0x42, 0x48, 0x43, 0xa9, 0x47, 0xee,
    0x43, 0x34, 0x60, 0x47, 0xd4, 0x4a, 0x0d, 0xbb, 0xd3, 0x59, 0xa4, 0x86, 0xee, 0x05, 0x09, 0x40,
    0x26, 0xc9, 0x34, 0x24, 0x76, 0xa0, 0x30, 0x6a, 0x20, 0xea, 0x02, 0x20, 0x04, 0xa0, 0x41, 0x50,
    0x9e, 0x50, 0x3f, 0xe6, 0xe1, 0x28, 0x94, 0x46, 0x01, 0xbd, 0x8b, 0x40, 0xf0, 0x68, 0x11, 0x36,
    0xc9, 0xa1, 0x92, 0x38, 0x11, 0x41, 0x9c, 0xa8, 0x95, 0x10, 0xee, 0x50, 0x66, 0x2b, 0x00, 0x20,
    0x95, 0x11, 0x04, 0x02, 0x62, 0xac, 0x66, 0x8c, 0x6a, 0xca, 0x26, 0x40, 0xb2, 0x67, 0x1b, 0x4b,
    0x26, 0xcc, 0x64, 0x8a, 0x62, 0x71, 0xa2, 0xb8,
];
117
118pub fn unpack29_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
119 let mut decoder = Unpack29::new();
120 decoder.decode_non_solid_member(input, output_size)
121}
122
/// Encodes `input` as a stand-alone member using the default `EncodeOptions` and no prior
/// history.
pub fn unpack29_encode_literals(input: &[u8]) -> Result<Vec<u8>> {
    encode_member(input, &[])
}
126
/// Encodes `input` as a stand-alone member with caller-supplied `options` and no prior history.
pub fn unpack29_encode_literals_with_options(
    input: &[u8],
    options: EncodeOptions,
) -> Result<Vec<u8>> {
    encode_member_with_options(input, &[], options)
}
133
/// Encodes `input` as a PPMd member with LZ escapes disabled and no VM filter records.
pub fn unpack29_encode_ppmd_literals(input: &[u8]) -> Result<Vec<u8>> {
    encode_ppmd_member(input, false, &[])
}
137
/// Encodes `input` as a PPMd member with LZ escapes enabled and no VM filter records.
pub fn unpack29_encode_ppmd(input: &[u8]) -> Result<Vec<u8>> {
    encode_ppmd_member(input, true, &[])
}
141
/// Applies `filter` to `input` and encodes the result as a PPMd member with LZ escapes enabled.
pub fn unpack29_encode_ppmd_with_filter(input: &[u8], filter: Rar29FilterSpec) -> Result<Vec<u8>> {
    encode_ppmd_filtered_member(input, filter, true)
}
145
/// Applies `filter` to `input` and encodes the result as a PPMd member with LZ escapes disabled.
pub fn unpack29_encode_ppmd_literals_with_filter(
    input: &[u8],
    filter: Rar29FilterSpec,
) -> Result<Vec<u8>> {
    encode_ppmd_filtered_member(input, filter, false)
}
152
153fn encode_ppmd_filtered_member(
154 input: &[u8],
155 filter: Rar29FilterSpec,
156 lz_escapes: bool,
157) -> Result<Vec<u8>> {
158 let filters = split_large_filter(input.len(), filter)?;
159 let filtered = filtered_members(input, &filters)?;
160 let records = encoded_filter_records(&filtered.records)?;
161 encode_ppmd_member(&filtered.data, lz_escapes, &records)
162}
163
164fn filtered_members(input: &[u8], filters: &[Rar29FilterSpec]) -> Result<FilteredMembers> {
165 let mut data = input.to_vec();
166 let mut records = Vec::with_capacity(filters.len());
167 for filter in filters {
168 let filtered = filtered_member(input, filter)?;
169 let range = filtered.block_start..filtered.block_start + filtered.block_size;
170 data[range.clone()].copy_from_slice(&filtered.data[range]);
171 records.push(OwnedVmFilterRecord {
172 block_start: filtered.block_start,
173 block_size: filtered.block_size,
174 init_regs: filtered.init_regs,
175 code: filtered.code,
176 });
177 }
178 Ok(FilteredMembers { data, records })
179}
180
/// Output of `filtered_members`: the transformed member bytes and one record per applied filter.
struct FilteredMembers {
    /// Copy of the input in which every filtered block has been replaced by its filtered form.
    data: Vec<u8>,
    /// Block position, register setup and VM program of each filter, in application order.
    records: Vec<OwnedVmFilterRecord>,
}
185
186fn split_large_filter(input_len: usize, filter: Rar29FilterSpec) -> Result<Vec<Rar29FilterSpec>> {
187 let range = filter.range.clone().unwrap_or(0..input_len);
188 if range.start >= range.end || range.end > input_len {
189 return Err(Error::InvalidData("RAR 2.9 VM filter range is invalid"));
190 }
191
192 let chunk_size = match filter.kind {
193 Rar29FilterKind::Delta { channels } => {
194 if channels == 0 || channels > MAX_VM_DELTA_FILTER_BLOCK_SIZE {
195 return Err(Error::InvalidData(
196 "RAR 2.9 VM filter channel count is invalid",
197 ));
198 }
199 MAX_VM_DELTA_FILTER_BLOCK_SIZE - (MAX_VM_DELTA_FILTER_BLOCK_SIZE % channels)
200 }
201 Rar29FilterKind::Audio { channels } => {
202 if channels == 0 || channels > MAX_VM_AUDIO_FILTER_BLOCK_SIZE {
203 return Err(Error::InvalidData(
204 "RAR 2.9 VM filter channel count is invalid",
205 ));
206 }
207 MAX_VM_AUDIO_FILTER_BLOCK_SIZE - (MAX_VM_AUDIO_FILTER_BLOCK_SIZE % channels)
208 }
209 Rar29FilterKind::Rgb { width, .. } => {
210 if width == 0 || width > MAX_VM_FILTER_BLOCK_SIZE {
211 return Err(Error::InvalidData(
212 "RAR 2.9 RGB filter scanline width is invalid",
213 ));
214 }
215 MAX_VM_FILTER_BLOCK_SIZE - (MAX_VM_FILTER_BLOCK_SIZE % width)
216 }
217 Rar29FilterKind::E8 | Rar29FilterKind::E8E9 | Rar29FilterKind::Itanium => {
218 MAX_VM_FILTER_BLOCK_SIZE
219 }
220 };
221 if range.len() <= chunk_size {
222 return Ok(vec![filter]);
223 }
224 if chunk_size == 0 {
225 return Err(Error::InvalidData(
226 "RAR 2.9 VM filter chunk size is invalid",
227 ));
228 }
229
230 let mut filters = Vec::new();
231 let mut start = range.start;
232 while start < range.end {
233 let end = (start + chunk_size).min(range.end);
234 filters.push(Rar29FilterSpec::range(filter.kind, start..end));
235 start = end;
236 }
237 Ok(filters)
238}
239
/// Owned description of one VM filter invocation, produced by `filtered_members` and later
/// serialized by `encoded_filter_records`.
struct OwnedVmFilterRecord {
    /// Offset of the filtered block from the start of the member.
    block_start: usize,
    /// Length of the filtered block in bytes.
    block_size: usize,
    /// `(register index, value)` pairs written into the record's register-initialisation section.
    init_regs: Vec<(usize, u32)>,
    /// VM program bytes for this filter (one of the `RAR3_*_FILTER_BYTECODE` constants).
    code: &'static [u8],
}
246
/// Encodes `input` as a PPMd member; currently a direct pass-through to `encode_ppmd_block`.
fn encode_ppmd_member(
    input: &[u8],
    lz_escapes: bool,
    initial_filters: &[Vec<u8>],
) -> Result<Vec<u8>> {
    encode_ppmd_block(input, lz_escapes, initial_filters)
}
254
255fn encode_ppmd_block(
256 input: &[u8],
257 lz_escapes: bool,
258 initial_filters: &[Vec<u8>],
259) -> Result<Vec<u8>> {
260 const PPMD_ORDER: usize = 8;
261 const PPMD_DICTIONARY_MB: u8 = 25;
262 const PPMD_ESC: u8 = 2;
263
264 let mut out = Vec::new();
265 out.push(0x80 | 0x20 | ((PPMD_ORDER as u8) - 1));
266 out.push(PPMD_DICTIONARY_MB - 1);
267 let mut encoder = PpmdEncoder::new(PPMD_ORDER, PPMD_ESC, usize::from(PPMD_DICTIONARY_MB))?;
268 for record in initial_filters {
269 encoder.encode_vm_filter_record(record)?;
270 }
271 for token in encode_ppmd_tokens(input, lz_escapes) {
272 match token {
273 PpmdEncodeToken::Literal(byte) => encoder.encode_literal(byte)?,
274 PpmdEncodeToken::RepeatOffsetOne { length } => {
275 encoder.encode_repeat_offset_one(length)?
276 }
277 PpmdEncodeToken::Match { offset, length } => encoder.encode_match(offset, length)?,
278 }
279 }
280 out.extend_from_slice(&encoder.finish()?);
281 Ok(out)
282}
283
/// One step of the PPMd token stream consumed by `encode_ppmd_block`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PpmdEncodeToken {
    /// A single byte passed to `PpmdEncoder::encode_literal`.
    Literal(u8),
    /// A run passed to `PpmdEncoder::encode_repeat_offset_one` (offset one, per the name).
    RepeatOffsetOne { length: usize },
    /// A back-reference passed to `PpmdEncoder::encode_match`.
    Match { offset: usize, length: usize },
}
290
/// Describes a filter to apply to a member: which transform, and over which byte range.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rar29FilterSpec {
    /// The transform to apply.
    pub kind: Rar29FilterKind,
    /// Byte range within the member to filter; `None` means the whole member.
    pub range: Option<Range<usize>>,
}

impl Rar29FilterSpec {
    /// Creates a spec that filters the entire member.
    pub fn whole(kind: Rar29FilterKind) -> Self {
        Self { kind, range: None }
    }

    /// Creates a spec that filters only `range` (byte offsets relative to the member start).
    pub fn range(kind: Rar29FilterKind, range: Range<usize>) -> Self {
        Self {
            kind,
            range: Some(range),
        }
    }
}
309
/// The filter transforms this encoder can emit, each backed by one `RAR3_*_FILTER_BYTECODE`
/// program.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rar29FilterKind {
    /// Encoded via `FilterOp::E8`.
    E8,
    /// Encoded via `FilterOp::E8E9`.
    E8E9,
    /// Encoded via `FilterOp::Delta`; `channels` must be non-zero and is passed to the VM in
    /// register 0.
    Delta { channels: usize },
    /// Encoded via `itanium_encode`.
    Itanium,
    /// Encoded via `rgb_encode`; `width` must be a non-zero multiple of 3 and `pos_r` at most 2.
    Rgb { width: usize, pos_r: usize },
    /// Encoded via `audio_encode`; `channels` must be in `1..=32`.
    Audio { channels: usize },
}
319
/// Output of `filtered_member` for a single filter.
struct FilteredMember {
    /// Full-length copy of the input with only the filter's block transformed.
    data: Vec<u8>,
    /// Offset of the transformed block from the start of the member.
    block_start: usize,
    /// Length of the transformed block in bytes.
    block_size: usize,
    /// `(register index, value)` pairs the VM program expects to be preset.
    init_regs: Vec<(usize, u32)>,
    /// VM program bytes that reverse the transform.
    code: &'static [u8],
}
327
328fn filtered_member(input: &[u8], filter: &Rar29FilterSpec) -> Result<FilteredMember> {
329 let range = filter.range.clone().unwrap_or(0..input.len());
330 if range.start >= range.end || range.end > input.len() {
331 return Err(Error::InvalidData("RAR 2.9 VM filter range is invalid"));
332 }
333 let mut filtered = input.to_vec();
334 let (init_regs, code): (Vec<(usize, u32)>, &'static [u8]) = match filter.kind {
335 Rar29FilterKind::E8 => {
336 filters::encode_in_place(
337 FilterOp::E8,
338 &mut filtered[range.clone()],
339 range.start as u32,
340 rar29_delta_messages(),
341 )?;
342 (Vec::new(), RAR3_E8_FILTER_BYTECODE)
343 }
344 Rar29FilterKind::E8E9 => {
345 filters::encode_in_place(
346 FilterOp::E8E9,
347 &mut filtered[range.clone()],
348 range.start as u32,
349 rar29_delta_messages(),
350 )?;
351 (Vec::new(), RAR3_E8E9_FILTER_BYTECODE)
352 }
353 Rar29FilterKind::Delta { channels } => {
354 filters::encode_in_place(
355 FilterOp::Delta { channels },
356 &mut filtered[range.clone()],
357 0,
358 rar29_delta_messages(),
359 )?;
360 (vec![(0, channels as u32)], RAR3_DELTA_FILTER_BYTECODE)
361 }
362 Rar29FilterKind::Itanium => {
363 itanium_encode(&mut filtered[range.clone()], range.start as u32);
364 (Vec::new(), RAR3_ITANIUM_FILTER_BYTECODE)
365 }
366 Rar29FilterKind::Rgb { width, pos_r } => {
367 filtered[range.clone()].copy_from_slice(&rgb_encode(
368 &input[range.clone()],
369 width,
370 pos_r,
371 )?);
372 let init_regs = if pos_r == 0 {
373 vec![(0, width as u32 + 3)]
374 } else {
375 vec![(0, width as u32 + 3), (1, pos_r as u32)]
376 };
377 (init_regs, RAR3_RGB_FILTER_BYTECODE)
378 }
379 Rar29FilterKind::Audio { channels } => {
380 filtered[range.clone()]
381 .copy_from_slice(&audio_encode(&input[range.clone()], channels)?);
382 (vec![(0, channels as u32)], RAR3_AUDIO_FILTER_BYTECODE)
383 }
384 };
385 Ok(FilteredMember {
386 data: filtered,
387 block_start: range.start,
388 block_size: range.end - range.start,
389 init_regs,
390 code,
391 })
392}
393
/// Builds the RAR 2.9-specific error texts handed to `filters::encode_in_place`.
fn rar29_delta_messages() -> DeltaErrorMessages {
    DeltaErrorMessages {
        invalid_channels: "RAR 2.9 DELTA filter channel count is invalid",
        zero_channels: "RAR 2.9 DELTA filter has zero channels",
        truncated_source: "RAR 2.9 DELTA filter source is truncated",
    }
}
401
/// Tuning knobs for the LZ encoder. Construct with [`EncodeOptions::new`] or `Default` and
/// adjust with the `with_*` builders.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct EncodeOptions {
    // The first four fields are consumed by the match finder, which is outside this view;
    // presumably they bound search effort and reach — confirm in `encode_tokens`.
    pub max_match_candidates: usize,
    pub lazy_matching: bool,
    pub lazy_lookahead: usize,
    pub max_match_distance: usize,
    /// When `Some(n)` with `n != 0` and the input is longer than `n`, the input is encoded as
    /// consecutive blocks of at most `n` bytes (see `encode_member_with_options`).
    pub block_size: Option<usize>,
}
411
412impl EncodeOptions {
413 pub const fn new(max_match_candidates: usize) -> Self {
414 Self {
415 max_match_candidates,
416 lazy_matching: false,
417 lazy_lookahead: 1,
418 max_match_distance: MAX_ENCODER_MATCH_OFFSET,
419 block_size: None,
420 }
421 }
422
423 pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
424 self.lazy_matching = enabled;
425 self
426 }
427
428 pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
429 self.lazy_lookahead = bytes;
430 self
431 }
432
433 pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
434 self.max_match_distance = distance;
435 self
436 }
437
438 pub const fn with_block_size(mut self, bytes: usize) -> Self {
439 self.block_size = Some(bytes);
440 self
441 }
442}
443
impl Default for EncodeOptions {
    /// Equivalent to `EncodeOptions::new(MAX_MATCH_CANDIDATES)`.
    fn default() -> Self {
        Self::new(MAX_MATCH_CANDIDATES)
    }
}
449
/// Stateful encoder that carries match history from one member to the next.
#[derive(Debug, Clone, Default)]
pub struct Unpack29Encoder {
    /// Trailing bytes of previously encoded members, capped at `MAX_HISTORY`.
    history: Vec<u8>,
    /// Options applied to every member encoded through this instance.
    options: EncodeOptions,
}
455
456impl Unpack29Encoder {
457 pub fn new() -> Self {
458 Self::default()
459 }
460
461 pub fn with_options(options: EncodeOptions) -> Self {
462 Self {
463 history: Vec::new(),
464 options,
465 }
466 }
467
468 pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
469 let packed = encode_member_with_options(input, &self.history, self.options)?;
470 self.remember(input);
471 Ok(packed)
472 }
473
474 pub fn encode_member_with_filter(
475 &mut self,
476 input: &[u8],
477 filter: Rar29FilterSpec,
478 ) -> Result<Vec<u8>> {
479 let filters = split_large_filter(input.len(), filter)?;
480 let filtered = filtered_members(input, &filters)?;
481 let records = encoded_filter_records(&filtered.records)?;
482 let packed = encode_member_with_initial_filters(
483 &filtered.data,
484 &self.history,
485 &records,
486 self.options,
487 )?;
488 self.remember(input);
489 Ok(packed)
490 }
491
492 pub fn encode_member_with_filters(
493 &mut self,
494 input: &[u8],
495 filters: &[Rar29FilterSpec],
496 ) -> Result<Vec<u8>> {
497 let mut split_filters = Vec::new();
498 for filter in filters {
499 split_filters.extend(split_large_filter(input.len(), filter.clone())?);
500 }
501 let filtered = filtered_members(input, &split_filters)?;
502 let records = encoded_filter_records(&filtered.records)?;
503 let packed = encode_member_with_initial_filters(
504 &filtered.data,
505 &self.history,
506 &records,
507 self.options,
508 )?;
509 self.remember(input);
510 Ok(packed)
511 }
512
513 fn remember(&mut self, input: &[u8]) {
514 self.history.extend_from_slice(input);
515 let keep_from = self.history.len().saturating_sub(MAX_HISTORY);
516 if keep_from != 0 {
517 self.history.drain(..keep_from);
518 }
519 }
520}
521
/// Encodes `input` against `history` using the default `EncodeOptions`.
fn encode_member(input: &[u8], history: &[u8]) -> Result<Vec<u8>> {
    encode_member_with_options(input, history, EncodeOptions::default())
}
525
526fn encode_member_with_options(
527 input: &[u8],
528 history: &[u8],
529 options: EncodeOptions,
530) -> Result<Vec<u8>> {
531 if let Some(block_size) = options.block_size.filter(|&size| size != 0) {
532 if input.len() > block_size {
533 return encode_member_blocks(input, history, options, block_size);
534 }
535 }
536 encode_member_inner(input, history, &[], options)
537}
538
539fn encode_member_blocks(
540 input: &[u8],
541 history: &[u8],
542 mut options: EncodeOptions,
543 block_size: usize,
544) -> Result<Vec<u8>> {
545 options.block_size = None;
546 let mut out = Vec::new();
547 let mut local_history = history[history.len().saturating_sub(MAX_HISTORY)..].to_vec();
548 for chunk in input.chunks(block_size) {
549 out.extend_from_slice(&encode_member_inner(chunk, &local_history, &[], options)?);
550 local_history.extend_from_slice(chunk);
551 let keep_from = local_history.len().saturating_sub(MAX_HISTORY);
552 if keep_from != 0 {
553 local_history.drain(..keep_from);
554 }
555 }
556 Ok(out)
557}
558
/// Encodes `input` as a single block whose stream starts with the serialized VM filter records
/// in `filters`. `options.block_size` is not consulted on this path.
fn encode_member_with_initial_filters(
    input: &[u8],
    history: &[u8],
    filters: &[Vec<u8>],
    options: EncodeOptions,
) -> Result<Vec<u8>> {
    encode_member_inner(input, history, filters, options)
}
567
/// Encodes one Huffman-coded LZ block: header bits, code-length tables, optional VM filter
/// records, the token stream and an end-of-block marker.
///
/// The function makes two passes over the tokens. The first pass counts symbol frequencies to
/// build the four Huffman tables; the second pass emits the codes. Both passes replay the same
/// `EncoderMatchState` so that every match is classified identically.
///
/// Main-table symbols used here: 0..=255 literals, 256 end of block, 257 VM filter record,
/// 258 last-length repeat, `259 + index` repeat-offset match, `271 + length_slot` fresh match.
///
/// # Errors
/// Propagates errors from match classification and canonical code construction, and returns
/// `Error::InvalidData` if a symbol that must be emitted has no Huffman code.
fn encode_member_inner(
    input: &[u8],
    history: &[u8],
    initial_filters: &[Vec<u8>],
    options: EncodeOptions,
) -> Result<Vec<u8>> {
    let tokens = encode_tokens(input, history, options);
    // Pass 1: gather symbol frequencies for all four tables.
    let mut main_frequencies = vec![0usize; MAIN_COUNT];
    let mut offset_frequencies = vec![0usize; OFFSET_COUNT];
    let mut low_offset_frequencies = vec![0usize; LOW_OFFSET_COUNT];
    let mut length_frequencies = vec![0usize; LENGTH_COUNT];
    // Each filter record is introduced by one occurrence of symbol 257.
    main_frequencies[257] += initial_filters.len();
    let mut match_state = EncoderMatchState::default();
    for token in &tokens {
        match *token {
            EncodeToken::Literal(byte) => {
                main_frequencies[byte as usize] += 1;
            }
            EncodeToken::Match { length, offset } => {
                match match_state.encode_match(length, offset)? {
                    EncodedMatch::LastLengthRepeat => {
                        main_frequencies[258] += 1;
                    }
                    EncodedMatch::RepeatOffset {
                        index, length_slot, ..
                    } => {
                        main_frequencies[259 + index] += 1;
                        length_frequencies[length_slot] += 1;
                    }
                    EncodedMatch::Fresh {
                        length_slot,
                        offset_slot,
                        offset_extra,
                        ..
                    } => {
                        main_frequencies[271 + length_slot] += 1;
                        offset_frequencies[offset_slot] += 1;
                        // Only slots above 9 send their low four offset bits through the
                        // low-offset table.
                        if offset_slot > 9 {
                            low_offset_frequencies[offset_extra & 0x0f] += 1;
                        }
                    }
                }
                match_state.remember(length, offset);
            }
        }
    }
    // The end-of-block symbol is always emitted exactly once.
    main_frequencies[256] += 1;

    let mut table_lengths = [0u8; TABLE_COUNT];
    // Give the low-offset table one symbol when it would otherwise be empty.
    if low_offset_frequencies
        .iter()
        .all(|&frequency| frequency == 0)
    {
        low_offset_frequencies[0] = 1;
    }
    let main_lengths = huffman::lengths_for_frequencies(&main_frequencies, 15);
    let offset_lengths = huffman::lengths_for_frequencies(&offset_frequencies, 15);
    let low_offset_lengths = huffman::lengths_for_frequencies(&low_offset_frequencies, 15);
    let length_lengths = huffman::lengths_for_frequencies(&length_frequencies, 15);
    // The four tables are transmitted as one concatenated run of code lengths.
    table_lengths[..MAIN_COUNT].copy_from_slice(&main_lengths);
    table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT].copy_from_slice(&offset_lengths);
    table_lengths[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT]
        .copy_from_slice(&low_offset_lengths);
    table_lengths[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..].copy_from_slice(&length_lengths);

    let level_tokens = encode_table_level_tokens(&table_lengths);
    let level_lengths = level_code_lengths(&level_tokens);
    let level_codes = canonical_codes(&level_lengths)?;
    let main_codes = canonical_codes(&table_lengths[..MAIN_COUNT])?;

    let mut bits = BitWriter::default();
    // Two zero header bits. NOTE(review): presumably "not a PPMd block" followed by "do not keep
    // the previous tables" — confirm against the decoder's table reader.
    bits.write_bit(false);
    bits.write_bit(false);
    // Level-table code lengths, four bits each.
    for &len in &level_lengths {
        bits.write_bits(len as u32, 4);
    }
    // The concatenated code lengths, coded with the level table.
    for token in level_tokens {
        let code = level_codes[token.symbol].ok_or(Error::InvalidData(
            "RAR 2.9 encoder missing level Huffman code",
        ))?;
        bits.write_bits(code.code as u32, code.len);
        if token.extra_bits != 0 {
            bits.write_bits(token.extra_value as u32, token.extra_bits);
        }
    }
    let offset_codes = canonical_codes(&table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
    let low_offset_codes = canonical_codes(
        &table_lengths[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT],
    )?;
    let length_codes =
        canonical_codes(&table_lengths[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..])?;
    // VM filter records come first: symbol 257 followed by the record's raw bytes.
    for filter in initial_filters {
        let code = main_codes[257].ok_or(Error::InvalidData(
            "RAR 2.9 encoder missing VM filter Huffman code",
        ))?;
        bits.write_bits(code.code as u32, code.len);
        for &byte in filter {
            bits.write_bits(u32::from(byte), 8);
        }
    }
    // Pass 2: emit the tokens, replaying the match state from scratch.
    let mut match_state = EncoderMatchState::default();
    for token in tokens {
        match token {
            EncodeToken::Literal(byte) => {
                let code = main_codes[byte as usize].ok_or(Error::InvalidData(
                    "RAR 2.9 encoder missing literal Huffman code",
                ))?;
                bits.write_bits(code.code as u32, code.len);
            }
            EncodeToken::Match { length, offset } => {
                match match_state.encode_match(length, offset)? {
                    EncodedMatch::LastLengthRepeat => {
                        let code = main_codes[258].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing last-length repeat Huffman code",
                        ))?;
                        bits.write_bits(code.code as u32, code.len);
                    }
                    EncodedMatch::RepeatOffset {
                        index,
                        length_slot,
                        length_extra,
                    } => {
                        let code = main_codes[259 + index].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing repeat-offset Huffman code",
                        ))?;
                        bits.write_bits(code.code as u32, code.len);
                        // The length of a repeat-offset match uses the separate length table.
                        let length_code = length_codes[length_slot].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing repeat length Huffman code",
                        ))?;
                        bits.write_bits(length_code.code as u32, length_code.len);
                        if LENGTH_BITS[length_slot] != 0 {
                            bits.write_bits(length_extra as u32, LENGTH_BITS[length_slot]);
                        }
                    }
                    EncodedMatch::Fresh {
                        length_slot,
                        length_extra,
                        offset_slot,
                        offset_extra,
                    } => {
                        // A fresh match folds its length slot into the main-table symbol.
                        let code = main_codes[271 + length_slot].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing match Huffman code",
                        ))?;
                        bits.write_bits(code.code as u32, code.len);
                        if LENGTH_BITS[length_slot] != 0 {
                            bits.write_bits(length_extra as u32, LENGTH_BITS[length_slot]);
                        }
                        let offset = offset_codes[offset_slot].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing offset Huffman code",
                        ))?;
                        bits.write_bits(offset.code as u32, offset.len);
                        if offset_slot > 9 {
                            // High extra bits are written raw; the low four bits go through
                            // the low-offset Huffman table.
                            let offset_bits = OFFSET_BITS[offset_slot];
                            if offset_bits > 4 {
                                bits.write_bits((offset_extra >> 4) as u32, offset_bits - 4);
                            }
                            let low_offset =
                                low_offset_codes[offset_extra & 0x0f].ok_or(Error::InvalidData(
                                    "RAR 2.9 encoder missing low-offset Huffman code",
                                ))?;
                            bits.write_bits(low_offset.code as u32, low_offset.len);
                        } else if OFFSET_BITS[offset_slot] != 0 {
                            bits.write_bits(offset_extra as u32, OFFSET_BITS[offset_slot]);
                        }
                    }
                }
                match_state.remember(length, offset);
            }
        }
    }
    let end = main_codes[256].ok_or(Error::InvalidData(
        "RAR 2.9 encoder missing end-of-block Huffman code",
    ))?;
    bits.write_bits(end.code as u32, end.len);
    // One set bit after the end-of-block symbol. NOTE(review): presumably tells the decoder that
    // a new table (rather than a new file) follows — confirm against the decoder.
    bits.write_bit(true);
    Ok(bits.finish())
}
745
746fn encoded_filter_records(filters: &[OwnedVmFilterRecord]) -> Result<Vec<Vec<u8>>> {
747 let mut programs: Vec<&'static [u8]> = Vec::new();
748 let mut records = Vec::with_capacity(filters.len());
749 for filter in filters {
750 let existing = (filter.code != RAR3_AUDIO_FILTER_BYTECODE)
751 .then(|| programs.iter().position(|&code| code == filter.code))
752 .flatten();
753 let (program_selector, include_code) = match existing {
754 Some(index) => (
755 u32::try_from(index + 1)
756 .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?,
757 false,
758 ),
759 None => {
760 let selector = if programs.is_empty() {
761 0
762 } else {
763 u32::try_from(programs.len() + 1)
764 .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?
765 };
766 programs.push(filter.code);
767 (selector, true)
768 }
769 };
770 records.push(encode_vm_filter_record_inner(
771 VmFilterRecord {
772 block_start: filter.block_start,
773 block_size: filter.block_size,
774 init_regs: &filter.init_regs,
775 code: filter.code,
776 },
777 program_selector,
778 include_code,
779 )?);
780 }
781 Ok(records)
782}
783
/// Borrowed view of one filter record, as consumed by `encode_vm_filter_record_inner`.
#[derive(Debug, Clone, Copy)]
struct VmFilterRecord<'a> {
    /// Offset of the filtered block; must fit in `u32` when serialized.
    block_start: usize,
    /// Length of the filtered block; must be non-zero and fit in `u32` when serialized.
    block_size: usize,
    /// `(register index, value)` pairs; indices must be below 7.
    init_regs: &'a [(usize, u32)],
    /// VM program bytes, written only when the record introduces a new program.
    code: &'a [u8],
}
791
792fn encode_vm_filter_record_inner(
793 record: VmFilterRecord<'_>,
794 program_selector: u32,
795 include_code: bool,
796) -> Result<Vec<u8>> {
797 if record.block_size == 0 {
798 return Err(Error::InvalidData("RAR 2.9 VM filter block is empty"));
799 }
800 if include_code && record.code.is_empty() {
801 return Err(Error::InvalidData("RAR 2.9 VM filter bytecode is empty"));
802 }
803
804 let mut body = BitWriter::default();
805 body.write_encoded_u32(program_selector);
806 body.write_encoded_u32(
807 u32::try_from(record.block_start)
808 .map_err(|_| Error::InvalidData("RAR 2.9 VM block start overflows"))?,
809 );
810 body.write_encoded_u32(
811 u32::try_from(record.block_size)
812 .map_err(|_| Error::InvalidData("RAR 2.9 VM block size overflows"))?,
813 );
814 if !record.init_regs.is_empty() {
815 let mut mask = 0u32;
816 for &(index, _) in record.init_regs {
817 if index >= 7 {
818 return Err(Error::InvalidData(
819 "RAR 2.9 VM init register index is invalid",
820 ));
821 }
822 mask |= 1 << index;
823 }
824 body.write_bits(mask, 7);
825 for index in 0..7 {
826 if let Some((_, value)) = record.init_regs.iter().find(|(reg, _)| *reg == index) {
827 body.write_encoded_u32(*value);
828 }
829 }
830 }
831 if include_code {
832 body.write_encoded_u32(
833 u32::try_from(record.code.len())
834 .map_err(|_| Error::InvalidData("RAR 2.9 VM code size overflows"))?,
835 );
836 for &byte in record.code {
837 body.write_bits(u32::from(byte), 8);
838 }
839 }
840 let body = body.finish();
841
842 let mut out = Vec::new();
843 let mut first = 0x80 | 0x20;
844 if !record.init_regs.is_empty() {
845 first |= 0x10;
846 }
847 match body.len() {
848 1..=6 => first |= (body.len() as u8) - 1,
849 7..=262 => {
850 first |= 6;
851 out.push((body.len() - 7) as u8);
852 }
853 263..=65535 => {
854 first |= 7;
855 out.extend_from_slice(&(body.len() as u16).to_be_bytes());
856 }
857 _ => return Err(Error::InvalidData("RAR 2.9 VM filter record is too large")),
858 }
859 out.insert(0, first);
860 out.extend_from_slice(&body);
861 Ok(out)
862}
863
864fn rgb_encode(data: &[u8], width: usize, pos_r: usize) -> Result<Vec<u8>> {
865 if data.len() < 3 || width == 0 || !width.is_multiple_of(3) || width > data.len() || pos_r > 2 {
866 return Err(Error::InvalidData(
867 "RAR 2.9 RGB filter parameters are invalid",
868 ));
869 }
870 let mut work = data.to_vec();
871 for i in (pos_r..work.len().saturating_sub(2)).step_by(3) {
872 let green = work[i + 1];
873 work[i] = work[i].wrapping_sub(green);
874 work[i + 2] = work[i + 2].wrapping_sub(green);
875 }
876
877 let mut out = Vec::with_capacity(data.len());
878 for channel in 0..3 {
879 let mut prev = 0u8;
880 let mut i = channel;
881 while i < work.len() {
882 let predicted = if i >= width + 3 {
883 rgb_predict(prev, work[i - width], work[i - width - 3])
884 } else {
885 prev
886 };
887 let byte = work[i];
888 out.push(predicted.wrapping_sub(byte));
889 prev = byte;
890 i += 3;
891 }
892 }
893 Ok(out)
894}
895
/// Applies the forward AUDIO filter to `data` and returns the transformed bytes.
///
/// The input is treated as `channels` interleaved byte streams. Channels are processed one
/// after another and their residuals are appended to the output channel by channel. Every
/// channel runs its own adaptive linear predictor whose three coefficients are nudged by one
/// step at every 32nd sample of the channel (starting with the first).
///
/// # Errors
/// Returns `Error::InvalidData` when `channels` is zero or greater than 32.
fn audio_encode(data: &[u8], channels: usize) -> Result<Vec<u8>> {
    if channels == 0 || channels > 32 {
        return Err(Error::InvalidData(
            "RAR 2.9 AUDIO filter channel count is invalid",
        ));
    }
    let mut out = Vec::with_capacity(data.len());
    for channel in 0..channels {
        // Predictor state is reset for every channel.
        let mut prev_byte = 0u32;
        let mut prev_delta = 0i32;
        let mut d1 = 0i32;
        let mut d2 = 0i32;
        let mut k1 = 0i32;
        let mut k2 = 0i32;
        let mut k3 = 0i32;
        // Accumulated error of the current predictor (index 0) and of the six candidate
        // adjustments (k1, k2, k3 each decreased or increased).
        let mut dif = [0u32; 7];
        let mut byte_count = 0usize;
        let mut i = channel;
        while i < data.len() {
            // Shift the delta history: d1 is the latest delta, d2 and d3 are older differences.
            let d3 = d2;
            d2 = prev_delta - d1;
            d1 = prev_delta;
            let predicted = ((8 * prev_byte as i32 + k1 * d1 + k2 * d2 + k3 * d3) >> 3) & 0xff;
            let decoded = data[i];
            // The stored byte is the prediction minus the original sample.
            let encoded = (predicted as u8).wrapping_sub(decoded);
            out.push(encoded);
            prev_delta = decoded.wrapping_sub(prev_byte as u8) as i8 as i32;
            prev_byte = decoded as u32;
            let d = (encoded as i8 as i32) << 3;
            dif[0] += d.unsigned_abs();
            dif[1] += (d - d1).unsigned_abs();
            dif[2] += (d + d1).unsigned_abs();
            dif[3] += (d - d2).unsigned_abs();
            dif[4] += (d + d2).unsigned_abs();
            dif[5] += (d - d3).unsigned_abs();
            dif[6] += (d + d3).unsigned_abs();
            if byte_count & 0x1f == 0 {
                // Pick the candidate with the smallest accumulated error (ties keep the lowest
                // index) and clear all accumulators for the next window.
                let mut min = dif[0];
                let mut min_index = 0usize;
                dif[0] = 0;
                for (index, value) in dif.iter_mut().enumerate().skip(1) {
                    if *value < min {
                        min = *value;
                        min_index = index;
                    }
                    *value = 0;
                }
                // Move the winning coefficient one step; index 0 leaves the predictor as is.
                match min_index {
                    1 if k1 >= -16 => k1 -= 1,
                    2 if k1 < 16 => k1 += 1,
                    3 if k2 >= -16 => k2 -= 1,
                    4 if k2 < 16 => k2 += 1,
                    5 if k3 >= -16 => k3 -= 1,
                    6 if k3 < 16 => k3 += 1,
                    _ => {}
                }
            }
            byte_count += 1;
            i += channels;
        }
    }
    Ok(out)
}
959
/// Applies the forward (encoder-side) Itanium filter transform to `data` in place.
///
/// The buffer is walked in 16-byte steps for as long as the step start is
/// below `data.len() - 21`; buffers of 21 bytes or fewer are left untouched.
/// For each step the low bits of its first byte select which bit positions
/// (shifts 2..=4) are inspected. Where the 4-bit field at a selected position
/// equals 5, the 20-bit value below it has the step's address added to it,
/// wrapping within 20 bits. The address is `file_offset >> 4` plus the step
/// index.
fn itanium_encode(data: &mut [u8], file_offset: u32) {
    let scan_len = match data.len().checked_sub(21) {
        Some(len) if len > 0 => len,
        _ => return,
    };
    let first_address = file_offset >> 4;
    for (step_index, step_start) in (0..scan_len).step_by(16).enumerate() {
        let address = first_address.wrapping_add(step_index as u32);
        let selector = (0x334b_0000u32 >> (data[step_start] & 0x1e)) & 3;
        if selector == 0 {
            continue;
        }
        for shift in (selector + 1)..=4 {
            let at = step_start + (shift as usize * 5 - 8);
            if (data[at + 3] >> shift) & 15 != 5 {
                continue;
            }
            let word = u32::from_le_bytes([data[at], data[at + 1], data[at + 2], data[at + 3]]);
            let relocated = (word >> shift).wrapping_add(address) & 0x000f_ffff;
            let patched = (word & !(0x000f_ffff << shift)) | (relocated << shift);
            data[at..at + 4].copy_from_slice(&patched.to_le_bytes());
        }
    }
}
986
/// One unit of LZ tokenizer output: either a single literal byte or a
/// back-reference into data that precedes the current position.
#[derive(Debug, Clone, Copy)]
enum EncodeToken {
    /// A byte emitted verbatim.
    Literal(u8),
    /// A copy of `length` bytes that starts `offset` bytes before the current
    /// position.
    Match { length: usize, offset: usize },
}
992
/// Encoder-side record of recently used matches, used to pick the shortest
/// representation for the next match.
#[derive(Debug, Clone, Copy, Default)]
struct EncoderMatchState {
    /// The most recently used offsets, most recent first; `0` marks an unused
    /// slot.
    old_offsets: [usize; 4],
    /// Offset of the last remembered match (`0` before any match).
    last_offset: usize,
    /// Length of the last remembered match (`0` before any match).
    last_length: usize,
}
999
/// The representation chosen for a match by
/// `EncoderMatchState::encode_match`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EncodedMatch {
    /// Same offset and length as the previously remembered match.
    LastLengthRepeat,
    /// Offset equals one of the remembered offsets; only the length is coded.
    RepeatOffset {
        /// Position of the offset inside `old_offsets`.
        index: usize,
        /// Index into the length base/bits tables.
        length_slot: usize,
        /// Value stored in the slot's extra bits.
        length_extra: usize,
    },
    /// A match whose length and offset are both coded explicitly.
    Fresh {
        /// Index into the length base/bits tables.
        length_slot: usize,
        /// Value stored in the length slot's extra bits.
        length_extra: usize,
        /// Index into the offset base/bits tables.
        offset_slot: usize,
        /// Value stored in the offset slot's extra bits.
        offset_extra: usize,
    },
}
1015
1016impl EncoderMatchState {
1017 fn encode_match(&self, length: usize, offset: usize) -> Result<EncodedMatch> {
1018 if offset == self.last_offset && length == self.last_length && self.last_length != 0 {
1019 return Ok(EncodedMatch::LastLengthRepeat);
1020 }
1021 if let Some(index) = self
1022 .old_offsets
1023 .iter()
1024 .position(|&old_offset| old_offset == offset && old_offset != 0)
1025 {
1026 let (length_slot, length_extra) = length_slot_for_repeat_match(length)?;
1027 return Ok(EncodedMatch::RepeatOffset {
1028 index,
1029 length_slot,
1030 length_extra,
1031 });
1032 }
1033 let encoded_length =
1034 length
1035 .checked_sub(match_length_adjustment(offset))
1036 .ok_or(Error::InvalidData(
1037 "RAR 2.9 adjusted match length underflows",
1038 ))?;
1039 let (length_slot, length_extra) = length_slot_for_match(encoded_length)?;
1040 let (offset_slot, offset_extra) = offset_slot_for_match(offset)?;
1041 Ok(EncodedMatch::Fresh {
1042 length_slot,
1043 length_extra,
1044 offset_slot,
1045 offset_extra,
1046 })
1047 }
1048
1049 fn remember(&mut self, length: usize, offset: usize) {
1050 if offset == self.last_offset && length == self.last_length && self.last_length != 0 {
1051 return;
1052 }
1053 if let Some(index) = self
1054 .old_offsets
1055 .iter()
1056 .position(|&old_offset| old_offset == offset)
1057 {
1058 self.old_offsets[..=index].rotate_right(1);
1059 } else {
1060 self.old_offsets.rotate_right(1);
1061 self.old_offsets[0] = offset;
1062 }
1063 self.last_offset = offset;
1064 self.last_length = length;
1065 }
1066}
1067
1068fn encode_tokens(input: &[u8], history: &[u8], options: EncodeOptions) -> Vec<EncodeToken> {
1069 let mut tokens = Vec::new();
1070 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1071 let history = &history[history.len().saturating_sub(options.max_match_distance)..];
1072 let mut combined = Vec::with_capacity(history.len() + input.len());
1073 combined.extend_from_slice(history);
1074 combined.extend_from_slice(input);
1075 for history_pos in 0..history.len().saturating_sub(2) {
1076 insert_match_position(&combined, history_pos, &mut buckets);
1077 }
1078
1079 let mut pos = history.len();
1080 let end = combined.len();
1081 let mut state = EncoderMatchState::default();
1082 while pos < end {
1083 if let Some(candidate) = best_match(&combined, pos, end, &buckets, options, &state) {
1084 if should_lazy_emit_literal(&combined, pos, end, &buckets, options, &state, candidate) {
1085 tokens.push(EncodeToken::Literal(combined[pos]));
1086 insert_match_position(&combined, pos, &mut buckets);
1087 pos += 1;
1088 continue;
1089 }
1090 let MatchCandidate { length, offset, .. } = candidate;
1091 tokens.push(EncodeToken::Match { length, offset });
1092 state.remember(length, offset);
1093 for history_pos in pos..pos + length {
1094 insert_match_position(&combined, history_pos, &mut buckets);
1095 }
1096 pos += length;
1097 } else {
1098 tokens.push(EncodeToken::Literal(combined[pos]));
1099 insert_match_position(&combined, pos, &mut buckets);
1100 pos += 1;
1101 }
1102 }
1103 tokens
1104}
1105
1106fn should_lazy_emit_literal(
1107 input: &[u8],
1108 pos: usize,
1109 end: usize,
1110 buckets: &[Vec<usize>],
1111 options: EncodeOptions,
1112 state: &EncoderMatchState,
1113 current: MatchCandidate,
1114) -> bool {
1115 if !options.lazy_matching || pos + 1 >= end {
1116 return false;
1117 }
1118 let lookahead = options.lazy_lookahead.max(1);
1119 (1..=lookahead)
1120 .take_while(|offset| pos + offset < end)
1121 .any(|offset| {
1122 best_match(input, pos + offset, end, buckets, options, state).is_some_and(|next| {
1123 let skipped_literal_score = offset as isize * 8;
1124 next.score > current.score + skipped_literal_score
1125 })
1126 })
1127}
1128
/// A possible match at the current position together with its heuristic value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct MatchCandidate {
    /// Number of matching bytes.
    length: usize,
    /// Distance back to the start of the match source.
    offset: usize,
    /// `length * 8` minus the estimated cost of coding the match; higher is
    /// better.
    score: isize,
}
1135
1136fn encode_ppmd_tokens(input: &[u8], lz_escapes: bool) -> Vec<PpmdEncodeToken> {
1137 if !lz_escapes {
1138 return input
1139 .iter()
1140 .copied()
1141 .map(PpmdEncodeToken::Literal)
1142 .collect();
1143 }
1144
1145 let mut tokens = Vec::new();
1146 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1147 let mut pos = 0usize;
1148 while pos < input.len() {
1149 if let Some(length) = ppmd_offset_one_repeat(input, pos) {
1150 tokens.push(PpmdEncodeToken::RepeatOffsetOne { length });
1151 for history_pos in pos..pos + length {
1152 insert_match_position(input, history_pos, &mut buckets);
1153 }
1154 pos += length;
1155 continue;
1156 }
1157
1158 if let Some((length, offset)) = best_ppmd_match(input, pos, &buckets) {
1159 tokens.push(PpmdEncodeToken::Match { offset, length });
1160 for history_pos in pos..pos + length {
1161 insert_match_position(input, history_pos, &mut buckets);
1162 }
1163 pos += length;
1164 continue;
1165 }
1166
1167 tokens.push(PpmdEncodeToken::Literal(input[pos]));
1168 insert_match_position(input, pos, &mut buckets);
1169 pos += 1;
1170 }
1171 tokens
1172}
1173
1174fn ppmd_offset_one_repeat(input: &[u8], pos: usize) -> Option<usize> {
1175 if pos == 0 || input[pos] != input[pos - 1] {
1176 return None;
1177 }
1178 let mut length = 0usize;
1179 while pos + length < input.len()
1180 && input[pos + length] == input[pos - 1]
1181 && length < MAX_PPMD_REPEAT_LENGTH
1182 {
1183 length += 1;
1184 }
1185 (length >= 4).then_some(length)
1186}
1187
1188fn best_ppmd_match(input: &[u8], pos: usize, buckets: &[Vec<usize>]) -> Option<(usize, usize)> {
1189 let max_offset = pos.min(0x1000001).min(MAX_ENCODER_MATCH_OFFSET);
1190 let max_length = (input.len() - pos).min(MAX_PPMD_MATCH_LENGTH);
1191 if max_offset < 2 || max_length < MIN_PPMD_MATCH_LENGTH || pos + 2 >= input.len() {
1192 return None;
1193 }
1194 let bucket = &buckets[match_hash(input, pos)];
1195 let mut best = None;
1196 let mut checked = 0usize;
1197 for &candidate in bucket.iter().rev() {
1198 if candidate >= pos {
1199 continue;
1200 }
1201 let offset = pos - candidate;
1202 if offset > max_offset {
1203 break;
1204 }
1205 if offset < 2 {
1206 continue;
1207 }
1208 checked += 1;
1209 let mut length = 0usize;
1210 while length < max_length && input[pos + length] == input[pos + length - offset] {
1211 length += 1;
1212 }
1213 if length >= MIN_PPMD_MATCH_LENGTH
1214 && best.is_none_or(|(best_length, best_offset)| {
1215 length > best_length || (length == best_length && offset < best_offset)
1216 })
1217 {
1218 best = Some((length, offset));
1219 if length == max_length {
1220 break;
1221 }
1222 }
1223 if checked >= MAX_MATCH_CANDIDATES {
1224 break;
1225 }
1226 }
1227 best
1228}
1229
1230fn best_match(
1231 input: &[u8],
1232 pos: usize,
1233 end: usize,
1234 buckets: &[Vec<usize>],
1235 options: EncodeOptions,
1236 state: &EncoderMatchState,
1237) -> Option<MatchCandidate> {
1238 let max_offset = pos.min(options.max_match_distance);
1239 let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
1240 if options.max_match_candidates == 0
1241 || max_offset == 0
1242 || max_length < 4
1243 || pos + 2 >= input.len()
1244 {
1245 return None;
1246 }
1247 let bucket = &buckets[match_hash(input, pos)];
1248 let mut best = None;
1249 let mut checked = 0usize;
1250 for offset in state.old_offsets {
1251 if offset == 0 || offset > max_offset {
1252 continue;
1253 }
1254 let length = match_length(input, pos, offset, max_length);
1255 consider_match_candidate(&mut best, state, length, offset);
1256 }
1257 for &candidate in bucket.iter().rev() {
1258 if candidate >= pos {
1259 continue;
1260 }
1261 let offset = pos - candidate;
1262 if offset > max_offset {
1263 break;
1264 }
1265 checked += 1;
1266 let length = match_length(input, pos, offset, max_length);
1267 consider_match_candidate(&mut best, state, length, offset);
1268 if best.is_some_and(|candidate| candidate.length == max_length) {
1269 break;
1270 }
1271 if checked >= options.max_match_candidates {
1272 break;
1273 }
1274 }
1275 best
1276}
1277
/// Counts how many bytes starting at `pos` equal the bytes `offset` positions
/// earlier, stopping at the first mismatch or after `max_length` bytes.
fn match_length(input: &[u8], pos: usize, offset: usize, max_length: usize) -> usize {
    (0..max_length)
        .take_while(|&matched| input[pos + matched] == input[pos + matched - offset])
        .count()
}
1285
1286fn consider_match_candidate(
1287 best: &mut Option<MatchCandidate>,
1288 state: &EncoderMatchState,
1289 length: usize,
1290 offset: usize,
1291) {
1292 if length < 4 {
1293 return;
1294 }
1295 let Ok(cost) = estimated_match_cost(state, length, offset) else {
1296 return;
1297 };
1298 let score = (length as isize * 8) - cost as isize;
1299 let candidate = MatchCandidate {
1300 length,
1301 offset,
1302 score,
1303 };
1304 if best.is_none_or(|best| {
1305 candidate.score > best.score
1306 || (candidate.score == best.score
1307 && (candidate.length > best.length
1308 || (candidate.length == best.length && candidate.offset < best.offset)))
1309 }) {
1310 *best = Some(candidate);
1311 }
1312}
1313
1314fn estimated_match_cost(state: &EncoderMatchState, length: usize, offset: usize) -> Result<usize> {
1315 match state.encode_match(length, offset)? {
1316 EncodedMatch::LastLengthRepeat => Ok(2),
1317 EncodedMatch::RepeatOffset { length_slot, .. } => {
1318 Ok(5 + usize::from(LENGTH_BITS[length_slot]))
1319 }
1320 EncodedMatch::Fresh {
1321 length_slot,
1322 offset_slot,
1323 ..
1324 } => {
1325 let low_offset_cost = usize::from(offset_slot > 9) * 4;
1326 Ok(8 + usize::from(LENGTH_BITS[length_slot])
1327 + usize::from(OFFSET_BITS[offset_slot])
1328 + low_offset_cost)
1329 }
1330 }
1331}
1332
/// Returns how much is subtracted from a match length before it is coded,
/// depending on the offset: 0 below `0x2000`, 1 below `0x40000`, otherwise 2.
fn match_length_adjustment(offset: usize) -> usize {
    match offset {
        0..=0x1fff => 0,
        0x2000..=0x3_ffff => 1,
        _ => 2,
    }
}
1336
1337fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
1338 if pos + 2 < input.len() {
1339 buckets[match_hash(input, pos)].push(pos);
1340 }
1341}
1342
1343fn match_hash(input: &[u8], pos: usize) -> usize {
1344 let value =
1345 ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1346 value & (MATCH_HASH_BUCKETS - 1)
1347}
1348
1349fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
1350 if length < 3 {
1351 return Err(Error::InvalidData("RAR 2.9 match length is too short"));
1352 }
1353 let adjusted = length - 3;
1354 for (slot, &base) in LENGTH_BASES.iter().enumerate() {
1355 let extra_bits = LENGTH_BITS[slot];
1356 let max = base
1357 + if extra_bits == 0 {
1358 0
1359 } else {
1360 (1usize << extra_bits) - 1
1361 };
1362 if adjusted >= base && adjusted <= max {
1363 return Ok((slot, adjusted - base));
1364 }
1365 }
1366 Err(Error::InvalidData("RAR 2.9 match length is too long"))
1367}
1368
1369fn length_slot_for_repeat_match(length: usize) -> Result<(usize, usize)> {
1370 if length < 2 {
1371 return Err(Error::InvalidData(
1372 "RAR 2.9 repeat match length is too short",
1373 ));
1374 }
1375 let adjusted = length - 2;
1376 for (slot, &base) in LENGTH_BASES.iter().enumerate() {
1377 let extra_bits = LENGTH_BITS[slot];
1378 let max = base
1379 + if extra_bits == 0 {
1380 0
1381 } else {
1382 (1usize << extra_bits) - 1
1383 };
1384 if adjusted >= base && adjusted <= max {
1385 return Ok((slot, adjusted - base));
1386 }
1387 }
1388 Err(Error::InvalidData(
1389 "RAR 2.9 repeat match length is too long",
1390 ))
1391}
1392
1393fn offset_slot_for_match(offset: usize) -> Result<(usize, usize)> {
1394 if offset == 0 {
1395 return Err(Error::InvalidData("RAR 2.9 match offset is zero"));
1396 }
1397 let adjusted = offset - 1;
1398 for (slot, &base) in OFFSET_BASES.iter().enumerate() {
1399 let extra_bits = OFFSET_BITS[slot];
1400 let max = base
1401 + if extra_bits == 0 {
1402 0
1403 } else {
1404 (1usize << extra_bits) - 1
1405 };
1406 if adjusted >= base && adjusted <= max {
1407 return Ok((slot, adjusted - base));
1408 }
1409 }
1410 Err(Error::InvalidData("RAR 2.9 match offset is too large"))
1411}
1412
/// One symbol of the run-length coded table of Huffman code lengths.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct LevelToken {
    /// Level alphabet symbol: a plain value, or 16..=19 for the run symbols.
    symbol: usize,
    /// Number of extra bits that follow the symbol (0, 3 or 7).
    extra_bits: u8,
    /// Value carried in the extra bits.
    extra_value: u8,
}

impl LevelToken {
    /// Builds a run token whose extra bits hold `count - minimum`.
    const fn run(symbol: usize, extra_bits: u8, count: usize, minimum: usize) -> Self {
        Self {
            symbol,
            extra_bits,
            extra_value: (count - minimum) as u8,
        }
    }

    /// A symbol without extra bits.
    const fn plain(symbol: usize) -> Self {
        Self {
            symbol,
            extra_bits: 0,
            extra_value: 0,
        }
    }

    /// Symbol 16: repeat the previous value `count` times, stored as
    /// `count - 3` in 3 bits.
    const fn repeat_previous_short(count: usize) -> Self {
        Self::run(16, 3, count, 3)
    }

    /// Symbol 17: repeat the previous value `count` times, stored as
    /// `count - 11` in 7 bits.
    const fn repeat_previous_long(count: usize) -> Self {
        Self::run(17, 7, count, 11)
    }

    /// Symbol 18: `count` zeros, stored as `count - 3` in 3 bits.
    const fn zero_run_short(count: usize) -> Self {
        Self::run(18, 3, count, 3)
    }

    /// Symbol 19: `count` zeros, stored as `count - 11` in 7 bits.
    const fn zero_run_long(count: usize) -> Self {
        Self::run(19, 7, count, 11)
    }
}
1461
1462fn encode_table_level_tokens(lengths: &[u8; TABLE_COUNT]) -> Vec<LevelToken> {
1463 encode_level_tokens(lengths)
1464}
1465
1466fn encode_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
1467 let mut tokens = Vec::new();
1468 let mut pos = 0usize;
1469 let mut previous = None;
1470 while pos < lengths.len() {
1471 let value = lengths[pos];
1472 let mut run = 1usize;
1473 while pos + run < lengths.len() && lengths[pos + run] == value {
1474 run += 1;
1475 }
1476
1477 if value == 0 {
1478 emit_zero_level_run(&mut tokens, run);
1479 previous = Some(0);
1480 pos += run;
1481 continue;
1482 }
1483
1484 if previous == Some(value) && run >= 3 {
1485 emit_repeat_level_run(&mut tokens, run);
1486 pos += run;
1487 continue;
1488 }
1489
1490 tokens.push(LevelToken::plain(value as usize));
1491 previous = Some(value);
1492 pos += 1;
1493 }
1494 tokens
1495}
1496
1497fn emit_repeat_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1498 while run != 0 {
1499 if run >= 11 {
1500 let mut chunk = run.min(138);
1501 if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1502 chunk -= 3;
1503 }
1504 tokens.push(LevelToken::repeat_previous_long(chunk));
1505 run -= chunk;
1506 } else if run >= 3 {
1507 let chunk = run.min(10);
1508 tokens.push(LevelToken::repeat_previous_short(chunk));
1509 run -= chunk;
1510 } else {
1511 break;
1512 }
1513 }
1514}
1515
1516fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1517 while run != 0 {
1518 if run >= 11 {
1519 let mut chunk = run.min(138);
1520 if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1521 chunk -= 3;
1522 }
1523 tokens.push(LevelToken::zero_run_long(chunk));
1524 run -= chunk;
1525 } else if run >= 3 {
1526 let chunk = run.min(10);
1527 tokens.push(LevelToken::zero_run_short(chunk));
1528 run -= chunk;
1529 } else {
1530 tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
1531 break;
1532 }
1533 }
1534}
1535
1536fn level_code_lengths(tokens: &[LevelToken]) -> [u8; LEVEL_COUNT] {
1537 let mut lengths = [0u8; LEVEL_COUNT];
1538 let mut used = [false; LEVEL_COUNT];
1539 for token in tokens {
1540 used[token.symbol] = true;
1541 }
1542 let used_count = used.iter().filter(|&&used| used).count();
1543 let len = huffman::bits_for_symbol_count(used_count);
1544 for (symbol, is_used) in used.into_iter().enumerate() {
1545 if is_used {
1546 lengths[symbol] = len;
1547 }
1548 }
1549 lengths
1550}
1551
/// A canonical Huffman code word assigned to one symbol.
#[derive(Debug, Clone, Copy)]
struct HuffmanCode {
    /// The code value.
    code: u16,
    /// The code length in bits (1..=15 as produced by `canonical_codes`).
    len: u8,
}
1557
1558fn canonical_codes(lengths: &[u8]) -> Result<Vec<Option<HuffmanCode>>> {
1559 let mut count = [0u16; 16];
1560 for &len in lengths {
1561 if len > 15 {
1562 return Err(Error::InvalidData("RAR 2.9 Huffman length is too large"));
1563 }
1564 if len != 0 {
1565 count[len as usize] += 1;
1566 }
1567 }
1568 validate_huffman_counts(&count)?;
1569
1570 let mut next_code = [0u16; 16];
1571 let mut code = 0u16;
1572 for len in 1..=15 {
1573 code = (code + count[len - 1]) << 1;
1574 next_code[len] = code;
1575 }
1576
1577 let mut codes = vec![None; lengths.len()];
1578 for (symbol, &len) in lengths.iter().enumerate() {
1579 if len == 0 {
1580 continue;
1581 }
1582 let code = next_code[len as usize];
1583 next_code[len as usize] += 1;
1584 codes[symbol] = Some(HuffmanCode { code, len });
1585 }
1586 Ok(codes)
1587}
1588
/// Decoder for RAR 2.9 compressed members.
///
/// The decoder keeps its tables, match history and output between members
/// unless it is reset, and it is `Clone` so that the streaming entry point can
/// snapshot and restore it.
#[derive(Debug, Clone)]
pub struct Unpack29 {
    /// Bit-level reader over the compressed input appended so far.
    bits: BitReader,
    /// Code lengths of all four Huffman tables from the last table read; new
    /// tables are decoded relative to these values.
    levels: [u8; TABLE_COUNT],
    /// Decoder for the main symbol alphabet.
    main: Huffman,
    /// Decoder for offset slots.
    offsets: Huffman,
    /// Decoder for the low offset bits.
    low_offsets: Huffman,
    /// Decoder for the length slots of repeat-offset matches.
    lengths: Huffman,
    /// The four offsets selectable with main symbols 259..=262.
    old_offsets: [usize; 4],
    /// Offset of the last match, reused by main symbol 258.
    last_offset: usize,
    /// Length of the last match, reused by main symbol 258 (`0` = none yet).
    last_length: usize,
    /// Last decoded low offset value; reset when LZ tables are read.
    last_low_offset: usize,
    /// Remaining repetitions of `last_low_offset`; reset when LZ tables are read.
    low_offset_repeats: usize,
    // NOTE(review): presumably the unfinished part of a match that did not fit
    // before the decode target — confirm against `drain_pending_match`.
    pending_match: Option<(usize, usize)>,
    /// `false` when tables must be read before decoding continues. Despite the
    /// name it is used for both LZ and PPMd blocks.
    in_lz_block: bool,
    /// Kind of block currently being decoded.
    block_mode: BlockMode,
    /// PPMd model used for PPMd blocks.
    ppmd: PpmdDecoder,
    /// PPMd escape symbol; starts at 2 and may be updated when a PPMd block
    /// is initialised.
    ppmd_esc: u8,
    /// VM filters. NOTE(review): presumably those still waiting to be applied
    /// to output — confirm.
    filters: Vec<VmFilter>,
    /// VM programs seen so far.
    programs: Vec<VmProgram>,
    // NOTE(review): presumably the index of the most recently used program —
    // confirm against the VM code reader.
    last_filter: usize,
    // NOTE(review): presumably the stream position of `output[0]` after
    // history trimming — confirm against `current_pos`/`trim_history`.
    base_offset: usize,
    /// Decoded bytes that are still retained.
    output: Vec<u8>,
}
1613
/// The compression method of the block currently being decoded, as selected by
/// the first bit of the block header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockMode {
    /// Huffman-coded LZ block.
    Lz,
    /// PPMd-coded block.
    Ppmd,
}
1619
/// One scheduled invocation of a VM program over a range of output.
#[derive(Debug, Clone)]
struct VmFilter {
    // NOTE(review): presumably an index into `Unpack29::programs` — confirm.
    program: usize,
    // NOTE(review): presumably the output position where the filtered block
    // begins — confirm.
    start: usize,
    // NOTE(review): presumably the filtered block length in bytes — confirm.
    size: usize,
    /// Initial values for the seven VM registers.
    regs: [u32; 7],
    /// Global data bytes supplied to this invocation.
    global_data: Vec<u8>,
}
1628
/// A VM program together with state kept between its invocations.
#[derive(Debug, Clone)]
struct VmProgram {
    /// Either a recognised standard filter or generic VM code.
    kind: VmProgramKind,
    // NOTE(review): presumably the block size of the latest invocation, reused
    // when a new one does not specify it — confirm.
    block_size: usize,
    // NOTE(review): presumably the number of times the program has run —
    // confirm.
    exec_count: u32,
    /// Global data bytes associated with the program.
    globals: Vec<u8>,
}
1636
/// How a VM program is represented.
#[derive(Debug, Clone)]
enum VmProgramKind {
    /// One of the known standard filters.
    Standard(StandardFilter),
    /// Arbitrary VM code held as a `rarvm::Program`.
    Generic(rarvm::Program),
}
1642
/// The standard filters that have a dedicated variant instead of being kept as
/// generic VM code.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StandardFilter {
    /// E8 filter.
    E8,
    /// E8/E9 filter.
    E8E9,
    /// Itanium filter.
    Itanium,
    /// Delta filter.
    Delta,
    /// RGB filter.
    Rgb,
    /// Audio filter.
    Audio,
}
1652
1653impl Unpack29 {
    /// Creates a decoder in its initial state: empty bit reader and Huffman
    /// tables, no match history, no filters or programs, no output, LZ block
    /// mode, and a PPMd escape symbol of 2.
    pub fn new() -> Self {
        Self {
            bits: BitReader::new(),
            levels: [0; TABLE_COUNT],
            main: Huffman::empty(),
            offsets: Huffman::empty(),
            low_offsets: Huffman::empty(),
            lengths: Huffman::empty(),
            old_offsets: [0; 4],
            last_offset: 0,
            last_length: 0,
            last_low_offset: 0,
            low_offset_repeats: 0,
            pending_match: None,
            // Forces a table read before the first symbol is decoded.
            in_lz_block: false,
            block_mode: BlockMode::Lz,
            ppmd: PpmdDecoder::new(),
            ppmd_esc: 2,
            filters: Vec::new(),
            programs: Vec::new(),
            last_filter: 0,
            base_offset: 0,
            output: Vec::new(),
        }
    }
1679
    /// Discards all decoder state, including history and buffered input, by
    /// replacing `self` with a freshly constructed decoder.
    pub fn reset_non_solid(&mut self) {
        *self = Self::new();
    }
1683
    /// Resets the decoder and decodes one member of `output_size` bytes from
    /// `input`, returning the decoded bytes.
    ///
    /// # Errors
    ///
    /// Returns whatever `decode_member` returns.
    pub fn decode_non_solid_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
        self.reset_non_solid();
        self.decode_member(input, output_size)
    }
1688
    /// Resets the decoder and decodes one member of `output_size` bytes from
    /// `input`, writing the decoded bytes to `out`.
    ///
    /// # Errors
    ///
    /// Returns whatever `decode_member_to` returns.
    pub fn decode_non_solid_member_to(
        &mut self,
        input: &[u8],
        output_size: usize,
        out: &mut impl Write,
    ) -> Result<()> {
        self.reset_non_solid();
        self.decode_member_to(input, output_size, out)
    }
1698
    /// Resets the decoder and decodes one member of `output_size` bytes,
    /// pulling compressed data from `input` on demand and writing the decoded
    /// bytes to `out`.
    ///
    /// # Errors
    ///
    /// Returns whatever `decode_member_from_reader` returns.
    pub fn decode_non_solid_member_from_reader(
        &mut self,
        input: &mut impl Read,
        output_size: usize,
        out: &mut impl Write,
    ) -> Result<()> {
        self.reset_non_solid();
        self.decode_member_from_reader(input, output_size, out)
    }
1708
1709 pub fn decode_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
1710 let start = self.current_pos();
1711 let target = start
1712 .checked_add(output_size)
1713 .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
1714 if !input.is_empty() {
1715 self.bits = BitReader::new();
1716 }
1717 self.bits.append(input);
1718 self.decode_until(target).map_err(|error| match error {
1719 Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1720 error => error,
1721 })?;
1722 self.finish_member().map_err(|error| match error {
1723 Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1724 error => error,
1725 })?;
1726 let out = self.filtered_range(start, target, start)?;
1727 self.trim_history(target, target);
1728 Ok(out)
1729 }
1730
1731 pub fn decode_member_to(
1732 &mut self,
1733 input: &[u8],
1734 output_size: usize,
1735 out: &mut impl Write,
1736 ) -> Result<()> {
1737 let start = self.current_pos();
1738 let final_target = start
1739 .checked_add(output_size)
1740 .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
1741 if !input.is_empty() {
1742 self.bits = BitReader::new();
1743 }
1744 self.bits.append(input);
1745
1746 let mut flushed = start;
1747 let mut target = start.saturating_add(STREAM_CHUNK).min(final_target);
1748 while flushed < final_target {
1749 self.decode_until(target)?;
1750 let safe_end = self.safe_flush_end(flushed, target, final_target)?;
1751 if safe_end <= flushed {
1752 if target == final_target {
1753 return Err(Error::InvalidData(
1754 "RAR 2.9 VM filter extends beyond output",
1755 ));
1756 }
1757 target = self
1758 .current_pos()
1759 .saturating_add(STREAM_CHUNK)
1760 .min(final_target);
1761 continue;
1762 }
1763
1764 let decoded = self.filtered_range(flushed, safe_end, start)?;
1765 out.write_all(&decoded)
1766 .map_err(|_| Error::InvalidData("RAR 2.9 output write failed"))?;
1767 flushed = safe_end;
1768 self.trim_history(flushed, self.current_pos());
1769 target = self
1770 .current_pos()
1771 .saturating_add(STREAM_CHUNK)
1772 .min(final_target);
1773 }
1774 self.finish_member()?;
1775 Ok(())
1776 }
1777
    /// Decodes one member of `output_size` bytes, reading compressed data from
    /// `input` in pieces of up to `INPUT_CHUNK` bytes as the decoder asks for
    /// more, and writing the filtered output to `out`.
    ///
    /// The bit reader is always replaced with an empty one first; other
    /// decoder state carries over from earlier calls. Output is decoded and
    /// flushed in steps of up to `STREAM_CHUNK` bytes, as in
    /// `decode_member_to`.
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidData`] when the target position overflows, when
    /// reading `input` or writing `out` fails, when the decoder needs more
    /// input after `input` reported end of data ("RAR 2.9 bitstream is
    /// truncated"), when nothing can be flushed although the whole member has
    /// been decoded, or when decoding or filtering fails.
    pub fn decode_member_from_reader(
        &mut self,
        input: &mut impl Read,
        output_size: usize,
        out: &mut impl Write,
    ) -> Result<()> {
        self.bits = BitReader::new();
        let start = self.current_pos();
        let final_target = start
            .checked_add(output_size)
            .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
        let mut flushed = start;
        let mut target = start.saturating_add(STREAM_CHUNK).min(final_target);
        let mut input_done = false;
        let mut buffer = [0u8; INPUT_CHUNK];

        while flushed < final_target {
            loop {
                // Snapshot the whole decoder so a failed attempt can be
                // repeated from the same state once more input is buffered.
                let checkpoint = self.clone();
                match self.decode_until(target) {
                    Ok(()) => break,
                    Err(Error::NeedMoreInput) if !input_done => {
                        // Roll back the partial attempt, then top up the input.
                        *self = checkpoint;
                        let read = input
                            .read(&mut buffer)
                            .map_err(|_| Error::InvalidData("RAR 2.9 input read failed"))?;
                        if read == 0 {
                            // End of input: retry once more; another
                            // `NeedMoreInput` is then reported as truncation.
                            input_done = true;
                        } else {
                            self.bits.append(&buffer[..read]);
                        }
                    }
                    Err(Error::NeedMoreInput) => {
                        return Err(Error::InvalidData("RAR 2.9 bitstream is truncated"));
                    }
                    Err(error) => return Err(error),
                }
            }

            let safe_end = self.safe_flush_end(flushed, target, final_target)?;
            if safe_end <= flushed {
                // Nothing can be written yet. If the member is already fully
                // decoded, no later step can change that.
                if target == final_target {
                    return Err(Error::InvalidData(
                        "RAR 2.9 VM filter extends beyond output",
                    ));
                }
                target = self
                    .current_pos()
                    .saturating_add(STREAM_CHUNK)
                    .min(final_target);
                continue;
            }

            let decoded = self.filtered_range(flushed, safe_end, start)?;
            out.write_all(&decoded)
                .map_err(|_| Error::InvalidData("RAR 2.9 output write failed"))?;
            flushed = safe_end;
            self.trim_history(flushed, self.current_pos());
            target = self
                .current_pos()
                .saturating_add(STREAM_CHUNK)
                .min(final_target);
        }
        // Consume the end-of-member marker with the same
        // snapshot / refill / retry scheme as above.
        loop {
            let checkpoint = self.clone();
            match self.finish_member() {
                Ok(()) => break,
                Err(Error::NeedMoreInput) if !input_done => {
                    *self = checkpoint;
                    let read = input
                        .read(&mut buffer)
                        .map_err(|_| Error::InvalidData("RAR 2.9 input read failed"))?;
                    if read == 0 {
                        input_done = true;
                    } else {
                        self.bits.append(&buffer[..read]);
                    }
                }
                Err(Error::NeedMoreInput) => {
                    return Err(Error::InvalidData("RAR 2.9 bitstream is truncated"));
                }
                Err(error) => return Err(error),
            }
        }
        // In PPMd mode the rest of `input` is pulled into the bit buffer.
        // NOTE(review): presumably so a following member can continue from the
        // buffered bits — confirm against the callers.
        if self.block_mode == BlockMode::Ppmd {
            loop {
                let read = input
                    .read(&mut buffer)
                    .map_err(|_| Error::InvalidData("RAR 2.9 input read failed"))?;
                if read == 0 {
                    break;
                }
                self.bits.append(&buffer[..read]);
            }
        }
        Ok(())
    }
1875
1876 fn decode_until(&mut self, target: usize) -> Result<()> {
1877 while self.current_pos() < target {
1878 self.drain_pending_match(target)?;
1879 if self.current_pos() >= target {
1880 break;
1881 }
1882 if !self.in_lz_block {
1883 self.read_tables()?;
1884 self.in_lz_block = true;
1885 }
1886 match self.block_mode {
1887 BlockMode::Lz => self.decode_lz(target)?,
1888 BlockMode::Ppmd => self.decode_ppmd(target)?,
1889 }
1890 }
1891 Ok(())
1892 }
1893
    /// Reads a block header at the next byte boundary.
    ///
    /// A leading 1 bit selects a PPMd block: the header byte is passed to the
    /// PPMd decoder's initialisation and nothing else is read. Otherwise the
    /// block is LZ: a second bit says whether the previous code lengths are
    /// kept as the base for the new ones, then the level code lengths and the
    /// run-length coded lengths of all four Huffman tables follow, from which
    /// the decoders are rebuilt.
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidData`] for a "repeat previous" symbol at the
    /// first table position or an invalid level symbol, and propagates errors
    /// from bit reading, PPMd initialisation and Huffman table construction.
    fn read_tables(&mut self) -> Result<()> {
        self.bits.align_byte();
        if self.bits.peek_bit()? != 0 {
            // PPMd block: the whole first byte (flag bit included) goes to
            // the PPMd initialisation.
            let first_byte = self.bits.read_bits(8)? as u8;
            self.ppmd
                .decode_init(first_byte, &mut self.bits, &mut self.ppmd_esc)?;
            self.block_mode = BlockMode::Ppmd;
            return Ok(());
        }
        // Consume the block-type bit that was only peeked at above.
        self.bits.read_bit()?;
        self.block_mode = BlockMode::Lz;
        let keep_tables = self.bits.read_bit()? != 0;
        self.last_low_offset = 0;
        self.low_offset_repeats = 0;
        if !keep_tables {
            self.levels = [0; TABLE_COUNT];
        }

        let level_lengths = Self::read_level_lengths(&mut self.bits)?;
        let level_decoder = Huffman::from_lengths(&level_lengths)?;
        let mut new_levels = [0u8; TABLE_COUNT];
        let mut pos = 0usize;
        while pos < TABLE_COUNT {
            let symbol = level_decoder.decode(&mut self.bits)?;
            match symbol {
                // Plain value: a delta against the previous table's length at
                // the same position, modulo 16.
                0..=15 => {
                    new_levels[pos] = (self.levels[pos].wrapping_add(symbol as u8)) & 0x0f;
                    pos += 1;
                }
                // Repeat the previous length 3..=10 times.
                16 => {
                    if pos == 0 {
                        return Err(Error::InvalidData("RAR 2.9 table repeat at start"));
                    }
                    let count = 3 + self.bits.read_bits(3)? as usize;
                    let value = new_levels[pos - 1];
                    fill_levels(&mut new_levels, &mut pos, count, value)?;
                }
                // Repeat the previous length 11..=138 times.
                17 => {
                    if pos == 0 {
                        return Err(Error::InvalidData("RAR 2.9 long table repeat at start"));
                    }
                    let count = 11 + self.bits.read_bits(7)? as usize;
                    let value = new_levels[pos - 1];
                    fill_levels(&mut new_levels, &mut pos, count, value)?;
                }
                // Run of 3..=10 zero lengths.
                18 => {
                    let count = 3 + self.bits.read_bits(3)? as usize;
                    fill_levels(&mut new_levels, &mut pos, count, 0)?;
                }
                // Run of 11..=138 zero lengths.
                19 => {
                    let count = 11 + self.bits.read_bits(7)? as usize;
                    fill_levels(&mut new_levels, &mut pos, count, 0)?;
                }
                _ => return Err(Error::InvalidData("RAR 2.9 invalid level symbol")),
            }
        }

        // The combined table is split in order: main, offsets, low offsets,
        // lengths.
        self.levels = new_levels;
        self.main = Huffman::from_lengths(&self.levels[..MAIN_COUNT])?;
        self.offsets = Huffman::from_lengths(&self.levels[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
        self.low_offsets = Huffman::from_lengths(
            &self.levels[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT],
        )?;
        self.lengths =
            Huffman::from_lengths(&self.levels[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..])?;
        Ok(())
    }
1961
1962 fn read_level_lengths(bits: &mut BitReader) -> Result<[u8; LEVEL_COUNT]> {
1963 let mut lengths = [0u8; LEVEL_COUNT];
1964 let mut pos = 0usize;
1965 while pos < LEVEL_COUNT {
1966 let value = bits.read_bits(4)? as u8;
1967 if value == 15 {
1968 let zero_count = bits.read_bits(4)? as usize;
1969 if zero_count == 0 {
1970 lengths[pos] = 15;
1971 pos += 1;
1972 } else {
1973 pos = pos.saturating_add(zero_count + 2).min(LEVEL_COUNT);
1974 }
1975 } else {
1976 lengths[pos] = value;
1977 pos += 1;
1978 }
1979 }
1980 Ok(lengths)
1981 }
1982
    /// Decodes LZ symbols until the output position reaches `output_size` or
    /// an end-of-block symbol is read.
    ///
    /// Despite its name, `output_size` is the absolute output position to stop
    /// at; it is also passed to `copy_match` for every match.
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidData`] for an invalid main symbol or repeat
    /// length slot, and propagates errors from the bit reader, the Huffman
    /// decoders, VM code reading and match copying.
    fn decode_lz(&mut self, output_size: usize) -> Result<()> {
        while self.current_pos() < output_size {
            let symbol = self.main.decode(&mut self.bits)?;
            match symbol {
                // Literal byte.
                0..=255 => self.output.push(symbol as u8),
                // End of block: hand control back to the caller.
                256 => {
                    self.read_end_of_block()?;
                    return Ok(());
                }
                // Embedded VM filter code.
                257 => {
                    self.read_vm_code()?;
                }
                // Repeat the last match; ignored while there is none yet.
                258 => {
                    if self.last_length != 0 {
                        self.copy_match(self.last_length, self.last_offset, output_size)?;
                    }
                }
                // Match that reuses one of the four remembered offsets; only
                // the length is coded.
                259..=262 => {
                    let index = symbol - 259;
                    let offset = self.old_offsets[index];
                    let length_slot = self.lengths.decode(&mut self.bits)?;
                    if length_slot >= LENGTH_COUNT {
                        return Err(Error::InvalidData("RAR 2.9 invalid repeat length slot"));
                    }
                    let mut length = LENGTH_BASES[length_slot] + 2;
                    if LENGTH_BITS[length_slot] != 0 {
                        length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
                    }
                    self.rotate_old_offset(index);
                    self.last_offset = offset;
                    self.last_length = length;
                    self.copy_match(length, offset, output_size)?;
                }
                // Two-byte match with a short offset taken from the short
                // offset tables.
                263..=270 => {
                    let index = symbol - 263;
                    let mut offset = SHORT_BASES[index] + 1;
                    if SHORT_BITS[index] != 0 {
                        offset += self.bits.read_bits(SHORT_BITS[index])? as usize;
                    }
                    self.push_old_offset(offset);
                    self.last_offset = offset;
                    self.last_length = 2;
                    self.copy_match(2, offset, output_size)?;
                }
                // Match with explicitly coded length and offset.
                271..=298 => {
                    let length_slot = symbol - 271;
                    let mut length = LENGTH_BASES[length_slot] + 3;
                    if LENGTH_BITS[length_slot] != 0 {
                        length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
                    }
                    let offset = self.read_offset()?;
                    // Matches at larger offsets are one or two bytes longer
                    // than their coded length.
                    if offset >= 0x2000 {
                        length += 1;
                    }
                    if offset >= 0x40000 {
                        length += 1;
                    }
                    self.push_old_offset(offset);
                    self.last_offset = offset;
                    self.last_length = length;
                    self.copy_match(length, offset, output_size)?;
                }
                _ => return Err(Error::InvalidData("RAR 2.9 invalid main symbol")),
            }
        }
        Ok(())
    }
2050
    /// Decodes PPMd symbols until the output position reaches `output_size`,
    /// the PPMd decoder yields no symbol, or an escape code ends the block.
    ///
    /// Every symbol other than the escape symbol is a literal byte. The escape
    /// symbol is followed by a command symbol, handled below.
    ///
    /// # Errors
    ///
    /// Propagates errors from the PPMd decoder, VM code reading and match
    /// copying; a command whose parameter bytes are missing yields
    /// [`Error::InvalidData`].
    fn decode_ppmd(&mut self, output_size: usize) -> Result<()> {
        while self.current_pos() < output_size {
            let Some(symbol) = self.ppmd.decode_symbol(&mut self.bits)? else {
                return Ok(());
            };
            if symbol != self.ppmd_esc {
                self.output.push(symbol);
                continue;
            }

            let Some(next) = self.ppmd.decode_symbol(&mut self.bits)? else {
                return Ok(());
            };
            match next {
                // New tables follow: mark the block as finished.
                0 => {
                    self.in_lz_block = false;
                    return Ok(());
                }
                // Any other command value stands for the escape byte itself.
                1 | 6..=u8::MAX => self.output.push(self.ppmd_esc),
                // Stop decoding and return to the caller; unlike command 0,
                // the block state is left unchanged.
                2 => return Ok(()),
                // Embedded VM filter code.
                3 => {
                    self.read_vm_code_ppmd()?;
                }
                // Match: three big-endian offset bytes (biased by 2) and one
                // length byte (biased by 32).
                4 => {
                    let mut offset = 0usize;
                    for _ in 0..3 {
                        offset = (offset << 8) | self.read_ppmd_required_byte()? as usize;
                    }
                    offset += 2;
                    let length = self.read_ppmd_required_byte()? as usize + 32;
                    self.copy_match(length, offset, output_size)?;
                }
                // Repeat of the previous byte: one length byte (biased by 4)
                // at offset 1.
                5 => {
                    let length = self.read_ppmd_required_byte()? as usize + 4;
                    self.copy_match(length, 1, output_size)?;
                }
            }
        }
        Ok(())
    }
2091
2092 fn read_ppmd_required_byte(&mut self) -> Result<u8> {
2093 self.ppmd
2094 .decode_symbol(&mut self.bits)?
2095 .ok_or(Error::InvalidData("RAR 2.9 PPMd stream ended early"))
2096 }
2097
2098 fn finish_ppmd_member(&mut self) -> Result<()> {
2099 if self.block_mode != BlockMode::Ppmd {
2100 return Ok(());
2101 }
2102 let Some(symbol) = self.ppmd.decode_symbol(&mut self.bits)? else {
2103 return Ok(());
2104 };
2105 if symbol != self.ppmd_esc {
2106 return Err(Error::InvalidData("RAR 2.9 PPMd member has trailing data"));
2107 }
2108 let Some(next) = self.ppmd.decode_symbol(&mut self.bits)? else {
2109 return Ok(());
2110 };
2111 match next {
2112 2 => {
2113 self.in_lz_block = false;
2114 Ok(())
2115 }
2116 0 => {
2117 self.in_lz_block = false;
2118 Ok(())
2119 }
2120 _ => Err(Error::InvalidData("RAR 2.9 PPMd member has trailing data")),
2121 }
2122 }
2123
2124 fn finish_member(&mut self) -> Result<()> {
2125 match self.block_mode {
2126 BlockMode::Lz => self.finish_lz_member(),
2127 BlockMode::Ppmd => self.finish_ppmd_member(),
2128 }
2129 }
2130
2131 fn finish_lz_member(&mut self) -> Result<()> {
2132 if !self.in_lz_block {
2133 return Ok(());
2134 }
2135 let symbol = self.main.decode(&mut self.bits)?;
2136 if symbol != 256 {
2137 return Err(Error::InvalidData("RAR 2.9 LZ member has trailing data"));
2138 }
2139 self.read_end_of_block()
2140 }
2141
2142 fn read_end_of_block(&mut self) -> Result<()> {
2143 let new_table = if self.bits.read_bit()? != 0 {
2144 true
2145 } else {
2146 self.bits.read_bit()? != 0
2147 };
2148 self.in_lz_block = !new_table;
2149 Ok(())
2150 }
2151
2152 fn read_offset(&mut self) -> Result<usize> {
2153 let slot = self.offsets.decode(&mut self.bits)?;
2154 if slot >= OFFSET_COUNT {
2155 return Err(Error::InvalidData("RAR 2.9 invalid offset slot"));
2156 }
2157 let mut offset = OFFSET_BASES[slot] + 1;
2158 let extra_bits = OFFSET_BITS[slot];
2159 if extra_bits != 0 {
2160 if slot > 9 {
2161 if extra_bits > 4 {
2162 offset += (self.bits.read_bits(extra_bits - 4)? as usize) << 4;
2163 }
2164 if self.low_offset_repeats > 0 {
2165 self.low_offset_repeats -= 1;
2166 offset += self.last_low_offset;
2167 } else {
2168 let low = self.low_offsets.decode(&mut self.bits)?;
2169 if low == 16 {
2170 self.low_offset_repeats = 15;
2171 offset += self.last_low_offset;
2172 } else if low < 16 {
2173 self.last_low_offset = low;
2174 offset += low;
2175 } else {
2176 return Err(Error::InvalidData("RAR 2.9 invalid low offset symbol"));
2177 }
2178 }
2179 } else {
2180 offset += self.bits.read_bits(extra_bits)? as usize;
2181 }
2182 }
2183 Ok(offset)
2184 }
2185
2186 fn read_vm_code(&mut self) -> Result<()> {
2187 let first_byte = self.bits.read_bits(8)?;
2188 let mut len = (first_byte & 7) + 1;
2189 if len == 7 {
2190 len = self.bits.read_bits(8)? + 7;
2191 } else if len == 8 {
2192 len = self.bits.read_bits(16)?;
2193 }
2194 let mut data = Vec::with_capacity(len as usize);
2195 for _ in 0..len {
2196 data.push(self.bits.read_bits(8)? as u8);
2197 }
2198
2199 self.parse_vm_code(first_byte, data)
2200 }
2201
2202 fn read_vm_code_ppmd(&mut self) -> Result<()> {
2203 let first_byte = u32::from(self.read_ppmd_required_byte()?);
2204 let mut len = (first_byte & 7) + 1;
2205 if len == 7 {
2206 len = u32::from(self.read_ppmd_required_byte()?) + 7;
2207 } else if len == 8 {
2208 len = (u32::from(self.read_ppmd_required_byte()?) << 8)
2209 | u32::from(self.read_ppmd_required_byte()?);
2210 }
2211 let mut data = Vec::with_capacity(len as usize);
2212 for _ in 0..len {
2213 data.push(self.read_ppmd_required_byte()?);
2214 }
2215
2216 self.parse_vm_code(first_byte, data)
2217 }
2218
2219 fn parse_vm_code(&mut self, first_byte: u32, data: Vec<u8>) -> Result<()> {
2220 let mut vm = BitReader::from_bytes(&data);
2221 let program_index = if first_byte & 0x80 != 0 {
2222 let value = vm.read_encoded_u32()?;
2223 if value == 0 {
2224 self.filters.clear();
2225 self.programs.clear();
2226 0
2227 } else {
2228 usize::try_from(value - 1)
2229 .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?
2230 }
2231 } else {
2232 self.last_filter
2233 };
2234 if program_index > self.programs.len() {
2235 return Err(Error::InvalidData("RAR 2.9 VM program index is invalid"));
2236 }
2237 self.last_filter = program_index;
2238 let new_program = program_index == self.programs.len();
2239
2240 let mut block_start = vm.read_encoded_u32()? as usize;
2241 if first_byte & 0x40 != 0 {
2242 block_start += 258;
2243 }
2244 block_start = self
2245 .current_pos()
2246 .checked_add(block_start)
2247 .ok_or(Error::InvalidData("RAR 2.9 VM block start overflows"))?;
2248
2249 let mut block_size = self
2250 .programs
2251 .get(program_index)
2252 .map(|program| program.block_size)
2253 .unwrap_or(0);
2254 if first_byte & 0x20 != 0 {
2255 block_size = vm.read_encoded_u32()? as usize;
2256 }
2257
2258 let mut regs = [0u32; 7];
2259 regs[3] = 0x3c000;
2260 regs[4] = block_size as u32;
2261 if let Some(program) = self.programs.get(program_index) {
2262 regs[5] = program.exec_count;
2263 }
2264 if first_byte & 0x10 != 0 {
2265 let mask = vm.read_bits(7)?;
2266 for (index, reg) in regs.iter_mut().enumerate() {
2267 if mask & (1 << index) != 0 {
2268 *reg = vm.read_encoded_u32()?;
2269 }
2270 }
2271 }
2272
2273 if new_program {
2274 if self.programs.len() >= MAX_VM_PROGRAMS {
2275 return Err(Error::InvalidData("RAR 2.9 VM program limit exceeded"));
2276 }
2277 let code_size = vm.read_encoded_u32()? as usize;
2278 if code_size == 0 {
2279 return Err(Error::InvalidData("RAR 2.9 VM code is empty"));
2280 }
2281 if code_size > MAX_VM_CODE_SIZE {
2282 return Err(Error::InvalidData("RAR 2.9 VM code is too large"));
2283 }
2284 let mut code = Vec::with_capacity(code_size);
2285 for _ in 0..code_size {
2286 code.push(vm.read_bits(8)? as u8);
2287 }
2288 let kind = identify_standard_filter(&code)
2289 .map(VmProgramKind::Standard)
2290 .map_or_else(
2291 || rarvm::Program::parse(&code).map(VmProgramKind::Generic),
2292 Ok,
2293 )?;
2294 self.programs.push(VmProgram {
2295 kind,
2296 block_size,
2297 exec_count: 0,
2298 globals: Vec::new(),
2299 });
2300 } else if let Some(program) = self.programs.get_mut(program_index) {
2301 program.exec_count = program.exec_count.wrapping_add(1);
2302 program.block_size = block_size;
2303 }
2304
2305 let mut global_data = Vec::new();
2306 if first_byte & 0x08 != 0 {
2307 let data_size = vm.read_encoded_u32()? as usize;
2308 global_data.reserve(data_size.min(MAX_VM_GLOBAL_DATA));
2309 for _ in 0..data_size {
2310 let byte = vm.read_bits(8)? as u8;
2311 if global_data.len() < MAX_VM_GLOBAL_DATA {
2312 global_data.push(byte);
2313 }
2314 }
2315 }
2316
2317 if self.filters.len() >= MAX_VM_FILTERS {
2318 return Err(Error::InvalidData("RAR 2.9 VM filter limit exceeded"));
2319 }
2320 self.filters.push(VmFilter {
2321 program: program_index,
2322 start: block_start,
2323 size: block_size,
2324 regs,
2325 global_data,
2326 });
2327 Ok(())
2328 }
2329
2330 fn filtered_range(&mut self, start: usize, end: usize, member_start: usize) -> Result<Vec<u8>> {
2331 let mut out = Vec::with_capacity(end - start);
2332 let mut pos = start;
2333 let filters: Vec<_> = self
2334 .filters
2335 .iter()
2336 .enumerate()
2337 .filter_map(|(index, filter)| {
2338 (filter.start >= start && filter.start + filter.size <= end).then_some(index)
2339 })
2340 .collect();
2341 for filter_index in filters {
2342 let (program_index, filter_start, filter_size, regs, global_data) = {
2343 let filter = self
2344 .filters
2345 .get(filter_index)
2346 .ok_or(Error::InvalidData("RAR 2.9 VM filter is missing"))?;
2347 (
2348 filter.program,
2349 filter.start,
2350 filter.size,
2351 filter.regs,
2352 filter.global_data.clone(),
2353 )
2354 };
2355 if filter_start < pos {
2356 continue;
2357 }
2358 out.extend_from_slice(self.raw_range(pos, filter_start)?);
2359 let mut block = self
2360 .raw_range(filter_start, filter_start + filter_size)?
2361 .to_vec();
2362 let file_offset = filter_start
2363 .checked_sub(member_start)
2364 .ok_or(Error::InvalidData("RAR 2.9 VM filter starts before file"))?
2365 as u32;
2366 let program = self
2367 .programs
2368 .get_mut(program_index)
2369 .ok_or(Error::InvalidData("RAR 2.9 VM program is missing"))?;
2370 match &program.kind {
2371 VmProgramKind::Standard(standard) => {
2372 apply_standard_filter(*standard, &mut block, file_offset, ®s)?
2373 }
2374 VmProgramKind::Generic(generic) => {
2375 let globals = if global_data.is_empty() {
2376 program.globals.as_slice()
2377 } else {
2378 global_data.as_slice()
2379 };
2380 let result = generic.execute(rarvm::Invocation {
2381 input: &block,
2382 regs,
2383 global_data: globals,
2384 file_offset: file_offset as u64,
2385 exec_count: program.exec_count,
2386 })?;
2387 program.globals = result.globals;
2388 block = result.output;
2389 }
2390 }
2391 out.extend_from_slice(&block);
2392 pos = filter_start + filter_size;
2393 }
2394 out.extend_from_slice(self.raw_range(pos, end)?);
2395 Ok(out)
2396 }
2397
2398 fn safe_flush_end(&self, start: usize, end: usize, final_target: usize) -> Result<usize> {
2399 let current = self.current_pos();
2400 let mut safe_end = end;
2401 for filter in &self.filters {
2402 let filter_end = filter
2403 .start
2404 .checked_add(filter.size)
2405 .ok_or(Error::InvalidData("RAR 2.9 VM filter size overflows"))?;
2406 if filter.start >= safe_end || filter_end <= start {
2407 continue;
2408 }
2409 if filter_end > final_target {
2410 return Err(Error::InvalidData(
2411 "RAR 2.9 VM filter extends beyond output",
2412 ));
2413 }
2414 if filter_end > current {
2415 safe_end = safe_end.min(filter.start);
2416 }
2417 }
2418 Ok(safe_end)
2419 }
2420
2421 fn copy_match(&mut self, length: usize, offset: usize, output_size: usize) -> Result<()> {
2422 let offset = if offset == 0 { 1 } else { offset };
2426 let current = self.current_pos();
2427 if offset > current {
2428 return Err(Error::InvalidData("RAR 2.9 match distance is out of range"));
2429 }
2430 for index in 0..length {
2431 if self.current_pos() >= output_size {
2432 self.pending_match = Some((length - index, offset));
2433 break;
2434 }
2435 let src = self.current_pos() - offset;
2436 let byte = *self
2437 .raw_byte(src)
2438 .ok_or(Error::InvalidData("RAR 2.9 match distance is out of range"))?;
2439 self.output.push(byte);
2440 }
2441 Ok(())
2442 }
2443
2444 fn drain_pending_match(&mut self, output_size: usize) -> Result<()> {
2445 let Some((length, offset)) = self.pending_match.take() else {
2446 return Ok(());
2447 };
2448 self.copy_match(length, offset, output_size)
2449 }
2450
2451 fn push_old_offset(&mut self, offset: usize) {
2452 self.old_offsets[3] = self.old_offsets[2];
2453 self.old_offsets[2] = self.old_offsets[1];
2454 self.old_offsets[1] = self.old_offsets[0];
2455 self.old_offsets[0] = offset;
2456 }
2457
2458 fn rotate_old_offset(&mut self, index: usize) {
2459 let value = self.old_offsets[index];
2460 for i in (1..=index).rev() {
2461 self.old_offsets[i] = self.old_offsets[i - 1];
2462 }
2463 self.old_offsets[0] = value;
2464 }
2465
2466 fn current_pos(&self) -> usize {
2467 self.base_offset + self.output.len()
2468 }
2469
2470 fn raw_byte(&self, position: usize) -> Option<&u8> {
2471 self.output.get(position.checked_sub(self.base_offset)?)
2472 }
2473
2474 fn raw_range(&self, start: usize, end: usize) -> Result<&[u8]> {
2475 if start < self.base_offset || end < start {
2476 return Err(Error::InvalidData(
2477 "RAR 2.9 retained history is unavailable",
2478 ));
2479 }
2480 let rel_start = start - self.base_offset;
2481 let rel_end = end - self.base_offset;
2482 self.output
2483 .get(rel_start..rel_end)
2484 .ok_or(Error::InvalidData(
2485 "RAR 2.9 retained history is unavailable",
2486 ))
2487 }
2488
2489 fn trim_history(&mut self, flushed_pos: usize, current_pos: usize) {
2490 let keep_from = current_pos.saturating_sub(MAX_HISTORY);
2491 let keep_from = keep_from.min(flushed_pos);
2492 if keep_from <= self.base_offset {
2493 return;
2494 }
2495 let drain = keep_from - self.base_offset;
2496 self.output.drain(..drain);
2497 self.base_offset = keep_from;
2498 self.filters
2499 .retain(|filter| filter.start + filter.size > self.base_offset);
2500 }
2501}
2502
2503impl Default for Unpack29 {
2504 fn default() -> Self {
2505 Self::new()
2506 }
2507}
2508
2509fn fill_levels(levels: &mut [u8], pos: &mut usize, count: usize, value: u8) -> Result<()> {
2510 let end = pos
2511 .checked_add(count)
2512 .ok_or(Error::InvalidData("RAR 2.9 table run overflows"))?;
2513 let end = end.min(levels.len());
2514 for item in &mut levels[*pos..end] {
2515 *item = value;
2516 }
2517 *pos = end;
2518 Ok(())
2519}
2520
2521#[derive(Debug, Clone)]
2522struct Huffman {
2523 symbols: Vec<HuffmanSymbol>,
2524 first_code: [u16; 16],
2525 first_index: [usize; 16],
2526 counts: [u16; 16],
2527}
2528
2529#[derive(Debug, Clone)]
2530struct HuffmanSymbol {
2531 code: u16,
2532 len: u8,
2533 symbol: usize,
2534}
2535
2536impl Huffman {
2537 fn empty() -> Self {
2538 Self {
2539 symbols: Vec::new(),
2540 first_code: [0; 16],
2541 first_index: [0; 16],
2542 counts: [0; 16],
2543 }
2544 }
2545
2546 fn from_lengths(lengths: &[u8]) -> Result<Self> {
2547 let mut count = [0u16; 16];
2548 for &len in lengths {
2549 if len > 15 {
2550 return Err(Error::InvalidData("RAR 2.9 Huffman length is too large"));
2551 }
2552 if len != 0 {
2553 count[len as usize] += 1;
2554 }
2555 }
2556 if count.iter().all(|&value| value == 0) {
2557 return Ok(Self::empty());
2558 }
2559 validate_huffman_counts(&count)?;
2560
2561 let mut first_code = [0u16; 16];
2562 let mut next_code = [0u16; 16];
2563 let mut code = 0u16;
2564 for len in 1..=15 {
2565 code = (code + count[len - 1]) << 1;
2566 first_code[len] = code;
2567 next_code[len] = code;
2568 }
2569
2570 let mut first_index = [0usize; 16];
2571 let mut index = 0usize;
2572 for len in 1..=15 {
2573 first_index[len] = index;
2574 index += usize::from(count[len]);
2575 }
2576
2577 let mut symbols = Vec::new();
2578 for (symbol, &len) in lengths.iter().enumerate() {
2579 if len == 0 {
2580 continue;
2581 }
2582 let code = next_code[len as usize];
2583 next_code[len as usize] += 1;
2584 symbols.push(HuffmanSymbol { code, len, symbol });
2585 }
2586 symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
2587 Ok(Self {
2588 symbols,
2589 first_code,
2590 first_index,
2591 counts: count,
2592 })
2593 }
2594
2595 fn decode(&self, bits: &mut BitReader) -> Result<usize> {
2596 let mut code = 0u16;
2597 if self.symbols.is_empty() {
2598 return Err(Error::InvalidData("RAR 2.9 empty Huffman table"));
2599 }
2600 for len in 1..=15 {
2601 code = (code << 1) | bits.read_bit()? as u16;
2602 let count = self.counts[len];
2603 if count != 0 {
2604 let first = self.first_code[len];
2605 let offset = code.wrapping_sub(first);
2606 if offset < count {
2607 let index = self.first_index[len] + usize::from(offset);
2608 return Ok(self.symbols[index].symbol);
2609 }
2610 }
2611 }
2612 Err(Error::InvalidData("RAR 2.9 invalid Huffman code"))
2613 }
2614}
2615
2616fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
2617 let mut available = 1i32;
2618 for &len_count in count.iter().skip(1) {
2619 available = (available << 1) - i32::from(len_count);
2620 if available < 0 {
2621 return Err(Error::InvalidData("RAR 2.9 oversubscribed Huffman table"));
2622 }
2623 }
2624 Ok(())
2625}
2626
2627#[derive(Debug, Clone)]
2628struct BitReader {
2629 input: Vec<u8>,
2630 bit_pos: usize,
2631}
2632
2633impl BitReader {
2634 fn new() -> Self {
2635 Self {
2636 input: Vec::new(),
2637 bit_pos: 0,
2638 }
2639 }
2640
2641 fn from_bytes(input: &[u8]) -> Self {
2642 Self {
2643 input: input.to_vec(),
2644 bit_pos: 0,
2645 }
2646 }
2647
2648 fn append(&mut self, input: &[u8]) {
2649 self.compact();
2650 self.input.extend_from_slice(input);
2651 }
2652
2653 fn compact(&mut self) {
2654 let bytes = self.bit_pos / 8;
2655 if bytes == 0 {
2656 return;
2657 }
2658 self.input.drain(..bytes);
2659 self.bit_pos -= bytes * 8;
2660 }
2661
2662 fn align_byte(&mut self) {
2663 self.bit_pos = (self.bit_pos + 7) & !7;
2664 }
2665
2666 fn peek_bit(&self) -> Result<u8> {
2667 self.peek_bits(1).map(|value| value as u8)
2668 }
2669
2670 fn read_bit(&mut self) -> Result<u8> {
2671 self.read_bits(1).map(|value| value as u8)
2672 }
2673
2674 fn read_bits(&mut self, count: u8) -> Result<u32> {
2675 let value = self.peek_bits(count)?;
2676 self.bit_pos += count as usize;
2677 Ok(value)
2678 }
2679
2680 fn peek_bits(&self, count: u8) -> Result<u32> {
2681 if count > 24 {
2682 return Err(Error::InvalidData("RAR 2.9 bit read is too wide"));
2683 }
2684 let mut value = 0u32;
2685 for i in 0..count as usize {
2686 let bit_index = self.bit_pos + i;
2687 let byte = *self.input.get(bit_index / 8).ok_or(Error::NeedMoreInput)?;
2688 let bit = (byte >> (7 - (bit_index % 8))) & 1;
2689 value = (value << 1) | bit as u32;
2690 }
2691 Ok(value)
2692 }
2693
2694 fn read_encoded_u32(&mut self) -> Result<u32> {
2695 match self.read_bits(2)? {
2696 0 => self.read_bits(4),
2697 1 => {
2698 let high = self.read_bits(8)?;
2699 if high >= 16 {
2700 Ok(high)
2701 } else {
2702 Ok(0xffff_ff00 | (high << 4) | self.read_bits(4)?)
2703 }
2704 }
2705 2 => self.read_bits(16),
2706 _ => Ok((self.read_bits(16)? << 16) | self.read_bits(16)?),
2707 }
2708 }
2709}
2710
2711impl PpmdByteReader for BitReader {
2712 fn read_ppmd_byte(&mut self) -> Result<u8> {
2713 self.read_bits(8).map(|value| value as u8)
2714 }
2715}
2716
/// Bit sink that packs bits into bytes, most significant bit first.
#[derive(Default)]
struct BitWriter {
    /// Bytes written so far; the last one may be partially filled.
    bytes: Vec<u8>,
    /// Total number of bits written.
    bit_pos: usize,
}

impl BitWriter {
    /// Writes the low `count` bits of `value`, highest of those bits first.
    fn write_bits(&mut self, value: u32, count: u8) {
        let mut remaining = count;
        while remaining > 0 {
            remaining -= 1;
            self.write_bit((value >> remaining) & 1 == 1);
        }
    }

    /// Writes `value` with a 2-bit tag selecting the narrowest of the 4-, 8-,
    /// 16- and 32-bit forms that this writer uses for it.
    fn write_encoded_u32(&mut self, value: u32) {
        match value {
            0..=0xf => {
                self.write_bits(0, 2);
                self.write_bits(value, 4);
            }
            0x10..=0xff => {
                self.write_bits(1, 2);
                self.write_bits(value, 8);
            }
            0x100..=0xffff => {
                self.write_bits(2, 2);
                self.write_bits(value, 16);
            }
            _ => {
                self.write_bits(3, 2);
                self.write_bits(value >> 16, 16);
                self.write_bits(value & 0xffff, 16);
            }
        }
    }

    /// Appends a single bit, starting a new zeroed byte every eight bits.
    fn write_bit(&mut self, bit: bool) {
        if self.bit_pos.is_multiple_of(8) {
            self.bytes.push(0);
        }
        if bit {
            let mask = 0x80u8 >> (self.bit_pos % 8);
            // A byte is always pushed before the first bit of each group of
            // eight, so `bytes` is never empty here.
            *self.bytes.last_mut().unwrap() |= mask;
        }
        self.bit_pos += 1;
    }

    /// Returns the written bytes; unused low bits of the last byte are zero.
    fn finish(self) -> Vec<u8> {
        let BitWriter { bytes, .. } = self;
        bytes
    }
}
2762
2763fn identify_standard_filter(code: &[u8]) -> Option<StandardFilter> {
2764 if code.iter().fold(0u8, |acc, &byte| acc ^ byte) != 0 {
2765 return None;
2766 }
2767 match (code.len(), crc32(code)) {
2768 (53, 0xad57_6887) => Some(StandardFilter::E8),
2769 (57, 0x3cd7_e57e) => Some(StandardFilter::E8E9),
2770 (120, 0x3769_893f) => Some(StandardFilter::Itanium),
2771 (29, 0x0e06_077d) => Some(StandardFilter::Delta),
2772 (149, 0x1c2c_5dc8) => Some(StandardFilter::Rgb),
2773 (216, 0xbc85_e701) => Some(StandardFilter::Audio),
2774 _ => None,
2775 }
2776}
2777
2778fn apply_standard_filter(
2779 filter: StandardFilter,
2780 data: &mut Vec<u8>,
2781 file_offset: u32,
2782 regs: &[u32; 7],
2783) -> Result<()> {
2784 match filter {
2785 StandardFilter::E8 => {
2786 filters::decode_in_place(FilterOp::E8, data, file_offset, rar29_delta_messages())?
2787 }
2788 StandardFilter::E8E9 => {
2789 filters::decode_in_place(FilterOp::E8E9, data, file_offset, rar29_delta_messages())?
2790 }
2791 StandardFilter::Itanium => itanium_decode(data, file_offset),
2792 StandardFilter::Delta => {
2793 let channels = regs[0] as usize;
2794 if channels == 0 {
2795 return Err(Error::InvalidData("RAR 2.9 DELTA filter has zero channels"));
2796 }
2797 filters::decode_in_place(
2798 FilterOp::Delta { channels },
2799 data,
2800 0,
2801 rar29_delta_messages(),
2802 )?;
2803 }
2804 StandardFilter::Rgb => {
2805 if regs[0] < 3 || regs[1] > 2 {
2806 return Err(Error::InvalidData(
2807 "RAR 2.9 RGB filter parameters are invalid",
2808 ));
2809 }
2810 let width = regs[0] as usize - 3;
2811 let pos_r = regs[1] as usize;
2812 *data = rgb_decode(data, width, pos_r)?;
2813 }
2814 StandardFilter::Audio => {
2815 let channels = regs[0] as usize;
2816 if channels == 0 {
2817 return Err(Error::InvalidData("RAR 2.9 AUDIO filter has zero channels"));
2818 }
2819 *data = audio_decode(data, channels)?;
2820 }
2821 }
2822 Ok(())
2823}
2824
/// Reverses the Itanium filter on `data` in place.
///
/// The buffer is walked in 16-byte steps while at least 22 bytes remain.
/// The first byte of each step selects, through a lookup constant, which of
/// the bit shifts 2..=4 are examined. For each examined shift whose 4-bit
/// marker equals 5, a 20-bit field is reduced by the step's offset
/// (`file_offset / 16` plus the step index) modulo 2^20.
///
/// Buffers of 21 bytes or fewer are left untouched.
fn itanium_decode(data: &mut [u8], file_offset: u32) {
    const FIELD_MASK: u32 = 0x000f_ffff;

    if data.len() <= 21 {
        return;
    }
    let first_offset = file_offset >> 4;
    for (block, pos) in (0..data.len() - 21).step_by(16).enumerate() {
        let block_offset = first_offset.wrapping_add(block as u32);
        let selector = (0x334b_0000u32 >> (data[pos] & 0x1e)) & 3;
        if selector == 0 {
            continue;
        }
        for shift in selector + 1..=4 {
            let at = pos + shift as usize * 5 - 8;
            if (data[at + 3] >> shift) & 15 != 5 {
                continue;
            }
            let word = u32::from_le_bytes([data[at], data[at + 1], data[at + 2], data[at + 3]]);
            let field = (word >> shift).wrapping_sub(block_offset) & FIELD_MASK;
            let patched = (word & !(FIELD_MASK << shift)) | (field << shift);
            data[at..at + 4].copy_from_slice(&patched.to_le_bytes());
        }
    }
}
2854
2855fn rgb_decode(data: &[u8], width: usize, pos_r: usize) -> Result<Vec<u8>> {
2856 if data.len() < 3 || width == 0 || !width.is_multiple_of(3) || width > data.len() || pos_r > 2 {
2857 return Err(Error::InvalidData(
2858 "RAR 2.9 RGB filter parameters are invalid",
2859 ));
2860 }
2861 let mut out = vec![0u8; data.len()];
2862 let mut src = 0usize;
2863 for channel in 0..3 {
2864 let mut prev = 0u8;
2865 let mut i = channel;
2866 while i < data.len() {
2867 let predicted = if i >= width + 3 {
2868 rgb_predict(prev, out[i - width], out[i - width - 3])
2869 } else {
2870 prev
2871 };
2872 let encoded = *data
2873 .get(src)
2874 .ok_or(Error::InvalidData("RAR 2.9 RGB filter source is truncated"))?;
2875 prev = predicted.wrapping_sub(encoded);
2876 out[i] = prev;
2877 src += 1;
2878 i += 3;
2879 }
2880 }
2881 for i in (pos_r..data.len().saturating_sub(2)).step_by(3) {
2882 let green = out[i + 1];
2883 out[i] = out[i].wrapping_add(green);
2884 out[i + 2] = out[i + 2].wrapping_add(green);
2885 }
2886 Ok(out)
2887}
2888
/// Chooses whichever of `prev`, `upper` and `upper_left` lies closest to
/// `prev + upper - upper_left`; ties go to `prev`, then to `upper`.
fn rgb_predict(prev: u8, upper: u8, upper_left: u8) -> u8 {
    let estimate = i32::from(prev) + i32::from(upper) - i32::from(upper_left);
    let distance = |candidate: u8| (estimate - i32::from(candidate)).abs();

    match (distance(prev), distance(upper), distance(upper_left)) {
        (to_prev, to_upper, to_corner) if to_prev <= to_upper && to_prev <= to_corner => prev,
        (_, to_upper, to_corner) if to_upper <= to_corner => upper,
        _ => upper_left,
    }
}
2902
2903fn audio_decode(data: &[u8], channels: usize) -> Result<Vec<u8>> {
2904 let mut out = vec![0u8; data.len()];
2905 let mut src = 0usize;
2906 for channel in 0..channels {
2907 let mut prev_byte = 0u32;
2908 let mut prev_delta = 0i32;
2909 let mut d1 = 0i32;
2910 let mut d2 = 0i32;
2911 let mut k1 = 0i32;
2912 let mut k2 = 0i32;
2913 let mut k3 = 0i32;
2914 let mut dif = [0u32; 7];
2915 let mut byte_count = 0usize;
2916 let mut i = channel;
2917 while i < data.len() {
2918 let d3 = d2;
2919 d2 = prev_delta - d1;
2920 d1 = prev_delta;
2921 let predicted = ((8 * prev_byte as i32 + k1 * d1 + k2 * d2 + k3 * d3) >> 3) & 0xff;
2922 let encoded = *data.get(src).ok_or(Error::InvalidData(
2923 "RAR 2.9 AUDIO filter source is truncated",
2924 ))?;
2925 src += 1;
2926 let decoded = (predicted as u8).wrapping_sub(encoded);
2927 out[i] = decoded;
2928 prev_delta = decoded.wrapping_sub(prev_byte as u8) as i8 as i32;
2929 prev_byte = decoded as u32;
2930 let d = (encoded as i8 as i32) << 3;
2931 dif[0] += d.unsigned_abs();
2932 dif[1] += (d - d1).unsigned_abs();
2933 dif[2] += (d + d1).unsigned_abs();
2934 dif[3] += (d - d2).unsigned_abs();
2935 dif[4] += (d + d2).unsigned_abs();
2936 dif[5] += (d - d3).unsigned_abs();
2937 dif[6] += (d + d3).unsigned_abs();
2938 if byte_count & 0x1f == 0 {
2939 let mut min = dif[0];
2940 let mut min_index = 0usize;
2941 dif[0] = 0;
2942 for (index, value) in dif.iter_mut().enumerate().skip(1) {
2943 if *value < min {
2944 min = *value;
2945 min_index = index;
2946 }
2947 *value = 0;
2948 }
2949 match min_index {
2950 1 if k1 >= -16 => k1 -= 1,
2951 2 if k1 < 16 => k1 += 1,
2952 3 if k2 >= -16 => k2 -= 1,
2953 4 if k2 < 16 => k2 += 1,
2954 5 if k3 >= -16 => k3 -= 1,
2955 6 if k3 < 16 => k3 += 1,
2956 _ => {}
2957 }
2958 }
2959 byte_count += 1;
2960 i += channels;
2961 }
2962 }
2963 Ok(out)
2964}
2965
2966#[cfg(test)]
2967mod tests {
2968 use crate::rarvm::{Instruction, Opcode, Operand, Program};
2969 use std::ops::Range;
2970
2971 use super::{
2972 apply_standard_filter, audio_encode, best_match, encode_ppmd_tokens,
2973 encode_table_level_tokens, encode_tokens, encoded_filter_records, insert_match_position,
2974 itanium_decode, itanium_encode, should_lazy_emit_literal, split_large_filter,
2975 unpack29_decode, unpack29_encode_literals, unpack29_encode_ppmd,
2976 unpack29_encode_ppmd_literals, unpack29_encode_ppmd_with_filter, BitReader, BitWriter,
2977 EncodeOptions, EncodeToken, EncoderMatchState, Error, Huffman, LevelToken,
2978 OwnedVmFilterRecord, PpmdEncodeToken, Rar29FilterKind, Rar29FilterSpec, Result,
2979 StandardFilter, Unpack29, Unpack29Encoder, VmFilter, VmProgram, VmProgramKind, MAIN_COUNT,
2980 MATCH_HASH_BUCKETS, MAX_MATCH_CANDIDATES, MAX_VM_AUDIO_FILTER_BLOCK_SIZE,
2981 MAX_VM_DELTA_FILTER_BLOCK_SIZE, MAX_VM_FILTER_BLOCK_SIZE, RAR3_AUDIO_FILTER_BYTECODE,
2982 TABLE_COUNT,
2983 };
2984
2985 const COMPRESSED_TEXT: &[u8] = &[
2986 0x09, 0x10, 0x10, 0x93, 0xe4, 0xce, 0x7f, 0xa2, 0xba, 0x80, 0x46, 0x16, 0x82, 0x63, 0xe9,
2987 0x9a, 0x19, 0xe4, 0x10, 0xe0, 0x41, 0x3d, 0x16, 0xfc, 0x4d, 0xfa, 0x6f, 0xf2, 0x5c, 0xae,
2988 0x32, 0x86, 0xc9, 0x95, 0x9d, 0xf1, 0x04, 0xa4, 0xe8, 0x92, 0x8f, 0x12, 0xd7, 0xe7, 0xba,
2989 0xcb, 0x26, 0xf1, 0x97, 0xac, 0x7c, 0x5f, 0xfd, 0xa0, 0x00, 0x1f, 0x77, 0x50,
2990 ];
2991
2992 #[test]
2993 fn decodes_rar29_lz_member() {
2994 assert_eq!(
2995 unpack29_decode(COMPRESSED_TEXT, 2400).unwrap(),
2996 expected_text()
2997 );
2998 }
2999
3000 #[test]
3001 fn rejects_oversubscribed_rar29_huffman_tables() {
3002 assert!(matches!(
3003 Huffman::from_lengths(&[1, 1, 1]),
3004 Err(Error::InvalidData("RAR 2.9 oversubscribed Huffman table"))
3005 ));
3006 }
3007
3008 #[test]
3009 fn literal_encoder_round_trips_rar29_lz_blocks() {
3010 let input = b"literal-only RAR 2.9 baseline\nwith repeated text literal-only\n";
3011 let packed = unpack29_encode_literals(input).unwrap();
3012
3013 assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3014 }
3015
3016 #[test]
3017 fn multi_block_lz_encoding_round_trips_large_repeated_documents() {
3018 let seed = b"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\n\
3019<HTML><BODY><P>RAR29 repeated document body with enough structured text to \
3020exercise LZSS block table selection.</P></BODY></HTML>\n"
3021 .repeat(96);
3022 let input = seed.repeat(180);
3023 let single =
3024 super::encode_member_with_options(&input, &[], EncodeOptions::new(96)).unwrap();
3025 let blocked = super::encode_member_with_options(
3026 &input,
3027 &[],
3028 EncodeOptions::new(96).with_block_size(1024 * 1024),
3029 )
3030 .unwrap();
3031
3032 assert_eq!(unpack29_decode(&single, input.len()).unwrap(), input);
3033 assert_eq!(unpack29_decode(&blocked, input.len()).unwrap(), input);
3034 assert!(blocked.len() < input.len());
3035 }
3036
3037 #[test]
3038 fn table_level_encoder_uses_rar29_run_symbols() {
3039 let mut lengths = [0u8; TABLE_COUNT];
3040 lengths[..4].fill(5);
3041 lengths[8..21].fill(0);
3042
3043 let tokens = encode_table_level_tokens(&lengths);
3044
3045 assert!(tokens.contains(&LevelToken::repeat_previous_short(3)));
3046 assert!(tokens.iter().any(|token| token.symbol == 19));
3047 }
3048
3049 #[test]
3050 fn lazy_lz_parser_defers_short_match_for_longer_next_match() {
3051 let input = b"abcdXbcdYYYYYYYYYYYYabcdYYYYYYYYYYYY";
3052 let greedy = encode_tokens(input, &[], EncodeOptions::new(MAX_MATCH_CANDIDATES));
3053 let lazy = encode_tokens(
3054 input,
3055 &[],
3056 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3057 );
3058 let packed = Unpack29Encoder::with_options(
3059 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3060 )
3061 .encode_member(input)
3062 .unwrap();
3063
3064 assert!(greedy
3065 .iter()
3066 .any(|token| matches!(token, EncodeToken::Match { length: 4, .. })));
3067 assert!(lazy
3068 .iter()
3069 .any(|token| matches!(token, EncodeToken::Match { length, .. } if *length > 8)));
3070 assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3071 }
3072
3073 #[test]
3074 fn lazy_lz_parser_uses_match_cost_not_only_match_length() {
3075 let pos = 300_000usize;
3076 let mut input = vec![0u8; pos + 16];
3077 input[100..106].copy_from_slice(b"BCDEFG");
3078 input[106] = b'!';
3079 input[pos - 10..pos - 5].copy_from_slice(b"ABCD!");
3080 input[pos..pos + 7].copy_from_slice(b"ABCDEFG");
3081 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3082 insert_match_position(&input, 100, &mut buckets);
3083 insert_match_position(&input, pos - 10, &mut buckets);
3084
3085 let current = best_match(
3086 &input,
3087 pos,
3088 input.len(),
3089 &buckets,
3090 EncodeOptions::new(MAX_MATCH_CANDIDATES),
3091 &EncoderMatchState::default(),
3092 )
3093 .unwrap();
3094 let next = best_match(
3095 &input,
3096 pos + 1,
3097 input.len(),
3098 &buckets,
3099 EncodeOptions::new(MAX_MATCH_CANDIDATES),
3100 &EncoderMatchState::default(),
3101 )
3102 .unwrap();
3103
3104 assert_eq!(current.length, 4);
3105 assert_eq!(current.offset, 10);
3106 assert_eq!(next.length, 6);
3107 assert!(next.offset > 0x40000);
3108 assert!(!should_lazy_emit_literal(
3109 &input,
3110 pos,
3111 input.len(),
3112 &buckets,
3113 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3114 &EncoderMatchState::default(),
3115 current,
3116 ));
3117 }
3118
3119 #[test]
3120 fn lazy_lz_parser_uses_bounded_cost_lookahead() {
3121 let pos = 160;
3122 let mut input: Vec<u8> = (0..240u16)
3123 .map(|value| value.wrapping_mul(91) as u8)
3124 .collect();
3125 input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3126 input[pos - 80..pos - 64].copy_from_slice(b"CDEFGHIJKLMNOPQR");
3127 input[pos..pos + 18].copy_from_slice(b"ABCDEFGHIJKLMNOPQR");
3128
3129 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3130 for candidate in 0..pos {
3131 insert_match_position(&input, candidate, &mut buckets);
3132 }
3133 let current = best_match(
3134 &input,
3135 pos,
3136 input.len(),
3137 &buckets,
3138 EncodeOptions::default(),
3139 &EncoderMatchState::default(),
3140 )
3141 .unwrap();
3142
3143 assert_eq!((current.length, current.offset), (8, 30));
3144 assert!(!should_lazy_emit_literal(
3145 &input,
3146 pos,
3147 input.len(),
3148 &buckets,
3149 EncodeOptions::default()
3150 .with_lazy_matching(true)
3151 .with_lazy_lookahead(1),
3152 &EncoderMatchState::default(),
3153 current,
3154 ));
3155 assert!(should_lazy_emit_literal(
3156 &input,
3157 pos,
3158 input.len(),
3159 &buckets,
3160 EncodeOptions::default()
3161 .with_lazy_matching(true)
3162 .with_lazy_lookahead(2),
3163 &EncoderMatchState::default(),
3164 current,
3165 ));
3166 }
3167
3168 #[test]
3169 fn match_state_encodes_last_length_and_repeat_offset_symbols() {
3170 let mut state = EncoderMatchState::default();
3171 assert!(matches!(
3172 state.encode_match(12, 64).unwrap(),
3173 super::EncodedMatch::Fresh { .. }
3174 ));
3175 state.remember(12, 64);
3176
3177 assert_eq!(
3178 state.encode_match(12, 64).unwrap(),
3179 super::EncodedMatch::LastLengthRepeat
3180 );
3181 assert!(matches!(
3182 state.encode_match(9, 64).unwrap(),
3183 super::EncodedMatch::RepeatOffset { index: 0, .. }
3184 ));
3185 }
3186
/// With an empty match history the 9-byte match at offset 512 is chosen; once
/// offset 30 is in `old_offsets`, the 8-byte match at offset 30 is chosen instead.
#[test]
fn cost_aware_match_selection_prefers_repeat_offset_token() {
    let pos = 600usize;
    let mut input: Vec<u8> = (0..pos + 16)
        .map(|seed| (seed as u8).wrapping_mul(37))
        .collect();
    // Candidate A: 8 matching bytes at distance 30.
    input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
    // Candidate B: 9 matching bytes at distance 512.
    input[pos - 512..pos - 503].copy_from_slice(b"ABCDEFGHI");
    input[pos..pos + 9].copy_from_slice(b"ABCDEFGHI");
    // Distinct bytes right after each copy, so the match lengths are exactly 8 and 9.
    input[pos - 22] = 0x11;
    input[pos - 503] = 0x22;
    input[pos + 9] = 0x33;

    // Only the two candidate positions are indexed.
    let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
    insert_match_position(&input, pos - 30, &mut buckets);
    insert_match_position(&input, pos - 512, &mut buckets);

    let no_history = EncoderMatchState::default();
    let fresh = best_match(
        &input,
        pos,
        input.len(),
        &buckets,
        EncodeOptions::default(),
        &no_history,
    )
    .unwrap();

    let primed = EncoderMatchState {
        old_offsets: [30, 0, 0, 0],
        last_offset: 0,
        last_length: 0,
    };
    let repeat = best_match(
        &input,
        pos,
        input.len(),
        &buckets,
        EncodeOptions::default(),
        &primed,
    )
    .unwrap();

    assert_eq!((fresh.length, fresh.offset), (9, 512));
    assert_eq!((repeat.length, repeat.offset), (8, 30));
}
3229
/// A phrase repeated 256 KiB apart must not produce a match beyond 128 KiB when
/// the maximum distance is 128 KiB, but must produce one when the limit is 1 MiB.
#[test]
fn match_finder_respects_configured_maximum_distance() {
    let phrase: &[u8] = b"rar29 bounded dictionary phrase";
    let mut input = phrase.to_vec();
    input.resize(input.len() + 256 * 1024, 0u8);
    input.extend_from_slice(phrase);

    // True for any match token whose offset exceeds 128 KiB.
    let is_far_match = |token: &EncodeToken| {
        matches!(token, EncodeToken::Match { offset, .. } if *offset > 128 * 1024)
    };

    let bounded = encode_tokens(
        &input,
        &[],
        EncodeOptions::new(MAX_MATCH_CANDIDATES).with_max_match_distance(128 * 1024),
    );
    let unbounded = encode_tokens(
        &input,
        &[],
        EncodeOptions::new(MAX_MATCH_CANDIDATES).with_max_match_distance(1024 * 1024),
    );

    assert!(!bounded.iter().any(is_far_match));
    assert!(unbounded.iter().any(is_far_match));
}
3256
/// Encodes input where `b'A'` alternates with 120 distinct bytes, then checks
/// that the decoded main table uses more than one non-zero code length and that
/// the stream round-trips.
#[test]
fn lz_encoder_uses_weighted_rar29_huffman_tables() {
    let input: Vec<u8> = (0u8..120).flat_map(|byte| [b'A', byte]).collect();
    let packed = Unpack29Encoder::new().encode_member(&input).unwrap();

    // Read only the table header back to inspect the transmitted code lengths.
    let mut decoder = Unpack29::new();
    decoder.bits.append(&packed);
    decoder.read_tables().unwrap();

    let distinct_lengths: std::collections::BTreeSet<_> = decoder.levels[..MAIN_COUNT]
        .iter()
        .filter(|&&length| length != 0)
        .copied()
        .collect();

    assert!(distinct_lengths.len() > 1);
    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3278
/// Calling `copy_match(4, 0, 5)` on a one-byte history of `Z` must leave five `Z`
/// bytes in the output.
#[test]
fn copy_match_treats_zero_offset_as_distance_one() {
    let mut decoder = Unpack29::new();
    decoder.output.extend_from_slice(b"Z");

    let copied = decoder.copy_match(4, 0, 5);
    copied.unwrap();

    assert_eq!(decoder.output, b"ZZZZZ");
}
3288
/// Round-trips a literal-only PPMd stream whose input ends with three `2` bytes
/// followed by "esc", and checks that the first packed byte is not zero.
#[test]
fn ppmd_literal_encoder_round_trips_rar29_ppmd_blocks() {
    let line: &[u8] = b"rar29 ppmd literal text payload alpha beta gamma\n";
    let trailer: &[u8] = &[2, 2, 2, b'e', b's', b'c'];
    let input = [line.repeat(64).as_slice(), trailer].concat();

    let packed = unpack29_encode_ppmd_literals(&input).unwrap();
    let decoded = unpack29_decode(&packed, input.len()).unwrap();

    assert_eq!(decoded, input);
    assert_ne!(packed.first().copied(), Some(0));
}
3298
/// Pins the first two bytes of a PPMd-encoded stream to `0xa7` and `24`.
// NOTE(review): presumably these are the PPMd block flags and the dictionary
// size byte; confirm against the encoder's header writer.
#[test]
fn ppmd_encoder_advertises_period_compatible_model_for_external_decoders() {
    let payload: &[u8] = b"rar29 ppmd dictionary header";
    let packed = unpack29_encode_ppmd(payload).unwrap();

    let first = packed[0];
    assert_eq!(first, 0xa7);
    let second = packed[1];
    assert_eq!(second, 24);
}
3306
/// A run of 512 `Z` bytes after a short seed must yield at least one
/// `RepeatOffsetOne` token of length >= 4, and the packed stream must round-trip.
#[test]
fn ppmd_encoder_emits_offset_one_repeat_escapes() {
    let mut input = b"seed ".to_vec();
    input.resize(input.len() + 512, b'Z');

    let tokens = encode_ppmd_tokens(&input, true);
    let packed = unpack29_encode_ppmd(&input).unwrap();

    let has_repeat_escape = tokens.iter().any(
        |token| matches!(token, PpmdEncodeToken::RepeatOffsetOne { length } if *length >= 4),
    );
    assert!(has_repeat_escape);
    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3322
/// A phrase repeated after unrelated filler must yield a `Match` token with
/// offset > 1 and length >= 32, and the packed stream must round-trip.
#[test]
fn ppmd_encoder_emits_distance_match_escapes() {
    let phrase: &[u8] = b"repeated phrase for rar29 ppmd distance escape 4 ";
    let filler: &[u8] = b"middle bytes make the repeat distance greater than one ";
    let tail: &[u8] = b"tail";
    let input = [phrase, filler, phrase, phrase, tail].concat();

    let tokens = encode_ppmd_tokens(&input, true);
    let packed = unpack29_encode_ppmd(&input).unwrap();

    let has_distance_match = tokens.iter().any(|token| {
        matches!(token, PpmdEncodeToken::Match { offset, length } if *offset > 1 && *length >= 32)
    });
    assert!(has_distance_match);
    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3340
/// For a phrase repeated 200 times, the PPMd tokenizer must emit at least one
/// distance match of length >= 32 and no match longer than 255 bytes.
#[test]
fn ppmd_distance_match_lengths_stay_period_decoder_compatible() {
    let phrase: &[u8] = b"<html><body>RAR PPMd LZSS conversion phrase</body></html>\n";
    let input = phrase.repeat(200);
    let tokens = encode_ppmd_tokens(&input, true);

    let has_long_match = tokens.iter().any(|token| {
        matches!(token, PpmdEncodeToken::Match { offset, length } if *offset > 1 && *length >= 32)
    });
    assert!(has_long_match);

    let has_oversized_match = tokens
        .iter()
        .any(|token| matches!(token, PpmdEncodeToken::Match { length, .. } if *length > 255));
    assert!(!has_oversized_match);
}
3357
/// Encodes an E8-filtered PPMd stream, compares its size with the unfiltered
/// PPMd stream and the filtered LZ stream, and checks that it round-trips.
#[test]
fn ppmd_encoder_emits_embedded_vm_filter_escape() {
    let input = b"\xe8\0\0\0\0rar29 ppmd embedded e8 filter payload\n".repeat(16);

    let filtered_ppmd =
        unpack29_encode_ppmd_with_filter(&input, Rar29FilterSpec::whole(Rar29FilterKind::E8))
            .unwrap();
    let plain_ppmd = unpack29_encode_ppmd(&input).unwrap();
    let filtered_lz = Unpack29Encoder::new()
        .encode_member_with_filter(&input, Rar29FilterSpec::whole(Rar29FilterKind::E8))
        .unwrap();

    // NOTE(review): this disjunction only fails when all three encodings have
    // exactly the same length; confirm whether a stricter check was intended.
    let differs_from_plain = filtered_ppmd.len() != plain_ppmd.len();
    let differs_from_lz = filtered_ppmd.len() != filtered_lz.len();
    assert!(differs_from_plain || differs_from_lz);
    assert_eq!(unpack29_decode(&filtered_ppmd, input.len()).unwrap(), input);
}
3372
3373 fn encode_with_filter(input: &[u8], kind: Rar29FilterKind) -> Result<Vec<u8>> {
3374 Unpack29Encoder::new().encode_member_with_filter(input, Rar29FilterSpec::whole(kind))
3375 }
3376
3377 fn encode_with_filter_range(
3378 input: &[u8],
3379 kind: Rar29FilterKind,
3380 range: Range<usize>,
3381 ) -> Result<Vec<u8>> {
3382 Unpack29Encoder::new().encode_member_with_filter(input, Rar29FilterSpec::range(kind, range))
3383 }
3384
3385 fn encode_with_filter_ranges(
3386 input: &[u8],
3387 kind: Rar29FilterKind,
3388 ranges: Vec<Range<usize>>,
3389 ) -> Result<Vec<u8>> {
3390 let filters: Vec<_> = ranges
3391 .into_iter()
3392 .map(|range| Rar29FilterSpec::range(kind, range))
3393 .collect();
3394 Unpack29Encoder::new().encode_member_with_filters(input, &filters)
3395 }
3396
/// 1024 identical bytes must pack to under a quarter of the input size and
/// round-trip.
#[test]
fn encoder_emits_rar29_offset_one_matches_for_repeated_bytes() {
    let input = vec![b'Z'; 1024];
    let packed = unpack29_encode_literals(&input).unwrap();

    let budget = input.len() / 4;
    assert!(packed.len() < budget);

    let decoded = unpack29_decode(&packed, input.len()).unwrap();
    assert_eq!(decoded, input);
}
3405
/// A 10-byte sequence repeated 128 times must pack to under half of the input
/// size and round-trip.
#[test]
fn encoder_emits_rar29_dictionary_matches_for_repeated_sequences() {
    let unit: &[u8] = b"abc123xyz-";
    let input = unit.repeat(128);
    let packed = unpack29_encode_literals(&input).unwrap();

    let budget = input.len() / 2;
    assert!(packed.len() < budget);

    let decoded = unpack29_decode(&packed, input.len()).unwrap();
    assert_eq!(decoded, input);
}
3414
/// A phrase repeated after 300 KiB of zeros must produce a match token with an
/// offset above 0x40000, compress below the input size, and round-trip.
#[test]
fn encoder_finds_rar29_matches_beyond_near_offsets() {
    let phrase: &[u8] = b"long-distance repeated phrase for rar29 low-offset coding.";
    let mut input = phrase.to_vec();
    input.resize(input.len() + 300 * 1024, 0);
    input.extend_from_slice(phrase);
    input.extend_from_slice(phrase);

    let tokens = encode_tokens(&input, &[], EncodeOptions::default());
    let packed = unpack29_encode_literals(&input).unwrap();

    let has_far_match = tokens
        .iter()
        .any(|token| matches!(token, EncodeToken::Match { offset, .. } if *offset > 0x40000));
    assert!(has_far_match);
    assert!(packed.len() < input.len());

    // A plain `assert!` keeps a failure from printing both ~300 KiB buffers.
    let decoded = unpack29_decode(&packed, input.len()).unwrap();
    assert!(
        decoded == input,
        "RAR 2.9 long-distance match round-trip failed"
    );
}
3437
/// Encodes a small payload with a single whole-input E8 filter and checks that
/// decoding restores the original bytes.
#[test]
fn encoder_emits_rar29_e8_vm_filter_record() {
    let input = b"\xe8\0\0\0\0rar29 e8 filter writer payload\n".repeat(8);
    let packed = encode_with_filter(&input, Rar29FilterKind::E8).unwrap();
    let decoded = unpack29_decode(&packed, input.len()).unwrap();

    // This test applies one filter, so the message no longer says "multi-filter".
    // The input is small, so `assert_eq!` can print both sides on failure, as the
    // sibling filter tests do.
    assert_eq!(decoded, input, "RAR 2.9 E8 filter round-trip failed");
}
3449
/// Round-trips a payload encoded with a whole-input E8E9 filter.
#[test]
fn encoder_emits_rar29_e8e9_vm_filter_record() {
    let line: &[u8] = b"\xe9\0\0\0\0rar29 e8e9 filter writer payload\n";
    let input = line.repeat(8);

    let packed = encode_with_filter(&input, Rar29FilterKind::E8E9).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3458
/// Round-trips an input where the E8 filter covers only the middle segment,
/// between an unfiltered prefix and suffix.
#[test]
fn encoder_emits_rar29_segmented_e8_vm_filter_record() {
    let prefix: &[u8] = b"prefix data that should not be x86 filtered ";
    let segment: &[u8] = b"\xe8\0\0\0\0segmented e8 filtered payload\n";
    let suffix: &[u8] = b" suffix data that should also remain raw";

    let start = prefix.len();
    let end = start + segment.len();
    let input = [prefix, segment, suffix].concat();

    let packed = encode_with_filter_range(&input, Rar29FilterKind::E8, start..end).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3471
/// Round-trips an 80,000-byte input with two separate 512-byte E8 filter ranges,
/// each holding eight `0xe8` opcodes spaced 64 bytes apart.
#[test]
fn encoder_emits_rar29_multiple_e8_vm_filter_records() {
    let cluster_starts = [8_000usize, 60_000];
    let mut input = vec![0x41u8; 80_000];

    for base in cluster_starts {
        for (slot, at) in (base..).step_by(64).take(8).enumerate() {
            input[at] = 0xe8;
            let operand = 0x2000u32 + slot as u32;
            input[at + 1..at + 5].copy_from_slice(&operand.to_le_bytes());
        }
    }

    let ranges = cluster_starts
        .iter()
        .map(|&base| base..base + 512)
        .collect();
    let packed = encode_with_filter_ranges(&input, Rar29FilterKind::E8, ranges).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3493
/// Round-trips an input where the E8E9 filter covers only the middle segment,
/// between an unfiltered prefix and suffix.
#[test]
fn encoder_emits_rar29_segmented_e8e9_vm_filter_record() {
    let prefix: &[u8] = b"prefix data that should not be x86 filtered ";
    let segment: &[u8] = b"\xe9\0\0\0\0segmented e8e9 filtered payload\n";
    let suffix: &[u8] = b" suffix data that should also remain raw";

    let start = prefix.len();
    let end = start + segment.len();
    let input = [prefix, segment, suffix].concat();

    let packed = encode_with_filter_range(&input, Rar29FilterKind::E8E9, start..end).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3506
/// Round-trips 192 generated bytes encoded with a whole-input 3-channel delta
/// filter.
#[test]
fn encoder_emits_rar29_delta_vm_filter_record() {
    let kind = Rar29FilterKind::Delta { channels: 3 };
    let input: Vec<u8> = (0..192).map(|step| (step * 13 + 7) as u8).collect();

    let packed = encode_with_filter(&input, kind).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3515
/// Round-trips an input where a 3-channel delta filter covers only the generated
/// middle segment, between an unfiltered prefix and suffix.
#[test]
fn encoder_emits_rar29_segmented_delta_vm_filter_record() {
    let prefix: &[u8] = b"prefix bytes before delta segment ";
    let segment: Vec<u8> = (0..192).map(|step| (step * 13 + 7) as u8).collect();
    let suffix: &[u8] = b" suffix bytes after delta segment";

    let start = prefix.len();
    let end = start + segment.len();
    let input = [prefix, segment.as_slice(), suffix].concat();

    let kind = Rar29FilterKind::Delta { channels: 3 };
    let packed = encode_with_filter_range(&input, kind, start..end).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3530
/// Round-trips an input encoded with a whole-input Itanium filter. The input
/// starts with 48 bytes that are zero except at offsets 16 and 21.
#[test]
fn encoder_emits_rar29_itanium_vm_filter_record() {
    let mut header = [0u8; 48];
    header[16] = 22;
    header[21] = 20;
    let text: &[u8] = b"rar29 itanium filter writer payload\n";
    let input = [&header[..], text].concat();

    let packed = encode_with_filter(&input, Rar29FilterKind::Itanium).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3542
/// Round-trips an input where the Itanium filter covers only the middle segment:
/// 48 mostly-zero bytes followed by text, between an unfiltered prefix and suffix.
#[test]
fn encoder_emits_rar29_segmented_itanium_vm_filter_record() {
    let prefix: &[u8] = b"prefix bytes before itanium segment ";
    let mut header = [0u8; 48];
    header[16] = 22;
    header[21] = 20;
    let text: &[u8] = b"rar29 segmented itanium filter writer payload\n";
    let suffix: &[u8] = b" suffix bytes after itanium segment";

    let start = prefix.len();
    let end = start + header.len() + text.len();
    let input = [prefix, &header[..], text, suffix].concat();

    let packed =
        encode_with_filter_range(&input, Rar29FilterKind::Itanium, start..end).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3559
/// Round-trips 96 generated bytes encoded with a whole-input RGB filter of
/// width 12 and `pos_r` 0.
#[test]
fn encoder_emits_rar29_rgb_vm_filter_record() {
    let width = 12;
    let kind = Rar29FilterKind::Rgb { width, pos_r: 0 };
    let input: Vec<u8> = (0..96).map(|step| (step * 29 + 11) as u8).collect();

    let packed = encode_with_filter(&input, kind).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3569
/// Round-trips an input where an RGB filter (width 12, `pos_r` 0) covers only the
/// generated middle segment, between an unfiltered prefix and suffix.
#[test]
fn encoder_emits_rar29_segmented_rgb_vm_filter_record() {
    let width = 12;
    let prefix: &[u8] = b"prefix bytes before rgb segment ";
    let segment: Vec<u8> = (0..96).map(|step| (step * 29 + 11) as u8).collect();
    let suffix: &[u8] = b" suffix bytes after rgb segment";

    let start = prefix.len();
    let end = start + segment.len();
    let input = [prefix, segment.as_slice(), suffix].concat();

    let kind = Rar29FilterKind::Rgb { width, pos_r: 0 };
    let packed = encode_with_filter_range(&input, kind, start..end).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3585
/// Encoding 96 bytes with an RGB filter of width 8 must return an error.
#[test]
fn encoder_rejects_rar29_rgb_filter_with_unaligned_scanline_width() {
    let input: Vec<u8> = (0..96).map(|step| (step * 29 + 11) as u8).collect();
    let kind = Rar29FilterKind::Rgb { width: 8, pos_r: 0 };

    let outcome = encode_with_filter(&input, kind);

    assert!(outcome.is_err());
}
3591
/// Round-trips 160 generated bytes encoded with a whole-input 2-channel audio
/// filter.
#[test]
fn encoder_emits_rar29_audio_vm_filter_record() {
    let kind = Rar29FilterKind::Audio { channels: 2 };
    let input: Vec<u8> = (0..160).map(|step| (step * 7 + step / 3) as u8).collect();

    let packed = encode_with_filter(&input, kind).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3602
/// Runs `RAR3_AUDIO_FILTER_BYTECODE` on the output of `audio_encode` for a
/// block of `MAX_VM_AUDIO_FILTER_BLOCK_SIZE` bytes and expects the original input back.
#[test]
fn audio_filter_bytecode_matches_builtin_transform() {
    let channels = 2;
    let input: Vec<u8> = (0..MAX_VM_AUDIO_FILTER_BLOCK_SIZE)
        .map(|step| (step * 7 + step / channels + step / 257) as u8)
        .collect();
    let encoded = audio_encode(&input, channels).unwrap();

    // Register 0 carries the channel count; all other registers are zero.
    let invocation = crate::rarvm::Invocation {
        input: &encoded,
        regs: [channels as u32, 0, 0, 0, 0, 0, 0],
        global_data: &[],
        file_offset: 0,
        exec_count: 0,
    };
    let program = Program::parse(RAR3_AUDIO_FILTER_BYTECODE).unwrap();
    let result = program.execute(invocation).unwrap();

    assert_eq!(result.output, input);
}
3623
/// Splitting a whole-input audio filter over `MAX_VM_FILTER_BLOCK_SIZE * 2 + 123`
/// bytes must give three ranges: two of `MAX_VM_AUDIO_FILTER_BLOCK_SIZE` bytes
/// and one for the remainder.
#[test]
fn large_audio_filters_are_split_into_rarvm_safe_blocks() {
    let total = MAX_VM_FILTER_BLOCK_SIZE * 2 + 123;
    let block = MAX_VM_AUDIO_FILTER_BLOCK_SIZE;
    let spec = Rar29FilterSpec::whole(Rar29FilterKind::Audio { channels: 4 });

    let filters = split_large_filter(total, spec).unwrap();

    assert_eq!(filters.len(), 3);
    let expected = [0..block, block..block * 2, block * 2..total];
    for (filter, range) in filters.iter().zip(expected) {
        assert_eq!(filter.range, Some(range));
    }
}
3643
/// Splitting a whole-input delta filter over `MAX_VM_FILTER_BLOCK_SIZE * 2 + 123`
/// bytes must give three ranges: two of `MAX_VM_DELTA_FILTER_BLOCK_SIZE` bytes
/// and one for the remainder.
#[test]
fn large_delta_filters_are_split_into_rarvm_safe_blocks() {
    let total = MAX_VM_FILTER_BLOCK_SIZE * 2 + 123;
    let block = MAX_VM_DELTA_FILTER_BLOCK_SIZE;
    let spec = Rar29FilterSpec::whole(Rar29FilterKind::Delta { channels: 4 });

    let filters = split_large_filter(total, spec).unwrap();

    assert_eq!(filters.len(), 3);
    let expected = [0..block, block..block * 2, block * 2..total];
    for (filter, range) in filters.iter().zip(expected) {
        assert_eq!(filter.range, Some(range));
    }
}
3663
/// Encodes two consecutive audio filter records that use the same bytecode and
/// checks the selector each record declares: 0 for the first, 2 for the second.
#[test]
fn segmented_audio_filters_redeclare_program_state() {
    // Both records share the register setup and bytecode; only the block differs.
    let audio_record = |block_start, block_size| OwnedVmFilterRecord {
        block_start,
        block_size,
        init_regs: vec![(0, 4)],
        code: RAR3_AUDIO_FILTER_BYTECODE,
    };
    let filters = [
        audio_record(0, MAX_VM_AUDIO_FILTER_BLOCK_SIZE),
        audio_record(MAX_VM_AUDIO_FILTER_BLOCK_SIZE, 4096),
    ];

    let records = encoded_filter_records(&filters).unwrap();

    assert_vm_filter_declares_program(&records[0], 0);
    assert_vm_filter_declares_program(&records[1], 2);
}
3685
/// Round-trips an input where a 2-channel audio filter covers only the generated
/// middle segment, between an unfiltered prefix and suffix.
#[test]
fn encoder_emits_rar29_segmented_audio_vm_filter_record() {
    let prefix: &[u8] = b"prefix bytes before audio segment ";
    let segment: Vec<u8> = (0..160).map(|step| (step * 7 + step / 3) as u8).collect();
    let suffix: &[u8] = b" suffix bytes after audio segment";

    let start = prefix.len();
    let end = start + segment.len();
    let input = [prefix, segment.as_slice(), suffix].concat();

    let kind = Rar29FilterKind::Audio { channels: 2 };
    let packed = encode_with_filter_range(&input, kind, start..end).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3700
/// Round-trips an input of `MAX_VM_AUDIO_FILTER_BLOCK_SIZE * 2 + 64` bytes
/// encoded with a whole-input 4-channel audio filter.
#[test]
fn encoder_emits_multiple_rar29_audio_vm_filter_records_for_large_ranges() {
    let total = MAX_VM_AUDIO_FILTER_BLOCK_SIZE * 2 + 64;
    let input: Vec<u8> = (0..total)
        .map(|step| (step * 7 + step / 3 + step / 257) as u8)
        .collect();
    let kind = Rar29FilterKind::Audio { channels: 4 };

    let packed = encode_with_filter(&input, kind).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3711
/// Round-trips an input of `MAX_VM_DELTA_FILTER_BLOCK_SIZE * 2 + 64` bytes
/// encoded with a whole-input 4-channel delta filter.
#[test]
fn encoder_emits_multiple_rar29_delta_vm_filter_records_for_large_ranges() {
    let total = MAX_VM_DELTA_FILTER_BLOCK_SIZE * 2 + 64;
    let input: Vec<u8> = (0..total)
        .map(|step| (step * 11 + step / 5 + step / 251) as u8)
        .collect();
    let kind = Rar29FilterKind::Delta { channels: 4 };

    let packed = encode_with_filter(&input, kind).unwrap();

    assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
}
3722
3723 fn assert_vm_filter_declares_program(record: &[u8], expected_selector: u32) {
3724 let first = record[0];
3725 assert_ne!(first & 0x80, 0);
3726 assert_ne!(first & 0x20, 0);
3727 assert_ne!(first & 0x10, 0);
3728 let inline_len = match first & 7 {
3729 len @ 0..=5 => len as usize + 1,
3730 6 => usize::from(record[1]) + 7,
3731 _ => u16::from_be_bytes([record[1], record[2]]) as usize,
3732 };
3733 let body_start = match first & 7 {
3734 0..=5 => 1,
3735 6 => 2,
3736 _ => 3,
3737 };
3738 let body = &record[body_start..body_start + inline_len];
3739 let mut bits = BitReader::from_bytes(body);
3740 assert_eq!(bits.read_encoded_u32().unwrap(), expected_selector);
3741 let _block_start = bits.read_encoded_u32().unwrap();
3742 let _block_size = bits.read_encoded_u32().unwrap();
3743 let mask = bits.read_bits(7).unwrap();
3744 for index in 0..7 {
3745 if mask & (1 << index) != 0 {
3746 let _ = bits.read_encoded_u32().unwrap();
3747 }
3748 }
3749 assert_eq!(
3750 bits.read_encoded_u32().unwrap() as usize,
3751 RAR3_AUDIO_FILTER_BYTECODE.len()
3752 );
3753 }
3754
/// Encoding a second member on an encoder that already encoded the same phrase
/// must give a smaller result than encoding it independently, and both members
/// must decode in sequence on one decoder.
#[test]
fn solid_encoder_emits_rar29_matches_against_previous_member_history() {
    let phrase: &[u8] = b"solid rar29 shared phrase alpha beta gamma ";
    let first = phrase.repeat(4);
    let second = phrase.repeat(2);

    let independent = unpack29_encode_literals(&second).unwrap();

    let mut encoder = Unpack29Encoder::new();
    let first_packed = encoder.encode_member(&first).unwrap();
    let second_packed = encoder.encode_member(&second).unwrap();
    assert!(second_packed.len() < independent.len());

    let mut decoder = Unpack29::new();
    let first_decoded = decoder.decode_member(&first_packed, first.len()).unwrap();
    assert_eq!(first_decoded, first);
    let second_decoded = decoder.decode_member(&second_packed, second.len()).unwrap();
    assert_eq!(second_decoded, second);
}
3775
/// Decodes `COMPRESSED_TEXT` through a reader that returns at most three bytes
/// per `read` call and expects the full fixture text.
#[test]
fn decode_member_from_reader_accepts_incremental_input() {
    /// Reader that hands out its backing slice at most three bytes at a time.
    struct DripReader<'a> {
        input: &'a [u8],
    }

    impl std::io::Read for DripReader<'_> {
        fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
            if self.input.is_empty() {
                return Ok(0);
            }
            let len = self.input.len().min(out.len()).min(3);
            let (head, rest) = self.input.split_at(len);
            out[..len].copy_from_slice(head);
            self.input = rest;
            Ok(len)
        }
    }

    let mut reader = DripReader {
        input: COMPRESSED_TEXT,
    };
    let mut output = Vec::new();
    let mut decoder = Unpack29::new();
    decoder
        .decode_member_from_reader(&mut reader, 2400, &mut output)
        .unwrap();

    assert_eq!(output, expected_text());
}
3805
/// Seeds a decoder with stale output and a stale filter, then checks that
/// `decode_non_solid_member` still returns the fixture text and leaves no filters.
#[test]
fn decode_non_solid_member_resets_reusable_decoder_state() {
    let stale_filter = VmFilter {
        program: 0,
        start: 0,
        size: 1,
        regs: [0; 7],
        global_data: vec![1, 2, 3],
    };
    let mut decoder = Unpack29::new();
    decoder.output.extend_from_slice(b"stale history");
    decoder.filters.push(stale_filter);

    let decoded = decoder.decode_non_solid_member(COMPRESSED_TEXT, 2400);
    let output = decoded.unwrap();

    assert_eq!(output, expected_text());
    assert!(decoder.filters.is_empty());
}
3825
/// Places an E8 call 100 bytes into a member that starts at output position 1000.
/// Its operand is encoded relative to the member start, and the filtered range
/// must decode it back to the original address.
#[test]
fn e8_filter_uses_member_relative_offset_in_solid_stream() {
    let member_start = 1000usize;
    let relative_start = 100usize;
    let filter_start = member_start + relative_start;

    let mut decoder = Unpack29::new();
    decoder.output.resize(filter_start + 8, 0);
    decoder.output[filter_start] = 0xe8;

    // Encoded operand = decoded address + member-relative filter start + operand
    // position (one byte past the opcode).
    let decoded_addr = 0x2000u32;
    let call_operand_pos = 1u32;
    let encoded_addr = decoded_addr
        .wrapping_add(relative_start as u32)
        .wrapping_add(call_operand_pos);
    let operand_bytes = encoded_addr.to_le_bytes();
    decoder.output[filter_start + 1..filter_start + 5].copy_from_slice(&operand_bytes);

    decoder.programs.push(VmProgram {
        kind: VmProgramKind::Standard(StandardFilter::E8),
        block_size: 5,
        exec_count: 0,
        globals: Vec::new(),
    });
    decoder.filters.push(VmFilter {
        program: 0,
        start: filter_start,
        size: 5,
        regs: [0; 7],
        global_data: Vec::new(),
    });

    let filtered = decoder
        .filtered_range(member_start, filter_start + 5, member_start)
        .unwrap();

    // The operand occupies bytes 101..=104 of the returned range.
    let at = relative_start + 1;
    let operand = u32::from_le_bytes([
        filtered[at],
        filtered[at + 1],
        filtered[at + 2],
        filtered[at + 3],
    ]);
    assert_eq!(operand, decoded_addr);
}
3864
/// Registers a two-instruction generic VM program (a byte-mode `Mov` of `0x44`
/// to absolute address 0, then `Ret`) as a filter over three output bytes and
/// checks that `filtered_range` returns `[0x44, 0x22, 0x33]`.
#[test]
fn generic_vm_filter_executes_from_filtered_range() {
    let store_byte = Instruction {
        opcode: Opcode::Mov,
        byte_mode: true,
        operands: vec![Operand::Absolute(0), Operand::Immediate(0x44)],
    };
    let finish = Instruction {
        opcode: Opcode::Ret,
        byte_mode: false,
        operands: Vec::new(),
    };
    let program = Program {
        static_data: Vec::new(),
        instructions: vec![store_byte, finish],
    };

    let mut decoder = Unpack29::new();
    decoder.output.extend_from_slice(&[0x11, 0x22, 0x33]);
    decoder.programs.push(VmProgram {
        kind: VmProgramKind::Generic(program),
        block_size: 3,
        exec_count: 0,
        globals: Vec::new(),
    });
    decoder.filters.push(VmFilter {
        program: 0,
        start: 0,
        size: 3,
        regs: [0; 7],
        global_data: Vec::new(),
    });

    let filtered = decoder.filtered_range(0, 3, 0).unwrap();

    assert_eq!(filtered, [0x44, 0x22, 0x33]);
}
3901
/// `apply_standard_filter` must return the documented `InvalidData` errors for a
/// delta filter with register 0 set to 33 and for RGB filters with registers
/// `(2, 0)` and `(15, 3)`.
#[test]
fn standard_filters_reject_malformed_delta_and_rgb_registers() {
    let mut delta = vec![0; 32];
    let delta_regs = [33, 0, 0, 0, 0, 0, 0];
    let delta_result = apply_standard_filter(StandardFilter::Delta, &mut delta, 0, &delta_regs);
    assert_eq!(
        delta_result,
        Err(Error::InvalidData(
            "RAR 2.9 DELTA filter channel count is invalid"
        ))
    );

    // The same buffer is reused for both RGB attempts.
    let mut rgb = vec![0; 32];
    let first_regs = [2, 0, 0, 0, 0, 0, 0];
    let first_result = apply_standard_filter(StandardFilter::Rgb, &mut rgb, 0, &first_regs);
    assert_eq!(
        first_result,
        Err(Error::InvalidData(
            "RAR 2.9 RGB filter parameters are invalid"
        ))
    );

    let second_regs = [15, 3, 0, 0, 0, 0, 0];
    let second_result = apply_standard_filter(StandardFilter::Rgb, &mut rgb, 0, &second_regs);
    assert_eq!(
        second_result,
        Err(Error::InvalidData(
            "RAR 2.9 RGB filter parameters are invalid"
        ))
    );
}
3932
/// Reading an encoded u32 from five `0xff` bytes must give `0xffff_ffff`.
#[test]
fn vm_encoded_u32_accepts_32_bit_form() {
    let raw = [0xff; 5];
    let mut bits = super::BitReader::from_bytes(&raw);

    let value = bits.read_encoded_u32().unwrap();

    assert_eq!(value, 0xffff_ffff);
}
3939
/// Feeds `parse_vm_code` a record with flags `0x80 | 0x08` whose third encoded
/// value is `u32::MAX` and no further data, and expects `Error::NeedMoreInput`.
// NOTE(review): going by the test name, the third value is the declared global
// data size; confirm against `parse_vm_code`.
#[test]
fn vm_global_data_size_does_not_reserve_untrusted_declared_size() {
    let mut decoder = Unpack29::new();
    decoder.programs.push(VmProgram {
        kind: VmProgramKind::Standard(StandardFilter::E8),
        block_size: 1,
        exec_count: 0,
        globals: Vec::new(),
    });

    let mut record = BitWriter::default();
    for value in [1, 0, u32::MAX] {
        record.write_encoded_u32(value);
    }

    let outcome = decoder.parse_vm_code(0x80 | 0x08, record.finish());

    assert_eq!(outcome, Err(Error::NeedMoreInput));
}
3960
/// A record whose third encoded value is `MAX_VM_CODE_SIZE + 1` must be rejected
/// with the "VM code is too large" error.
#[test]
fn vm_code_size_is_capped_before_allocation() {
    let oversized = (super::MAX_VM_CODE_SIZE + 1) as u32;
    let mut record = BitWriter::default();
    for value in [0, 1, oversized] {
        record.write_encoded_u32(value);
    }

    let mut decoder = Unpack29::new();
    let outcome = decoder.parse_vm_code(0x80, record.finish());

    assert_eq!(
        outcome,
        Err(Error::InvalidData("RAR 2.9 VM code is too large"))
    );
}
3974
/// With `MAX_VM_PROGRAMS` programs registered, a record with flags `0x80` must
/// fail with the program-limit error. With `MAX_VM_FILTERS` filters queued, a
/// record with flags `0` must fail with the filter-limit error.
#[test]
fn vm_program_and_filter_counts_are_capped() {
    let placeholder_program = || VmProgram {
        kind: VmProgramKind::Standard(StandardFilter::E8),
        block_size: 1,
        exec_count: 0,
        globals: Vec::new(),
    };
    let placeholder_filter = || VmFilter {
        program: 0,
        start: 0,
        size: 1,
        regs: [0; 7],
        global_data: Vec::new(),
    };

    // Phase 1: the program table is already full.
    let mut decoder = Unpack29::new();
    decoder
        .programs
        .resize_with(super::MAX_VM_PROGRAMS, placeholder_program);

    let mut new_program = BitWriter::default();
    new_program.write_encoded_u32((super::MAX_VM_PROGRAMS + 1) as u32);
    new_program.write_encoded_u32(1);
    new_program.write_encoded_u32(1);
    new_program.write_bits(0, 8);
    let program_outcome = decoder.parse_vm_code(0x80, new_program.finish());
    assert_eq!(
        program_outcome,
        Err(Error::InvalidData("RAR 2.9 VM program limit exceeded"))
    );

    // Phase 2: keep one program and fill the filter queue instead.
    decoder.programs.truncate(1);
    decoder.last_filter = 0;
    decoder
        .filters
        .resize_with(super::MAX_VM_FILTERS, placeholder_filter);

    let mut reused_program = BitWriter::default();
    reused_program.write_encoded_u32(0);
    let filter_outcome = decoder.parse_vm_code(0, reused_program.finish());
    assert_eq!(
        filter_outcome,
        Err(Error::InvalidData("RAR 2.9 VM filter limit exceeded"))
    );
}
4015
/// `itanium_encode` followed by `itanium_decode`, both at file offset `u32::MAX`,
/// must restore a 64-byte buffer exactly.
#[test]
fn itanium_filter_round_trips_with_high_file_offset() {
    // Bytes 0..64 in order, with byte 0 kept at zero and byte 7 set to 5 << 3.
    let mut data: Vec<u8> = (0..64u8).collect();
    data[0] = 0;
    data[7] = 5 << 3;
    let original = data.clone();

    let file_offset = u32::MAX;
    itanium_encode(&mut data, file_offset);
    itanium_decode(&mut data, file_offset);

    assert_eq!(data, original);
}
4031
/// Returns the fixture line repeated 80 times, as bytes. The tests in this module
/// compare it with the decoded `COMPRESSED_TEXT`.
fn expected_text() -> Vec<u8> {
    let line: &[u8] = b"Hello, RAR 3.x fixture world.\n";
    line.repeat(80)
}
4035}