1use crate::filters::{self, DeltaErrorMessages, FilterOp};
2use crate::huffman;
3use crate::ppmd::{PpmdByteReader, PpmdDecoder, PpmdEncoder};
4use crate::rarvm;
5use crate::{Error, Result};
6use rars_crc32::crc32;
7use std::io::{Read, Write};
8use std::ops::Range;
9
// Symbol counts of the four Huffman tables built by `encode_member_inner`
// (main, offset, low-offset and length), plus the level table used to
// transmit their code lengths.
const MAIN_COUNT: usize = 299;
const OFFSET_COUNT: usize = 60;
const LOW_OFFSET_COUNT: usize = 17;
const LENGTH_COUNT: usize = 28;
const LEVEL_COUNT: usize = 20;
// Total number of code lengths carried in one table header (all four tables
// laid out back to back in the order main, offset, low-offset, length).
const TABLE_COUNT: usize = MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT + LENGTH_COUNT;
// Upper bound on the number of trailing bytes kept as match history between
// members and between blocks.
const MAX_HISTORY: usize = 4 * 1024 * 1024;
// NOTE(review): not referenced in this part of the file; presumably the
// streaming decoder's chunk size — confirm at the use site.
const STREAM_CHUNK: usize = 1024 * 1024;
// Largest block a single VM filter record may cover; `split_large_filter`
// cuts longer ranges into chunks no bigger than these limits.
const MAX_VM_FILTER_BLOCK_SIZE: usize = 128 * 1024;
const MAX_VM_DELTA_FILTER_BLOCK_SIZE: usize = 120_000;
const MAX_VM_AUDIO_FILTER_BLOCK_SIZE: usize = 120_000;
// NOTE(review): the four limits below are not referenced in this part of the
// file; presumably decoder-side sanity bounds for VM data — confirm.
const MAX_VM_GLOBAL_DATA: usize = 0x2000;
const MAX_VM_CODE_SIZE: usize = 64 * 1024;
const MAX_VM_PROGRAMS: usize = 8192;
const MAX_VM_FILTERS: usize = 8192;
28
// Per-slot base values for match lengths; paired index-for-index with
// `LENGTH_BITS`.
const LENGTH_BASES: [usize; LENGTH_COUNT] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
    160, 192, 224,
];
// Number of extra bits written after a length slot's Huffman code.
const LENGTH_BITS: [u8; LENGTH_COUNT] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
];
// Per-slot base values for match offsets; paired index-for-index with
// `OFFSET_BITS`.
const OFFSET_BASES: [usize; OFFSET_COUNT] = [
    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
    2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
    1048576, 1310720, 1572864, 1835008, 2097152, 2359296, 2621440, 2883584, 3145728, 3407872,
    3670016, 3932160,
];
// Number of extra bits carried by each offset slot. For slots above 9 the low
// four of these bits are sent through the low-offset Huffman table instead of
// raw (see `encode_member_inner`).
const OFFSET_BITS: [u8; OFFSET_COUNT] = [
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
    13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 18, 18, 18, 18, 18,
    18, 18, 18, 18, 18, 18, 18,
];
// NOTE(review): the short-offset tables are not referenced in this part of
// the file; presumably used by the decoder for the short-match symbols.
const SHORT_BASES: [usize; 8] = [0, 4, 8, 16, 32, 64, 128, 192];
const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];
// Default value of `EncodeOptions::max_match_distance`.
const MAX_ENCODER_MATCH_OFFSET: usize = 1024 * 1024;
// NOTE(review): match-finder and PPMd tuning limits below are consumed
// outside this part of the file, except `MAX_MATCH_CANDIDATES`, which is the
// candidate count used by `EncodeOptions::default()`.
const MAX_ENCODER_MATCH_LENGTH: usize = 258;
const MATCH_HASH_BUCKETS: usize = 4096;
const MAX_MATCH_CANDIDATES: usize = 256;
const MAX_PPMD_MATCH_LENGTH: usize = 255;
const MIN_PPMD_MATCH_LENGTH: usize = 32;
const MAX_PPMD_REPEAT_LENGTH: usize = 259;
57
// VM program bytes attached by `filtered_member` to records of kind
// `Rar29FilterKind::E8`.
const RAR3_E8_FILTER_BYTECODE: &[u8] = &[
    0x97, 0x1b, 0x01, 0x28, 0x07, 0x06, 0x98, 0x08, 0x00, 0x00, 0x00, 0xd1, 0x3a, 0x10, 0x15, 0x92,
    0xec, 0x50, 0xcb, 0x99, 0x20, 0xb9, 0x25, 0xf0, 0x29, 0x19, 0x15, 0x53, 0x03, 0x12, 0xae, 0x51,
    0x10, 0x35, 0x59, 0x2b, 0x60, 0x04, 0x15, 0x6d, 0x40, 0x66, 0xab, 0x02, 0x34, 0x49, 0x04, 0x36,
    0x02, 0x52, 0x3e, 0x97, 0x00,
];
// VM program bytes attached by `filtered_member` to records of kind
// `Rar29FilterKind::E8E9`.
const RAR3_E8E9_FILTER_BYTECODE: &[u8] = &[
    0x84, 0x1b, 0x01, 0x28, 0x11, 0x10, 0x69, 0x80, 0x80, 0x00, 0x00, 0x0d, 0x13, 0xa1, 0x01, 0xc6,
    0x89, 0xd2, 0x80, 0xac, 0x97, 0x62, 0x85, 0x5c, 0xc9, 0x05, 0xc9, 0x2f, 0x81, 0x48, 0xc8, 0xaa,
    0x98, 0x18, 0x95, 0x72, 0x88, 0x81, 0xaa, 0xc9, 0x5b, 0x00, 0x20, 0xab, 0x6a, 0x03, 0x35, 0x58,
    0x11, 0xa2, 0x48, 0x21, 0xb0, 0x12, 0x91, 0xf4, 0xb8,
];
// VM program bytes attached by `filtered_member` to records of kind
// `Rar29FilterKind::Delta`.
const RAR3_DELTA_FILTER_BYTECODE: &[u8] = &[
    0x2f, 0x01, 0x9a, 0x41, 0x80, 0xec, 0x27, 0x48, 0x2f, 0x09, 0x76, 0x6d, 0xd3, 0xea, 0x41, 0x5b,
    0x59, 0x44, 0xe8, 0x17, 0x5c, 0xe1, 0x6c, 0x91, 0x4c, 0x4e, 0x3f, 0x77, 0x00,
];
// VM program bytes attached by `filtered_member` to records of kind
// `Rar29FilterKind::Itanium`.
const RAR3_ITANIUM_FILTER_BYTECODE: &[u8] = &[
    0x46, 0x9e, 0x08, 0x08, 0x0c, 0x0c, 0x00, 0x00, 0x0e, 0x0e, 0x08, 0x08, 0x00, 0x00, 0x08, 0x08,
    0x00, 0x00, 0x6c, 0x11, 0x5a, 0x04, 0xac, 0x0c, 0xc4, 0xcc, 0x5c, 0x08, 0x18, 0x46, 0x24, 0x08,
    0xf9, 0xa0, 0x44, 0x25, 0x12, 0x12, 0x45, 0x85, 0x99, 0x0c, 0x14, 0x00, 0x26, 0x25, 0x58, 0x99,
    0x90, 0x03, 0x38, 0x1a, 0x08, 0xdc, 0x02, 0x30, 0x0c, 0x4e, 0xd1, 0x1d, 0x89, 0xa1, 0xe2, 0xd0,
    0x55, 0x11, 0x33, 0x60, 0x8c, 0x5a, 0x23, 0x06, 0xde, 0x06, 0x18, 0x00, 0x7f, 0xff, 0xfc, 0x4d,
    0xcc, 0x19, 0x17, 0xb3, 0x06, 0xc4, 0x44, 0xb2, 0x32, 0x5a, 0x44, 0xc4, 0xa6, 0x01, 0xf4, 0x24,
    0x88, 0x83, 0x38, 0xcc, 0xc4, 0x11, 0x09, 0x87, 0xa6, 0xe0, 0x46, 0x02, 0xb2, 0x24, 0x03, 0xe2,
    0xa0, 0x32, 0x54, 0x83, 0x52, 0xc5, 0xb1, 0x70,
];
// VM program bytes attached by `filtered_member` to records of kind
// `Rar29FilterKind::Rgb`.
const RAR3_RGB_FILTER_BYTECODE: &[u8] = &[
    0xc5, 0x01, 0x9a, 0x41, 0x95, 0xc9, 0xa6, 0x4d, 0xba, 0x4b, 0x14, 0x0a, 0xf4, 0x9b, 0x80, 0x4c,
    0x00, 0x15, 0xa6, 0xa8, 0x07, 0x26, 0x2a, 0xc9, 0xc4, 0x8b, 0x86, 0x62, 0x32, 0x0f, 0x86, 0x64,
    0x24, 0x06, 0x66, 0x71, 0x19, 0x98, 0xcc, 0x43, 0x33, 0x31, 0x99, 0x00, 0x66, 0x88, 0x33, 0x30,
    0xcc, 0xd1, 0x0e, 0x98, 0x0b, 0x33, 0x34, 0x40, 0x0c, 0xd1, 0x46, 0x66, 0x19, 0x9a, 0x28, 0xcc,
    0x49, 0x80, 0xb3, 0x33, 0x45, 0x00, 0xcd, 0x18, 0x66, 0x61, 0x99, 0xa3, 0x0c, 0xc8, 0x98, 0x0b,
    0x33, 0x34, 0x60, 0x4c, 0xd1, 0x06, 0x68, 0xa5, 0x20, 0x62, 0x66, 0x88, 0x33, 0x46, 0x28, 0x05,
    0x0f, 0x32, 0x0c, 0x4c, 0xd1, 0x46, 0x68, 0xc5, 0x00, 0x41, 0xe4, 0x8f, 0xc8, 0x85, 0x5e, 0x02,
    0x7c, 0xc9, 0x26, 0x81, 0x83, 0xb0, 0x9d, 0xc2, 0xde, 0x9c, 0x78, 0xac, 0xd6, 0x68, 0xb4, 0x0e,
    0x71, 0xdb, 0xb2, 0x49, 0x38, 0x6e, 0x02, 0x2a, 0x2c, 0x41, 0x2b, 0x10, 0x98, 0x82, 0x49, 0x03,
    0x14, 0xf4, 0xe1, 0x97, 0x00,
];
// VM program bytes attached by `filtered_member` to records of kind
// `Rar29FilterKind::Audio`. `encoded_filter_records` never reuses an earlier
// copy of this program: every audio record carries the code again.
const RAR3_AUDIO_FILTER_BYTECODE: &[u8] = &[
    0x47, 0x01, 0x9a, 0x41, 0x95, 0xe5, 0x72, 0x0d, 0xc2, 0x64, 0x82, 0x74, 0x93, 0x24, 0xb1, 0x40,
    0x06, 0xd8, 0x38, 0x44, 0x00, 0xa8, 0x01, 0x34, 0x11, 0xdc, 0xa1, 0xba, 0x01, 0x99, 0x0c, 0xc4,
    0x03, 0x31, 0x19, 0xa4, 0x06, 0x66, 0x22, 0x60, 0x4d, 0x9a, 0x40, 0x0d, 0x66, 0x8e, 0x60, 0xd0,
    0x30, 0x40, 0x18, 0x26, 0xc1, 0xc8, 0xf6, 0xe6, 0x26, 0x13, 0x78, 0x92, 0x08, 0xe8, 0x50, 0xbc,
    0x5a, 0x07, 0xc6, 0xe9, 0xf5, 0x20, 0xa9, 0xa0, 0xed, 0x37, 0x33, 0x47, 0x39, 0x66, 0x90, 0x70,
    0x19, 0xa3, 0x9b, 0xcf, 0x25, 0x83, 0x80, 0xc1, 0xbd, 0x30, 0x16, 0x6e, 0x23, 0x34, 0x93, 0x81,
    0x16, 0x09, 0xb0, 0x50, 0x18, 0x3b, 0x4d, 0xc8, 0x4c, 0x05, 0x9b, 0x88, 0xc5, 0x28, 0xe0, 0x76,
    0x93, 0x90, 0x98, 0x0b, 0x37, 0x11, 0x8a, 0x59, 0xc4, 0x80, 0x42, 0x48, 0x43, 0xa9, 0x47, 0xee,
    0x43, 0x34, 0x60, 0x47, 0xd4, 0x4a, 0x0d, 0xbb, 0xd3, 0x59, 0xa4, 0x86, 0xee, 0x05, 0x09, 0x40,
    0x26, 0xc9, 0x34, 0x24, 0x76, 0xa0, 0x30, 0x6a, 0x20, 0xea, 0x02, 0x20, 0x04, 0xa0, 0x41, 0x50,
    0x9e, 0x50, 0x3f, 0xe6, 0xe1, 0x28, 0x94, 0x46, 0x01, 0xbd, 0x8b, 0x40, 0xf0, 0x68, 0x11, 0x36,
    0xc9, 0xa1, 0x92, 0x38, 0x11, 0x41, 0x9c, 0xa8, 0x95, 0x10, 0xee, 0x50, 0x66, 0x2b, 0x00, 0x20,
    0x95, 0x11, 0x04, 0x02, 0x62, 0xac, 0x66, 0x8c, 0x6a, 0xca, 0x26, 0x40, 0xb2, 0x67, 0x1b, 0x4b,
    0x26, 0xcc, 0x64, 0x8a, 0x62, 0x71, 0xa2, 0xb8,
];
116
117pub fn unpack29_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
118 let mut decoder = Unpack29::new();
119 decoder.decode_non_solid_member(input, output_size)
120}
121
122pub fn unpack29_encode_literals(input: &[u8]) -> Result<Vec<u8>> {
123 encode_member(input, &[])
124}
125
126pub fn unpack29_encode_literals_with_options(
127 input: &[u8],
128 options: EncodeOptions,
129) -> Result<Vec<u8>> {
130 encode_member_with_options(input, &[], options)
131}
132
133pub fn unpack29_encode_ppmd_literals(input: &[u8]) -> Result<Vec<u8>> {
134 encode_ppmd_member(input, false, &[])
135}
136
137pub fn unpack29_encode_ppmd(input: &[u8]) -> Result<Vec<u8>> {
138 encode_ppmd_member(input, true, &[])
139}
140
141pub fn unpack29_encode_ppmd_with_filter(input: &[u8], filter: Rar29FilterSpec) -> Result<Vec<u8>> {
142 encode_ppmd_filtered_member(input, filter, true)
143}
144
145pub fn unpack29_encode_ppmd_literals_with_filter(
146 input: &[u8],
147 filter: Rar29FilterSpec,
148) -> Result<Vec<u8>> {
149 encode_ppmd_filtered_member(input, filter, false)
150}
151
152fn encode_ppmd_filtered_member(
153 input: &[u8],
154 filter: Rar29FilterSpec,
155 lz_escapes: bool,
156) -> Result<Vec<u8>> {
157 let filters = split_large_filter(input.len(), filter)?;
158 let filtered = filtered_members(input, &filters)?;
159 let records = encoded_filter_records(&filtered.records)?;
160 encode_ppmd_member(&filtered.data, lz_escapes, &records)
161}
162
163fn filtered_members(input: &[u8], filters: &[Rar29FilterSpec]) -> Result<FilteredMembers> {
164 let mut data = input.to_vec();
165 let mut records = Vec::with_capacity(filters.len());
166 for filter in filters {
167 let filtered = filtered_member(input, filter)?;
168 let range = filtered.block_start..filtered.block_start + filtered.block_size;
169 data[range.clone()].copy_from_slice(&filtered.data[range]);
170 records.push(OwnedVmFilterRecord {
171 block_start: filtered.block_start,
172 block_size: filtered.block_size,
173 init_regs: filtered.init_regs,
174 code: filtered.code,
175 });
176 }
177 Ok(FilteredMembers { data, records })
178}
179
/// Result of applying a list of filters to one member (see
/// `filtered_members`).
struct FilteredMembers {
    /// Copy of the input in which every filter's block has been replaced by
    /// its filtered bytes.
    data: Vec<u8>,
    /// One record per applied filter, in the order the filters were given.
    records: Vec<OwnedVmFilterRecord>,
}
184
185fn split_large_filter(input_len: usize, filter: Rar29FilterSpec) -> Result<Vec<Rar29FilterSpec>> {
186 let range = filter.range.clone().unwrap_or(0..input_len);
187 if range.start >= range.end || range.end > input_len {
188 return Err(Error::InvalidData("RAR 2.9 VM filter range is invalid"));
189 }
190
191 let chunk_size = match filter.kind {
192 Rar29FilterKind::Delta { channels } => {
193 if channels == 0 || channels > MAX_VM_DELTA_FILTER_BLOCK_SIZE {
194 return Err(Error::InvalidData(
195 "RAR 2.9 VM filter channel count is invalid",
196 ));
197 }
198 MAX_VM_DELTA_FILTER_BLOCK_SIZE - (MAX_VM_DELTA_FILTER_BLOCK_SIZE % channels)
199 }
200 Rar29FilterKind::Audio { channels } => {
201 if channels == 0 || channels > MAX_VM_AUDIO_FILTER_BLOCK_SIZE {
202 return Err(Error::InvalidData(
203 "RAR 2.9 VM filter channel count is invalid",
204 ));
205 }
206 MAX_VM_AUDIO_FILTER_BLOCK_SIZE - (MAX_VM_AUDIO_FILTER_BLOCK_SIZE % channels)
207 }
208 Rar29FilterKind::Rgb { width, .. } => {
209 if width == 0 || width > MAX_VM_FILTER_BLOCK_SIZE {
210 return Err(Error::InvalidData(
211 "RAR 2.9 RGB filter scanline width is invalid",
212 ));
213 }
214 MAX_VM_FILTER_BLOCK_SIZE - (MAX_VM_FILTER_BLOCK_SIZE % width)
215 }
216 Rar29FilterKind::E8 | Rar29FilterKind::E8E9 | Rar29FilterKind::Itanium => {
217 MAX_VM_FILTER_BLOCK_SIZE
218 }
219 };
220 if range.len() <= chunk_size {
221 return Ok(vec![filter]);
222 }
223 if chunk_size == 0 {
224 return Err(Error::InvalidData(
225 "RAR 2.9 VM filter chunk size is invalid",
226 ));
227 }
228
229 let mut filters = Vec::new();
230 let mut start = range.start;
231 while start < range.end {
232 let end = (start + chunk_size).min(range.end);
233 filters.push(Rar29FilterSpec::range(filter.kind, start..end));
234 start = end;
235 }
236 Ok(filters)
237}
238
/// Owned description of one VM filter invocation, produced by
/// `filtered_members` and serialised by `encoded_filter_records`.
struct OwnedVmFilterRecord {
    /// Offset of the filtered block within the member.
    block_start: usize,
    /// Length of the filtered block in bytes.
    block_size: usize,
    /// `(register index, value)` pairs written into the record's
    /// initial-register section.
    init_regs: Vec<(usize, u32)>,
    /// VM program for this filter (one of the `RAR3_*_FILTER_BYTECODE`
    /// constants).
    code: &'static [u8],
}
245
246fn encode_ppmd_member(
247 input: &[u8],
248 lz_escapes: bool,
249 initial_filters: &[Vec<u8>],
250) -> Result<Vec<u8>> {
251 encode_ppmd_block(input, lz_escapes, initial_filters)
252}
253
254fn encode_ppmd_block(
255 input: &[u8],
256 lz_escapes: bool,
257 initial_filters: &[Vec<u8>],
258) -> Result<Vec<u8>> {
259 const PPMD_ORDER: usize = 8;
260 const PPMD_DICTIONARY_MB: u8 = 25;
261 const PPMD_ESC: u8 = 2;
262
263 let mut out = Vec::new();
264 out.push(0x80 | 0x20 | ((PPMD_ORDER as u8) - 1));
265 out.push(PPMD_DICTIONARY_MB - 1);
266 let mut encoder = PpmdEncoder::new(PPMD_ORDER, PPMD_ESC, usize::from(PPMD_DICTIONARY_MB))?;
267 for record in initial_filters {
268 encoder.encode_vm_filter_record(record)?;
269 }
270 for token in encode_ppmd_tokens(input, lz_escapes) {
271 match token {
272 PpmdEncodeToken::Literal(byte) => encoder.encode_literal(byte)?,
273 PpmdEncodeToken::RepeatOffsetOne { length } => {
274 encoder.encode_repeat_offset_one(length)?
275 }
276 PpmdEncodeToken::Match { offset, length } => encoder.encode_match(offset, length)?,
277 }
278 }
279 out.extend_from_slice(&encoder.finish()?);
280 Ok(out)
281}
282
/// One unit of work fed to the PPMd encoder by `encode_ppmd_block`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PpmdEncodeToken {
    /// A single byte, passed to `PpmdEncoder::encode_literal`.
    Literal(u8),
    /// A run of `length` bytes, passed to
    /// `PpmdEncoder::encode_repeat_offset_one`.
    RepeatOffsetOne { length: usize },
    /// A back-reference, passed to `PpmdEncoder::encode_match`.
    Match { offset: usize, length: usize },
}
289
290#[derive(Debug, Clone, PartialEq, Eq)]
291pub struct Rar29FilterSpec {
292 pub kind: Rar29FilterKind,
293 pub range: Option<Range<usize>>,
294}
295
296impl Rar29FilterSpec {
297 pub fn whole(kind: Rar29FilterKind) -> Self {
298 Self { kind, range: None }
299 }
300
301 pub fn range(kind: Rar29FilterKind, range: Range<usize>) -> Self {
302 Self {
303 kind,
304 range: Some(range),
305 }
306 }
307}
308
/// The filters the encoder can apply, with their parameters.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rar29FilterKind {
    /// Encoded with `FilterOp::E8`; the block's start offset is passed as
    /// the file offset.
    E8,
    /// Encoded with `FilterOp::E8E9`; the block's start offset is passed as
    /// the file offset.
    E8E9,
    /// Encoded with `FilterOp::Delta`; `channels` is also stored in
    /// register 0 of the filter record.
    Delta { channels: usize },
    /// Encoded with `itanium_encode`.
    Itanium,
    /// Encoded with `rgb_encode`. `width` must be a non-zero multiple of 3
    /// and `pos_r` at most 2; register 0 receives `width + 3` and register 1
    /// receives `pos_r` when it is non-zero.
    Rgb { width: usize, pos_r: usize },
    /// Encoded with `audio_encode`, which accepts 1 to 32 channels;
    /// `channels` is also stored in register 0 of the filter record.
    Audio { channels: usize },
}
318
/// Result of applying a single filter to a member (see `filtered_member`).
struct FilteredMember {
    /// Full copy of the input with the filtered block rewritten in place.
    data: Vec<u8>,
    /// Offset of the filtered block within `data`.
    block_start: usize,
    /// Length of the filtered block in bytes.
    block_size: usize,
    /// `(register index, value)` pairs for the filter record.
    init_regs: Vec<(usize, u32)>,
    /// VM program matching the applied filter kind.
    code: &'static [u8],
}
326
327fn filtered_member(input: &[u8], filter: &Rar29FilterSpec) -> Result<FilteredMember> {
328 let range = filter.range.clone().unwrap_or(0..input.len());
329 if range.start >= range.end || range.end > input.len() {
330 return Err(Error::InvalidData("RAR 2.9 VM filter range is invalid"));
331 }
332 let mut filtered = input.to_vec();
333 let (init_regs, code): (Vec<(usize, u32)>, &'static [u8]) = match filter.kind {
334 Rar29FilterKind::E8 => {
335 filters::encode_in_place(
336 FilterOp::E8,
337 &mut filtered[range.clone()],
338 range.start as u32,
339 rar29_delta_messages(),
340 )?;
341 (Vec::new(), RAR3_E8_FILTER_BYTECODE)
342 }
343 Rar29FilterKind::E8E9 => {
344 filters::encode_in_place(
345 FilterOp::E8E9,
346 &mut filtered[range.clone()],
347 range.start as u32,
348 rar29_delta_messages(),
349 )?;
350 (Vec::new(), RAR3_E8E9_FILTER_BYTECODE)
351 }
352 Rar29FilterKind::Delta { channels } => {
353 filters::encode_in_place(
354 FilterOp::Delta { channels },
355 &mut filtered[range.clone()],
356 0,
357 rar29_delta_messages(),
358 )?;
359 (vec![(0, channels as u32)], RAR3_DELTA_FILTER_BYTECODE)
360 }
361 Rar29FilterKind::Itanium => {
362 itanium_encode(&mut filtered[range.clone()], range.start as u32);
363 (Vec::new(), RAR3_ITANIUM_FILTER_BYTECODE)
364 }
365 Rar29FilterKind::Rgb { width, pos_r } => {
366 filtered[range.clone()].copy_from_slice(&rgb_encode(
367 &input[range.clone()],
368 width,
369 pos_r,
370 )?);
371 let init_regs = if pos_r == 0 {
372 vec![(0, width as u32 + 3)]
373 } else {
374 vec![(0, width as u32 + 3), (1, pos_r as u32)]
375 };
376 (init_regs, RAR3_RGB_FILTER_BYTECODE)
377 }
378 Rar29FilterKind::Audio { channels } => {
379 filtered[range.clone()]
380 .copy_from_slice(&audio_encode(&input[range.clone()], channels)?);
381 (vec![(0, channels as u32)], RAR3_AUDIO_FILTER_BYTECODE)
382 }
383 };
384 Ok(FilteredMember {
385 data: filtered,
386 block_start: range.start,
387 block_size: range.end - range.start,
388 init_regs,
389 code,
390 })
391}
392
393fn rar29_delta_messages() -> DeltaErrorMessages {
394 DeltaErrorMessages {
395 invalid_channels: "RAR 2.9 DELTA filter channel count is invalid",
396 zero_channels: "RAR 2.9 DELTA filter has zero channels",
397 truncated_source: "RAR 2.9 DELTA filter source is truncated",
398 }
399}
400
401#[derive(Debug, Clone, Copy, PartialEq, Eq)]
402#[non_exhaustive]
403pub struct EncodeOptions {
404 pub max_match_candidates: usize,
405 pub lazy_matching: bool,
406 pub lazy_lookahead: usize,
407 pub max_match_distance: usize,
408 pub block_size: Option<usize>,
409}
410
411impl EncodeOptions {
412 pub const fn new(max_match_candidates: usize) -> Self {
413 Self {
414 max_match_candidates,
415 lazy_matching: false,
416 lazy_lookahead: 1,
417 max_match_distance: MAX_ENCODER_MATCH_OFFSET,
418 block_size: None,
419 }
420 }
421
422 pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
423 self.lazy_matching = enabled;
424 self
425 }
426
427 pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
428 self.lazy_lookahead = bytes;
429 self
430 }
431
432 pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
433 self.max_match_distance = distance;
434 self
435 }
436
437 pub const fn with_block_size(mut self, bytes: usize) -> Self {
438 self.block_size = Some(bytes);
439 self
440 }
441}
442
443impl Default for EncodeOptions {
444 fn default() -> Self {
445 Self::new(MAX_MATCH_CANDIDATES)
446 }
447}
448
449#[derive(Debug, Clone, Default)]
450pub struct Unpack29Encoder {
451 history: Vec<u8>,
452 options: EncodeOptions,
453}
454
455impl Unpack29Encoder {
456 pub fn new() -> Self {
457 Self::default()
458 }
459
460 pub fn with_options(options: EncodeOptions) -> Self {
461 Self {
462 history: Vec::new(),
463 options,
464 }
465 }
466
467 pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
468 let packed = encode_member_with_options(input, &self.history, self.options)?;
469 self.remember(input);
470 Ok(packed)
471 }
472
473 pub fn encode_member_with_filter(
474 &mut self,
475 input: &[u8],
476 filter: Rar29FilterSpec,
477 ) -> Result<Vec<u8>> {
478 let filters = split_large_filter(input.len(), filter)?;
479 let filtered = filtered_members(input, &filters)?;
480 let records = encoded_filter_records(&filtered.records)?;
481 let packed = encode_member_with_initial_filters(
482 &filtered.data,
483 &self.history,
484 &records,
485 self.options,
486 )?;
487 self.remember(input);
488 Ok(packed)
489 }
490
491 pub fn encode_member_with_filters(
492 &mut self,
493 input: &[u8],
494 filters: &[Rar29FilterSpec],
495 ) -> Result<Vec<u8>> {
496 let mut split_filters = Vec::new();
497 for filter in filters {
498 split_filters.extend(split_large_filter(input.len(), filter.clone())?);
499 }
500 let filtered = filtered_members(input, &split_filters)?;
501 let records = encoded_filter_records(&filtered.records)?;
502 let packed = encode_member_with_initial_filters(
503 &filtered.data,
504 &self.history,
505 &records,
506 self.options,
507 )?;
508 self.remember(input);
509 Ok(packed)
510 }
511
512 fn remember(&mut self, input: &[u8]) {
513 self.history.extend_from_slice(input);
514 let keep_from = self.history.len().saturating_sub(MAX_HISTORY);
515 if keep_from != 0 {
516 self.history.drain(..keep_from);
517 }
518 }
519}
520
521fn encode_member(input: &[u8], history: &[u8]) -> Result<Vec<u8>> {
522 encode_member_with_options(input, history, EncodeOptions::default())
523}
524
525fn encode_member_with_options(
526 input: &[u8],
527 history: &[u8],
528 options: EncodeOptions,
529) -> Result<Vec<u8>> {
530 if let Some(block_size) = options.block_size.filter(|&size| size != 0) {
531 if input.len() > block_size {
532 return encode_member_blocks(input, history, options, block_size);
533 }
534 }
535 encode_member_inner(input, history, &[], options)
536}
537
538fn encode_member_blocks(
539 input: &[u8],
540 history: &[u8],
541 mut options: EncodeOptions,
542 block_size: usize,
543) -> Result<Vec<u8>> {
544 options.block_size = None;
545 let mut out = Vec::new();
546 let mut local_history = history[history.len().saturating_sub(MAX_HISTORY)..].to_vec();
547 for chunk in input.chunks(block_size) {
548 out.extend_from_slice(&encode_member_inner(chunk, &local_history, &[], options)?);
549 local_history.extend_from_slice(chunk);
550 let keep_from = local_history.len().saturating_sub(MAX_HISTORY);
551 if keep_from != 0 {
552 local_history.drain(..keep_from);
553 }
554 }
555 Ok(out)
556}
557
558fn encode_member_with_initial_filters(
559 input: &[u8],
560 history: &[u8],
561 filters: &[Vec<u8>],
562 options: EncodeOptions,
563) -> Result<Vec<u8>> {
564 encode_member_inner(input, history, filters, options)
565}
566
/// Encodes one LZ/Huffman block.
///
/// The function works in two passes over the same token list:
///
/// 1. count how often every main / offset / low-offset / length symbol will
///    be used, replaying the repeat-offset state exactly as the emit pass
///    will, and derive Huffman code lengths (at most 15 bits) from the
///    counts;
/// 2. write the block header and code-length tables, then the filter records
///    and the tokens using those codes.
///
/// Main-table symbol layout as used here: `0..=255` literals, `256` end of
/// block, `257` VM filter record, `258` repeat of the last offset and length,
/// `259 + index` repeat of a remembered offset, `271 + length_slot` fresh
/// match.
///
/// # Errors
///
/// Returns `Error::InvalidData` when a symbol that must be written has no
/// Huffman code, and propagates errors from match classification and
/// canonical code construction.
fn encode_member_inner(
    input: &[u8],
    history: &[u8],
    initial_filters: &[Vec<u8>],
    options: EncodeOptions,
) -> Result<Vec<u8>> {
    let tokens = encode_tokens(input, history, options);
    let mut main_frequencies = vec![0usize; MAIN_COUNT];
    let mut offset_frequencies = vec![0usize; OFFSET_COUNT];
    let mut low_offset_frequencies = vec![0usize; LOW_OFFSET_COUNT];
    let mut length_frequencies = vec![0usize; LENGTH_COUNT];
    // Every filter record is introduced by one occurrence of symbol 257.
    main_frequencies[257] += initial_filters.len();
    // Pass 1: frequency counting. The match state must evolve exactly as in
    // the emit pass below so both passes classify each match identically.
    let mut match_state = EncoderMatchState::default();
    for token in &tokens {
        match *token {
            EncodeToken::Literal(byte) => {
                main_frequencies[byte as usize] += 1;
            }
            EncodeToken::Match { length, offset } => {
                match match_state.encode_match(length, offset)? {
                    EncodedMatch::LastLengthRepeat => {
                        main_frequencies[258] += 1;
                    }
                    EncodedMatch::RepeatOffset {
                        index, length_slot, ..
                    } => {
                        main_frequencies[259 + index] += 1;
                        length_frequencies[length_slot] += 1;
                    }
                    EncodedMatch::Fresh {
                        length_slot,
                        offset_slot,
                        offset_extra,
                        ..
                    } => {
                        main_frequencies[271 + length_slot] += 1;
                        offset_frequencies[offset_slot] += 1;
                        // Only slots above 9 route their low four extra bits
                        // through the low-offset table.
                        if offset_slot > 9 {
                            low_offset_frequencies[offset_extra & 0x0f] += 1;
                        }
                    }
                }
                match_state.remember(length, offset);
            }
        }
    }
    // The end-of-block symbol is always written once.
    main_frequencies[256] += 1;

    let mut table_lengths = [0u8; TABLE_COUNT];
    // Give the low-offset table one used symbol when it would otherwise be
    // empty.
    if low_offset_frequencies
        .iter()
        .all(|&frequency| frequency == 0)
    {
        low_offset_frequencies[0] = 1;
    }
    let main_lengths = huffman::lengths_for_frequencies(&main_frequencies, 15);
    let offset_lengths = huffman::lengths_for_frequencies(&offset_frequencies, 15);
    let low_offset_lengths = huffman::lengths_for_frequencies(&low_offset_frequencies, 15);
    let length_lengths = huffman::lengths_for_frequencies(&length_frequencies, 15);
    // Lay the four tables out back to back: main, offset, low-offset, length.
    table_lengths[..MAIN_COUNT].copy_from_slice(&main_lengths);
    table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT].copy_from_slice(&offset_lengths);
    table_lengths[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT]
        .copy_from_slice(&low_offset_lengths);
    table_lengths[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..].copy_from_slice(&length_lengths);

    // The combined length table is itself compressed with a small "level"
    // Huffman code.
    let level_tokens = encode_table_level_tokens(&table_lengths);
    let level_lengths = level_code_lengths(&level_tokens);
    let level_codes = canonical_codes(&level_lengths)?;
    let main_codes = canonical_codes(&table_lengths[..MAIN_COUNT])?;

    let mut bits = BitWriter::default();
    // Two zero flag bits open the block.
    // NOTE(review): presumably "not a PPMd block" and "do not keep the
    // previous tables" — confirm against the decoder.
    bits.write_bit(false);
    bits.write_bit(false);
    // Level code lengths, four bits each.
    // NOTE(review): assumes `level_code_lengths` never yields a value the
    // decoder treats specially in this 4-bit field — confirm.
    for &len in &level_lengths {
        bits.write_bits(len as u32, 4);
    }
    for token in level_tokens {
        let code = level_codes[token.symbol].ok_or(Error::InvalidData(
            "RAR 2.9 encoder missing level Huffman code",
        ))?;
        bits.write_bits(code.code as u32, code.len);
        if token.extra_bits != 0 {
            bits.write_bits(token.extra_value as u32, token.extra_bits);
        }
    }
    let offset_codes = canonical_codes(&table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
    let low_offset_codes = canonical_codes(
        &table_lengths[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT],
    )?;
    let length_codes =
        canonical_codes(&table_lengths[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..])?;
    // Filter records come first: symbol 257 followed by the record bytes
    // written verbatim, eight bits at a time.
    for filter in initial_filters {
        let code = main_codes[257].ok_or(Error::InvalidData(
            "RAR 2.9 encoder missing VM filter Huffman code",
        ))?;
        bits.write_bits(code.code as u32, code.len);
        for &byte in filter {
            bits.write_bits(u32::from(byte), 8);
        }
    }
    // Pass 2: emit the tokens, replaying the match state from scratch.
    let mut match_state = EncoderMatchState::default();
    for token in tokens {
        match token {
            EncodeToken::Literal(byte) => {
                let code = main_codes[byte as usize].ok_or(Error::InvalidData(
                    "RAR 2.9 encoder missing literal Huffman code",
                ))?;
                bits.write_bits(code.code as u32, code.len);
            }
            EncodeToken::Match { length, offset } => {
                match match_state.encode_match(length, offset)? {
                    EncodedMatch::LastLengthRepeat => {
                        let code = main_codes[258].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing last-length repeat Huffman code",
                        ))?;
                        bits.write_bits(code.code as u32, code.len);
                    }
                    EncodedMatch::RepeatOffset {
                        index,
                        length_slot,
                        length_extra,
                    } => {
                        // Main symbol, then the length slot from the length
                        // table, then that slot's extra bits.
                        let code = main_codes[259 + index].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing repeat-offset Huffman code",
                        ))?;
                        bits.write_bits(code.code as u32, code.len);
                        let length_code = length_codes[length_slot].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing repeat length Huffman code",
                        ))?;
                        bits.write_bits(length_code.code as u32, length_code.len);
                        if LENGTH_BITS[length_slot] != 0 {
                            bits.write_bits(length_extra as u32, LENGTH_BITS[length_slot]);
                        }
                    }
                    EncodedMatch::Fresh {
                        length_slot,
                        length_extra,
                        offset_slot,
                        offset_extra,
                    } => {
                        // Main symbol (carries the length slot), length extra
                        // bits, offset slot, then the offset extra bits.
                        let code = main_codes[271 + length_slot].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing match Huffman code",
                        ))?;
                        bits.write_bits(code.code as u32, code.len);
                        if LENGTH_BITS[length_slot] != 0 {
                            bits.write_bits(length_extra as u32, LENGTH_BITS[length_slot]);
                        }
                        let offset = offset_codes[offset_slot].ok_or(Error::InvalidData(
                            "RAR 2.9 encoder missing offset Huffman code",
                        ))?;
                        bits.write_bits(offset.code as u32, offset.len);
                        if offset_slot > 9 {
                            // High extra bits go out raw; the low four bits
                            // are Huffman-coded with the low-offset table.
                            let offset_bits = OFFSET_BITS[offset_slot];
                            if offset_bits > 4 {
                                bits.write_bits((offset_extra >> 4) as u32, offset_bits - 4);
                            }
                            let low_offset =
                                low_offset_codes[offset_extra & 0x0f].ok_or(Error::InvalidData(
                                    "RAR 2.9 encoder missing low-offset Huffman code",
                                ))?;
                            bits.write_bits(low_offset.code as u32, low_offset.len);
                        } else if OFFSET_BITS[offset_slot] != 0 {
                            bits.write_bits(offset_extra as u32, OFFSET_BITS[offset_slot]);
                        }
                    }
                }
                match_state.remember(length, offset);
            }
        }
    }
    let end = main_codes[256].ok_or(Error::InvalidData(
        "RAR 2.9 encoder missing end-of-block Huffman code",
    ))?;
    bits.write_bits(end.code as u32, end.len);
    // One set flag bit follows the end-of-block symbol.
    // NOTE(review): presumably signals that new tables (not a new file)
    // follow — confirm against the decoder.
    bits.write_bit(true);
    Ok(bits.finish())
}
744
745fn encoded_filter_records(filters: &[OwnedVmFilterRecord]) -> Result<Vec<Vec<u8>>> {
746 let mut programs: Vec<&'static [u8]> = Vec::new();
747 let mut records = Vec::with_capacity(filters.len());
748 for filter in filters {
749 let existing = (filter.code != RAR3_AUDIO_FILTER_BYTECODE)
750 .then(|| programs.iter().position(|&code| code == filter.code))
751 .flatten();
752 let (program_selector, include_code) = match existing {
753 Some(index) => (
754 u32::try_from(index + 1)
755 .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?,
756 false,
757 ),
758 None => {
759 let selector = if programs.is_empty() {
760 0
761 } else {
762 u32::try_from(programs.len() + 1)
763 .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?
764 };
765 programs.push(filter.code);
766 (selector, true)
767 }
768 };
769 records.push(encode_vm_filter_record_inner(
770 VmFilterRecord {
771 block_start: filter.block_start,
772 block_size: filter.block_size,
773 init_regs: &filter.init_regs,
774 code: filter.code,
775 },
776 program_selector,
777 include_code,
778 )?);
779 }
780 Ok(records)
781}
782
/// Borrowed view of one VM filter invocation, as consumed by
/// `encode_vm_filter_record_inner`.
#[derive(Debug, Clone, Copy)]
struct VmFilterRecord<'a> {
    /// Offset of the filtered block; must fit in `u32`.
    block_start: usize,
    /// Length of the filtered block; must be non-zero and fit in `u32`.
    block_size: usize,
    /// `(register index, value)` pairs; every index must be below 7.
    init_regs: &'a [(usize, u32)],
    /// VM program bytes, written only when the record includes code.
    code: &'a [u8],
}
790
791fn encode_vm_filter_record_inner(
792 record: VmFilterRecord<'_>,
793 program_selector: u32,
794 include_code: bool,
795) -> Result<Vec<u8>> {
796 if record.block_size == 0 {
797 return Err(Error::InvalidData("RAR 2.9 VM filter block is empty"));
798 }
799 if include_code && record.code.is_empty() {
800 return Err(Error::InvalidData("RAR 2.9 VM filter bytecode is empty"));
801 }
802
803 let mut body = BitWriter::default();
804 body.write_encoded_u32(program_selector);
805 body.write_encoded_u32(
806 u32::try_from(record.block_start)
807 .map_err(|_| Error::InvalidData("RAR 2.9 VM block start overflows"))?,
808 );
809 body.write_encoded_u32(
810 u32::try_from(record.block_size)
811 .map_err(|_| Error::InvalidData("RAR 2.9 VM block size overflows"))?,
812 );
813 if !record.init_regs.is_empty() {
814 let mut mask = 0u32;
815 for &(index, _) in record.init_regs {
816 if index >= 7 {
817 return Err(Error::InvalidData(
818 "RAR 2.9 VM init register index is invalid",
819 ));
820 }
821 mask |= 1 << index;
822 }
823 body.write_bits(mask, 7);
824 for index in 0..7 {
825 if let Some((_, value)) = record.init_regs.iter().find(|(reg, _)| *reg == index) {
826 body.write_encoded_u32(*value);
827 }
828 }
829 }
830 if include_code {
831 body.write_encoded_u32(
832 u32::try_from(record.code.len())
833 .map_err(|_| Error::InvalidData("RAR 2.9 VM code size overflows"))?,
834 );
835 for &byte in record.code {
836 body.write_bits(u32::from(byte), 8);
837 }
838 }
839 let body = body.finish();
840
841 let mut out = Vec::new();
842 let mut first = 0x80 | 0x20;
843 if !record.init_regs.is_empty() {
844 first |= 0x10;
845 }
846 match body.len() {
847 1..=6 => first |= (body.len() as u8) - 1,
848 7..=262 => {
849 first |= 6;
850 out.push((body.len() - 7) as u8);
851 }
852 263..=65535 => {
853 first |= 7;
854 out.extend_from_slice(&(body.len() as u16).to_be_bytes());
855 }
856 _ => return Err(Error::InvalidData("RAR 2.9 VM filter record is too large")),
857 }
858 out.insert(0, first);
859 out.extend_from_slice(&body);
860 Ok(out)
861}
862
863fn rgb_encode(data: &[u8], width: usize, pos_r: usize) -> Result<Vec<u8>> {
864 if data.len() < 3 || width == 0 || !width.is_multiple_of(3) || width > data.len() || pos_r > 2 {
865 return Err(Error::InvalidData(
866 "RAR 2.9 RGB filter parameters are invalid",
867 ));
868 }
869 let mut work = data.to_vec();
870 for i in (pos_r..work.len().saturating_sub(2)).step_by(3) {
871 let green = work[i + 1];
872 work[i] = work[i].wrapping_sub(green);
873 work[i + 2] = work[i + 2].wrapping_sub(green);
874 }
875
876 let mut out = Vec::with_capacity(data.len());
877 for channel in 0..3 {
878 let mut prev = 0u8;
879 let mut i = channel;
880 while i < work.len() {
881 let predicted = if i >= width + 3 {
882 rgb_predict(prev, work[i - width], work[i - width - 3])
883 } else {
884 prev
885 };
886 let byte = work[i];
887 out.push(predicted.wrapping_sub(byte));
888 prev = byte;
889 i += 3;
890 }
891 }
892 Ok(out)
893}
894
895fn audio_encode(data: &[u8], channels: usize) -> Result<Vec<u8>> {
896 if channels == 0 || channels > 32 {
897 return Err(Error::InvalidData(
898 "RAR 2.9 AUDIO filter channel count is invalid",
899 ));
900 }
901 let mut out = Vec::with_capacity(data.len());
902 for channel in 0..channels {
903 let mut prev_byte = 0u32;
904 let mut prev_delta = 0i32;
905 let mut d1 = 0i32;
906 let mut d2 = 0i32;
907 let mut k1 = 0i32;
908 let mut k2 = 0i32;
909 let mut k3 = 0i32;
910 let mut dif = [0u32; 7];
911 let mut byte_count = 0usize;
912 let mut i = channel;
913 while i < data.len() {
914 let d3 = d2;
915 d2 = prev_delta - d1;
916 d1 = prev_delta;
917 let predicted = ((8 * prev_byte as i32 + k1 * d1 + k2 * d2 + k3 * d3) >> 3) & 0xff;
918 let decoded = data[i];
919 let encoded = (predicted as u8).wrapping_sub(decoded);
920 out.push(encoded);
921 prev_delta = decoded.wrapping_sub(prev_byte as u8) as i8 as i32;
922 prev_byte = decoded as u32;
923 let d = (encoded as i8 as i32) << 3;
924 dif[0] += d.unsigned_abs();
925 dif[1] += (d - d1).unsigned_abs();
926 dif[2] += (d + d1).unsigned_abs();
927 dif[3] += (d - d2).unsigned_abs();
928 dif[4] += (d + d2).unsigned_abs();
929 dif[5] += (d - d3).unsigned_abs();
930 dif[6] += (d + d3).unsigned_abs();
931 if byte_count & 0x1f == 0 {
932 let mut min = dif[0];
933 let mut min_index = 0usize;
934 dif[0] = 0;
935 for (index, value) in dif.iter_mut().enumerate().skip(1) {
936 if *value < min {
937 min = *value;
938 min_index = index;
939 }
940 *value = 0;
941 }
942 match min_index {
943 1 if k1 >= -16 => k1 -= 1,
944 2 if k1 < 16 => k1 += 1,
945 3 if k2 >= -16 => k2 -= 1,
946 4 if k2 < 16 => k2 += 1,
947 5 if k3 >= -16 => k3 -= 1,
948 6 if k3 < 16 => k3 += 1,
949 _ => {}
950 }
951 }
952 byte_count += 1;
953 i += channels;
954 }
955 }
956 Ok(out)
957}
958
/// Applies the forward Itanium transform to `data` in place.
///
/// The buffer is scanned in 16-byte steps for as long as the step's start
/// position is below `data.len() - 21`; buffers of 21 bytes or fewer are left
/// untouched. For each step the first byte selects, through a packed lookup
/// constant, which of up to three fields are examined. A field whose 4-bit
/// tag equals 5 has its 20-bit value increased (modulo 2^20) by
/// `(file_offset >> 4) + step_index`.
fn itanium_encode(data: &mut [u8], file_offset: u32) {
    const FIELD_MASK: u32 = 0x000f_ffff;
    const SELECTOR_TABLE: u32 = 0x334b_0000;

    let scan_len = match data.len().checked_sub(21) {
        Some(len) if len != 0 => len,
        _ => return,
    };
    let first_offset = file_offset >> 4;

    for (step, pos) in (0..scan_len).step_by(16).enumerate() {
        let step_offset = first_offset.wrapping_add(step as u32);
        let selector = (SELECTOR_TABLE >> (data[pos] & 0x1e)) & 3;
        if selector == 0 {
            continue;
        }
        // Fields `selector + 1 ..= 4` are candidates; field `n` starts at
        // byte `5 * n - 8` of the step and is shifted left by `n` bits.
        for shift in (selector + 1)..=4 {
            let at = pos + (shift as usize * 5 - 8);
            if (data[at + 3] >> shift) & 15 != 5 {
                continue;
            }
            let word = u32::from_le_bytes([data[at], data[at + 1], data[at + 2], data[at + 3]]);
            let relocated = (word >> shift).wrapping_add(step_offset) & FIELD_MASK;
            let patched = (word & !(FIELD_MASK << shift)) | (relocated << shift);
            data[at..at + 4].copy_from_slice(&patched.to_le_bytes());
        }
    }
}
985
/// One LZ token as produced by `encode_tokens`.
#[derive(Debug, Clone, Copy)]
enum EncodeToken {
    /// A single input byte emitted verbatim.
    Literal(u8),
    /// A back-reference of `length` bytes starting `offset` bytes before the
    /// current position.
    Match { length: usize, offset: usize },
}
991
/// Encoder-side record of recently used matches, consulted to choose the
/// representation of the next match.
#[derive(Debug, Clone, Copy, Default)]
struct EncoderMatchState {
    /// Recently used offsets, most recent first; `0` is treated as "unused".
    old_offsets: [usize; 4],
    /// Offset of the most recently remembered match.
    last_offset: usize,
    /// Length of the most recently remembered match (`0` until one is remembered).
    last_length: usize,
}
998
/// The representation `EncoderMatchState::encode_match` selects for a match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EncodedMatch {
    /// Same length and offset as the previously remembered match.
    LastLengthRepeat,
    /// The offset is one of the remembered `old_offsets`.
    RepeatOffset {
        /// Position of the offset inside `old_offsets`.
        index: usize,
        /// Slot in the length tables, computed by `length_slot_for_repeat_match`.
        length_slot: usize,
        /// Value stored in the slot's extra bits.
        length_extra: usize,
    },
    /// A match whose length and offset are both spelled out.
    Fresh {
        /// Slot in the length tables, computed by `length_slot_for_match`.
        length_slot: usize,
        /// Value stored in the length slot's extra bits.
        length_extra: usize,
        /// Slot in the offset tables, computed by `offset_slot_for_match`.
        offset_slot: usize,
        /// Value stored in the offset slot's extra bits.
        offset_extra: usize,
    },
}
1014
1015impl EncoderMatchState {
1016 fn encode_match(&self, length: usize, offset: usize) -> Result<EncodedMatch> {
1017 if offset == self.last_offset && length == self.last_length && self.last_length != 0 {
1018 return Ok(EncodedMatch::LastLengthRepeat);
1019 }
1020 if let Some(index) = self
1021 .old_offsets
1022 .iter()
1023 .position(|&old_offset| old_offset == offset && old_offset != 0)
1024 {
1025 let (length_slot, length_extra) = length_slot_for_repeat_match(length)?;
1026 return Ok(EncodedMatch::RepeatOffset {
1027 index,
1028 length_slot,
1029 length_extra,
1030 });
1031 }
1032 let encoded_length =
1033 length
1034 .checked_sub(match_length_adjustment(offset))
1035 .ok_or(Error::InvalidData(
1036 "RAR 2.9 adjusted match length underflows",
1037 ))?;
1038 let (length_slot, length_extra) = length_slot_for_match(encoded_length)?;
1039 let (offset_slot, offset_extra) = offset_slot_for_match(offset)?;
1040 Ok(EncodedMatch::Fresh {
1041 length_slot,
1042 length_extra,
1043 offset_slot,
1044 offset_extra,
1045 })
1046 }
1047
1048 fn remember(&mut self, length: usize, offset: usize) {
1049 if offset == self.last_offset && length == self.last_length && self.last_length != 0 {
1050 return;
1051 }
1052 if let Some(index) = self
1053 .old_offsets
1054 .iter()
1055 .position(|&old_offset| old_offset == offset)
1056 {
1057 self.old_offsets[..=index].rotate_right(1);
1058 } else {
1059 self.old_offsets.rotate_right(1);
1060 self.old_offsets[0] = offset;
1061 }
1062 self.last_offset = offset;
1063 self.last_length = length;
1064 }
1065}
1066
1067fn encode_tokens(input: &[u8], history: &[u8], options: EncodeOptions) -> Vec<EncodeToken> {
1068 let mut tokens = Vec::new();
1069 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1070 let history = &history[history.len().saturating_sub(options.max_match_distance)..];
1071 let mut combined = Vec::with_capacity(history.len() + input.len());
1072 combined.extend_from_slice(history);
1073 combined.extend_from_slice(input);
1074 for history_pos in 0..history.len().saturating_sub(2) {
1075 insert_match_position(&combined, history_pos, &mut buckets);
1076 }
1077
1078 let mut pos = history.len();
1079 let end = combined.len();
1080 let mut state = EncoderMatchState::default();
1081 while pos < end {
1082 if let Some(candidate) = best_match(&combined, pos, end, &buckets, options, &state) {
1083 if should_lazy_emit_literal(&combined, pos, end, &buckets, options, &state, candidate) {
1084 tokens.push(EncodeToken::Literal(combined[pos]));
1085 insert_match_position(&combined, pos, &mut buckets);
1086 pos += 1;
1087 continue;
1088 }
1089 let MatchCandidate { length, offset, .. } = candidate;
1090 tokens.push(EncodeToken::Match { length, offset });
1091 state.remember(length, offset);
1092 for history_pos in pos..pos + length {
1093 insert_match_position(&combined, history_pos, &mut buckets);
1094 }
1095 pos += length;
1096 } else {
1097 tokens.push(EncodeToken::Literal(combined[pos]));
1098 insert_match_position(&combined, pos, &mut buckets);
1099 pos += 1;
1100 }
1101 }
1102 tokens
1103}
1104
1105fn should_lazy_emit_literal(
1106 input: &[u8],
1107 pos: usize,
1108 end: usize,
1109 buckets: &[Vec<usize>],
1110 options: EncodeOptions,
1111 state: &EncoderMatchState,
1112 current: MatchCandidate,
1113) -> bool {
1114 if !options.lazy_matching || pos + 1 >= end {
1115 return false;
1116 }
1117 let lookahead = options.lazy_lookahead.max(1);
1118 (1..=lookahead)
1119 .take_while(|offset| pos + offset < end)
1120 .any(|offset| {
1121 best_match(input, pos + offset, end, buckets, options, state).is_some_and(|next| {
1122 let skipped_literal_score = offset as isize * 8;
1123 next.score > current.score + skipped_literal_score
1124 })
1125 })
1126}
1127
/// A match considered by the encoder together with its heuristic score.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct MatchCandidate {
    /// Number of bytes the match covers.
    length: usize,
    /// Distance back from the current position.
    offset: usize,
    /// `length * 8` minus the estimated cost of encoding the match.
    score: isize,
}
1134
1135fn encode_ppmd_tokens(input: &[u8], lz_escapes: bool) -> Vec<PpmdEncodeToken> {
1136 if !lz_escapes {
1137 return input
1138 .iter()
1139 .copied()
1140 .map(PpmdEncodeToken::Literal)
1141 .collect();
1142 }
1143
1144 let mut tokens = Vec::new();
1145 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1146 let mut pos = 0usize;
1147 while pos < input.len() {
1148 if let Some(length) = ppmd_offset_one_repeat(input, pos) {
1149 tokens.push(PpmdEncodeToken::RepeatOffsetOne { length });
1150 for history_pos in pos..pos + length {
1151 insert_match_position(input, history_pos, &mut buckets);
1152 }
1153 pos += length;
1154 continue;
1155 }
1156
1157 if let Some((length, offset)) = best_ppmd_match(input, pos, &buckets) {
1158 tokens.push(PpmdEncodeToken::Match { offset, length });
1159 for history_pos in pos..pos + length {
1160 insert_match_position(input, history_pos, &mut buckets);
1161 }
1162 pos += length;
1163 continue;
1164 }
1165
1166 tokens.push(PpmdEncodeToken::Literal(input[pos]));
1167 insert_match_position(input, pos, &mut buckets);
1168 pos += 1;
1169 }
1170 tokens
1171}
1172
1173fn ppmd_offset_one_repeat(input: &[u8], pos: usize) -> Option<usize> {
1174 if pos == 0 || input[pos] != input[pos - 1] {
1175 return None;
1176 }
1177 let mut length = 0usize;
1178 while pos + length < input.len()
1179 && input[pos + length] == input[pos - 1]
1180 && length < MAX_PPMD_REPEAT_LENGTH
1181 {
1182 length += 1;
1183 }
1184 (length >= 4).then_some(length)
1185}
1186
1187fn best_ppmd_match(input: &[u8], pos: usize, buckets: &[Vec<usize>]) -> Option<(usize, usize)> {
1188 let max_offset = pos.min(0x1000001).min(MAX_ENCODER_MATCH_OFFSET);
1189 let max_length = (input.len() - pos).min(MAX_PPMD_MATCH_LENGTH);
1190 if max_offset < 2 || max_length < MIN_PPMD_MATCH_LENGTH || pos + 2 >= input.len() {
1191 return None;
1192 }
1193 let bucket = &buckets[match_hash(input, pos)];
1194 let mut best = None;
1195 let mut checked = 0usize;
1196 for &candidate in bucket.iter().rev() {
1197 if candidate >= pos {
1198 continue;
1199 }
1200 let offset = pos - candidate;
1201 if offset > max_offset {
1202 break;
1203 }
1204 if offset < 2 {
1205 continue;
1206 }
1207 checked += 1;
1208 let mut length = 0usize;
1209 while length < max_length && input[pos + length] == input[pos + length - offset] {
1210 length += 1;
1211 }
1212 if length >= MIN_PPMD_MATCH_LENGTH
1213 && best.is_none_or(|(best_length, best_offset)| {
1214 length > best_length || (length == best_length && offset < best_offset)
1215 })
1216 {
1217 best = Some((length, offset));
1218 if length == max_length {
1219 break;
1220 }
1221 }
1222 if checked >= MAX_MATCH_CANDIDATES {
1223 break;
1224 }
1225 }
1226 best
1227}
1228
1229fn best_match(
1230 input: &[u8],
1231 pos: usize,
1232 end: usize,
1233 buckets: &[Vec<usize>],
1234 options: EncodeOptions,
1235 state: &EncoderMatchState,
1236) -> Option<MatchCandidate> {
1237 let max_offset = pos.min(options.max_match_distance);
1238 let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
1239 if options.max_match_candidates == 0
1240 || max_offset == 0
1241 || max_length < 4
1242 || pos + 2 >= input.len()
1243 {
1244 return None;
1245 }
1246 let bucket = &buckets[match_hash(input, pos)];
1247 let mut best = None;
1248 let mut checked = 0usize;
1249 for offset in state.old_offsets {
1250 if offset == 0 || offset > max_offset {
1251 continue;
1252 }
1253 let length = match_length(input, pos, offset, max_length);
1254 consider_match_candidate(&mut best, state, length, offset);
1255 }
1256 for &candidate in bucket.iter().rev() {
1257 if candidate >= pos {
1258 continue;
1259 }
1260 let offset = pos - candidate;
1261 if offset > max_offset {
1262 break;
1263 }
1264 checked += 1;
1265 let length = match_length(input, pos, offset, max_length);
1266 consider_match_candidate(&mut best, state, length, offset);
1267 if best.is_some_and(|candidate| candidate.length == max_length) {
1268 break;
1269 }
1270 if checked >= options.max_match_candidates {
1271 break;
1272 }
1273 }
1274 best
1275}
1276
/// Thin wrapper that forwards to `crate::fast::match_length` with the same
/// arguments.
// NOTE(review): presumably returns how many bytes at `pos` equal the bytes
// `offset` positions earlier, capped at `max_length` — confirm against
// `crate::fast`.
fn match_length(input: &[u8], pos: usize, offset: usize, max_length: usize) -> usize {
    crate::fast::match_length(input, pos, offset, max_length)
}
1280
1281fn consider_match_candidate(
1282 best: &mut Option<MatchCandidate>,
1283 state: &EncoderMatchState,
1284 length: usize,
1285 offset: usize,
1286) {
1287 if length < 4 {
1288 return;
1289 }
1290 let Ok(cost) = estimated_match_cost(state, length, offset) else {
1291 return;
1292 };
1293 let score = (length as isize * 8) - cost as isize;
1294 let candidate = MatchCandidate {
1295 length,
1296 offset,
1297 score,
1298 };
1299 if best.is_none_or(|best| {
1300 candidate.score > best.score
1301 || (candidate.score == best.score
1302 && (candidate.length > best.length
1303 || (candidate.length == best.length && candidate.offset < best.offset)))
1304 }) {
1305 *best = Some(candidate);
1306 }
1307}
1308
1309fn estimated_match_cost(state: &EncoderMatchState, length: usize, offset: usize) -> Result<usize> {
1310 match state.encode_match(length, offset)? {
1311 EncodedMatch::LastLengthRepeat => Ok(2),
1312 EncodedMatch::RepeatOffset { length_slot, .. } => {
1313 Ok(5 + usize::from(LENGTH_BITS[length_slot]))
1314 }
1315 EncodedMatch::Fresh {
1316 length_slot,
1317 offset_slot,
1318 ..
1319 } => {
1320 let low_offset_cost = usize::from(offset_slot > 9) * 4;
1321 Ok(8 + usize::from(LENGTH_BITS[length_slot])
1322 + usize::from(OFFSET_BITS[offset_slot])
1323 + low_offset_cost)
1324 }
1325 }
1326}
1327
/// Returns the implicit length bonus for a match at `offset`: 0 below
/// `0x2000`, 1 from `0x2000`, and 2 from `0x40000` upwards.
fn match_length_adjustment(offset: usize) -> usize {
    match offset {
        0..=0x1fff => 0,
        0x2000..=0x3_ffff => 1,
        _ => 2,
    }
}
1331
1332fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
1333 if pos + 2 < input.len() {
1334 buckets[match_hash(input, pos)].push(pos);
1335 }
1336}
1337
1338fn match_hash(input: &[u8], pos: usize) -> usize {
1339 let value =
1340 ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1341 value & (MATCH_HASH_BUCKETS - 1)
1342}
1343
1344fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
1345 if length < 3 {
1346 return Err(Error::InvalidData("RAR 2.9 match length is too short"));
1347 }
1348 let adjusted = length - 3;
1349 for (slot, &base) in LENGTH_BASES.iter().enumerate() {
1350 let extra_bits = LENGTH_BITS[slot];
1351 let max = base
1352 + if extra_bits == 0 {
1353 0
1354 } else {
1355 (1usize << extra_bits) - 1
1356 };
1357 if adjusted >= base && adjusted <= max {
1358 return Ok((slot, adjusted - base));
1359 }
1360 }
1361 Err(Error::InvalidData("RAR 2.9 match length is too long"))
1362}
1363
1364fn length_slot_for_repeat_match(length: usize) -> Result<(usize, usize)> {
1365 if length < 2 {
1366 return Err(Error::InvalidData(
1367 "RAR 2.9 repeat match length is too short",
1368 ));
1369 }
1370 let adjusted = length - 2;
1371 for (slot, &base) in LENGTH_BASES.iter().enumerate() {
1372 let extra_bits = LENGTH_BITS[slot];
1373 let max = base
1374 + if extra_bits == 0 {
1375 0
1376 } else {
1377 (1usize << extra_bits) - 1
1378 };
1379 if adjusted >= base && adjusted <= max {
1380 return Ok((slot, adjusted - base));
1381 }
1382 }
1383 Err(Error::InvalidData(
1384 "RAR 2.9 repeat match length is too long",
1385 ))
1386}
1387
1388fn offset_slot_for_match(offset: usize) -> Result<(usize, usize)> {
1389 if offset == 0 {
1390 return Err(Error::InvalidData("RAR 2.9 match offset is zero"));
1391 }
1392 let adjusted = offset - 1;
1393 for (slot, &base) in OFFSET_BASES.iter().enumerate() {
1394 let extra_bits = OFFSET_BITS[slot];
1395 let max = base
1396 + if extra_bits == 0 {
1397 0
1398 } else {
1399 (1usize << extra_bits) - 1
1400 };
1401 if adjusted >= base && adjusted <= max {
1402 return Ok((slot, adjusted - base));
1403 }
1404 }
1405 Err(Error::InvalidData("RAR 2.9 match offset is too large"))
1406}
1407
/// One symbol of the run-length coded table of Huffman code lengths.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct LevelToken {
    /// Level symbol: 0..=15 for a plain value, 16..=19 for the run forms.
    symbol: usize,
    /// Number of extra bits that follow the symbol.
    extra_bits: u8,
    /// Value carried in the extra bits.
    extra_value: u8,
}

impl LevelToken {
    /// Builds a run token whose extra bits store `count - shortest`.
    const fn run(symbol: usize, extra_bits: u8, count: usize, shortest: usize) -> Self {
        Self {
            symbol,
            extra_bits,
            extra_value: (count - shortest) as u8,
        }
    }

    /// A plain symbol with no extra bits.
    const fn plain(symbol: usize) -> Self {
        Self {
            symbol,
            extra_bits: 0,
            extra_value: 0,
        }
    }

    /// Symbol 16: repeat the previous value `count` times (3 extra bits, base 3).
    const fn repeat_previous_short(count: usize) -> Self {
        Self::run(16, 3, count, 3)
    }

    /// Symbol 17: repeat the previous value `count` times (7 extra bits, base 11).
    const fn repeat_previous_long(count: usize) -> Self {
        Self::run(17, 7, count, 11)
    }

    /// Symbol 18: emit `count` zeros (3 extra bits, base 3).
    const fn zero_run_short(count: usize) -> Self {
        Self::run(18, 3, count, 3)
    }

    /// Symbol 19: emit `count` zeros (7 extra bits, base 11).
    const fn zero_run_long(count: usize) -> Self {
        Self::run(19, 7, count, 11)
    }
}
1456
/// Run-length codes a full table of `TABLE_COUNT` code lengths by forwarding
/// it to `encode_level_tokens`.
fn encode_table_level_tokens(lengths: &[u8; TABLE_COUNT]) -> Vec<LevelToken> {
    encode_level_tokens(lengths)
}
1460
1461fn encode_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
1462 let mut tokens = Vec::new();
1463 let mut pos = 0usize;
1464 let mut previous = None;
1465 while pos < lengths.len() {
1466 let value = lengths[pos];
1467 let mut run = 1usize;
1468 while pos + run < lengths.len() && lengths[pos + run] == value {
1469 run += 1;
1470 }
1471
1472 if value == 0 {
1473 emit_zero_level_run(&mut tokens, run);
1474 previous = Some(0);
1475 pos += run;
1476 continue;
1477 }
1478
1479 if previous == Some(value) && run >= 3 {
1480 emit_repeat_level_run(&mut tokens, run);
1481 pos += run;
1482 continue;
1483 }
1484
1485 tokens.push(LevelToken::plain(value as usize));
1486 previous = Some(value);
1487 pos += 1;
1488 }
1489 tokens
1490}
1491
1492fn emit_repeat_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1493 while run != 0 {
1494 if run >= 11 {
1495 let mut chunk = run.min(138);
1496 if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1497 chunk -= 3;
1498 }
1499 tokens.push(LevelToken::repeat_previous_long(chunk));
1500 run -= chunk;
1501 } else if run >= 3 {
1502 let chunk = run.min(10);
1503 tokens.push(LevelToken::repeat_previous_short(chunk));
1504 run -= chunk;
1505 } else {
1506 break;
1507 }
1508 }
1509}
1510
1511fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1512 while run != 0 {
1513 if run >= 11 {
1514 let mut chunk = run.min(138);
1515 if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1516 chunk -= 3;
1517 }
1518 tokens.push(LevelToken::zero_run_long(chunk));
1519 run -= chunk;
1520 } else if run >= 3 {
1521 let chunk = run.min(10);
1522 tokens.push(LevelToken::zero_run_short(chunk));
1523 run -= chunk;
1524 } else {
1525 tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
1526 break;
1527 }
1528 }
1529}
1530
1531fn level_code_lengths(tokens: &[LevelToken]) -> [u8; LEVEL_COUNT] {
1532 let mut lengths = [0u8; LEVEL_COUNT];
1533 let mut used = [false; LEVEL_COUNT];
1534 for token in tokens {
1535 used[token.symbol] = true;
1536 }
1537 let used_count = used.iter().filter(|&&used| used).count();
1538 let len = huffman::bits_for_symbol_count(used_count);
1539 for (symbol, is_used) in used.into_iter().enumerate() {
1540 if is_used {
1541 lengths[symbol] = len;
1542 }
1543 }
1544 lengths
1545}
1546
/// A canonical Huffman code word as assigned by `canonical_codes`.
#[derive(Debug, Clone, Copy)]
struct HuffmanCode {
    /// The code bits.
    code: u16,
    /// Number of significant bits in `code` (1..=15).
    len: u8,
}
1552
1553fn canonical_codes(lengths: &[u8]) -> Result<Vec<Option<HuffmanCode>>> {
1554 let mut count = [0u16; 16];
1555 for &len in lengths {
1556 if len > 15 {
1557 return Err(Error::InvalidData("RAR 2.9 Huffman length is too large"));
1558 }
1559 if len != 0 {
1560 count[len as usize] += 1;
1561 }
1562 }
1563 validate_huffman_counts(&count)?;
1564
1565 let mut next_code = [0u16; 16];
1566 let mut code = 0u16;
1567 for len in 1..=15 {
1568 code = (code + count[len - 1]) << 1;
1569 next_code[len] = code;
1570 }
1571
1572 let mut codes = vec![None; lengths.len()];
1573 for (symbol, &len) in lengths.iter().enumerate() {
1574 if len == 0 {
1575 continue;
1576 }
1577 let code = next_code[len as usize];
1578 next_code[len as usize] += 1;
1579 codes[symbol] = Some(HuffmanCode { code, len });
1580 }
1581 Ok(codes)
1582}
1583
/// Decoder state for RAR 2.9 compressed members (LZ and PPMd blocks).
#[derive(Debug, Clone)]
pub struct Unpack29 {
    /// Bit-level reader over the packed input.
    bits: BitReader,
    /// Code lengths of the last table read; plain level symbols are applied
    /// as deltas on top of these.
    levels: [u8; TABLE_COUNT],
    /// Decoder for the main symbol alphabet.
    main: Huffman,
    /// Decoder for offset slots.
    offsets: Huffman,
    /// Decoder for the low offset bits.
    low_offsets: Huffman,
    /// Decoder for the length slots of repeated-offset matches.
    lengths: Huffman,
    /// Recently used match offsets, indexed by main symbols 259..=262.
    old_offsets: [usize; 4],
    /// Offset of the last match, reused by main symbol 258.
    last_offset: usize,
    /// Length of the last match, reused by main symbol 258 (`0` = none yet).
    last_length: usize,
    /// Low offset value reused while `low_offset_repeats` is non-zero.
    last_low_offset: usize,
    /// Remaining number of offsets that reuse `last_low_offset`.
    low_offset_repeats: usize,
    // NOTE(review): presumably the remainder of a match that was cut at a
    // decode target — confirm against `drain_pending_match`.
    pending_match: Option<(usize, usize)>,
    /// `true` while the tables of the current block are valid; when `false`
    /// the next decode step reads tables first.
    in_lz_block: bool,
    /// Kind of the block currently being decoded.
    block_mode: BlockMode,
    /// PPMd model state.
    ppmd: PpmdDecoder,
    /// Escape symbol of the PPMd stream (initially 2).
    ppmd_esc: u8,
    // NOTE(review): VM filter bookkeeping; semantics are defined by code
    // outside this part of the file.
    filters: Vec<VmFilter>,
    programs: Vec<VmProgram>,
    last_filter: usize,
    // NOTE(review): presumably the absolute position of `output[0]` after
    // history trimming — confirm against `current_pos`/`trim_history`.
    base_offset: usize,
    /// Decoded bytes retained so far.
    output: Vec<u8>,
}
1608
/// Kind of compressed block, selected by the first bit read in `read_tables`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockMode {
    /// Huffman-coded LZ block.
    Lz,
    /// PPMd-coded block.
    Ppmd,
}
1614
/// Outcome of the end-of-block marker (main symbol 256), as decoded by
/// `read_end_of_block`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LzBlockEnd {
    /// Bit pattern `1`: same file continues, new tables follow.
    SameFileNewTable,
    /// Bit pattern `00`: the current tables stay valid.
    NewFileKeepTables,
    /// Bit pattern `01`: tables must be read again before decoding continues.
    NewFileNewTables,
}
1621
/// A queued VM filter invocation.
// NOTE(review): field meanings are inferred from names only; the code that
// fills and consumes them is outside this part of the file — confirm.
#[derive(Debug, Clone)]
struct VmFilter {
    // presumably an index into `Unpack29::programs`
    program: usize,
    // presumably the first output position the filter applies to
    start: usize,
    // presumably the number of bytes the filter covers
    size: usize,
    regs: [u32; 7],
    global_data: Vec<u8>,
}
1630
/// A VM program remembered by the decoder.
// NOTE(review): field meanings are inferred from names only; the code that
// fills and consumes them is outside this part of the file — confirm.
#[derive(Debug, Clone)]
struct VmProgram {
    /// Either a recognised standard filter or a generic VM program.
    kind: VmProgramKind,
    block_size: usize,
    exec_count: u32,
    globals: Vec<u8>,
}
1638
/// How a VM program is represented.
#[derive(Debug, Clone)]
enum VmProgramKind {
    /// One of the filters listed in `StandardFilter`.
    Standard(StandardFilter),
    /// A program held as a `rarvm::Program`.
    Generic(rarvm::Program),
}
1644
/// The filters that `VmProgramKind::Standard` can name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StandardFilter {
    E8,
    E8E9,
    Itanium,
    Delta,
    Rgb,
    Audio,
}
1654
1655impl Unpack29 {
    /// Creates a decoder with empty tables, no match history and no output.
    ///
    /// The decoder starts in LZ mode with `in_lz_block` unset, so the first
    /// decode step reads tables. The PPMd escape symbol starts at 2.
    pub fn new() -> Self {
        Self {
            bits: BitReader::new(),
            levels: [0; TABLE_COUNT],
            main: Huffman::empty(),
            offsets: Huffman::empty(),
            low_offsets: Huffman::empty(),
            lengths: Huffman::empty(),
            old_offsets: [0; 4],
            last_offset: 0,
            last_length: 0,
            last_low_offset: 0,
            low_offset_repeats: 0,
            pending_match: None,
            in_lz_block: false,
            block_mode: BlockMode::Lz,
            ppmd: PpmdDecoder::new(),
            ppmd_esc: 2,
            filters: Vec::new(),
            programs: Vec::new(),
            last_filter: 0,
            base_offset: 0,
            output: Vec::new(),
        }
    }
1681
    /// Discards all decoder state by replacing `self` with a fresh
    /// [`Unpack29::new`].
    pub fn reset_non_solid(&mut self) {
        *self = Self::new();
    }
1685
    /// Resets the decoder, then decodes one member via
    /// [`Unpack29::decode_member`].
    ///
    /// # Errors
    ///
    /// Propagates the error from `decode_member`.
    pub fn decode_non_solid_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
        self.reset_non_solid();
        self.decode_member(input, output_size)
    }
1690
    /// Resets the decoder, then decodes one member into `out` via
    /// [`Unpack29::decode_member_to`].
    ///
    /// # Errors
    ///
    /// Propagates the error from `decode_member_to`.
    pub fn decode_non_solid_member_to(
        &mut self,
        input: &[u8],
        output_size: usize,
        out: &mut impl Write,
    ) -> Result<()> {
        self.reset_non_solid();
        self.decode_member_to(input, output_size, out)
    }
1700
    /// Resets the decoder, then decodes one member from `input` into `out`
    /// via [`Unpack29::decode_member_from_reader`].
    ///
    /// # Errors
    ///
    /// Propagates the error from `decode_member_from_reader`.
    pub fn decode_non_solid_member_from_reader(
        &mut self,
        input: &mut impl Read,
        output_size: usize,
        out: &mut impl Write,
    ) -> Result<()> {
        self.reset_non_solid();
        self.decode_member_from_reader(input, output_size, out)
    }
1710
1711 pub fn decode_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
1712 let start = self.current_pos();
1713 let target = start
1714 .checked_add(output_size)
1715 .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
1716 if !input.is_empty() {
1717 self.bits = BitReader::new();
1718 }
1719 self.bits.append(input);
1720 self.decode_until(target).map_err(|error| match error {
1721 Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1722 error => error,
1723 })?;
1724 self.finish_member().map_err(|error| match error {
1725 Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1726 error => error,
1727 })?;
1728 let out = self.filtered_range(start, target, start)?;
1729 self.trim_history(target, target);
1730 Ok(out)
1731 }
1732
1733 pub fn decode_member_to(
1734 &mut self,
1735 input: &[u8],
1736 output_size: usize,
1737 out: &mut impl Write,
1738 ) -> Result<()> {
1739 let start = self.current_pos();
1740 let final_target = start
1741 .checked_add(output_size)
1742 .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
1743 if !input.is_empty() {
1744 self.bits = BitReader::new();
1745 }
1746 self.bits.append(input);
1747
1748 let mut flushed = start;
1749 let mut target = start.saturating_add(STREAM_CHUNK).min(final_target);
1750 while flushed < final_target {
1751 self.decode_until(target)?;
1752 let safe_end = self.safe_flush_end(flushed, target, final_target)?;
1753 if safe_end <= flushed {
1754 if target == final_target {
1755 return Err(Error::InvalidData(
1756 "RAR 2.9 VM filter extends beyond output",
1757 ));
1758 }
1759 target = self
1760 .current_pos()
1761 .saturating_add(STREAM_CHUNK)
1762 .min(final_target);
1763 continue;
1764 }
1765
1766 let decoded = self.filtered_range(flushed, safe_end, start)?;
1767 out.write_all(&decoded)
1768 .map_err(|_| Error::InvalidData("RAR 2.9 output write failed"))?;
1769 flushed = safe_end;
1770 self.trim_history(flushed, self.current_pos());
1771 target = self
1772 .current_pos()
1773 .saturating_add(STREAM_CHUNK)
1774 .min(final_target);
1775 }
1776 self.finish_member()?;
1777 Ok(())
1778 }
1779
1780 pub fn decode_member_from_reader(
1781 &mut self,
1782 input: &mut impl Read,
1783 output_size: usize,
1784 out: &mut impl Write,
1785 ) -> Result<()> {
1786 self.bits = BitReader::new();
1787 let start = self.current_pos();
1788 let final_target = start
1789 .checked_add(output_size)
1790 .ok_or(Error::InvalidData("RAR 2.9 output size overflows"))?;
1791 let mut flushed = start;
1792 let mut target = start.saturating_add(STREAM_CHUNK).min(final_target);
1793 let mut packed = Vec::new();
1794 input
1795 .read_to_end(&mut packed)
1796 .map_err(|_| Error::InvalidData("RAR 2.9 input read failed"))?;
1797 self.bits.append(&packed);
1798 if final_target == start && !self.in_lz_block && !packed.is_empty() {
1804 self.read_tables().map_err(|error| match error {
1805 Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1806 error => error,
1807 })?;
1808 self.in_lz_block = true;
1809 }
1810
1811 while flushed < final_target {
1812 self.decode_until(target).map_err(|error| match error {
1813 Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1814 error => error,
1815 })?;
1816
1817 let safe_end = self.safe_flush_end(flushed, target, final_target)?;
1818 if safe_end <= flushed {
1819 if target == final_target {
1820 return Err(Error::InvalidData(
1821 "RAR 2.9 VM filter extends beyond output",
1822 ));
1823 }
1824 target = self
1825 .current_pos()
1826 .saturating_add(STREAM_CHUNK)
1827 .min(final_target);
1828 continue;
1829 }
1830
1831 let decoded = self.filtered_range(flushed, safe_end, start)?;
1832 out.write_all(&decoded)
1833 .map_err(|_| Error::InvalidData("RAR 2.9 output write failed"))?;
1834 flushed = safe_end;
1835 self.trim_history(flushed, self.current_pos());
1836 target = self
1837 .current_pos()
1838 .saturating_add(STREAM_CHUNK)
1839 .min(final_target);
1840 }
1841 self.finish_member().map_err(|error| match error {
1842 Error::NeedMoreInput => Error::InvalidData("RAR 2.9 bitstream is truncated"),
1843 error => error,
1844 })?;
1845 Ok(())
1846 }
1847
1848 fn decode_until(&mut self, target: usize) -> Result<()> {
1849 while self.current_pos() < target {
1850 self.drain_pending_match(target)?;
1851 if self.current_pos() >= target {
1852 break;
1853 }
1854 if !self.in_lz_block {
1855 self.read_tables()?;
1856 self.in_lz_block = true;
1857 }
1858 match self.block_mode {
1859 BlockMode::Lz => self.decode_lz(target)?,
1860 BlockMode::Ppmd => self.decode_ppmd(target)?,
1861 }
1862 }
1863 Ok(())
1864 }
1865
1866 fn read_tables(&mut self) -> Result<()> {
1867 self.bits.align_byte();
1868 if self.bits.peek_bit()? != 0 {
1869 let first_byte = self.bits.read_bits(8)? as u8;
1870 self.ppmd
1871 .decode_init(first_byte, &mut self.bits, &mut self.ppmd_esc)?;
1872 self.block_mode = BlockMode::Ppmd;
1873 return Ok(());
1874 }
1875 self.bits.read_bit()?;
1876 self.block_mode = BlockMode::Lz;
1877 let keep_tables = self.bits.read_bit()? != 0;
1878 self.last_low_offset = 0;
1879 self.low_offset_repeats = 0;
1880 if !keep_tables {
1881 self.levels = [0; TABLE_COUNT];
1882 }
1883
1884 let level_lengths = Self::read_level_lengths(&mut self.bits)?;
1885 let level_decoder = Huffman::from_lengths(&level_lengths)?;
1886 let mut new_levels = [0u8; TABLE_COUNT];
1887 let mut pos = 0usize;
1888 while pos < TABLE_COUNT {
1889 let symbol = level_decoder.decode(&mut self.bits)?;
1890 match symbol {
1891 0..=15 => {
1892 new_levels[pos] = (self.levels[pos].wrapping_add(symbol as u8)) & 0x0f;
1893 pos += 1;
1894 }
1895 16 => {
1896 if pos == 0 {
1897 return Err(Error::InvalidData("RAR 2.9 table repeat at start"));
1898 }
1899 let count = 3 + self.bits.read_bits(3)? as usize;
1900 let value = new_levels[pos - 1];
1901 fill_levels(&mut new_levels, &mut pos, count, value)?;
1902 }
1903 17 => {
1904 if pos == 0 {
1905 return Err(Error::InvalidData("RAR 2.9 long table repeat at start"));
1906 }
1907 let count = 11 + self.bits.read_bits(7)? as usize;
1908 let value = new_levels[pos - 1];
1909 fill_levels(&mut new_levels, &mut pos, count, value)?;
1910 }
1911 18 => {
1912 let count = 3 + self.bits.read_bits(3)? as usize;
1913 fill_levels(&mut new_levels, &mut pos, count, 0)?;
1914 }
1915 19 => {
1916 let count = 11 + self.bits.read_bits(7)? as usize;
1917 fill_levels(&mut new_levels, &mut pos, count, 0)?;
1918 }
1919 _ => return Err(Error::InvalidData("RAR 2.9 invalid level symbol")),
1920 }
1921 }
1922
1923 self.levels = new_levels;
1924 self.main = Huffman::from_lengths(&self.levels[..MAIN_COUNT])?;
1925 self.offsets = Huffman::from_lengths(&self.levels[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
1926 self.low_offsets = Huffman::from_lengths(
1927 &self.levels[MAIN_COUNT + OFFSET_COUNT..MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT],
1928 )?;
1929 self.lengths =
1930 Huffman::from_lengths(&self.levels[MAIN_COUNT + OFFSET_COUNT + LOW_OFFSET_COUNT..])?;
1931 Ok(())
1932 }
1933
1934 fn read_level_lengths(bits: &mut BitReader) -> Result<[u8; LEVEL_COUNT]> {
1935 let mut lengths = [0u8; LEVEL_COUNT];
1936 let mut pos = 0usize;
1937 while pos < LEVEL_COUNT {
1938 let value = bits.read_bits(4)? as u8;
1939 if value == 15 {
1940 let zero_count = bits.read_bits(4)? as usize;
1941 if zero_count == 0 {
1942 lengths[pos] = 15;
1943 pos += 1;
1944 } else {
1945 pos = pos.saturating_add(zero_count + 2).min(LEVEL_COUNT);
1946 }
1947 } else {
1948 lengths[pos] = value;
1949 pos += 1;
1950 }
1951 }
1952 Ok(lengths)
1953 }
1954
1955 fn decode_lz(&mut self, output_size: usize) -> Result<()> {
1956 while self.current_pos() < output_size {
1957 let symbol = self.main.decode(&mut self.bits)?;
1958 match symbol {
1959 0..=255 => self.output.push(symbol as u8),
1960 256 => {
1961 self.read_end_of_block()?;
1962 return Ok(());
1963 }
1964 257 => {
1965 self.read_vm_code()?;
1966 }
1967 258 => {
1968 if self.last_length != 0 {
1969 self.copy_match(self.last_length, self.last_offset, output_size)?;
1970 }
1971 }
1972 259..=262 => {
1973 let index = symbol - 259;
1974 let offset = self.old_offsets[index];
1975 let length_slot = self.lengths.decode(&mut self.bits)?;
1976 if length_slot >= LENGTH_COUNT {
1977 return Err(Error::InvalidData("RAR 2.9 invalid repeat length slot"));
1978 }
1979 let mut length = LENGTH_BASES[length_slot] + 2;
1980 if LENGTH_BITS[length_slot] != 0 {
1981 length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
1982 }
1983 self.rotate_old_offset(index);
1984 self.last_offset = offset;
1985 self.last_length = length;
1986 self.copy_match(length, offset, output_size)?;
1987 }
1988 263..=270 => {
1989 let index = symbol - 263;
1990 let mut offset = SHORT_BASES[index] + 1;
1991 if SHORT_BITS[index] != 0 {
1992 offset += self.bits.read_bits(SHORT_BITS[index])? as usize;
1993 }
1994 self.push_old_offset(offset);
1995 self.last_offset = offset;
1996 self.last_length = 2;
1997 self.copy_match(2, offset, output_size)?;
1998 }
1999 271..=298 => {
2000 let length_slot = symbol - 271;
2001 let mut length = LENGTH_BASES[length_slot] + 3;
2002 if LENGTH_BITS[length_slot] != 0 {
2003 length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
2004 }
2005 let offset = self.read_offset()?;
2006 if offset >= 0x2000 {
2007 length += 1;
2008 }
2009 if offset >= 0x40000 {
2010 length += 1;
2011 }
2012 self.push_old_offset(offset);
2013 self.last_offset = offset;
2014 self.last_length = length;
2015 self.copy_match(length, offset, output_size)?;
2016 }
2017 _ => return Err(Error::InvalidData("RAR 2.9 invalid main symbol")),
2018 }
2019 }
2020 Ok(())
2021 }
2022
2023 fn decode_ppmd(&mut self, output_size: usize) -> Result<()> {
2024 while self.current_pos() < output_size {
2025 let Some(symbol) = self.ppmd.decode_symbol(&mut self.bits)? else {
2026 return Ok(());
2027 };
2028 if symbol != self.ppmd_esc {
2029 self.output.push(symbol);
2030 continue;
2031 }
2032
2033 let Some(next) = self.ppmd.decode_symbol(&mut self.bits)? else {
2034 return Ok(());
2035 };
2036 match next {
2037 0 => {
2038 self.in_lz_block = false;
2039 return Ok(());
2040 }
2041 1 | 6..=u8::MAX => self.output.push(self.ppmd_esc),
2042 2 => {
2043 self.in_lz_block = false;
2044 return Ok(());
2045 }
2046 3 => {
2047 self.read_vm_code_ppmd()?;
2048 }
2049 4 => {
2050 let mut offset = 0usize;
2051 for _ in 0..3 {
2052 offset = (offset << 8) | self.read_ppmd_required_byte()? as usize;
2053 }
2054 offset += 2;
2055 let length = self.read_ppmd_required_byte()? as usize + 32;
2056 self.copy_match(length, offset, output_size)?;
2057 }
2058 5 => {
2059 let length = self.read_ppmd_required_byte()? as usize + 4;
2060 self.copy_match(length, 1, output_size)?;
2061 }
2062 }
2063 }
2064 Ok(())
2065 }
2066
2067 fn read_ppmd_required_byte(&mut self) -> Result<u8> {
2068 self.ppmd
2069 .decode_symbol(&mut self.bits)?
2070 .ok_or(Error::InvalidData("RAR 2.9 PPMd stream ended early"))
2071 }
2072
2073 fn finish_ppmd_member(&mut self) -> Result<()> {
2074 if self.block_mode != BlockMode::Ppmd {
2075 return Ok(());
2076 }
2077 let Some(symbol) = self.ppmd.decode_symbol(&mut self.bits)? else {
2078 return Ok(());
2079 };
2080 if symbol != self.ppmd_esc {
2081 return Err(Error::InvalidData("RAR 2.9 PPMd member has trailing data"));
2082 }
2083 let Some(next) = self.ppmd.decode_symbol(&mut self.bits)? else {
2084 return Ok(());
2085 };
2086 match next {
2087 2 => {
2088 self.in_lz_block = false;
2089 Ok(())
2090 }
2091 0 => {
2092 self.in_lz_block = false;
2093 Ok(())
2094 }
2095 _ => Err(Error::InvalidData("RAR 2.9 PPMd member has trailing data")),
2096 }
2097 }
2098
2099 fn finish_member(&mut self) -> Result<()> {
2100 match self.block_mode {
2101 BlockMode::Lz => self.finish_lz_member(),
2102 BlockMode::Ppmd => self.finish_ppmd_member(),
2103 }
2104 }
2105
2106 fn finish_lz_member(&mut self) -> Result<()> {
2107 loop {
2108 if !self.in_lz_block {
2109 return Ok(());
2110 }
2111 let symbol = self.main.decode(&mut self.bits)?;
2112 if symbol != 256 {
2113 return Err(Error::InvalidData("RAR 2.9 LZ member has trailing data"));
2114 }
2115 match self.read_end_of_block()? {
2116 LzBlockEnd::SameFileNewTable => {
2117 if self.bits.remaining_bits_are_zero() {
2118 return Ok(());
2119 }
2120 if let Err(error) = self.read_tables() {
2121 if error == Error::NeedMoreInput {
2122 return Ok(());
2123 }
2124 return Err(error);
2125 }
2126 self.in_lz_block = true;
2127 }
2128 LzBlockEnd::NewFileKeepTables | LzBlockEnd::NewFileNewTables => return Ok(()),
2129 }
2130 }
2131 }
2132
2133 fn read_end_of_block(&mut self) -> Result<LzBlockEnd> {
2134 if self.bits.read_bit()? != 0 {
2135 self.in_lz_block = false;
2136 return Ok(LzBlockEnd::SameFileNewTable);
2137 }
2138 if self.bits.read_bit()? != 0 {
2139 self.in_lz_block = false;
2140 Ok(LzBlockEnd::NewFileNewTables)
2141 } else {
2142 self.in_lz_block = true;
2143 Ok(LzBlockEnd::NewFileKeepTables)
2144 }
2145 }
2146
2147 fn read_offset(&mut self) -> Result<usize> {
2148 let slot = self.offsets.decode(&mut self.bits)?;
2149 if slot >= OFFSET_COUNT {
2150 return Err(Error::InvalidData("RAR 2.9 invalid offset slot"));
2151 }
2152 let mut offset = OFFSET_BASES[slot] + 1;
2153 let extra_bits = OFFSET_BITS[slot];
2154 if extra_bits != 0 {
2155 if slot > 9 {
2156 if extra_bits > 4 {
2157 offset += (self.bits.read_bits(extra_bits - 4)? as usize) << 4;
2158 }
2159 if self.low_offset_repeats > 0 {
2160 self.low_offset_repeats -= 1;
2161 offset += self.last_low_offset;
2162 } else {
2163 let low = self.low_offsets.decode(&mut self.bits)?;
2164 if low == 16 {
2165 self.low_offset_repeats = 15;
2166 offset += self.last_low_offset;
2167 } else if low < 16 {
2168 self.last_low_offset = low;
2169 offset += low;
2170 } else {
2171 return Err(Error::InvalidData("RAR 2.9 invalid low offset symbol"));
2172 }
2173 }
2174 } else {
2175 offset += self.bits.read_bits(extra_bits)? as usize;
2176 }
2177 }
2178 Ok(offset)
2179 }
2180
2181 fn read_vm_code(&mut self) -> Result<()> {
2182 let first_byte = self.bits.read_bits(8)?;
2183 let mut len = (first_byte & 7) + 1;
2184 if len == 7 {
2185 len = self.bits.read_bits(8)? + 7;
2186 } else if len == 8 {
2187 len = self.bits.read_bits(16)?;
2188 }
2189 let mut data = Vec::with_capacity(len as usize);
2190 for _ in 0..len {
2191 data.push(self.bits.read_bits(8)? as u8);
2192 }
2193
2194 self.parse_vm_code(first_byte, data)
2195 }
2196
2197 fn read_vm_code_ppmd(&mut self) -> Result<()> {
2198 let first_byte = u32::from(self.read_ppmd_required_byte()?);
2199 let mut len = (first_byte & 7) + 1;
2200 if len == 7 {
2201 len = u32::from(self.read_ppmd_required_byte()?) + 7;
2202 } else if len == 8 {
2203 len = (u32::from(self.read_ppmd_required_byte()?) << 8)
2204 | u32::from(self.read_ppmd_required_byte()?);
2205 }
2206 let mut data = Vec::with_capacity(len as usize);
2207 for _ in 0..len {
2208 data.push(self.read_ppmd_required_byte()?);
2209 }
2210
2211 self.parse_vm_code(first_byte, data)
2212 }
2213
/// Parses one VM filter record and queues the resulting filter.
///
/// `first_byte` carries the record flags and `data` the record payload,
/// which is read MSB-first through a private `BitReader`:
/// * `0x80` — an explicit program index follows (0 resets all programs and
///   filters, otherwise the value minus one); without it the last used
///   program index is reused;
/// * `0x40` — add 258 to the encoded block start;
/// * `0x20` — an explicit block size follows; without it the program's
///   previous block size (or 0) is used;
/// * `0x10` — a 7-bit mask follows, then one encoded value per set bit to
///   override the initial registers;
/// * `0x08` — a global data blob follows.
///
/// An index equal to the number of known programs introduces a new program
/// whose bytecode is part of the record.
///
/// # Errors
/// Returns `Error::InvalidData` for an out-of-range program index, an
/// overflowing block start, empty or oversized bytecode, or when the program
/// or filter limits are exceeded. Reads past the end of `data` surface as
/// whatever `BitReader` reports (`Error::NeedMoreInput` in `peek_bits`).
/// NOTE(review): confirm callers do not mistake that for "feed more
/// compressed input".
fn parse_vm_code(&mut self, first_byte: u32, data: Vec<u8>) -> Result<()> {
    let mut vm = BitReader::from_bytes(&data);
    let program_index = if first_byte & 0x80 != 0 {
        let value = vm.read_encoded_u32()?;
        if value == 0 {
            // Index 0 is a reset request: drop every known program and
            // pending filter, then define program 0 afresh.
            self.filters.clear();
            self.programs.clear();
            0
        } else {
            usize::try_from(value - 1)
                .map_err(|_| Error::InvalidData("RAR 2.9 VM program index overflows"))?
        }
    } else {
        self.last_filter
    };
    // An index one past the end is allowed: it declares a new program.
    if program_index > self.programs.len() {
        return Err(Error::InvalidData("RAR 2.9 VM program index is invalid"));
    }
    self.last_filter = program_index;
    let new_program = program_index == self.programs.len();

    // The encoded start is relative to the current write position.
    let mut block_start = vm.read_encoded_u32()? as usize;
    if first_byte & 0x40 != 0 {
        block_start += 258;
    }
    block_start = self
        .current_pos()
        .checked_add(block_start)
        .ok_or(Error::InvalidData("RAR 2.9 VM block start overflows"))?;

    let mut block_size = self
        .programs
        .get(program_index)
        .map(|program| program.block_size)
        .unwrap_or(0);
    if first_byte & 0x20 != 0 {
        block_size = vm.read_encoded_u32()? as usize;
    }

    let mut regs = [0u32; 7];
    // NOTE(review): 0x3c000 is presumably the VM's global memory address —
    // confirm against the rarvm module.
    regs[3] = 0x3c000;
    regs[4] = block_size as u32;
    // NOTE(review): this reads exec_count before the increment further down,
    // so a reused program sees its previous count in regs[5] — confirm this
    // matches the reference unpacker.
    if let Some(program) = self.programs.get(program_index) {
        regs[5] = program.exec_count;
    }
    if first_byte & 0x10 != 0 {
        let mask = vm.read_bits(7)?;
        for (index, reg) in regs.iter_mut().enumerate() {
            if mask & (1 << index) != 0 {
                *reg = vm.read_encoded_u32()?;
            }
        }
    }

    if new_program {
        if self.programs.len() >= MAX_VM_PROGRAMS {
            return Err(Error::InvalidData("RAR 2.9 VM program limit exceeded"));
        }
        let code_size = vm.read_encoded_u32()? as usize;
        if code_size == 0 {
            return Err(Error::InvalidData("RAR 2.9 VM code is empty"));
        }
        if code_size > MAX_VM_CODE_SIZE {
            return Err(Error::InvalidData("RAR 2.9 VM code is too large"));
        }
        let mut code = Vec::with_capacity(code_size);
        for _ in 0..code_size {
            code.push(vm.read_bits(8)? as u8);
        }
        // Recognised standard filters are handled natively; everything else
        // is parsed as generic VM bytecode.
        let kind = identify_standard_filter(&code)
            .map(VmProgramKind::Standard)
            .map_or_else(
                || rarvm::Program::parse(&code).map(VmProgramKind::Generic),
                Ok,
            )?;
        self.programs.push(VmProgram {
            kind,
            block_size,
            exec_count: 0,
            globals: Vec::new(),
        });
    } else if let Some(program) = self.programs.get_mut(program_index) {
        program.exec_count = program.exec_count.wrapping_add(1);
        program.block_size = block_size;
    }

    let mut global_data = Vec::new();
    if first_byte & 0x08 != 0 {
        let data_size = vm.read_encoded_u32()? as usize;
        global_data.reserve(data_size.min(MAX_VM_GLOBAL_DATA));
        // Every declared byte is consumed, but only the first
        // MAX_VM_GLOBAL_DATA bytes are kept.
        for _ in 0..data_size {
            let byte = vm.read_bits(8)? as u8;
            if global_data.len() < MAX_VM_GLOBAL_DATA {
                global_data.push(byte);
            }
        }
    }

    if self.filters.len() >= MAX_VM_FILTERS {
        return Err(Error::InvalidData("RAR 2.9 VM filter limit exceeded"));
    }
    self.filters.push(VmFilter {
        program: program_index,
        start: block_start,
        size: block_size,
        regs,
        global_data,
    });
    Ok(())
}
2324
2325 fn filtered_range(&mut self, start: usize, end: usize, member_start: usize) -> Result<Vec<u8>> {
2326 let mut out = Vec::with_capacity(end - start);
2327 let mut pos = start;
2328 let filters: Vec<_> = self
2329 .filters
2330 .iter()
2331 .enumerate()
2332 .filter_map(|(index, filter)| {
2333 (filter.start >= start && filter.start + filter.size <= end).then_some(index)
2334 })
2335 .collect();
2336 for filter_index in filters {
2337 let (program_index, filter_start, filter_size, regs, global_data) = {
2338 let filter = self
2339 .filters
2340 .get(filter_index)
2341 .ok_or(Error::InvalidData("RAR 2.9 VM filter is missing"))?;
2342 (
2343 filter.program,
2344 filter.start,
2345 filter.size,
2346 filter.regs,
2347 filter.global_data.clone(),
2348 )
2349 };
2350 if filter_start < pos {
2351 continue;
2352 }
2353 out.extend_from_slice(self.raw_range(pos, filter_start)?);
2354 let mut block = self
2355 .raw_range(filter_start, filter_start + filter_size)?
2356 .to_vec();
2357 let file_offset = filter_start
2358 .checked_sub(member_start)
2359 .ok_or(Error::InvalidData("RAR 2.9 VM filter starts before file"))?
2360 as u32;
2361 let program = self
2362 .programs
2363 .get_mut(program_index)
2364 .ok_or(Error::InvalidData("RAR 2.9 VM program is missing"))?;
2365 match &program.kind {
2366 VmProgramKind::Standard(standard) => {
2367 apply_standard_filter(*standard, &mut block, file_offset, ®s)?
2368 }
2369 VmProgramKind::Generic(generic) => {
2370 let globals = if global_data.is_empty() {
2371 program.globals.as_slice()
2372 } else {
2373 global_data.as_slice()
2374 };
2375 let result = generic.execute(rarvm::Invocation {
2376 input: &block,
2377 regs,
2378 global_data: globals,
2379 file_offset: file_offset as u64,
2380 exec_count: program.exec_count,
2381 })?;
2382 program.globals = result.globals;
2383 block = result.output;
2384 }
2385 }
2386 out.extend_from_slice(&block);
2387 pos = filter_start + filter_size;
2388 }
2389 out.extend_from_slice(self.raw_range(pos, end)?);
2390 Ok(out)
2391 }
2392
2393 fn safe_flush_end(&self, start: usize, end: usize, final_target: usize) -> Result<usize> {
2394 let current = self.current_pos();
2395 let mut safe_end = end;
2396 for filter in &self.filters {
2397 let filter_end = filter
2398 .start
2399 .checked_add(filter.size)
2400 .ok_or(Error::InvalidData("RAR 2.9 VM filter size overflows"))?;
2401 if filter.start >= safe_end || filter_end <= start {
2402 continue;
2403 }
2404 if filter_end > final_target {
2405 return Err(Error::InvalidData(
2406 "RAR 2.9 VM filter extends beyond output",
2407 ));
2408 }
2409 if filter_end > current {
2410 safe_end = safe_end.min(filter.start);
2411 }
2412 }
2413 Ok(safe_end)
2414 }
2415
2416 fn copy_match(&mut self, length: usize, offset: usize, output_size: usize) -> Result<()> {
2417 let offset = if offset == 0 { 1 } else { offset };
2421 let current = self.current_pos();
2422 if offset > current {
2423 return Err(Error::InvalidData("RAR 2.9 match distance is out of range"));
2424 }
2425 for index in 0..length {
2426 if self.current_pos() >= output_size {
2427 self.pending_match = Some((length - index, offset));
2428 break;
2429 }
2430 let src = self.current_pos() - offset;
2431 let byte = *self
2432 .raw_byte(src)
2433 .ok_or(Error::InvalidData("RAR 2.9 match distance is out of range"))?;
2434 self.output.push(byte);
2435 }
2436 Ok(())
2437 }
2438
2439 fn drain_pending_match(&mut self, output_size: usize) -> Result<()> {
2440 let Some((length, offset)) = self.pending_match.take() else {
2441 return Ok(());
2442 };
2443 self.copy_match(length, offset, output_size)
2444 }
2445
2446 fn push_old_offset(&mut self, offset: usize) {
2447 self.old_offsets[3] = self.old_offsets[2];
2448 self.old_offsets[2] = self.old_offsets[1];
2449 self.old_offsets[1] = self.old_offsets[0];
2450 self.old_offsets[0] = offset;
2451 }
2452
2453 fn rotate_old_offset(&mut self, index: usize) {
2454 let value = self.old_offsets[index];
2455 for i in (1..=index).rev() {
2456 self.old_offsets[i] = self.old_offsets[i - 1];
2457 }
2458 self.old_offsets[0] = value;
2459 }
2460
2461 fn current_pos(&self) -> usize {
2462 self.base_offset + self.output.len()
2463 }
2464
2465 fn raw_byte(&self, position: usize) -> Option<&u8> {
2466 self.output.get(position.checked_sub(self.base_offset)?)
2467 }
2468
2469 fn raw_range(&self, start: usize, end: usize) -> Result<&[u8]> {
2470 if start < self.base_offset || end < start {
2471 return Err(Error::InvalidData(
2472 "RAR 2.9 retained history is unavailable",
2473 ));
2474 }
2475 let rel_start = start - self.base_offset;
2476 let rel_end = end - self.base_offset;
2477 self.output
2478 .get(rel_start..rel_end)
2479 .ok_or(Error::InvalidData(
2480 "RAR 2.9 retained history is unavailable",
2481 ))
2482 }
2483
2484 fn trim_history(&mut self, flushed_pos: usize, current_pos: usize) {
2485 let keep_from = current_pos.saturating_sub(MAX_HISTORY);
2486 let keep_from = keep_from.min(flushed_pos);
2487 if keep_from <= self.base_offset {
2488 return;
2489 }
2490 let drain = keep_from - self.base_offset;
2491 self.output.drain(..drain);
2492 self.base_offset = keep_from;
2493 self.filters
2494 .retain(|filter| filter.start + filter.size > self.base_offset);
2495 }
2496}
2497
2498impl Default for Unpack29 {
2499 fn default() -> Self {
2500 Self::new()
2501 }
2502}
2503
2504fn fill_levels(levels: &mut [u8], pos: &mut usize, count: usize, value: u8) -> Result<()> {
2505 let end = pos
2506 .checked_add(count)
2507 .ok_or(Error::InvalidData("RAR 2.9 table run overflows"))?;
2508 let end = end.min(levels.len());
2509 for item in &mut levels[*pos..end] {
2510 *item = value;
2511 }
2512 *pos = end;
2513 Ok(())
2514}
2515
/// Canonical Huffman decoding table built from per-symbol code lengths
/// (1..=15 bits). All per-length arrays are indexed by code length; slot 0
/// is unused.
#[derive(Debug, Clone)]
struct Huffman {
    /// Coded symbols, sorted by (code length, code, symbol value).
    symbols: Vec<HuffmanSymbol>,
    /// First code value assigned to each code length.
    first_code: [u16; 16],
    /// Index into `symbols` of the first entry of each code length.
    first_index: [usize; 16],
    /// Number of symbols that use each code length.
    counts: [u16; 16],
}
2523
/// One coded entry of a `Huffman` table.
#[derive(Debug, Clone)]
struct HuffmanSymbol {
    /// Code value assigned to the symbol.
    code: u16,
    /// Code length in bits.
    len: u8,
    /// Decoded symbol value (its index in the length list).
    symbol: usize,
}
2530
2531impl Huffman {
2532 fn empty() -> Self {
2533 Self {
2534 symbols: Vec::new(),
2535 first_code: [0; 16],
2536 first_index: [0; 16],
2537 counts: [0; 16],
2538 }
2539 }
2540
2541 fn from_lengths(lengths: &[u8]) -> Result<Self> {
2542 let mut count = [0u16; 16];
2543 for &len in lengths {
2544 if len > 15 {
2545 return Err(Error::InvalidData("RAR 2.9 Huffman length is too large"));
2546 }
2547 if len != 0 {
2548 count[len as usize] += 1;
2549 }
2550 }
2551 if count.iter().all(|&value| value == 0) {
2552 return Ok(Self::empty());
2553 }
2554 validate_huffman_counts(&count)?;
2555
2556 let mut first_code = [0u16; 16];
2557 let mut next_code = [0u16; 16];
2558 let mut code = 0u16;
2559 for len in 1..=15 {
2560 code = (code + count[len - 1]) << 1;
2561 first_code[len] = code;
2562 next_code[len] = code;
2563 }
2564
2565 let mut first_index = [0usize; 16];
2566 let mut index = 0usize;
2567 for len in 1..=15 {
2568 first_index[len] = index;
2569 index += usize::from(count[len]);
2570 }
2571
2572 let mut symbols = Vec::new();
2573 for (symbol, &len) in lengths.iter().enumerate() {
2574 if len == 0 {
2575 continue;
2576 }
2577 let code = next_code[len as usize];
2578 next_code[len as usize] += 1;
2579 symbols.push(HuffmanSymbol { code, len, symbol });
2580 }
2581 symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
2582 Ok(Self {
2583 symbols,
2584 first_code,
2585 first_index,
2586 counts: count,
2587 })
2588 }
2589
2590 fn decode(&self, bits: &mut BitReader) -> Result<usize> {
2591 let mut code = 0u16;
2592 if self.symbols.is_empty() {
2593 return Err(Error::InvalidData("RAR 2.9 empty Huffman table"));
2594 }
2595 for len in 1..=15 {
2596 code = (code << 1) | bits.read_bit()? as u16;
2597 let count = self.counts[len];
2598 if count != 0 {
2599 let first = self.first_code[len];
2600 let offset = code.wrapping_sub(first);
2601 if offset < count {
2602 let index = self.first_index[len] + usize::from(offset);
2603 return Ok(self.symbols[index].symbol);
2604 }
2605 }
2606 }
2607 Err(Error::InvalidData("RAR 2.9 invalid Huffman code"))
2608 }
2609}
2610
2611fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
2612 let mut available = 1i32;
2613 for &len_count in count.iter().skip(1) {
2614 available = (available << 1) - i32::from(len_count);
2615 if available < 0 {
2616 return Err(Error::InvalidData("RAR 2.9 oversubscribed Huffman table"));
2617 }
2618 }
2619 Ok(())
2620}
2621
/// MSB-first bit cursor over an owned byte buffer that can be extended with
/// more input as it arrives.
#[derive(Debug, Clone)]
struct BitReader {
    /// Buffered input bytes; fully consumed leading bytes are dropped by
    /// `compact`.
    input: Vec<u8>,
    /// Position of the next unread bit, counted from the start of `input`.
    bit_pos: usize,
}
2627
2628impl BitReader {
2629 fn new() -> Self {
2630 Self {
2631 input: Vec::new(),
2632 bit_pos: 0,
2633 }
2634 }
2635
2636 fn from_bytes(input: &[u8]) -> Self {
2637 Self {
2638 input: input.to_vec(),
2639 bit_pos: 0,
2640 }
2641 }
2642
2643 fn append(&mut self, input: &[u8]) {
2644 self.compact();
2645 self.input.extend_from_slice(input);
2646 }
2647
2648 fn compact(&mut self) {
2649 let bytes = self.bit_pos / 8;
2650 if bytes == 0 {
2651 return;
2652 }
2653 self.input.drain(..bytes);
2654 self.bit_pos -= bytes * 8;
2655 }
2656
2657 fn align_byte(&mut self) {
2658 self.bit_pos = (self.bit_pos + 7) & !7;
2659 }
2660
2661 fn peek_bit(&self) -> Result<u8> {
2662 self.peek_bits(1).map(|value| value as u8)
2663 }
2664
2665 fn read_bit(&mut self) -> Result<u8> {
2666 self.read_bits(1).map(|value| value as u8)
2667 }
2668
2669 fn read_bits(&mut self, count: u8) -> Result<u32> {
2670 let value = self.peek_bits(count)?;
2671 self.bit_pos += count as usize;
2672 Ok(value)
2673 }
2674
2675 fn remaining_bits_are_zero(&self) -> bool {
2676 let full_bytes = self.bit_pos / 8;
2677 let bit_offset = self.bit_pos % 8;
2678 let Some((&first, rest)) = self
2679 .input
2680 .get(full_bytes)
2681 .zip(self.input.get(full_bytes + 1..))
2682 else {
2683 return true;
2684 };
2685 if bit_offset != 0 && first << bit_offset != 0 {
2686 return false;
2687 }
2688 if bit_offset == 0 && first != 0 {
2689 return false;
2690 }
2691 rest.iter().all(|&byte| byte == 0)
2692 }
2693
2694 fn peek_bits(&self, count: u8) -> Result<u32> {
2695 if count > 24 {
2696 return Err(Error::InvalidData("RAR 2.9 bit read is too wide"));
2697 }
2698 let mut value = 0u32;
2699 for i in 0..count as usize {
2700 let bit_index = self.bit_pos + i;
2701 let byte = *self.input.get(bit_index / 8).ok_or(Error::NeedMoreInput)?;
2702 let bit = (byte >> (7 - (bit_index % 8))) & 1;
2703 value = (value << 1) | bit as u32;
2704 }
2705 Ok(value)
2706 }
2707
2708 fn read_encoded_u32(&mut self) -> Result<u32> {
2709 match self.read_bits(2)? {
2710 0 => self.read_bits(4),
2711 1 => {
2712 let high = self.read_bits(8)?;
2713 if high >= 16 {
2714 Ok(high)
2715 } else {
2716 Ok(0xffff_ff00 | (high << 4) | self.read_bits(4)?)
2717 }
2718 }
2719 2 => self.read_bits(16),
2720 _ => Ok((self.read_bits(16)? << 16) | self.read_bits(16)?),
2721 }
2722 }
2723}
2724
2725impl PpmdByteReader for BitReader {
2726 fn read_ppmd_byte(&mut self) -> Result<u8> {
2727 self.read_bits(8).map(|value| value as u8)
2728 }
2729}
2730
/// MSB-first bit sink that accumulates output into a byte vector.
#[derive(Default)]
struct BitWriter {
    /// Bytes written so far; the last one may be only partially filled.
    bytes: Vec<u8>,
    /// Total number of bits written.
    bit_pos: usize,
}
2736
2737impl BitWriter {
2738 fn write_bits(&mut self, value: u32, count: u8) {
2739 for shift in (0..count).rev() {
2740 self.write_bit(((value >> shift) & 1) != 0);
2741 }
2742 }
2743
2744 fn write_encoded_u32(&mut self, value: u32) {
2745 if value < 16 {
2746 self.write_bits(0, 2);
2747 self.write_bits(value, 4);
2748 } else if value < 256 {
2749 self.write_bits(1, 2);
2750 self.write_bits(value, 8);
2751 } else if value <= 0xffff {
2752 self.write_bits(2, 2);
2753 self.write_bits(value, 16);
2754 } else {
2755 self.write_bits(3, 2);
2756 self.write_bits(value >> 16, 16);
2757 self.write_bits(value & 0xffff, 16);
2758 }
2759 }
2760
2761 fn write_bit(&mut self, bit: bool) {
2762 if self.bit_pos.is_multiple_of(8) {
2763 self.bytes.push(0);
2764 }
2765 if bit {
2766 let shift = 7 - (self.bit_pos % 8);
2767 *self.bytes.last_mut().unwrap() |= 1 << shift;
2768 }
2769 self.bit_pos += 1;
2770 }
2771
2772 fn finish(self) -> Vec<u8> {
2773 self.bytes
2774 }
2775}
2776
2777fn identify_standard_filter(code: &[u8]) -> Option<StandardFilter> {
2778 if code.iter().fold(0u8, |acc, &byte| acc ^ byte) != 0 {
2779 return None;
2780 }
2781 match (code.len(), crc32(code)) {
2782 (53, 0xad57_6887) => Some(StandardFilter::E8),
2783 (57, 0x3cd7_e57e) => Some(StandardFilter::E8E9),
2784 (120, 0x3769_893f) => Some(StandardFilter::Itanium),
2785 (29, 0x0e06_077d) => Some(StandardFilter::Delta),
2786 (149, 0x1c2c_5dc8) => Some(StandardFilter::Rgb),
2787 (216, 0xbc85_e701) => Some(StandardFilter::Audio),
2788 _ => None,
2789 }
2790}
2791
2792fn apply_standard_filter(
2793 filter: StandardFilter,
2794 data: &mut Vec<u8>,
2795 file_offset: u32,
2796 regs: &[u32; 7],
2797) -> Result<()> {
2798 match filter {
2799 StandardFilter::E8 => {
2800 filters::decode_in_place(FilterOp::E8, data, file_offset, rar29_delta_messages())?
2801 }
2802 StandardFilter::E8E9 => {
2803 filters::decode_in_place(FilterOp::E8E9, data, file_offset, rar29_delta_messages())?
2804 }
2805 StandardFilter::Itanium => itanium_decode(data, file_offset),
2806 StandardFilter::Delta => {
2807 let channels = regs[0] as usize;
2808 if channels == 0 {
2809 return Err(Error::InvalidData("RAR 2.9 DELTA filter has zero channels"));
2810 }
2811 filters::decode_in_place(
2812 FilterOp::Delta { channels },
2813 data,
2814 0,
2815 rar29_delta_messages(),
2816 )?;
2817 }
2818 StandardFilter::Rgb => {
2819 if regs[0] < 3 || regs[1] > 2 {
2820 return Err(Error::InvalidData(
2821 "RAR 2.9 RGB filter parameters are invalid",
2822 ));
2823 }
2824 let width = regs[0] as usize - 3;
2825 let pos_r = regs[1] as usize;
2826 *data = rgb_decode(data, width, pos_r)?;
2827 }
2828 StandardFilter::Audio => {
2829 let channels = regs[0] as usize;
2830 if channels == 0 {
2831 return Err(Error::InvalidData("RAR 2.9 AUDIO filter has zero channels"));
2832 }
2833 *data = audio_decode(data, channels)?;
2834 }
2835 }
2836 Ok(())
2837}
2838
/// Reverses the Itanium filter in place.
///
/// The buffer is walked in 16-byte units for as long as the unit start lies
/// more than 21 bytes before the end of `data`. The low bits of a unit's
/// first byte select which of its three slots are examined; in each
/// examined slot whose 4-bit type field equals 5, the unit number
/// (`file_offset / 16` plus the unit index) is subtracted from the 20-bit
/// field, modulo 2^20. Buffers of 21 bytes or fewer are left untouched.
fn itanium_decode(data: &mut [u8], file_offset: u32) {
    let limit = match data.len().checked_sub(21) {
        Some(limit) if limit > 0 => limit,
        _ => return,
    };
    let mut unit = file_offset >> 4;
    let mut pos = 0usize;
    while pos < limit {
        // Two-bit lookup: 0 means no slot is examined, otherwise slots
        // `selector + 1 ..= 4` are.
        let selector = (0x334b_0000u32 >> (data[pos] & 0x1e)) & 3;
        if selector != 0 {
            for slot in (selector + 1)..=4 {
                let p = pos + (slot as usize * 5 - 8);
                if ((data[p + 3] >> slot) & 15) != 5 {
                    continue;
                }
                let raw = u32::from_le_bytes([data[p], data[p + 1], data[p + 2], data[p + 3]]);
                let address = (raw >> slot).wrapping_sub(unit) & 0x000f_ffff;
                let patched = (raw & !(0x000f_ffff << slot)) | (address << slot);
                data[p..p + 4].copy_from_slice(&patched.to_le_bytes());
            }
        }
        pos += 16;
        unit = unit.wrapping_add(1);
    }
}
2868
2869fn rgb_decode(data: &[u8], width: usize, pos_r: usize) -> Result<Vec<u8>> {
2870 if data.len() < 3 || width == 0 || !width.is_multiple_of(3) || width > data.len() || pos_r > 2 {
2871 return Err(Error::InvalidData(
2872 "RAR 2.9 RGB filter parameters are invalid",
2873 ));
2874 }
2875 let mut out = vec![0u8; data.len()];
2876 let mut src = 0usize;
2877 for channel in 0..3 {
2878 let mut prev = 0u8;
2879 let mut i = channel;
2880 while i < data.len() {
2881 let predicted = if i >= width + 3 {
2882 rgb_predict(prev, out[i - width], out[i - width - 3])
2883 } else {
2884 prev
2885 };
2886 let encoded = *data
2887 .get(src)
2888 .ok_or(Error::InvalidData("RAR 2.9 RGB filter source is truncated"))?;
2889 prev = predicted.wrapping_sub(encoded);
2890 out[i] = prev;
2891 src += 1;
2892 i += 3;
2893 }
2894 }
2895 for i in (pos_r..data.len().saturating_sub(2)).step_by(3) {
2896 let green = out[i + 1];
2897 out[i] = out[i].wrapping_add(green);
2898 out[i + 2] = out[i + 2].wrapping_add(green);
2899 }
2900 Ok(out)
2901}
2902
/// Picks whichever of `prev`, `upper` and `upper_left` is closest to the
/// estimate `prev + upper - upper_left`. Ties favour `prev`, then `upper`.
fn rgb_predict(prev: u8, upper: u8, upper_left: u8) -> u8 {
    let estimate = i32::from(prev) + i32::from(upper) - i32::from(upper_left);
    let distance = |candidate: u8| (estimate - i32::from(candidate)).abs();
    let (d_prev, d_upper, d_upper_left) = (distance(prev), distance(upper), distance(upper_left));

    if d_prev <= d_upper && d_prev <= d_upper_left {
        return prev;
    }
    if d_upper <= d_upper_left {
        upper
    } else {
        upper_left
    }
}
2916
2917fn audio_decode(data: &[u8], channels: usize) -> Result<Vec<u8>> {
2918 let mut out = vec![0u8; data.len()];
2919 let mut src = 0usize;
2920 for channel in 0..channels {
2921 let mut prev_byte = 0u32;
2922 let mut prev_delta = 0i32;
2923 let mut d1 = 0i32;
2924 let mut d2 = 0i32;
2925 let mut k1 = 0i32;
2926 let mut k2 = 0i32;
2927 let mut k3 = 0i32;
2928 let mut dif = [0u32; 7];
2929 let mut byte_count = 0usize;
2930 let mut i = channel;
2931 while i < data.len() {
2932 let d3 = d2;
2933 d2 = prev_delta - d1;
2934 d1 = prev_delta;
2935 let predicted = ((8 * prev_byte as i32 + k1 * d1 + k2 * d2 + k3 * d3) >> 3) & 0xff;
2936 let encoded = *data.get(src).ok_or(Error::InvalidData(
2937 "RAR 2.9 AUDIO filter source is truncated",
2938 ))?;
2939 src += 1;
2940 let decoded = (predicted as u8).wrapping_sub(encoded);
2941 out[i] = decoded;
2942 prev_delta = decoded.wrapping_sub(prev_byte as u8) as i8 as i32;
2943 prev_byte = decoded as u32;
2944 let d = (encoded as i8 as i32) << 3;
2945 dif[0] += d.unsigned_abs();
2946 dif[1] += (d - d1).unsigned_abs();
2947 dif[2] += (d + d1).unsigned_abs();
2948 dif[3] += (d - d2).unsigned_abs();
2949 dif[4] += (d + d2).unsigned_abs();
2950 dif[5] += (d - d3).unsigned_abs();
2951 dif[6] += (d + d3).unsigned_abs();
2952 if byte_count & 0x1f == 0 {
2953 let mut min = dif[0];
2954 let mut min_index = 0usize;
2955 dif[0] = 0;
2956 for (index, value) in dif.iter_mut().enumerate().skip(1) {
2957 if *value < min {
2958 min = *value;
2959 min_index = index;
2960 }
2961 *value = 0;
2962 }
2963 match min_index {
2964 1 if k1 >= -16 => k1 -= 1,
2965 2 if k1 < 16 => k1 += 1,
2966 3 if k2 >= -16 => k2 -= 1,
2967 4 if k2 < 16 => k2 += 1,
2968 5 if k3 >= -16 => k3 -= 1,
2969 6 if k3 < 16 => k3 += 1,
2970 _ => {}
2971 }
2972 }
2973 byte_count += 1;
2974 i += channels;
2975 }
2976 }
2977 Ok(out)
2978}
2979
2980#[cfg(test)]
2981mod tests {
2982 use crate::rarvm::{Instruction, Opcode, Operand, Program};
2983 use std::ops::Range;
2984
2985 use super::{
2986 apply_standard_filter, audio_encode, best_match, encode_ppmd_tokens,
2987 encode_table_level_tokens, encode_tokens, encoded_filter_records, insert_match_position,
2988 itanium_decode, itanium_encode, should_lazy_emit_literal, split_large_filter,
2989 unpack29_decode, unpack29_encode_literals, unpack29_encode_ppmd,
2990 unpack29_encode_ppmd_literals, unpack29_encode_ppmd_with_filter, BitReader, BitWriter,
2991 EncodeOptions, EncodeToken, EncoderMatchState, Error, Huffman, LevelToken,
2992 OwnedVmFilterRecord, PpmdEncodeToken, Rar29FilterKind, Rar29FilterSpec, Result,
2993 StandardFilter, Unpack29, Unpack29Encoder, VmFilter, VmProgram, VmProgramKind, MAIN_COUNT,
2994 MATCH_HASH_BUCKETS, MAX_MATCH_CANDIDATES, MAX_VM_AUDIO_FILTER_BLOCK_SIZE,
2995 MAX_VM_DELTA_FILTER_BLOCK_SIZE, MAX_VM_FILTER_BLOCK_SIZE, RAR3_AUDIO_FILTER_BYTECODE,
2996 TABLE_COUNT,
2997 };
2998
2999 const COMPRESSED_TEXT: &[u8] = &[
3000 0x09, 0x10, 0x10, 0x93, 0xe4, 0xce, 0x7f, 0xa2, 0xba, 0x80, 0x46, 0x16, 0x82, 0x63, 0xe9,
3001 0x9a, 0x19, 0xe4, 0x10, 0xe0, 0x41, 0x3d, 0x16, 0xfc, 0x4d, 0xfa, 0x6f, 0xf2, 0x5c, 0xae,
3002 0x32, 0x86, 0xc9, 0x95, 0x9d, 0xf1, 0x04, 0xa4, 0xe8, 0x92, 0x8f, 0x12, 0xd7, 0xe7, 0xba,
3003 0xcb, 0x26, 0xf1, 0x97, 0xac, 0x7c, 0x5f, 0xfd, 0xa0, 0x00, 0x1f, 0x77, 0x50,
3004 ];
3005
3006 #[test]
3007 fn decodes_rar29_lz_member() {
3008 assert_eq!(
3009 unpack29_decode(COMPRESSED_TEXT, 2400).unwrap(),
3010 expected_text()
3011 );
3012 }
3013
3014 #[test]
3015 fn rejects_oversubscribed_rar29_huffman_tables() {
3016 assert!(matches!(
3017 Huffman::from_lengths(&[1, 1, 1]),
3018 Err(Error::InvalidData("RAR 2.9 oversubscribed Huffman table"))
3019 ));
3020 }
3021
3022 #[test]
3023 fn literal_encoder_round_trips_rar29_lz_blocks() {
3024 let input = b"literal-only RAR 2.9 baseline\nwith repeated text literal-only\n";
3025 let packed = unpack29_encode_literals(input).unwrap();
3026
3027 assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3028 }
3029
3030 #[test]
3031 fn multi_block_lz_encoding_round_trips_large_repeated_documents() {
3032 let seed = b"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\n\
3033<HTML><BODY><P>RAR29 repeated document body with enough structured text to \
3034exercise LZSS block table selection.</P></BODY></HTML>\n"
3035 .repeat(96);
3036 let input = seed.repeat(180);
3037 let single =
3038 super::encode_member_with_options(&input, &[], EncodeOptions::new(96)).unwrap();
3039 let blocked = super::encode_member_with_options(
3040 &input,
3041 &[],
3042 EncodeOptions::new(96).with_block_size(1024 * 1024),
3043 )
3044 .unwrap();
3045
3046 assert_eq!(unpack29_decode(&single, input.len()).unwrap(), input);
3047 assert_eq!(unpack29_decode(&blocked, input.len()).unwrap(), input);
3048 assert!(blocked.len() < input.len());
3049 }
3050
3051 #[test]
3052 fn table_level_encoder_uses_rar29_run_symbols() {
3053 let mut lengths = [0u8; TABLE_COUNT];
3054 lengths[..4].fill(5);
3055 lengths[8..21].fill(0);
3056
3057 let tokens = encode_table_level_tokens(&lengths);
3058
3059 assert!(tokens.contains(&LevelToken::repeat_previous_short(3)));
3060 assert!(tokens.iter().any(|token| token.symbol == 19));
3061 }
3062
3063 #[test]
3064 fn lazy_lz_parser_defers_short_match_for_longer_next_match() {
3065 let input = b"abcdXbcdYYYYYYYYYYYYabcdYYYYYYYYYYYY";
3066 let greedy = encode_tokens(input, &[], EncodeOptions::new(MAX_MATCH_CANDIDATES));
3067 let lazy = encode_tokens(
3068 input,
3069 &[],
3070 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3071 );
3072 let packed = Unpack29Encoder::with_options(
3073 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3074 )
3075 .encode_member(input)
3076 .unwrap();
3077
3078 assert!(greedy
3079 .iter()
3080 .any(|token| matches!(token, EncodeToken::Match { length: 4, .. })));
3081 assert!(lazy
3082 .iter()
3083 .any(|token| matches!(token, EncodeToken::Match { length, .. } if *length > 8)));
3084 assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3085 }
3086
3087 #[test]
3088 fn lazy_lz_parser_uses_match_cost_not_only_match_length() {
3089 let pos = 300_000usize;
3090 let mut input = vec![0u8; pos + 16];
3091 input[100..106].copy_from_slice(b"BCDEFG");
3092 input[106] = b'!';
3093 input[pos - 10..pos - 5].copy_from_slice(b"ABCD!");
3094 input[pos..pos + 7].copy_from_slice(b"ABCDEFG");
3095 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3096 insert_match_position(&input, 100, &mut buckets);
3097 insert_match_position(&input, pos - 10, &mut buckets);
3098
3099 let current = best_match(
3100 &input,
3101 pos,
3102 input.len(),
3103 &buckets,
3104 EncodeOptions::new(MAX_MATCH_CANDIDATES),
3105 &EncoderMatchState::default(),
3106 )
3107 .unwrap();
3108 let next = best_match(
3109 &input,
3110 pos + 1,
3111 input.len(),
3112 &buckets,
3113 EncodeOptions::new(MAX_MATCH_CANDIDATES),
3114 &EncoderMatchState::default(),
3115 )
3116 .unwrap();
3117
3118 assert_eq!(current.length, 4);
3119 assert_eq!(current.offset, 10);
3120 assert_eq!(next.length, 6);
3121 assert!(next.offset > 0x40000);
3122 assert!(!should_lazy_emit_literal(
3123 &input,
3124 pos,
3125 input.len(),
3126 &buckets,
3127 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
3128 &EncoderMatchState::default(),
3129 current,
3130 ));
3131 }
3132
3133 #[test]
3134 fn lazy_lz_parser_uses_bounded_cost_lookahead() {
3135 let pos = 160;
3136 let mut input: Vec<u8> = (0..240u16)
3137 .map(|value| value.wrapping_mul(91) as u8)
3138 .collect();
3139 input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3140 input[pos - 80..pos - 64].copy_from_slice(b"CDEFGHIJKLMNOPQR");
3141 input[pos..pos + 18].copy_from_slice(b"ABCDEFGHIJKLMNOPQR");
3142
3143 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3144 for candidate in 0..pos {
3145 insert_match_position(&input, candidate, &mut buckets);
3146 }
3147 let current = best_match(
3148 &input,
3149 pos,
3150 input.len(),
3151 &buckets,
3152 EncodeOptions::default(),
3153 &EncoderMatchState::default(),
3154 )
3155 .unwrap();
3156
3157 assert_eq!((current.length, current.offset), (8, 30));
3158 assert!(!should_lazy_emit_literal(
3159 &input,
3160 pos,
3161 input.len(),
3162 &buckets,
3163 EncodeOptions::default()
3164 .with_lazy_matching(true)
3165 .with_lazy_lookahead(1),
3166 &EncoderMatchState::default(),
3167 current,
3168 ));
3169 assert!(should_lazy_emit_literal(
3170 &input,
3171 pos,
3172 input.len(),
3173 &buckets,
3174 EncodeOptions::default()
3175 .with_lazy_matching(true)
3176 .with_lazy_lookahead(2),
3177 &EncoderMatchState::default(),
3178 current,
3179 ));
3180 }
3181
3182 #[test]
3183 fn match_state_encodes_last_length_and_repeat_offset_symbols() {
3184 let mut state = EncoderMatchState::default();
3185 assert!(matches!(
3186 state.encode_match(12, 64).unwrap(),
3187 super::EncodedMatch::Fresh { .. }
3188 ));
3189 state.remember(12, 64);
3190
3191 assert_eq!(
3192 state.encode_match(12, 64).unwrap(),
3193 super::EncodedMatch::LastLengthRepeat
3194 );
3195 assert!(matches!(
3196 state.encode_match(9, 64).unwrap(),
3197 super::EncodedMatch::RepeatOffset { index: 0, .. }
3198 ));
3199 }
3200
3201 #[test]
3202 fn cost_aware_match_selection_prefers_repeat_offset_token() {
3203 let pos = 600usize;
3204 let mut input: Vec<u8> = (0..pos + 16)
3205 .map(|index| (index as u8).wrapping_mul(37))
3206 .collect();
3207 input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3208 input[pos - 512..pos - 503].copy_from_slice(b"ABCDEFGHI");
3209 input[pos..pos + 9].copy_from_slice(b"ABCDEFGHI");
3210 input[pos - 22] = 0x11;
3211 input[pos - 503] = 0x22;
3212 input[pos + 9] = 0x33;
3213 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3214 insert_match_position(&input, pos - 30, &mut buckets);
3215 insert_match_position(&input, pos - 512, &mut buckets);
3216
3217 let fresh = best_match(
3218 &input,
3219 pos,
3220 input.len(),
3221 &buckets,
3222 EncodeOptions::default(),
3223 &EncoderMatchState::default(),
3224 )
3225 .unwrap();
3226 let repeat = best_match(
3227 &input,
3228 pos,
3229 input.len(),
3230 &buckets,
3231 EncodeOptions::default(),
3232 &EncoderMatchState {
3233 old_offsets: [30, 0, 0, 0],
3234 last_offset: 0,
3235 last_length: 0,
3236 },
3237 )
3238 .unwrap();
3239
3240 assert_eq!((fresh.length, fresh.offset), (9, 512));
3241 assert_eq!((repeat.length, repeat.offset), (8, 30));
3242 }
3243
3244 #[test]
3245 fn match_finder_respects_configured_maximum_distance() {
3246 let phrase = b"rar29 bounded dictionary phrase";
3247 let mut input = Vec::new();
3248 input.extend_from_slice(phrase);
3249 input.extend(std::iter::repeat_n(0u8, 256 * 1024));
3250 input.extend_from_slice(phrase);
3251
3252 let bounded = encode_tokens(
3253 &input,
3254 &[],
3255 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_max_match_distance(128 * 1024),
3256 );
3257 let unbounded = encode_tokens(
3258 &input,
3259 &[],
3260 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_max_match_distance(1024 * 1024),
3261 );
3262
3263 assert!(!bounded.iter().any(
3264 |token| matches!(token, EncodeToken::Match { offset, .. } if *offset > 128 * 1024)
3265 ));
3266 assert!(unbounded.iter().any(
3267 |token| matches!(token, EncodeToken::Match { offset, .. } if *offset > 128 * 1024)
3268 ));
3269 }
3270
3271 #[test]
3272 fn lz_encoder_uses_weighted_rar29_huffman_tables() {
3273 let mut input = Vec::new();
3274 for byte in 0u8..120 {
3275 input.push(b'A');
3276 input.push(byte);
3277 }
3278 let packed = Unpack29Encoder::new().encode_member(&input).unwrap();
3279 let mut decoder = Unpack29::new();
3280 decoder.bits.append(&packed);
3281 decoder.read_tables().unwrap();
3282 let main_lengths = &decoder.levels[..MAIN_COUNT];
3283 let nonzero_lengths = main_lengths
3284 .iter()
3285 .copied()
3286 .filter(|&length| length != 0)
3287 .collect::<std::collections::BTreeSet<_>>();
3288
3289 assert!(nonzero_lengths.len() > 1);
3290 assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3291 }
3292
3293 #[test]
3294 fn copy_match_treats_zero_offset_as_distance_one() {
3295 let mut decoder = Unpack29::new();
3296 decoder.output.push(b'Z');
3297
3298 decoder.copy_match(4, 0, 5).unwrap();
3299
3300 assert_eq!(decoder.output, b"ZZZZZ");
3301 }
3302
3303 #[test]
3304 fn ppmd_literal_encoder_round_trips_rar29_ppmd_blocks() {
3305 let mut input = b"rar29 ppmd literal text payload alpha beta gamma\n".repeat(64);
3306 input.extend_from_slice(&[2, 2, 2, b'e', b's', b'c']);
3307 let packed = unpack29_encode_ppmd_literals(&input).unwrap();
3308
3309 assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3310 assert_ne!(packed.first().copied(), Some(0));
3311 }
3312
3313 #[test]
3314 fn ppmd_encoder_advertises_period_compatible_model_for_external_decoders() {
3315 let packed = unpack29_encode_ppmd(b"rar29 ppmd dictionary header").unwrap();
3316
3317 assert_eq!(packed[0], 0xa7);
3318 assert_eq!(packed[1], 24);
3319 }
3320
3321 #[test]
3322 fn ppmd_encoder_emits_offset_one_repeat_escapes() {
3323 let input = b"seed "
3324 .iter()
3325 .copied()
3326 .chain(std::iter::repeat_n(b'Z', 512))
3327 .collect::<Vec<_>>();
3328 let tokens = encode_ppmd_tokens(&input, true);
3329 let packed = unpack29_encode_ppmd(&input).unwrap();
3330
3331 assert!(tokens.iter().any(
3332 |token| matches!(token, PpmdEncodeToken::RepeatOffsetOne { length } if *length >= 4)
3333 ));
3334 assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3335 }
3336
3337 #[test]
3338 fn ppmd_encoder_emits_distance_match_escapes() {
3339 let phrase = b"repeated phrase for rar29 ppmd distance escape 4 ";
3340 let mut input = Vec::new();
3341 input.extend_from_slice(phrase);
3342 input.extend_from_slice(b"middle bytes make the repeat distance greater than one ");
3343 input.extend_from_slice(phrase);
3344 input.extend_from_slice(phrase);
3345 input.extend_from_slice(b"tail");
3346 let tokens = encode_ppmd_tokens(&input, true);
3347 let packed = unpack29_encode_ppmd(&input).unwrap();
3348
3349 assert!(tokens
3350 .iter()
3351 .any(|token| matches!(token, PpmdEncodeToken::Match { offset, length } if *offset > 1 && *length >= 32)));
3352 assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3353 }
3354
3355 #[test]
3356 fn ppmd_distance_match_lengths_stay_period_decoder_compatible() {
3357 let phrase = b"<html><body>RAR PPMd LZSS conversion phrase</body></html>\n";
3358 let mut input = Vec::new();
3359 for _ in 0..200 {
3360 input.extend_from_slice(phrase);
3361 }
3362 let tokens = encode_ppmd_tokens(&input, true);
3363
3364 assert!(tokens.iter().any(
3365 |token| matches!(token, PpmdEncodeToken::Match { offset, length } if *offset > 1 && *length >= 32)
3366 ));
3367 assert!(!tokens
3368 .iter()
3369 .any(|token| matches!(token, PpmdEncodeToken::Match { length, .. } if *length > 255)));
3370 }
3371
3372 #[test]
3373 fn ppmd_encoder_emits_embedded_vm_filter_escape() {
3374 let input = b"\xe8\0\0\0\0rar29 ppmd embedded e8 filter payload\n".repeat(16);
3375 let packed =
3376 unpack29_encode_ppmd_with_filter(&input, Rar29FilterSpec::whole(Rar29FilterKind::E8))
3377 .unwrap();
3378 let plain_ppmd = unpack29_encode_ppmd(&input).unwrap();
3379 let filtered_lz = Unpack29Encoder::new()
3380 .encode_member_with_filter(&input, Rar29FilterSpec::whole(Rar29FilterKind::E8))
3381 .unwrap();
3382
3383 assert!(packed.len() != plain_ppmd.len() || packed.len() != filtered_lz.len());
3384 assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3385 }
3386
3387 fn encode_with_filter(input: &[u8], kind: Rar29FilterKind) -> Result<Vec<u8>> {
3388 Unpack29Encoder::new().encode_member_with_filter(input, Rar29FilterSpec::whole(kind))
3389 }
3390
3391 fn encode_with_filter_range(
3392 input: &[u8],
3393 kind: Rar29FilterKind,
3394 range: Range<usize>,
3395 ) -> Result<Vec<u8>> {
3396 Unpack29Encoder::new().encode_member_with_filter(input, Rar29FilterSpec::range(kind, range))
3397 }
3398
3399 fn encode_with_filter_ranges(
3400 input: &[u8],
3401 kind: Rar29FilterKind,
3402 ranges: Vec<Range<usize>>,
3403 ) -> Result<Vec<u8>> {
3404 let filters: Vec<_> = ranges
3405 .into_iter()
3406 .map(|range| Rar29FilterSpec::range(kind, range))
3407 .collect();
3408 Unpack29Encoder::new().encode_member_with_filters(input, &filters)
3409 }
3410
3411 #[test]
3412 fn encoder_emits_rar29_offset_one_matches_for_repeated_bytes() {
3413 let input = b"Z".repeat(1024);
3414 let packed = unpack29_encode_literals(&input).unwrap();
3415
3416 assert!(packed.len() < input.len() / 4);
3417 assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3418 }
3419
3420 #[test]
3421 fn encoder_emits_rar29_dictionary_matches_for_repeated_sequences() {
3422 let input = b"abc123xyz-".repeat(128);
3423 let packed = unpack29_encode_literals(&input).unwrap();
3424
3425 assert!(packed.len() < input.len() / 2);
3426 assert_eq!(unpack29_decode(&packed, input.len()).unwrap(), input);
3427 }
3428
3429 #[test]
3430 fn encoder_finds_rar29_matches_beyond_near_offsets() {
3431 let phrase = b"long-distance repeated phrase for rar29 low-offset coding.";
3432 let mut input = Vec::new();
3433 input.extend_from_slice(phrase);
3434 input.extend(std::iter::repeat_n(0, 300 * 1024));
3435 input.extend_from_slice(phrase);
3436 input.extend_from_slice(phrase);
3437 let tokens = encode_tokens(&input, &[], EncodeOptions::default());
3438 let packed = unpack29_encode_literals(&input).unwrap();
3439
3440 assert!(tokens.iter().any(|token| matches!(
3441 token,
3442 EncodeToken::Match { offset, .. } if *offset > 0x40000
3443 )));
3444 assert!(packed.len() < input.len());
3445 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3446 assert!(
3447 decoded == input,
3448 "RAR 2.9 long-distance match round-trip failed"
3449 );
3450 }
3451
3452 #[test]
3453 fn encoder_emits_rar29_e8_vm_filter_record() {
3454 let input = b"\xe8\0\0\0\0rar29 e8 filter writer payload\n".repeat(8);
3455 let packed = encode_with_filter(&input, Rar29FilterKind::E8).unwrap();
3456 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3457
3458 assert!(
3459 decoded == input,
3460 "RAR 2.9 multi-filter E8 round-trip failed"
3461 );
3462 }
3463
3464 #[test]
3465 fn encoder_emits_rar29_e8e9_vm_filter_record() {
3466 let input = b"\xe9\0\0\0\0rar29 e8e9 filter writer payload\n".repeat(8);
3467 let packed = encode_with_filter(&input, Rar29FilterKind::E8E9).unwrap();
3468 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3469
3470 assert_eq!(decoded, input);
3471 }
3472
3473 #[test]
3474 fn encoder_emits_rar29_segmented_e8_vm_filter_record() {
3475 let mut input = b"prefix data that should not be x86 filtered ".to_vec();
3476 let start = input.len();
3477 input.extend_from_slice(b"\xe8\0\0\0\0segmented e8 filtered payload\n");
3478 let end = input.len();
3479 input.extend_from_slice(b" suffix data that should also remain raw");
3480 let packed = encode_with_filter_range(&input, Rar29FilterKind::E8, start..end).unwrap();
3481 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3482
3483 assert_eq!(decoded, input);
3484 }
3485
3486 #[test]
3487 fn encoder_emits_rar29_multiple_e8_vm_filter_records() {
3488 let mut input = vec![0x41u8; 80_000];
3489 for cluster_start in [8_000, 60_000] {
3490 for index in 0..8 {
3491 let pos = cluster_start + index * 64;
3492 input[pos] = 0xe8;
3493 input[pos + 1..pos + 5].copy_from_slice(&(0x2000u32 + index as u32).to_le_bytes());
3494 }
3495 }
3496
3497 let packed = encode_with_filter_ranges(
3498 &input,
3499 Rar29FilterKind::E8,
3500 vec![8_000..8_512, 60_000..60_512],
3501 )
3502 .unwrap();
3503 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3504
3505 assert_eq!(decoded, input);
3506 }
3507
3508 #[test]
3509 fn encoder_emits_rar29_segmented_e8e9_vm_filter_record() {
3510 let mut input = b"prefix data that should not be x86 filtered ".to_vec();
3511 let start = input.len();
3512 input.extend_from_slice(b"\xe9\0\0\0\0segmented e8e9 filtered payload\n");
3513 let end = input.len();
3514 input.extend_from_slice(b" suffix data that should also remain raw");
3515 let packed = encode_with_filter_range(&input, Rar29FilterKind::E8E9, start..end).unwrap();
3516 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3517
3518 assert_eq!(decoded, input);
3519 }
3520
3521 #[test]
3522 fn encoder_emits_rar29_delta_vm_filter_record() {
3523 let input: Vec<u8> = (0..192).map(|index| (index * 13 + 7) as u8).collect();
3524 let packed = encode_with_filter(&input, Rar29FilterKind::Delta { channels: 3 }).unwrap();
3525 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3526
3527 assert_eq!(decoded, input);
3528 }
3529
3530 #[test]
3531 fn encoder_emits_rar29_segmented_delta_vm_filter_record() {
3532 let mut input = b"prefix bytes before delta segment ".to_vec();
3533 let start = input.len();
3534 input.extend((0..192).map(|index| (index * 13 + 7) as u8));
3535 let end = input.len();
3536 input.extend_from_slice(b" suffix bytes after delta segment");
3537 let packed =
3538 encode_with_filter_range(&input, Rar29FilterKind::Delta { channels: 3 }, start..end)
3539 .unwrap();
3540 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3541
3542 assert_eq!(decoded, input);
3543 }
3544
3545 #[test]
3546 fn encoder_emits_rar29_itanium_vm_filter_record() {
3547 let mut input = vec![0u8; 48];
3548 input[16] = 22;
3549 input[21] = 20;
3550 input.extend_from_slice(b"rar29 itanium filter writer payload\n");
3551 let packed = encode_with_filter(&input, Rar29FilterKind::Itanium).unwrap();
3552 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3553
3554 assert_eq!(decoded, input);
3555 }
3556
3557 #[test]
3558 fn encoder_emits_rar29_segmented_itanium_vm_filter_record() {
3559 let mut input = b"prefix bytes before itanium segment ".to_vec();
3560 let start = input.len();
3561 input.extend_from_slice(&[0; 48]);
3562 input[start + 16] = 22;
3563 input[start + 21] = 20;
3564 input.extend_from_slice(b"rar29 segmented itanium filter writer payload\n");
3565 let end = input.len();
3566 input.extend_from_slice(b" suffix bytes after itanium segment");
3567 let packed =
3568 encode_with_filter_range(&input, Rar29FilterKind::Itanium, start..end).unwrap();
3569 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3570
3571 assert_eq!(decoded, input);
3572 }
3573
3574 #[test]
3575 fn encoder_emits_rar29_rgb_vm_filter_record() {
3576 let width = 12;
3577 let input: Vec<u8> = (0..96).map(|index| (index * 29 + 11) as u8).collect();
3578 let packed = encode_with_filter(&input, Rar29FilterKind::Rgb { width, pos_r: 0 }).unwrap();
3579 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3580
3581 assert_eq!(decoded, input);
3582 }
3583
3584 #[test]
3585 fn encoder_emits_rar29_segmented_rgb_vm_filter_record() {
3586 let width = 12;
3587 let mut input = b"prefix bytes before rgb segment ".to_vec();
3588 let start = input.len();
3589 input.extend((0..96).map(|index| (index * 29 + 11) as u8));
3590 let end = input.len();
3591 input.extend_from_slice(b" suffix bytes after rgb segment");
3592 let packed =
3593 encode_with_filter_range(&input, Rar29FilterKind::Rgb { width, pos_r: 0 }, start..end)
3594 .unwrap();
3595 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3596
3597 assert_eq!(decoded, input);
3598 }
3599
3600 #[test]
3601 fn encoder_rejects_rar29_rgb_filter_with_unaligned_scanline_width() {
3602 let input: Vec<u8> = (0..96).map(|index| (index * 29 + 11) as u8).collect();
3603 assert!(encode_with_filter(&input, Rar29FilterKind::Rgb { width: 8, pos_r: 0 }).is_err());
3604 }
3605
3606 #[test]
3607 fn encoder_emits_rar29_audio_vm_filter_record() {
3608 let input: Vec<u8> = (0..160)
3609 .map(|index| (index * 7 + index / 3) as u8)
3610 .collect();
3611 let packed = encode_with_filter(&input, Rar29FilterKind::Audio { channels: 2 }).unwrap();
3612 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3613
3614 assert_eq!(decoded, input);
3615 }
3616
3617 #[test]
3618 fn audio_filter_bytecode_matches_builtin_transform() {
3619 let channels = 2;
3620 let input: Vec<u8> = (0..MAX_VM_AUDIO_FILTER_BLOCK_SIZE)
3621 .map(|index| (index * 7 + index / channels + index / 257) as u8)
3622 .collect();
3623 let encoded = audio_encode(&input, channels).unwrap();
3624 let program = Program::parse(RAR3_AUDIO_FILTER_BYTECODE).unwrap();
3625 let result = program
3626 .execute(crate::rarvm::Invocation {
3627 input: &encoded,
3628 regs: [channels as u32, 0, 0, 0, 0, 0, 0],
3629 global_data: &[],
3630 file_offset: 0,
3631 exec_count: 0,
3632 })
3633 .unwrap();
3634
3635 assert_eq!(result.output, input);
3636 }
3637
3638 #[test]
3639 fn large_audio_filters_are_split_into_rarvm_safe_blocks() {
3640 let filters = split_large_filter(
3641 MAX_VM_FILTER_BLOCK_SIZE * 2 + 123,
3642 Rar29FilterSpec::whole(Rar29FilterKind::Audio { channels: 4 }),
3643 )
3644 .unwrap();
3645
3646 assert_eq!(filters.len(), 3);
3647 assert_eq!(filters[0].range, Some(0..MAX_VM_AUDIO_FILTER_BLOCK_SIZE));
3648 assert_eq!(
3649 filters[1].range,
3650 Some(MAX_VM_AUDIO_FILTER_BLOCK_SIZE..MAX_VM_AUDIO_FILTER_BLOCK_SIZE * 2)
3651 );
3652 assert_eq!(
3653 filters[2].range,
3654 Some(MAX_VM_AUDIO_FILTER_BLOCK_SIZE * 2..MAX_VM_FILTER_BLOCK_SIZE * 2 + 123)
3655 );
3656 }
3657
3658 #[test]
3659 fn large_delta_filters_are_split_into_rarvm_safe_blocks() {
3660 let filters = split_large_filter(
3661 MAX_VM_FILTER_BLOCK_SIZE * 2 + 123,
3662 Rar29FilterSpec::whole(Rar29FilterKind::Delta { channels: 4 }),
3663 )
3664 .unwrap();
3665
3666 assert_eq!(filters.len(), 3);
3667 assert_eq!(filters[0].range, Some(0..MAX_VM_DELTA_FILTER_BLOCK_SIZE));
3668 assert_eq!(
3669 filters[1].range,
3670 Some(MAX_VM_DELTA_FILTER_BLOCK_SIZE..MAX_VM_DELTA_FILTER_BLOCK_SIZE * 2)
3671 );
3672 assert_eq!(
3673 filters[2].range,
3674 Some(MAX_VM_DELTA_FILTER_BLOCK_SIZE * 2..MAX_VM_FILTER_BLOCK_SIZE * 2 + 123)
3675 );
3676 }
3677
3678 #[test]
3679 fn segmented_audio_filters_redeclare_program_state() {
3680 let filters = [
3681 OwnedVmFilterRecord {
3682 block_start: 0,
3683 block_size: MAX_VM_AUDIO_FILTER_BLOCK_SIZE,
3684 init_regs: vec![(0, 4)],
3685 code: RAR3_AUDIO_FILTER_BYTECODE,
3686 },
3687 OwnedVmFilterRecord {
3688 block_start: MAX_VM_AUDIO_FILTER_BLOCK_SIZE,
3689 block_size: 4096,
3690 init_regs: vec![(0, 4)],
3691 code: RAR3_AUDIO_FILTER_BYTECODE,
3692 },
3693 ];
3694 let records = encoded_filter_records(&filters).unwrap();
3695
3696 assert_vm_filter_declares_program(&records[0], 0);
3697 assert_vm_filter_declares_program(&records[1], 2);
3698 }
3699
3700 #[test]
3701 fn encoder_emits_rar29_segmented_audio_vm_filter_record() {
3702 let mut input = b"prefix bytes before audio segment ".to_vec();
3703 let start = input.len();
3704 input.extend((0..160).map(|index| (index * 7 + index / 3) as u8));
3705 let end = input.len();
3706 input.extend_from_slice(b" suffix bytes after audio segment");
3707 let packed =
3708 encode_with_filter_range(&input, Rar29FilterKind::Audio { channels: 2 }, start..end)
3709 .unwrap();
3710 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3711
3712 assert_eq!(decoded, input);
3713 }
3714
3715 #[test]
3716 fn encoder_emits_multiple_rar29_audio_vm_filter_records_for_large_ranges() {
3717 let input: Vec<u8> = (0..(MAX_VM_AUDIO_FILTER_BLOCK_SIZE * 2 + 64))
3718 .map(|index| (index * 7 + index / 3 + index / 257) as u8)
3719 .collect();
3720 let packed = encode_with_filter(&input, Rar29FilterKind::Audio { channels: 4 }).unwrap();
3721 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3722
3723 assert_eq!(decoded, input);
3724 }
3725
3726 #[test]
3727 fn encoder_emits_multiple_rar29_delta_vm_filter_records_for_large_ranges() {
3728 let input: Vec<u8> = (0..(MAX_VM_DELTA_FILTER_BLOCK_SIZE * 2 + 64))
3729 .map(|index| (index * 11 + index / 5 + index / 251) as u8)
3730 .collect();
3731 let packed = encode_with_filter(&input, Rar29FilterKind::Delta { channels: 4 }).unwrap();
3732 let decoded = unpack29_decode(&packed, input.len()).unwrap();
3733
3734 assert_eq!(decoded, input);
3735 }
3736
3737 fn assert_vm_filter_declares_program(record: &[u8], expected_selector: u32) {
3738 let first = record[0];
3739 assert_ne!(first & 0x80, 0);
3740 assert_ne!(first & 0x20, 0);
3741 assert_ne!(first & 0x10, 0);
3742 let inline_len = match first & 7 {
3743 len @ 0..=5 => len as usize + 1,
3744 6 => usize::from(record[1]) + 7,
3745 _ => u16::from_be_bytes([record[1], record[2]]) as usize,
3746 };
3747 let body_start = match first & 7 {
3748 0..=5 => 1,
3749 6 => 2,
3750 _ => 3,
3751 };
3752 let body = &record[body_start..body_start + inline_len];
3753 let mut bits = BitReader::from_bytes(body);
3754 assert_eq!(bits.read_encoded_u32().unwrap(), expected_selector);
3755 let _block_start = bits.read_encoded_u32().unwrap();
3756 let _block_size = bits.read_encoded_u32().unwrap();
3757 let mask = bits.read_bits(7).unwrap();
3758 for index in 0..7 {
3759 if mask & (1 << index) != 0 {
3760 let _ = bits.read_encoded_u32().unwrap();
3761 }
3762 }
3763 assert_eq!(
3764 bits.read_encoded_u32().unwrap() as usize,
3765 RAR3_AUDIO_FILTER_BYTECODE.len()
3766 );
3767 }
3768
3769 #[test]
3770 fn solid_encoder_emits_rar29_matches_against_previous_member_history() {
3771 let first = b"solid rar29 shared phrase alpha beta gamma ".repeat(4);
3772 let second = b"solid rar29 shared phrase alpha beta gamma ".repeat(2);
3773 let independent = unpack29_encode_literals(&second).unwrap();
3774 let mut encoder = Unpack29Encoder::new();
3775 let first_packed = encoder.encode_member(&first).unwrap();
3776 let second_packed = encoder.encode_member(&second).unwrap();
3777
3778 assert!(second_packed.len() < independent.len());
3779 let mut decoder = Unpack29::new();
3780 assert_eq!(
3781 decoder.decode_member(&first_packed, first.len()).unwrap(),
3782 first
3783 );
3784 assert_eq!(
3785 decoder.decode_member(&second_packed, second.len()).unwrap(),
3786 second
3787 );
3788 }
3789
3790 #[test]
3791 fn decode_member_from_reader_accepts_incremental_input() {
3792 struct TinyReader<'a> {
3793 input: &'a [u8],
3794 }
3795
3796 impl std::io::Read for TinyReader<'_> {
3797 fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
3798 if self.input.is_empty() {
3799 return Ok(0);
3800 }
3801 let len = self.input.len().min(out.len()).min(3);
3802 out[..len].copy_from_slice(&self.input[..len]);
3803 self.input = &self.input[len..];
3804 Ok(len)
3805 }
3806 }
3807
3808 let mut decoder = Unpack29::new();
3809 let mut reader = TinyReader {
3810 input: COMPRESSED_TEXT,
3811 };
3812 let mut output = Vec::new();
3813 decoder
3814 .decode_member_from_reader(&mut reader, 2400, &mut output)
3815 .unwrap();
3816
3817 assert_eq!(output, expected_text());
3818 }
3819
3820 #[test]
3821 fn decode_non_solid_member_resets_reusable_decoder_state() {
3822 let mut decoder = Unpack29::new();
3823 decoder.output.extend_from_slice(b"stale history");
3824 decoder.filters.push(VmFilter {
3825 program: 0,
3826 start: 0,
3827 size: 1,
3828 regs: [0; 7],
3829 global_data: vec![1, 2, 3],
3830 });
3831
3832 let output = decoder
3833 .decode_non_solid_member(COMPRESSED_TEXT, 2400)
3834 .unwrap();
3835
3836 assert_eq!(output, expected_text());
3837 assert!(decoder.filters.is_empty());
3838 }
3839
3840 #[test]
3841 fn e8_filter_uses_member_relative_offset_in_solid_stream() {
3842 let mut decoder = Unpack29::new();
3843 let member_start = 1000usize;
3844 let filter_start = member_start + 100;
3845 decoder.output.resize(filter_start + 8, 0);
3846 decoder.output[filter_start] = 0xe8;
3847
3848 let call_operand_pos = 1u32;
3849 let member_relative_filter_start = (filter_start - member_start) as u32;
3850 let decoded_addr = 0x2000u32;
3851 let encoded_addr = decoded_addr
3852 .wrapping_add(member_relative_filter_start)
3853 .wrapping_add(call_operand_pos);
3854 decoder.output[filter_start + 1..filter_start + 5]
3855 .copy_from_slice(&encoded_addr.to_le_bytes());
3856 decoder.programs.push(VmProgram {
3857 kind: VmProgramKind::Standard(StandardFilter::E8),
3858 block_size: 5,
3859 exec_count: 0,
3860 globals: Vec::new(),
3861 });
3862 decoder.filters.push(VmFilter {
3863 program: 0,
3864 start: filter_start,
3865 size: 5,
3866 regs: [0; 7],
3867 global_data: Vec::new(),
3868 });
3869
3870 let filtered = decoder
3871 .filtered_range(member_start, filter_start + 5, member_start)
3872 .unwrap();
3873 let operand =
3874 u32::from_le_bytes([filtered[101], filtered[102], filtered[103], filtered[104]]);
3875
3876 assert_eq!(operand, decoded_addr);
3877 }
3878
3879 #[test]
3880 fn generic_vm_filter_executes_from_filtered_range() {
3881 let mut decoder = Unpack29::new();
3882 decoder.output.extend_from_slice(&[0x11, 0x22, 0x33]);
3883 decoder.programs.push(VmProgram {
3884 kind: VmProgramKind::Generic(Program {
3885 static_data: Vec::new(),
3886 instructions: vec![
3887 Instruction {
3888 opcode: Opcode::Mov,
3889 byte_mode: true,
3890 operands: vec![Operand::Absolute(0), Operand::Immediate(0x44)],
3891 },
3892 Instruction {
3893 opcode: Opcode::Ret,
3894 byte_mode: false,
3895 operands: Vec::new(),
3896 },
3897 ],
3898 }),
3899 block_size: 3,
3900 exec_count: 0,
3901 globals: Vec::new(),
3902 });
3903 decoder.filters.push(VmFilter {
3904 program: 0,
3905 start: 0,
3906 size: 3,
3907 regs: [0; 7],
3908 global_data: Vec::new(),
3909 });
3910
3911 let filtered = decoder.filtered_range(0, 3, 0).unwrap();
3912
3913 assert_eq!(filtered, [0x44, 0x22, 0x33]);
3914 }
3915
3916 #[test]
3917 fn standard_filters_reject_malformed_delta_and_rgb_registers() {
3918 let mut delta = vec![0; 32];
3919 let mut delta_regs = [0; 7];
3920 delta_regs[0] = 33;
3921 assert_eq!(
3922 apply_standard_filter(StandardFilter::Delta, &mut delta, 0, &delta_regs),
3923 Err(Error::InvalidData(
3924 "RAR 2.9 DELTA filter channel count is invalid"
3925 ))
3926 );
3927
3928 let mut rgb = vec![0; 32];
3929 let mut rgb_regs = [0; 7];
3930 rgb_regs[0] = 2;
3931 assert_eq!(
3932 apply_standard_filter(StandardFilter::Rgb, &mut rgb, 0, &rgb_regs),
3933 Err(Error::InvalidData(
3934 "RAR 2.9 RGB filter parameters are invalid"
3935 ))
3936 );
3937 rgb_regs[0] = 15;
3938 rgb_regs[1] = 3;
3939 assert_eq!(
3940 apply_standard_filter(StandardFilter::Rgb, &mut rgb, 0, &rgb_regs),
3941 Err(Error::InvalidData(
3942 "RAR 2.9 RGB filter parameters are invalid"
3943 ))
3944 );
3945 }
3946
3947 #[test]
3948 fn vm_encoded_u32_accepts_32_bit_form() {
3949 let mut bits = super::BitReader::from_bytes(&[0xff; 5]);
3950
3951 assert_eq!(bits.read_encoded_u32().unwrap(), 0xffff_ffff);
3952 }
3953
3954 #[test]
3955 fn vm_global_data_size_does_not_reserve_untrusted_declared_size() {
3956 let mut decoder = Unpack29::new();
3957 decoder.programs.push(VmProgram {
3958 kind: VmProgramKind::Standard(StandardFilter::E8),
3959 block_size: 1,
3960 exec_count: 0,
3961 globals: Vec::new(),
3962 });
3963
3964 let mut data = BitWriter::default();
3965 data.write_encoded_u32(1);
3966 data.write_encoded_u32(0);
3967 data.write_encoded_u32(u32::MAX);
3968
3969 assert_eq!(
3970 decoder.parse_vm_code(0x80 | 0x08, data.finish()),
3971 Err(Error::NeedMoreInput)
3972 );
3973 }
3974
3975 #[test]
3976 fn vm_code_size_is_capped_before_allocation() {
3977 let mut decoder = Unpack29::new();
3978 let mut data = BitWriter::default();
3979 data.write_encoded_u32(0);
3980 data.write_encoded_u32(1);
3981 data.write_encoded_u32((super::MAX_VM_CODE_SIZE + 1) as u32);
3982
3983 assert_eq!(
3984 decoder.parse_vm_code(0x80, data.finish()),
3985 Err(Error::InvalidData("RAR 2.9 VM code is too large"))
3986 );
3987 }
3988
3989 #[test]
3990 fn vm_program_and_filter_counts_are_capped() {
3991 let mut decoder = Unpack29::new();
3992 decoder
3993 .programs
3994 .resize_with(super::MAX_VM_PROGRAMS, || VmProgram {
3995 kind: VmProgramKind::Standard(StandardFilter::E8),
3996 block_size: 1,
3997 exec_count: 0,
3998 globals: Vec::new(),
3999 });
4000
4001 let mut new_program = BitWriter::default();
4002 new_program.write_encoded_u32((super::MAX_VM_PROGRAMS + 1) as u32);
4003 new_program.write_encoded_u32(1);
4004 new_program.write_encoded_u32(1);
4005 new_program.write_bits(0, 8);
4006 assert_eq!(
4007 decoder.parse_vm_code(0x80, new_program.finish()),
4008 Err(Error::InvalidData("RAR 2.9 VM program limit exceeded"))
4009 );
4010
4011 decoder.programs.truncate(1);
4012 decoder.last_filter = 0;
4013 decoder
4014 .filters
4015 .resize_with(super::MAX_VM_FILTERS, || VmFilter {
4016 program: 0,
4017 start: 0,
4018 size: 1,
4019 regs: [0; 7],
4020 global_data: Vec::new(),
4021 });
4022 let mut reused_program = BitWriter::default();
4023 reused_program.write_encoded_u32(0);
4024 assert_eq!(
4025 decoder.parse_vm_code(0, reused_program.finish()),
4026 Err(Error::InvalidData("RAR 2.9 VM filter limit exceeded"))
4027 );
4028 }
4029
4030 #[test]
4031 fn itanium_filter_round_trips_with_high_file_offset() {
4032 let mut data = vec![0u8; 64];
4033 for (index, byte) in data.iter_mut().enumerate() {
4034 *byte = index as u8;
4035 }
4036 data[0] = 0;
4037 data[7] = 5 << 3;
4038 let original = data.clone();
4039
4040 itanium_encode(&mut data, u32::MAX);
4041 itanium_decode(&mut data, u32::MAX);
4042
4043 assert_eq!(data, original);
4044 }
4045
4046 fn expected_text() -> Vec<u8> {
4047 "Hello, RAR 3.x fixture world.\n".repeat(80).into_bytes()
4048 }
4049}