1use crate::{huffman, Error, Result};
2use std::io::{Read, Write};
3
4const MAIN_COUNT: usize = 298;
5const OFFSET_COUNT: usize = 48;
6const LENGTH_COUNT: usize = 28;
7const LEVEL_COUNT: usize = 19;
8const TABLE_COUNT: usize = MAIN_COUNT + OFFSET_COUNT + LENGTH_COUNT;
9const AUDIO_COUNT: usize = 257;
10const MAX_CHANNELS: usize = 4;
11const OLD_LEVEL_COUNT: usize = AUDIO_COUNT * MAX_CHANNELS;
12const MAX_HISTORY: usize = 1024 * 1024;
13const INPUT_CHUNK: usize = 64 * 1024;
14
15const LENGTH_BASES: [usize; LENGTH_COUNT] = [
16 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
17 160, 192, 224,
18];
19const LENGTH_BITS: [u8; LENGTH_COUNT] = [
20 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
21];
22const OFFSET_BASES: [usize; OFFSET_COUNT] = [
23 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
24 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
25 262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
26];
27const OFFSET_BITS: [u8; OFFSET_COUNT] = [
28 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
29 13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
30];
31const SHORT_BASES: [usize; 8] = [0, 4, 8, 16, 32, 64, 128, 192];
32const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];
33const MAX_ENCODER_MATCH_OFFSET: usize = MAX_HISTORY;
34const MAX_ENCODER_MATCH_LENGTH: usize = 258;
35const MATCH_HASH_BUCKETS: usize = 4096;
36const MAX_MATCH_CANDIDATES: usize = 256;
37
38pub fn unpack20_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
39 let mut decoder = Unpack20::new();
40 decoder.decode_member(input, output_size)
41}
42
43pub fn unpack20_encode_literals(input: &[u8]) -> Result<Vec<u8>> {
44 unpack20_encode_literals_with_options(input, EncodeOptions::default())
45}
46
47pub fn unpack20_encode_literals_with_options(
48 input: &[u8],
49 options: EncodeOptions,
50) -> Result<Vec<u8>> {
51 encode_member(input, &[], None, options)
52}
53
54pub fn unpack20_encode_auto(input: &[u8]) -> Result<Vec<u8>> {
55 unpack20_encode_auto_with_options(input, EncodeOptions::default())
56}
57
58pub fn unpack20_encode_auto_with_options(input: &[u8], options: EncodeOptions) -> Result<Vec<u8>> {
59 let lz = unpack20_encode_literals_with_options(input, options)?;
60 let mut best = lz;
61 if options.try_audio {
62 for channels in 1..=MAX_CHANNELS {
63 if input.len() < channels * 64 {
64 continue;
65 }
66 let audio = encode_audio_member(input, channels)?;
67 if audio.len() < best.len() {
68 best = audio;
69 }
70 }
71 }
72 Ok(best)
73}
74
75#[derive(Debug, Clone, Copy, PartialEq, Eq)]
76#[non_exhaustive]
77pub struct EncodeOptions {
78 pub max_match_candidates: usize,
79 pub max_match_distance: usize,
80 pub lazy_matching: bool,
81 pub lazy_lookahead: usize,
82 pub try_audio: bool,
83}
84
85impl EncodeOptions {
86 pub const fn new(max_match_candidates: usize) -> Self {
87 Self {
88 max_match_candidates,
89 max_match_distance: MAX_ENCODER_MATCH_OFFSET,
90 lazy_matching: false,
91 lazy_lookahead: 1,
92 try_audio: true,
93 }
94 }
95
96 pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
97 self.max_match_distance = if distance > MAX_ENCODER_MATCH_OFFSET {
98 MAX_ENCODER_MATCH_OFFSET
99 } else {
100 distance
101 };
102 self
103 }
104
105 pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
106 self.lazy_matching = enabled;
107 self
108 }
109
110 pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
111 self.lazy_lookahead = bytes;
112 self
113 }
114
115 pub const fn with_try_audio(mut self, enabled: bool) -> Self {
116 self.try_audio = enabled;
117 self
118 }
119}
120
121impl Default for EncodeOptions {
122 fn default() -> Self {
123 Self::new(MAX_MATCH_CANDIDATES)
124 }
125}
126
127#[derive(Debug, Clone, Default)]
128pub struct Unpack20Encoder {
129 history: Vec<u8>,
130 table: Option<FixedEncodeTable>,
131 options: EncodeOptions,
132}
133
134impl Unpack20Encoder {
135 pub fn new() -> Self {
136 Self::default()
137 }
138
139 pub fn with_options(options: EncodeOptions) -> Self {
140 Self {
141 history: Vec::new(),
142 table: None,
143 options,
144 }
145 }
146
147 pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
148 if input.is_empty() {
149 return Ok(Vec::new());
150 }
151 let table = match self.table {
152 Some(table) => table,
153 None => {
154 let table = FixedEncodeTable::new()?;
155 self.table = Some(table);
156 table
157 }
158 };
159 let packed = encode_member(input, &self.history, Some(table), self.options)?;
160 self.remember(input);
161 Ok(packed)
162 }
163
164 fn remember(&mut self, input: &[u8]) {
165 self.history.extend_from_slice(input);
166 let keep_from = self
167 .history
168 .len()
169 .saturating_sub(self.options.max_match_distance);
170 if keep_from != 0 {
171 self.history.drain(..keep_from);
172 }
173 }
174}
175
176fn encode_member(
177 input: &[u8],
178 history: &[u8],
179 fixed_table: Option<FixedEncodeTable>,
180 options: EncodeOptions,
181) -> Result<Vec<u8>> {
182 if input.is_empty() {
183 return Ok(Vec::new());
184 }
185
186 let tokens = encode_tokens(input, history, options, None);
187 let table_lengths = table_lengths_for_tokens(&tokens, fixed_table)?;
188 let packed = encode_member_with_tables(&tokens, history, fixed_table, &table_lengths)?;
189 if fixed_table.is_some() {
190 return Ok(packed);
191 }
192
193 let cost_model = CostModel::new(&table_lengths);
194 let refined_tokens = encode_tokens(input, history, options, Some(&cost_model));
195 if refined_tokens == tokens {
196 return Ok(packed);
197 }
198 let refined_table_lengths = table_lengths_for_tokens(&refined_tokens, fixed_table)?;
199 let refined_packed = encode_member_with_tables(
200 &refined_tokens,
201 history,
202 fixed_table,
203 &refined_table_lengths,
204 )?;
205 if refined_packed.len() < packed.len() {
206 Ok(refined_packed)
207 } else {
208 Ok(packed)
209 }
210}
211
212fn table_lengths_for_tokens(
213 tokens: &[EncodeToken],
214 fixed_table: Option<FixedEncodeTable>,
215) -> Result<[u8; TABLE_COUNT]> {
216 let mut main_frequencies = [0usize; MAIN_COUNT];
217 let mut offset_frequencies = [0usize; OFFSET_COUNT];
218 let mut length_frequencies = [0usize; LENGTH_COUNT];
219 for token in tokens {
220 match *token {
221 EncodeToken::Literal(byte) => main_frequencies[byte as usize] += 1,
222 EncodeToken::RepeatLast => main_frequencies[256] += 1,
223 EncodeToken::OldOffset {
224 index,
225 length,
226 offset,
227 } => {
228 main_frequencies[257 + index] += 1;
229 let (slot, _) = old_length_slot_for_match(length, offset)?;
230 length_frequencies[slot] += 1;
231 }
232 EncodeToken::ShortOffset { offset } => {
233 let (slot, _) = short_slot_for_match(offset)?;
234 main_frequencies[261 + slot] += 1;
235 }
236 EncodeToken::Match { length, offset } => {
237 let encoded_length = length.checked_sub(match_length_adjustment(offset)).ok_or(
238 Error::InvalidData("RAR 2.0 adjusted match length underflows"),
239 )?;
240 let (slot, _) = length_slot_for_match(encoded_length)?;
241 main_frequencies[270 + slot] += 1;
242 let (offset_slot, _) = offset_slot_for_match(offset)?;
243 offset_frequencies[offset_slot] += 1;
244 }
245 }
246 }
247 let mut table_lengths = [0u8; TABLE_COUNT];
248 let literal_len = if let Some(table) = fixed_table {
249 table.length
250 } else {
251 let main_symbol_count = main_frequencies
252 .iter()
253 .filter(|&&frequency| frequency != 0)
254 .count()
255 + offset_frequencies
256 .iter()
257 .filter(|&&frequency| frequency != 0)
258 .count()
259 + length_frequencies
260 .iter()
261 .filter(|&&frequency| frequency != 0)
262 .count();
263 literal_code_len(main_symbol_count)?
264 };
265
266 if fixed_table.is_some() {
267 for len in &mut table_lengths[..256] {
268 *len = literal_len;
269 }
270 table_lengths[256] = literal_len;
271 for len in &mut table_lengths[270..270 + LENGTH_COUNT] {
272 *len = literal_len;
273 }
274 for len in &mut table_lengths[257..269] {
275 *len = literal_len;
276 }
277 for len in &mut table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT] {
278 *len = literal_len;
279 }
280 for len in &mut table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT] {
281 *len = literal_len;
282 }
283 } else {
284 table_lengths[..MAIN_COUNT]
285 .copy_from_slice(&validated_lengths_for_frequencies(&main_frequencies, 15));
286 table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT]
287 .copy_from_slice(&validated_lengths_for_frequencies(&offset_frequencies, 15));
288 table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT]
289 .copy_from_slice(&validated_lengths_for_frequencies(&length_frequencies, 15));
290 }
291 Ok(table_lengths)
292}
293
294fn encode_member_with_tables(
295 tokens: &[EncodeToken],
296 history: &[u8],
297 fixed_table: Option<FixedEncodeTable>,
298 table_lengths: &[u8; TABLE_COUNT],
299) -> Result<Vec<u8>> {
300 let level_tokens = encode_table_level_tokens(table_lengths);
301 let level_lengths = level_code_lengths_for_tokens(&level_tokens);
302 let level_codes = canonical_codes(&level_lengths)?;
303 let main_codes = canonical_codes(&table_lengths[..MAIN_COUNT])?;
304
305 let mut bits = BitWriter::default();
306 if fixed_table.is_none() || history.is_empty() {
307 bits.write_bits(0, 2); for &len in &level_lengths {
309 bits.write_bits(len as u32, 4);
310 }
311 for token in level_tokens {
312 let code = level_codes[token.symbol].ok_or(Error::InvalidData(
313 "RAR 2.0 encoder missing level Huffman code",
314 ))?;
315 bits.write_bits(code.code as u32, code.len);
316 if token.extra_bits != 0 {
317 bits.write_bits(token.extra_value as u32, token.extra_bits);
318 }
319 }
320 }
321 let offset_codes = canonical_codes(&table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
322 let length_codes = canonical_codes(&table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT])?;
323 for token in tokens {
324 match *token {
325 EncodeToken::Literal(byte) => {
326 let code = main_codes[byte as usize].ok_or(Error::InvalidData(
327 "RAR 2.0 encoder missing literal Huffman code",
328 ))?;
329 bits.write_bits(code.code as u32, code.len);
330 }
331 EncodeToken::RepeatLast => {
332 let code = main_codes[256].ok_or(Error::InvalidData(
333 "RAR 2.0 encoder missing repeat-last Huffman code",
334 ))?;
335 bits.write_bits(code.code as u32, code.len);
336 }
337 EncodeToken::OldOffset {
338 index,
339 length,
340 offset,
341 } => {
342 let code = main_codes[257 + index].ok_or(Error::InvalidData(
343 "RAR 2.0 encoder missing old-offset Huffman code",
344 ))?;
345 bits.write_bits(code.code as u32, code.len);
346 let (slot, extra) = old_length_slot_for_match(length, offset)?;
347 let length_code = length_codes[slot].ok_or(Error::InvalidData(
348 "RAR 2.0 encoder missing old-offset length Huffman code",
349 ))?;
350 bits.write_bits(length_code.code as u32, length_code.len);
351 if LENGTH_BITS[slot] != 0 {
352 bits.write_bits(extra as u32, LENGTH_BITS[slot]);
353 }
354 }
355 EncodeToken::ShortOffset { offset } => {
356 let (slot, extra) = short_slot_for_match(offset)?;
357 let code = main_codes[261 + slot].ok_or(Error::InvalidData(
358 "RAR 2.0 encoder missing short-offset Huffman code",
359 ))?;
360 bits.write_bits(code.code as u32, code.len);
361 if SHORT_BITS[slot] != 0 {
362 bits.write_bits(extra as u32, SHORT_BITS[slot]);
363 }
364 }
365 EncodeToken::Match { length, offset } => {
366 let encoded_length = length.checked_sub(match_length_adjustment(offset)).ok_or(
367 Error::InvalidData("RAR 2.0 adjusted match length underflows"),
368 )?;
369 let (slot, extra) = length_slot_for_match(encoded_length)?;
370 let code = main_codes[270 + slot].ok_or(Error::InvalidData(
371 "RAR 2.0 encoder missing match Huffman code",
372 ))?;
373 bits.write_bits(code.code as u32, code.len);
374 if LENGTH_BITS[slot] != 0 {
375 bits.write_bits(extra as u32, LENGTH_BITS[slot]);
376 }
377 let (offset_slot, offset_extra) = offset_slot_for_match(offset)?;
378 let offset = offset_codes[offset_slot].ok_or(Error::InvalidData(
379 "RAR 2.0 encoder missing offset Huffman code",
380 ))?;
381 bits.write_bits(offset.code as u32, offset.len);
382 if OFFSET_BITS[offset_slot] != 0 {
383 bits.write_bits(offset_extra as u32, OFFSET_BITS[offset_slot]);
384 }
385 }
386 }
387 }
388 Ok(bits.finish())
389}
390
391#[derive(Debug, Clone, Copy)]
392struct FixedEncodeTable {
393 length: u8,
394}
395
396impl FixedEncodeTable {
397 fn new() -> Result<Self> {
398 Ok(Self {
399 length: literal_code_len(256 + LENGTH_COUNT + OFFSET_COUNT)?,
400 })
401 }
402}
403
404#[derive(Debug, Clone, Copy, PartialEq, Eq)]
405enum EncodeToken {
406 Literal(u8),
407 RepeatLast,
408 OldOffset {
409 index: usize,
410 length: usize,
411 offset: usize,
412 },
413 ShortOffset {
414 offset: usize,
415 },
416 Match {
417 length: usize,
418 offset: usize,
419 },
420}
421
422fn encode_tokens(
423 input: &[u8],
424 history: &[u8],
425 options: EncodeOptions,
426 cost_model: Option<&CostModel>,
427) -> Vec<EncodeToken> {
428 let mut tokens = Vec::new();
429 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
430 let history = &history[history.len().saturating_sub(options.max_match_distance)..];
431 let mut combined = Vec::with_capacity(history.len() + input.len());
432 combined.extend_from_slice(history);
433 combined.extend_from_slice(input);
434 for history_pos in 0..history.len().saturating_sub(2) {
435 insert_match_position(&combined, history_pos, &mut buckets);
436 }
437
438 let mut pos = history.len();
439 let end = combined.len();
440 let mut last_match = None;
441 let mut old_offsets = [0usize; 4];
442 while pos < end {
443 let selected = select_match(
444 &combined,
445 pos,
446 end,
447 &buckets,
448 options,
449 &old_offsets,
450 cost_model,
451 );
452 if let Some(selected) = selected {
453 let lazy = LazyMatchContext {
454 input: &combined,
455 end,
456 buckets: &buckets,
457 options,
458 old_offsets: &old_offsets,
459 cost_model,
460 };
461 if should_lazy_emit_literal(pos, selected, lazy) {
462 tokens.push(EncodeToken::Literal(combined[pos]));
463 insert_match_position(&combined, pos, &mut buckets);
464 pos += 1;
465 continue;
466 }
467 let (length, offset) = match selected {
468 SelectedMatch::Fresh { length, offset } => {
469 if last_match == Some((length, offset)) {
470 tokens.push(EncodeToken::RepeatLast);
471 } else {
472 tokens.push(EncodeToken::Match { length, offset });
473 last_match = Some((length, offset));
474 }
475 (length, offset)
476 }
477 SelectedMatch::OldOffset {
478 index,
479 length,
480 offset,
481 } => {
482 if last_match == Some((length, offset)) {
483 tokens.push(EncodeToken::RepeatLast);
484 } else {
485 tokens.push(EncodeToken::OldOffset {
486 index,
487 length,
488 offset,
489 });
490 last_match = Some((length, offset));
491 }
492 (length, offset)
493 }
494 SelectedMatch::ShortOffset { offset } => {
495 let length = 2;
496 tokens.push(EncodeToken::ShortOffset { offset });
497 last_match = Some((length, offset));
498 (length, offset)
499 }
500 };
501 push_old_offset(&mut old_offsets, offset);
502 for history_pos in pos..pos + length {
503 insert_match_position(&combined, history_pos, &mut buckets);
504 }
505 pos += length;
506 } else {
507 tokens.push(EncodeToken::Literal(combined[pos]));
508 insert_match_position(&combined, pos, &mut buckets);
509 pos += 1;
510 }
511 }
512 tokens
513}
514
515#[derive(Debug, Clone, Copy)]
516struct CostModel<'a> {
517 main: &'a [u8],
518 offsets: &'a [u8],
519 lengths: &'a [u8],
520}
521
522impl<'a> CostModel<'a> {
523 fn new(table_lengths: &'a [u8; TABLE_COUNT]) -> Self {
524 Self {
525 main: &table_lengths[..MAIN_COUNT],
526 offsets: &table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT],
527 lengths: &table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT],
528 }
529 }
530
531 fn selected_cost(self, selected: SelectedMatch) -> Option<usize> {
532 match selected {
533 SelectedMatch::Fresh { length, offset } => {
534 let encoded_length = length.checked_sub(match_length_adjustment(offset))?;
535 let (length_slot, _) = length_slot_for_match(encoded_length).ok()?;
536 let (offset_slot, _) = offset_slot_for_match(offset).ok()?;
537 Some(
538 usize::from(self.main[270 + length_slot])
539 + usize::from(LENGTH_BITS[length_slot])
540 + usize::from(self.offsets[offset_slot])
541 + usize::from(OFFSET_BITS[offset_slot]),
542 )
543 }
544 SelectedMatch::OldOffset {
545 index,
546 length,
547 offset,
548 } => {
549 let (length_slot, _) = old_length_slot_for_match(length, offset).ok()?;
550 Some(
551 usize::from(self.main[257 + index])
552 + usize::from(self.lengths[length_slot])
553 + usize::from(LENGTH_BITS[length_slot]),
554 )
555 }
556 SelectedMatch::ShortOffset { offset } => {
557 let (slot, _) = short_slot_for_match(offset).ok()?;
558 Some(usize::from(self.main[261 + slot]) + usize::from(SHORT_BITS[slot]))
559 }
560 }
561 }
562
563 fn selected_score(self, selected: SelectedMatch) -> Option<isize> {
564 let cost = self.selected_cost(selected)?;
565 Some(selected.length() as isize * 8 - cost as isize)
566 }
567}
568
569#[derive(Debug, Clone, Copy)]
570enum SelectedMatch {
571 Fresh {
572 length: usize,
573 offset: usize,
574 },
575 OldOffset {
576 index: usize,
577 length: usize,
578 offset: usize,
579 },
580 ShortOffset {
581 offset: usize,
582 },
583}
584
585impl SelectedMatch {
586 fn length(self) -> usize {
587 match self {
588 SelectedMatch::Fresh { length, .. } | SelectedMatch::OldOffset { length, .. } => length,
589 SelectedMatch::ShortOffset { .. } => 2,
590 }
591 }
592
593 fn score(self) -> isize {
594 let length_score = self.length() as isize * 8;
595 let cost = match self {
596 SelectedMatch::OldOffset { .. } | SelectedMatch::ShortOffset { .. } => 4,
597 SelectedMatch::Fresh { offset, .. } => 8 + OFFSET_BITS[offset_slot_index(offset)],
598 };
599 length_score - isize::from(cost)
600 }
601}
602
603fn select_match(
604 input: &[u8],
605 pos: usize,
606 end: usize,
607 buckets: &[Vec<usize>],
608 options: EncodeOptions,
609 old_offsets: &[usize; 4],
610 cost_model: Option<&CostModel<'_>>,
611) -> Option<SelectedMatch> {
612 let fresh = best_match(input, pos, end, buckets, options, cost_model)
613 .map(|(length, offset)| SelectedMatch::Fresh { length, offset });
614 let old = best_old_offset_match(input, pos, end, old_offsets, cost_model).map(
615 |(index, length, offset)| SelectedMatch::OldOffset {
616 index,
617 length,
618 offset,
619 },
620 );
621 if let Some(cost_model) = cost_model {
622 return [fresh, old, best_short_offset_match(input, pos, end)]
623 .into_iter()
624 .flatten()
625 .max_by_key(|&selected| {
626 (
627 cost_model.selected_score(selected).unwrap_or(isize::MIN),
628 selected.length(),
629 )
630 });
631 }
632
633 let fresh = fresh.and_then(|selected| match selected {
634 SelectedMatch::Fresh { length, offset } => Some((length, offset)),
635 _ => None,
636 });
637 let old = old.and_then(|selected| match selected {
638 SelectedMatch::OldOffset {
639 index,
640 length,
641 offset,
642 } => Some((index, length, offset)),
643 _ => None,
644 });
645 match (fresh, old) {
646 (Some((fresh_length, _)), Some((index, old_length, old_offset)))
647 if old_length + 1 >= fresh_length =>
648 {
649 Some(SelectedMatch::OldOffset {
650 index,
651 length: old_length,
652 offset: old_offset,
653 })
654 }
655 (Some((length, offset)), _) => Some(SelectedMatch::Fresh { length, offset }),
656 (None, Some((index, length, offset))) => Some(SelectedMatch::OldOffset {
657 index,
658 length,
659 offset,
660 }),
661 (None, None) => best_short_offset_match(input, pos, end),
662 }
663}
664
665struct LazyMatchContext<'a> {
666 input: &'a [u8],
667 end: usize,
668 buckets: &'a [Vec<usize>],
669 options: EncodeOptions,
670 old_offsets: &'a [usize; 4],
671 cost_model: Option<&'a CostModel<'a>>,
672}
673
674fn should_lazy_emit_literal(
675 pos: usize,
676 current: SelectedMatch,
677 context: LazyMatchContext<'_>,
678) -> bool {
679 if !context.options.lazy_matching || pos + 1 >= context.end {
680 return false;
681 }
682 let lookahead = context.options.lazy_lookahead.max(1);
683 (1..=lookahead)
684 .take_while(|offset| pos + offset < context.end)
685 .any(|offset| {
686 select_match(
687 context.input,
688 pos + offset,
689 context.end,
690 context.buckets,
691 context.options,
692 context.old_offsets,
693 context.cost_model,
694 )
695 .is_some_and(|next| {
696 let current_score = context
697 .cost_model
698 .and_then(|cost_model| cost_model.selected_score(current))
699 .unwrap_or_else(|| current.score());
700 let next_score = context
701 .cost_model
702 .and_then(|cost_model| cost_model.selected_score(next))
703 .unwrap_or_else(|| next.score());
704 let skipped_literal_score = offset as isize * 8;
705 next_score > current_score + skipped_literal_score
706 })
707 })
708}
709
710fn best_match(
711 input: &[u8],
712 pos: usize,
713 end: usize,
714 buckets: &[Vec<usize>],
715 options: EncodeOptions,
716 cost_model: Option<&CostModel<'_>>,
717) -> Option<(usize, usize)> {
718 let max_offset = pos.min(options.max_match_distance);
719 let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
720 if options.max_match_candidates == 0
721 || max_offset == 0
722 || max_length < 3
723 || pos + 2 >= input.len()
724 {
725 return None;
726 }
727 let bucket = &buckets[match_hash(input, pos)];
728 let mut best = None;
729 let mut checked = 0usize;
730 for &candidate in bucket.iter().rev() {
731 if candidate >= pos {
732 continue;
733 }
734 let offset = pos - candidate;
735 if offset > max_offset {
736 break;
737 }
738 checked += 1;
739 let mut length = 0usize;
740 while length < max_length && input[pos + length] == input[pos + length - offset] {
741 length += 1;
742 }
743 let encodable = length >= 3 + match_length_adjustment(offset);
744 if encodable && is_better_fresh_match(cost_model, length, offset, best) {
745 best = Some((length, offset));
746 if length == max_length {
747 break;
748 }
749 }
750 if checked >= options.max_match_candidates {
751 break;
752 }
753 }
754 best
755}
756
757fn offset_slot_index(offset: usize) -> usize {
758 offset_slot_for_match(offset)
759 .map(|(slot, _)| slot)
760 .unwrap_or(OFFSET_BITS.len() - 1)
761}
762
763fn is_better_fresh_match(
764 cost_model: Option<&CostModel<'_>>,
765 length: usize,
766 offset: usize,
767 best: Option<(usize, usize)>,
768) -> bool {
769 let Some((best_length, best_offset)) = best else {
770 return true;
771 };
772 if let Some(cost_model) = cost_model {
773 let candidate = SelectedMatch::Fresh { length, offset };
774 let best = SelectedMatch::Fresh {
775 length: best_length,
776 offset: best_offset,
777 };
778 let candidate_score = cost_model.selected_score(candidate).unwrap_or(isize::MIN);
779 let best_score = cost_model.selected_score(best).unwrap_or(isize::MIN);
780 return candidate_score > best_score
781 || (candidate_score == best_score
782 && (length > best_length || (length == best_length && offset < best_offset)));
783 }
784 length > best_length || (length == best_length && offset < best_offset)
785}
786
787fn best_old_offset_match(
788 input: &[u8],
789 pos: usize,
790 end: usize,
791 old_offsets: &[usize; 4],
792 cost_model: Option<&CostModel<'_>>,
793) -> Option<(usize, usize, usize)> {
794 let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
795 let mut best = None;
796 for (index, &offset) in old_offsets.iter().enumerate() {
797 if offset == 0 || offset > pos {
798 continue;
799 }
800 let length = match_length_at_offset(input, pos, max_length, offset);
801 if old_length_slot_for_match(length, offset).is_ok()
802 && is_better_old_offset_match(cost_model, index, length, offset, best)
803 {
804 best = Some((index, length, offset));
805 }
806 }
807 best
808}
809
810fn is_better_old_offset_match(
811 cost_model: Option<&CostModel<'_>>,
812 index: usize,
813 length: usize,
814 offset: usize,
815 best: Option<(usize, usize, usize)>,
816) -> bool {
817 let Some((best_index, best_length, best_offset)) = best else {
818 return true;
819 };
820 if let Some(cost_model) = cost_model {
821 let candidate = SelectedMatch::OldOffset {
822 index,
823 length,
824 offset,
825 };
826 let best = SelectedMatch::OldOffset {
827 index: best_index,
828 length: best_length,
829 offset: best_offset,
830 };
831 let candidate_score = cost_model.selected_score(candidate).unwrap_or(isize::MIN);
832 let best_score = cost_model.selected_score(best).unwrap_or(isize::MIN);
833 return candidate_score > best_score
834 || (candidate_score == best_score
835 && (length > best_length || (length == best_length && offset < best_offset)));
836 }
837 length > best_length || (length == best_length && offset < best_offset)
838}
839
840fn best_short_offset_match(input: &[u8], pos: usize, end: usize) -> Option<SelectedMatch> {
841 if end - pos < 2 {
842 return None;
843 }
844 let max_offset = pos.min(256);
845 (1..=max_offset)
846 .find(|&offset| {
847 input[pos] == input[pos - offset] && input[pos + 1] == input[pos + 1 - offset]
848 })
849 .map(|offset| SelectedMatch::ShortOffset { offset })
850}
851
852fn match_length_at_offset(input: &[u8], pos: usize, max_length: usize, offset: usize) -> usize {
853 let mut length = 0usize;
854 while length < max_length && input[pos + length] == input[pos + length - offset] {
855 length += 1;
856 }
857 length
858}
859
860fn match_length_adjustment(offset: usize) -> usize {
861 usize::from(offset >= 0x2000) + usize::from(offset >= 0x40000)
862}
863
864fn old_length_adjustment(offset: usize) -> usize {
865 usize::from(offset >= 0x101) + usize::from(offset >= 0x2000) + usize::from(offset >= 0x40000)
866}
867
868fn push_old_offset(old_offsets: &mut [usize; 4], offset: usize) {
869 old_offsets[3] = old_offsets[2];
870 old_offsets[2] = old_offsets[1];
871 old_offsets[1] = old_offsets[0];
872 old_offsets[0] = offset;
873}
874
875fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
876 if pos + 2 < input.len() {
877 buckets[match_hash(input, pos)].push(pos);
878 }
879}
880
881fn match_hash(input: &[u8], pos: usize) -> usize {
882 let value =
883 ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
884 value & (MATCH_HASH_BUCKETS - 1)
885}
886
887fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
888 if length < 3 {
889 return Err(Error::InvalidData("RAR 2.0 match length is too short"));
890 }
891 let adjusted = length - 3;
892 for (slot, &base) in LENGTH_BASES.iter().enumerate() {
893 let extra_bits = LENGTH_BITS[slot];
894 let max = base
895 + if extra_bits == 0 {
896 0
897 } else {
898 (1usize << extra_bits) - 1
899 };
900 if adjusted >= base && adjusted <= max {
901 return Ok((slot, adjusted - base));
902 }
903 }
904 Err(Error::InvalidData("RAR 2.0 match length is too long"))
905}
906
907fn old_length_slot_for_match(length: usize, offset: usize) -> Result<(usize, usize)> {
908 let encoded = length
909 .checked_sub(old_length_adjustment(offset))
910 .ok_or(Error::InvalidData(
911 "RAR 2.0 adjusted old-offset length underflows",
912 ))?;
913 if encoded < 2 {
914 return Err(Error::InvalidData(
915 "RAR 2.0 old-offset match length is too short",
916 ));
917 }
918 let adjusted = encoded - 2;
919 for (slot, &base) in LENGTH_BASES.iter().enumerate() {
920 let extra_bits = LENGTH_BITS[slot];
921 let max = base
922 + if extra_bits == 0 {
923 0
924 } else {
925 (1usize << extra_bits) - 1
926 };
927 if adjusted >= base && adjusted <= max {
928 return Ok((slot, adjusted - base));
929 }
930 }
931 Err(Error::InvalidData(
932 "RAR 2.0 old-offset match length is too long",
933 ))
934}
935
936fn offset_slot_for_match(offset: usize) -> Result<(usize, usize)> {
937 if offset == 0 {
938 return Err(Error::InvalidData("RAR 2.0 match offset is zero"));
939 }
940 let adjusted = offset - 1;
941 for (slot, &base) in OFFSET_BASES.iter().enumerate() {
942 let extra_bits = OFFSET_BITS[slot];
943 let max = base
944 + if extra_bits == 0 {
945 0
946 } else {
947 (1usize << extra_bits) - 1
948 };
949 if adjusted >= base && adjusted <= max {
950 return Ok((slot, adjusted - base));
951 }
952 }
953 Err(Error::InvalidData("RAR 2.0 match offset is too large"))
954}
955
956fn short_slot_for_match(offset: usize) -> Result<(usize, usize)> {
957 if offset == 0 || offset > 256 {
958 return Err(Error::InvalidData(
959 "RAR 2.0 short match offset is out of range",
960 ));
961 }
962 let adjusted = offset - 1;
963 for (slot, &base) in SHORT_BASES.iter().enumerate() {
964 let extra_bits = SHORT_BITS[slot];
965 let max = base
966 + if extra_bits == 0 {
967 0
968 } else {
969 (1usize << extra_bits) - 1
970 };
971 if adjusted >= base && adjusted <= max {
972 return Ok((slot, adjusted - base));
973 }
974 }
975 Err(Error::InvalidData(
976 "RAR 2.0 short match offset is out of range",
977 ))
978}
979
980fn literal_code_len(symbol_count: usize) -> Result<u8> {
981 if symbol_count == 0 {
982 return Err(Error::InvalidData("RAR 2.0 encoder has no literal symbols"));
983 }
984 let len = usize::BITS - (symbol_count - 1).leading_zeros();
985 u8::try_from(len.max(1)).map_err(|_| Error::InvalidData("RAR 2.0 literal table is too large"))
986}
987
988#[derive(Debug, Clone, Copy, PartialEq, Eq)]
989struct LevelToken {
990 symbol: usize,
991 extra_bits: u8,
992 extra_value: u8,
993}
994
995impl LevelToken {
996 const fn plain(symbol: usize) -> Self {
997 Self {
998 symbol,
999 extra_bits: 0,
1000 extra_value: 0,
1001 }
1002 }
1003
1004 const fn repeat_previous(count: usize) -> Self {
1005 Self {
1006 symbol: 16,
1007 extra_bits: 2,
1008 extra_value: (count - 3) as u8,
1009 }
1010 }
1011
1012 const fn zero_run_short(count: usize) -> Self {
1013 Self {
1014 symbol: 17,
1015 extra_bits: 3,
1016 extra_value: (count - 3) as u8,
1017 }
1018 }
1019
1020 const fn zero_run_long(count: usize) -> Self {
1021 Self {
1022 symbol: 18,
1023 extra_bits: 7,
1024 extra_value: (count - 11) as u8,
1025 }
1026 }
1027}
1028
1029fn encode_table_level_tokens(lengths: &[u8; TABLE_COUNT]) -> Vec<LevelToken> {
1030 encode_level_tokens(lengths)
1031}
1032
1033fn encode_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
1034 let mut tokens = Vec::new();
1035 let mut pos = 0usize;
1036 let mut previous = None;
1037 while pos < lengths.len() {
1038 let value = lengths[pos];
1039 let mut run = 1usize;
1040 while pos + run < lengths.len() && lengths[pos + run] == value {
1041 run += 1;
1042 }
1043
1044 if value == 0 {
1045 emit_zero_level_run(&mut tokens, run);
1046 previous = Some(0);
1047 pos += run;
1048 continue;
1049 }
1050
1051 if previous == Some(value) && run >= 3 {
1052 let mut remaining = run;
1053 while remaining != 0 {
1054 let chunk = remaining.min(6);
1055 if chunk >= 3 {
1056 tokens.push(LevelToken::repeat_previous(chunk));
1057 remaining -= chunk;
1058 } else {
1059 tokens.extend(std::iter::repeat_n(
1060 LevelToken::plain(value as usize),
1061 chunk,
1062 ));
1063 remaining = 0;
1064 }
1065 }
1066 pos += run;
1067 continue;
1068 }
1069
1070 tokens.push(LevelToken::plain(value as usize));
1071 previous = Some(value);
1072 pos += 1;
1073 }
1074 tokens
1075}
1076
1077fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1078 while run != 0 {
1079 if run >= 11 {
1080 let mut chunk = run.min(138);
1081 if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1082 chunk -= 3;
1083 }
1084 tokens.push(LevelToken::zero_run_long(chunk));
1085 run -= chunk;
1086 } else if run >= 3 {
1087 let chunk = run.min(10);
1088 tokens.push(LevelToken::zero_run_short(chunk));
1089 run -= chunk;
1090 } else {
1091 tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
1092 break;
1093 }
1094 }
1095}
1096
1097fn level_code_lengths_for_tokens(tokens: &[LevelToken]) -> [u8; LEVEL_COUNT] {
1098 let mut used = [false; LEVEL_COUNT];
1099 for token in tokens {
1100 used[token.symbol] = true;
1101 }
1102 level_code_lengths_for_used_symbols(used)
1103}
1104
1105fn validated_lengths_for_frequencies<const N: usize>(
1106 frequencies: &[usize; N],
1107 max_bits: u8,
1108) -> [u8; N] {
1109 let mut lengths = [0u8; N];
1110 lengths.copy_from_slice(&huffman::lengths_for_frequencies(frequencies, max_bits));
1111 if canonical_codes(&lengths).is_ok() {
1112 return lengths;
1113 }
1114
1115 lengths.copy_from_slice(&huffman::uniform_lengths_for_frequencies(frequencies));
1116 lengths
1117}
1118
1119fn encode_audio_member(input: &[u8], channels: usize) -> Result<Vec<u8>> {
1120 if channels == 0 || channels > MAX_CHANNELS {
1121 return Err(Error::InvalidData("RAR 2.0 audio channel count is invalid"));
1122 }
1123 let deltas = audio_encode(input, channels)?;
1124 let mut levels = vec![0u8; AUDIO_COUNT * channels];
1125 for channel in 0..channels {
1126 let mut frequencies = [0usize; AUDIO_COUNT];
1127 for index in (channel..deltas.len()).step_by(channels) {
1128 frequencies[deltas[index] as usize] += 1;
1129 }
1130 let channel_lengths = huffman::lengths_for_frequency_array(&frequencies, 15);
1131 for (symbol, len) in channel_lengths.into_iter().enumerate() {
1132 levels[channel * AUDIO_COUNT + symbol] = len;
1133 }
1134 }
1135
1136 let level_symbols = encode_audio_table_level_symbols(&levels);
1137 let level_lengths = level_code_lengths_for_symbols(&level_symbols);
1138 let level_codes = canonical_codes(&level_lengths)?;
1139 let mut bits = BitWriter::default();
1140 bits.write_bits(0b10, 2); bits.write_bits((channels - 1) as u32, 2);
1142 for &len in &level_lengths {
1143 bits.write_bits(len as u32, 4);
1144 }
1145 for symbol in level_symbols {
1146 let code = level_codes[symbol].ok_or(Error::InvalidData(
1147 "RAR 2.0 encoder missing audio-level Huffman code",
1148 ))?;
1149 bits.write_bits(code.code as u32, code.len);
1150 match symbol {
1151 17 => bits.write_bits(0, 3),
1152 18 => bits.write_bits(127, 7),
1153 _ => {}
1154 }
1155 }
1156
1157 for channel in 0..channels {
1158 let table = &levels[channel * AUDIO_COUNT..(channel + 1) * AUDIO_COUNT];
1159 validate_audio_table(table)?;
1160 }
1161 let audio_codes = (0..channels)
1162 .map(|channel| canonical_codes(&levels[channel * AUDIO_COUNT..(channel + 1) * AUDIO_COUNT]))
1163 .collect::<Result<Vec<_>>>()?;
1164 for (index, &delta) in deltas.iter().enumerate() {
1165 let channel = index % channels;
1166 let code = audio_codes[channel][delta as usize].ok_or(Error::InvalidData(
1167 "RAR 2.0 encoder missing audio Huffman code",
1168 ))?;
1169 bits.write_bits(code.code as u32, code.len);
1170 }
1171 Ok(bits.finish())
1172}
1173
1174fn encode_audio_table_level_symbols(levels: &[u8]) -> Vec<usize> {
1175 levels.iter().map(|&len| len as usize).collect()
1176}
1177
1178fn level_code_lengths_for_symbols(symbols: &[usize]) -> [u8; LEVEL_COUNT] {
1179 let mut used = [false; LEVEL_COUNT];
1180 for &symbol in symbols {
1181 used[symbol] = true;
1182 }
1183 level_code_lengths_for_used_symbols(used)
1184}
1185
1186fn level_code_lengths_for_used_symbols(used: [bool; LEVEL_COUNT]) -> [u8; LEVEL_COUNT] {
1187 let used_count = used.iter().filter(|&&used| used).count();
1188 let len = huffman::bits_for_symbol_count(used_count);
1189 let mut lengths = [0u8; LEVEL_COUNT];
1190 for (symbol, is_used) in used.into_iter().enumerate() {
1191 if is_used {
1192 lengths[symbol] = len;
1193 }
1194 }
1195 lengths
1196}
1197
1198fn validate_audio_table(lengths: &[u8]) -> Result<()> {
1199 let mut count = [0u16; 16];
1200 for &len in lengths {
1201 if len > 15 {
1202 return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1203 }
1204 if len != 0 {
1205 count[len as usize] += 1;
1206 }
1207 }
1208 validate_huffman_counts(&count)
1209}
1210
1211fn audio_encode(input: &[u8], channels: usize) -> Result<Vec<u8>> {
1212 if channels == 0 || channels > MAX_CHANNELS {
1213 return Err(Error::InvalidData("RAR 2.0 audio channel count is invalid"));
1214 }
1215 let mut states = [AudioState::default(); MAX_CHANNELS];
1216 let mut channel_delta = 0i32;
1217 let mut deltas = Vec::with_capacity(input.len());
1218 for (index, &byte) in input.iter().enumerate() {
1219 let channel = index % channels;
1220 let state = &mut states[channel];
1221 state.byte_count = state.byte_count.wrapping_add(1);
1222 state.d4 = state.d3;
1223 state.d3 = state.d2;
1224 state.d2 = state.last_delta - state.d1;
1225 state.d1 = state.last_delta;
1226
1227 let predicted = 8 * state.last_char
1228 + state.k[0] * state.d1
1229 + state.k[1] * state.d2
1230 + state.k[2] * state.d3
1231 + state.k[3] * state.d4
1232 + state.k[4] * channel_delta;
1233 let predicted = (predicted >> 3) & 0xff;
1234 let delta = (predicted as u8).wrapping_sub(byte);
1235
1236 let d = (delta as i8 as i32) << 3;
1237 state.dif[0] = state.dif[0].wrapping_add(d.unsigned_abs());
1238 state.dif[1] = state.dif[1].wrapping_add((d - state.d1).unsigned_abs());
1239 state.dif[2] = state.dif[2].wrapping_add((d + state.d1).unsigned_abs());
1240 state.dif[3] = state.dif[3].wrapping_add((d - state.d2).unsigned_abs());
1241 state.dif[4] = state.dif[4].wrapping_add((d + state.d2).unsigned_abs());
1242 state.dif[5] = state.dif[5].wrapping_add((d - state.d3).unsigned_abs());
1243 state.dif[6] = state.dif[6].wrapping_add((d + state.d3).unsigned_abs());
1244 state.dif[7] = state.dif[7].wrapping_add((d - state.d4).unsigned_abs());
1245 state.dif[8] = state.dif[8].wrapping_add((d + state.d4).unsigned_abs());
1246 state.dif[9] = state.dif[9].wrapping_add((d - channel_delta).unsigned_abs());
1247 state.dif[10] = state.dif[10].wrapping_add((d + channel_delta).unsigned_abs());
1248
1249 channel_delta = (byte.wrapping_sub(state.last_char as u8)) as i8 as i32;
1250 state.last_delta = channel_delta;
1251 state.last_char = byte as i32;
1252
1253 if state.byte_count & 0x1f == 0 {
1254 let mut min_dif = state.dif[0];
1255 let mut num_min_dif = 0usize;
1256 state.dif[0] = 0;
1257 for diff_index in 1..state.dif.len() {
1258 if state.dif[diff_index] < min_dif {
1259 min_dif = state.dif[diff_index];
1260 num_min_dif = diff_index;
1261 }
1262 state.dif[diff_index] = 0;
1263 }
1264 match num_min_dif {
1265 1 if state.k[0] >= -16 => state.k[0] -= 1,
1266 2 if state.k[0] < 16 => state.k[0] += 1,
1267 3 if state.k[1] >= -16 => state.k[1] -= 1,
1268 4 if state.k[1] < 16 => state.k[1] += 1,
1269 5 if state.k[2] >= -16 => state.k[2] -= 1,
1270 6 if state.k[2] < 16 => state.k[2] += 1,
1271 7 if state.k[3] >= -16 => state.k[3] -= 1,
1272 8 if state.k[3] < 16 => state.k[3] += 1,
1273 9 if state.k[4] >= -16 => state.k[4] -= 1,
1274 10 if state.k[4] < 16 => state.k[4] += 1,
1275 _ => {}
1276 }
1277 }
1278
1279 deltas.push(delta);
1280 }
1281 Ok(deltas)
1282}
1283
1284#[derive(Debug, Clone, Copy)]
1285struct HuffmanCode {
1286 code: u16,
1287 len: u8,
1288}
1289
1290fn canonical_codes(lengths: &[u8]) -> Result<Vec<Option<HuffmanCode>>> {
1291 let mut count = [0u16; 16];
1292 for &len in lengths {
1293 if len > 15 {
1294 return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1295 }
1296 if len != 0 {
1297 count[len as usize] += 1;
1298 }
1299 }
1300 validate_huffman_counts(&count)?;
1301
1302 let mut next_code = [0u16; 16];
1303 let mut code = 0u16;
1304 for len in 1..=15 {
1305 code = (code + count[len - 1]) << 1;
1306 next_code[len] = code;
1307 }
1308
1309 let mut codes = vec![None; lengths.len()];
1310 for (symbol, &len) in lengths.iter().enumerate() {
1311 if len == 0 {
1312 continue;
1313 }
1314 let code = next_code[len as usize];
1315 next_code[len as usize] += 1;
1316 codes[symbol] = Some(HuffmanCode { code, len });
1317 }
1318 Ok(codes)
1319}
1320
1321#[derive(Debug, Clone)]
1322pub struct Unpack20 {
1323 bits: BitReader,
1324 levels: [u8; OLD_LEVEL_COUNT],
1325 main: Huffman,
1326 offsets: Huffman,
1327 lengths: Huffman,
1328 audio_tables: [Huffman; MAX_CHANNELS],
1329 audio_block: bool,
1330 channels: usize,
1331 cur_channel: usize,
1332 audio: [AudioState; MAX_CHANNELS],
1333 channel_delta: i32,
1334 old_offsets: [usize; 4],
1335 last_offset: usize,
1336 last_length: usize,
1337 pending_match: Option<(usize, usize)>,
1338 in_block: bool,
1339 output: Vec<u8>,
1340 base_offset: usize,
1341}
1342
1343impl Unpack20 {
1344 pub fn new() -> Self {
1345 Self {
1346 bits: BitReader::new(),
1347 levels: [0; OLD_LEVEL_COUNT],
1348 main: Huffman::empty(),
1349 offsets: Huffman::empty(),
1350 lengths: Huffman::empty(),
1351 audio_tables: std::array::from_fn(|_| Huffman::empty()),
1352 audio_block: false,
1353 channels: 1,
1354 cur_channel: 0,
1355 audio: [AudioState::default(); MAX_CHANNELS],
1356 channel_delta: 0,
1357 old_offsets: [0; 4],
1358 last_offset: 0,
1359 last_length: 0,
1360 pending_match: None,
1361 in_block: false,
1362 output: Vec::new(),
1363 base_offset: 0,
1364 }
1365 }
1366
1367 pub fn decode_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
1368 let start = self.current_pos();
1369 let target = start
1370 .checked_add(output_size)
1371 .ok_or(Error::InvalidData("RAR 2.0 output size overflows"))?;
1372 if !input.is_empty() {
1373 self.bits = BitReader::new();
1374 }
1375 self.bits.append(input);
1376 self.decode_until(target).map_err(|error| match error {
1377 Error::NeedMoreInput => Error::InvalidData("RAR 2.0 bitstream is truncated"),
1378 error => error,
1379 })?;
1380 self.read_last_tables()?;
1381 let out = self.raw_range(start, target)?.to_vec();
1382 self.trim_history(target, target);
1383 Ok(out)
1384 }
1385
1386 pub fn decode_member_to(
1387 &mut self,
1388 input: &[u8],
1389 output_size: usize,
1390 out: &mut impl Write,
1391 ) -> Result<()> {
1392 let decoded = self.decode_member(input, output_size)?;
1393 out.write_all(&decoded)
1394 .map_err(|_| Error::InvalidData("RAR 2.0 output write failed"))
1395 }
1396
1397 pub fn decode_member_from_reader(
1398 &mut self,
1399 input: &mut impl Read,
1400 output_size: usize,
1401 out: &mut impl Write,
1402 ) -> Result<()> {
1403 let start = self.current_pos();
1404 let target = start
1405 .checked_add(output_size)
1406 .ok_or(Error::InvalidData("RAR 2.0 output size overflows"))?;
1407 self.bits = BitReader::new();
1408 let mut input_done = false;
1409 let mut buffer = [0u8; INPUT_CHUNK];
1410
1411 while self.current_pos() < target {
1412 let checkpoint = self.clone();
1413 match self.decode_until(target) {
1414 Ok(()) => {}
1415 Err(Error::NeedMoreInput) if !input_done => {
1416 *self = checkpoint;
1417 let read = input
1418 .read(&mut buffer)
1419 .map_err(|_| Error::InvalidData("RAR 2.0 input read failed"))?;
1420 if read == 0 {
1421 input_done = true;
1422 } else {
1423 self.bits.append(&buffer[..read]);
1424 }
1425 }
1426 Err(Error::NeedMoreInput) => {
1427 return Err(Error::InvalidData("RAR 2.0 bitstream is truncated"));
1428 }
1429 Err(error) => return Err(error),
1430 }
1431 }
1432
1433 loop {
1434 let read = input
1435 .read(&mut buffer)
1436 .map_err(|_| Error::InvalidData("RAR 2.0 input read failed"))?;
1437 if read == 0 {
1438 break;
1439 }
1440 self.bits.append(&buffer[..read]);
1441 }
1442 self.read_last_tables()?;
1443
1444 let decoded = self.raw_range(start, target)?;
1445 out.write_all(decoded)
1446 .map_err(|_| Error::InvalidData("RAR 2.0 output write failed"))?;
1447 self.trim_history(target, target);
1448 Ok(())
1449 }
1450
1451 fn decode_until(&mut self, target: usize) -> Result<()> {
1452 while self.current_pos() < target {
1453 self.drain_pending_match(target)?;
1454 if self.current_pos() >= target {
1455 break;
1456 }
1457 if !self.in_block {
1458 self.read_tables()?;
1459 self.in_block = true;
1460 }
1461 self.decode_lz(target)?;
1462 }
1463 Ok(())
1464 }
1465
1466 fn read_tables(&mut self) -> Result<()> {
1467 let bit_field = self.bits.peek_bits(16)?;
1468 self.audio_block = bit_field & 0x8000 != 0;
1469 let keep_tables = bit_field & 0x4000 != 0;
1470 self.bits.read_bits(2)?;
1471 if !keep_tables {
1472 self.levels = [0; OLD_LEVEL_COUNT];
1473 }
1474
1475 let table_size = if self.audio_block {
1476 self.channels = ((bit_field >> 12) as usize & 3) + 1;
1477 if self.cur_channel >= self.channels {
1478 self.cur_channel = 0;
1479 }
1480 self.bits.read_bits(2)?;
1481 AUDIO_COUNT * self.channels
1482 } else {
1483 TABLE_COUNT
1484 };
1485
1486 let level_lengths = Self::read_level_lengths(&mut self.bits)?;
1487 let level_decoder = Huffman::from_lengths(&level_lengths)?;
1488 let mut new_levels = [0u8; OLD_LEVEL_COUNT];
1489 let mut pos = 0usize;
1490 while pos < table_size {
1491 let symbol = level_decoder.decode(&mut self.bits)?;
1492 match symbol {
1493 0..=15 => {
1494 new_levels[pos] = (self.levels[pos].wrapping_add(symbol as u8)) & 0x0f;
1495 pos += 1;
1496 }
1497 16 => {
1498 if pos == 0 {
1499 return Err(Error::InvalidData("RAR 2.0 table repeat at start"));
1500 }
1501 let count = 3 + self.bits.read_bits(2)? as usize;
1502 let value = new_levels[pos - 1];
1503 fill_levels(&mut new_levels, &mut pos, count, value)?;
1504 }
1505 17 => {
1506 let count = 3 + self.bits.read_bits(3)? as usize;
1507 fill_levels(&mut new_levels, &mut pos, count, 0)?;
1508 }
1509 18 => {
1510 let count = 11 + self.bits.read_bits(7)? as usize;
1511 fill_levels(&mut new_levels, &mut pos, count, 0)?;
1512 }
1513 _ => return Err(Error::InvalidData("RAR 2.0 invalid level symbol")),
1514 }
1515 }
1516
1517 self.levels = new_levels;
1518 if self.audio_block {
1519 for channel in 0..self.channels {
1520 let start = channel * AUDIO_COUNT;
1521 self.audio_tables[channel] =
1522 Huffman::from_lengths(&self.levels[start..start + AUDIO_COUNT])?;
1523 }
1524 } else {
1525 self.main = Huffman::from_lengths(&self.levels[..MAIN_COUNT])?;
1526 self.offsets =
1527 Huffman::from_lengths(&self.levels[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
1528 self.lengths =
1529 Huffman::from_lengths(&self.levels[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT])?;
1530 }
1531 Ok(())
1532 }
1533
1534 fn read_level_lengths(bits: &mut BitReader) -> Result<[u8; LEVEL_COUNT]> {
1535 let mut lengths = [0u8; LEVEL_COUNT];
1536 for length in &mut lengths {
1537 *length = bits.read_bits(4)? as u8;
1538 }
1539 Ok(lengths)
1540 }
1541
1542 fn decode_lz(&mut self, output_size: usize) -> Result<()> {
1543 while self.current_pos() < output_size {
1544 if self.audio_block {
1545 self.decode_audio_byte()?;
1546 if !self.in_block {
1547 return Ok(());
1548 }
1549 continue;
1550 }
1551 let symbol = self.main.decode(&mut self.bits)?;
1552 match symbol {
1553 0..=255 => self.output.push(symbol as u8),
1554 256 => {
1555 if self.last_length != 0 {
1556 let length = self.last_length;
1557 let offset = self.last_offset;
1558 self.push_old_offset(offset);
1559 self.copy_match(length, offset, output_size)?;
1560 }
1561 }
1562 257..=260 => {
1563 let index = symbol - 257;
1564 let offset = self.old_offsets[index];
1565 let length_slot = self.lengths.decode(&mut self.bits)?;
1566 if length_slot >= LENGTH_COUNT {
1567 return Err(Error::InvalidData("RAR 2.0 invalid repeat length slot"));
1568 }
1569 let mut length = LENGTH_BASES[length_slot] + 2;
1570 if LENGTH_BITS[length_slot] != 0 {
1571 length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
1572 }
1573 if offset >= 0x101 {
1574 length += 1;
1575 }
1576 if offset >= 0x2000 {
1577 length += 1;
1578 }
1579 if offset >= 0x40000 {
1580 length += 1;
1581 }
1582 self.push_old_offset(offset);
1583 self.last_offset = offset;
1584 self.last_length = length;
1585 self.copy_match(length, offset, output_size)?;
1586 }
1587 261..=268 => {
1588 let index = symbol - 261;
1589 let mut offset = SHORT_BASES[index] + 1;
1590 if SHORT_BITS[index] != 0 {
1591 offset += self.bits.read_bits(SHORT_BITS[index])? as usize;
1592 }
1593 self.push_old_offset(offset);
1594 self.last_offset = offset;
1595 self.last_length = 2;
1596 self.copy_match(2, offset, output_size)?;
1597 }
1598 269 => {
1599 self.in_block = false;
1600 return Ok(());
1601 }
1602 270..=297 => {
1603 let length_slot = symbol - 270;
1604 let mut length = LENGTH_BASES[length_slot] + 3;
1605 if LENGTH_BITS[length_slot] != 0 {
1606 length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
1607 }
1608 let offset = self.read_offset()?;
1609 if offset >= 0x2000 {
1610 length += 1;
1611 }
1612 if offset >= 0x40000 {
1613 length += 1;
1614 }
1615 self.push_old_offset(offset);
1616 self.last_offset = offset;
1617 self.last_length = length;
1618 self.copy_match(length, offset, output_size)?;
1619 }
1620 _ => return Err(Error::InvalidData("RAR 2.0 invalid main symbol")),
1621 }
1622 }
1623 Ok(())
1624 }
1625
1626 fn decode_audio_byte(&mut self) -> Result<()> {
1627 let symbol = self.audio_tables[self.cur_channel].decode(&mut self.bits)?;
1628 if symbol == 256 {
1629 self.in_block = false;
1630 return Ok(());
1631 }
1632 if symbol > 256 {
1633 return Err(Error::InvalidData("RAR 2.0 invalid audio symbol"));
1634 }
1635 let byte = self.decode_audio(symbol as u8);
1636 self.output.push(byte);
1637 self.cur_channel += 1;
1638 if self.cur_channel == self.channels {
1639 self.cur_channel = 0;
1640 }
1641 Ok(())
1642 }
1643
1644 fn decode_audio(&mut self, delta: u8) -> u8 {
1645 let state = &mut self.audio[self.cur_channel];
1646 state.byte_count = state.byte_count.wrapping_add(1);
1647 state.d4 = state.d3;
1648 state.d3 = state.d2;
1649 state.d2 = state.last_delta - state.d1;
1650 state.d1 = state.last_delta;
1651
1652 let predicted = 8 * state.last_char
1653 + state.k[0] * state.d1
1654 + state.k[1] * state.d2
1655 + state.k[2] * state.d3
1656 + state.k[3] * state.d4
1657 + state.k[4] * self.channel_delta;
1658 let predicted = (predicted >> 3) & 0xff;
1659 let byte = predicted.wrapping_sub(delta as i32) as u8;
1660
1661 let d = (delta as i8 as i32) << 3;
1662 state.dif[0] = state.dif[0].wrapping_add(d.unsigned_abs());
1663 state.dif[1] = state.dif[1].wrapping_add((d - state.d1).unsigned_abs());
1664 state.dif[2] = state.dif[2].wrapping_add((d + state.d1).unsigned_abs());
1665 state.dif[3] = state.dif[3].wrapping_add((d - state.d2).unsigned_abs());
1666 state.dif[4] = state.dif[4].wrapping_add((d + state.d2).unsigned_abs());
1667 state.dif[5] = state.dif[5].wrapping_add((d - state.d3).unsigned_abs());
1668 state.dif[6] = state.dif[6].wrapping_add((d + state.d3).unsigned_abs());
1669 state.dif[7] = state.dif[7].wrapping_add((d - state.d4).unsigned_abs());
1670 state.dif[8] = state.dif[8].wrapping_add((d + state.d4).unsigned_abs());
1671 state.dif[9] = state.dif[9].wrapping_add((d - self.channel_delta).unsigned_abs());
1672 state.dif[10] = state.dif[10].wrapping_add((d + self.channel_delta).unsigned_abs());
1673
1674 self.channel_delta = (byte.wrapping_sub(state.last_char as u8)) as i8 as i32;
1675 state.last_delta = self.channel_delta;
1676 state.last_char = byte as i32;
1677
1678 if state.byte_count & 0x1f == 0 {
1679 let mut min_dif = state.dif[0];
1680 let mut num_min_dif = 0usize;
1681 state.dif[0] = 0;
1682 for index in 1..state.dif.len() {
1683 if state.dif[index] < min_dif {
1684 min_dif = state.dif[index];
1685 num_min_dif = index;
1686 }
1687 state.dif[index] = 0;
1688 }
1689 match num_min_dif {
1690 1 if state.k[0] >= -16 => state.k[0] -= 1,
1691 2 if state.k[0] < 16 => state.k[0] += 1,
1692 3 if state.k[1] >= -16 => state.k[1] -= 1,
1693 4 if state.k[1] < 16 => state.k[1] += 1,
1694 5 if state.k[2] >= -16 => state.k[2] -= 1,
1695 6 if state.k[2] < 16 => state.k[2] += 1,
1696 7 if state.k[3] >= -16 => state.k[3] -= 1,
1697 8 if state.k[3] < 16 => state.k[3] += 1,
1698 9 if state.k[4] >= -16 => state.k[4] -= 1,
1699 10 if state.k[4] < 16 => state.k[4] += 1,
1700 _ => {}
1701 }
1702 }
1703
1704 byte
1705 }
1706
1707 fn read_offset(&mut self) -> Result<usize> {
1708 let slot = self.offsets.decode(&mut self.bits)?;
1709 if slot >= OFFSET_COUNT {
1710 return Err(Error::InvalidData("RAR 2.0 invalid offset slot"));
1711 }
1712 let mut offset = OFFSET_BASES[slot] + 1;
1713 if OFFSET_BITS[slot] != 0 {
1714 offset += self.bits.read_bits(OFFSET_BITS[slot])? as usize;
1715 }
1716 Ok(offset)
1717 }
1718
1719 fn copy_match(&mut self, length: usize, offset: usize, output_size: usize) -> Result<()> {
1720 let offset = if offset == 0 { 1 } else { offset };
1721 let current = self.current_pos();
1722 if offset > current {
1723 return Err(Error::InvalidData("RAR 2.0 match distance is out of range"));
1724 }
1725 for index in 0..length {
1726 if self.current_pos() >= output_size {
1727 self.pending_match = Some((length - index, offset));
1728 break;
1729 }
1730 let src = self.current_pos() - offset;
1731 let byte = *self
1732 .raw_byte(src)
1733 .ok_or(Error::InvalidData("RAR 2.0 match distance is out of range"))?;
1734 self.output.push(byte);
1735 }
1736 Ok(())
1737 }
1738
1739 fn drain_pending_match(&mut self, output_size: usize) -> Result<()> {
1740 let Some((length, offset)) = self.pending_match.take() else {
1741 return Ok(());
1742 };
1743 self.copy_match(length, offset, output_size)
1744 }
1745
1746 fn read_last_tables(&mut self) -> Result<()> {
1747 if self.bits.remaining_bytes_from_current() < 5 {
1748 return Ok(());
1749 }
1750 if self.audio_block {
1751 if self.audio_tables[self.cur_channel].decode(&mut self.bits)? == 256 {
1752 self.read_tables()?;
1753 self.in_block = true;
1754 }
1755 } else if self.main.decode(&mut self.bits)? == 269 {
1756 self.read_tables()?;
1757 self.in_block = true;
1758 }
1759 Ok(())
1760 }
1761
1762 fn push_old_offset(&mut self, offset: usize) {
1763 self.old_offsets[3] = self.old_offsets[2];
1764 self.old_offsets[2] = self.old_offsets[1];
1765 self.old_offsets[1] = self.old_offsets[0];
1766 self.old_offsets[0] = offset;
1767 }
1768
1769 fn current_pos(&self) -> usize {
1770 self.base_offset + self.output.len()
1771 }
1772
1773 fn raw_byte(&self, position: usize) -> Option<&u8> {
1774 self.output.get(position.checked_sub(self.base_offset)?)
1775 }
1776
1777 fn raw_range(&self, start: usize, end: usize) -> Result<&[u8]> {
1778 if start < self.base_offset || end < start {
1779 return Err(Error::InvalidData(
1780 "RAR 2.0 retained history is unavailable",
1781 ));
1782 }
1783 let rel_start = start - self.base_offset;
1784 let rel_end = end - self.base_offset;
1785 self.output
1786 .get(rel_start..rel_end)
1787 .ok_or(Error::InvalidData(
1788 "RAR 2.0 retained history is unavailable",
1789 ))
1790 }
1791
1792 fn trim_history(&mut self, flushed_pos: usize, current_pos: usize) {
1793 let keep_from = current_pos.saturating_sub(MAX_HISTORY).min(flushed_pos);
1794 if keep_from <= self.base_offset {
1795 return;
1796 }
1797 let drain = keep_from - self.base_offset;
1798 self.output.drain(..drain);
1799 self.base_offset = keep_from;
1800 }
1801}
1802
1803impl Default for Unpack20 {
1804 fn default() -> Self {
1805 Self::new()
1806 }
1807}
1808
1809#[derive(Debug, Clone, Copy, Default)]
1810struct AudioState {
1811 k: [i32; 5],
1812 d1: i32,
1813 d2: i32,
1814 d3: i32,
1815 d4: i32,
1816 last_delta: i32,
1817 last_char: i32,
1818 byte_count: u32,
1819 dif: [u32; 11],
1820}
1821
1822fn fill_levels(levels: &mut [u8], pos: &mut usize, count: usize, value: u8) -> Result<()> {
1823 let end = pos
1824 .checked_add(count)
1825 .ok_or(Error::InvalidData("RAR 2.0 table run overflows"))?;
1826 let end = end.min(levels.len());
1827 for item in &mut levels[*pos..end] {
1828 *item = value;
1829 }
1830 *pos = end;
1831 Ok(())
1832}
1833
1834#[derive(Debug, Clone)]
1835struct Huffman {
1836 symbols: Vec<HuffmanSymbol>,
1837 first_code: [u16; 16],
1838 first_index: [usize; 16],
1839 counts: [u16; 16],
1840}
1841
1842#[derive(Debug, Clone)]
1843struct HuffmanSymbol {
1844 code: u16,
1845 len: u8,
1846 symbol: usize,
1847}
1848
1849impl Huffman {
1850 fn empty() -> Self {
1851 Self {
1852 symbols: Vec::new(),
1853 first_code: [0; 16],
1854 first_index: [0; 16],
1855 counts: [0; 16],
1856 }
1857 }
1858
1859 fn from_lengths(lengths: &[u8]) -> Result<Self> {
1860 let mut count = [0u16; 16];
1861 for &len in lengths {
1862 if len > 15 {
1863 return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1864 }
1865 if len != 0 {
1866 count[len as usize] += 1;
1867 }
1868 }
1869 if count.iter().all(|&value| value == 0) {
1870 return Ok(Self::empty());
1871 }
1872 validate_huffman_counts(&count)?;
1873
1874 let mut first_code = [0u16; 16];
1875 let mut next_code = [0u16; 16];
1876 let mut code = 0u16;
1877 for len in 1..=15 {
1878 code = (code + count[len - 1]) << 1;
1879 first_code[len] = code;
1880 next_code[len] = code;
1881 }
1882
1883 let mut first_index = [0usize; 16];
1884 let mut index = 0usize;
1885 for len in 1..=15 {
1886 first_index[len] = index;
1887 index += usize::from(count[len]);
1888 }
1889
1890 let mut symbols = Vec::new();
1891 for (symbol, &len) in lengths.iter().enumerate() {
1892 if len == 0 {
1893 continue;
1894 }
1895 let code = next_code[len as usize];
1896 next_code[len as usize] += 1;
1897 symbols.push(HuffmanSymbol { code, len, symbol });
1898 }
1899 symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
1900 Ok(Self {
1901 symbols,
1902 first_code,
1903 first_index,
1904 counts: count,
1905 })
1906 }
1907
1908 fn decode(&self, bits: &mut BitReader) -> Result<usize> {
1909 let mut code = 0u16;
1910 if self.symbols.is_empty() {
1911 return Err(Error::InvalidData("RAR 2.0 empty Huffman table"));
1912 }
1913 for len in 1..=15 {
1914 code = (code << 1) | bits.read_bit()? as u16;
1915 let count = self.counts[len];
1916 if count != 0 {
1917 let first = self.first_code[len];
1918 let offset = code.wrapping_sub(first);
1919 if offset < count {
1920 let index = self.first_index[len] + usize::from(offset);
1921 return Ok(self.symbols[index].symbol);
1922 }
1923 }
1924 }
1925 Err(Error::InvalidData("RAR 2.0 invalid Huffman code"))
1926 }
1927}
1928
1929fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
1930 let mut available = 1i32;
1931 for &len_count in count.iter().skip(1) {
1932 available = (available << 1) - i32::from(len_count);
1933 if available < 0 {
1934 return Err(Error::InvalidData("RAR 2.0 oversubscribed Huffman table"));
1935 }
1936 }
1937 Ok(())
1938}
1939
1940#[derive(Debug, Clone)]
1941struct BitReader {
1942 input: Vec<u8>,
1943 bit_pos: usize,
1944}
1945
1946impl BitReader {
1947 fn new() -> Self {
1948 Self {
1949 input: Vec::new(),
1950 bit_pos: 0,
1951 }
1952 }
1953
1954 fn append(&mut self, input: &[u8]) {
1955 self.compact();
1956 self.input.extend_from_slice(input);
1957 }
1958
1959 fn compact(&mut self) {
1960 let bytes = self.bit_pos / 8;
1961 if bytes == 0 {
1962 return;
1963 }
1964 self.input.drain(..bytes);
1965 self.bit_pos -= bytes * 8;
1966 }
1967
1968 fn read_bit(&mut self) -> Result<u8> {
1969 self.read_bits(1).map(|value| value as u8)
1970 }
1971
1972 fn read_bits(&mut self, count: u8) -> Result<u32> {
1973 let value = self.peek_bits(count)?;
1974 self.bit_pos += count as usize;
1975 Ok(value)
1976 }
1977
1978 fn peek_bits(&self, count: u8) -> Result<u32> {
1979 if count > 24 {
1980 return Err(Error::InvalidData("RAR 2.0 bit read is too wide"));
1981 }
1982 let mut value = 0u32;
1983 for i in 0..count as usize {
1984 let bit_index = self.bit_pos + i;
1985 let byte = *self.input.get(bit_index / 8).ok_or(Error::NeedMoreInput)?;
1986 let bit = (byte >> (7 - (bit_index % 8))) & 1;
1987 value = (value << 1) | bit as u32;
1988 }
1989 Ok(value)
1990 }
1991
1992 fn remaining_bytes_from_current(&self) -> usize {
1993 self.input.len().saturating_sub(self.bit_pos / 8)
1994 }
1995}
1996
1997#[derive(Default)]
1998struct BitWriter {
1999 bytes: Vec<u8>,
2000 bit_pos: usize,
2001}
2002
2003impl BitWriter {
2004 fn write_bits(&mut self, value: u32, count: u8) {
2005 for shift in (0..count).rev() {
2006 self.write_bit(((value >> shift) & 1) != 0);
2007 }
2008 }
2009
2010 fn write_bit(&mut self, bit: bool) {
2011 if self.bit_pos.is_multiple_of(8) {
2012 self.bytes.push(0);
2013 }
2014 if bit {
2015 let shift = 7 - (self.bit_pos % 8);
2016 *self.bytes.last_mut().unwrap() |= 1 << shift;
2017 }
2018 self.bit_pos += 1;
2019 }
2020
2021 fn finish(self) -> Vec<u8> {
2022 self.bytes
2023 }
2024}
2025
2026#[cfg(test)]
2027mod tests {
2028 use super::{
2029 encode_tokens, unpack20_decode, unpack20_encode_literals, BitWriter, EncodeOptions,
2030 EncodeToken, Error, Huffman, Unpack20, Unpack20Encoder,
2031 };
2032
2033 const AUTOREJ_PACKED: &[u8] = &[
2034 0x09, 0x14, 0x0c, 0x94, 0x00, 0x00, 0x00, 0x00, 0x00, 0xce, 0xf8, 0x1f, 0xc1, 0xe6, 0x05,
2035 0xfc, 0x39, 0xc3, 0x50, 0x65, 0x08, 0x41, 0x94, 0xc4, 0x1d, 0xf3, 0xcd, 0x0d, 0x8e, 0x20,
2036 0xf5, 0x9d, 0x8e, 0x76, 0x1d, 0xc5, 0x19, 0xde, 0x16, 0x5b, 0x52, 0xb8, 0x8e, 0x75, 0xcd,
2037 0xaf, 0x1f, 0xfc, 0x9e, 0xf7, 0x00, 0x01, 0xbe, 0x90,
2038 ];
2039
2040 #[test]
2041 fn decodes_rar20_lz_member() {
2042 assert_eq!(
2043 unpack20_decode(AUTOREJ_PACKED, expected_text().len()).unwrap(),
2044 expected_text()
2045 );
2046 }
2047
2048 #[test]
2049 fn rejects_oversubscribed_rar20_huffman_tables() {
2050 assert!(matches!(
2051 Huffman::from_lengths(&[1, 1, 1]),
2052 Err(Error::InvalidData("RAR 2.0 oversubscribed Huffman table"))
2053 ));
2054 }
2055
2056 #[test]
2057 fn decode_member_from_reader_accepts_incremental_input() {
2058 struct TinyReader<'a> {
2059 input: &'a [u8],
2060 }
2061
2062 impl std::io::Read for TinyReader<'_> {
2063 fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
2064 if self.input.is_empty() {
2065 return Ok(0);
2066 }
2067 let len = self.input.len().min(out.len()).min(3);
2068 out[..len].copy_from_slice(&self.input[..len]);
2069 self.input = &self.input[len..];
2070 Ok(len)
2071 }
2072 }
2073
2074 let mut decoder = Unpack20::new();
2075 let mut reader = TinyReader {
2076 input: AUTOREJ_PACKED,
2077 };
2078 let mut output = Vec::new();
2079 decoder
2080 .decode_member_from_reader(&mut reader, expected_text().len(), &mut output)
2081 .unwrap();
2082
2083 assert_eq!(output, expected_text());
2084 }
2085
2086 #[test]
2087 fn decodes_synthetic_audio_block() {
2088 let packed = synthetic_audio_block(8);
2089 let mut decoder = Unpack20::new();
2090
2091 assert_eq!(decoder.decode_member(&packed, 8).unwrap(), vec![0; 8]);
2092 }
2093
2094 #[test]
2095 fn audio_encoder_round_trips_interleaved_pcm_like_payload() {
2096 let input = interleaved_pcm_like_payload();
2097 let packed = super::encode_audio_member(&input, 4).unwrap();
2098 let decoded = unpack20_decode(&packed, input.len()).unwrap();
2099
2100 assert_eq!(decoded, input);
2101 }
2102
2103 #[test]
2104 fn auto_encoder_uses_audio_when_it_beats_lz() {
2105 let input = interleaved_pcm_like_payload();
2106 let lz = unpack20_encode_literals(&input).unwrap();
2107 let auto = super::unpack20_encode_auto(&input).unwrap();
2108 let decoded = unpack20_decode(&auto, input.len()).unwrap();
2109
2110 assert!(auto.len() < lz.len());
2111 assert_eq!(decoded, input);
2112 }
2113
2114 #[test]
2115 fn default_encode_options_match_legacy_entry_points() {
2116 let input = b"rar20 option plumbing preserves default output ".repeat(128);
2117 assert_eq!(
2118 unpack20_encode_literals(&input).unwrap(),
2119 super::unpack20_encode_literals_with_options(&input, EncodeOptions::default()).unwrap()
2120 );
2121 assert_eq!(
2122 super::unpack20_encode_auto(&input).unwrap(),
2123 super::unpack20_encode_auto_with_options(&input, EncodeOptions::default()).unwrap()
2124 );
2125
2126 let first = b"solid rar20 option seed ".repeat(64);
2127 let second = b"solid rar20 option seed with suffix ".repeat(32);
2128 let mut legacy = Unpack20Encoder::new();
2129 let mut explicit = Unpack20Encoder::with_options(EncodeOptions::default());
2130 assert_eq!(
2131 legacy.encode_member(&first).unwrap(),
2132 explicit.encode_member(&first).unwrap()
2133 );
2134 assert_eq!(
2135 legacy.encode_member(&second).unwrap(),
2136 explicit.encode_member(&second).unwrap()
2137 );
2138 }
2139
2140 #[test]
2141 fn encode_options_can_disable_fresh_lz_matches() {
2142 let input = b"abcdefabcdefabcdefabcdef";
2143 let default_tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2144 let literalish_tokens = encode_tokens(input, &[], EncodeOptions::new(0), None);
2145
2146 assert!(default_tokens
2147 .iter()
2148 .any(|token| matches!(token, EncodeToken::Match { .. })));
2149 assert!(!literalish_tokens
2150 .iter()
2151 .any(|token| matches!(token, EncodeToken::Match { .. })));
2152 }
2153
2154 #[test]
2155 fn table_level_encoder_uses_rar20_run_symbols() {
2156 let lengths = [0, 0, 0, 0, 5, 5, 5, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2];
2157 let tokens = super::encode_level_tokens(&lengths);
2158
2159 assert_eq!(
2160 tokens,
2161 vec![
2162 super::LevelToken::zero_run_short(4),
2163 super::LevelToken::plain(5),
2164 super::LevelToken::repeat_previous(3),
2165 super::LevelToken::plain(7),
2166 super::LevelToken::zero_run_short(10),
2167 super::LevelToken::plain(2),
2168 ]
2169 );
2170 }
2171
2172 #[test]
2173 fn decodes_back_to_back_fresh_audio_blocks() {
2174 let mut bits = BitWriter::default();
2181 write_fresh_audio_block(&mut bits, 4, true);
2182 write_fresh_audio_block(&mut bits, 4, false);
2183 let packed = bits.finish();
2184
2185 let mut decoder = Unpack20::new();
2186
2187 assert_eq!(decoder.decode_member(&packed, 8).unwrap(), vec![0; 8]);
2188 }
2189
2190 fn expected_text() -> Vec<u8> {
2191 b"Hello text not audio.\r\n".repeat(100)
2192 }
2193
2194 fn interleaved_pcm_like_payload() -> Vec<u8> {
2195 let mut input = Vec::new();
2196 for sample in 0..8192i16 {
2197 let left = sample.wrapping_mul(3).wrapping_add(200);
2198 let right = sample.wrapping_mul(3).wrapping_sub(200);
2199 input.extend_from_slice(&left.to_le_bytes());
2200 input.extend_from_slice(&right.to_le_bytes());
2201 }
2202 input
2203 }
2204
2205 fn synthetic_audio_block(samples: usize) -> Vec<u8> {
2206 let mut bits = BitWriter::default();
2207
2208 bits.write_bits(0b10, 2); bits.write_bits(0, 2); for symbol in 0..19 {
2212 let len = if symbol == 1 || symbol == 18 { 1 } else { 0 };
2213 bits.write_bits(len, 4);
2214 }
2215
2216 bits.write_bit(false); bits.write_bit(true); bits.write_bits(127, 7);
2219 bits.write_bit(true); bits.write_bits(107, 7);
2221
2222 for _ in 0..samples {
2223 bits.write_bit(false); }
2225
2226 bits.finish()
2227 }
2228
2229 fn write_fresh_audio_block(bits: &mut BitWriter, samples: usize, emit_end_sentinel: bool) {
2230 bits.write_bits(0b10, 2); bits.write_bits(0, 2); for symbol in 0..19 {
2234 let len = if symbol == 1 || symbol == 18 { 1 } else { 0 };
2235 bits.write_bits(len, 4);
2236 }
2237
2238 bits.write_bit(false); bits.write_bit(true); bits.write_bits(127, 7);
2242 bits.write_bit(true); bits.write_bits(106, 7);
2244 bits.write_bit(false); for _ in 0..samples {
2247 bits.write_bit(false); }
2249 if emit_end_sentinel {
2250 bits.write_bit(true); }
2252 }
2253
2254 #[test]
2255 fn literal_encoder_round_trips_rar20_lz_blocks() {
2256 let input = b"literal-only RAR 2.0 baseline\nwith repeated text literal-only\n";
2257 let packed = unpack20_encode_literals(input).unwrap();
2258
2259 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2260 }
2261
2262 #[test]
2263 fn encoder_emits_rar20_offset_one_matches_for_repeated_bytes() {
2264 let input = b"A".repeat(1024);
2265 let packed = unpack20_encode_literals(&input).unwrap();
2266
2267 assert!(packed.len() < input.len() / 4);
2268 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2269 }
2270
2271 #[test]
2272 fn encoder_emits_rar20_dictionary_matches_for_repeated_sequences() {
2273 let input = b"abc123xyz-".repeat(128);
2274 let packed = unpack20_encode_literals(&input).unwrap();
2275
2276 assert!(packed.len() < input.len() / 2);
2277 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2278 }
2279
2280 #[test]
2281 fn encoder_emits_rar20_repeat_last_matches_for_regular_streams() {
2282 let input = b"\x00\x01\x02\x03".repeat(4096);
2283 let tokens = encode_tokens(&input, &[], EncodeOptions::default(), None);
2284 let packed = unpack20_encode_literals(&input).unwrap();
2285
2286 assert!(tokens
2287 .iter()
2288 .any(|token| matches!(token, EncodeToken::RepeatLast)));
2289 assert!(packed.len() < input.len() / 8);
2290 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2291 }
2292
2293 #[test]
2294 fn encoder_emits_rar20_minimum_length_fresh_matches() {
2295 let input = b"abcabc";
2296 let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2297 let packed = unpack20_encode_literals(input).unwrap();
2298
2299 assert!(matches!(
2300 tokens.as_slice(),
2301 [
2302 EncodeToken::Literal(b'a'),
2303 EncodeToken::Literal(b'b'),
2304 EncodeToken::Literal(b'c'),
2305 EncodeToken::Match {
2306 length: 3,
2307 offset: 3
2308 }
2309 ]
2310 ));
2311 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2312 }
2313
2314 #[test]
2315 fn encoder_emits_rar20_short_offset_matches() {
2316 let input = b"abab";
2317 let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2318 let packed = unpack20_encode_literals(input).unwrap();
2319
2320 assert!(matches!(
2321 tokens.as_slice(),
2322 [
2323 EncodeToken::Literal(b'a'),
2324 EncodeToken::Literal(b'b'),
2325 EncodeToken::ShortOffset { offset: 2 }
2326 ]
2327 ));
2328 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2329 }
2330
2331 #[test]
2332 fn encoder_emits_rar20_old_offset_matches() {
2333 let input = b"abcdabcdXYZXYZwxyzwxyz";
2334 let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2335 let packed = unpack20_encode_literals(input).unwrap();
2336
2337 assert!(tokens
2338 .iter()
2339 .any(|token| matches!(token, EncodeToken::OldOffset { .. })));
2340 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2341 }
2342
2343 #[test]
2344 fn encoder_finds_rar20_matches_beyond_near_offsets() {
2345 let phrase = b"long-distance repeated phrase for rar20 match finder.";
2346 let mut input = Vec::new();
2347 input.extend_from_slice(phrase);
2348 input.extend(std::iter::repeat_n(0, 300 * 1024));
2349 input.extend_from_slice(phrase);
2350 input.extend_from_slice(phrase);
2351 let tokens = encode_tokens(&input, &[], EncodeOptions::default(), None);
2352 let packed = unpack20_encode_literals(&input).unwrap();
2353
2354 assert!(tokens.iter().any(|token| matches!(
2355 token,
2356 EncodeToken::Match { offset, .. } if *offset > 0x40000
2357 )));
2358 assert!(packed.len() < input.len());
2359 let decoded = unpack20_decode(&packed, input.len()).unwrap();
2360 assert!(
2361 decoded == input,
2362 "RAR 2.0 long-distance match round-trip failed"
2363 );
2364 }
2365
2366 #[test]
2367 fn solid_encoder_emits_rar20_matches_against_previous_member_history() {
2368 let first = b"solid rar20 shared phrase alpha beta gamma ".repeat(4);
2369 let second = b"solid rar20 shared phrase alpha beta gamma ".repeat(2);
2370 let independent = unpack20_encode_literals(&second).unwrap();
2371 let mut encoder = Unpack20Encoder::new();
2372 let first_packed = encoder.encode_member(&first).unwrap();
2373 let second_packed = encoder.encode_member(&second).unwrap();
2374
2375 assert!(second_packed.len() < independent.len());
2376 let mut decoder = Unpack20::new();
2377 assert_eq!(
2378 decoder.decode_member(&first_packed, first.len()).unwrap(),
2379 first
2380 );
2381 assert_eq!(
2382 decoder.decode_member(&second_packed, second.len()).unwrap(),
2383 second
2384 );
2385 }
2386
2387 #[test]
2388 fn solid_encoder_reuses_rar20_tables_at_member_boundary() {
2389 let first: Vec<_> = (0u8..=255).cycle().take(4096).collect();
2390 let second = b"short literal member after reused rar20 table boundary\n";
2391 let independent = unpack20_encode_literals(second).unwrap();
2392 let mut encoder = Unpack20Encoder::new();
2393 let first_packed = encoder.encode_member(&first).unwrap();
2394 let second_packed = encoder.encode_member(second).unwrap();
2395
2396 assert!(second_packed.len() < independent.len());
2397 let mut decoder = Unpack20::new();
2398 assert_eq!(
2399 decoder.decode_member(&first_packed, first.len()).unwrap(),
2400 first
2401 );
2402 assert_eq!(
2403 decoder.decode_member(&second_packed, second.len()).unwrap(),
2404 second
2405 );
2406 }
2407
2408 #[test]
2409 fn solid_encoder_matches_immediately_after_rar20_table_boundary() {
2410 let phrase = b"rar20 table boundary match phrase with enough bytes ";
2411 let first = phrase.repeat(128);
2412 let second = phrase.repeat(8);
2413 let independent = unpack20_encode_literals(&second).unwrap();
2414 let mut encoder = Unpack20Encoder::new();
2415 let first_packed = encoder.encode_member(&first).unwrap();
2416 let second_packed = encoder.encode_member(&second).unwrap();
2417 let tokens = encode_tokens(&second, &first, EncodeOptions::default(), None);
2418
2419 assert!(matches!(tokens.first(), Some(EncodeToken::Match { .. })));
2420 assert!(second_packed.len() < independent.len());
2421 let mut decoder = Unpack20::new();
2422 assert_eq!(
2423 decoder.decode_member(&first_packed, first.len()).unwrap(),
2424 first
2425 );
2426 assert_eq!(
2427 decoder.decode_member(&second_packed, second.len()).unwrap(),
2428 second
2429 );
2430 }
2431
2432 #[test]
2433 fn solid_encoder_carries_rar20_history_across_multiple_members() {
2434 let first = b"rar20 multi member solid seed ".repeat(512);
2435 let second = b"rar20 multi member solid seed with middle tail ".repeat(128);
2436 let third = b"with middle tail ".repeat(64);
2437 let independent = unpack20_encode_literals(&third).unwrap();
2438 let mut encoder = Unpack20Encoder::new();
2439 let first_packed = encoder.encode_member(&first).unwrap();
2440 let second_packed = encoder.encode_member(&second).unwrap();
2441 let third_packed = encoder.encode_member(&third).unwrap();
2442
2443 assert!(third_packed.len() < independent.len());
2444 let mut decoder = Unpack20::new();
2445 assert_eq!(
2446 decoder.decode_member(&first_packed, first.len()).unwrap(),
2447 first
2448 );
2449 assert_eq!(
2450 decoder.decode_member(&second_packed, second.len()).unwrap(),
2451 second
2452 );
2453 assert_eq!(
2454 decoder.decode_member(&third_packed, third.len()).unwrap(),
2455 third
2456 );
2457 }
2458
2459 #[test]
2460 fn decode_member_to_streams_decoded_payload_through_writer_sink() {
2461 let input = b"abcabcabcabcabcabcabcabcabcabcabcabc";
2462 let packed = unpack20_encode_literals(input).unwrap();
2463
2464 let mut decoder = Unpack20::new();
2465 let mut sink = Vec::new();
2466 decoder
2467 .decode_member_to(&packed, input.len(), &mut sink)
2468 .unwrap();
2469 assert_eq!(sink, input);
2470
2471 struct FailingWriter;
2474 impl std::io::Write for FailingWriter {
2475 fn write(&mut self, _buf: &[u8]) -> std::io::Result<usize> {
2476 Err(std::io::Error::other("disk full"))
2477 }
2478 fn flush(&mut self) -> std::io::Result<()> {
2479 Ok(())
2480 }
2481 }
2482 let mut decoder = Unpack20::new();
2483 let err = decoder
2484 .decode_member_to(&packed, input.len(), &mut FailingWriter)
2485 .unwrap_err();
2486 assert_eq!(err, Error::InvalidData("RAR 2.0 output write failed"));
2487 }
2488}