1use crate::{huffman, Error, Result};
2use std::io::{Read, Write};
3
4const MAIN_COUNT: usize = 298;
5const OFFSET_COUNT: usize = 48;
6const LENGTH_COUNT: usize = 28;
7const LEVEL_COUNT: usize = 19;
8const TABLE_COUNT: usize = MAIN_COUNT + OFFSET_COUNT + LENGTH_COUNT;
9const AUDIO_COUNT: usize = 257;
10const MAX_CHANNELS: usize = 4;
11const OLD_LEVEL_COUNT: usize = AUDIO_COUNT * MAX_CHANNELS;
12const MAX_HISTORY: usize = 1024 * 1024;
13
14const LENGTH_BASES: [usize; LENGTH_COUNT] = [
15 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
16 160, 192, 224,
17];
18const LENGTH_BITS: [u8; LENGTH_COUNT] = [
19 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
20];
21const OFFSET_BASES: [usize; OFFSET_COUNT] = [
22 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
23 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
24 262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
25];
26const OFFSET_BITS: [u8; OFFSET_COUNT] = [
27 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
28 13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
29];
30const SHORT_BASES: [usize; 8] = [0, 4, 8, 16, 32, 64, 128, 192];
31const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];
32const MAX_ENCODER_MATCH_OFFSET: usize = MAX_HISTORY;
33const MAX_ENCODER_MATCH_LENGTH: usize = 258;
34const MATCH_HASH_BUCKETS: usize = 4096;
35const MAX_MATCH_CANDIDATES: usize = 256;
36
37pub fn unpack20_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
38 let mut decoder = Unpack20::new();
39 decoder.decode_member(input, output_size)
40}
41
42pub fn unpack20_encode_literals(input: &[u8]) -> Result<Vec<u8>> {
43 unpack20_encode_literals_with_options(input, EncodeOptions::default())
44}
45
46pub fn unpack20_encode_literals_with_options(
47 input: &[u8],
48 options: EncodeOptions,
49) -> Result<Vec<u8>> {
50 encode_member(input, &[], None, options)
51}
52
53pub fn unpack20_encode_auto(input: &[u8]) -> Result<Vec<u8>> {
54 unpack20_encode_auto_with_options(input, EncodeOptions::default())
55}
56
57pub fn unpack20_encode_auto_with_options(input: &[u8], options: EncodeOptions) -> Result<Vec<u8>> {
58 let lz = unpack20_encode_literals_with_options(input, options)?;
59 let mut best = lz;
60 if options.try_audio {
61 for channels in 1..=MAX_CHANNELS {
62 if input.len() < channels * 64 {
63 continue;
64 }
65 let audio = encode_audio_member(input, channels)?;
66 if audio.len() < best.len() {
67 best = audio;
68 }
69 }
70 }
71 Ok(best)
72}
73
74#[derive(Debug, Clone, Copy, PartialEq, Eq)]
75#[non_exhaustive]
76pub struct EncodeOptions {
77 pub max_match_candidates: usize,
78 pub max_match_distance: usize,
79 pub lazy_matching: bool,
80 pub lazy_lookahead: usize,
81 pub try_audio: bool,
82}
83
84impl EncodeOptions {
85 pub const fn new(max_match_candidates: usize) -> Self {
86 Self {
87 max_match_candidates,
88 max_match_distance: MAX_ENCODER_MATCH_OFFSET,
89 lazy_matching: false,
90 lazy_lookahead: 1,
91 try_audio: true,
92 }
93 }
94
95 pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
96 self.max_match_distance = if distance > MAX_ENCODER_MATCH_OFFSET {
97 MAX_ENCODER_MATCH_OFFSET
98 } else {
99 distance
100 };
101 self
102 }
103
104 pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
105 self.lazy_matching = enabled;
106 self
107 }
108
109 pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
110 self.lazy_lookahead = bytes;
111 self
112 }
113
114 pub const fn with_try_audio(mut self, enabled: bool) -> Self {
115 self.try_audio = enabled;
116 self
117 }
118}
119
120impl Default for EncodeOptions {
121 fn default() -> Self {
122 Self::new(MAX_MATCH_CANDIDATES)
123 }
124}
125
126#[derive(Debug, Clone, Default)]
127pub struct Unpack20Encoder {
128 history: Vec<u8>,
129 table: Option<FixedEncodeTable>,
130 options: EncodeOptions,
131}
132
133impl Unpack20Encoder {
134 pub fn new() -> Self {
135 Self::default()
136 }
137
138 pub fn with_options(options: EncodeOptions) -> Self {
139 Self {
140 history: Vec::new(),
141 table: None,
142 options,
143 }
144 }
145
146 pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
147 if input.is_empty() {
148 return Ok(Vec::new());
149 }
150 let table = match self.table {
151 Some(table) => table,
152 None => {
153 let table = FixedEncodeTable::new()?;
154 self.table = Some(table);
155 table
156 }
157 };
158 let packed = encode_member(input, &self.history, Some(table), self.options)?;
159 self.remember(input);
160 Ok(packed)
161 }
162
163 fn remember(&mut self, input: &[u8]) {
164 self.history.extend_from_slice(input);
165 let keep_from = self
166 .history
167 .len()
168 .saturating_sub(self.options.max_match_distance);
169 if keep_from != 0 {
170 self.history.drain(..keep_from);
171 }
172 }
173}
174
175fn encode_member(
176 input: &[u8],
177 history: &[u8],
178 fixed_table: Option<FixedEncodeTable>,
179 options: EncodeOptions,
180) -> Result<Vec<u8>> {
181 if input.is_empty() {
182 return Ok(Vec::new());
183 }
184
185 let tokens = encode_tokens(input, history, options, None);
186 let table_lengths = table_lengths_for_tokens(&tokens, fixed_table)?;
187 let packed = encode_member_with_tables(&tokens, history, fixed_table, &table_lengths)?;
188 if fixed_table.is_some() {
189 return Ok(packed);
190 }
191
192 let cost_model = CostModel::new(&table_lengths);
193 let refined_tokens = encode_tokens(input, history, options, Some(&cost_model));
194 if refined_tokens == tokens {
195 return Ok(packed);
196 }
197 let refined_table_lengths = table_lengths_for_tokens(&refined_tokens, fixed_table)?;
198 let refined_packed = encode_member_with_tables(
199 &refined_tokens,
200 history,
201 fixed_table,
202 &refined_table_lengths,
203 )?;
204 if refined_packed.len() < packed.len() {
205 Ok(refined_packed)
206 } else {
207 Ok(packed)
208 }
209}
210
211fn table_lengths_for_tokens(
212 tokens: &[EncodeToken],
213 fixed_table: Option<FixedEncodeTable>,
214) -> Result<[u8; TABLE_COUNT]> {
215 let mut main_frequencies = [0usize; MAIN_COUNT];
216 let mut offset_frequencies = [0usize; OFFSET_COUNT];
217 let mut length_frequencies = [0usize; LENGTH_COUNT];
218 for token in tokens {
219 match *token {
220 EncodeToken::Literal(byte) => main_frequencies[byte as usize] += 1,
221 EncodeToken::RepeatLast => main_frequencies[256] += 1,
222 EncodeToken::OldOffset {
223 index,
224 length,
225 offset,
226 } => {
227 main_frequencies[257 + index] += 1;
228 let (slot, _) = old_length_slot_for_match(length, offset)?;
229 length_frequencies[slot] += 1;
230 }
231 EncodeToken::ShortOffset { offset } => {
232 let (slot, _) = short_slot_for_match(offset)?;
233 main_frequencies[261 + slot] += 1;
234 }
235 EncodeToken::Match { length, offset } => {
236 let encoded_length = length.checked_sub(match_length_adjustment(offset)).ok_or(
237 Error::InvalidData("RAR 2.0 adjusted match length underflows"),
238 )?;
239 let (slot, _) = length_slot_for_match(encoded_length)?;
240 main_frequencies[270 + slot] += 1;
241 let (offset_slot, _) = offset_slot_for_match(offset)?;
242 offset_frequencies[offset_slot] += 1;
243 }
244 }
245 }
246 let mut table_lengths = [0u8; TABLE_COUNT];
247 let literal_len = if let Some(table) = fixed_table {
248 table.length
249 } else {
250 let main_symbol_count = main_frequencies
251 .iter()
252 .filter(|&&frequency| frequency != 0)
253 .count()
254 + offset_frequencies
255 .iter()
256 .filter(|&&frequency| frequency != 0)
257 .count()
258 + length_frequencies
259 .iter()
260 .filter(|&&frequency| frequency != 0)
261 .count();
262 literal_code_len(main_symbol_count)?
263 };
264
265 if fixed_table.is_some() {
266 for len in &mut table_lengths[..256] {
267 *len = literal_len;
268 }
269 table_lengths[256] = literal_len;
270 for len in &mut table_lengths[270..270 + LENGTH_COUNT] {
271 *len = literal_len;
272 }
273 for len in &mut table_lengths[257..269] {
274 *len = literal_len;
275 }
276 for len in &mut table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT] {
277 *len = literal_len;
278 }
279 for len in &mut table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT] {
280 *len = literal_len;
281 }
282 } else {
283 table_lengths[..MAIN_COUNT]
284 .copy_from_slice(&validated_lengths_for_frequencies(&main_frequencies, 15));
285 table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT]
286 .copy_from_slice(&validated_lengths_for_frequencies(&offset_frequencies, 15));
287 table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT]
288 .copy_from_slice(&validated_lengths_for_frequencies(&length_frequencies, 15));
289 }
290 Ok(table_lengths)
291}
292
293fn encode_member_with_tables(
294 tokens: &[EncodeToken],
295 history: &[u8],
296 fixed_table: Option<FixedEncodeTable>,
297 table_lengths: &[u8; TABLE_COUNT],
298) -> Result<Vec<u8>> {
299 let level_tokens = encode_table_level_tokens(table_lengths);
300 let level_lengths = level_code_lengths_for_tokens(&level_tokens);
301 let level_codes = canonical_codes(&level_lengths)?;
302 let main_codes = canonical_codes(&table_lengths[..MAIN_COUNT])?;
303
304 let mut bits = BitWriter::default();
305 if fixed_table.is_none() || history.is_empty() {
306 bits.write_bits(0, 2); for &len in &level_lengths {
308 bits.write_bits(len as u32, 4);
309 }
310 for token in level_tokens {
311 let code = level_codes[token.symbol].ok_or(Error::InvalidData(
312 "RAR 2.0 encoder missing level Huffman code",
313 ))?;
314 bits.write_bits(code.code as u32, code.len);
315 if token.extra_bits != 0 {
316 bits.write_bits(token.extra_value as u32, token.extra_bits);
317 }
318 }
319 }
320 let offset_codes = canonical_codes(&table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
321 let length_codes = canonical_codes(&table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT])?;
322 for token in tokens {
323 match *token {
324 EncodeToken::Literal(byte) => {
325 let code = main_codes[byte as usize].ok_or(Error::InvalidData(
326 "RAR 2.0 encoder missing literal Huffman code",
327 ))?;
328 bits.write_bits(code.code as u32, code.len);
329 }
330 EncodeToken::RepeatLast => {
331 let code = main_codes[256].ok_or(Error::InvalidData(
332 "RAR 2.0 encoder missing repeat-last Huffman code",
333 ))?;
334 bits.write_bits(code.code as u32, code.len);
335 }
336 EncodeToken::OldOffset {
337 index,
338 length,
339 offset,
340 } => {
341 let code = main_codes[257 + index].ok_or(Error::InvalidData(
342 "RAR 2.0 encoder missing old-offset Huffman code",
343 ))?;
344 bits.write_bits(code.code as u32, code.len);
345 let (slot, extra) = old_length_slot_for_match(length, offset)?;
346 let length_code = length_codes[slot].ok_or(Error::InvalidData(
347 "RAR 2.0 encoder missing old-offset length Huffman code",
348 ))?;
349 bits.write_bits(length_code.code as u32, length_code.len);
350 if LENGTH_BITS[slot] != 0 {
351 bits.write_bits(extra as u32, LENGTH_BITS[slot]);
352 }
353 }
354 EncodeToken::ShortOffset { offset } => {
355 let (slot, extra) = short_slot_for_match(offset)?;
356 let code = main_codes[261 + slot].ok_or(Error::InvalidData(
357 "RAR 2.0 encoder missing short-offset Huffman code",
358 ))?;
359 bits.write_bits(code.code as u32, code.len);
360 if SHORT_BITS[slot] != 0 {
361 bits.write_bits(extra as u32, SHORT_BITS[slot]);
362 }
363 }
364 EncodeToken::Match { length, offset } => {
365 let encoded_length = length.checked_sub(match_length_adjustment(offset)).ok_or(
366 Error::InvalidData("RAR 2.0 adjusted match length underflows"),
367 )?;
368 let (slot, extra) = length_slot_for_match(encoded_length)?;
369 let code = main_codes[270 + slot].ok_or(Error::InvalidData(
370 "RAR 2.0 encoder missing match Huffman code",
371 ))?;
372 bits.write_bits(code.code as u32, code.len);
373 if LENGTH_BITS[slot] != 0 {
374 bits.write_bits(extra as u32, LENGTH_BITS[slot]);
375 }
376 let (offset_slot, offset_extra) = offset_slot_for_match(offset)?;
377 let offset = offset_codes[offset_slot].ok_or(Error::InvalidData(
378 "RAR 2.0 encoder missing offset Huffman code",
379 ))?;
380 bits.write_bits(offset.code as u32, offset.len);
381 if OFFSET_BITS[offset_slot] != 0 {
382 bits.write_bits(offset_extra as u32, OFFSET_BITS[offset_slot]);
383 }
384 }
385 }
386 }
387 Ok(bits.finish())
388}
389
390#[derive(Debug, Clone, Copy)]
391struct FixedEncodeTable {
392 length: u8,
393}
394
395impl FixedEncodeTable {
396 fn new() -> Result<Self> {
397 Ok(Self {
398 length: literal_code_len(256 + LENGTH_COUNT + OFFSET_COUNT)?,
399 })
400 }
401}
402
403#[derive(Debug, Clone, Copy, PartialEq, Eq)]
404enum EncodeToken {
405 Literal(u8),
406 RepeatLast,
407 OldOffset {
408 index: usize,
409 length: usize,
410 offset: usize,
411 },
412 ShortOffset {
413 offset: usize,
414 },
415 Match {
416 length: usize,
417 offset: usize,
418 },
419}
420
421fn encode_tokens(
422 input: &[u8],
423 history: &[u8],
424 options: EncodeOptions,
425 cost_model: Option<&CostModel>,
426) -> Vec<EncodeToken> {
427 let mut tokens = Vec::new();
428 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
429 let history = &history[history.len().saturating_sub(options.max_match_distance)..];
430 let mut combined = Vec::with_capacity(history.len() + input.len());
431 combined.extend_from_slice(history);
432 combined.extend_from_slice(input);
433 for history_pos in 0..history.len().saturating_sub(2) {
434 insert_match_position(&combined, history_pos, &mut buckets);
435 }
436
437 let mut pos = history.len();
438 let end = combined.len();
439 let mut last_match = None;
440 let mut old_offsets = [0usize; 4];
441 while pos < end {
442 let selected = select_match(
443 &combined,
444 pos,
445 end,
446 &buckets,
447 options,
448 &old_offsets,
449 cost_model,
450 );
451 if let Some(selected) = selected {
452 let lazy = LazyMatchContext {
453 input: &combined,
454 end,
455 buckets: &buckets,
456 options,
457 old_offsets: &old_offsets,
458 cost_model,
459 };
460 if should_lazy_emit_literal(pos, selected, lazy) {
461 tokens.push(EncodeToken::Literal(combined[pos]));
462 insert_match_position(&combined, pos, &mut buckets);
463 pos += 1;
464 continue;
465 }
466 let (length, offset) = match selected {
467 SelectedMatch::Fresh { length, offset } => {
468 if last_match == Some((length, offset)) {
469 tokens.push(EncodeToken::RepeatLast);
470 } else {
471 tokens.push(EncodeToken::Match { length, offset });
472 last_match = Some((length, offset));
473 }
474 (length, offset)
475 }
476 SelectedMatch::OldOffset {
477 index,
478 length,
479 offset,
480 } => {
481 if last_match == Some((length, offset)) {
482 tokens.push(EncodeToken::RepeatLast);
483 } else {
484 tokens.push(EncodeToken::OldOffset {
485 index,
486 length,
487 offset,
488 });
489 last_match = Some((length, offset));
490 }
491 (length, offset)
492 }
493 SelectedMatch::ShortOffset { offset } => {
494 let length = 2;
495 tokens.push(EncodeToken::ShortOffset { offset });
496 last_match = Some((length, offset));
497 (length, offset)
498 }
499 };
500 push_old_offset(&mut old_offsets, offset);
501 for history_pos in pos..pos + length {
502 insert_match_position(&combined, history_pos, &mut buckets);
503 }
504 pos += length;
505 } else {
506 tokens.push(EncodeToken::Literal(combined[pos]));
507 insert_match_position(&combined, pos, &mut buckets);
508 pos += 1;
509 }
510 }
511 tokens
512}
513
514#[derive(Debug, Clone, Copy)]
515struct CostModel<'a> {
516 main: &'a [u8],
517 offsets: &'a [u8],
518 lengths: &'a [u8],
519}
520
521impl<'a> CostModel<'a> {
522 fn new(table_lengths: &'a [u8; TABLE_COUNT]) -> Self {
523 Self {
524 main: &table_lengths[..MAIN_COUNT],
525 offsets: &table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT],
526 lengths: &table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT],
527 }
528 }
529
530 fn selected_cost(self, selected: SelectedMatch) -> Option<usize> {
531 match selected {
532 SelectedMatch::Fresh { length, offset } => {
533 let encoded_length = length.checked_sub(match_length_adjustment(offset))?;
534 let (length_slot, _) = length_slot_for_match(encoded_length).ok()?;
535 let (offset_slot, _) = offset_slot_for_match(offset).ok()?;
536 Some(
537 usize::from(self.main[270 + length_slot])
538 + usize::from(LENGTH_BITS[length_slot])
539 + usize::from(self.offsets[offset_slot])
540 + usize::from(OFFSET_BITS[offset_slot]),
541 )
542 }
543 SelectedMatch::OldOffset {
544 index,
545 length,
546 offset,
547 } => {
548 let (length_slot, _) = old_length_slot_for_match(length, offset).ok()?;
549 Some(
550 usize::from(self.main[257 + index])
551 + usize::from(self.lengths[length_slot])
552 + usize::from(LENGTH_BITS[length_slot]),
553 )
554 }
555 SelectedMatch::ShortOffset { offset } => {
556 let (slot, _) = short_slot_for_match(offset).ok()?;
557 Some(usize::from(self.main[261 + slot]) + usize::from(SHORT_BITS[slot]))
558 }
559 }
560 }
561
562 fn selected_score(self, selected: SelectedMatch) -> Option<isize> {
563 let cost = self.selected_cost(selected)?;
564 Some(selected.length() as isize * 8 - cost as isize)
565 }
566}
567
568#[derive(Debug, Clone, Copy)]
569enum SelectedMatch {
570 Fresh {
571 length: usize,
572 offset: usize,
573 },
574 OldOffset {
575 index: usize,
576 length: usize,
577 offset: usize,
578 },
579 ShortOffset {
580 offset: usize,
581 },
582}
583
584impl SelectedMatch {
585 fn length(self) -> usize {
586 match self {
587 SelectedMatch::Fresh { length, .. } | SelectedMatch::OldOffset { length, .. } => length,
588 SelectedMatch::ShortOffset { .. } => 2,
589 }
590 }
591
592 fn score(self) -> isize {
593 let length_score = self.length() as isize * 8;
594 let cost = match self {
595 SelectedMatch::OldOffset { .. } | SelectedMatch::ShortOffset { .. } => 4,
596 SelectedMatch::Fresh { offset, .. } => 8 + OFFSET_BITS[offset_slot_index(offset)],
597 };
598 length_score - isize::from(cost)
599 }
600}
601
602fn select_match(
603 input: &[u8],
604 pos: usize,
605 end: usize,
606 buckets: &[Vec<usize>],
607 options: EncodeOptions,
608 old_offsets: &[usize; 4],
609 cost_model: Option<&CostModel<'_>>,
610) -> Option<SelectedMatch> {
611 let fresh = best_match(input, pos, end, buckets, options, cost_model)
612 .map(|(length, offset)| SelectedMatch::Fresh { length, offset });
613 let old = best_old_offset_match(input, pos, end, old_offsets, cost_model).map(
614 |(index, length, offset)| SelectedMatch::OldOffset {
615 index,
616 length,
617 offset,
618 },
619 );
620 if let Some(cost_model) = cost_model {
621 return [fresh, old, best_short_offset_match(input, pos, end)]
622 .into_iter()
623 .flatten()
624 .max_by_key(|&selected| {
625 (
626 cost_model.selected_score(selected).unwrap_or(isize::MIN),
627 selected.length(),
628 )
629 });
630 }
631
632 let fresh = fresh.and_then(|selected| match selected {
633 SelectedMatch::Fresh { length, offset } => Some((length, offset)),
634 _ => None,
635 });
636 let old = old.and_then(|selected| match selected {
637 SelectedMatch::OldOffset {
638 index,
639 length,
640 offset,
641 } => Some((index, length, offset)),
642 _ => None,
643 });
644 match (fresh, old) {
645 (Some((fresh_length, _)), Some((index, old_length, old_offset)))
646 if old_length + 1 >= fresh_length =>
647 {
648 Some(SelectedMatch::OldOffset {
649 index,
650 length: old_length,
651 offset: old_offset,
652 })
653 }
654 (Some((length, offset)), _) => Some(SelectedMatch::Fresh { length, offset }),
655 (None, Some((index, length, offset))) => Some(SelectedMatch::OldOffset {
656 index,
657 length,
658 offset,
659 }),
660 (None, None) => best_short_offset_match(input, pos, end),
661 }
662}
663
664struct LazyMatchContext<'a> {
665 input: &'a [u8],
666 end: usize,
667 buckets: &'a [Vec<usize>],
668 options: EncodeOptions,
669 old_offsets: &'a [usize; 4],
670 cost_model: Option<&'a CostModel<'a>>,
671}
672
673fn should_lazy_emit_literal(
674 pos: usize,
675 current: SelectedMatch,
676 context: LazyMatchContext<'_>,
677) -> bool {
678 if !context.options.lazy_matching || pos + 1 >= context.end {
679 return false;
680 }
681 let lookahead = context.options.lazy_lookahead.max(1);
682 (1..=lookahead)
683 .take_while(|offset| pos + offset < context.end)
684 .any(|offset| {
685 select_match(
686 context.input,
687 pos + offset,
688 context.end,
689 context.buckets,
690 context.options,
691 context.old_offsets,
692 context.cost_model,
693 )
694 .is_some_and(|next| {
695 let current_score = context
696 .cost_model
697 .and_then(|cost_model| cost_model.selected_score(current))
698 .unwrap_or_else(|| current.score());
699 let next_score = context
700 .cost_model
701 .and_then(|cost_model| cost_model.selected_score(next))
702 .unwrap_or_else(|| next.score());
703 let skipped_literal_score = offset as isize * 8;
704 next_score > current_score + skipped_literal_score
705 })
706 })
707}
708
709fn best_match(
710 input: &[u8],
711 pos: usize,
712 end: usize,
713 buckets: &[Vec<usize>],
714 options: EncodeOptions,
715 cost_model: Option<&CostModel<'_>>,
716) -> Option<(usize, usize)> {
717 let max_offset = pos.min(options.max_match_distance);
718 let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
719 if options.max_match_candidates == 0
720 || max_offset == 0
721 || max_length < 3
722 || pos + 2 >= input.len()
723 {
724 return None;
725 }
726 let bucket = &buckets[match_hash(input, pos)];
727 let mut best = None;
728 let mut checked = 0usize;
729 for &candidate in bucket.iter().rev() {
730 if candidate >= pos {
731 continue;
732 }
733 let offset = pos - candidate;
734 if offset > max_offset {
735 break;
736 }
737 checked += 1;
738 let mut length = 0usize;
739 while length < max_length && input[pos + length] == input[pos + length - offset] {
740 length += 1;
741 }
742 let encodable = length >= 3 + match_length_adjustment(offset);
743 if encodable && is_better_fresh_match(cost_model, length, offset, best) {
744 best = Some((length, offset));
745 if length == max_length {
746 break;
747 }
748 }
749 if checked >= options.max_match_candidates {
750 break;
751 }
752 }
753 best
754}
755
756fn offset_slot_index(offset: usize) -> usize {
757 offset_slot_for_match(offset)
758 .map(|(slot, _)| slot)
759 .unwrap_or(OFFSET_BITS.len() - 1)
760}
761
762fn is_better_fresh_match(
763 cost_model: Option<&CostModel<'_>>,
764 length: usize,
765 offset: usize,
766 best: Option<(usize, usize)>,
767) -> bool {
768 let Some((best_length, best_offset)) = best else {
769 return true;
770 };
771 if let Some(cost_model) = cost_model {
772 let candidate = SelectedMatch::Fresh { length, offset };
773 let best = SelectedMatch::Fresh {
774 length: best_length,
775 offset: best_offset,
776 };
777 let candidate_score = cost_model.selected_score(candidate).unwrap_or(isize::MIN);
778 let best_score = cost_model.selected_score(best).unwrap_or(isize::MIN);
779 return candidate_score > best_score
780 || (candidate_score == best_score
781 && (length > best_length || (length == best_length && offset < best_offset)));
782 }
783 length > best_length || (length == best_length && offset < best_offset)
784}
785
786fn best_old_offset_match(
787 input: &[u8],
788 pos: usize,
789 end: usize,
790 old_offsets: &[usize; 4],
791 cost_model: Option<&CostModel<'_>>,
792) -> Option<(usize, usize, usize)> {
793 let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
794 let mut best = None;
795 for (index, &offset) in old_offsets.iter().enumerate() {
796 if offset == 0 || offset > pos {
797 continue;
798 }
799 let length = match_length_at_offset(input, pos, max_length, offset);
800 if old_length_slot_for_match(length, offset).is_ok()
801 && is_better_old_offset_match(cost_model, index, length, offset, best)
802 {
803 best = Some((index, length, offset));
804 }
805 }
806 best
807}
808
809fn is_better_old_offset_match(
810 cost_model: Option<&CostModel<'_>>,
811 index: usize,
812 length: usize,
813 offset: usize,
814 best: Option<(usize, usize, usize)>,
815) -> bool {
816 let Some((best_index, best_length, best_offset)) = best else {
817 return true;
818 };
819 if let Some(cost_model) = cost_model {
820 let candidate = SelectedMatch::OldOffset {
821 index,
822 length,
823 offset,
824 };
825 let best = SelectedMatch::OldOffset {
826 index: best_index,
827 length: best_length,
828 offset: best_offset,
829 };
830 let candidate_score = cost_model.selected_score(candidate).unwrap_or(isize::MIN);
831 let best_score = cost_model.selected_score(best).unwrap_or(isize::MIN);
832 return candidate_score > best_score
833 || (candidate_score == best_score
834 && (length > best_length || (length == best_length && offset < best_offset)));
835 }
836 length > best_length || (length == best_length && offset < best_offset)
837}
838
839fn best_short_offset_match(input: &[u8], pos: usize, end: usize) -> Option<SelectedMatch> {
840 if end - pos < 2 {
841 return None;
842 }
843 let max_offset = pos.min(256);
844 (1..=max_offset)
845 .find(|&offset| {
846 input[pos] == input[pos - offset] && input[pos + 1] == input[pos + 1 - offset]
847 })
848 .map(|offset| SelectedMatch::ShortOffset { offset })
849}
850
851fn match_length_at_offset(input: &[u8], pos: usize, max_length: usize, offset: usize) -> usize {
852 let mut length = 0usize;
853 while length < max_length && input[pos + length] == input[pos + length - offset] {
854 length += 1;
855 }
856 length
857}
858
859fn match_length_adjustment(offset: usize) -> usize {
860 usize::from(offset >= 0x2000) + usize::from(offset >= 0x40000)
861}
862
863fn old_length_adjustment(offset: usize) -> usize {
864 usize::from(offset >= 0x101) + usize::from(offset >= 0x2000) + usize::from(offset >= 0x40000)
865}
866
867fn push_old_offset(old_offsets: &mut [usize; 4], offset: usize) {
868 old_offsets[3] = old_offsets[2];
869 old_offsets[2] = old_offsets[1];
870 old_offsets[1] = old_offsets[0];
871 old_offsets[0] = offset;
872}
873
874fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
875 if pos + 2 < input.len() {
876 buckets[match_hash(input, pos)].push(pos);
877 }
878}
879
880fn match_hash(input: &[u8], pos: usize) -> usize {
881 let value =
882 ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
883 value & (MATCH_HASH_BUCKETS - 1)
884}
885
886fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
887 if length < 3 {
888 return Err(Error::InvalidData("RAR 2.0 match length is too short"));
889 }
890 let adjusted = length - 3;
891 for (slot, &base) in LENGTH_BASES.iter().enumerate() {
892 let extra_bits = LENGTH_BITS[slot];
893 let max = base
894 + if extra_bits == 0 {
895 0
896 } else {
897 (1usize << extra_bits) - 1
898 };
899 if adjusted >= base && adjusted <= max {
900 return Ok((slot, adjusted - base));
901 }
902 }
903 Err(Error::InvalidData("RAR 2.0 match length is too long"))
904}
905
906fn old_length_slot_for_match(length: usize, offset: usize) -> Result<(usize, usize)> {
907 let encoded = length
908 .checked_sub(old_length_adjustment(offset))
909 .ok_or(Error::InvalidData(
910 "RAR 2.0 adjusted old-offset length underflows",
911 ))?;
912 if encoded < 2 {
913 return Err(Error::InvalidData(
914 "RAR 2.0 old-offset match length is too short",
915 ));
916 }
917 let adjusted = encoded - 2;
918 for (slot, &base) in LENGTH_BASES.iter().enumerate() {
919 let extra_bits = LENGTH_BITS[slot];
920 let max = base
921 + if extra_bits == 0 {
922 0
923 } else {
924 (1usize << extra_bits) - 1
925 };
926 if adjusted >= base && adjusted <= max {
927 return Ok((slot, adjusted - base));
928 }
929 }
930 Err(Error::InvalidData(
931 "RAR 2.0 old-offset match length is too long",
932 ))
933}
934
935fn offset_slot_for_match(offset: usize) -> Result<(usize, usize)> {
936 if offset == 0 {
937 return Err(Error::InvalidData("RAR 2.0 match offset is zero"));
938 }
939 let adjusted = offset - 1;
940 for (slot, &base) in OFFSET_BASES.iter().enumerate() {
941 let extra_bits = OFFSET_BITS[slot];
942 let max = base
943 + if extra_bits == 0 {
944 0
945 } else {
946 (1usize << extra_bits) - 1
947 };
948 if adjusted >= base && adjusted <= max {
949 return Ok((slot, adjusted - base));
950 }
951 }
952 Err(Error::InvalidData("RAR 2.0 match offset is too large"))
953}
954
955fn short_slot_for_match(offset: usize) -> Result<(usize, usize)> {
956 if offset == 0 || offset > 256 {
957 return Err(Error::InvalidData(
958 "RAR 2.0 short match offset is out of range",
959 ));
960 }
961 let adjusted = offset - 1;
962 for (slot, &base) in SHORT_BASES.iter().enumerate() {
963 let extra_bits = SHORT_BITS[slot];
964 let max = base
965 + if extra_bits == 0 {
966 0
967 } else {
968 (1usize << extra_bits) - 1
969 };
970 if adjusted >= base && adjusted <= max {
971 return Ok((slot, adjusted - base));
972 }
973 }
974 Err(Error::InvalidData(
975 "RAR 2.0 short match offset is out of range",
976 ))
977}
978
979fn literal_code_len(symbol_count: usize) -> Result<u8> {
980 if symbol_count == 0 {
981 return Err(Error::InvalidData("RAR 2.0 encoder has no literal symbols"));
982 }
983 let len = usize::BITS - (symbol_count - 1).leading_zeros();
984 u8::try_from(len.max(1)).map_err(|_| Error::InvalidData("RAR 2.0 literal table is too large"))
985}
986
987#[derive(Debug, Clone, Copy, PartialEq, Eq)]
988struct LevelToken {
989 symbol: usize,
990 extra_bits: u8,
991 extra_value: u8,
992}
993
994impl LevelToken {
995 const fn plain(symbol: usize) -> Self {
996 Self {
997 symbol,
998 extra_bits: 0,
999 extra_value: 0,
1000 }
1001 }
1002
1003 const fn repeat_previous(count: usize) -> Self {
1004 Self {
1005 symbol: 16,
1006 extra_bits: 2,
1007 extra_value: (count - 3) as u8,
1008 }
1009 }
1010
1011 const fn zero_run_short(count: usize) -> Self {
1012 Self {
1013 symbol: 17,
1014 extra_bits: 3,
1015 extra_value: (count - 3) as u8,
1016 }
1017 }
1018
1019 const fn zero_run_long(count: usize) -> Self {
1020 Self {
1021 symbol: 18,
1022 extra_bits: 7,
1023 extra_value: (count - 11) as u8,
1024 }
1025 }
1026}
1027
1028fn encode_table_level_tokens(lengths: &[u8; TABLE_COUNT]) -> Vec<LevelToken> {
1029 encode_level_tokens(lengths)
1030}
1031
1032fn encode_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
1033 let mut tokens = Vec::new();
1034 let mut pos = 0usize;
1035 let mut previous = None;
1036 while pos < lengths.len() {
1037 let value = lengths[pos];
1038 let mut run = 1usize;
1039 while pos + run < lengths.len() && lengths[pos + run] == value {
1040 run += 1;
1041 }
1042
1043 if value == 0 {
1044 emit_zero_level_run(&mut tokens, run);
1045 previous = Some(0);
1046 pos += run;
1047 continue;
1048 }
1049
1050 if previous == Some(value) && run >= 3 {
1051 let mut remaining = run;
1052 while remaining != 0 {
1053 let chunk = remaining.min(6);
1054 if chunk >= 3 {
1055 tokens.push(LevelToken::repeat_previous(chunk));
1056 remaining -= chunk;
1057 } else {
1058 tokens.extend(std::iter::repeat_n(
1059 LevelToken::plain(value as usize),
1060 chunk,
1061 ));
1062 remaining = 0;
1063 }
1064 }
1065 pos += run;
1066 continue;
1067 }
1068
1069 tokens.push(LevelToken::plain(value as usize));
1070 previous = Some(value);
1071 pos += 1;
1072 }
1073 tokens
1074}
1075
1076fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1077 while run != 0 {
1078 if run >= 11 {
1079 let mut chunk = run.min(138);
1080 if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1081 chunk -= 3;
1082 }
1083 tokens.push(LevelToken::zero_run_long(chunk));
1084 run -= chunk;
1085 } else if run >= 3 {
1086 let chunk = run.min(10);
1087 tokens.push(LevelToken::zero_run_short(chunk));
1088 run -= chunk;
1089 } else {
1090 tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
1091 break;
1092 }
1093 }
1094}
1095
1096fn level_code_lengths_for_tokens(tokens: &[LevelToken]) -> [u8; LEVEL_COUNT] {
1097 let mut used = [false; LEVEL_COUNT];
1098 for token in tokens {
1099 used[token.symbol] = true;
1100 }
1101 level_code_lengths_for_used_symbols(used)
1102}
1103
1104fn validated_lengths_for_frequencies<const N: usize>(
1105 frequencies: &[usize; N],
1106 max_bits: u8,
1107) -> [u8; N] {
1108 let mut lengths = [0u8; N];
1109 lengths.copy_from_slice(&huffman::lengths_for_frequencies(frequencies, max_bits));
1110 if canonical_codes(&lengths).is_ok() {
1111 return lengths;
1112 }
1113
1114 lengths.copy_from_slice(&huffman::uniform_lengths_for_frequencies(frequencies));
1115 lengths
1116}
1117
1118fn encode_audio_member(input: &[u8], channels: usize) -> Result<Vec<u8>> {
1119 if channels == 0 || channels > MAX_CHANNELS {
1120 return Err(Error::InvalidData("RAR 2.0 audio channel count is invalid"));
1121 }
1122 let deltas = audio_encode(input, channels)?;
1123 let mut levels = vec![0u8; AUDIO_COUNT * channels];
1124 for channel in 0..channels {
1125 let mut frequencies = [0usize; AUDIO_COUNT];
1126 for index in (channel..deltas.len()).step_by(channels) {
1127 frequencies[deltas[index] as usize] += 1;
1128 }
1129 let channel_lengths = huffman::lengths_for_frequency_array(&frequencies, 15);
1130 for (symbol, len) in channel_lengths.into_iter().enumerate() {
1131 levels[channel * AUDIO_COUNT + symbol] = len;
1132 }
1133 }
1134
1135 let level_symbols = encode_audio_table_level_symbols(&levels);
1136 let level_lengths = level_code_lengths_for_symbols(&level_symbols);
1137 let level_codes = canonical_codes(&level_lengths)?;
1138 let mut bits = BitWriter::default();
1139 bits.write_bits(0b10, 2); bits.write_bits((channels - 1) as u32, 2);
1141 for &len in &level_lengths {
1142 bits.write_bits(len as u32, 4);
1143 }
1144 for symbol in level_symbols {
1145 let code = level_codes[symbol].ok_or(Error::InvalidData(
1146 "RAR 2.0 encoder missing audio-level Huffman code",
1147 ))?;
1148 bits.write_bits(code.code as u32, code.len);
1149 match symbol {
1150 17 => bits.write_bits(0, 3),
1151 18 => bits.write_bits(127, 7),
1152 _ => {}
1153 }
1154 }
1155
1156 for channel in 0..channels {
1157 let table = &levels[channel * AUDIO_COUNT..(channel + 1) * AUDIO_COUNT];
1158 validate_audio_table(table)?;
1159 }
1160 let audio_codes = (0..channels)
1161 .map(|channel| canonical_codes(&levels[channel * AUDIO_COUNT..(channel + 1) * AUDIO_COUNT]))
1162 .collect::<Result<Vec<_>>>()?;
1163 for (index, &delta) in deltas.iter().enumerate() {
1164 let channel = index % channels;
1165 let code = audio_codes[channel][delta as usize].ok_or(Error::InvalidData(
1166 "RAR 2.0 encoder missing audio Huffman code",
1167 ))?;
1168 bits.write_bits(code.code as u32, code.len);
1169 }
1170 Ok(bits.finish())
1171}
1172
1173fn encode_audio_table_level_symbols(levels: &[u8]) -> Vec<usize> {
1174 levels.iter().map(|&len| len as usize).collect()
1175}
1176
1177fn level_code_lengths_for_symbols(symbols: &[usize]) -> [u8; LEVEL_COUNT] {
1178 let mut used = [false; LEVEL_COUNT];
1179 for &symbol in symbols {
1180 used[symbol] = true;
1181 }
1182 level_code_lengths_for_used_symbols(used)
1183}
1184
1185fn level_code_lengths_for_used_symbols(used: [bool; LEVEL_COUNT]) -> [u8; LEVEL_COUNT] {
1186 let used_count = used.iter().filter(|&&used| used).count();
1187 let len = huffman::bits_for_symbol_count(used_count);
1188 let mut lengths = [0u8; LEVEL_COUNT];
1189 for (symbol, is_used) in used.into_iter().enumerate() {
1190 if is_used {
1191 lengths[symbol] = len;
1192 }
1193 }
1194 lengths
1195}
1196
1197fn validate_audio_table(lengths: &[u8]) -> Result<()> {
1198 let mut count = [0u16; 16];
1199 for &len in lengths {
1200 if len > 15 {
1201 return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1202 }
1203 if len != 0 {
1204 count[len as usize] += 1;
1205 }
1206 }
1207 validate_huffman_counts(&count)
1208}
1209
1210fn audio_encode(input: &[u8], channels: usize) -> Result<Vec<u8>> {
1211 if channels == 0 || channels > MAX_CHANNELS {
1212 return Err(Error::InvalidData("RAR 2.0 audio channel count is invalid"));
1213 }
1214 let mut states = [AudioState::default(); MAX_CHANNELS];
1215 let mut channel_delta = 0i32;
1216 let mut deltas = Vec::with_capacity(input.len());
1217 for (index, &byte) in input.iter().enumerate() {
1218 let channel = index % channels;
1219 let state = &mut states[channel];
1220 state.byte_count = state.byte_count.wrapping_add(1);
1221 state.d4 = state.d3;
1222 state.d3 = state.d2;
1223 state.d2 = state.last_delta - state.d1;
1224 state.d1 = state.last_delta;
1225
1226 let predicted = 8 * state.last_char
1227 + state.k[0] * state.d1
1228 + state.k[1] * state.d2
1229 + state.k[2] * state.d3
1230 + state.k[3] * state.d4
1231 + state.k[4] * channel_delta;
1232 let predicted = (predicted >> 3) & 0xff;
1233 let delta = (predicted as u8).wrapping_sub(byte);
1234
1235 let d = (delta as i8 as i32) << 3;
1236 state.dif[0] = state.dif[0].wrapping_add(d.unsigned_abs());
1237 state.dif[1] = state.dif[1].wrapping_add((d - state.d1).unsigned_abs());
1238 state.dif[2] = state.dif[2].wrapping_add((d + state.d1).unsigned_abs());
1239 state.dif[3] = state.dif[3].wrapping_add((d - state.d2).unsigned_abs());
1240 state.dif[4] = state.dif[4].wrapping_add((d + state.d2).unsigned_abs());
1241 state.dif[5] = state.dif[5].wrapping_add((d - state.d3).unsigned_abs());
1242 state.dif[6] = state.dif[6].wrapping_add((d + state.d3).unsigned_abs());
1243 state.dif[7] = state.dif[7].wrapping_add((d - state.d4).unsigned_abs());
1244 state.dif[8] = state.dif[8].wrapping_add((d + state.d4).unsigned_abs());
1245 state.dif[9] = state.dif[9].wrapping_add((d - channel_delta).unsigned_abs());
1246 state.dif[10] = state.dif[10].wrapping_add((d + channel_delta).unsigned_abs());
1247
1248 channel_delta = (byte.wrapping_sub(state.last_char as u8)) as i8 as i32;
1249 state.last_delta = channel_delta;
1250 state.last_char = byte as i32;
1251
1252 if state.byte_count & 0x1f == 0 {
1253 let mut min_dif = state.dif[0];
1254 let mut num_min_dif = 0usize;
1255 state.dif[0] = 0;
1256 for diff_index in 1..state.dif.len() {
1257 if state.dif[diff_index] < min_dif {
1258 min_dif = state.dif[diff_index];
1259 num_min_dif = diff_index;
1260 }
1261 state.dif[diff_index] = 0;
1262 }
1263 match num_min_dif {
1264 1 if state.k[0] >= -16 => state.k[0] -= 1,
1265 2 if state.k[0] < 16 => state.k[0] += 1,
1266 3 if state.k[1] >= -16 => state.k[1] -= 1,
1267 4 if state.k[1] < 16 => state.k[1] += 1,
1268 5 if state.k[2] >= -16 => state.k[2] -= 1,
1269 6 if state.k[2] < 16 => state.k[2] += 1,
1270 7 if state.k[3] >= -16 => state.k[3] -= 1,
1271 8 if state.k[3] < 16 => state.k[3] += 1,
1272 9 if state.k[4] >= -16 => state.k[4] -= 1,
1273 10 if state.k[4] < 16 => state.k[4] += 1,
1274 _ => {}
1275 }
1276 }
1277
1278 deltas.push(delta);
1279 }
1280 Ok(deltas)
1281}
1282
1283#[derive(Debug, Clone, Copy)]
1284struct HuffmanCode {
1285 code: u16,
1286 len: u8,
1287}
1288
1289fn canonical_codes(lengths: &[u8]) -> Result<Vec<Option<HuffmanCode>>> {
1290 let mut count = [0u16; 16];
1291 for &len in lengths {
1292 if len > 15 {
1293 return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1294 }
1295 if len != 0 {
1296 count[len as usize] += 1;
1297 }
1298 }
1299 validate_huffman_counts(&count)?;
1300
1301 let mut next_code = [0u16; 16];
1302 let mut code = 0u16;
1303 for len in 1..=15 {
1304 code = (code + count[len - 1]) << 1;
1305 next_code[len] = code;
1306 }
1307
1308 let mut codes = vec![None; lengths.len()];
1309 for (symbol, &len) in lengths.iter().enumerate() {
1310 if len == 0 {
1311 continue;
1312 }
1313 let code = next_code[len as usize];
1314 next_code[len as usize] += 1;
1315 codes[symbol] = Some(HuffmanCode { code, len });
1316 }
1317 Ok(codes)
1318}
1319
1320#[derive(Debug, Clone)]
1321pub struct Unpack20 {
1322 bits: BitReader,
1323 levels: [u8; OLD_LEVEL_COUNT],
1324 main: Huffman,
1325 offsets: Huffman,
1326 lengths: Huffman,
1327 audio_tables: [Huffman; MAX_CHANNELS],
1328 audio_block: bool,
1329 channels: usize,
1330 cur_channel: usize,
1331 audio: [AudioState; MAX_CHANNELS],
1332 channel_delta: i32,
1333 old_offsets: [usize; 4],
1334 last_offset: usize,
1335 last_length: usize,
1336 pending_match: Option<(usize, usize)>,
1337 in_block: bool,
1338 output: Vec<u8>,
1339 base_offset: usize,
1340}
1341
1342impl Unpack20 {
1343 pub fn new() -> Self {
1344 Self {
1345 bits: BitReader::new(),
1346 levels: [0; OLD_LEVEL_COUNT],
1347 main: Huffman::empty(),
1348 offsets: Huffman::empty(),
1349 lengths: Huffman::empty(),
1350 audio_tables: std::array::from_fn(|_| Huffman::empty()),
1351 audio_block: false,
1352 channels: 1,
1353 cur_channel: 0,
1354 audio: [AudioState::default(); MAX_CHANNELS],
1355 channel_delta: 0,
1356 old_offsets: [0; 4],
1357 last_offset: 0,
1358 last_length: 0,
1359 pending_match: None,
1360 in_block: false,
1361 output: Vec::new(),
1362 base_offset: 0,
1363 }
1364 }
1365
1366 pub fn decode_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
1367 let start = self.current_pos();
1368 let target = start
1369 .checked_add(output_size)
1370 .ok_or(Error::InvalidData("RAR 2.0 output size overflows"))?;
1371 if !input.is_empty() {
1372 self.bits = BitReader::new();
1373 }
1374 self.bits.append(input);
1375 self.decode_until(target).map_err(|error| match error {
1376 Error::NeedMoreInput => Error::InvalidData("RAR 2.0 bitstream is truncated"),
1377 error => error,
1378 })?;
1379 self.read_last_tables()?;
1380 let out = self.raw_range(start, target)?.to_vec();
1381 self.trim_history(target, target);
1382 Ok(out)
1383 }
1384
1385 pub fn decode_member_to(
1386 &mut self,
1387 input: &[u8],
1388 output_size: usize,
1389 out: &mut impl Write,
1390 ) -> Result<()> {
1391 let decoded = self.decode_member(input, output_size)?;
1392 out.write_all(&decoded)
1393 .map_err(|_| Error::InvalidData("RAR 2.0 output write failed"))
1394 }
1395
1396 pub fn decode_member_from_reader(
1397 &mut self,
1398 input: &mut impl Read,
1399 output_size: usize,
1400 out: &mut impl Write,
1401 ) -> Result<()> {
1402 let start = self.current_pos();
1403 let target = start
1404 .checked_add(output_size)
1405 .ok_or(Error::InvalidData("RAR 2.0 output size overflows"))?;
1406 self.bits = BitReader::new();
1407 let mut packed = Vec::new();
1408 input
1409 .read_to_end(&mut packed)
1410 .map_err(|_| Error::InvalidData("RAR 2.0 input read failed"))?;
1411 self.bits.append(&packed);
1412 if !self.in_block && self.bits.remaining_bytes_from_current() > 0 {
1413 self.read_tables().map_err(|error| match error {
1414 Error::NeedMoreInput => Error::InvalidData("RAR 2.0 bitstream is truncated"),
1415 error => error,
1416 })?;
1417 self.in_block = true;
1418 }
1419 self.decode_until(target).map_err(|error| match error {
1420 Error::NeedMoreInput => Error::InvalidData("RAR 2.0 bitstream is truncated"),
1421 error => error,
1422 })?;
1423 self.read_last_tables()?;
1424
1425 let decoded = self.raw_range(start, target)?;
1426 out.write_all(decoded)
1427 .map_err(|_| Error::InvalidData("RAR 2.0 output write failed"))?;
1428 self.trim_history(target, target);
1429 Ok(())
1430 }
1431
1432 fn decode_until(&mut self, target: usize) -> Result<()> {
1433 while self.current_pos() < target {
1434 self.drain_pending_match(target)?;
1435 if self.current_pos() >= target {
1436 break;
1437 }
1438 if !self.in_block {
1439 self.read_tables()?;
1440 self.in_block = true;
1441 }
1442 self.decode_lz(target)?;
1443 }
1444 Ok(())
1445 }
1446
1447 fn read_tables(&mut self) -> Result<()> {
1448 let bit_field = self.bits.peek_bits(16)?;
1449 self.audio_block = bit_field & 0x8000 != 0;
1450 let keep_tables = bit_field & 0x4000 != 0;
1451 self.bits.read_bits(2)?;
1452 if !keep_tables {
1453 self.levels = [0; OLD_LEVEL_COUNT];
1454 }
1455
1456 let table_size = if self.audio_block {
1457 self.channels = ((bit_field >> 12) as usize & 3) + 1;
1458 if self.cur_channel >= self.channels {
1459 self.cur_channel = 0;
1460 }
1461 self.bits.read_bits(2)?;
1462 AUDIO_COUNT * self.channels
1463 } else {
1464 TABLE_COUNT
1465 };
1466
1467 let level_lengths = Self::read_level_lengths(&mut self.bits)?;
1468 let level_decoder = Huffman::from_lengths(&level_lengths)?;
1469 let mut new_levels = [0u8; OLD_LEVEL_COUNT];
1470 let mut pos = 0usize;
1471 while pos < table_size {
1472 let symbol = level_decoder.decode(&mut self.bits)?;
1473 match symbol {
1474 0..=15 => {
1475 new_levels[pos] = (self.levels[pos].wrapping_add(symbol as u8)) & 0x0f;
1476 pos += 1;
1477 }
1478 16 => {
1479 if pos == 0 {
1480 return Err(Error::InvalidData("RAR 2.0 table repeat at start"));
1481 }
1482 let count = 3 + self.bits.read_bits(2)? as usize;
1483 let value = new_levels[pos - 1];
1484 fill_levels(&mut new_levels, &mut pos, count, value)?;
1485 }
1486 17 => {
1487 let count = 3 + self.bits.read_bits(3)? as usize;
1488 fill_levels(&mut new_levels, &mut pos, count, 0)?;
1489 }
1490 18 => {
1491 let count = 11 + self.bits.read_bits(7)? as usize;
1492 fill_levels(&mut new_levels, &mut pos, count, 0)?;
1493 }
1494 _ => return Err(Error::InvalidData("RAR 2.0 invalid level symbol")),
1495 }
1496 }
1497
1498 self.levels = new_levels;
1499 if self.audio_block {
1500 for channel in 0..self.channels {
1501 let start = channel * AUDIO_COUNT;
1502 self.audio_tables[channel] =
1503 Huffman::from_lengths(&self.levels[start..start + AUDIO_COUNT])?;
1504 }
1505 } else {
1506 self.main = Huffman::from_lengths(&self.levels[..MAIN_COUNT])?;
1507 self.offsets =
1508 Huffman::from_lengths(&self.levels[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
1509 self.lengths =
1510 Huffman::from_lengths(&self.levels[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT])?;
1511 }
1512 Ok(())
1513 }
1514
1515 fn read_level_lengths(bits: &mut BitReader) -> Result<[u8; LEVEL_COUNT]> {
1516 let mut lengths = [0u8; LEVEL_COUNT];
1517 for length in &mut lengths {
1518 *length = bits.read_bits(4)? as u8;
1519 }
1520 Ok(lengths)
1521 }
1522
1523 fn decode_lz(&mut self, output_size: usize) -> Result<()> {
1524 while self.current_pos() < output_size {
1525 if self.audio_block {
1526 self.decode_audio_byte()?;
1527 if !self.in_block {
1528 return Ok(());
1529 }
1530 continue;
1531 }
1532 let symbol = self.main.decode(&mut self.bits)?;
1533 match symbol {
1534 0..=255 => self.output.push(symbol as u8),
1535 256 => {
1536 if self.last_length != 0 {
1537 let length = self.last_length;
1538 let offset = self.last_offset;
1539 self.push_old_offset(offset);
1540 self.copy_match(length, offset, output_size)?;
1541 }
1542 }
1543 257..=260 => {
1544 let index = symbol - 257;
1545 let offset = self.old_offsets[index];
1546 let length_slot = self.lengths.decode(&mut self.bits)?;
1547 if length_slot >= LENGTH_COUNT {
1548 return Err(Error::InvalidData("RAR 2.0 invalid repeat length slot"));
1549 }
1550 let mut length = LENGTH_BASES[length_slot] + 2;
1551 if LENGTH_BITS[length_slot] != 0 {
1552 length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
1553 }
1554 if offset >= 0x101 {
1555 length += 1;
1556 }
1557 if offset >= 0x2000 {
1558 length += 1;
1559 }
1560 if offset >= 0x40000 {
1561 length += 1;
1562 }
1563 self.push_old_offset(offset);
1564 self.last_offset = offset;
1565 self.last_length = length;
1566 self.copy_match(length, offset, output_size)?;
1567 }
1568 261..=268 => {
1569 let index = symbol - 261;
1570 let mut offset = SHORT_BASES[index] + 1;
1571 if SHORT_BITS[index] != 0 {
1572 offset += self.bits.read_bits(SHORT_BITS[index])? as usize;
1573 }
1574 self.push_old_offset(offset);
1575 self.last_offset = offset;
1576 self.last_length = 2;
1577 self.copy_match(2, offset, output_size)?;
1578 }
1579 269 => {
1580 self.in_block = false;
1581 return Ok(());
1582 }
1583 270..=297 => {
1584 let length_slot = symbol - 270;
1585 let mut length = LENGTH_BASES[length_slot] + 3;
1586 if LENGTH_BITS[length_slot] != 0 {
1587 length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
1588 }
1589 let offset = self.read_offset()?;
1590 if offset >= 0x2000 {
1591 length += 1;
1592 }
1593 if offset >= 0x40000 {
1594 length += 1;
1595 }
1596 self.push_old_offset(offset);
1597 self.last_offset = offset;
1598 self.last_length = length;
1599 self.copy_match(length, offset, output_size)?;
1600 }
1601 _ => return Err(Error::InvalidData("RAR 2.0 invalid main symbol")),
1602 }
1603 }
1604 Ok(())
1605 }
1606
1607 fn decode_audio_byte(&mut self) -> Result<()> {
1608 let symbol = self.audio_tables[self.cur_channel].decode(&mut self.bits)?;
1609 if symbol == 256 {
1610 self.in_block = false;
1611 return Ok(());
1612 }
1613 if symbol > 256 {
1614 return Err(Error::InvalidData("RAR 2.0 invalid audio symbol"));
1615 }
1616 let byte = self.decode_audio(symbol as u8);
1617 self.output.push(byte);
1618 self.cur_channel += 1;
1619 if self.cur_channel == self.channels {
1620 self.cur_channel = 0;
1621 }
1622 Ok(())
1623 }
1624
1625 fn decode_audio(&mut self, delta: u8) -> u8 {
1626 let state = &mut self.audio[self.cur_channel];
1627 state.byte_count = state.byte_count.wrapping_add(1);
1628 state.d4 = state.d3;
1629 state.d3 = state.d2;
1630 state.d2 = state.last_delta - state.d1;
1631 state.d1 = state.last_delta;
1632
1633 let predicted = 8 * state.last_char
1634 + state.k[0] * state.d1
1635 + state.k[1] * state.d2
1636 + state.k[2] * state.d3
1637 + state.k[3] * state.d4
1638 + state.k[4] * self.channel_delta;
1639 let predicted = (predicted >> 3) & 0xff;
1640 let byte = predicted.wrapping_sub(delta as i32) as u8;
1641
1642 let d = (delta as i8 as i32) << 3;
1643 state.dif[0] = state.dif[0].wrapping_add(d.unsigned_abs());
1644 state.dif[1] = state.dif[1].wrapping_add((d - state.d1).unsigned_abs());
1645 state.dif[2] = state.dif[2].wrapping_add((d + state.d1).unsigned_abs());
1646 state.dif[3] = state.dif[3].wrapping_add((d - state.d2).unsigned_abs());
1647 state.dif[4] = state.dif[4].wrapping_add((d + state.d2).unsigned_abs());
1648 state.dif[5] = state.dif[5].wrapping_add((d - state.d3).unsigned_abs());
1649 state.dif[6] = state.dif[6].wrapping_add((d + state.d3).unsigned_abs());
1650 state.dif[7] = state.dif[7].wrapping_add((d - state.d4).unsigned_abs());
1651 state.dif[8] = state.dif[8].wrapping_add((d + state.d4).unsigned_abs());
1652 state.dif[9] = state.dif[9].wrapping_add((d - self.channel_delta).unsigned_abs());
1653 state.dif[10] = state.dif[10].wrapping_add((d + self.channel_delta).unsigned_abs());
1654
1655 self.channel_delta = (byte.wrapping_sub(state.last_char as u8)) as i8 as i32;
1656 state.last_delta = self.channel_delta;
1657 state.last_char = byte as i32;
1658
1659 if state.byte_count & 0x1f == 0 {
1660 let mut min_dif = state.dif[0];
1661 let mut num_min_dif = 0usize;
1662 state.dif[0] = 0;
1663 for index in 1..state.dif.len() {
1664 if state.dif[index] < min_dif {
1665 min_dif = state.dif[index];
1666 num_min_dif = index;
1667 }
1668 state.dif[index] = 0;
1669 }
1670 match num_min_dif {
1671 1 if state.k[0] >= -16 => state.k[0] -= 1,
1672 2 if state.k[0] < 16 => state.k[0] += 1,
1673 3 if state.k[1] >= -16 => state.k[1] -= 1,
1674 4 if state.k[1] < 16 => state.k[1] += 1,
1675 5 if state.k[2] >= -16 => state.k[2] -= 1,
1676 6 if state.k[2] < 16 => state.k[2] += 1,
1677 7 if state.k[3] >= -16 => state.k[3] -= 1,
1678 8 if state.k[3] < 16 => state.k[3] += 1,
1679 9 if state.k[4] >= -16 => state.k[4] -= 1,
1680 10 if state.k[4] < 16 => state.k[4] += 1,
1681 _ => {}
1682 }
1683 }
1684
1685 byte
1686 }
1687
1688 fn read_offset(&mut self) -> Result<usize> {
1689 let slot = self.offsets.decode(&mut self.bits)?;
1690 if slot >= OFFSET_COUNT {
1691 return Err(Error::InvalidData("RAR 2.0 invalid offset slot"));
1692 }
1693 let mut offset = OFFSET_BASES[slot] + 1;
1694 if OFFSET_BITS[slot] != 0 {
1695 offset += self.bits.read_bits(OFFSET_BITS[slot])? as usize;
1696 }
1697 Ok(offset)
1698 }
1699
1700 fn copy_match(&mut self, length: usize, offset: usize, output_size: usize) -> Result<()> {
1701 let offset = if offset == 0 { 1 } else { offset };
1702 let current = self.current_pos();
1703 if offset > current {
1704 return Err(Error::InvalidData("RAR 2.0 match distance is out of range"));
1705 }
1706 for index in 0..length {
1707 if self.current_pos() >= output_size {
1708 self.pending_match = Some((length - index, offset));
1709 break;
1710 }
1711 let src = self.current_pos() - offset;
1712 let byte = *self
1713 .raw_byte(src)
1714 .ok_or(Error::InvalidData("RAR 2.0 match distance is out of range"))?;
1715 self.output.push(byte);
1716 }
1717 Ok(())
1718 }
1719
1720 fn drain_pending_match(&mut self, output_size: usize) -> Result<()> {
1721 let Some((length, offset)) = self.pending_match.take() else {
1722 return Ok(());
1723 };
1724 self.copy_match(length, offset, output_size)
1725 }
1726
1727 fn read_last_tables(&mut self) -> Result<()> {
1728 if self.bits.remaining_bytes_from_current() < 5 {
1729 return Ok(());
1730 }
1731 if self.audio_block {
1732 if self.audio_tables[self.cur_channel].symbols.is_empty() {
1733 return Ok(());
1734 }
1735 if self.audio_tables[self.cur_channel].decode(&mut self.bits)? == 256 {
1736 self.read_tables()?;
1737 self.in_block = true;
1738 }
1739 } else {
1740 if self.main.symbols.is_empty() {
1741 return Ok(());
1742 }
1743 if self.main.decode(&mut self.bits)? == 269 {
1744 self.read_tables()?;
1745 self.in_block = true;
1746 }
1747 }
1748 Ok(())
1749 }
1750
1751 fn push_old_offset(&mut self, offset: usize) {
1752 self.old_offsets[3] = self.old_offsets[2];
1753 self.old_offsets[2] = self.old_offsets[1];
1754 self.old_offsets[1] = self.old_offsets[0];
1755 self.old_offsets[0] = offset;
1756 }
1757
1758 fn current_pos(&self) -> usize {
1759 self.base_offset + self.output.len()
1760 }
1761
1762 fn raw_byte(&self, position: usize) -> Option<&u8> {
1763 self.output.get(position.checked_sub(self.base_offset)?)
1764 }
1765
1766 fn raw_range(&self, start: usize, end: usize) -> Result<&[u8]> {
1767 if start < self.base_offset || end < start {
1768 return Err(Error::InvalidData(
1769 "RAR 2.0 retained history is unavailable",
1770 ));
1771 }
1772 let rel_start = start - self.base_offset;
1773 let rel_end = end - self.base_offset;
1774 self.output
1775 .get(rel_start..rel_end)
1776 .ok_or(Error::InvalidData(
1777 "RAR 2.0 retained history is unavailable",
1778 ))
1779 }
1780
1781 fn trim_history(&mut self, flushed_pos: usize, current_pos: usize) {
1782 let keep_from = current_pos.saturating_sub(MAX_HISTORY).min(flushed_pos);
1783 if keep_from <= self.base_offset {
1784 return;
1785 }
1786 let drain = keep_from - self.base_offset;
1787 self.output.drain(..drain);
1788 self.base_offset = keep_from;
1789 }
1790}
1791
1792impl Default for Unpack20 {
1793 fn default() -> Self {
1794 Self::new()
1795 }
1796}
1797
1798#[derive(Debug, Clone, Copy, Default)]
1799struct AudioState {
1800 k: [i32; 5],
1801 d1: i32,
1802 d2: i32,
1803 d3: i32,
1804 d4: i32,
1805 last_delta: i32,
1806 last_char: i32,
1807 byte_count: u32,
1808 dif: [u32; 11],
1809}
1810
1811fn fill_levels(levels: &mut [u8], pos: &mut usize, count: usize, value: u8) -> Result<()> {
1812 let end = pos
1813 .checked_add(count)
1814 .ok_or(Error::InvalidData("RAR 2.0 table run overflows"))?;
1815 let end = end.min(levels.len());
1816 for item in &mut levels[*pos..end] {
1817 *item = value;
1818 }
1819 *pos = end;
1820 Ok(())
1821}
1822
1823#[derive(Debug, Clone)]
1824struct Huffman {
1825 symbols: Vec<HuffmanSymbol>,
1826 first_code: [u16; 16],
1827 first_index: [usize; 16],
1828 counts: [u16; 16],
1829}
1830
1831#[derive(Debug, Clone)]
1832struct HuffmanSymbol {
1833 code: u16,
1834 len: u8,
1835 symbol: usize,
1836}
1837
1838impl Huffman {
1839 fn empty() -> Self {
1840 Self {
1841 symbols: Vec::new(),
1842 first_code: [0; 16],
1843 first_index: [0; 16],
1844 counts: [0; 16],
1845 }
1846 }
1847
1848 fn from_lengths(lengths: &[u8]) -> Result<Self> {
1849 let mut count = [0u16; 16];
1850 for &len in lengths {
1851 if len > 15 {
1852 return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1853 }
1854 if len != 0 {
1855 count[len as usize] += 1;
1856 }
1857 }
1858 if count.iter().all(|&value| value == 0) {
1859 return Ok(Self::empty());
1860 }
1861 validate_huffman_counts(&count)?;
1862
1863 let mut first_code = [0u16; 16];
1864 let mut next_code = [0u16; 16];
1865 let mut code = 0u16;
1866 for len in 1..=15 {
1867 code = (code + count[len - 1]) << 1;
1868 first_code[len] = code;
1869 next_code[len] = code;
1870 }
1871
1872 let mut first_index = [0usize; 16];
1873 let mut index = 0usize;
1874 for len in 1..=15 {
1875 first_index[len] = index;
1876 index += usize::from(count[len]);
1877 }
1878
1879 let mut symbols = Vec::new();
1880 for (symbol, &len) in lengths.iter().enumerate() {
1881 if len == 0 {
1882 continue;
1883 }
1884 let code = next_code[len as usize];
1885 next_code[len as usize] += 1;
1886 symbols.push(HuffmanSymbol { code, len, symbol });
1887 }
1888 symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
1889 Ok(Self {
1890 symbols,
1891 first_code,
1892 first_index,
1893 counts: count,
1894 })
1895 }
1896
1897 fn decode(&self, bits: &mut BitReader) -> Result<usize> {
1898 let mut code = 0u16;
1899 if self.symbols.is_empty() {
1900 return Err(Error::InvalidData("RAR 2.0 empty Huffman table"));
1901 }
1902 for len in 1..=15 {
1903 code = (code << 1) | bits.read_bit()? as u16;
1904 let count = self.counts[len];
1905 if count != 0 {
1906 let first = self.first_code[len];
1907 let offset = code.wrapping_sub(first);
1908 if offset < count {
1909 let index = self.first_index[len] + usize::from(offset);
1910 return Ok(self.symbols[index].symbol);
1911 }
1912 }
1913 }
1914 Err(Error::InvalidData("RAR 2.0 invalid Huffman code"))
1915 }
1916}
1917
1918fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
1919 let mut available = 1i32;
1920 for &len_count in count.iter().skip(1) {
1921 available = (available << 1) - i32::from(len_count);
1922 if available < 0 {
1923 return Err(Error::InvalidData("RAR 2.0 oversubscribed Huffman table"));
1924 }
1925 }
1926 Ok(())
1927}
1928
1929#[derive(Debug, Clone)]
1930struct BitReader {
1931 input: Vec<u8>,
1932 bit_pos: usize,
1933}
1934
1935impl BitReader {
1936 fn new() -> Self {
1937 Self {
1938 input: Vec::new(),
1939 bit_pos: 0,
1940 }
1941 }
1942
1943 fn append(&mut self, input: &[u8]) {
1944 self.compact();
1945 self.input.extend_from_slice(input);
1946 }
1947
1948 fn compact(&mut self) {
1949 let bytes = self.bit_pos / 8;
1950 if bytes == 0 {
1951 return;
1952 }
1953 self.input.drain(..bytes);
1954 self.bit_pos -= bytes * 8;
1955 }
1956
1957 fn read_bit(&mut self) -> Result<u8> {
1958 self.read_bits(1).map(|value| value as u8)
1959 }
1960
1961 fn read_bits(&mut self, count: u8) -> Result<u32> {
1962 let value = self.peek_bits(count)?;
1963 self.bit_pos += count as usize;
1964 Ok(value)
1965 }
1966
1967 fn peek_bits(&self, count: u8) -> Result<u32> {
1968 if count > 24 {
1969 return Err(Error::InvalidData("RAR 2.0 bit read is too wide"));
1970 }
1971 let mut value = 0u32;
1972 for i in 0..count as usize {
1973 let bit_index = self.bit_pos + i;
1974 let byte = *self.input.get(bit_index / 8).ok_or(Error::NeedMoreInput)?;
1975 let bit = (byte >> (7 - (bit_index % 8))) & 1;
1976 value = (value << 1) | bit as u32;
1977 }
1978 Ok(value)
1979 }
1980
1981 fn remaining_bytes_from_current(&self) -> usize {
1982 self.input.len().saturating_sub(self.bit_pos / 8)
1983 }
1984}
1985
1986#[derive(Default)]
1987struct BitWriter {
1988 bytes: Vec<u8>,
1989 bit_pos: usize,
1990}
1991
1992impl BitWriter {
1993 fn write_bits(&mut self, value: u32, count: u8) {
1994 for shift in (0..count).rev() {
1995 self.write_bit(((value >> shift) & 1) != 0);
1996 }
1997 }
1998
1999 fn write_bit(&mut self, bit: bool) {
2000 if self.bit_pos.is_multiple_of(8) {
2001 self.bytes.push(0);
2002 }
2003 if bit {
2004 let shift = 7 - (self.bit_pos % 8);
2005 *self.bytes.last_mut().unwrap() |= 1 << shift;
2006 }
2007 self.bit_pos += 1;
2008 }
2009
2010 fn finish(self) -> Vec<u8> {
2011 self.bytes
2012 }
2013}
2014
2015#[cfg(test)]
2016mod tests {
2017 use super::{
2018 encode_tokens, unpack20_decode, unpack20_encode_literals, BitWriter, EncodeOptions,
2019 EncodeToken, Error, Huffman, Unpack20, Unpack20Encoder,
2020 };
2021
2022 const AUTOREJ_PACKED: &[u8] = &[
2023 0x09, 0x14, 0x0c, 0x94, 0x00, 0x00, 0x00, 0x00, 0x00, 0xce, 0xf8, 0x1f, 0xc1, 0xe6, 0x05,
2024 0xfc, 0x39, 0xc3, 0x50, 0x65, 0x08, 0x41, 0x94, 0xc4, 0x1d, 0xf3, 0xcd, 0x0d, 0x8e, 0x20,
2025 0xf5, 0x9d, 0x8e, 0x76, 0x1d, 0xc5, 0x19, 0xde, 0x16, 0x5b, 0x52, 0xb8, 0x8e, 0x75, 0xcd,
2026 0xaf, 0x1f, 0xfc, 0x9e, 0xf7, 0x00, 0x01, 0xbe, 0x90,
2027 ];
2028
2029 #[test]
2030 fn decodes_rar20_lz_member() {
2031 assert_eq!(
2032 unpack20_decode(AUTOREJ_PACKED, expected_text().len()).unwrap(),
2033 expected_text()
2034 );
2035 }
2036
2037 #[test]
2038 fn rejects_oversubscribed_rar20_huffman_tables() {
2039 assert!(matches!(
2040 Huffman::from_lengths(&[1, 1, 1]),
2041 Err(Error::InvalidData("RAR 2.0 oversubscribed Huffman table"))
2042 ));
2043 }
2044
2045 #[test]
2046 fn decode_member_from_reader_accepts_incremental_input() {
2047 struct TinyReader<'a> {
2048 input: &'a [u8],
2049 }
2050
2051 impl std::io::Read for TinyReader<'_> {
2052 fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
2053 if self.input.is_empty() {
2054 return Ok(0);
2055 }
2056 let len = self.input.len().min(out.len()).min(3);
2057 out[..len].copy_from_slice(&self.input[..len]);
2058 self.input = &self.input[len..];
2059 Ok(len)
2060 }
2061 }
2062
2063 let mut decoder = Unpack20::new();
2064 let mut reader = TinyReader {
2065 input: AUTOREJ_PACKED,
2066 };
2067 let mut output = Vec::new();
2068 decoder
2069 .decode_member_from_reader(&mut reader, expected_text().len(), &mut output)
2070 .unwrap();
2071
2072 assert_eq!(output, expected_text());
2073 }
2074
2075 #[test]
2076 fn decodes_synthetic_audio_block() {
2077 let packed = synthetic_audio_block(8);
2078 let mut decoder = Unpack20::new();
2079
2080 assert_eq!(decoder.decode_member(&packed, 8).unwrap(), vec![0; 8]);
2081 }
2082
2083 #[test]
2084 fn audio_encoder_round_trips_interleaved_pcm_like_payload() {
2085 let input = interleaved_pcm_like_payload();
2086 let packed = super::encode_audio_member(&input, 4).unwrap();
2087 let decoded = unpack20_decode(&packed, input.len()).unwrap();
2088
2089 assert_eq!(decoded, input);
2090 }
2091
2092 #[test]
2093 fn auto_encoder_uses_audio_when_it_beats_lz() {
2094 let input = interleaved_pcm_like_payload();
2095 let lz = unpack20_encode_literals(&input).unwrap();
2096 let auto = super::unpack20_encode_auto(&input).unwrap();
2097 let decoded = unpack20_decode(&auto, input.len()).unwrap();
2098
2099 assert!(auto.len() < lz.len());
2100 assert_eq!(decoded, input);
2101 }
2102
2103 #[test]
2104 fn default_encode_options_match_legacy_entry_points() {
2105 let input = b"rar20 option plumbing preserves default output ".repeat(128);
2106 assert_eq!(
2107 unpack20_encode_literals(&input).unwrap(),
2108 super::unpack20_encode_literals_with_options(&input, EncodeOptions::default()).unwrap()
2109 );
2110 assert_eq!(
2111 super::unpack20_encode_auto(&input).unwrap(),
2112 super::unpack20_encode_auto_with_options(&input, EncodeOptions::default()).unwrap()
2113 );
2114
2115 let first = b"solid rar20 option seed ".repeat(64);
2116 let second = b"solid rar20 option seed with suffix ".repeat(32);
2117 let mut legacy = Unpack20Encoder::new();
2118 let mut explicit = Unpack20Encoder::with_options(EncodeOptions::default());
2119 assert_eq!(
2120 legacy.encode_member(&first).unwrap(),
2121 explicit.encode_member(&first).unwrap()
2122 );
2123 assert_eq!(
2124 legacy.encode_member(&second).unwrap(),
2125 explicit.encode_member(&second).unwrap()
2126 );
2127 }
2128
2129 #[test]
2130 fn encode_options_can_disable_fresh_lz_matches() {
2131 let input = b"abcdefabcdefabcdefabcdef";
2132 let default_tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2133 let literalish_tokens = encode_tokens(input, &[], EncodeOptions::new(0), None);
2134
2135 assert!(default_tokens
2136 .iter()
2137 .any(|token| matches!(token, EncodeToken::Match { .. })));
2138 assert!(!literalish_tokens
2139 .iter()
2140 .any(|token| matches!(token, EncodeToken::Match { .. })));
2141 }
2142
2143 #[test]
2144 fn table_level_encoder_uses_rar20_run_symbols() {
2145 let lengths = [0, 0, 0, 0, 5, 5, 5, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2];
2146 let tokens = super::encode_level_tokens(&lengths);
2147
2148 assert_eq!(
2149 tokens,
2150 vec![
2151 super::LevelToken::zero_run_short(4),
2152 super::LevelToken::plain(5),
2153 super::LevelToken::repeat_previous(3),
2154 super::LevelToken::plain(7),
2155 super::LevelToken::zero_run_short(10),
2156 super::LevelToken::plain(2),
2157 ]
2158 );
2159 }
2160
2161 #[test]
2162 fn decodes_back_to_back_fresh_audio_blocks() {
2163 let mut bits = BitWriter::default();
2170 write_fresh_audio_block(&mut bits, 4, true);
2171 write_fresh_audio_block(&mut bits, 4, false);
2172 let packed = bits.finish();
2173
2174 let mut decoder = Unpack20::new();
2175
2176 assert_eq!(decoder.decode_member(&packed, 8).unwrap(), vec![0; 8]);
2177 }
2178
2179 fn expected_text() -> Vec<u8> {
2180 b"Hello text not audio.\r\n".repeat(100)
2181 }
2182
2183 fn interleaved_pcm_like_payload() -> Vec<u8> {
2184 let mut input = Vec::new();
2185 for sample in 0..8192i16 {
2186 let left = sample.wrapping_mul(3).wrapping_add(200);
2187 let right = sample.wrapping_mul(3).wrapping_sub(200);
2188 input.extend_from_slice(&left.to_le_bytes());
2189 input.extend_from_slice(&right.to_le_bytes());
2190 }
2191 input
2192 }
2193
2194 fn synthetic_audio_block(samples: usize) -> Vec<u8> {
2195 let mut bits = BitWriter::default();
2196
2197 bits.write_bits(0b10, 2); bits.write_bits(0, 2); for symbol in 0..19 {
2201 let len = if symbol == 1 || symbol == 18 { 1 } else { 0 };
2202 bits.write_bits(len, 4);
2203 }
2204
2205 bits.write_bit(false); bits.write_bit(true); bits.write_bits(127, 7);
2208 bits.write_bit(true); bits.write_bits(107, 7);
2210
2211 for _ in 0..samples {
2212 bits.write_bit(false); }
2214
2215 bits.finish()
2216 }
2217
2218 fn write_fresh_audio_block(bits: &mut BitWriter, samples: usize, emit_end_sentinel: bool) {
2219 bits.write_bits(0b10, 2); bits.write_bits(0, 2); for symbol in 0..19 {
2223 let len = if symbol == 1 || symbol == 18 { 1 } else { 0 };
2224 bits.write_bits(len, 4);
2225 }
2226
2227 bits.write_bit(false); bits.write_bit(true); bits.write_bits(127, 7);
2231 bits.write_bit(true); bits.write_bits(106, 7);
2233 bits.write_bit(false); for _ in 0..samples {
2236 bits.write_bit(false); }
2238 if emit_end_sentinel {
2239 bits.write_bit(true); }
2241 }
2242
2243 #[test]
2244 fn literal_encoder_round_trips_rar20_lz_blocks() {
2245 let input = b"literal-only RAR 2.0 baseline\nwith repeated text literal-only\n";
2246 let packed = unpack20_encode_literals(input).unwrap();
2247
2248 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2249 }
2250
2251 #[test]
2252 fn encoder_emits_rar20_offset_one_matches_for_repeated_bytes() {
2253 let input = b"A".repeat(1024);
2254 let packed = unpack20_encode_literals(&input).unwrap();
2255
2256 assert!(packed.len() < input.len() / 4);
2257 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2258 }
2259
2260 #[test]
2261 fn encoder_emits_rar20_dictionary_matches_for_repeated_sequences() {
2262 let input = b"abc123xyz-".repeat(128);
2263 let packed = unpack20_encode_literals(&input).unwrap();
2264
2265 assert!(packed.len() < input.len() / 2);
2266 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2267 }
2268
2269 #[test]
2270 fn encoder_emits_rar20_repeat_last_matches_for_regular_streams() {
2271 let input = b"\x00\x01\x02\x03".repeat(4096);
2272 let tokens = encode_tokens(&input, &[], EncodeOptions::default(), None);
2273 let packed = unpack20_encode_literals(&input).unwrap();
2274
2275 assert!(tokens
2276 .iter()
2277 .any(|token| matches!(token, EncodeToken::RepeatLast)));
2278 assert!(packed.len() < input.len() / 8);
2279 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2280 }
2281
2282 #[test]
2283 fn encoder_emits_rar20_minimum_length_fresh_matches() {
2284 let input = b"abcabc";
2285 let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2286 let packed = unpack20_encode_literals(input).unwrap();
2287
2288 assert!(matches!(
2289 tokens.as_slice(),
2290 [
2291 EncodeToken::Literal(b'a'),
2292 EncodeToken::Literal(b'b'),
2293 EncodeToken::Literal(b'c'),
2294 EncodeToken::Match {
2295 length: 3,
2296 offset: 3
2297 }
2298 ]
2299 ));
2300 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2301 }
2302
2303 #[test]
2304 fn encoder_emits_rar20_short_offset_matches() {
2305 let input = b"abab";
2306 let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2307 let packed = unpack20_encode_literals(input).unwrap();
2308
2309 assert!(matches!(
2310 tokens.as_slice(),
2311 [
2312 EncodeToken::Literal(b'a'),
2313 EncodeToken::Literal(b'b'),
2314 EncodeToken::ShortOffset { offset: 2 }
2315 ]
2316 ));
2317 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2318 }
2319
2320 #[test]
2321 fn encoder_emits_rar20_old_offset_matches() {
2322 let input = b"abcdabcdXYZXYZwxyzwxyz";
2323 let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2324 let packed = unpack20_encode_literals(input).unwrap();
2325
2326 assert!(tokens
2327 .iter()
2328 .any(|token| matches!(token, EncodeToken::OldOffset { .. })));
2329 assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2330 }
2331
2332 #[test]
2333 fn encoder_finds_rar20_matches_beyond_near_offsets() {
2334 let phrase = b"long-distance repeated phrase for rar20 match finder.";
2335 let mut input = Vec::new();
2336 input.extend_from_slice(phrase);
2337 input.extend(std::iter::repeat_n(0, 300 * 1024));
2338 input.extend_from_slice(phrase);
2339 input.extend_from_slice(phrase);
2340 let tokens = encode_tokens(&input, &[], EncodeOptions::default(), None);
2341 let packed = unpack20_encode_literals(&input).unwrap();
2342
2343 assert!(tokens.iter().any(|token| matches!(
2344 token,
2345 EncodeToken::Match { offset, .. } if *offset > 0x40000
2346 )));
2347 assert!(packed.len() < input.len());
2348 let decoded = unpack20_decode(&packed, input.len()).unwrap();
2349 assert!(
2350 decoded == input,
2351 "RAR 2.0 long-distance match round-trip failed"
2352 );
2353 }
2354
2355 #[test]
2356 fn solid_encoder_emits_rar20_matches_against_previous_member_history() {
2357 let first = b"solid rar20 shared phrase alpha beta gamma ".repeat(4);
2358 let second = b"solid rar20 shared phrase alpha beta gamma ".repeat(2);
2359 let independent = unpack20_encode_literals(&second).unwrap();
2360 let mut encoder = Unpack20Encoder::new();
2361 let first_packed = encoder.encode_member(&first).unwrap();
2362 let second_packed = encoder.encode_member(&second).unwrap();
2363
2364 assert!(second_packed.len() < independent.len());
2365 let mut decoder = Unpack20::new();
2366 assert_eq!(
2367 decoder.decode_member(&first_packed, first.len()).unwrap(),
2368 first
2369 );
2370 assert_eq!(
2371 decoder.decode_member(&second_packed, second.len()).unwrap(),
2372 second
2373 );
2374 }
2375
2376 #[test]
2377 fn solid_encoder_reuses_rar20_tables_at_member_boundary() {
2378 let first: Vec<_> = (0u8..=255).cycle().take(4096).collect();
2379 let second = b"short literal member after reused rar20 table boundary\n";
2380 let independent = unpack20_encode_literals(second).unwrap();
2381 let mut encoder = Unpack20Encoder::new();
2382 let first_packed = encoder.encode_member(&first).unwrap();
2383 let second_packed = encoder.encode_member(second).unwrap();
2384
2385 assert!(second_packed.len() < independent.len());
2386 let mut decoder = Unpack20::new();
2387 assert_eq!(
2388 decoder.decode_member(&first_packed, first.len()).unwrap(),
2389 first
2390 );
2391 assert_eq!(
2392 decoder.decode_member(&second_packed, second.len()).unwrap(),
2393 second
2394 );
2395 }
2396
2397 #[test]
2398 fn solid_encoder_matches_immediately_after_rar20_table_boundary() {
2399 let phrase = b"rar20 table boundary match phrase with enough bytes ";
2400 let first = phrase.repeat(128);
2401 let second = phrase.repeat(8);
2402 let independent = unpack20_encode_literals(&second).unwrap();
2403 let mut encoder = Unpack20Encoder::new();
2404 let first_packed = encoder.encode_member(&first).unwrap();
2405 let second_packed = encoder.encode_member(&second).unwrap();
2406 let tokens = encode_tokens(&second, &first, EncodeOptions::default(), None);
2407
2408 assert!(matches!(tokens.first(), Some(EncodeToken::Match { .. })));
2409 assert!(second_packed.len() < independent.len());
2410 let mut decoder = Unpack20::new();
2411 assert_eq!(
2412 decoder.decode_member(&first_packed, first.len()).unwrap(),
2413 first
2414 );
2415 assert_eq!(
2416 decoder.decode_member(&second_packed, second.len()).unwrap(),
2417 second
2418 );
2419 }
2420
2421 #[test]
2422 fn solid_encoder_carries_rar20_history_across_multiple_members() {
2423 let first = b"rar20 multi member solid seed ".repeat(512);
2424 let second = b"rar20 multi member solid seed with middle tail ".repeat(128);
2425 let third = b"with middle tail ".repeat(64);
2426 let independent = unpack20_encode_literals(&third).unwrap();
2427 let mut encoder = Unpack20Encoder::new();
2428 let first_packed = encoder.encode_member(&first).unwrap();
2429 let second_packed = encoder.encode_member(&second).unwrap();
2430 let third_packed = encoder.encode_member(&third).unwrap();
2431
2432 assert!(third_packed.len() < independent.len());
2433 let mut decoder = Unpack20::new();
2434 assert_eq!(
2435 decoder.decode_member(&first_packed, first.len()).unwrap(),
2436 first
2437 );
2438 assert_eq!(
2439 decoder.decode_member(&second_packed, second.len()).unwrap(),
2440 second
2441 );
2442 assert_eq!(
2443 decoder.decode_member(&third_packed, third.len()).unwrap(),
2444 third
2445 );
2446 }
2447
2448 #[test]
2449 fn decode_member_to_streams_decoded_payload_through_writer_sink() {
2450 let input = b"abcabcabcabcabcabcabcabcabcabcabcabc";
2451 let packed = unpack20_encode_literals(input).unwrap();
2452
2453 let mut decoder = Unpack20::new();
2454 let mut sink = Vec::new();
2455 decoder
2456 .decode_member_to(&packed, input.len(), &mut sink)
2457 .unwrap();
2458 assert_eq!(sink, input);
2459
2460 struct FailingWriter;
2463 impl std::io::Write for FailingWriter {
2464 fn write(&mut self, _buf: &[u8]) -> std::io::Result<usize> {
2465 Err(std::io::Error::other("disk full"))
2466 }
2467 fn flush(&mut self) -> std::io::Result<()> {
2468 Ok(())
2469 }
2470 }
2471 let mut decoder = Unpack20::new();
2472 let err = decoder
2473 .decode_member_to(&packed, input.len(), &mut FailingWriter)
2474 .unwrap_err();
2475 assert_eq!(err, Error::InvalidData("RAR 2.0 output write failed"));
2476 }
2477}