Skip to main content

rars_codec/
rar20.rs

1use crate::{huffman, Error, Result};
2use std::io::{Read, Write};
3
4const MAIN_COUNT: usize = 298;
5const OFFSET_COUNT: usize = 48;
6const LENGTH_COUNT: usize = 28;
7const LEVEL_COUNT: usize = 19;
8const TABLE_COUNT: usize = MAIN_COUNT + OFFSET_COUNT + LENGTH_COUNT;
9const AUDIO_COUNT: usize = 257;
10const MAX_CHANNELS: usize = 4;
11const OLD_LEVEL_COUNT: usize = AUDIO_COUNT * MAX_CHANNELS;
12const MAX_HISTORY: usize = 1024 * 1024;
13const INPUT_CHUNK: usize = 64 * 1024;
14
15const LENGTH_BASES: [usize; LENGTH_COUNT] = [
16    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
17    160, 192, 224,
18];
19const LENGTH_BITS: [u8; LENGTH_COUNT] = [
20    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
21];
22const OFFSET_BASES: [usize; OFFSET_COUNT] = [
23    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
24    2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
25    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
26];
27const OFFSET_BITS: [u8; OFFSET_COUNT] = [
28    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
29    13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
30];
31const SHORT_BASES: [usize; 8] = [0, 4, 8, 16, 32, 64, 128, 192];
32const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];
33const MAX_ENCODER_MATCH_OFFSET: usize = MAX_HISTORY;
34const MAX_ENCODER_MATCH_LENGTH: usize = 258;
35const MATCH_HASH_BUCKETS: usize = 4096;
36const MAX_MATCH_CANDIDATES: usize = 256;
37
38pub fn unpack20_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
39    let mut decoder = Unpack20::new();
40    decoder.decode_member(input, output_size)
41}
42
43pub fn unpack20_encode_literals(input: &[u8]) -> Result<Vec<u8>> {
44    unpack20_encode_literals_with_options(input, EncodeOptions::default())
45}
46
47pub fn unpack20_encode_literals_with_options(
48    input: &[u8],
49    options: EncodeOptions,
50) -> Result<Vec<u8>> {
51    encode_member(input, &[], None, options)
52}
53
54pub fn unpack20_encode_auto(input: &[u8]) -> Result<Vec<u8>> {
55    unpack20_encode_auto_with_options(input, EncodeOptions::default())
56}
57
58pub fn unpack20_encode_auto_with_options(input: &[u8], options: EncodeOptions) -> Result<Vec<u8>> {
59    let lz = unpack20_encode_literals_with_options(input, options)?;
60    let mut best = lz;
61    if options.try_audio {
62        for channels in 1..=MAX_CHANNELS {
63            if input.len() < channels * 64 {
64                continue;
65            }
66            let audio = encode_audio_member(input, channels)?;
67            if audio.len() < best.len() {
68                best = audio;
69            }
70        }
71    }
72    Ok(best)
73}
74
75#[derive(Debug, Clone, Copy, PartialEq, Eq)]
76#[non_exhaustive]
77pub struct EncodeOptions {
78    pub max_match_candidates: usize,
79    pub max_match_distance: usize,
80    pub lazy_matching: bool,
81    pub lazy_lookahead: usize,
82    pub try_audio: bool,
83}
84
85impl EncodeOptions {
86    pub const fn new(max_match_candidates: usize) -> Self {
87        Self {
88            max_match_candidates,
89            max_match_distance: MAX_ENCODER_MATCH_OFFSET,
90            lazy_matching: false,
91            lazy_lookahead: 1,
92            try_audio: true,
93        }
94    }
95
96    pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
97        self.max_match_distance = if distance > MAX_ENCODER_MATCH_OFFSET {
98            MAX_ENCODER_MATCH_OFFSET
99        } else {
100            distance
101        };
102        self
103    }
104
105    pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
106        self.lazy_matching = enabled;
107        self
108    }
109
110    pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
111        self.lazy_lookahead = bytes;
112        self
113    }
114
115    pub const fn with_try_audio(mut self, enabled: bool) -> Self {
116        self.try_audio = enabled;
117        self
118    }
119}
120
121impl Default for EncodeOptions {
122    fn default() -> Self {
123        Self::new(MAX_MATCH_CANDIDATES)
124    }
125}
126
127#[derive(Debug, Clone, Default)]
128pub struct Unpack20Encoder {
129    history: Vec<u8>,
130    table: Option<FixedEncodeTable>,
131    options: EncodeOptions,
132}
133
134impl Unpack20Encoder {
135    pub fn new() -> Self {
136        Self::default()
137    }
138
139    pub fn with_options(options: EncodeOptions) -> Self {
140        Self {
141            history: Vec::new(),
142            table: None,
143            options,
144        }
145    }
146
147    pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
148        if input.is_empty() {
149            return Ok(Vec::new());
150        }
151        let table = match self.table {
152            Some(table) => table,
153            None => {
154                let table = FixedEncodeTable::new()?;
155                self.table = Some(table);
156                table
157            }
158        };
159        let packed = encode_member(input, &self.history, Some(table), self.options)?;
160        self.remember(input);
161        Ok(packed)
162    }
163
164    fn remember(&mut self, input: &[u8]) {
165        self.history.extend_from_slice(input);
166        let keep_from = self
167            .history
168            .len()
169            .saturating_sub(self.options.max_match_distance);
170        if keep_from != 0 {
171            self.history.drain(..keep_from);
172        }
173    }
174}
175
176fn encode_member(
177    input: &[u8],
178    history: &[u8],
179    fixed_table: Option<FixedEncodeTable>,
180    options: EncodeOptions,
181) -> Result<Vec<u8>> {
182    if input.is_empty() {
183        return Ok(Vec::new());
184    }
185
186    let tokens = encode_tokens(input, history, options, None);
187    let table_lengths = table_lengths_for_tokens(&tokens, fixed_table)?;
188    let packed = encode_member_with_tables(&tokens, history, fixed_table, &table_lengths)?;
189    if fixed_table.is_some() {
190        return Ok(packed);
191    }
192
193    let cost_model = CostModel::new(&table_lengths);
194    let refined_tokens = encode_tokens(input, history, options, Some(&cost_model));
195    if refined_tokens == tokens {
196        return Ok(packed);
197    }
198    let refined_table_lengths = table_lengths_for_tokens(&refined_tokens, fixed_table)?;
199    let refined_packed = encode_member_with_tables(
200        &refined_tokens,
201        history,
202        fixed_table,
203        &refined_table_lengths,
204    )?;
205    if refined_packed.len() < packed.len() {
206        Ok(refined_packed)
207    } else {
208        Ok(packed)
209    }
210}
211
212fn table_lengths_for_tokens(
213    tokens: &[EncodeToken],
214    fixed_table: Option<FixedEncodeTable>,
215) -> Result<[u8; TABLE_COUNT]> {
216    let mut main_frequencies = [0usize; MAIN_COUNT];
217    let mut offset_frequencies = [0usize; OFFSET_COUNT];
218    let mut length_frequencies = [0usize; LENGTH_COUNT];
219    for token in tokens {
220        match *token {
221            EncodeToken::Literal(byte) => main_frequencies[byte as usize] += 1,
222            EncodeToken::RepeatLast => main_frequencies[256] += 1,
223            EncodeToken::OldOffset {
224                index,
225                length,
226                offset,
227            } => {
228                main_frequencies[257 + index] += 1;
229                let (slot, _) = old_length_slot_for_match(length, offset)?;
230                length_frequencies[slot] += 1;
231            }
232            EncodeToken::ShortOffset { offset } => {
233                let (slot, _) = short_slot_for_match(offset)?;
234                main_frequencies[261 + slot] += 1;
235            }
236            EncodeToken::Match { length, offset } => {
237                let encoded_length = length.checked_sub(match_length_adjustment(offset)).ok_or(
238                    Error::InvalidData("RAR 2.0 adjusted match length underflows"),
239                )?;
240                let (slot, _) = length_slot_for_match(encoded_length)?;
241                main_frequencies[270 + slot] += 1;
242                let (offset_slot, _) = offset_slot_for_match(offset)?;
243                offset_frequencies[offset_slot] += 1;
244            }
245        }
246    }
247    let mut table_lengths = [0u8; TABLE_COUNT];
248    let literal_len = if let Some(table) = fixed_table {
249        table.length
250    } else {
251        let main_symbol_count = main_frequencies
252            .iter()
253            .filter(|&&frequency| frequency != 0)
254            .count()
255            + offset_frequencies
256                .iter()
257                .filter(|&&frequency| frequency != 0)
258                .count()
259            + length_frequencies
260                .iter()
261                .filter(|&&frequency| frequency != 0)
262                .count();
263        literal_code_len(main_symbol_count)?
264    };
265
266    if fixed_table.is_some() {
267        for len in &mut table_lengths[..256] {
268            *len = literal_len;
269        }
270        table_lengths[256] = literal_len;
271        for len in &mut table_lengths[270..270 + LENGTH_COUNT] {
272            *len = literal_len;
273        }
274        for len in &mut table_lengths[257..269] {
275            *len = literal_len;
276        }
277        for len in &mut table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT] {
278            *len = literal_len;
279        }
280        for len in &mut table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT] {
281            *len = literal_len;
282        }
283    } else {
284        table_lengths[..MAIN_COUNT]
285            .copy_from_slice(&validated_lengths_for_frequencies(&main_frequencies, 15));
286        table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT]
287            .copy_from_slice(&validated_lengths_for_frequencies(&offset_frequencies, 15));
288        table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT]
289            .copy_from_slice(&validated_lengths_for_frequencies(&length_frequencies, 15));
290    }
291    Ok(table_lengths)
292}
293
294fn encode_member_with_tables(
295    tokens: &[EncodeToken],
296    history: &[u8],
297    fixed_table: Option<FixedEncodeTable>,
298    table_lengths: &[u8; TABLE_COUNT],
299) -> Result<Vec<u8>> {
300    let level_tokens = encode_table_level_tokens(table_lengths);
301    let level_lengths = level_code_lengths_for_tokens(&level_tokens);
302    let level_codes = canonical_codes(&level_lengths)?;
303    let main_codes = canonical_codes(&table_lengths[..MAIN_COUNT])?;
304
305    let mut bits = BitWriter::default();
306    if fixed_table.is_none() || history.is_empty() {
307        bits.write_bits(0, 2); // LZ block, do not keep previous tables.
308        for &len in &level_lengths {
309            bits.write_bits(len as u32, 4);
310        }
311        for token in level_tokens {
312            let code = level_codes[token.symbol].ok_or(Error::InvalidData(
313                "RAR 2.0 encoder missing level Huffman code",
314            ))?;
315            bits.write_bits(code.code as u32, code.len);
316            if token.extra_bits != 0 {
317                bits.write_bits(token.extra_value as u32, token.extra_bits);
318            }
319        }
320    }
321    let offset_codes = canonical_codes(&table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
322    let length_codes = canonical_codes(&table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT])?;
323    for token in tokens {
324        match *token {
325            EncodeToken::Literal(byte) => {
326                let code = main_codes[byte as usize].ok_or(Error::InvalidData(
327                    "RAR 2.0 encoder missing literal Huffman code",
328                ))?;
329                bits.write_bits(code.code as u32, code.len);
330            }
331            EncodeToken::RepeatLast => {
332                let code = main_codes[256].ok_or(Error::InvalidData(
333                    "RAR 2.0 encoder missing repeat-last Huffman code",
334                ))?;
335                bits.write_bits(code.code as u32, code.len);
336            }
337            EncodeToken::OldOffset {
338                index,
339                length,
340                offset,
341            } => {
342                let code = main_codes[257 + index].ok_or(Error::InvalidData(
343                    "RAR 2.0 encoder missing old-offset Huffman code",
344                ))?;
345                bits.write_bits(code.code as u32, code.len);
346                let (slot, extra) = old_length_slot_for_match(length, offset)?;
347                let length_code = length_codes[slot].ok_or(Error::InvalidData(
348                    "RAR 2.0 encoder missing old-offset length Huffman code",
349                ))?;
350                bits.write_bits(length_code.code as u32, length_code.len);
351                if LENGTH_BITS[slot] != 0 {
352                    bits.write_bits(extra as u32, LENGTH_BITS[slot]);
353                }
354            }
355            EncodeToken::ShortOffset { offset } => {
356                let (slot, extra) = short_slot_for_match(offset)?;
357                let code = main_codes[261 + slot].ok_or(Error::InvalidData(
358                    "RAR 2.0 encoder missing short-offset Huffman code",
359                ))?;
360                bits.write_bits(code.code as u32, code.len);
361                if SHORT_BITS[slot] != 0 {
362                    bits.write_bits(extra as u32, SHORT_BITS[slot]);
363                }
364            }
365            EncodeToken::Match { length, offset } => {
366                let encoded_length = length.checked_sub(match_length_adjustment(offset)).ok_or(
367                    Error::InvalidData("RAR 2.0 adjusted match length underflows"),
368                )?;
369                let (slot, extra) = length_slot_for_match(encoded_length)?;
370                let code = main_codes[270 + slot].ok_or(Error::InvalidData(
371                    "RAR 2.0 encoder missing match Huffman code",
372                ))?;
373                bits.write_bits(code.code as u32, code.len);
374                if LENGTH_BITS[slot] != 0 {
375                    bits.write_bits(extra as u32, LENGTH_BITS[slot]);
376                }
377                let (offset_slot, offset_extra) = offset_slot_for_match(offset)?;
378                let offset = offset_codes[offset_slot].ok_or(Error::InvalidData(
379                    "RAR 2.0 encoder missing offset Huffman code",
380                ))?;
381                bits.write_bits(offset.code as u32, offset.len);
382                if OFFSET_BITS[offset_slot] != 0 {
383                    bits.write_bits(offset_extra as u32, OFFSET_BITS[offset_slot]);
384                }
385            }
386        }
387    }
388    Ok(bits.finish())
389}
390
391#[derive(Debug, Clone, Copy)]
392struct FixedEncodeTable {
393    length: u8,
394}
395
396impl FixedEncodeTable {
397    fn new() -> Result<Self> {
398        Ok(Self {
399            length: literal_code_len(256 + LENGTH_COUNT + OFFSET_COUNT)?,
400        })
401    }
402}
403
404#[derive(Debug, Clone, Copy, PartialEq, Eq)]
405enum EncodeToken {
406    Literal(u8),
407    RepeatLast,
408    OldOffset {
409        index: usize,
410        length: usize,
411        offset: usize,
412    },
413    ShortOffset {
414        offset: usize,
415    },
416    Match {
417        length: usize,
418        offset: usize,
419    },
420}
421
422fn encode_tokens(
423    input: &[u8],
424    history: &[u8],
425    options: EncodeOptions,
426    cost_model: Option<&CostModel>,
427) -> Vec<EncodeToken> {
428    let mut tokens = Vec::new();
429    let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
430    let history = &history[history.len().saturating_sub(options.max_match_distance)..];
431    let mut combined = Vec::with_capacity(history.len() + input.len());
432    combined.extend_from_slice(history);
433    combined.extend_from_slice(input);
434    for history_pos in 0..history.len().saturating_sub(2) {
435        insert_match_position(&combined, history_pos, &mut buckets);
436    }
437
438    let mut pos = history.len();
439    let end = combined.len();
440    let mut last_match = None;
441    let mut old_offsets = [0usize; 4];
442    while pos < end {
443        let selected = select_match(
444            &combined,
445            pos,
446            end,
447            &buckets,
448            options,
449            &old_offsets,
450            cost_model,
451        );
452        if let Some(selected) = selected {
453            let lazy = LazyMatchContext {
454                input: &combined,
455                end,
456                buckets: &buckets,
457                options,
458                old_offsets: &old_offsets,
459                cost_model,
460            };
461            if should_lazy_emit_literal(pos, selected, lazy) {
462                tokens.push(EncodeToken::Literal(combined[pos]));
463                insert_match_position(&combined, pos, &mut buckets);
464                pos += 1;
465                continue;
466            }
467            let (length, offset) = match selected {
468                SelectedMatch::Fresh { length, offset } => {
469                    if last_match == Some((length, offset)) {
470                        tokens.push(EncodeToken::RepeatLast);
471                    } else {
472                        tokens.push(EncodeToken::Match { length, offset });
473                        last_match = Some((length, offset));
474                    }
475                    (length, offset)
476                }
477                SelectedMatch::OldOffset {
478                    index,
479                    length,
480                    offset,
481                } => {
482                    if last_match == Some((length, offset)) {
483                        tokens.push(EncodeToken::RepeatLast);
484                    } else {
485                        tokens.push(EncodeToken::OldOffset {
486                            index,
487                            length,
488                            offset,
489                        });
490                        last_match = Some((length, offset));
491                    }
492                    (length, offset)
493                }
494                SelectedMatch::ShortOffset { offset } => {
495                    let length = 2;
496                    tokens.push(EncodeToken::ShortOffset { offset });
497                    last_match = Some((length, offset));
498                    (length, offset)
499                }
500            };
501            push_old_offset(&mut old_offsets, offset);
502            for history_pos in pos..pos + length {
503                insert_match_position(&combined, history_pos, &mut buckets);
504            }
505            pos += length;
506        } else {
507            tokens.push(EncodeToken::Literal(combined[pos]));
508            insert_match_position(&combined, pos, &mut buckets);
509            pos += 1;
510        }
511    }
512    tokens
513}
514
515#[derive(Debug, Clone, Copy)]
516struct CostModel<'a> {
517    main: &'a [u8],
518    offsets: &'a [u8],
519    lengths: &'a [u8],
520}
521
522impl<'a> CostModel<'a> {
523    fn new(table_lengths: &'a [u8; TABLE_COUNT]) -> Self {
524        Self {
525            main: &table_lengths[..MAIN_COUNT],
526            offsets: &table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT],
527            lengths: &table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT],
528        }
529    }
530
531    fn selected_cost(self, selected: SelectedMatch) -> Option<usize> {
532        match selected {
533            SelectedMatch::Fresh { length, offset } => {
534                let encoded_length = length.checked_sub(match_length_adjustment(offset))?;
535                let (length_slot, _) = length_slot_for_match(encoded_length).ok()?;
536                let (offset_slot, _) = offset_slot_for_match(offset).ok()?;
537                Some(
538                    usize::from(self.main[270 + length_slot])
539                        + usize::from(LENGTH_BITS[length_slot])
540                        + usize::from(self.offsets[offset_slot])
541                        + usize::from(OFFSET_BITS[offset_slot]),
542                )
543            }
544            SelectedMatch::OldOffset {
545                index,
546                length,
547                offset,
548            } => {
549                let (length_slot, _) = old_length_slot_for_match(length, offset).ok()?;
550                Some(
551                    usize::from(self.main[257 + index])
552                        + usize::from(self.lengths[length_slot])
553                        + usize::from(LENGTH_BITS[length_slot]),
554                )
555            }
556            SelectedMatch::ShortOffset { offset } => {
557                let (slot, _) = short_slot_for_match(offset).ok()?;
558                Some(usize::from(self.main[261 + slot]) + usize::from(SHORT_BITS[slot]))
559            }
560        }
561    }
562
563    fn selected_score(self, selected: SelectedMatch) -> Option<isize> {
564        let cost = self.selected_cost(selected)?;
565        Some(selected.length() as isize * 8 - cost as isize)
566    }
567}
568
569#[derive(Debug, Clone, Copy)]
570enum SelectedMatch {
571    Fresh {
572        length: usize,
573        offset: usize,
574    },
575    OldOffset {
576        index: usize,
577        length: usize,
578        offset: usize,
579    },
580    ShortOffset {
581        offset: usize,
582    },
583}
584
585impl SelectedMatch {
586    fn length(self) -> usize {
587        match self {
588            SelectedMatch::Fresh { length, .. } | SelectedMatch::OldOffset { length, .. } => length,
589            SelectedMatch::ShortOffset { .. } => 2,
590        }
591    }
592
593    fn score(self) -> isize {
594        let length_score = self.length() as isize * 8;
595        let cost = match self {
596            SelectedMatch::OldOffset { .. } | SelectedMatch::ShortOffset { .. } => 4,
597            SelectedMatch::Fresh { offset, .. } => 8 + OFFSET_BITS[offset_slot_index(offset)],
598        };
599        length_score - isize::from(cost)
600    }
601}
602
603fn select_match(
604    input: &[u8],
605    pos: usize,
606    end: usize,
607    buckets: &[Vec<usize>],
608    options: EncodeOptions,
609    old_offsets: &[usize; 4],
610    cost_model: Option<&CostModel<'_>>,
611) -> Option<SelectedMatch> {
612    let fresh = best_match(input, pos, end, buckets, options, cost_model)
613        .map(|(length, offset)| SelectedMatch::Fresh { length, offset });
614    let old = best_old_offset_match(input, pos, end, old_offsets, cost_model).map(
615        |(index, length, offset)| SelectedMatch::OldOffset {
616            index,
617            length,
618            offset,
619        },
620    );
621    if let Some(cost_model) = cost_model {
622        return [fresh, old, best_short_offset_match(input, pos, end)]
623            .into_iter()
624            .flatten()
625            .max_by_key(|&selected| {
626                (
627                    cost_model.selected_score(selected).unwrap_or(isize::MIN),
628                    selected.length(),
629                )
630            });
631    }
632
633    let fresh = fresh.and_then(|selected| match selected {
634        SelectedMatch::Fresh { length, offset } => Some((length, offset)),
635        _ => None,
636    });
637    let old = old.and_then(|selected| match selected {
638        SelectedMatch::OldOffset {
639            index,
640            length,
641            offset,
642        } => Some((index, length, offset)),
643        _ => None,
644    });
645    match (fresh, old) {
646        (Some((fresh_length, _)), Some((index, old_length, old_offset)))
647            if old_length + 1 >= fresh_length =>
648        {
649            Some(SelectedMatch::OldOffset {
650                index,
651                length: old_length,
652                offset: old_offset,
653            })
654        }
655        (Some((length, offset)), _) => Some(SelectedMatch::Fresh { length, offset }),
656        (None, Some((index, length, offset))) => Some(SelectedMatch::OldOffset {
657            index,
658            length,
659            offset,
660        }),
661        (None, None) => best_short_offset_match(input, pos, end),
662    }
663}
664
665struct LazyMatchContext<'a> {
666    input: &'a [u8],
667    end: usize,
668    buckets: &'a [Vec<usize>],
669    options: EncodeOptions,
670    old_offsets: &'a [usize; 4],
671    cost_model: Option<&'a CostModel<'a>>,
672}
673
674fn should_lazy_emit_literal(
675    pos: usize,
676    current: SelectedMatch,
677    context: LazyMatchContext<'_>,
678) -> bool {
679    if !context.options.lazy_matching || pos + 1 >= context.end {
680        return false;
681    }
682    let lookahead = context.options.lazy_lookahead.max(1);
683    (1..=lookahead)
684        .take_while(|offset| pos + offset < context.end)
685        .any(|offset| {
686            select_match(
687                context.input,
688                pos + offset,
689                context.end,
690                context.buckets,
691                context.options,
692                context.old_offsets,
693                context.cost_model,
694            )
695            .is_some_and(|next| {
696                let current_score = context
697                    .cost_model
698                    .and_then(|cost_model| cost_model.selected_score(current))
699                    .unwrap_or_else(|| current.score());
700                let next_score = context
701                    .cost_model
702                    .and_then(|cost_model| cost_model.selected_score(next))
703                    .unwrap_or_else(|| next.score());
704                let skipped_literal_score = offset as isize * 8;
705                next_score > current_score + skipped_literal_score
706            })
707        })
708}
709
710fn best_match(
711    input: &[u8],
712    pos: usize,
713    end: usize,
714    buckets: &[Vec<usize>],
715    options: EncodeOptions,
716    cost_model: Option<&CostModel<'_>>,
717) -> Option<(usize, usize)> {
718    let max_offset = pos.min(options.max_match_distance);
719    let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
720    if options.max_match_candidates == 0
721        || max_offset == 0
722        || max_length < 3
723        || pos + 2 >= input.len()
724    {
725        return None;
726    }
727    let bucket = &buckets[match_hash(input, pos)];
728    let mut best = None;
729    let mut checked = 0usize;
730    for &candidate in bucket.iter().rev() {
731        if candidate >= pos {
732            continue;
733        }
734        let offset = pos - candidate;
735        if offset > max_offset {
736            break;
737        }
738        checked += 1;
739        let mut length = 0usize;
740        while length < max_length && input[pos + length] == input[pos + length - offset] {
741            length += 1;
742        }
743        let encodable = length >= 3 + match_length_adjustment(offset);
744        if encodable && is_better_fresh_match(cost_model, length, offset, best) {
745            best = Some((length, offset));
746            if length == max_length {
747                break;
748            }
749        }
750        if checked >= options.max_match_candidates {
751            break;
752        }
753    }
754    best
755}
756
757fn offset_slot_index(offset: usize) -> usize {
758    offset_slot_for_match(offset)
759        .map(|(slot, _)| slot)
760        .unwrap_or(OFFSET_BITS.len() - 1)
761}
762
763fn is_better_fresh_match(
764    cost_model: Option<&CostModel<'_>>,
765    length: usize,
766    offset: usize,
767    best: Option<(usize, usize)>,
768) -> bool {
769    let Some((best_length, best_offset)) = best else {
770        return true;
771    };
772    if let Some(cost_model) = cost_model {
773        let candidate = SelectedMatch::Fresh { length, offset };
774        let best = SelectedMatch::Fresh {
775            length: best_length,
776            offset: best_offset,
777        };
778        let candidate_score = cost_model.selected_score(candidate).unwrap_or(isize::MIN);
779        let best_score = cost_model.selected_score(best).unwrap_or(isize::MIN);
780        return candidate_score > best_score
781            || (candidate_score == best_score
782                && (length > best_length || (length == best_length && offset < best_offset)));
783    }
784    length > best_length || (length == best_length && offset < best_offset)
785}
786
787fn best_old_offset_match(
788    input: &[u8],
789    pos: usize,
790    end: usize,
791    old_offsets: &[usize; 4],
792    cost_model: Option<&CostModel<'_>>,
793) -> Option<(usize, usize, usize)> {
794    let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
795    let mut best = None;
796    for (index, &offset) in old_offsets.iter().enumerate() {
797        if offset == 0 || offset > pos {
798            continue;
799        }
800        let length = match_length_at_offset(input, pos, max_length, offset);
801        if old_length_slot_for_match(length, offset).is_ok()
802            && is_better_old_offset_match(cost_model, index, length, offset, best)
803        {
804            best = Some((index, length, offset));
805        }
806    }
807    best
808}
809
810fn is_better_old_offset_match(
811    cost_model: Option<&CostModel<'_>>,
812    index: usize,
813    length: usize,
814    offset: usize,
815    best: Option<(usize, usize, usize)>,
816) -> bool {
817    let Some((best_index, best_length, best_offset)) = best else {
818        return true;
819    };
820    if let Some(cost_model) = cost_model {
821        let candidate = SelectedMatch::OldOffset {
822            index,
823            length,
824            offset,
825        };
826        let best = SelectedMatch::OldOffset {
827            index: best_index,
828            length: best_length,
829            offset: best_offset,
830        };
831        let candidate_score = cost_model.selected_score(candidate).unwrap_or(isize::MIN);
832        let best_score = cost_model.selected_score(best).unwrap_or(isize::MIN);
833        return candidate_score > best_score
834            || (candidate_score == best_score
835                && (length > best_length || (length == best_length && offset < best_offset)));
836    }
837    length > best_length || (length == best_length && offset < best_offset)
838}
839
840fn best_short_offset_match(input: &[u8], pos: usize, end: usize) -> Option<SelectedMatch> {
841    if end - pos < 2 {
842        return None;
843    }
844    let max_offset = pos.min(256);
845    (1..=max_offset)
846        .find(|&offset| {
847            input[pos] == input[pos - offset] && input[pos + 1] == input[pos + 1 - offset]
848        })
849        .map(|offset| SelectedMatch::ShortOffset { offset })
850}
851
852fn match_length_at_offset(input: &[u8], pos: usize, max_length: usize, offset: usize) -> usize {
853    let mut length = 0usize;
854    while length < max_length && input[pos + length] == input[pos + length - offset] {
855        length += 1;
856    }
857    length
858}
859
860fn match_length_adjustment(offset: usize) -> usize {
861    usize::from(offset >= 0x2000) + usize::from(offset >= 0x40000)
862}
863
864fn old_length_adjustment(offset: usize) -> usize {
865    usize::from(offset >= 0x101) + usize::from(offset >= 0x2000) + usize::from(offset >= 0x40000)
866}
867
868fn push_old_offset(old_offsets: &mut [usize; 4], offset: usize) {
869    old_offsets[3] = old_offsets[2];
870    old_offsets[2] = old_offsets[1];
871    old_offsets[1] = old_offsets[0];
872    old_offsets[0] = offset;
873}
874
875fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
876    if pos + 2 < input.len() {
877        buckets[match_hash(input, pos)].push(pos);
878    }
879}
880
881fn match_hash(input: &[u8], pos: usize) -> usize {
882    let value =
883        ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
884    value & (MATCH_HASH_BUCKETS - 1)
885}
886
887fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
888    if length < 3 {
889        return Err(Error::InvalidData("RAR 2.0 match length is too short"));
890    }
891    let adjusted = length - 3;
892    for (slot, &base) in LENGTH_BASES.iter().enumerate() {
893        let extra_bits = LENGTH_BITS[slot];
894        let max = base
895            + if extra_bits == 0 {
896                0
897            } else {
898                (1usize << extra_bits) - 1
899            };
900        if adjusted >= base && adjusted <= max {
901            return Ok((slot, adjusted - base));
902        }
903    }
904    Err(Error::InvalidData("RAR 2.0 match length is too long"))
905}
906
907fn old_length_slot_for_match(length: usize, offset: usize) -> Result<(usize, usize)> {
908    let encoded = length
909        .checked_sub(old_length_adjustment(offset))
910        .ok_or(Error::InvalidData(
911            "RAR 2.0 adjusted old-offset length underflows",
912        ))?;
913    if encoded < 2 {
914        return Err(Error::InvalidData(
915            "RAR 2.0 old-offset match length is too short",
916        ));
917    }
918    let adjusted = encoded - 2;
919    for (slot, &base) in LENGTH_BASES.iter().enumerate() {
920        let extra_bits = LENGTH_BITS[slot];
921        let max = base
922            + if extra_bits == 0 {
923                0
924            } else {
925                (1usize << extra_bits) - 1
926            };
927        if adjusted >= base && adjusted <= max {
928            return Ok((slot, adjusted - base));
929        }
930    }
931    Err(Error::InvalidData(
932        "RAR 2.0 old-offset match length is too long",
933    ))
934}
935
936fn offset_slot_for_match(offset: usize) -> Result<(usize, usize)> {
937    if offset == 0 {
938        return Err(Error::InvalidData("RAR 2.0 match offset is zero"));
939    }
940    let adjusted = offset - 1;
941    for (slot, &base) in OFFSET_BASES.iter().enumerate() {
942        let extra_bits = OFFSET_BITS[slot];
943        let max = base
944            + if extra_bits == 0 {
945                0
946            } else {
947                (1usize << extra_bits) - 1
948            };
949        if adjusted >= base && adjusted <= max {
950            return Ok((slot, adjusted - base));
951        }
952    }
953    Err(Error::InvalidData("RAR 2.0 match offset is too large"))
954}
955
956fn short_slot_for_match(offset: usize) -> Result<(usize, usize)> {
957    if offset == 0 || offset > 256 {
958        return Err(Error::InvalidData(
959            "RAR 2.0 short match offset is out of range",
960        ));
961    }
962    let adjusted = offset - 1;
963    for (slot, &base) in SHORT_BASES.iter().enumerate() {
964        let extra_bits = SHORT_BITS[slot];
965        let max = base
966            + if extra_bits == 0 {
967                0
968            } else {
969                (1usize << extra_bits) - 1
970            };
971        if adjusted >= base && adjusted <= max {
972            return Ok((slot, adjusted - base));
973        }
974    }
975    Err(Error::InvalidData(
976        "RAR 2.0 short match offset is out of range",
977    ))
978}
979
980fn literal_code_len(symbol_count: usize) -> Result<u8> {
981    if symbol_count == 0 {
982        return Err(Error::InvalidData("RAR 2.0 encoder has no literal symbols"));
983    }
984    let len = usize::BITS - (symbol_count - 1).leading_zeros();
985    u8::try_from(len.max(1)).map_err(|_| Error::InvalidData("RAR 2.0 literal table is too large"))
986}
987
988#[derive(Debug, Clone, Copy, PartialEq, Eq)]
989struct LevelToken {
990    symbol: usize,
991    extra_bits: u8,
992    extra_value: u8,
993}
994
995impl LevelToken {
996    const fn plain(symbol: usize) -> Self {
997        Self {
998            symbol,
999            extra_bits: 0,
1000            extra_value: 0,
1001        }
1002    }
1003
1004    const fn repeat_previous(count: usize) -> Self {
1005        Self {
1006            symbol: 16,
1007            extra_bits: 2,
1008            extra_value: (count - 3) as u8,
1009        }
1010    }
1011
1012    const fn zero_run_short(count: usize) -> Self {
1013        Self {
1014            symbol: 17,
1015            extra_bits: 3,
1016            extra_value: (count - 3) as u8,
1017        }
1018    }
1019
1020    const fn zero_run_long(count: usize) -> Self {
1021        Self {
1022            symbol: 18,
1023            extra_bits: 7,
1024            extra_value: (count - 11) as u8,
1025        }
1026    }
1027}
1028
1029fn encode_table_level_tokens(lengths: &[u8; TABLE_COUNT]) -> Vec<LevelToken> {
1030    encode_level_tokens(lengths)
1031}
1032
1033fn encode_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
1034    let mut tokens = Vec::new();
1035    let mut pos = 0usize;
1036    let mut previous = None;
1037    while pos < lengths.len() {
1038        let value = lengths[pos];
1039        let mut run = 1usize;
1040        while pos + run < lengths.len() && lengths[pos + run] == value {
1041            run += 1;
1042        }
1043
1044        if value == 0 {
1045            emit_zero_level_run(&mut tokens, run);
1046            previous = Some(0);
1047            pos += run;
1048            continue;
1049        }
1050
1051        if previous == Some(value) && run >= 3 {
1052            let mut remaining = run;
1053            while remaining != 0 {
1054                let chunk = remaining.min(6);
1055                if chunk >= 3 {
1056                    tokens.push(LevelToken::repeat_previous(chunk));
1057                    remaining -= chunk;
1058                } else {
1059                    tokens.extend(std::iter::repeat_n(
1060                        LevelToken::plain(value as usize),
1061                        chunk,
1062                    ));
1063                    remaining = 0;
1064                }
1065            }
1066            pos += run;
1067            continue;
1068        }
1069
1070        tokens.push(LevelToken::plain(value as usize));
1071        previous = Some(value);
1072        pos += 1;
1073    }
1074    tokens
1075}
1076
1077fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1078    while run != 0 {
1079        if run >= 11 {
1080            let mut chunk = run.min(138);
1081            if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1082                chunk -= 3;
1083            }
1084            tokens.push(LevelToken::zero_run_long(chunk));
1085            run -= chunk;
1086        } else if run >= 3 {
1087            let chunk = run.min(10);
1088            tokens.push(LevelToken::zero_run_short(chunk));
1089            run -= chunk;
1090        } else {
1091            tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
1092            break;
1093        }
1094    }
1095}
1096
1097fn level_code_lengths_for_tokens(tokens: &[LevelToken]) -> [u8; LEVEL_COUNT] {
1098    let mut used = [false; LEVEL_COUNT];
1099    for token in tokens {
1100        used[token.symbol] = true;
1101    }
1102    level_code_lengths_for_used_symbols(used)
1103}
1104
1105fn validated_lengths_for_frequencies<const N: usize>(
1106    frequencies: &[usize; N],
1107    max_bits: u8,
1108) -> [u8; N] {
1109    let mut lengths = [0u8; N];
1110    lengths.copy_from_slice(&huffman::lengths_for_frequencies(frequencies, max_bits));
1111    if canonical_codes(&lengths).is_ok() {
1112        return lengths;
1113    }
1114
1115    lengths.copy_from_slice(&huffman::uniform_lengths_for_frequencies(frequencies));
1116    lengths
1117}
1118
1119fn encode_audio_member(input: &[u8], channels: usize) -> Result<Vec<u8>> {
1120    if channels == 0 || channels > MAX_CHANNELS {
1121        return Err(Error::InvalidData("RAR 2.0 audio channel count is invalid"));
1122    }
1123    let deltas = audio_encode(input, channels)?;
1124    let mut levels = vec![0u8; AUDIO_COUNT * channels];
1125    for channel in 0..channels {
1126        let mut frequencies = [0usize; AUDIO_COUNT];
1127        for index in (channel..deltas.len()).step_by(channels) {
1128            frequencies[deltas[index] as usize] += 1;
1129        }
1130        let channel_lengths = huffman::lengths_for_frequency_array(&frequencies, 15);
1131        for (symbol, len) in channel_lengths.into_iter().enumerate() {
1132            levels[channel * AUDIO_COUNT + symbol] = len;
1133        }
1134    }
1135
1136    let level_symbols = encode_audio_table_level_symbols(&levels);
1137    let level_lengths = level_code_lengths_for_symbols(&level_symbols);
1138    let level_codes = canonical_codes(&level_lengths)?;
1139    let mut bits = BitWriter::default();
1140    bits.write_bits(0b10, 2); // audio block, do not keep previous tables.
1141    bits.write_bits((channels - 1) as u32, 2);
1142    for &len in &level_lengths {
1143        bits.write_bits(len as u32, 4);
1144    }
1145    for symbol in level_symbols {
1146        let code = level_codes[symbol].ok_or(Error::InvalidData(
1147            "RAR 2.0 encoder missing audio-level Huffman code",
1148        ))?;
1149        bits.write_bits(code.code as u32, code.len);
1150        match symbol {
1151            17 => bits.write_bits(0, 3),
1152            18 => bits.write_bits(127, 7),
1153            _ => {}
1154        }
1155    }
1156
1157    for channel in 0..channels {
1158        let table = &levels[channel * AUDIO_COUNT..(channel + 1) * AUDIO_COUNT];
1159        validate_audio_table(table)?;
1160    }
1161    let audio_codes = (0..channels)
1162        .map(|channel| canonical_codes(&levels[channel * AUDIO_COUNT..(channel + 1) * AUDIO_COUNT]))
1163        .collect::<Result<Vec<_>>>()?;
1164    for (index, &delta) in deltas.iter().enumerate() {
1165        let channel = index % channels;
1166        let code = audio_codes[channel][delta as usize].ok_or(Error::InvalidData(
1167            "RAR 2.0 encoder missing audio Huffman code",
1168        ))?;
1169        bits.write_bits(code.code as u32, code.len);
1170    }
1171    Ok(bits.finish())
1172}
1173
1174fn encode_audio_table_level_symbols(levels: &[u8]) -> Vec<usize> {
1175    levels.iter().map(|&len| len as usize).collect()
1176}
1177
1178fn level_code_lengths_for_symbols(symbols: &[usize]) -> [u8; LEVEL_COUNT] {
1179    let mut used = [false; LEVEL_COUNT];
1180    for &symbol in symbols {
1181        used[symbol] = true;
1182    }
1183    level_code_lengths_for_used_symbols(used)
1184}
1185
1186fn level_code_lengths_for_used_symbols(used: [bool; LEVEL_COUNT]) -> [u8; LEVEL_COUNT] {
1187    let used_count = used.iter().filter(|&&used| used).count();
1188    let len = huffman::bits_for_symbol_count(used_count);
1189    let mut lengths = [0u8; LEVEL_COUNT];
1190    for (symbol, is_used) in used.into_iter().enumerate() {
1191        if is_used {
1192            lengths[symbol] = len;
1193        }
1194    }
1195    lengths
1196}
1197
1198fn validate_audio_table(lengths: &[u8]) -> Result<()> {
1199    let mut count = [0u16; 16];
1200    for &len in lengths {
1201        if len > 15 {
1202            return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1203        }
1204        if len != 0 {
1205            count[len as usize] += 1;
1206        }
1207    }
1208    validate_huffman_counts(&count)
1209}
1210
1211fn audio_encode(input: &[u8], channels: usize) -> Result<Vec<u8>> {
1212    if channels == 0 || channels > MAX_CHANNELS {
1213        return Err(Error::InvalidData("RAR 2.0 audio channel count is invalid"));
1214    }
1215    let mut states = [AudioState::default(); MAX_CHANNELS];
1216    let mut channel_delta = 0i32;
1217    let mut deltas = Vec::with_capacity(input.len());
1218    for (index, &byte) in input.iter().enumerate() {
1219        let channel = index % channels;
1220        let state = &mut states[channel];
1221        state.byte_count = state.byte_count.wrapping_add(1);
1222        state.d4 = state.d3;
1223        state.d3 = state.d2;
1224        state.d2 = state.last_delta - state.d1;
1225        state.d1 = state.last_delta;
1226
1227        let predicted = 8 * state.last_char
1228            + state.k[0] * state.d1
1229            + state.k[1] * state.d2
1230            + state.k[2] * state.d3
1231            + state.k[3] * state.d4
1232            + state.k[4] * channel_delta;
1233        let predicted = (predicted >> 3) & 0xff;
1234        let delta = (predicted as u8).wrapping_sub(byte);
1235
1236        let d = (delta as i8 as i32) << 3;
1237        state.dif[0] = state.dif[0].wrapping_add(d.unsigned_abs());
1238        state.dif[1] = state.dif[1].wrapping_add((d - state.d1).unsigned_abs());
1239        state.dif[2] = state.dif[2].wrapping_add((d + state.d1).unsigned_abs());
1240        state.dif[3] = state.dif[3].wrapping_add((d - state.d2).unsigned_abs());
1241        state.dif[4] = state.dif[4].wrapping_add((d + state.d2).unsigned_abs());
1242        state.dif[5] = state.dif[5].wrapping_add((d - state.d3).unsigned_abs());
1243        state.dif[6] = state.dif[6].wrapping_add((d + state.d3).unsigned_abs());
1244        state.dif[7] = state.dif[7].wrapping_add((d - state.d4).unsigned_abs());
1245        state.dif[8] = state.dif[8].wrapping_add((d + state.d4).unsigned_abs());
1246        state.dif[9] = state.dif[9].wrapping_add((d - channel_delta).unsigned_abs());
1247        state.dif[10] = state.dif[10].wrapping_add((d + channel_delta).unsigned_abs());
1248
1249        channel_delta = (byte.wrapping_sub(state.last_char as u8)) as i8 as i32;
1250        state.last_delta = channel_delta;
1251        state.last_char = byte as i32;
1252
1253        if state.byte_count & 0x1f == 0 {
1254            let mut min_dif = state.dif[0];
1255            let mut num_min_dif = 0usize;
1256            state.dif[0] = 0;
1257            for diff_index in 1..state.dif.len() {
1258                if state.dif[diff_index] < min_dif {
1259                    min_dif = state.dif[diff_index];
1260                    num_min_dif = diff_index;
1261                }
1262                state.dif[diff_index] = 0;
1263            }
1264            match num_min_dif {
1265                1 if state.k[0] >= -16 => state.k[0] -= 1,
1266                2 if state.k[0] < 16 => state.k[0] += 1,
1267                3 if state.k[1] >= -16 => state.k[1] -= 1,
1268                4 if state.k[1] < 16 => state.k[1] += 1,
1269                5 if state.k[2] >= -16 => state.k[2] -= 1,
1270                6 if state.k[2] < 16 => state.k[2] += 1,
1271                7 if state.k[3] >= -16 => state.k[3] -= 1,
1272                8 if state.k[3] < 16 => state.k[3] += 1,
1273                9 if state.k[4] >= -16 => state.k[4] -= 1,
1274                10 if state.k[4] < 16 => state.k[4] += 1,
1275                _ => {}
1276            }
1277        }
1278
1279        deltas.push(delta);
1280    }
1281    Ok(deltas)
1282}
1283
1284#[derive(Debug, Clone, Copy)]
1285struct HuffmanCode {
1286    code: u16,
1287    len: u8,
1288}
1289
1290fn canonical_codes(lengths: &[u8]) -> Result<Vec<Option<HuffmanCode>>> {
1291    let mut count = [0u16; 16];
1292    for &len in lengths {
1293        if len > 15 {
1294            return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1295        }
1296        if len != 0 {
1297            count[len as usize] += 1;
1298        }
1299    }
1300    validate_huffman_counts(&count)?;
1301
1302    let mut next_code = [0u16; 16];
1303    let mut code = 0u16;
1304    for len in 1..=15 {
1305        code = (code + count[len - 1]) << 1;
1306        next_code[len] = code;
1307    }
1308
1309    let mut codes = vec![None; lengths.len()];
1310    for (symbol, &len) in lengths.iter().enumerate() {
1311        if len == 0 {
1312            continue;
1313        }
1314        let code = next_code[len as usize];
1315        next_code[len as usize] += 1;
1316        codes[symbol] = Some(HuffmanCode { code, len });
1317    }
1318    Ok(codes)
1319}
1320
1321#[derive(Debug, Clone)]
1322pub struct Unpack20 {
1323    bits: BitReader,
1324    levels: [u8; OLD_LEVEL_COUNT],
1325    main: Huffman,
1326    offsets: Huffman,
1327    lengths: Huffman,
1328    audio_tables: [Huffman; MAX_CHANNELS],
1329    audio_block: bool,
1330    channels: usize,
1331    cur_channel: usize,
1332    audio: [AudioState; MAX_CHANNELS],
1333    channel_delta: i32,
1334    old_offsets: [usize; 4],
1335    last_offset: usize,
1336    last_length: usize,
1337    pending_match: Option<(usize, usize)>,
1338    in_block: bool,
1339    output: Vec<u8>,
1340    base_offset: usize,
1341}
1342
1343impl Unpack20 {
1344    pub fn new() -> Self {
1345        Self {
1346            bits: BitReader::new(),
1347            levels: [0; OLD_LEVEL_COUNT],
1348            main: Huffman::empty(),
1349            offsets: Huffman::empty(),
1350            lengths: Huffman::empty(),
1351            audio_tables: std::array::from_fn(|_| Huffman::empty()),
1352            audio_block: false,
1353            channels: 1,
1354            cur_channel: 0,
1355            audio: [AudioState::default(); MAX_CHANNELS],
1356            channel_delta: 0,
1357            old_offsets: [0; 4],
1358            last_offset: 0,
1359            last_length: 0,
1360            pending_match: None,
1361            in_block: false,
1362            output: Vec::new(),
1363            base_offset: 0,
1364        }
1365    }
1366
1367    pub fn decode_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
1368        let start = self.current_pos();
1369        let target = start
1370            .checked_add(output_size)
1371            .ok_or(Error::InvalidData("RAR 2.0 output size overflows"))?;
1372        if !input.is_empty() {
1373            self.bits = BitReader::new();
1374        }
1375        self.bits.append(input);
1376        self.decode_until(target).map_err(|error| match error {
1377            Error::NeedMoreInput => Error::InvalidData("RAR 2.0 bitstream is truncated"),
1378            error => error,
1379        })?;
1380        self.read_last_tables()?;
1381        let out = self.raw_range(start, target)?.to_vec();
1382        self.trim_history(target, target);
1383        Ok(out)
1384    }
1385
1386    pub fn decode_member_to(
1387        &mut self,
1388        input: &[u8],
1389        output_size: usize,
1390        out: &mut impl Write,
1391    ) -> Result<()> {
1392        let decoded = self.decode_member(input, output_size)?;
1393        out.write_all(&decoded)
1394            .map_err(|_| Error::InvalidData("RAR 2.0 output write failed"))
1395    }
1396
1397    pub fn decode_member_from_reader(
1398        &mut self,
1399        input: &mut impl Read,
1400        output_size: usize,
1401        out: &mut impl Write,
1402    ) -> Result<()> {
1403        let start = self.current_pos();
1404        let target = start
1405            .checked_add(output_size)
1406            .ok_or(Error::InvalidData("RAR 2.0 output size overflows"))?;
1407        self.bits = BitReader::new();
1408        let mut input_done = false;
1409        let mut buffer = [0u8; INPUT_CHUNK];
1410
1411        while self.current_pos() < target {
1412            let checkpoint = self.clone();
1413            match self.decode_until(target) {
1414                Ok(()) => {}
1415                Err(Error::NeedMoreInput) if !input_done => {
1416                    *self = checkpoint;
1417                    let read = input
1418                        .read(&mut buffer)
1419                        .map_err(|_| Error::InvalidData("RAR 2.0 input read failed"))?;
1420                    if read == 0 {
1421                        input_done = true;
1422                    } else {
1423                        self.bits.append(&buffer[..read]);
1424                    }
1425                }
1426                Err(Error::NeedMoreInput) => {
1427                    return Err(Error::InvalidData("RAR 2.0 bitstream is truncated"));
1428                }
1429                Err(error) => return Err(error),
1430            }
1431        }
1432
1433        loop {
1434            let read = input
1435                .read(&mut buffer)
1436                .map_err(|_| Error::InvalidData("RAR 2.0 input read failed"))?;
1437            if read == 0 {
1438                break;
1439            }
1440            self.bits.append(&buffer[..read]);
1441        }
1442        self.read_last_tables()?;
1443
1444        let decoded = self.raw_range(start, target)?;
1445        out.write_all(decoded)
1446            .map_err(|_| Error::InvalidData("RAR 2.0 output write failed"))?;
1447        self.trim_history(target, target);
1448        Ok(())
1449    }
1450
1451    fn decode_until(&mut self, target: usize) -> Result<()> {
1452        while self.current_pos() < target {
1453            self.drain_pending_match(target)?;
1454            if self.current_pos() >= target {
1455                break;
1456            }
1457            if !self.in_block {
1458                self.read_tables()?;
1459                self.in_block = true;
1460            }
1461            self.decode_lz(target)?;
1462        }
1463        Ok(())
1464    }
1465
1466    fn read_tables(&mut self) -> Result<()> {
1467        let bit_field = self.bits.peek_bits(16)?;
1468        self.audio_block = bit_field & 0x8000 != 0;
1469        let keep_tables = bit_field & 0x4000 != 0;
1470        self.bits.read_bits(2)?;
1471        if !keep_tables {
1472            self.levels = [0; OLD_LEVEL_COUNT];
1473        }
1474
1475        let table_size = if self.audio_block {
1476            self.channels = ((bit_field >> 12) as usize & 3) + 1;
1477            if self.cur_channel >= self.channels {
1478                self.cur_channel = 0;
1479            }
1480            self.bits.read_bits(2)?;
1481            AUDIO_COUNT * self.channels
1482        } else {
1483            TABLE_COUNT
1484        };
1485
1486        let level_lengths = Self::read_level_lengths(&mut self.bits)?;
1487        let level_decoder = Huffman::from_lengths(&level_lengths)?;
1488        let mut new_levels = [0u8; OLD_LEVEL_COUNT];
1489        let mut pos = 0usize;
1490        while pos < table_size {
1491            let symbol = level_decoder.decode(&mut self.bits)?;
1492            match symbol {
1493                0..=15 => {
1494                    new_levels[pos] = (self.levels[pos].wrapping_add(symbol as u8)) & 0x0f;
1495                    pos += 1;
1496                }
1497                16 => {
1498                    if pos == 0 {
1499                        return Err(Error::InvalidData("RAR 2.0 table repeat at start"));
1500                    }
1501                    let count = 3 + self.bits.read_bits(2)? as usize;
1502                    let value = new_levels[pos - 1];
1503                    fill_levels(&mut new_levels, &mut pos, count, value)?;
1504                }
1505                17 => {
1506                    let count = 3 + self.bits.read_bits(3)? as usize;
1507                    fill_levels(&mut new_levels, &mut pos, count, 0)?;
1508                }
1509                18 => {
1510                    let count = 11 + self.bits.read_bits(7)? as usize;
1511                    fill_levels(&mut new_levels, &mut pos, count, 0)?;
1512                }
1513                _ => return Err(Error::InvalidData("RAR 2.0 invalid level symbol")),
1514            }
1515        }
1516
1517        self.levels = new_levels;
1518        if self.audio_block {
1519            for channel in 0..self.channels {
1520                let start = channel * AUDIO_COUNT;
1521                self.audio_tables[channel] =
1522                    Huffman::from_lengths(&self.levels[start..start + AUDIO_COUNT])?;
1523            }
1524        } else {
1525            self.main = Huffman::from_lengths(&self.levels[..MAIN_COUNT])?;
1526            self.offsets =
1527                Huffman::from_lengths(&self.levels[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
1528            self.lengths =
1529                Huffman::from_lengths(&self.levels[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT])?;
1530        }
1531        Ok(())
1532    }
1533
1534    fn read_level_lengths(bits: &mut BitReader) -> Result<[u8; LEVEL_COUNT]> {
1535        let mut lengths = [0u8; LEVEL_COUNT];
1536        for length in &mut lengths {
1537            *length = bits.read_bits(4)? as u8;
1538        }
1539        Ok(lengths)
1540    }
1541
1542    fn decode_lz(&mut self, output_size: usize) -> Result<()> {
1543        while self.current_pos() < output_size {
1544            if self.audio_block {
1545                self.decode_audio_byte()?;
1546                if !self.in_block {
1547                    return Ok(());
1548                }
1549                continue;
1550            }
1551            let symbol = self.main.decode(&mut self.bits)?;
1552            match symbol {
1553                0..=255 => self.output.push(symbol as u8),
1554                256 => {
1555                    if self.last_length != 0 {
1556                        let length = self.last_length;
1557                        let offset = self.last_offset;
1558                        self.push_old_offset(offset);
1559                        self.copy_match(length, offset, output_size)?;
1560                    }
1561                }
1562                257..=260 => {
1563                    let index = symbol - 257;
1564                    let offset = self.old_offsets[index];
1565                    let length_slot = self.lengths.decode(&mut self.bits)?;
1566                    if length_slot >= LENGTH_COUNT {
1567                        return Err(Error::InvalidData("RAR 2.0 invalid repeat length slot"));
1568                    }
1569                    let mut length = LENGTH_BASES[length_slot] + 2;
1570                    if LENGTH_BITS[length_slot] != 0 {
1571                        length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
1572                    }
1573                    if offset >= 0x101 {
1574                        length += 1;
1575                    }
1576                    if offset >= 0x2000 {
1577                        length += 1;
1578                    }
1579                    if offset >= 0x40000 {
1580                        length += 1;
1581                    }
1582                    self.push_old_offset(offset);
1583                    self.last_offset = offset;
1584                    self.last_length = length;
1585                    self.copy_match(length, offset, output_size)?;
1586                }
1587                261..=268 => {
1588                    let index = symbol - 261;
1589                    let mut offset = SHORT_BASES[index] + 1;
1590                    if SHORT_BITS[index] != 0 {
1591                        offset += self.bits.read_bits(SHORT_BITS[index])? as usize;
1592                    }
1593                    self.push_old_offset(offset);
1594                    self.last_offset = offset;
1595                    self.last_length = 2;
1596                    self.copy_match(2, offset, output_size)?;
1597                }
1598                269 => {
1599                    self.in_block = false;
1600                    return Ok(());
1601                }
1602                270..=297 => {
1603                    let length_slot = symbol - 270;
1604                    let mut length = LENGTH_BASES[length_slot] + 3;
1605                    if LENGTH_BITS[length_slot] != 0 {
1606                        length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
1607                    }
1608                    let offset = self.read_offset()?;
1609                    if offset >= 0x2000 {
1610                        length += 1;
1611                    }
1612                    if offset >= 0x40000 {
1613                        length += 1;
1614                    }
1615                    self.push_old_offset(offset);
1616                    self.last_offset = offset;
1617                    self.last_length = length;
1618                    self.copy_match(length, offset, output_size)?;
1619                }
1620                _ => return Err(Error::InvalidData("RAR 2.0 invalid main symbol")),
1621            }
1622        }
1623        Ok(())
1624    }
1625
1626    fn decode_audio_byte(&mut self) -> Result<()> {
1627        let symbol = self.audio_tables[self.cur_channel].decode(&mut self.bits)?;
1628        if symbol == 256 {
1629            self.in_block = false;
1630            return Ok(());
1631        }
1632        if symbol > 256 {
1633            return Err(Error::InvalidData("RAR 2.0 invalid audio symbol"));
1634        }
1635        let byte = self.decode_audio(symbol as u8);
1636        self.output.push(byte);
1637        self.cur_channel += 1;
1638        if self.cur_channel == self.channels {
1639            self.cur_channel = 0;
1640        }
1641        Ok(())
1642    }
1643
1644    fn decode_audio(&mut self, delta: u8) -> u8 {
1645        let state = &mut self.audio[self.cur_channel];
1646        state.byte_count = state.byte_count.wrapping_add(1);
1647        state.d4 = state.d3;
1648        state.d3 = state.d2;
1649        state.d2 = state.last_delta - state.d1;
1650        state.d1 = state.last_delta;
1651
1652        let predicted = 8 * state.last_char
1653            + state.k[0] * state.d1
1654            + state.k[1] * state.d2
1655            + state.k[2] * state.d3
1656            + state.k[3] * state.d4
1657            + state.k[4] * self.channel_delta;
1658        let predicted = (predicted >> 3) & 0xff;
1659        let byte = predicted.wrapping_sub(delta as i32) as u8;
1660
1661        let d = (delta as i8 as i32) << 3;
1662        state.dif[0] = state.dif[0].wrapping_add(d.unsigned_abs());
1663        state.dif[1] = state.dif[1].wrapping_add((d - state.d1).unsigned_abs());
1664        state.dif[2] = state.dif[2].wrapping_add((d + state.d1).unsigned_abs());
1665        state.dif[3] = state.dif[3].wrapping_add((d - state.d2).unsigned_abs());
1666        state.dif[4] = state.dif[4].wrapping_add((d + state.d2).unsigned_abs());
1667        state.dif[5] = state.dif[5].wrapping_add((d - state.d3).unsigned_abs());
1668        state.dif[6] = state.dif[6].wrapping_add((d + state.d3).unsigned_abs());
1669        state.dif[7] = state.dif[7].wrapping_add((d - state.d4).unsigned_abs());
1670        state.dif[8] = state.dif[8].wrapping_add((d + state.d4).unsigned_abs());
1671        state.dif[9] = state.dif[9].wrapping_add((d - self.channel_delta).unsigned_abs());
1672        state.dif[10] = state.dif[10].wrapping_add((d + self.channel_delta).unsigned_abs());
1673
1674        self.channel_delta = (byte.wrapping_sub(state.last_char as u8)) as i8 as i32;
1675        state.last_delta = self.channel_delta;
1676        state.last_char = byte as i32;
1677
1678        if state.byte_count & 0x1f == 0 {
1679            let mut min_dif = state.dif[0];
1680            let mut num_min_dif = 0usize;
1681            state.dif[0] = 0;
1682            for index in 1..state.dif.len() {
1683                if state.dif[index] < min_dif {
1684                    min_dif = state.dif[index];
1685                    num_min_dif = index;
1686                }
1687                state.dif[index] = 0;
1688            }
1689            match num_min_dif {
1690                1 if state.k[0] >= -16 => state.k[0] -= 1,
1691                2 if state.k[0] < 16 => state.k[0] += 1,
1692                3 if state.k[1] >= -16 => state.k[1] -= 1,
1693                4 if state.k[1] < 16 => state.k[1] += 1,
1694                5 if state.k[2] >= -16 => state.k[2] -= 1,
1695                6 if state.k[2] < 16 => state.k[2] += 1,
1696                7 if state.k[3] >= -16 => state.k[3] -= 1,
1697                8 if state.k[3] < 16 => state.k[3] += 1,
1698                9 if state.k[4] >= -16 => state.k[4] -= 1,
1699                10 if state.k[4] < 16 => state.k[4] += 1,
1700                _ => {}
1701            }
1702        }
1703
1704        byte
1705    }
1706
1707    fn read_offset(&mut self) -> Result<usize> {
1708        let slot = self.offsets.decode(&mut self.bits)?;
1709        if slot >= OFFSET_COUNT {
1710            return Err(Error::InvalidData("RAR 2.0 invalid offset slot"));
1711        }
1712        let mut offset = OFFSET_BASES[slot] + 1;
1713        if OFFSET_BITS[slot] != 0 {
1714            offset += self.bits.read_bits(OFFSET_BITS[slot])? as usize;
1715        }
1716        Ok(offset)
1717    }
1718
1719    fn copy_match(&mut self, length: usize, offset: usize, output_size: usize) -> Result<()> {
1720        let offset = if offset == 0 { 1 } else { offset };
1721        let current = self.current_pos();
1722        if offset > current {
1723            return Err(Error::InvalidData("RAR 2.0 match distance is out of range"));
1724        }
1725        for index in 0..length {
1726            if self.current_pos() >= output_size {
1727                self.pending_match = Some((length - index, offset));
1728                break;
1729            }
1730            let src = self.current_pos() - offset;
1731            let byte = *self
1732                .raw_byte(src)
1733                .ok_or(Error::InvalidData("RAR 2.0 match distance is out of range"))?;
1734            self.output.push(byte);
1735        }
1736        Ok(())
1737    }
1738
1739    fn drain_pending_match(&mut self, output_size: usize) -> Result<()> {
1740        let Some((length, offset)) = self.pending_match.take() else {
1741            return Ok(());
1742        };
1743        self.copy_match(length, offset, output_size)
1744    }
1745
1746    fn read_last_tables(&mut self) -> Result<()> {
1747        if self.bits.remaining_bytes_from_current() < 5 {
1748            return Ok(());
1749        }
1750        if self.audio_block {
1751            if self.audio_tables[self.cur_channel].decode(&mut self.bits)? == 256 {
1752                self.read_tables()?;
1753                self.in_block = true;
1754            }
1755        } else if self.main.decode(&mut self.bits)? == 269 {
1756            self.read_tables()?;
1757            self.in_block = true;
1758        }
1759        Ok(())
1760    }
1761
1762    fn push_old_offset(&mut self, offset: usize) {
1763        self.old_offsets[3] = self.old_offsets[2];
1764        self.old_offsets[2] = self.old_offsets[1];
1765        self.old_offsets[1] = self.old_offsets[0];
1766        self.old_offsets[0] = offset;
1767    }
1768
1769    fn current_pos(&self) -> usize {
1770        self.base_offset + self.output.len()
1771    }
1772
1773    fn raw_byte(&self, position: usize) -> Option<&u8> {
1774        self.output.get(position.checked_sub(self.base_offset)?)
1775    }
1776
1777    fn raw_range(&self, start: usize, end: usize) -> Result<&[u8]> {
1778        if start < self.base_offset || end < start {
1779            return Err(Error::InvalidData(
1780                "RAR 2.0 retained history is unavailable",
1781            ));
1782        }
1783        let rel_start = start - self.base_offset;
1784        let rel_end = end - self.base_offset;
1785        self.output
1786            .get(rel_start..rel_end)
1787            .ok_or(Error::InvalidData(
1788                "RAR 2.0 retained history is unavailable",
1789            ))
1790    }
1791
1792    fn trim_history(&mut self, flushed_pos: usize, current_pos: usize) {
1793        let keep_from = current_pos.saturating_sub(MAX_HISTORY).min(flushed_pos);
1794        if keep_from <= self.base_offset {
1795            return;
1796        }
1797        let drain = keep_from - self.base_offset;
1798        self.output.drain(..drain);
1799        self.base_offset = keep_from;
1800    }
1801}
1802
1803impl Default for Unpack20 {
1804    fn default() -> Self {
1805        Self::new()
1806    }
1807}
1808
1809#[derive(Debug, Clone, Copy, Default)]
1810struct AudioState {
1811    k: [i32; 5],
1812    d1: i32,
1813    d2: i32,
1814    d3: i32,
1815    d4: i32,
1816    last_delta: i32,
1817    last_char: i32,
1818    byte_count: u32,
1819    dif: [u32; 11],
1820}
1821
1822fn fill_levels(levels: &mut [u8], pos: &mut usize, count: usize, value: u8) -> Result<()> {
1823    let end = pos
1824        .checked_add(count)
1825        .ok_or(Error::InvalidData("RAR 2.0 table run overflows"))?;
1826    let end = end.min(levels.len());
1827    for item in &mut levels[*pos..end] {
1828        *item = value;
1829    }
1830    *pos = end;
1831    Ok(())
1832}
1833
1834#[derive(Debug, Clone)]
1835struct Huffman {
1836    symbols: Vec<HuffmanSymbol>,
1837    first_code: [u16; 16],
1838    first_index: [usize; 16],
1839    counts: [u16; 16],
1840}
1841
1842#[derive(Debug, Clone)]
1843struct HuffmanSymbol {
1844    code: u16,
1845    len: u8,
1846    symbol: usize,
1847}
1848
1849impl Huffman {
1850    fn empty() -> Self {
1851        Self {
1852            symbols: Vec::new(),
1853            first_code: [0; 16],
1854            first_index: [0; 16],
1855            counts: [0; 16],
1856        }
1857    }
1858
1859    fn from_lengths(lengths: &[u8]) -> Result<Self> {
1860        let mut count = [0u16; 16];
1861        for &len in lengths {
1862            if len > 15 {
1863                return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1864            }
1865            if len != 0 {
1866                count[len as usize] += 1;
1867            }
1868        }
1869        if count.iter().all(|&value| value == 0) {
1870            return Ok(Self::empty());
1871        }
1872        validate_huffman_counts(&count)?;
1873
1874        let mut first_code = [0u16; 16];
1875        let mut next_code = [0u16; 16];
1876        let mut code = 0u16;
1877        for len in 1..=15 {
1878            code = (code + count[len - 1]) << 1;
1879            first_code[len] = code;
1880            next_code[len] = code;
1881        }
1882
1883        let mut first_index = [0usize; 16];
1884        let mut index = 0usize;
1885        for len in 1..=15 {
1886            first_index[len] = index;
1887            index += usize::from(count[len]);
1888        }
1889
1890        let mut symbols = Vec::new();
1891        for (symbol, &len) in lengths.iter().enumerate() {
1892            if len == 0 {
1893                continue;
1894            }
1895            let code = next_code[len as usize];
1896            next_code[len as usize] += 1;
1897            symbols.push(HuffmanSymbol { code, len, symbol });
1898        }
1899        symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
1900        Ok(Self {
1901            symbols,
1902            first_code,
1903            first_index,
1904            counts: count,
1905        })
1906    }
1907
1908    fn decode(&self, bits: &mut BitReader) -> Result<usize> {
1909        let mut code = 0u16;
1910        if self.symbols.is_empty() {
1911            return Err(Error::InvalidData("RAR 2.0 empty Huffman table"));
1912        }
1913        for len in 1..=15 {
1914            code = (code << 1) | bits.read_bit()? as u16;
1915            let count = self.counts[len];
1916            if count != 0 {
1917                let first = self.first_code[len];
1918                let offset = code.wrapping_sub(first);
1919                if offset < count {
1920                    let index = self.first_index[len] + usize::from(offset);
1921                    return Ok(self.symbols[index].symbol);
1922                }
1923            }
1924        }
1925        Err(Error::InvalidData("RAR 2.0 invalid Huffman code"))
1926    }
1927}
1928
1929fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
1930    let mut available = 1i32;
1931    for &len_count in count.iter().skip(1) {
1932        available = (available << 1) - i32::from(len_count);
1933        if available < 0 {
1934            return Err(Error::InvalidData("RAR 2.0 oversubscribed Huffman table"));
1935        }
1936    }
1937    Ok(())
1938}
1939
1940#[derive(Debug, Clone)]
1941struct BitReader {
1942    input: Vec<u8>,
1943    bit_pos: usize,
1944}
1945
1946impl BitReader {
1947    fn new() -> Self {
1948        Self {
1949            input: Vec::new(),
1950            bit_pos: 0,
1951        }
1952    }
1953
1954    fn append(&mut self, input: &[u8]) {
1955        self.compact();
1956        self.input.extend_from_slice(input);
1957    }
1958
1959    fn compact(&mut self) {
1960        let bytes = self.bit_pos / 8;
1961        if bytes == 0 {
1962            return;
1963        }
1964        self.input.drain(..bytes);
1965        self.bit_pos -= bytes * 8;
1966    }
1967
1968    fn read_bit(&mut self) -> Result<u8> {
1969        self.read_bits(1).map(|value| value as u8)
1970    }
1971
1972    fn read_bits(&mut self, count: u8) -> Result<u32> {
1973        let value = self.peek_bits(count)?;
1974        self.bit_pos += count as usize;
1975        Ok(value)
1976    }
1977
1978    fn peek_bits(&self, count: u8) -> Result<u32> {
1979        if count > 24 {
1980            return Err(Error::InvalidData("RAR 2.0 bit read is too wide"));
1981        }
1982        let mut value = 0u32;
1983        for i in 0..count as usize {
1984            let bit_index = self.bit_pos + i;
1985            let byte = *self.input.get(bit_index / 8).ok_or(Error::NeedMoreInput)?;
1986            let bit = (byte >> (7 - (bit_index % 8))) & 1;
1987            value = (value << 1) | bit as u32;
1988        }
1989        Ok(value)
1990    }
1991
1992    fn remaining_bytes_from_current(&self) -> usize {
1993        self.input.len().saturating_sub(self.bit_pos / 8)
1994    }
1995}
1996
1997#[derive(Default)]
1998struct BitWriter {
1999    bytes: Vec<u8>,
2000    bit_pos: usize,
2001}
2002
2003impl BitWriter {
2004    fn write_bits(&mut self, value: u32, count: u8) {
2005        for shift in (0..count).rev() {
2006            self.write_bit(((value >> shift) & 1) != 0);
2007        }
2008    }
2009
2010    fn write_bit(&mut self, bit: bool) {
2011        if self.bit_pos.is_multiple_of(8) {
2012            self.bytes.push(0);
2013        }
2014        if bit {
2015            let shift = 7 - (self.bit_pos % 8);
2016            *self.bytes.last_mut().unwrap() |= 1 << shift;
2017        }
2018        self.bit_pos += 1;
2019    }
2020
2021    fn finish(self) -> Vec<u8> {
2022        self.bytes
2023    }
2024}
2025
2026#[cfg(test)]
2027mod tests {
2028    use super::{
2029        encode_tokens, unpack20_decode, unpack20_encode_literals, BitWriter, EncodeOptions,
2030        EncodeToken, Error, Huffman, Unpack20, Unpack20Encoder,
2031    };
2032
2033    const AUTOREJ_PACKED: &[u8] = &[
2034        0x09, 0x14, 0x0c, 0x94, 0x00, 0x00, 0x00, 0x00, 0x00, 0xce, 0xf8, 0x1f, 0xc1, 0xe6, 0x05,
2035        0xfc, 0x39, 0xc3, 0x50, 0x65, 0x08, 0x41, 0x94, 0xc4, 0x1d, 0xf3, 0xcd, 0x0d, 0x8e, 0x20,
2036        0xf5, 0x9d, 0x8e, 0x76, 0x1d, 0xc5, 0x19, 0xde, 0x16, 0x5b, 0x52, 0xb8, 0x8e, 0x75, 0xcd,
2037        0xaf, 0x1f, 0xfc, 0x9e, 0xf7, 0x00, 0x01, 0xbe, 0x90,
2038    ];
2039
2040    #[test]
2041    fn decodes_rar20_lz_member() {
2042        assert_eq!(
2043            unpack20_decode(AUTOREJ_PACKED, expected_text().len()).unwrap(),
2044            expected_text()
2045        );
2046    }
2047
2048    #[test]
2049    fn rejects_oversubscribed_rar20_huffman_tables() {
2050        assert!(matches!(
2051            Huffman::from_lengths(&[1, 1, 1]),
2052            Err(Error::InvalidData("RAR 2.0 oversubscribed Huffman table"))
2053        ));
2054    }
2055
2056    #[test]
2057    fn decode_member_from_reader_accepts_incremental_input() {
2058        struct TinyReader<'a> {
2059            input: &'a [u8],
2060        }
2061
2062        impl std::io::Read for TinyReader<'_> {
2063            fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
2064                if self.input.is_empty() {
2065                    return Ok(0);
2066                }
2067                let len = self.input.len().min(out.len()).min(3);
2068                out[..len].copy_from_slice(&self.input[..len]);
2069                self.input = &self.input[len..];
2070                Ok(len)
2071            }
2072        }
2073
2074        let mut decoder = Unpack20::new();
2075        let mut reader = TinyReader {
2076            input: AUTOREJ_PACKED,
2077        };
2078        let mut output = Vec::new();
2079        decoder
2080            .decode_member_from_reader(&mut reader, expected_text().len(), &mut output)
2081            .unwrap();
2082
2083        assert_eq!(output, expected_text());
2084    }
2085
2086    #[test]
2087    fn decodes_synthetic_audio_block() {
2088        let packed = synthetic_audio_block(8);
2089        let mut decoder = Unpack20::new();
2090
2091        assert_eq!(decoder.decode_member(&packed, 8).unwrap(), vec![0; 8]);
2092    }
2093
2094    #[test]
2095    fn audio_encoder_round_trips_interleaved_pcm_like_payload() {
2096        let input = interleaved_pcm_like_payload();
2097        let packed = super::encode_audio_member(&input, 4).unwrap();
2098        let decoded = unpack20_decode(&packed, input.len()).unwrap();
2099
2100        assert_eq!(decoded, input);
2101    }
2102
2103    #[test]
2104    fn auto_encoder_uses_audio_when_it_beats_lz() {
2105        let input = interleaved_pcm_like_payload();
2106        let lz = unpack20_encode_literals(&input).unwrap();
2107        let auto = super::unpack20_encode_auto(&input).unwrap();
2108        let decoded = unpack20_decode(&auto, input.len()).unwrap();
2109
2110        assert!(auto.len() < lz.len());
2111        assert_eq!(decoded, input);
2112    }
2113
2114    #[test]
2115    fn default_encode_options_match_legacy_entry_points() {
2116        let input = b"rar20 option plumbing preserves default output ".repeat(128);
2117        assert_eq!(
2118            unpack20_encode_literals(&input).unwrap(),
2119            super::unpack20_encode_literals_with_options(&input, EncodeOptions::default()).unwrap()
2120        );
2121        assert_eq!(
2122            super::unpack20_encode_auto(&input).unwrap(),
2123            super::unpack20_encode_auto_with_options(&input, EncodeOptions::default()).unwrap()
2124        );
2125
2126        let first = b"solid rar20 option seed ".repeat(64);
2127        let second = b"solid rar20 option seed with suffix ".repeat(32);
2128        let mut legacy = Unpack20Encoder::new();
2129        let mut explicit = Unpack20Encoder::with_options(EncodeOptions::default());
2130        assert_eq!(
2131            legacy.encode_member(&first).unwrap(),
2132            explicit.encode_member(&first).unwrap()
2133        );
2134        assert_eq!(
2135            legacy.encode_member(&second).unwrap(),
2136            explicit.encode_member(&second).unwrap()
2137        );
2138    }
2139
2140    #[test]
2141    fn encode_options_can_disable_fresh_lz_matches() {
2142        let input = b"abcdefabcdefabcdefabcdef";
2143        let default_tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2144        let literalish_tokens = encode_tokens(input, &[], EncodeOptions::new(0), None);
2145
2146        assert!(default_tokens
2147            .iter()
2148            .any(|token| matches!(token, EncodeToken::Match { .. })));
2149        assert!(!literalish_tokens
2150            .iter()
2151            .any(|token| matches!(token, EncodeToken::Match { .. })));
2152    }
2153
2154    #[test]
2155    fn table_level_encoder_uses_rar20_run_symbols() {
2156        let lengths = [0, 0, 0, 0, 5, 5, 5, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2];
2157        let tokens = super::encode_level_tokens(&lengths);
2158
2159        assert_eq!(
2160            tokens,
2161            vec![
2162                super::LevelToken::zero_run_short(4),
2163                super::LevelToken::plain(5),
2164                super::LevelToken::repeat_previous(3),
2165                super::LevelToken::plain(7),
2166                super::LevelToken::zero_run_short(10),
2167                super::LevelToken::plain(2),
2168            ]
2169        );
2170    }
2171
2172    #[test]
2173    fn decodes_back_to_back_fresh_audio_blocks() {
2174        // No real RAR 2.x encoder we tested emits a mid-stream `audio_block,
2175        // !keep_tables` transition; reference encoders always either keep the
2176        // existing audio tables across boundaries or switch out to LZ. The
2177        // decoder branch that rebuilds `audio_tables` from a freshly-read level
2178        // table inside an audio sequence is therefore only reachable via a
2179        // hand-crafted fixture.
2180        let mut bits = BitWriter::default();
2181        write_fresh_audio_block(&mut bits, 4, /*emit_end_sentinel=*/ true);
2182        write_fresh_audio_block(&mut bits, 4, /*emit_end_sentinel=*/ false);
2183        let packed = bits.finish();
2184
2185        let mut decoder = Unpack20::new();
2186
2187        assert_eq!(decoder.decode_member(&packed, 8).unwrap(), vec![0; 8]);
2188    }
2189
2190    fn expected_text() -> Vec<u8> {
2191        b"Hello text not audio.\r\n".repeat(100)
2192    }
2193
2194    fn interleaved_pcm_like_payload() -> Vec<u8> {
2195        let mut input = Vec::new();
2196        for sample in 0..8192i16 {
2197            let left = sample.wrapping_mul(3).wrapping_add(200);
2198            let right = sample.wrapping_mul(3).wrapping_sub(200);
2199            input.extend_from_slice(&left.to_le_bytes());
2200            input.extend_from_slice(&right.to_le_bytes());
2201        }
2202        input
2203    }
2204
2205    fn synthetic_audio_block(samples: usize) -> Vec<u8> {
2206        let mut bits = BitWriter::default();
2207
2208        bits.write_bits(0b10, 2); // audio block, do not keep previous tables.
2209        bits.write_bits(0, 2); // one channel.
2210
2211        for symbol in 0..19 {
2212            let len = if symbol == 1 || symbol == 18 { 1 } else { 0 };
2213            bits.write_bits(len, 4);
2214        }
2215
2216        bits.write_bit(false); // level symbol 1: audio delta 0 has code length 1.
2217        bits.write_bit(true); // level symbol 18: 138 zeros.
2218        bits.write_bits(127, 7);
2219        bits.write_bit(true); // level symbol 18: 118 zeros.
2220        bits.write_bits(107, 7);
2221
2222        for _ in 0..samples {
2223            bits.write_bit(false); // audio delta 0.
2224        }
2225
2226        bits.finish()
2227    }
2228
2229    fn write_fresh_audio_block(bits: &mut BitWriter, samples: usize, emit_end_sentinel: bool) {
2230        bits.write_bits(0b10, 2); // audio block, do not keep previous tables.
2231        bits.write_bits(0, 2); // one channel.
2232
2233        for symbol in 0..19 {
2234            let len = if symbol == 1 || symbol == 18 { 1 } else { 0 };
2235            bits.write_bits(len, 4);
2236        }
2237
2238        // Audio table: symbol 0 (delta 0) = "0", symbol 256 (block end) = "1".
2239        bits.write_bit(false); // level symbol 1: audio delta 0 has code length 1.
2240        bits.write_bit(true); // level symbol 18: 138 zeros (audio symbols 1..=138).
2241        bits.write_bits(127, 7);
2242        bits.write_bit(true); // level symbol 18: 117 zeros (audio symbols 139..=255).
2243        bits.write_bits(106, 7);
2244        bits.write_bit(false); // level symbol 1: block-end (256) has code length 1.
2245
2246        for _ in 0..samples {
2247            bits.write_bit(false); // audio delta 0.
2248        }
2249        if emit_end_sentinel {
2250            bits.write_bit(true); // audio symbol 256: end of audio block.
2251        }
2252    }
2253
2254    #[test]
2255    fn literal_encoder_round_trips_rar20_lz_blocks() {
2256        let input = b"literal-only RAR 2.0 baseline\nwith repeated text literal-only\n";
2257        let packed = unpack20_encode_literals(input).unwrap();
2258
2259        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2260    }
2261
2262    #[test]
2263    fn encoder_emits_rar20_offset_one_matches_for_repeated_bytes() {
2264        let input = b"A".repeat(1024);
2265        let packed = unpack20_encode_literals(&input).unwrap();
2266
2267        assert!(packed.len() < input.len() / 4);
2268        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2269    }
2270
2271    #[test]
2272    fn encoder_emits_rar20_dictionary_matches_for_repeated_sequences() {
2273        let input = b"abc123xyz-".repeat(128);
2274        let packed = unpack20_encode_literals(&input).unwrap();
2275
2276        assert!(packed.len() < input.len() / 2);
2277        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2278    }
2279
2280    #[test]
2281    fn encoder_emits_rar20_repeat_last_matches_for_regular_streams() {
2282        let input = b"\x00\x01\x02\x03".repeat(4096);
2283        let tokens = encode_tokens(&input, &[], EncodeOptions::default(), None);
2284        let packed = unpack20_encode_literals(&input).unwrap();
2285
2286        assert!(tokens
2287            .iter()
2288            .any(|token| matches!(token, EncodeToken::RepeatLast)));
2289        assert!(packed.len() < input.len() / 8);
2290        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2291    }
2292
2293    #[test]
2294    fn encoder_emits_rar20_minimum_length_fresh_matches() {
2295        let input = b"abcabc";
2296        let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2297        let packed = unpack20_encode_literals(input).unwrap();
2298
2299        assert!(matches!(
2300            tokens.as_slice(),
2301            [
2302                EncodeToken::Literal(b'a'),
2303                EncodeToken::Literal(b'b'),
2304                EncodeToken::Literal(b'c'),
2305                EncodeToken::Match {
2306                    length: 3,
2307                    offset: 3
2308                }
2309            ]
2310        ));
2311        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2312    }
2313
2314    #[test]
2315    fn encoder_emits_rar20_short_offset_matches() {
2316        let input = b"abab";
2317        let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2318        let packed = unpack20_encode_literals(input).unwrap();
2319
2320        assert!(matches!(
2321            tokens.as_slice(),
2322            [
2323                EncodeToken::Literal(b'a'),
2324                EncodeToken::Literal(b'b'),
2325                EncodeToken::ShortOffset { offset: 2 }
2326            ]
2327        ));
2328        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2329    }
2330
2331    #[test]
2332    fn encoder_emits_rar20_old_offset_matches() {
2333        let input = b"abcdabcdXYZXYZwxyzwxyz";
2334        let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2335        let packed = unpack20_encode_literals(input).unwrap();
2336
2337        assert!(tokens
2338            .iter()
2339            .any(|token| matches!(token, EncodeToken::OldOffset { .. })));
2340        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2341    }
2342
2343    #[test]
2344    fn encoder_finds_rar20_matches_beyond_near_offsets() {
2345        let phrase = b"long-distance repeated phrase for rar20 match finder.";
2346        let mut input = Vec::new();
2347        input.extend_from_slice(phrase);
2348        input.extend(std::iter::repeat_n(0, 300 * 1024));
2349        input.extend_from_slice(phrase);
2350        input.extend_from_slice(phrase);
2351        let tokens = encode_tokens(&input, &[], EncodeOptions::default(), None);
2352        let packed = unpack20_encode_literals(&input).unwrap();
2353
2354        assert!(tokens.iter().any(|token| matches!(
2355            token,
2356            EncodeToken::Match { offset, .. } if *offset > 0x40000
2357        )));
2358        assert!(packed.len() < input.len());
2359        let decoded = unpack20_decode(&packed, input.len()).unwrap();
2360        assert!(
2361            decoded == input,
2362            "RAR 2.0 long-distance match round-trip failed"
2363        );
2364    }
2365
2366    #[test]
2367    fn solid_encoder_emits_rar20_matches_against_previous_member_history() {
2368        let first = b"solid rar20 shared phrase alpha beta gamma ".repeat(4);
2369        let second = b"solid rar20 shared phrase alpha beta gamma ".repeat(2);
2370        let independent = unpack20_encode_literals(&second).unwrap();
2371        let mut encoder = Unpack20Encoder::new();
2372        let first_packed = encoder.encode_member(&first).unwrap();
2373        let second_packed = encoder.encode_member(&second).unwrap();
2374
2375        assert!(second_packed.len() < independent.len());
2376        let mut decoder = Unpack20::new();
2377        assert_eq!(
2378            decoder.decode_member(&first_packed, first.len()).unwrap(),
2379            first
2380        );
2381        assert_eq!(
2382            decoder.decode_member(&second_packed, second.len()).unwrap(),
2383            second
2384        );
2385    }
2386
2387    #[test]
2388    fn solid_encoder_reuses_rar20_tables_at_member_boundary() {
2389        let first: Vec<_> = (0u8..=255).cycle().take(4096).collect();
2390        let second = b"short literal member after reused rar20 table boundary\n";
2391        let independent = unpack20_encode_literals(second).unwrap();
2392        let mut encoder = Unpack20Encoder::new();
2393        let first_packed = encoder.encode_member(&first).unwrap();
2394        let second_packed = encoder.encode_member(second).unwrap();
2395
2396        assert!(second_packed.len() < independent.len());
2397        let mut decoder = Unpack20::new();
2398        assert_eq!(
2399            decoder.decode_member(&first_packed, first.len()).unwrap(),
2400            first
2401        );
2402        assert_eq!(
2403            decoder.decode_member(&second_packed, second.len()).unwrap(),
2404            second
2405        );
2406    }
2407
2408    #[test]
2409    fn solid_encoder_matches_immediately_after_rar20_table_boundary() {
2410        let phrase = b"rar20 table boundary match phrase with enough bytes ";
2411        let first = phrase.repeat(128);
2412        let second = phrase.repeat(8);
2413        let independent = unpack20_encode_literals(&second).unwrap();
2414        let mut encoder = Unpack20Encoder::new();
2415        let first_packed = encoder.encode_member(&first).unwrap();
2416        let second_packed = encoder.encode_member(&second).unwrap();
2417        let tokens = encode_tokens(&second, &first, EncodeOptions::default(), None);
2418
2419        assert!(matches!(tokens.first(), Some(EncodeToken::Match { .. })));
2420        assert!(second_packed.len() < independent.len());
2421        let mut decoder = Unpack20::new();
2422        assert_eq!(
2423            decoder.decode_member(&first_packed, first.len()).unwrap(),
2424            first
2425        );
2426        assert_eq!(
2427            decoder.decode_member(&second_packed, second.len()).unwrap(),
2428            second
2429        );
2430    }
2431
2432    #[test]
2433    fn solid_encoder_carries_rar20_history_across_multiple_members() {
2434        let first = b"rar20 multi member solid seed ".repeat(512);
2435        let second = b"rar20 multi member solid seed with middle tail ".repeat(128);
2436        let third = b"with middle tail ".repeat(64);
2437        let independent = unpack20_encode_literals(&third).unwrap();
2438        let mut encoder = Unpack20Encoder::new();
2439        let first_packed = encoder.encode_member(&first).unwrap();
2440        let second_packed = encoder.encode_member(&second).unwrap();
2441        let third_packed = encoder.encode_member(&third).unwrap();
2442
2443        assert!(third_packed.len() < independent.len());
2444        let mut decoder = Unpack20::new();
2445        assert_eq!(
2446            decoder.decode_member(&first_packed, first.len()).unwrap(),
2447            first
2448        );
2449        assert_eq!(
2450            decoder.decode_member(&second_packed, second.len()).unwrap(),
2451            second
2452        );
2453        assert_eq!(
2454            decoder.decode_member(&third_packed, third.len()).unwrap(),
2455            third
2456        );
2457    }
2458
2459    #[test]
2460    fn decode_member_to_streams_decoded_payload_through_writer_sink() {
2461        let input = b"abcabcabcabcabcabcabcabcabcabcabcabc";
2462        let packed = unpack20_encode_literals(input).unwrap();
2463
2464        let mut decoder = Unpack20::new();
2465        let mut sink = Vec::new();
2466        decoder
2467            .decode_member_to(&packed, input.len(), &mut sink)
2468            .unwrap();
2469        assert_eq!(sink, input);
2470
2471        // The error-mapping closure inside decode_member_to fires when the
2472        // sink's write_all returns Err — feed it a writer that always fails.
2473        struct FailingWriter;
2474        impl std::io::Write for FailingWriter {
2475            fn write(&mut self, _buf: &[u8]) -> std::io::Result<usize> {
2476                Err(std::io::Error::other("disk full"))
2477            }
2478            fn flush(&mut self) -> std::io::Result<()> {
2479                Ok(())
2480            }
2481        }
2482        let mut decoder = Unpack20::new();
2483        let err = decoder
2484            .decode_member_to(&packed, input.len(), &mut FailingWriter)
2485            .unwrap_err();
2486        assert_eq!(err, Error::InvalidData("RAR 2.0 output write failed"));
2487    }
2488}