Skip to main content

rars_codec/
rar20.rs

1use crate::{huffman, Error, Result};
2use std::io::{Read, Write};
3
4const MAIN_COUNT: usize = 298;
5const OFFSET_COUNT: usize = 48;
6const LENGTH_COUNT: usize = 28;
7const LEVEL_COUNT: usize = 19;
8const TABLE_COUNT: usize = MAIN_COUNT + OFFSET_COUNT + LENGTH_COUNT;
9const AUDIO_COUNT: usize = 257;
10const MAX_CHANNELS: usize = 4;
11const OLD_LEVEL_COUNT: usize = AUDIO_COUNT * MAX_CHANNELS;
12const MAX_HISTORY: usize = 1024 * 1024;
13
14const LENGTH_BASES: [usize; LENGTH_COUNT] = [
15    0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128,
16    160, 192, 224,
17];
18const LENGTH_BITS: [u8; LENGTH_COUNT] = [
19    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
20];
21const OFFSET_BASES: [usize; OFFSET_COUNT] = [
22    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536,
23    2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608,
24    262144, 327680, 393216, 458752, 524288, 589824, 655360, 720896, 786432, 851968, 917504, 983040,
25];
26const OFFSET_BITS: [u8; OFFSET_COUNT] = [
27    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
28    13, 14, 14, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
29];
30const SHORT_BASES: [usize; 8] = [0, 4, 8, 16, 32, 64, 128, 192];
31const SHORT_BITS: [u8; 8] = [2, 2, 3, 4, 5, 6, 6, 6];
32const MAX_ENCODER_MATCH_OFFSET: usize = MAX_HISTORY;
33const MAX_ENCODER_MATCH_LENGTH: usize = 258;
34const MATCH_HASH_BUCKETS: usize = 4096;
35const MAX_MATCH_CANDIDATES: usize = 256;
36
37pub fn unpack20_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
38    let mut decoder = Unpack20::new();
39    decoder.decode_member(input, output_size)
40}
41
42pub fn unpack20_encode_literals(input: &[u8]) -> Result<Vec<u8>> {
43    unpack20_encode_literals_with_options(input, EncodeOptions::default())
44}
45
46pub fn unpack20_encode_literals_with_options(
47    input: &[u8],
48    options: EncodeOptions,
49) -> Result<Vec<u8>> {
50    encode_member(input, &[], None, options)
51}
52
53pub fn unpack20_encode_auto(input: &[u8]) -> Result<Vec<u8>> {
54    unpack20_encode_auto_with_options(input, EncodeOptions::default())
55}
56
57pub fn unpack20_encode_auto_with_options(input: &[u8], options: EncodeOptions) -> Result<Vec<u8>> {
58    let lz = unpack20_encode_literals_with_options(input, options)?;
59    let mut best = lz;
60    if options.try_audio {
61        for channels in 1..=MAX_CHANNELS {
62            if input.len() < channels * 64 {
63                continue;
64            }
65            let audio = encode_audio_member(input, channels)?;
66            if audio.len() < best.len() {
67                best = audio;
68            }
69        }
70    }
71    Ok(best)
72}
73
74#[derive(Debug, Clone, Copy, PartialEq, Eq)]
75#[non_exhaustive]
76pub struct EncodeOptions {
77    pub max_match_candidates: usize,
78    pub max_match_distance: usize,
79    pub lazy_matching: bool,
80    pub lazy_lookahead: usize,
81    pub try_audio: bool,
82}
83
84impl EncodeOptions {
85    pub const fn new(max_match_candidates: usize) -> Self {
86        Self {
87            max_match_candidates,
88            max_match_distance: MAX_ENCODER_MATCH_OFFSET,
89            lazy_matching: false,
90            lazy_lookahead: 1,
91            try_audio: true,
92        }
93    }
94
95    pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
96        self.max_match_distance = if distance > MAX_ENCODER_MATCH_OFFSET {
97            MAX_ENCODER_MATCH_OFFSET
98        } else {
99            distance
100        };
101        self
102    }
103
104    pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
105        self.lazy_matching = enabled;
106        self
107    }
108
109    pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
110        self.lazy_lookahead = bytes;
111        self
112    }
113
114    pub const fn with_try_audio(mut self, enabled: bool) -> Self {
115        self.try_audio = enabled;
116        self
117    }
118}
119
120impl Default for EncodeOptions {
121    fn default() -> Self {
122        Self::new(MAX_MATCH_CANDIDATES)
123    }
124}
125
126#[derive(Debug, Clone, Default)]
127pub struct Unpack20Encoder {
128    history: Vec<u8>,
129    table: Option<FixedEncodeTable>,
130    options: EncodeOptions,
131}
132
133impl Unpack20Encoder {
134    pub fn new() -> Self {
135        Self::default()
136    }
137
138    pub fn with_options(options: EncodeOptions) -> Self {
139        Self {
140            history: Vec::new(),
141            table: None,
142            options,
143        }
144    }
145
146    pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
147        if input.is_empty() {
148            return Ok(Vec::new());
149        }
150        let table = match self.table {
151            Some(table) => table,
152            None => {
153                let table = FixedEncodeTable::new()?;
154                self.table = Some(table);
155                table
156            }
157        };
158        let packed = encode_member(input, &self.history, Some(table), self.options)?;
159        self.remember(input);
160        Ok(packed)
161    }
162
163    fn remember(&mut self, input: &[u8]) {
164        self.history.extend_from_slice(input);
165        let keep_from = self
166            .history
167            .len()
168            .saturating_sub(self.options.max_match_distance);
169        if keep_from != 0 {
170            self.history.drain(..keep_from);
171        }
172    }
173}
174
175fn encode_member(
176    input: &[u8],
177    history: &[u8],
178    fixed_table: Option<FixedEncodeTable>,
179    options: EncodeOptions,
180) -> Result<Vec<u8>> {
181    if input.is_empty() {
182        return Ok(Vec::new());
183    }
184
185    let tokens = encode_tokens(input, history, options, None);
186    let table_lengths = table_lengths_for_tokens(&tokens, fixed_table)?;
187    let packed = encode_member_with_tables(&tokens, history, fixed_table, &table_lengths)?;
188    if fixed_table.is_some() {
189        return Ok(packed);
190    }
191
192    let cost_model = CostModel::new(&table_lengths);
193    let refined_tokens = encode_tokens(input, history, options, Some(&cost_model));
194    if refined_tokens == tokens {
195        return Ok(packed);
196    }
197    let refined_table_lengths = table_lengths_for_tokens(&refined_tokens, fixed_table)?;
198    let refined_packed = encode_member_with_tables(
199        &refined_tokens,
200        history,
201        fixed_table,
202        &refined_table_lengths,
203    )?;
204    if refined_packed.len() < packed.len() {
205        Ok(refined_packed)
206    } else {
207        Ok(packed)
208    }
209}
210
211fn table_lengths_for_tokens(
212    tokens: &[EncodeToken],
213    fixed_table: Option<FixedEncodeTable>,
214) -> Result<[u8; TABLE_COUNT]> {
215    let mut main_frequencies = [0usize; MAIN_COUNT];
216    let mut offset_frequencies = [0usize; OFFSET_COUNT];
217    let mut length_frequencies = [0usize; LENGTH_COUNT];
218    for token in tokens {
219        match *token {
220            EncodeToken::Literal(byte) => main_frequencies[byte as usize] += 1,
221            EncodeToken::RepeatLast => main_frequencies[256] += 1,
222            EncodeToken::OldOffset {
223                index,
224                length,
225                offset,
226            } => {
227                main_frequencies[257 + index] += 1;
228                let (slot, _) = old_length_slot_for_match(length, offset)?;
229                length_frequencies[slot] += 1;
230            }
231            EncodeToken::ShortOffset { offset } => {
232                let (slot, _) = short_slot_for_match(offset)?;
233                main_frequencies[261 + slot] += 1;
234            }
235            EncodeToken::Match { length, offset } => {
236                let encoded_length = length.checked_sub(match_length_adjustment(offset)).ok_or(
237                    Error::InvalidData("RAR 2.0 adjusted match length underflows"),
238                )?;
239                let (slot, _) = length_slot_for_match(encoded_length)?;
240                main_frequencies[270 + slot] += 1;
241                let (offset_slot, _) = offset_slot_for_match(offset)?;
242                offset_frequencies[offset_slot] += 1;
243            }
244        }
245    }
246    let mut table_lengths = [0u8; TABLE_COUNT];
247    let literal_len = if let Some(table) = fixed_table {
248        table.length
249    } else {
250        let main_symbol_count = main_frequencies
251            .iter()
252            .filter(|&&frequency| frequency != 0)
253            .count()
254            + offset_frequencies
255                .iter()
256                .filter(|&&frequency| frequency != 0)
257                .count()
258            + length_frequencies
259                .iter()
260                .filter(|&&frequency| frequency != 0)
261                .count();
262        literal_code_len(main_symbol_count)?
263    };
264
265    if fixed_table.is_some() {
266        for len in &mut table_lengths[..256] {
267            *len = literal_len;
268        }
269        table_lengths[256] = literal_len;
270        for len in &mut table_lengths[270..270 + LENGTH_COUNT] {
271            *len = literal_len;
272        }
273        for len in &mut table_lengths[257..269] {
274            *len = literal_len;
275        }
276        for len in &mut table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT] {
277            *len = literal_len;
278        }
279        for len in &mut table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT] {
280            *len = literal_len;
281        }
282    } else {
283        table_lengths[..MAIN_COUNT]
284            .copy_from_slice(&validated_lengths_for_frequencies(&main_frequencies, 15));
285        table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT]
286            .copy_from_slice(&validated_lengths_for_frequencies(&offset_frequencies, 15));
287        table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT]
288            .copy_from_slice(&validated_lengths_for_frequencies(&length_frequencies, 15));
289    }
290    Ok(table_lengths)
291}
292
293fn encode_member_with_tables(
294    tokens: &[EncodeToken],
295    history: &[u8],
296    fixed_table: Option<FixedEncodeTable>,
297    table_lengths: &[u8; TABLE_COUNT],
298) -> Result<Vec<u8>> {
299    let level_tokens = encode_table_level_tokens(table_lengths);
300    let level_lengths = level_code_lengths_for_tokens(&level_tokens);
301    let level_codes = canonical_codes(&level_lengths)?;
302    let main_codes = canonical_codes(&table_lengths[..MAIN_COUNT])?;
303
304    let mut bits = BitWriter::default();
305    if fixed_table.is_none() || history.is_empty() {
306        bits.write_bits(0, 2); // LZ block, do not keep previous tables.
307        for &len in &level_lengths {
308            bits.write_bits(len as u32, 4);
309        }
310        for token in level_tokens {
311            let code = level_codes[token.symbol].ok_or(Error::InvalidData(
312                "RAR 2.0 encoder missing level Huffman code",
313            ))?;
314            bits.write_bits(code.code as u32, code.len);
315            if token.extra_bits != 0 {
316                bits.write_bits(token.extra_value as u32, token.extra_bits);
317            }
318        }
319    }
320    let offset_codes = canonical_codes(&table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
321    let length_codes = canonical_codes(&table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT])?;
322    for token in tokens {
323        match *token {
324            EncodeToken::Literal(byte) => {
325                let code = main_codes[byte as usize].ok_or(Error::InvalidData(
326                    "RAR 2.0 encoder missing literal Huffman code",
327                ))?;
328                bits.write_bits(code.code as u32, code.len);
329            }
330            EncodeToken::RepeatLast => {
331                let code = main_codes[256].ok_or(Error::InvalidData(
332                    "RAR 2.0 encoder missing repeat-last Huffman code",
333                ))?;
334                bits.write_bits(code.code as u32, code.len);
335            }
336            EncodeToken::OldOffset {
337                index,
338                length,
339                offset,
340            } => {
341                let code = main_codes[257 + index].ok_or(Error::InvalidData(
342                    "RAR 2.0 encoder missing old-offset Huffman code",
343                ))?;
344                bits.write_bits(code.code as u32, code.len);
345                let (slot, extra) = old_length_slot_for_match(length, offset)?;
346                let length_code = length_codes[slot].ok_or(Error::InvalidData(
347                    "RAR 2.0 encoder missing old-offset length Huffman code",
348                ))?;
349                bits.write_bits(length_code.code as u32, length_code.len);
350                if LENGTH_BITS[slot] != 0 {
351                    bits.write_bits(extra as u32, LENGTH_BITS[slot]);
352                }
353            }
354            EncodeToken::ShortOffset { offset } => {
355                let (slot, extra) = short_slot_for_match(offset)?;
356                let code = main_codes[261 + slot].ok_or(Error::InvalidData(
357                    "RAR 2.0 encoder missing short-offset Huffman code",
358                ))?;
359                bits.write_bits(code.code as u32, code.len);
360                if SHORT_BITS[slot] != 0 {
361                    bits.write_bits(extra as u32, SHORT_BITS[slot]);
362                }
363            }
364            EncodeToken::Match { length, offset } => {
365                let encoded_length = length.checked_sub(match_length_adjustment(offset)).ok_or(
366                    Error::InvalidData("RAR 2.0 adjusted match length underflows"),
367                )?;
368                let (slot, extra) = length_slot_for_match(encoded_length)?;
369                let code = main_codes[270 + slot].ok_or(Error::InvalidData(
370                    "RAR 2.0 encoder missing match Huffman code",
371                ))?;
372                bits.write_bits(code.code as u32, code.len);
373                if LENGTH_BITS[slot] != 0 {
374                    bits.write_bits(extra as u32, LENGTH_BITS[slot]);
375                }
376                let (offset_slot, offset_extra) = offset_slot_for_match(offset)?;
377                let offset = offset_codes[offset_slot].ok_or(Error::InvalidData(
378                    "RAR 2.0 encoder missing offset Huffman code",
379                ))?;
380                bits.write_bits(offset.code as u32, offset.len);
381                if OFFSET_BITS[offset_slot] != 0 {
382                    bits.write_bits(offset_extra as u32, OFFSET_BITS[offset_slot]);
383                }
384            }
385        }
386    }
387    Ok(bits.finish())
388}
389
390#[derive(Debug, Clone, Copy)]
391struct FixedEncodeTable {
392    length: u8,
393}
394
395impl FixedEncodeTable {
396    fn new() -> Result<Self> {
397        Ok(Self {
398            length: literal_code_len(256 + LENGTH_COUNT + OFFSET_COUNT)?,
399        })
400    }
401}
402
403#[derive(Debug, Clone, Copy, PartialEq, Eq)]
404enum EncodeToken {
405    Literal(u8),
406    RepeatLast,
407    OldOffset {
408        index: usize,
409        length: usize,
410        offset: usize,
411    },
412    ShortOffset {
413        offset: usize,
414    },
415    Match {
416        length: usize,
417        offset: usize,
418    },
419}
420
421fn encode_tokens(
422    input: &[u8],
423    history: &[u8],
424    options: EncodeOptions,
425    cost_model: Option<&CostModel>,
426) -> Vec<EncodeToken> {
427    let mut tokens = Vec::new();
428    let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
429    let history = &history[history.len().saturating_sub(options.max_match_distance)..];
430    let mut combined = Vec::with_capacity(history.len() + input.len());
431    combined.extend_from_slice(history);
432    combined.extend_from_slice(input);
433    for history_pos in 0..history.len().saturating_sub(2) {
434        insert_match_position(&combined, history_pos, &mut buckets);
435    }
436
437    let mut pos = history.len();
438    let end = combined.len();
439    let mut last_match = None;
440    let mut old_offsets = [0usize; 4];
441    while pos < end {
442        let selected = select_match(
443            &combined,
444            pos,
445            end,
446            &buckets,
447            options,
448            &old_offsets,
449            cost_model,
450        );
451        if let Some(selected) = selected {
452            let lazy = LazyMatchContext {
453                input: &combined,
454                end,
455                buckets: &buckets,
456                options,
457                old_offsets: &old_offsets,
458                cost_model,
459            };
460            if should_lazy_emit_literal(pos, selected, lazy) {
461                tokens.push(EncodeToken::Literal(combined[pos]));
462                insert_match_position(&combined, pos, &mut buckets);
463                pos += 1;
464                continue;
465            }
466            let (length, offset) = match selected {
467                SelectedMatch::Fresh { length, offset } => {
468                    if last_match == Some((length, offset)) {
469                        tokens.push(EncodeToken::RepeatLast);
470                    } else {
471                        tokens.push(EncodeToken::Match { length, offset });
472                        last_match = Some((length, offset));
473                    }
474                    (length, offset)
475                }
476                SelectedMatch::OldOffset {
477                    index,
478                    length,
479                    offset,
480                } => {
481                    if last_match == Some((length, offset)) {
482                        tokens.push(EncodeToken::RepeatLast);
483                    } else {
484                        tokens.push(EncodeToken::OldOffset {
485                            index,
486                            length,
487                            offset,
488                        });
489                        last_match = Some((length, offset));
490                    }
491                    (length, offset)
492                }
493                SelectedMatch::ShortOffset { offset } => {
494                    let length = 2;
495                    tokens.push(EncodeToken::ShortOffset { offset });
496                    last_match = Some((length, offset));
497                    (length, offset)
498                }
499            };
500            push_old_offset(&mut old_offsets, offset);
501            for history_pos in pos..pos + length {
502                insert_match_position(&combined, history_pos, &mut buckets);
503            }
504            pos += length;
505        } else {
506            tokens.push(EncodeToken::Literal(combined[pos]));
507            insert_match_position(&combined, pos, &mut buckets);
508            pos += 1;
509        }
510    }
511    tokens
512}
513
514#[derive(Debug, Clone, Copy)]
515struct CostModel<'a> {
516    main: &'a [u8],
517    offsets: &'a [u8],
518    lengths: &'a [u8],
519}
520
521impl<'a> CostModel<'a> {
522    fn new(table_lengths: &'a [u8; TABLE_COUNT]) -> Self {
523        Self {
524            main: &table_lengths[..MAIN_COUNT],
525            offsets: &table_lengths[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT],
526            lengths: &table_lengths[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT],
527        }
528    }
529
530    fn selected_cost(self, selected: SelectedMatch) -> Option<usize> {
531        match selected {
532            SelectedMatch::Fresh { length, offset } => {
533                let encoded_length = length.checked_sub(match_length_adjustment(offset))?;
534                let (length_slot, _) = length_slot_for_match(encoded_length).ok()?;
535                let (offset_slot, _) = offset_slot_for_match(offset).ok()?;
536                Some(
537                    usize::from(self.main[270 + length_slot])
538                        + usize::from(LENGTH_BITS[length_slot])
539                        + usize::from(self.offsets[offset_slot])
540                        + usize::from(OFFSET_BITS[offset_slot]),
541                )
542            }
543            SelectedMatch::OldOffset {
544                index,
545                length,
546                offset,
547            } => {
548                let (length_slot, _) = old_length_slot_for_match(length, offset).ok()?;
549                Some(
550                    usize::from(self.main[257 + index])
551                        + usize::from(self.lengths[length_slot])
552                        + usize::from(LENGTH_BITS[length_slot]),
553                )
554            }
555            SelectedMatch::ShortOffset { offset } => {
556                let (slot, _) = short_slot_for_match(offset).ok()?;
557                Some(usize::from(self.main[261 + slot]) + usize::from(SHORT_BITS[slot]))
558            }
559        }
560    }
561
562    fn selected_score(self, selected: SelectedMatch) -> Option<isize> {
563        let cost = self.selected_cost(selected)?;
564        Some(selected.length() as isize * 8 - cost as isize)
565    }
566}
567
568#[derive(Debug, Clone, Copy)]
569enum SelectedMatch {
570    Fresh {
571        length: usize,
572        offset: usize,
573    },
574    OldOffset {
575        index: usize,
576        length: usize,
577        offset: usize,
578    },
579    ShortOffset {
580        offset: usize,
581    },
582}
583
584impl SelectedMatch {
585    fn length(self) -> usize {
586        match self {
587            SelectedMatch::Fresh { length, .. } | SelectedMatch::OldOffset { length, .. } => length,
588            SelectedMatch::ShortOffset { .. } => 2,
589        }
590    }
591
592    fn score(self) -> isize {
593        let length_score = self.length() as isize * 8;
594        let cost = match self {
595            SelectedMatch::OldOffset { .. } | SelectedMatch::ShortOffset { .. } => 4,
596            SelectedMatch::Fresh { offset, .. } => 8 + OFFSET_BITS[offset_slot_index(offset)],
597        };
598        length_score - isize::from(cost)
599    }
600}
601
602fn select_match(
603    input: &[u8],
604    pos: usize,
605    end: usize,
606    buckets: &[Vec<usize>],
607    options: EncodeOptions,
608    old_offsets: &[usize; 4],
609    cost_model: Option<&CostModel<'_>>,
610) -> Option<SelectedMatch> {
611    let fresh = best_match(input, pos, end, buckets, options, cost_model)
612        .map(|(length, offset)| SelectedMatch::Fresh { length, offset });
613    let old = best_old_offset_match(input, pos, end, old_offsets, cost_model).map(
614        |(index, length, offset)| SelectedMatch::OldOffset {
615            index,
616            length,
617            offset,
618        },
619    );
620    if let Some(cost_model) = cost_model {
621        return [fresh, old, best_short_offset_match(input, pos, end)]
622            .into_iter()
623            .flatten()
624            .max_by_key(|&selected| {
625                (
626                    cost_model.selected_score(selected).unwrap_or(isize::MIN),
627                    selected.length(),
628                )
629            });
630    }
631
632    let fresh = fresh.and_then(|selected| match selected {
633        SelectedMatch::Fresh { length, offset } => Some((length, offset)),
634        _ => None,
635    });
636    let old = old.and_then(|selected| match selected {
637        SelectedMatch::OldOffset {
638            index,
639            length,
640            offset,
641        } => Some((index, length, offset)),
642        _ => None,
643    });
644    match (fresh, old) {
645        (Some((fresh_length, _)), Some((index, old_length, old_offset)))
646            if old_length + 1 >= fresh_length =>
647        {
648            Some(SelectedMatch::OldOffset {
649                index,
650                length: old_length,
651                offset: old_offset,
652            })
653        }
654        (Some((length, offset)), _) => Some(SelectedMatch::Fresh { length, offset }),
655        (None, Some((index, length, offset))) => Some(SelectedMatch::OldOffset {
656            index,
657            length,
658            offset,
659        }),
660        (None, None) => best_short_offset_match(input, pos, end),
661    }
662}
663
664struct LazyMatchContext<'a> {
665    input: &'a [u8],
666    end: usize,
667    buckets: &'a [Vec<usize>],
668    options: EncodeOptions,
669    old_offsets: &'a [usize; 4],
670    cost_model: Option<&'a CostModel<'a>>,
671}
672
673fn should_lazy_emit_literal(
674    pos: usize,
675    current: SelectedMatch,
676    context: LazyMatchContext<'_>,
677) -> bool {
678    if !context.options.lazy_matching || pos + 1 >= context.end {
679        return false;
680    }
681    let lookahead = context.options.lazy_lookahead.max(1);
682    (1..=lookahead)
683        .take_while(|offset| pos + offset < context.end)
684        .any(|offset| {
685            select_match(
686                context.input,
687                pos + offset,
688                context.end,
689                context.buckets,
690                context.options,
691                context.old_offsets,
692                context.cost_model,
693            )
694            .is_some_and(|next| {
695                let current_score = context
696                    .cost_model
697                    .and_then(|cost_model| cost_model.selected_score(current))
698                    .unwrap_or_else(|| current.score());
699                let next_score = context
700                    .cost_model
701                    .and_then(|cost_model| cost_model.selected_score(next))
702                    .unwrap_or_else(|| next.score());
703                let skipped_literal_score = offset as isize * 8;
704                next_score > current_score + skipped_literal_score
705            })
706        })
707}
708
709fn best_match(
710    input: &[u8],
711    pos: usize,
712    end: usize,
713    buckets: &[Vec<usize>],
714    options: EncodeOptions,
715    cost_model: Option<&CostModel<'_>>,
716) -> Option<(usize, usize)> {
717    let max_offset = pos.min(options.max_match_distance);
718    let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
719    if options.max_match_candidates == 0
720        || max_offset == 0
721        || max_length < 3
722        || pos + 2 >= input.len()
723    {
724        return None;
725    }
726    let bucket = &buckets[match_hash(input, pos)];
727    let mut best = None;
728    let mut checked = 0usize;
729    for &candidate in bucket.iter().rev() {
730        if candidate >= pos {
731            continue;
732        }
733        let offset = pos - candidate;
734        if offset > max_offset {
735            break;
736        }
737        checked += 1;
738        let mut length = 0usize;
739        while length < max_length && input[pos + length] == input[pos + length - offset] {
740            length += 1;
741        }
742        let encodable = length >= 3 + match_length_adjustment(offset);
743        if encodable && is_better_fresh_match(cost_model, length, offset, best) {
744            best = Some((length, offset));
745            if length == max_length {
746                break;
747            }
748        }
749        if checked >= options.max_match_candidates {
750            break;
751        }
752    }
753    best
754}
755
756fn offset_slot_index(offset: usize) -> usize {
757    offset_slot_for_match(offset)
758        .map(|(slot, _)| slot)
759        .unwrap_or(OFFSET_BITS.len() - 1)
760}
761
762fn is_better_fresh_match(
763    cost_model: Option<&CostModel<'_>>,
764    length: usize,
765    offset: usize,
766    best: Option<(usize, usize)>,
767) -> bool {
768    let Some((best_length, best_offset)) = best else {
769        return true;
770    };
771    if let Some(cost_model) = cost_model {
772        let candidate = SelectedMatch::Fresh { length, offset };
773        let best = SelectedMatch::Fresh {
774            length: best_length,
775            offset: best_offset,
776        };
777        let candidate_score = cost_model.selected_score(candidate).unwrap_or(isize::MIN);
778        let best_score = cost_model.selected_score(best).unwrap_or(isize::MIN);
779        return candidate_score > best_score
780            || (candidate_score == best_score
781                && (length > best_length || (length == best_length && offset < best_offset)));
782    }
783    length > best_length || (length == best_length && offset < best_offset)
784}
785
786fn best_old_offset_match(
787    input: &[u8],
788    pos: usize,
789    end: usize,
790    old_offsets: &[usize; 4],
791    cost_model: Option<&CostModel<'_>>,
792) -> Option<(usize, usize, usize)> {
793    let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
794    let mut best = None;
795    for (index, &offset) in old_offsets.iter().enumerate() {
796        if offset == 0 || offset > pos {
797            continue;
798        }
799        let length = match_length_at_offset(input, pos, max_length, offset);
800        if old_length_slot_for_match(length, offset).is_ok()
801            && is_better_old_offset_match(cost_model, index, length, offset, best)
802        {
803            best = Some((index, length, offset));
804        }
805    }
806    best
807}
808
809fn is_better_old_offset_match(
810    cost_model: Option<&CostModel<'_>>,
811    index: usize,
812    length: usize,
813    offset: usize,
814    best: Option<(usize, usize, usize)>,
815) -> bool {
816    let Some((best_index, best_length, best_offset)) = best else {
817        return true;
818    };
819    if let Some(cost_model) = cost_model {
820        let candidate = SelectedMatch::OldOffset {
821            index,
822            length,
823            offset,
824        };
825        let best = SelectedMatch::OldOffset {
826            index: best_index,
827            length: best_length,
828            offset: best_offset,
829        };
830        let candidate_score = cost_model.selected_score(candidate).unwrap_or(isize::MIN);
831        let best_score = cost_model.selected_score(best).unwrap_or(isize::MIN);
832        return candidate_score > best_score
833            || (candidate_score == best_score
834                && (length > best_length || (length == best_length && offset < best_offset)));
835    }
836    length > best_length || (length == best_length && offset < best_offset)
837}
838
839fn best_short_offset_match(input: &[u8], pos: usize, end: usize) -> Option<SelectedMatch> {
840    if end - pos < 2 {
841        return None;
842    }
843    let max_offset = pos.min(256);
844    (1..=max_offset)
845        .find(|&offset| {
846            input[pos] == input[pos - offset] && input[pos + 1] == input[pos + 1 - offset]
847        })
848        .map(|offset| SelectedMatch::ShortOffset { offset })
849}
850
851fn match_length_at_offset(input: &[u8], pos: usize, max_length: usize, offset: usize) -> usize {
852    let mut length = 0usize;
853    while length < max_length && input[pos + length] == input[pos + length - offset] {
854        length += 1;
855    }
856    length
857}
858
859fn match_length_adjustment(offset: usize) -> usize {
860    usize::from(offset >= 0x2000) + usize::from(offset >= 0x40000)
861}
862
863fn old_length_adjustment(offset: usize) -> usize {
864    usize::from(offset >= 0x101) + usize::from(offset >= 0x2000) + usize::from(offset >= 0x40000)
865}
866
867fn push_old_offset(old_offsets: &mut [usize; 4], offset: usize) {
868    old_offsets[3] = old_offsets[2];
869    old_offsets[2] = old_offsets[1];
870    old_offsets[1] = old_offsets[0];
871    old_offsets[0] = offset;
872}
873
874fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
875    if pos + 2 < input.len() {
876        buckets[match_hash(input, pos)].push(pos);
877    }
878}
879
880fn match_hash(input: &[u8], pos: usize) -> usize {
881    let value =
882        ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
883    value & (MATCH_HASH_BUCKETS - 1)
884}
885
886fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
887    if length < 3 {
888        return Err(Error::InvalidData("RAR 2.0 match length is too short"));
889    }
890    let adjusted = length - 3;
891    for (slot, &base) in LENGTH_BASES.iter().enumerate() {
892        let extra_bits = LENGTH_BITS[slot];
893        let max = base
894            + if extra_bits == 0 {
895                0
896            } else {
897                (1usize << extra_bits) - 1
898            };
899        if adjusted >= base && adjusted <= max {
900            return Ok((slot, adjusted - base));
901        }
902    }
903    Err(Error::InvalidData("RAR 2.0 match length is too long"))
904}
905
906fn old_length_slot_for_match(length: usize, offset: usize) -> Result<(usize, usize)> {
907    let encoded = length
908        .checked_sub(old_length_adjustment(offset))
909        .ok_or(Error::InvalidData(
910            "RAR 2.0 adjusted old-offset length underflows",
911        ))?;
912    if encoded < 2 {
913        return Err(Error::InvalidData(
914            "RAR 2.0 old-offset match length is too short",
915        ));
916    }
917    let adjusted = encoded - 2;
918    for (slot, &base) in LENGTH_BASES.iter().enumerate() {
919        let extra_bits = LENGTH_BITS[slot];
920        let max = base
921            + if extra_bits == 0 {
922                0
923            } else {
924                (1usize << extra_bits) - 1
925            };
926        if adjusted >= base && adjusted <= max {
927            return Ok((slot, adjusted - base));
928        }
929    }
930    Err(Error::InvalidData(
931        "RAR 2.0 old-offset match length is too long",
932    ))
933}
934
935fn offset_slot_for_match(offset: usize) -> Result<(usize, usize)> {
936    if offset == 0 {
937        return Err(Error::InvalidData("RAR 2.0 match offset is zero"));
938    }
939    let adjusted = offset - 1;
940    for (slot, &base) in OFFSET_BASES.iter().enumerate() {
941        let extra_bits = OFFSET_BITS[slot];
942        let max = base
943            + if extra_bits == 0 {
944                0
945            } else {
946                (1usize << extra_bits) - 1
947            };
948        if adjusted >= base && adjusted <= max {
949            return Ok((slot, adjusted - base));
950        }
951    }
952    Err(Error::InvalidData("RAR 2.0 match offset is too large"))
953}
954
955fn short_slot_for_match(offset: usize) -> Result<(usize, usize)> {
956    if offset == 0 || offset > 256 {
957        return Err(Error::InvalidData(
958            "RAR 2.0 short match offset is out of range",
959        ));
960    }
961    let adjusted = offset - 1;
962    for (slot, &base) in SHORT_BASES.iter().enumerate() {
963        let extra_bits = SHORT_BITS[slot];
964        let max = base
965            + if extra_bits == 0 {
966                0
967            } else {
968                (1usize << extra_bits) - 1
969            };
970        if adjusted >= base && adjusted <= max {
971            return Ok((slot, adjusted - base));
972        }
973    }
974    Err(Error::InvalidData(
975        "RAR 2.0 short match offset is out of range",
976    ))
977}
978
979fn literal_code_len(symbol_count: usize) -> Result<u8> {
980    if symbol_count == 0 {
981        return Err(Error::InvalidData("RAR 2.0 encoder has no literal symbols"));
982    }
983    let len = usize::BITS - (symbol_count - 1).leading_zeros();
984    u8::try_from(len.max(1)).map_err(|_| Error::InvalidData("RAR 2.0 literal table is too large"))
985}
986
987#[derive(Debug, Clone, Copy, PartialEq, Eq)]
988struct LevelToken {
989    symbol: usize,
990    extra_bits: u8,
991    extra_value: u8,
992}
993
994impl LevelToken {
995    const fn plain(symbol: usize) -> Self {
996        Self {
997            symbol,
998            extra_bits: 0,
999            extra_value: 0,
1000        }
1001    }
1002
1003    const fn repeat_previous(count: usize) -> Self {
1004        Self {
1005            symbol: 16,
1006            extra_bits: 2,
1007            extra_value: (count - 3) as u8,
1008        }
1009    }
1010
1011    const fn zero_run_short(count: usize) -> Self {
1012        Self {
1013            symbol: 17,
1014            extra_bits: 3,
1015            extra_value: (count - 3) as u8,
1016        }
1017    }
1018
1019    const fn zero_run_long(count: usize) -> Self {
1020        Self {
1021            symbol: 18,
1022            extra_bits: 7,
1023            extra_value: (count - 11) as u8,
1024        }
1025    }
1026}
1027
1028fn encode_table_level_tokens(lengths: &[u8; TABLE_COUNT]) -> Vec<LevelToken> {
1029    encode_level_tokens(lengths)
1030}
1031
1032fn encode_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
1033    let mut tokens = Vec::new();
1034    let mut pos = 0usize;
1035    let mut previous = None;
1036    while pos < lengths.len() {
1037        let value = lengths[pos];
1038        let mut run = 1usize;
1039        while pos + run < lengths.len() && lengths[pos + run] == value {
1040            run += 1;
1041        }
1042
1043        if value == 0 {
1044            emit_zero_level_run(&mut tokens, run);
1045            previous = Some(0);
1046            pos += run;
1047            continue;
1048        }
1049
1050        if previous == Some(value) && run >= 3 {
1051            let mut remaining = run;
1052            while remaining != 0 {
1053                let chunk = remaining.min(6);
1054                if chunk >= 3 {
1055                    tokens.push(LevelToken::repeat_previous(chunk));
1056                    remaining -= chunk;
1057                } else {
1058                    tokens.extend(std::iter::repeat_n(
1059                        LevelToken::plain(value as usize),
1060                        chunk,
1061                    ));
1062                    remaining = 0;
1063                }
1064            }
1065            pos += run;
1066            continue;
1067        }
1068
1069        tokens.push(LevelToken::plain(value as usize));
1070        previous = Some(value);
1071        pos += 1;
1072    }
1073    tokens
1074}
1075
1076fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
1077    while run != 0 {
1078        if run >= 11 {
1079            let mut chunk = run.min(138);
1080            if matches!(run - chunk, 1 | 2) && chunk >= 14 {
1081                chunk -= 3;
1082            }
1083            tokens.push(LevelToken::zero_run_long(chunk));
1084            run -= chunk;
1085        } else if run >= 3 {
1086            let chunk = run.min(10);
1087            tokens.push(LevelToken::zero_run_short(chunk));
1088            run -= chunk;
1089        } else {
1090            tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
1091            break;
1092        }
1093    }
1094}
1095
1096fn level_code_lengths_for_tokens(tokens: &[LevelToken]) -> [u8; LEVEL_COUNT] {
1097    let mut used = [false; LEVEL_COUNT];
1098    for token in tokens {
1099        used[token.symbol] = true;
1100    }
1101    level_code_lengths_for_used_symbols(used)
1102}
1103
1104fn validated_lengths_for_frequencies<const N: usize>(
1105    frequencies: &[usize; N],
1106    max_bits: u8,
1107) -> [u8; N] {
1108    let mut lengths = [0u8; N];
1109    lengths.copy_from_slice(&huffman::lengths_for_frequencies(frequencies, max_bits));
1110    if canonical_codes(&lengths).is_ok() {
1111        return lengths;
1112    }
1113
1114    lengths.copy_from_slice(&huffman::uniform_lengths_for_frequencies(frequencies));
1115    lengths
1116}
1117
1118fn encode_audio_member(input: &[u8], channels: usize) -> Result<Vec<u8>> {
1119    if channels == 0 || channels > MAX_CHANNELS {
1120        return Err(Error::InvalidData("RAR 2.0 audio channel count is invalid"));
1121    }
1122    let deltas = audio_encode(input, channels)?;
1123    let mut levels = vec![0u8; AUDIO_COUNT * channels];
1124    for channel in 0..channels {
1125        let mut frequencies = [0usize; AUDIO_COUNT];
1126        for index in (channel..deltas.len()).step_by(channels) {
1127            frequencies[deltas[index] as usize] += 1;
1128        }
1129        let channel_lengths = huffman::lengths_for_frequency_array(&frequencies, 15);
1130        for (symbol, len) in channel_lengths.into_iter().enumerate() {
1131            levels[channel * AUDIO_COUNT + symbol] = len;
1132        }
1133    }
1134
1135    let level_symbols = encode_audio_table_level_symbols(&levels);
1136    let level_lengths = level_code_lengths_for_symbols(&level_symbols);
1137    let level_codes = canonical_codes(&level_lengths)?;
1138    let mut bits = BitWriter::default();
1139    bits.write_bits(0b10, 2); // audio block, do not keep previous tables.
1140    bits.write_bits((channels - 1) as u32, 2);
1141    for &len in &level_lengths {
1142        bits.write_bits(len as u32, 4);
1143    }
1144    for symbol in level_symbols {
1145        let code = level_codes[symbol].ok_or(Error::InvalidData(
1146            "RAR 2.0 encoder missing audio-level Huffman code",
1147        ))?;
1148        bits.write_bits(code.code as u32, code.len);
1149        match symbol {
1150            17 => bits.write_bits(0, 3),
1151            18 => bits.write_bits(127, 7),
1152            _ => {}
1153        }
1154    }
1155
1156    for channel in 0..channels {
1157        let table = &levels[channel * AUDIO_COUNT..(channel + 1) * AUDIO_COUNT];
1158        validate_audio_table(table)?;
1159    }
1160    let audio_codes = (0..channels)
1161        .map(|channel| canonical_codes(&levels[channel * AUDIO_COUNT..(channel + 1) * AUDIO_COUNT]))
1162        .collect::<Result<Vec<_>>>()?;
1163    for (index, &delta) in deltas.iter().enumerate() {
1164        let channel = index % channels;
1165        let code = audio_codes[channel][delta as usize].ok_or(Error::InvalidData(
1166            "RAR 2.0 encoder missing audio Huffman code",
1167        ))?;
1168        bits.write_bits(code.code as u32, code.len);
1169    }
1170    Ok(bits.finish())
1171}
1172
1173fn encode_audio_table_level_symbols(levels: &[u8]) -> Vec<usize> {
1174    levels.iter().map(|&len| len as usize).collect()
1175}
1176
1177fn level_code_lengths_for_symbols(symbols: &[usize]) -> [u8; LEVEL_COUNT] {
1178    let mut used = [false; LEVEL_COUNT];
1179    for &symbol in symbols {
1180        used[symbol] = true;
1181    }
1182    level_code_lengths_for_used_symbols(used)
1183}
1184
1185fn level_code_lengths_for_used_symbols(used: [bool; LEVEL_COUNT]) -> [u8; LEVEL_COUNT] {
1186    let used_count = used.iter().filter(|&&used| used).count();
1187    let len = huffman::bits_for_symbol_count(used_count);
1188    let mut lengths = [0u8; LEVEL_COUNT];
1189    for (symbol, is_used) in used.into_iter().enumerate() {
1190        if is_used {
1191            lengths[symbol] = len;
1192        }
1193    }
1194    lengths
1195}
1196
1197fn validate_audio_table(lengths: &[u8]) -> Result<()> {
1198    let mut count = [0u16; 16];
1199    for &len in lengths {
1200        if len > 15 {
1201            return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1202        }
1203        if len != 0 {
1204            count[len as usize] += 1;
1205        }
1206    }
1207    validate_huffman_counts(&count)
1208}
1209
1210fn audio_encode(input: &[u8], channels: usize) -> Result<Vec<u8>> {
1211    if channels == 0 || channels > MAX_CHANNELS {
1212        return Err(Error::InvalidData("RAR 2.0 audio channel count is invalid"));
1213    }
1214    let mut states = [AudioState::default(); MAX_CHANNELS];
1215    let mut channel_delta = 0i32;
1216    let mut deltas = Vec::with_capacity(input.len());
1217    for (index, &byte) in input.iter().enumerate() {
1218        let channel = index % channels;
1219        let state = &mut states[channel];
1220        state.byte_count = state.byte_count.wrapping_add(1);
1221        state.d4 = state.d3;
1222        state.d3 = state.d2;
1223        state.d2 = state.last_delta - state.d1;
1224        state.d1 = state.last_delta;
1225
1226        let predicted = 8 * state.last_char
1227            + state.k[0] * state.d1
1228            + state.k[1] * state.d2
1229            + state.k[2] * state.d3
1230            + state.k[3] * state.d4
1231            + state.k[4] * channel_delta;
1232        let predicted = (predicted >> 3) & 0xff;
1233        let delta = (predicted as u8).wrapping_sub(byte);
1234
1235        let d = (delta as i8 as i32) << 3;
1236        state.dif[0] = state.dif[0].wrapping_add(d.unsigned_abs());
1237        state.dif[1] = state.dif[1].wrapping_add((d - state.d1).unsigned_abs());
1238        state.dif[2] = state.dif[2].wrapping_add((d + state.d1).unsigned_abs());
1239        state.dif[3] = state.dif[3].wrapping_add((d - state.d2).unsigned_abs());
1240        state.dif[4] = state.dif[4].wrapping_add((d + state.d2).unsigned_abs());
1241        state.dif[5] = state.dif[5].wrapping_add((d - state.d3).unsigned_abs());
1242        state.dif[6] = state.dif[6].wrapping_add((d + state.d3).unsigned_abs());
1243        state.dif[7] = state.dif[7].wrapping_add((d - state.d4).unsigned_abs());
1244        state.dif[8] = state.dif[8].wrapping_add((d + state.d4).unsigned_abs());
1245        state.dif[9] = state.dif[9].wrapping_add((d - channel_delta).unsigned_abs());
1246        state.dif[10] = state.dif[10].wrapping_add((d + channel_delta).unsigned_abs());
1247
1248        channel_delta = (byte.wrapping_sub(state.last_char as u8)) as i8 as i32;
1249        state.last_delta = channel_delta;
1250        state.last_char = byte as i32;
1251
1252        if state.byte_count & 0x1f == 0 {
1253            let mut min_dif = state.dif[0];
1254            let mut num_min_dif = 0usize;
1255            state.dif[0] = 0;
1256            for diff_index in 1..state.dif.len() {
1257                if state.dif[diff_index] < min_dif {
1258                    min_dif = state.dif[diff_index];
1259                    num_min_dif = diff_index;
1260                }
1261                state.dif[diff_index] = 0;
1262            }
1263            match num_min_dif {
1264                1 if state.k[0] >= -16 => state.k[0] -= 1,
1265                2 if state.k[0] < 16 => state.k[0] += 1,
1266                3 if state.k[1] >= -16 => state.k[1] -= 1,
1267                4 if state.k[1] < 16 => state.k[1] += 1,
1268                5 if state.k[2] >= -16 => state.k[2] -= 1,
1269                6 if state.k[2] < 16 => state.k[2] += 1,
1270                7 if state.k[3] >= -16 => state.k[3] -= 1,
1271                8 if state.k[3] < 16 => state.k[3] += 1,
1272                9 if state.k[4] >= -16 => state.k[4] -= 1,
1273                10 if state.k[4] < 16 => state.k[4] += 1,
1274                _ => {}
1275            }
1276        }
1277
1278        deltas.push(delta);
1279    }
1280    Ok(deltas)
1281}
1282
1283#[derive(Debug, Clone, Copy)]
1284struct HuffmanCode {
1285    code: u16,
1286    len: u8,
1287}
1288
1289fn canonical_codes(lengths: &[u8]) -> Result<Vec<Option<HuffmanCode>>> {
1290    let mut count = [0u16; 16];
1291    for &len in lengths {
1292        if len > 15 {
1293            return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1294        }
1295        if len != 0 {
1296            count[len as usize] += 1;
1297        }
1298    }
1299    validate_huffman_counts(&count)?;
1300
1301    let mut next_code = [0u16; 16];
1302    let mut code = 0u16;
1303    for len in 1..=15 {
1304        code = (code + count[len - 1]) << 1;
1305        next_code[len] = code;
1306    }
1307
1308    let mut codes = vec![None; lengths.len()];
1309    for (symbol, &len) in lengths.iter().enumerate() {
1310        if len == 0 {
1311            continue;
1312        }
1313        let code = next_code[len as usize];
1314        next_code[len as usize] += 1;
1315        codes[symbol] = Some(HuffmanCode { code, len });
1316    }
1317    Ok(codes)
1318}
1319
1320#[derive(Debug, Clone)]
1321pub struct Unpack20 {
1322    bits: BitReader,
1323    levels: [u8; OLD_LEVEL_COUNT],
1324    main: Huffman,
1325    offsets: Huffman,
1326    lengths: Huffman,
1327    audio_tables: [Huffman; MAX_CHANNELS],
1328    audio_block: bool,
1329    channels: usize,
1330    cur_channel: usize,
1331    audio: [AudioState; MAX_CHANNELS],
1332    channel_delta: i32,
1333    old_offsets: [usize; 4],
1334    last_offset: usize,
1335    last_length: usize,
1336    pending_match: Option<(usize, usize)>,
1337    in_block: bool,
1338    output: Vec<u8>,
1339    base_offset: usize,
1340}
1341
1342impl Unpack20 {
1343    pub fn new() -> Self {
1344        Self {
1345            bits: BitReader::new(),
1346            levels: [0; OLD_LEVEL_COUNT],
1347            main: Huffman::empty(),
1348            offsets: Huffman::empty(),
1349            lengths: Huffman::empty(),
1350            audio_tables: std::array::from_fn(|_| Huffman::empty()),
1351            audio_block: false,
1352            channels: 1,
1353            cur_channel: 0,
1354            audio: [AudioState::default(); MAX_CHANNELS],
1355            channel_delta: 0,
1356            old_offsets: [0; 4],
1357            last_offset: 0,
1358            last_length: 0,
1359            pending_match: None,
1360            in_block: false,
1361            output: Vec::new(),
1362            base_offset: 0,
1363        }
1364    }
1365
1366    pub fn decode_member(&mut self, input: &[u8], output_size: usize) -> Result<Vec<u8>> {
1367        let start = self.current_pos();
1368        let target = start
1369            .checked_add(output_size)
1370            .ok_or(Error::InvalidData("RAR 2.0 output size overflows"))?;
1371        if !input.is_empty() {
1372            self.bits = BitReader::new();
1373        }
1374        self.bits.append(input);
1375        self.decode_until(target).map_err(|error| match error {
1376            Error::NeedMoreInput => Error::InvalidData("RAR 2.0 bitstream is truncated"),
1377            error => error,
1378        })?;
1379        self.read_last_tables()?;
1380        let out = self.raw_range(start, target)?.to_vec();
1381        self.trim_history(target, target);
1382        Ok(out)
1383    }
1384
1385    pub fn decode_member_to(
1386        &mut self,
1387        input: &[u8],
1388        output_size: usize,
1389        out: &mut impl Write,
1390    ) -> Result<()> {
1391        let decoded = self.decode_member(input, output_size)?;
1392        out.write_all(&decoded)
1393            .map_err(|_| Error::InvalidData("RAR 2.0 output write failed"))
1394    }
1395
1396    pub fn decode_member_from_reader(
1397        &mut self,
1398        input: &mut impl Read,
1399        output_size: usize,
1400        out: &mut impl Write,
1401    ) -> Result<()> {
1402        let start = self.current_pos();
1403        let target = start
1404            .checked_add(output_size)
1405            .ok_or(Error::InvalidData("RAR 2.0 output size overflows"))?;
1406        self.bits = BitReader::new();
1407        let mut packed = Vec::new();
1408        input
1409            .read_to_end(&mut packed)
1410            .map_err(|_| Error::InvalidData("RAR 2.0 input read failed"))?;
1411        self.bits.append(&packed);
1412        if !self.in_block && self.bits.remaining_bytes_from_current() > 0 {
1413            self.read_tables().map_err(|error| match error {
1414                Error::NeedMoreInput => Error::InvalidData("RAR 2.0 bitstream is truncated"),
1415                error => error,
1416            })?;
1417            self.in_block = true;
1418        }
1419        self.decode_until(target).map_err(|error| match error {
1420            Error::NeedMoreInput => Error::InvalidData("RAR 2.0 bitstream is truncated"),
1421            error => error,
1422        })?;
1423        self.read_last_tables()?;
1424
1425        let decoded = self.raw_range(start, target)?;
1426        out.write_all(decoded)
1427            .map_err(|_| Error::InvalidData("RAR 2.0 output write failed"))?;
1428        self.trim_history(target, target);
1429        Ok(())
1430    }
1431
1432    fn decode_until(&mut self, target: usize) -> Result<()> {
1433        while self.current_pos() < target {
1434            self.drain_pending_match(target)?;
1435            if self.current_pos() >= target {
1436                break;
1437            }
1438            if !self.in_block {
1439                self.read_tables()?;
1440                self.in_block = true;
1441            }
1442            self.decode_lz(target)?;
1443        }
1444        Ok(())
1445    }
1446
1447    fn read_tables(&mut self) -> Result<()> {
1448        let bit_field = self.bits.peek_bits(16)?;
1449        self.audio_block = bit_field & 0x8000 != 0;
1450        let keep_tables = bit_field & 0x4000 != 0;
1451        self.bits.read_bits(2)?;
1452        if !keep_tables {
1453            self.levels = [0; OLD_LEVEL_COUNT];
1454        }
1455
1456        let table_size = if self.audio_block {
1457            self.channels = ((bit_field >> 12) as usize & 3) + 1;
1458            if self.cur_channel >= self.channels {
1459                self.cur_channel = 0;
1460            }
1461            self.bits.read_bits(2)?;
1462            AUDIO_COUNT * self.channels
1463        } else {
1464            TABLE_COUNT
1465        };
1466
1467        let level_lengths = Self::read_level_lengths(&mut self.bits)?;
1468        let level_decoder = Huffman::from_lengths(&level_lengths)?;
1469        let mut new_levels = [0u8; OLD_LEVEL_COUNT];
1470        let mut pos = 0usize;
1471        while pos < table_size {
1472            let symbol = level_decoder.decode(&mut self.bits)?;
1473            match symbol {
1474                0..=15 => {
1475                    new_levels[pos] = (self.levels[pos].wrapping_add(symbol as u8)) & 0x0f;
1476                    pos += 1;
1477                }
1478                16 => {
1479                    if pos == 0 {
1480                        return Err(Error::InvalidData("RAR 2.0 table repeat at start"));
1481                    }
1482                    let count = 3 + self.bits.read_bits(2)? as usize;
1483                    let value = new_levels[pos - 1];
1484                    fill_levels(&mut new_levels, &mut pos, count, value)?;
1485                }
1486                17 => {
1487                    let count = 3 + self.bits.read_bits(3)? as usize;
1488                    fill_levels(&mut new_levels, &mut pos, count, 0)?;
1489                }
1490                18 => {
1491                    let count = 11 + self.bits.read_bits(7)? as usize;
1492                    fill_levels(&mut new_levels, &mut pos, count, 0)?;
1493                }
1494                _ => return Err(Error::InvalidData("RAR 2.0 invalid level symbol")),
1495            }
1496        }
1497
1498        self.levels = new_levels;
1499        if self.audio_block {
1500            for channel in 0..self.channels {
1501                let start = channel * AUDIO_COUNT;
1502                self.audio_tables[channel] =
1503                    Huffman::from_lengths(&self.levels[start..start + AUDIO_COUNT])?;
1504            }
1505        } else {
1506            self.main = Huffman::from_lengths(&self.levels[..MAIN_COUNT])?;
1507            self.offsets =
1508                Huffman::from_lengths(&self.levels[MAIN_COUNT..MAIN_COUNT + OFFSET_COUNT])?;
1509            self.lengths =
1510                Huffman::from_lengths(&self.levels[MAIN_COUNT + OFFSET_COUNT..TABLE_COUNT])?;
1511        }
1512        Ok(())
1513    }
1514
1515    fn read_level_lengths(bits: &mut BitReader) -> Result<[u8; LEVEL_COUNT]> {
1516        let mut lengths = [0u8; LEVEL_COUNT];
1517        for length in &mut lengths {
1518            *length = bits.read_bits(4)? as u8;
1519        }
1520        Ok(lengths)
1521    }
1522
1523    fn decode_lz(&mut self, output_size: usize) -> Result<()> {
1524        while self.current_pos() < output_size {
1525            if self.audio_block {
1526                self.decode_audio_byte()?;
1527                if !self.in_block {
1528                    return Ok(());
1529                }
1530                continue;
1531            }
1532            let symbol = self.main.decode(&mut self.bits)?;
1533            match symbol {
1534                0..=255 => self.output.push(symbol as u8),
1535                256 => {
1536                    if self.last_length != 0 {
1537                        let length = self.last_length;
1538                        let offset = self.last_offset;
1539                        self.push_old_offset(offset);
1540                        self.copy_match(length, offset, output_size)?;
1541                    }
1542                }
1543                257..=260 => {
1544                    let index = symbol - 257;
1545                    let offset = self.old_offsets[index];
1546                    let length_slot = self.lengths.decode(&mut self.bits)?;
1547                    if length_slot >= LENGTH_COUNT {
1548                        return Err(Error::InvalidData("RAR 2.0 invalid repeat length slot"));
1549                    }
1550                    let mut length = LENGTH_BASES[length_slot] + 2;
1551                    if LENGTH_BITS[length_slot] != 0 {
1552                        length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
1553                    }
1554                    if offset >= 0x101 {
1555                        length += 1;
1556                    }
1557                    if offset >= 0x2000 {
1558                        length += 1;
1559                    }
1560                    if offset >= 0x40000 {
1561                        length += 1;
1562                    }
1563                    self.push_old_offset(offset);
1564                    self.last_offset = offset;
1565                    self.last_length = length;
1566                    self.copy_match(length, offset, output_size)?;
1567                }
1568                261..=268 => {
1569                    let index = symbol - 261;
1570                    let mut offset = SHORT_BASES[index] + 1;
1571                    if SHORT_BITS[index] != 0 {
1572                        offset += self.bits.read_bits(SHORT_BITS[index])? as usize;
1573                    }
1574                    self.push_old_offset(offset);
1575                    self.last_offset = offset;
1576                    self.last_length = 2;
1577                    self.copy_match(2, offset, output_size)?;
1578                }
1579                269 => {
1580                    self.in_block = false;
1581                    return Ok(());
1582                }
1583                270..=297 => {
1584                    let length_slot = symbol - 270;
1585                    let mut length = LENGTH_BASES[length_slot] + 3;
1586                    if LENGTH_BITS[length_slot] != 0 {
1587                        length += self.bits.read_bits(LENGTH_BITS[length_slot])? as usize;
1588                    }
1589                    let offset = self.read_offset()?;
1590                    if offset >= 0x2000 {
1591                        length += 1;
1592                    }
1593                    if offset >= 0x40000 {
1594                        length += 1;
1595                    }
1596                    self.push_old_offset(offset);
1597                    self.last_offset = offset;
1598                    self.last_length = length;
1599                    self.copy_match(length, offset, output_size)?;
1600                }
1601                _ => return Err(Error::InvalidData("RAR 2.0 invalid main symbol")),
1602            }
1603        }
1604        Ok(())
1605    }
1606
1607    fn decode_audio_byte(&mut self) -> Result<()> {
1608        let symbol = self.audio_tables[self.cur_channel].decode(&mut self.bits)?;
1609        if symbol == 256 {
1610            self.in_block = false;
1611            return Ok(());
1612        }
1613        if symbol > 256 {
1614            return Err(Error::InvalidData("RAR 2.0 invalid audio symbol"));
1615        }
1616        let byte = self.decode_audio(symbol as u8);
1617        self.output.push(byte);
1618        self.cur_channel += 1;
1619        if self.cur_channel == self.channels {
1620            self.cur_channel = 0;
1621        }
1622        Ok(())
1623    }
1624
1625    fn decode_audio(&mut self, delta: u8) -> u8 {
1626        let state = &mut self.audio[self.cur_channel];
1627        state.byte_count = state.byte_count.wrapping_add(1);
1628        state.d4 = state.d3;
1629        state.d3 = state.d2;
1630        state.d2 = state.last_delta - state.d1;
1631        state.d1 = state.last_delta;
1632
1633        let predicted = 8 * state.last_char
1634            + state.k[0] * state.d1
1635            + state.k[1] * state.d2
1636            + state.k[2] * state.d3
1637            + state.k[3] * state.d4
1638            + state.k[4] * self.channel_delta;
1639        let predicted = (predicted >> 3) & 0xff;
1640        let byte = predicted.wrapping_sub(delta as i32) as u8;
1641
1642        let d = (delta as i8 as i32) << 3;
1643        state.dif[0] = state.dif[0].wrapping_add(d.unsigned_abs());
1644        state.dif[1] = state.dif[1].wrapping_add((d - state.d1).unsigned_abs());
1645        state.dif[2] = state.dif[2].wrapping_add((d + state.d1).unsigned_abs());
1646        state.dif[3] = state.dif[3].wrapping_add((d - state.d2).unsigned_abs());
1647        state.dif[4] = state.dif[4].wrapping_add((d + state.d2).unsigned_abs());
1648        state.dif[5] = state.dif[5].wrapping_add((d - state.d3).unsigned_abs());
1649        state.dif[6] = state.dif[6].wrapping_add((d + state.d3).unsigned_abs());
1650        state.dif[7] = state.dif[7].wrapping_add((d - state.d4).unsigned_abs());
1651        state.dif[8] = state.dif[8].wrapping_add((d + state.d4).unsigned_abs());
1652        state.dif[9] = state.dif[9].wrapping_add((d - self.channel_delta).unsigned_abs());
1653        state.dif[10] = state.dif[10].wrapping_add((d + self.channel_delta).unsigned_abs());
1654
1655        self.channel_delta = (byte.wrapping_sub(state.last_char as u8)) as i8 as i32;
1656        state.last_delta = self.channel_delta;
1657        state.last_char = byte as i32;
1658
1659        if state.byte_count & 0x1f == 0 {
1660            let mut min_dif = state.dif[0];
1661            let mut num_min_dif = 0usize;
1662            state.dif[0] = 0;
1663            for index in 1..state.dif.len() {
1664                if state.dif[index] < min_dif {
1665                    min_dif = state.dif[index];
1666                    num_min_dif = index;
1667                }
1668                state.dif[index] = 0;
1669            }
1670            match num_min_dif {
1671                1 if state.k[0] >= -16 => state.k[0] -= 1,
1672                2 if state.k[0] < 16 => state.k[0] += 1,
1673                3 if state.k[1] >= -16 => state.k[1] -= 1,
1674                4 if state.k[1] < 16 => state.k[1] += 1,
1675                5 if state.k[2] >= -16 => state.k[2] -= 1,
1676                6 if state.k[2] < 16 => state.k[2] += 1,
1677                7 if state.k[3] >= -16 => state.k[3] -= 1,
1678                8 if state.k[3] < 16 => state.k[3] += 1,
1679                9 if state.k[4] >= -16 => state.k[4] -= 1,
1680                10 if state.k[4] < 16 => state.k[4] += 1,
1681                _ => {}
1682            }
1683        }
1684
1685        byte
1686    }
1687
1688    fn read_offset(&mut self) -> Result<usize> {
1689        let slot = self.offsets.decode(&mut self.bits)?;
1690        if slot >= OFFSET_COUNT {
1691            return Err(Error::InvalidData("RAR 2.0 invalid offset slot"));
1692        }
1693        let mut offset = OFFSET_BASES[slot] + 1;
1694        if OFFSET_BITS[slot] != 0 {
1695            offset += self.bits.read_bits(OFFSET_BITS[slot])? as usize;
1696        }
1697        Ok(offset)
1698    }
1699
1700    fn copy_match(&mut self, length: usize, offset: usize, output_size: usize) -> Result<()> {
1701        let offset = if offset == 0 { 1 } else { offset };
1702        let current = self.current_pos();
1703        if offset > current {
1704            return Err(Error::InvalidData("RAR 2.0 match distance is out of range"));
1705        }
1706        for index in 0..length {
1707            if self.current_pos() >= output_size {
1708                self.pending_match = Some((length - index, offset));
1709                break;
1710            }
1711            let src = self.current_pos() - offset;
1712            let byte = *self
1713                .raw_byte(src)
1714                .ok_or(Error::InvalidData("RAR 2.0 match distance is out of range"))?;
1715            self.output.push(byte);
1716        }
1717        Ok(())
1718    }
1719
1720    fn drain_pending_match(&mut self, output_size: usize) -> Result<()> {
1721        let Some((length, offset)) = self.pending_match.take() else {
1722            return Ok(());
1723        };
1724        self.copy_match(length, offset, output_size)
1725    }
1726
1727    fn read_last_tables(&mut self) -> Result<()> {
1728        if self.bits.remaining_bytes_from_current() < 5 {
1729            return Ok(());
1730        }
1731        if self.audio_block {
1732            if self.audio_tables[self.cur_channel].symbols.is_empty() {
1733                return Ok(());
1734            }
1735            if self.audio_tables[self.cur_channel].decode(&mut self.bits)? == 256 {
1736                self.read_tables()?;
1737                self.in_block = true;
1738            }
1739        } else {
1740            if self.main.symbols.is_empty() {
1741                return Ok(());
1742            }
1743            if self.main.decode(&mut self.bits)? == 269 {
1744                self.read_tables()?;
1745                self.in_block = true;
1746            }
1747        }
1748        Ok(())
1749    }
1750
1751    fn push_old_offset(&mut self, offset: usize) {
1752        self.old_offsets[3] = self.old_offsets[2];
1753        self.old_offsets[2] = self.old_offsets[1];
1754        self.old_offsets[1] = self.old_offsets[0];
1755        self.old_offsets[0] = offset;
1756    }
1757
1758    fn current_pos(&self) -> usize {
1759        self.base_offset + self.output.len()
1760    }
1761
1762    fn raw_byte(&self, position: usize) -> Option<&u8> {
1763        self.output.get(position.checked_sub(self.base_offset)?)
1764    }
1765
1766    fn raw_range(&self, start: usize, end: usize) -> Result<&[u8]> {
1767        if start < self.base_offset || end < start {
1768            return Err(Error::InvalidData(
1769                "RAR 2.0 retained history is unavailable",
1770            ));
1771        }
1772        let rel_start = start - self.base_offset;
1773        let rel_end = end - self.base_offset;
1774        self.output
1775            .get(rel_start..rel_end)
1776            .ok_or(Error::InvalidData(
1777                "RAR 2.0 retained history is unavailable",
1778            ))
1779    }
1780
1781    fn trim_history(&mut self, flushed_pos: usize, current_pos: usize) {
1782        let keep_from = current_pos.saturating_sub(MAX_HISTORY).min(flushed_pos);
1783        if keep_from <= self.base_offset {
1784            return;
1785        }
1786        let drain = keep_from - self.base_offset;
1787        self.output.drain(..drain);
1788        self.base_offset = keep_from;
1789    }
1790}
1791
1792impl Default for Unpack20 {
1793    fn default() -> Self {
1794        Self::new()
1795    }
1796}
1797
1798#[derive(Debug, Clone, Copy, Default)]
1799struct AudioState {
1800    k: [i32; 5],
1801    d1: i32,
1802    d2: i32,
1803    d3: i32,
1804    d4: i32,
1805    last_delta: i32,
1806    last_char: i32,
1807    byte_count: u32,
1808    dif: [u32; 11],
1809}
1810
1811fn fill_levels(levels: &mut [u8], pos: &mut usize, count: usize, value: u8) -> Result<()> {
1812    let end = pos
1813        .checked_add(count)
1814        .ok_or(Error::InvalidData("RAR 2.0 table run overflows"))?;
1815    let end = end.min(levels.len());
1816    for item in &mut levels[*pos..end] {
1817        *item = value;
1818    }
1819    *pos = end;
1820    Ok(())
1821}
1822
1823#[derive(Debug, Clone)]
1824struct Huffman {
1825    symbols: Vec<HuffmanSymbol>,
1826    first_code: [u16; 16],
1827    first_index: [usize; 16],
1828    counts: [u16; 16],
1829}
1830
1831#[derive(Debug, Clone)]
1832struct HuffmanSymbol {
1833    code: u16,
1834    len: u8,
1835    symbol: usize,
1836}
1837
1838impl Huffman {
1839    fn empty() -> Self {
1840        Self {
1841            symbols: Vec::new(),
1842            first_code: [0; 16],
1843            first_index: [0; 16],
1844            counts: [0; 16],
1845        }
1846    }
1847
1848    fn from_lengths(lengths: &[u8]) -> Result<Self> {
1849        let mut count = [0u16; 16];
1850        for &len in lengths {
1851            if len > 15 {
1852                return Err(Error::InvalidData("RAR 2.0 Huffman length is too large"));
1853            }
1854            if len != 0 {
1855                count[len as usize] += 1;
1856            }
1857        }
1858        if count.iter().all(|&value| value == 0) {
1859            return Ok(Self::empty());
1860        }
1861        validate_huffman_counts(&count)?;
1862
1863        let mut first_code = [0u16; 16];
1864        let mut next_code = [0u16; 16];
1865        let mut code = 0u16;
1866        for len in 1..=15 {
1867            code = (code + count[len - 1]) << 1;
1868            first_code[len] = code;
1869            next_code[len] = code;
1870        }
1871
1872        let mut first_index = [0usize; 16];
1873        let mut index = 0usize;
1874        for len in 1..=15 {
1875            first_index[len] = index;
1876            index += usize::from(count[len]);
1877        }
1878
1879        let mut symbols = Vec::new();
1880        for (symbol, &len) in lengths.iter().enumerate() {
1881            if len == 0 {
1882                continue;
1883            }
1884            let code = next_code[len as usize];
1885            next_code[len as usize] += 1;
1886            symbols.push(HuffmanSymbol { code, len, symbol });
1887        }
1888        symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
1889        Ok(Self {
1890            symbols,
1891            first_code,
1892            first_index,
1893            counts: count,
1894        })
1895    }
1896
1897    fn decode(&self, bits: &mut BitReader) -> Result<usize> {
1898        let mut code = 0u16;
1899        if self.symbols.is_empty() {
1900            return Err(Error::InvalidData("RAR 2.0 empty Huffman table"));
1901        }
1902        for len in 1..=15 {
1903            code = (code << 1) | bits.read_bit()? as u16;
1904            let count = self.counts[len];
1905            if count != 0 {
1906                let first = self.first_code[len];
1907                let offset = code.wrapping_sub(first);
1908                if offset < count {
1909                    let index = self.first_index[len] + usize::from(offset);
1910                    return Ok(self.symbols[index].symbol);
1911                }
1912            }
1913        }
1914        Err(Error::InvalidData("RAR 2.0 invalid Huffman code"))
1915    }
1916}
1917
1918fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
1919    let mut available = 1i32;
1920    for &len_count in count.iter().skip(1) {
1921        available = (available << 1) - i32::from(len_count);
1922        if available < 0 {
1923            return Err(Error::InvalidData("RAR 2.0 oversubscribed Huffman table"));
1924        }
1925    }
1926    Ok(())
1927}
1928
1929#[derive(Debug, Clone)]
1930struct BitReader {
1931    input: Vec<u8>,
1932    bit_pos: usize,
1933}
1934
1935impl BitReader {
1936    fn new() -> Self {
1937        Self {
1938            input: Vec::new(),
1939            bit_pos: 0,
1940        }
1941    }
1942
1943    fn append(&mut self, input: &[u8]) {
1944        self.compact();
1945        self.input.extend_from_slice(input);
1946    }
1947
1948    fn compact(&mut self) {
1949        let bytes = self.bit_pos / 8;
1950        if bytes == 0 {
1951            return;
1952        }
1953        self.input.drain(..bytes);
1954        self.bit_pos -= bytes * 8;
1955    }
1956
1957    fn read_bit(&mut self) -> Result<u8> {
1958        self.read_bits(1).map(|value| value as u8)
1959    }
1960
1961    fn read_bits(&mut self, count: u8) -> Result<u32> {
1962        let value = self.peek_bits(count)?;
1963        self.bit_pos += count as usize;
1964        Ok(value)
1965    }
1966
1967    fn peek_bits(&self, count: u8) -> Result<u32> {
1968        if count > 24 {
1969            return Err(Error::InvalidData("RAR 2.0 bit read is too wide"));
1970        }
1971        let mut value = 0u32;
1972        for i in 0..count as usize {
1973            let bit_index = self.bit_pos + i;
1974            let byte = *self.input.get(bit_index / 8).ok_or(Error::NeedMoreInput)?;
1975            let bit = (byte >> (7 - (bit_index % 8))) & 1;
1976            value = (value << 1) | bit as u32;
1977        }
1978        Ok(value)
1979    }
1980
1981    fn remaining_bytes_from_current(&self) -> usize {
1982        self.input.len().saturating_sub(self.bit_pos / 8)
1983    }
1984}
1985
1986#[derive(Default)]
1987struct BitWriter {
1988    bytes: Vec<u8>,
1989    bit_pos: usize,
1990}
1991
1992impl BitWriter {
1993    fn write_bits(&mut self, value: u32, count: u8) {
1994        for shift in (0..count).rev() {
1995            self.write_bit(((value >> shift) & 1) != 0);
1996        }
1997    }
1998
1999    fn write_bit(&mut self, bit: bool) {
2000        if self.bit_pos.is_multiple_of(8) {
2001            self.bytes.push(0);
2002        }
2003        if bit {
2004            let shift = 7 - (self.bit_pos % 8);
2005            *self.bytes.last_mut().unwrap() |= 1 << shift;
2006        }
2007        self.bit_pos += 1;
2008    }
2009
2010    fn finish(self) -> Vec<u8> {
2011        self.bytes
2012    }
2013}
2014
2015#[cfg(test)]
2016mod tests {
2017    use super::{
2018        encode_tokens, unpack20_decode, unpack20_encode_literals, BitWriter, EncodeOptions,
2019        EncodeToken, Error, Huffman, Unpack20, Unpack20Encoder,
2020    };
2021
2022    const AUTOREJ_PACKED: &[u8] = &[
2023        0x09, 0x14, 0x0c, 0x94, 0x00, 0x00, 0x00, 0x00, 0x00, 0xce, 0xf8, 0x1f, 0xc1, 0xe6, 0x05,
2024        0xfc, 0x39, 0xc3, 0x50, 0x65, 0x08, 0x41, 0x94, 0xc4, 0x1d, 0xf3, 0xcd, 0x0d, 0x8e, 0x20,
2025        0xf5, 0x9d, 0x8e, 0x76, 0x1d, 0xc5, 0x19, 0xde, 0x16, 0x5b, 0x52, 0xb8, 0x8e, 0x75, 0xcd,
2026        0xaf, 0x1f, 0xfc, 0x9e, 0xf7, 0x00, 0x01, 0xbe, 0x90,
2027    ];
2028
2029    #[test]
2030    fn decodes_rar20_lz_member() {
2031        assert_eq!(
2032            unpack20_decode(AUTOREJ_PACKED, expected_text().len()).unwrap(),
2033            expected_text()
2034        );
2035    }
2036
2037    #[test]
2038    fn rejects_oversubscribed_rar20_huffman_tables() {
2039        assert!(matches!(
2040            Huffman::from_lengths(&[1, 1, 1]),
2041            Err(Error::InvalidData("RAR 2.0 oversubscribed Huffman table"))
2042        ));
2043    }
2044
2045    #[test]
2046    fn decode_member_from_reader_accepts_incremental_input() {
2047        struct TinyReader<'a> {
2048            input: &'a [u8],
2049        }
2050
2051        impl std::io::Read for TinyReader<'_> {
2052            fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
2053                if self.input.is_empty() {
2054                    return Ok(0);
2055                }
2056                let len = self.input.len().min(out.len()).min(3);
2057                out[..len].copy_from_slice(&self.input[..len]);
2058                self.input = &self.input[len..];
2059                Ok(len)
2060            }
2061        }
2062
2063        let mut decoder = Unpack20::new();
2064        let mut reader = TinyReader {
2065            input: AUTOREJ_PACKED,
2066        };
2067        let mut output = Vec::new();
2068        decoder
2069            .decode_member_from_reader(&mut reader, expected_text().len(), &mut output)
2070            .unwrap();
2071
2072        assert_eq!(output, expected_text());
2073    }
2074
2075    #[test]
2076    fn decodes_synthetic_audio_block() {
2077        let packed = synthetic_audio_block(8);
2078        let mut decoder = Unpack20::new();
2079
2080        assert_eq!(decoder.decode_member(&packed, 8).unwrap(), vec![0; 8]);
2081    }
2082
2083    #[test]
2084    fn audio_encoder_round_trips_interleaved_pcm_like_payload() {
2085        let input = interleaved_pcm_like_payload();
2086        let packed = super::encode_audio_member(&input, 4).unwrap();
2087        let decoded = unpack20_decode(&packed, input.len()).unwrap();
2088
2089        assert_eq!(decoded, input);
2090    }
2091
2092    #[test]
2093    fn auto_encoder_uses_audio_when_it_beats_lz() {
2094        let input = interleaved_pcm_like_payload();
2095        let lz = unpack20_encode_literals(&input).unwrap();
2096        let auto = super::unpack20_encode_auto(&input).unwrap();
2097        let decoded = unpack20_decode(&auto, input.len()).unwrap();
2098
2099        assert!(auto.len() < lz.len());
2100        assert_eq!(decoded, input);
2101    }
2102
2103    #[test]
2104    fn default_encode_options_match_legacy_entry_points() {
2105        let input = b"rar20 option plumbing preserves default output ".repeat(128);
2106        assert_eq!(
2107            unpack20_encode_literals(&input).unwrap(),
2108            super::unpack20_encode_literals_with_options(&input, EncodeOptions::default()).unwrap()
2109        );
2110        assert_eq!(
2111            super::unpack20_encode_auto(&input).unwrap(),
2112            super::unpack20_encode_auto_with_options(&input, EncodeOptions::default()).unwrap()
2113        );
2114
2115        let first = b"solid rar20 option seed ".repeat(64);
2116        let second = b"solid rar20 option seed with suffix ".repeat(32);
2117        let mut legacy = Unpack20Encoder::new();
2118        let mut explicit = Unpack20Encoder::with_options(EncodeOptions::default());
2119        assert_eq!(
2120            legacy.encode_member(&first).unwrap(),
2121            explicit.encode_member(&first).unwrap()
2122        );
2123        assert_eq!(
2124            legacy.encode_member(&second).unwrap(),
2125            explicit.encode_member(&second).unwrap()
2126        );
2127    }
2128
2129    #[test]
2130    fn encode_options_can_disable_fresh_lz_matches() {
2131        let input = b"abcdefabcdefabcdefabcdef";
2132        let default_tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2133        let literalish_tokens = encode_tokens(input, &[], EncodeOptions::new(0), None);
2134
2135        assert!(default_tokens
2136            .iter()
2137            .any(|token| matches!(token, EncodeToken::Match { .. })));
2138        assert!(!literalish_tokens
2139            .iter()
2140            .any(|token| matches!(token, EncodeToken::Match { .. })));
2141    }
2142
2143    #[test]
2144    fn table_level_encoder_uses_rar20_run_symbols() {
2145        let lengths = [0, 0, 0, 0, 5, 5, 5, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2];
2146        let tokens = super::encode_level_tokens(&lengths);
2147
2148        assert_eq!(
2149            tokens,
2150            vec![
2151                super::LevelToken::zero_run_short(4),
2152                super::LevelToken::plain(5),
2153                super::LevelToken::repeat_previous(3),
2154                super::LevelToken::plain(7),
2155                super::LevelToken::zero_run_short(10),
2156                super::LevelToken::plain(2),
2157            ]
2158        );
2159    }
2160
2161    #[test]
2162    fn decodes_back_to_back_fresh_audio_blocks() {
2163        // No real RAR 2.x encoder we tested emits a mid-stream `audio_block,
2164        // !keep_tables` transition; reference encoders always either keep the
2165        // existing audio tables across boundaries or switch out to LZ. The
2166        // decoder branch that rebuilds `audio_tables` from a freshly-read level
2167        // table inside an audio sequence is therefore only reachable via a
2168        // hand-crafted fixture.
2169        let mut bits = BitWriter::default();
2170        write_fresh_audio_block(&mut bits, 4, /*emit_end_sentinel=*/ true);
2171        write_fresh_audio_block(&mut bits, 4, /*emit_end_sentinel=*/ false);
2172        let packed = bits.finish();
2173
2174        let mut decoder = Unpack20::new();
2175
2176        assert_eq!(decoder.decode_member(&packed, 8).unwrap(), vec![0; 8]);
2177    }
2178
2179    fn expected_text() -> Vec<u8> {
2180        b"Hello text not audio.\r\n".repeat(100)
2181    }
2182
2183    fn interleaved_pcm_like_payload() -> Vec<u8> {
2184        let mut input = Vec::new();
2185        for sample in 0..8192i16 {
2186            let left = sample.wrapping_mul(3).wrapping_add(200);
2187            let right = sample.wrapping_mul(3).wrapping_sub(200);
2188            input.extend_from_slice(&left.to_le_bytes());
2189            input.extend_from_slice(&right.to_le_bytes());
2190        }
2191        input
2192    }
2193
2194    fn synthetic_audio_block(samples: usize) -> Vec<u8> {
2195        let mut bits = BitWriter::default();
2196
2197        bits.write_bits(0b10, 2); // audio block, do not keep previous tables.
2198        bits.write_bits(0, 2); // one channel.
2199
2200        for symbol in 0..19 {
2201            let len = if symbol == 1 || symbol == 18 { 1 } else { 0 };
2202            bits.write_bits(len, 4);
2203        }
2204
2205        bits.write_bit(false); // level symbol 1: audio delta 0 has code length 1.
2206        bits.write_bit(true); // level symbol 18: 138 zeros.
2207        bits.write_bits(127, 7);
2208        bits.write_bit(true); // level symbol 18: 118 zeros.
2209        bits.write_bits(107, 7);
2210
2211        for _ in 0..samples {
2212            bits.write_bit(false); // audio delta 0.
2213        }
2214
2215        bits.finish()
2216    }
2217
2218    fn write_fresh_audio_block(bits: &mut BitWriter, samples: usize, emit_end_sentinel: bool) {
2219        bits.write_bits(0b10, 2); // audio block, do not keep previous tables.
2220        bits.write_bits(0, 2); // one channel.
2221
2222        for symbol in 0..19 {
2223            let len = if symbol == 1 || symbol == 18 { 1 } else { 0 };
2224            bits.write_bits(len, 4);
2225        }
2226
2227        // Audio table: symbol 0 (delta 0) = "0", symbol 256 (block end) = "1".
2228        bits.write_bit(false); // level symbol 1: audio delta 0 has code length 1.
2229        bits.write_bit(true); // level symbol 18: 138 zeros (audio symbols 1..=138).
2230        bits.write_bits(127, 7);
2231        bits.write_bit(true); // level symbol 18: 117 zeros (audio symbols 139..=255).
2232        bits.write_bits(106, 7);
2233        bits.write_bit(false); // level symbol 1: block-end (256) has code length 1.
2234
2235        for _ in 0..samples {
2236            bits.write_bit(false); // audio delta 0.
2237        }
2238        if emit_end_sentinel {
2239            bits.write_bit(true); // audio symbol 256: end of audio block.
2240        }
2241    }
2242
2243    #[test]
2244    fn literal_encoder_round_trips_rar20_lz_blocks() {
2245        let input = b"literal-only RAR 2.0 baseline\nwith repeated text literal-only\n";
2246        let packed = unpack20_encode_literals(input).unwrap();
2247
2248        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2249    }
2250
2251    #[test]
2252    fn encoder_emits_rar20_offset_one_matches_for_repeated_bytes() {
2253        let input = b"A".repeat(1024);
2254        let packed = unpack20_encode_literals(&input).unwrap();
2255
2256        assert!(packed.len() < input.len() / 4);
2257        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2258    }
2259
2260    #[test]
2261    fn encoder_emits_rar20_dictionary_matches_for_repeated_sequences() {
2262        let input = b"abc123xyz-".repeat(128);
2263        let packed = unpack20_encode_literals(&input).unwrap();
2264
2265        assert!(packed.len() < input.len() / 2);
2266        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2267    }
2268
2269    #[test]
2270    fn encoder_emits_rar20_repeat_last_matches_for_regular_streams() {
2271        let input = b"\x00\x01\x02\x03".repeat(4096);
2272        let tokens = encode_tokens(&input, &[], EncodeOptions::default(), None);
2273        let packed = unpack20_encode_literals(&input).unwrap();
2274
2275        assert!(tokens
2276            .iter()
2277            .any(|token| matches!(token, EncodeToken::RepeatLast)));
2278        assert!(packed.len() < input.len() / 8);
2279        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2280    }
2281
2282    #[test]
2283    fn encoder_emits_rar20_minimum_length_fresh_matches() {
2284        let input = b"abcabc";
2285        let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2286        let packed = unpack20_encode_literals(input).unwrap();
2287
2288        assert!(matches!(
2289            tokens.as_slice(),
2290            [
2291                EncodeToken::Literal(b'a'),
2292                EncodeToken::Literal(b'b'),
2293                EncodeToken::Literal(b'c'),
2294                EncodeToken::Match {
2295                    length: 3,
2296                    offset: 3
2297                }
2298            ]
2299        ));
2300        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2301    }
2302
2303    #[test]
2304    fn encoder_emits_rar20_short_offset_matches() {
2305        let input = b"abab";
2306        let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2307        let packed = unpack20_encode_literals(input).unwrap();
2308
2309        assert!(matches!(
2310            tokens.as_slice(),
2311            [
2312                EncodeToken::Literal(b'a'),
2313                EncodeToken::Literal(b'b'),
2314                EncodeToken::ShortOffset { offset: 2 }
2315            ]
2316        ));
2317        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2318    }
2319
2320    #[test]
2321    fn encoder_emits_rar20_old_offset_matches() {
2322        let input = b"abcdabcdXYZXYZwxyzwxyz";
2323        let tokens = encode_tokens(input, &[], EncodeOptions::default(), None);
2324        let packed = unpack20_encode_literals(input).unwrap();
2325
2326        assert!(tokens
2327            .iter()
2328            .any(|token| matches!(token, EncodeToken::OldOffset { .. })));
2329        assert_eq!(unpack20_decode(&packed, input.len()).unwrap(), input);
2330    }
2331
2332    #[test]
2333    fn encoder_finds_rar20_matches_beyond_near_offsets() {
2334        let phrase = b"long-distance repeated phrase for rar20 match finder.";
2335        let mut input = Vec::new();
2336        input.extend_from_slice(phrase);
2337        input.extend(std::iter::repeat_n(0, 300 * 1024));
2338        input.extend_from_slice(phrase);
2339        input.extend_from_slice(phrase);
2340        let tokens = encode_tokens(&input, &[], EncodeOptions::default(), None);
2341        let packed = unpack20_encode_literals(&input).unwrap();
2342
2343        assert!(tokens.iter().any(|token| matches!(
2344            token,
2345            EncodeToken::Match { offset, .. } if *offset > 0x40000
2346        )));
2347        assert!(packed.len() < input.len());
2348        let decoded = unpack20_decode(&packed, input.len()).unwrap();
2349        assert!(
2350            decoded == input,
2351            "RAR 2.0 long-distance match round-trip failed"
2352        );
2353    }
2354
2355    #[test]
2356    fn solid_encoder_emits_rar20_matches_against_previous_member_history() {
2357        let first = b"solid rar20 shared phrase alpha beta gamma ".repeat(4);
2358        let second = b"solid rar20 shared phrase alpha beta gamma ".repeat(2);
2359        let independent = unpack20_encode_literals(&second).unwrap();
2360        let mut encoder = Unpack20Encoder::new();
2361        let first_packed = encoder.encode_member(&first).unwrap();
2362        let second_packed = encoder.encode_member(&second).unwrap();
2363
2364        assert!(second_packed.len() < independent.len());
2365        let mut decoder = Unpack20::new();
2366        assert_eq!(
2367            decoder.decode_member(&first_packed, first.len()).unwrap(),
2368            first
2369        );
2370        assert_eq!(
2371            decoder.decode_member(&second_packed, second.len()).unwrap(),
2372            second
2373        );
2374    }
2375
2376    #[test]
2377    fn solid_encoder_reuses_rar20_tables_at_member_boundary() {
2378        let first: Vec<_> = (0u8..=255).cycle().take(4096).collect();
2379        let second = b"short literal member after reused rar20 table boundary\n";
2380        let independent = unpack20_encode_literals(second).unwrap();
2381        let mut encoder = Unpack20Encoder::new();
2382        let first_packed = encoder.encode_member(&first).unwrap();
2383        let second_packed = encoder.encode_member(second).unwrap();
2384
2385        assert!(second_packed.len() < independent.len());
2386        let mut decoder = Unpack20::new();
2387        assert_eq!(
2388            decoder.decode_member(&first_packed, first.len()).unwrap(),
2389            first
2390        );
2391        assert_eq!(
2392            decoder.decode_member(&second_packed, second.len()).unwrap(),
2393            second
2394        );
2395    }
2396
2397    #[test]
2398    fn solid_encoder_matches_immediately_after_rar20_table_boundary() {
2399        let phrase = b"rar20 table boundary match phrase with enough bytes ";
2400        let first = phrase.repeat(128);
2401        let second = phrase.repeat(8);
2402        let independent = unpack20_encode_literals(&second).unwrap();
2403        let mut encoder = Unpack20Encoder::new();
2404        let first_packed = encoder.encode_member(&first).unwrap();
2405        let second_packed = encoder.encode_member(&second).unwrap();
2406        let tokens = encode_tokens(&second, &first, EncodeOptions::default(), None);
2407
2408        assert!(matches!(tokens.first(), Some(EncodeToken::Match { .. })));
2409        assert!(second_packed.len() < independent.len());
2410        let mut decoder = Unpack20::new();
2411        assert_eq!(
2412            decoder.decode_member(&first_packed, first.len()).unwrap(),
2413            first
2414        );
2415        assert_eq!(
2416            decoder.decode_member(&second_packed, second.len()).unwrap(),
2417            second
2418        );
2419    }
2420
2421    #[test]
2422    fn solid_encoder_carries_rar20_history_across_multiple_members() {
2423        let first = b"rar20 multi member solid seed ".repeat(512);
2424        let second = b"rar20 multi member solid seed with middle tail ".repeat(128);
2425        let third = b"with middle tail ".repeat(64);
2426        let independent = unpack20_encode_literals(&third).unwrap();
2427        let mut encoder = Unpack20Encoder::new();
2428        let first_packed = encoder.encode_member(&first).unwrap();
2429        let second_packed = encoder.encode_member(&second).unwrap();
2430        let third_packed = encoder.encode_member(&third).unwrap();
2431
2432        assert!(third_packed.len() < independent.len());
2433        let mut decoder = Unpack20::new();
2434        assert_eq!(
2435            decoder.decode_member(&first_packed, first.len()).unwrap(),
2436            first
2437        );
2438        assert_eq!(
2439            decoder.decode_member(&second_packed, second.len()).unwrap(),
2440            second
2441        );
2442        assert_eq!(
2443            decoder.decode_member(&third_packed, third.len()).unwrap(),
2444            third
2445        );
2446    }
2447
2448    #[test]
2449    fn decode_member_to_streams_decoded_payload_through_writer_sink() {
2450        let input = b"abcabcabcabcabcabcabcabcabcabcabcabc";
2451        let packed = unpack20_encode_literals(input).unwrap();
2452
2453        let mut decoder = Unpack20::new();
2454        let mut sink = Vec::new();
2455        decoder
2456            .decode_member_to(&packed, input.len(), &mut sink)
2457            .unwrap();
2458        assert_eq!(sink, input);
2459
2460        // The error-mapping closure inside decode_member_to fires when the
2461        // sink's write_all returns Err — feed it a writer that always fails.
2462        struct FailingWriter;
2463        impl std::io::Write for FailingWriter {
2464            fn write(&mut self, _buf: &[u8]) -> std::io::Result<usize> {
2465                Err(std::io::Error::other("disk full"))
2466            }
2467            fn flush(&mut self) -> std::io::Result<()> {
2468                Ok(())
2469            }
2470        }
2471        let mut decoder = Unpack20::new();
2472        let err = decoder
2473            .decode_member_to(&packed, input.len(), &mut FailingWriter)
2474            .unwrap_err();
2475        assert_eq!(err, Error::InvalidData("RAR 2.0 output write failed"));
2476    }
2477}