Skip to main content

rars_codec/
rar50.rs

1use crate::filters::{self, DeltaErrorMessages, FilterOp};
2use crate::{huffman, Error, Result};
3use std::collections::VecDeque;
4use std::io::Read;
5use std::ops::Range;
6
7pub const LEVEL_TABLE_SIZE: usize = 20;
8pub const MAIN_TABLE_SIZE: usize = 306;
9pub const DISTANCE_TABLE_SIZE_50: usize = 64;
10pub const DISTANCE_TABLE_SIZE_70: usize = 80;
11pub const ALIGN_TABLE_SIZE: usize = 16;
12pub const LENGTH_TABLE_SIZE: usize = 44;
13const DEFAULT_DICTIONARY_SIZE: usize = 4 * 1024 * 1024;
14const MAX_INITIAL_OUTPUT_CAPACITY: usize = 1024 * 1024;
15const STREAM_FLUSH_THRESHOLD: usize = 64 * 1024;
16const STREAM_HISTORY_LIMIT: usize = 64 * 1024 * 1024;
17const MAX_ENCODER_MATCH_OFFSET: usize = DEFAULT_DICTIONARY_SIZE;
18const MAX_ENCODER_MATCH_LENGTH: usize = 4096;
19const MAX_COMPRESSED_BLOCK_OUTPUT: usize = 4 * 1024 * 1024;
20const MAX_FILTER_BLOCK_LENGTH: usize = 0x3ffff;
21const MATCH_HASH_BUCKETS: usize = 4096;
22const MAX_MATCH_CANDIDATES: usize = 256;
23
24#[derive(Debug, Clone, PartialEq, Eq)]
25pub struct CompressedBlock {
26    pub header: CompressedBlockHeader,
27    pub header_len: usize,
28    pub payload: Range<usize>,
29}
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq)]
32pub struct CompressedBlockHeader {
33    pub flags: u8,
34    pub is_last: bool,
35    pub has_tables: bool,
36    pub final_byte_bits: u8,
37    pub payload_size: usize,
38    pub payload_bits: usize,
39}
40
41struct OwnedCompressedBlock {
42    header: CompressedBlockHeader,
43    payload: Vec<u8>,
44}
45
46#[derive(Debug)]
47#[doc(hidden)]
48pub enum StreamDecodeError<E> {
49    Decode(Error),
50    FilteredMember,
51    Sink(E),
52}
53
54impl<E> From<Error> for StreamDecodeError<E> {
55    fn from(error: Error) -> Self {
56        Self::Decode(error)
57    }
58}
59
60#[derive(Debug, Clone, Copy, PartialEq, Eq)]
61#[doc(hidden)]
62pub enum DecodedChunk<'a> {
63    Bytes(&'a [u8]),
64    Repeated { byte: u8, len: usize },
65}
66
67#[derive(Debug, Clone, PartialEq, Eq)]
68pub struct TableLengths {
69    pub main: Vec<u8>,
70    pub distance: Vec<u8>,
71    pub align: Vec<u8>,
72    pub length: Vec<u8>,
73}
74
75#[derive(Debug, Clone)]
76pub struct DecodeTables {
77    pub main: HuffmanTable,
78    pub distance: HuffmanTable,
79    pub align: HuffmanTable,
80    pub length: HuffmanTable,
81    pub align_mode: bool,
82}
83
84impl DecodeTables {
85    pub fn from_lengths(lengths: &TableLengths) -> Result<Self> {
86        let align_mode = lengths
87            .align
88            .iter()
89            .any(|&length| length != 0 && length != 4);
90        Ok(Self {
91            main: HuffmanTable::from_lengths(&lengths.main)?,
92            distance: HuffmanTable::from_lengths(&lengths.distance)?,
93            align: HuffmanTable::from_lengths(&lengths.align)?,
94            length: HuffmanTable::from_lengths(&lengths.length)?,
95            align_mode,
96        })
97    }
98}
99
100#[derive(Debug, Clone, Copy, PartialEq, Eq)]
101pub enum DecodeMode {
102    LiteralOnly,
103    Lz,
104    LzNoFilters,
105}
106
107impl DecodeMode {
108    fn uses_lz(self) -> bool {
109        matches!(self, Self::Lz | Self::LzNoFilters)
110    }
111
112    fn applies_filters(self) -> bool {
113        matches!(self, Self::Lz)
114    }
115}
116
117pub fn parse_compressed_block(input: &[u8]) -> Result<CompressedBlock> {
118    if input.len() < 3 {
119        return Err(Error::NeedMoreInput);
120    }
121
122    let flags = input[0];
123    let checksum = input[1];
124    let size_bytes = match (flags >> 3) & 0x03 {
125        0 => 1,
126        1 => 2,
127        2 => 3,
128        _ => return Err(Error::InvalidData("RAR 5 block size length is invalid")),
129    };
130    let header_len = 2 + size_bytes;
131    if input.len() < header_len {
132        return Err(Error::NeedMoreInput);
133    }
134
135    let size_data = &input[2..header_len];
136    let actual = size_data
137        .iter()
138        .fold(checksum ^ flags, |acc, &byte| acc ^ byte);
139    if actual != 0x5a {
140        return Err(Error::InvalidData("RAR 5 block header checksum mismatch"));
141    }
142
143    let payload_size = size_data
144        .iter()
145        .enumerate()
146        .fold(0usize, |acc, (index, &byte)| {
147            acc | (usize::from(byte) << (index * 8))
148        });
149    let payload_end = header_len
150        .checked_add(payload_size)
151        .ok_or(Error::InvalidData("RAR 5 block size overflows"))?;
152    if input.len() < payload_end {
153        return Err(Error::NeedMoreInput);
154    }
155
156    let final_byte_bits = ((flags & 0x07) + 1).min(8);
157    let payload_bits = if payload_size == 0 {
158        0
159    } else {
160        (payload_size - 1) * 8 + usize::from(final_byte_bits)
161    };
162
163    Ok(CompressedBlock {
164        header: CompressedBlockHeader {
165            flags,
166            is_last: flags & 0x40 != 0,
167            has_tables: flags & 0x80 != 0,
168            final_byte_bits,
169            payload_size,
170            payload_bits,
171        },
172        header_len,
173        payload: header_len..payload_end,
174    })
175}
176
177pub fn read_level_lengths(input: &[u8]) -> Result<([u8; LEVEL_TABLE_SIZE], usize)> {
178    let mut bits = BitReader::new(input);
179    let mut lengths = [0; LEVEL_TABLE_SIZE];
180    let mut pos = 0;
181    while pos < LEVEL_TABLE_SIZE {
182        let length = bits.read_bits(4)? as u8;
183        if length == 15 {
184            let zero_count = bits.read_bits(4)? as usize;
185            if zero_count == 0 {
186                lengths[pos] = 15;
187                pos += 1;
188            } else {
189                let count = zero_count + 2;
190                for _ in 0..count {
191                    if pos >= LEVEL_TABLE_SIZE {
192                        break;
193                    }
194                    lengths[pos] = 0;
195                    pos += 1;
196                }
197            }
198        } else {
199            lengths[pos] = length;
200            pos += 1;
201        }
202    }
203    Ok((lengths, bits.bit_pos))
204}
205
206pub fn table_length_count(algorithm_version: u8) -> Result<usize> {
207    match algorithm_version {
208        0 => Ok(MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_50 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE),
209        1 => Ok(MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_70 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE),
210        _ => Err(Error::InvalidData(
211            "RAR 5 unknown compression algorithm version",
212        )),
213    }
214}
215
216pub fn read_table_lengths(input: &[u8], algorithm_version: u8) -> Result<(TableLengths, usize)> {
217    let table_size = table_length_count(algorithm_version)?;
218    let (level_lengths, level_bits) = read_level_lengths(input)?;
219    let level_decoder = HuffmanTable::from_lengths(&level_lengths)?;
220    let mut bits = BitReader::new(input);
221    bits.bit_pos = level_bits;
222
223    let mut lengths = Vec::with_capacity(table_size);
224    while lengths.len() < table_size {
225        let number = level_decoder.decode(&mut bits)?;
226        match number {
227            0..=15 => lengths.push(number as u8),
228            16 | 17 => {
229                if lengths.is_empty() {
230                    return Err(Error::InvalidData(
231                        "RAR 5 table repeats missing previous length",
232                    ));
233                }
234                let count = if number == 16 {
235                    3 + bits.read_bits(3)? as usize
236                } else {
237                    11 + bits.read_bits(7)? as usize
238                };
239                let previous = *lengths.last().unwrap();
240                for _ in 0..count {
241                    if lengths.len() >= table_size {
242                        break;
243                    }
244                    lengths.push(previous);
245                }
246            }
247            18 | 19 => {
248                let count = if number == 18 {
249                    3 + bits.read_bits(3)? as usize
250                } else {
251                    11 + bits.read_bits(7)? as usize
252                };
253                for _ in 0..count {
254                    if lengths.len() >= table_size {
255                        break;
256                    }
257                    lengths.push(0);
258                }
259            }
260            _ => return Err(Error::InvalidData("RAR 5 invalid level-table symbol")),
261        }
262    }
263
264    let distance_size = match algorithm_version {
265        0 => DISTANCE_TABLE_SIZE_50,
266        1 => DISTANCE_TABLE_SIZE_70,
267        _ => unreachable!("validated by table_length_count"),
268    };
269    let distance_start = MAIN_TABLE_SIZE;
270    let align_start = distance_start + distance_size;
271    let length_start = align_start + ALIGN_TABLE_SIZE;
272
273    Ok((
274        TableLengths {
275            main: lengths[..distance_start].to_vec(),
276            distance: lengths[distance_start..align_start].to_vec(),
277            align: lengths[align_start..length_start].to_vec(),
278            length: lengths[length_start..].to_vec(),
279        },
280        bits.bit_pos,
281    ))
282}
283
284pub fn encode_table_lengths(lengths: &TableLengths, algorithm_version: u8) -> Result<Vec<u8>> {
285    encode_table_lengths_with_bit_count(lengths, algorithm_version).map(|(data, _)| data)
286}
287
288pub fn encode_table_lengths_with_bit_count(
289    lengths: &TableLengths,
290    algorithm_version: u8,
291) -> Result<(Vec<u8>, usize)> {
292    let distance_size = match algorithm_version {
293        0 => DISTANCE_TABLE_SIZE_50,
294        1 => DISTANCE_TABLE_SIZE_70,
295        _ => {
296            return Err(Error::InvalidData(
297                "RAR 5 unknown compression algorithm version",
298            ))
299        }
300    };
301    if lengths.main.len() != MAIN_TABLE_SIZE
302        || lengths.distance.len() != distance_size
303        || lengths.align.len() != ALIGN_TABLE_SIZE
304        || lengths.length.len() != LENGTH_TABLE_SIZE
305    {
306        return Err(Error::InvalidData("RAR 5 table length count mismatch"));
307    }
308
309    let flattened = lengths
310        .main
311        .iter()
312        .chain(lengths.distance.iter())
313        .chain(lengths.align.iter())
314        .chain(lengths.length.iter())
315        .copied()
316        .collect::<Vec<_>>();
317    for &length in &flattened {
318        if length > 15 {
319            return Err(Error::InvalidData("RAR 5 Huffman length is too large"));
320        }
321    }
322
323    let level_tokens = encode_table_level_tokens(&flattened);
324    let level_lengths = level_code_lengths_for_tokens(&level_tokens);
325    let level_table = HuffmanTable::from_lengths(&level_lengths)?;
326    let mut writer = BitWriter::new();
327    write_level_lengths(&mut writer, &level_lengths);
328    for token in level_tokens {
329        let (code, len) = level_table.code_for_symbol(token.symbol)?;
330        writer.write_bits(usize::from(code), usize::from(len));
331        if token.extra_bits != 0 {
332            writer.write_bits(
333                usize::from(token.extra_value),
334                usize::from(token.extra_bits),
335            );
336        }
337    }
338    let bit_count = writer.bit_pos;
339    Ok((writer.finish(), bit_count))
340}
341
342pub fn encode_compressed_block(
343    payload: &[u8],
344    payload_bits: usize,
345    has_tables: bool,
346    is_last: bool,
347) -> Result<Vec<u8>> {
348    if payload_bits > payload.len() * 8 {
349        return Err(Error::InvalidData("RAR 5 block bit count exceeds payload"));
350    }
351    if payload.is_empty() && payload_bits != 0 {
352        return Err(Error::InvalidData("RAR 5 empty block has payload bits"));
353    }
354    if !payload.is_empty() && payload_bits <= (payload.len() - 1) * 8 {
355        return Err(Error::InvalidData("RAR 5 block has unused payload bytes"));
356    }
357    if payload.len() > 0x00ff_ffff {
358        return Err(Error::InvalidData("RAR 5 block payload is too large"));
359    }
360
361    let size_len = if payload.len() <= 0xff {
362        1
363    } else if payload.len() <= 0xffff {
364        2
365    } else {
366        3
367    };
368    let final_byte_bits = if payload.is_empty() {
369        1
370    } else {
371        ((payload_bits - 1) % 8) + 1
372    };
373    let mut flags = (final_byte_bits as u8) - 1;
374    flags |= match size_len {
375        1 => 0,
376        2 => 1 << 3,
377        3 => 2 << 3,
378        _ => unreachable!("size_len is constrained above"),
379    };
380    if is_last {
381        flags |= 0x40;
382    }
383    if has_tables {
384        flags |= 0x80;
385    }
386
387    let mut size_bytes = [0u8; 3];
388    let mut size = payload.len();
389    for byte in &mut size_bytes[..size_len] {
390        *byte = size as u8;
391        size >>= 8;
392    }
393    let checksum = size_bytes[..size_len]
394        .iter()
395        .fold(0x5a ^ flags, |acc, &byte| acc ^ byte);
396    let mut out = Vec::with_capacity(2 + size_len + payload.len());
397    out.push(flags);
398    out.push(checksum);
399    out.extend_from_slice(&size_bytes[..size_len]);
400    out.extend_from_slice(payload);
401    Ok(out)
402}
403
404pub fn decode_literal_only(
405    input: &[u8],
406    algorithm_version: u8,
407    output_size: usize,
408) -> Result<Vec<u8>> {
409    let mut decoder = Unpack50Decoder::new();
410    decoder.decode_member(
411        input,
412        algorithm_version,
413        output_size,
414        false,
415        DecodeMode::LiteralOnly,
416    )
417}
418
419pub fn decode_lz(input: &[u8], algorithm_version: u8, output_size: usize) -> Result<Vec<u8>> {
420    let mut decoder = Unpack50Decoder::new();
421    decoder.decode_member(input, algorithm_version, output_size, false, DecodeMode::Lz)
422}
423
424pub fn encode_literal_only(data: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
425    let distance_size = match algorithm_version {
426        0 => DISTANCE_TABLE_SIZE_50,
427        1 => DISTANCE_TABLE_SIZE_70,
428        _ => {
429            return Err(Error::InvalidData(
430                "RAR 5 unknown compression algorithm version",
431            ))
432        }
433    };
434    let mut lengths = TableLengths {
435        main: vec![0; MAIN_TABLE_SIZE],
436        distance: vec![0; distance_size],
437        align: vec![0; ALIGN_TABLE_SIZE],
438        length: vec![0; LENGTH_TABLE_SIZE],
439    };
440    let present = literal_presence(data);
441    let literal_count = present.iter().filter(|&&used| used).count();
442    let literal_length = huffman::bits_for_symbol_count(literal_count);
443    for (symbol, used) in present.into_iter().enumerate() {
444        if used {
445            lengths.main[symbol] = literal_length;
446        }
447    }
448
449    let table = HuffmanTable::from_lengths(&lengths.main)?;
450    let (table_data, table_bits) =
451        encode_table_lengths_with_bit_count(&lengths, algorithm_version)?;
452    let mut writer = BitWriter {
453        bytes: table_data,
454        bit_pos: table_bits,
455    };
456    for &byte in data {
457        let (code, len) = table.code_for_symbol(byte as usize)?;
458        writer.write_bits(usize::from(code), usize::from(len));
459    }
460    let payload_bits = writer.bit_pos;
461    encode_compressed_block(&writer.finish(), payload_bits, true, true)
462}
463
464pub fn encode_lz_member(data: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
465    encode_lz_member_with_history(data, &[], algorithm_version)
466}
467
468#[derive(Debug, Clone, Copy, PartialEq, Eq)]
469#[non_exhaustive]
470pub struct EncodeOptions {
471    pub max_match_candidates: usize,
472    pub lazy_matching: bool,
473    pub lazy_lookahead: usize,
474    pub max_match_distance: usize,
475}
476
477impl EncodeOptions {
478    pub const fn new(max_match_candidates: usize) -> Self {
479        Self {
480            max_match_candidates,
481            lazy_matching: false,
482            lazy_lookahead: 1,
483            max_match_distance: MAX_ENCODER_MATCH_OFFSET,
484        }
485    }
486
487    pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
488        self.lazy_matching = enabled;
489        self
490    }
491
492    pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
493        self.lazy_lookahead = bytes;
494        self
495    }
496
497    pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
498        self.max_match_distance = distance;
499        self
500    }
501}
502
503impl Default for EncodeOptions {
504    fn default() -> Self {
505        Self::new(MAX_MATCH_CANDIDATES)
506    }
507}
508
509pub fn encode_lz_member_with_history(
510    data: &[u8],
511    history: &[u8],
512    algorithm_version: u8,
513) -> Result<Vec<u8>> {
514    encode_lz_member_inner(
515        data,
516        history,
517        algorithm_version,
518        &[],
519        EncodeOptions::default(),
520    )
521}
522
523pub fn encode_lz_member_with_options(
524    data: &[u8],
525    algorithm_version: u8,
526    options: EncodeOptions,
527) -> Result<Vec<u8>> {
528    encode_lz_member_with_history_and_options(data, &[], algorithm_version, options)
529}
530
531pub fn encode_lz_member_with_history_and_options(
532    data: &[u8],
533    history: &[u8],
534    algorithm_version: u8,
535    options: EncodeOptions,
536) -> Result<Vec<u8>> {
537    encode_lz_member_inner(data, history, algorithm_version, &[], options)
538}
539
540#[derive(Debug, Clone, Copy, PartialEq, Eq)]
541pub enum Rar50FilterKind {
542    Delta { channels: usize },
543    E8,
544    E8E9,
545    Arm,
546}
547
548#[derive(Debug, Clone, PartialEq, Eq)]
549pub struct Rar50FilterSpec {
550    pub kind: Rar50FilterKind,
551    pub range: Option<Range<usize>>,
552}
553
554impl Rar50FilterSpec {
555    pub fn new(kind: Rar50FilterKind) -> Self {
556        Self { kind, range: None }
557    }
558
559    pub fn range(kind: Rar50FilterKind, range: Range<usize>) -> Self {
560        Self {
561            kind,
562            range: Some(range),
563        }
564    }
565}
566
567fn filtered_lz_member(
568    data: &[u8],
569    filters: &[Rar50FilterSpec],
570) -> Result<(Vec<u8>, Vec<EncodeFilter>)> {
571    let mut filtered = data.to_vec();
572    let mut records = Vec::with_capacity(filters.len());
573    for filter in filters {
574        let range = filter.range.clone().unwrap_or(0..data.len());
575        if range.start >= range.end || range.end > data.len() {
576            return Err(Error::InvalidData("RAR 5 filter range is invalid"));
577        }
578        if range.start > u32::MAX as usize {
579            return Err(Error::InvalidData("RAR 5 filter offset is too large"));
580        }
581
582        let filter_data = &mut filtered[range.clone()];
583        let (filter_type, channels) = encode_filter_data(filter.kind, filter_data, range.start)?;
584        records.push(EncodeFilter {
585            offset: range.start,
586            length: range.len(),
587            filter_type,
588            channels,
589        });
590    }
591    Ok((filtered, records))
592}
593
594fn encode_filter_data(
595    kind: Rar50FilterKind,
596    data: &mut [u8],
597    file_offset: usize,
598) -> Result<(FilterType, usize)> {
599    if file_offset > u32::MAX as usize {
600        return Err(Error::InvalidData("RAR 5 filter offset is too large"));
601    }
602    match kind {
603        Rar50FilterKind::Delta { channels } => {
604            filters::encode_in_place(
605                FilterOp::Delta { channels },
606                data,
607                0,
608                rar50_delta_messages(),
609            )?;
610            Ok((FilterType::Delta, channels))
611        }
612        Rar50FilterKind::E8 => {
613            e8e9_encode(data, file_offset as u32, false);
614            Ok((FilterType::E8, 0))
615        }
616        Rar50FilterKind::E8E9 => {
617            e8e9_encode(data, file_offset as u32, true);
618            Ok((FilterType::E8E9, 0))
619        }
620        Rar50FilterKind::Arm => {
621            arm_encode(data, file_offset as u32);
622            Ok((FilterType::Arm, 0))
623        }
624    }
625}
626
627fn filtered_lz_blocks(
628    data: &[u8],
629    filters: &[Rar50FilterSpec],
630    history: &[u8],
631    algorithm_version: u8,
632    options: EncodeOptions,
633) -> Result<Vec<u8>> {
634    let filters = normalized_filter_specs(data.len(), filters)?;
635    let mut out = Vec::new();
636    let mut block_history =
637        history[history.len().saturating_sub(options.max_match_distance)..].to_vec();
638    let mut chunk_start = 0usize;
639    while chunk_start < data.len() {
640        let chunk_end = (chunk_start + MAX_FILTER_BLOCK_LENGTH).min(data.len());
641        let mut chunk = data[chunk_start..chunk_end].to_vec();
642        let mut records = Vec::new();
643        for filter in &filters {
644            let start = filter.range.start.max(chunk_start);
645            let end = filter.range.end.min(chunk_end);
646            if start >= end {
647                continue;
648            }
649            let local_start = start - chunk_start;
650            let local_end = end - chunk_start;
651            let (filter_type, channels) =
652                encode_filter_data(filter.kind, &mut chunk[local_start..local_end], start)?;
653            records.push(EncodeFilter {
654                offset: local_start,
655                length: local_end - local_start,
656                filter_type,
657                channels,
658            });
659        }
660        out.extend(encode_lz_block(
661            &chunk,
662            &block_history,
663            algorithm_version,
664            &records,
665            options,
666            chunk_end == data.len(),
667        )?);
668        block_history.extend_from_slice(&chunk);
669        let keep_from = block_history
670            .len()
671            .saturating_sub(options.max_match_distance);
672        if keep_from != 0 {
673            block_history.drain(..keep_from);
674        }
675        chunk_start = chunk_end;
676    }
677    Ok(out)
678}
679
680#[derive(Debug, Clone, PartialEq, Eq)]
681struct NormalizedFilterSpec {
682    kind: Rar50FilterKind,
683    range: Range<usize>,
684}
685
686fn normalized_filter_specs(
687    data_len: usize,
688    filters: &[Rar50FilterSpec],
689) -> Result<Vec<NormalizedFilterSpec>> {
690    let mut normalized = Vec::with_capacity(filters.len());
691    for filter in filters {
692        let range = filter.range.clone().unwrap_or(0..data_len);
693        if range.start >= range.end || range.end > data_len {
694            return Err(Error::InvalidData("RAR 5 filter range is invalid"));
695        }
696        normalized.push(NormalizedFilterSpec {
697            kind: filter.kind,
698            range,
699        });
700    }
701    Ok(normalized)
702}
703
704fn encode_lz_member_inner(
705    data: &[u8],
706    history: &[u8],
707    algorithm_version: u8,
708    initial_filters: &[EncodeFilter],
709    options: EncodeOptions,
710) -> Result<Vec<u8>> {
711    if data.len() > MAX_COMPRESSED_BLOCK_OUTPUT && initial_filters.is_empty() {
712        let mut out = Vec::new();
713        let mut block_history =
714            history[history.len().saturating_sub(options.max_match_distance)..].to_vec();
715        let mut chunks = data.chunks(MAX_COMPRESSED_BLOCK_OUTPUT).peekable();
716        while let Some(chunk) = chunks.next() {
717            let is_last = chunks.peek().is_none();
718            out.extend(encode_lz_block(
719                chunk,
720                &block_history,
721                algorithm_version,
722                &[],
723                options,
724                is_last,
725            )?);
726            block_history.extend_from_slice(chunk);
727            let keep_from = block_history
728                .len()
729                .saturating_sub(options.max_match_distance);
730            if keep_from != 0 {
731                block_history.drain(..keep_from);
732            }
733        }
734        return Ok(out);
735    }
736
737    encode_lz_block(
738        data,
739        history,
740        algorithm_version,
741        initial_filters,
742        options,
743        true,
744    )
745}
746
747fn encode_lz_block(
748    data: &[u8],
749    history: &[u8],
750    algorithm_version: u8,
751    initial_filters: &[EncodeFilter],
752    options: EncodeOptions,
753    is_last: bool,
754) -> Result<Vec<u8>> {
755    let distance_size = match algorithm_version {
756        0 => DISTANCE_TABLE_SIZE_50,
757        1 => DISTANCE_TABLE_SIZE_70,
758        _ => {
759            return Err(Error::InvalidData(
760                "RAR 5 unknown compression algorithm version",
761            ))
762        }
763    };
764    let mut tokens = Vec::new();
765    tokens.extend(initial_filters.iter().copied().map(EncodeToken::Filter));
766    tokens.extend(encode_tokens(data, history, options, distance_size));
767    let mut lengths = TableLengths {
768        main: vec![0; MAIN_TABLE_SIZE],
769        distance: vec![0; distance_size],
770        align: vec![0; ALIGN_TABLE_SIZE],
771        length: vec![0; LENGTH_TABLE_SIZE],
772    };
773
774    let mut main_frequencies = vec![0usize; MAIN_TABLE_SIZE];
775    let mut distance_frequencies = vec![0usize; distance_size];
776    let mut align_frequencies = vec![0usize; ALIGN_TABLE_SIZE];
777    let mut length_frequencies = vec![0usize; LENGTH_TABLE_SIZE];
778    let mut state = EncoderMatchState::default();
779    for token in &tokens {
780        match *token {
781            EncodeToken::Filter(_) => main_frequencies[256] += 1,
782            EncodeToken::Literal(byte) => main_frequencies[byte as usize] += 1,
783            EncodeToken::Match { length, distance } => {
784                match state.encode_match(length, distance, distance_size)? {
785                    EncodedMatch::LastLengthRepeat => main_frequencies[257] += 1,
786                    EncodedMatch::RepeatDistance {
787                        index, length_slot, ..
788                    } => {
789                        main_frequencies[258 + index] += 1;
790                        length_frequencies[length_slot] += 1;
791                    }
792                    EncodedMatch::New {
793                        length_slot,
794                        distance_slot,
795                        distance_extra,
796                        distance_bit_count,
797                        ..
798                    } => {
799                        main_frequencies[262 + length_slot] += 1;
800                        distance_frequencies[distance_slot] += 1;
801                        if distance_bit_count >= 4 {
802                            align_frequencies[distance_extra & 0x0f] += 1;
803                        }
804                    }
805                }
806                state.remember(length, distance);
807            }
808        }
809    }
810
811    lengths.main = huffman::lengths_for_frequencies(&main_frequencies, 15);
812    lengths.distance = huffman::lengths_for_frequencies(&distance_frequencies, 15);
813    lengths.length = huffman::lengths_for_frequencies(&length_frequencies, 15);
814    lengths.align = huffman::lengths_for_frequencies(&align_frequencies, 15);
815
816    let main_table = HuffmanTable::from_lengths(&lengths.main)?;
817    let distance_table = HuffmanTable::from_lengths(&lengths.distance)?;
818    let align_table = HuffmanTable::from_lengths(&lengths.align)?;
819    let length_table = HuffmanTable::from_lengths(&lengths.length)?;
820    let (table_data, table_bits) =
821        encode_table_lengths_with_bit_count(&lengths, algorithm_version)?;
822    let mut writer = BitWriter {
823        bytes: table_data,
824        bit_pos: table_bits,
825    };
826    let mut state = EncoderMatchState::default();
827    for token in tokens {
828        match token {
829            EncodeToken::Filter(filter) => {
830                let (code, len) = main_table.code_for_symbol(256)?;
831                writer.write_bits(usize::from(code), usize::from(len));
832                write_filter(&mut writer, filter)?;
833            }
834            EncodeToken::Literal(byte) => {
835                let (code, len) = main_table.code_for_symbol(byte as usize)?;
836                writer.write_bits(usize::from(code), usize::from(len));
837            }
838            EncodeToken::Match { length, distance } => {
839                match state.encode_match(length, distance, distance_size)? {
840                    EncodedMatch::LastLengthRepeat => {
841                        let (code, len) = main_table.code_for_symbol(257)?;
842                        writer.write_bits(usize::from(code), usize::from(len));
843                    }
844                    EncodedMatch::RepeatDistance {
845                        index,
846                        length_slot,
847                        length_extra,
848                    } => {
849                        let (code, len) = main_table.code_for_symbol(258 + index)?;
850                        writer.write_bits(usize::from(code), usize::from(len));
851                        let (code, len) = length_table.code_for_symbol(length_slot)?;
852                        writer.write_bits(usize::from(code), usize::from(len));
853                        let length_extra_bits = length_slot_extra_bits(length_slot)?;
854                        if length_extra_bits != 0 {
855                            writer.write_bits(length_extra, usize::from(length_extra_bits));
856                        }
857                    }
858                    EncodedMatch::New {
859                        length_slot,
860                        length_extra,
861                        distance_slot,
862                        distance_extra,
863                        distance_bit_count,
864                    } => {
865                        let (code, len) = main_table.code_for_symbol(262 + length_slot)?;
866                        writer.write_bits(usize::from(code), usize::from(len));
867                        let length_extra_bits = length_slot_extra_bits(length_slot)?;
868                        if length_extra_bits != 0 {
869                            writer.write_bits(length_extra, usize::from(length_extra_bits));
870                        }
871                        let (code, len) = distance_table.code_for_symbol(distance_slot)?;
872                        writer.write_bits(usize::from(code), usize::from(len));
873                        if distance_bit_count >= 4 {
874                            if distance_bit_count > 4 {
875                                writer.write_bits(distance_extra >> 4, distance_bit_count - 4);
876                            }
877                            let (code, len) = align_table.code_for_symbol(distance_extra & 0x0f)?;
878                            writer.write_bits(usize::from(code), usize::from(len));
879                        } else if distance_bit_count != 0 {
880                            writer.write_bits(distance_extra, distance_bit_count);
881                        }
882                    }
883                }
884                state.remember(length, distance);
885            }
886        }
887    }
888
889    let payload_bits = writer.bit_pos;
890    encode_compressed_block(&writer.finish(), payload_bits, true, is_last)
891}
892
893#[derive(Debug, Clone, Default)]
894pub struct Unpack50Encoder {
895    history: Vec<u8>,
896    options: EncodeOptions,
897}
898
899impl Unpack50Encoder {
900    pub fn new() -> Self {
901        Self::default()
902    }
903
904    pub fn with_options(options: EncodeOptions) -> Self {
905        Self {
906            history: Vec::new(),
907            options,
908        }
909    }
910
911    pub fn encode_member(&mut self, input: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
912        let packed = encode_lz_member_with_history_and_options(
913            input,
914            &self.history,
915            algorithm_version,
916            self.options,
917        )?;
918        self.remember(input);
919        Ok(packed)
920    }
921
922    pub fn encode_member_with_filter(
923        &mut self,
924        input: &[u8],
925        algorithm_version: u8,
926        filter: Rar50FilterSpec,
927    ) -> Result<Vec<u8>> {
928        self.encode_member_with_filters(input, algorithm_version, &[filter])
929    }
930
931    pub fn encode_member_with_filters(
932        &mut self,
933        input: &[u8],
934        algorithm_version: u8,
935        filters: &[Rar50FilterSpec],
936    ) -> Result<Vec<u8>> {
937        if input.len() > MAX_FILTER_BLOCK_LENGTH {
938            let packed = filtered_lz_blocks(
939                input,
940                filters,
941                &self.history,
942                algorithm_version,
943                self.options,
944            )?;
945            self.remember(input);
946            return Ok(packed);
947        }
948        let (filtered, records) = filtered_lz_member(input, filters)?;
949        let packed = encode_lz_member_inner(
950            &filtered,
951            &self.history,
952            algorithm_version,
953            &records,
954            self.options,
955        )?;
956        self.remember(input);
957        Ok(packed)
958    }
959
960    fn remember(&mut self, input: &[u8]) {
961        self.history.extend_from_slice(input);
962        let keep_from = self
963            .history
964            .len()
965            .saturating_sub(self.options.max_match_distance);
966        if keep_from != 0 {
967            self.history.drain(..keep_from);
968        }
969    }
970}
971
972#[derive(Debug, Clone, Copy)]
973enum EncodeToken {
974    Filter(EncodeFilter),
975    Literal(u8),
976    Match { length: usize, distance: usize },
977}
978
979#[derive(Debug, Clone, Copy)]
980struct EncodeFilter {
981    offset: usize,
982    length: usize,
983    filter_type: FilterType,
984    channels: usize,
985}
986
987#[derive(Debug, Clone, Copy, Default)]
988struct EncoderMatchState {
989    reps: [usize; 4],
990    last_length: usize,
991}
992
993#[derive(Debug, Clone, Copy, PartialEq, Eq)]
994enum EncodedMatch {
995    LastLengthRepeat,
996    RepeatDistance {
997        index: usize,
998        length_slot: usize,
999        length_extra: usize,
1000    },
1001    New {
1002        length_slot: usize,
1003        length_extra: usize,
1004        distance_slot: usize,
1005        distance_extra: usize,
1006        distance_bit_count: usize,
1007    },
1008}
1009
1010impl EncoderMatchState {
1011    fn encode_match(
1012        &self,
1013        length: usize,
1014        distance: usize,
1015        distance_size: usize,
1016    ) -> Result<EncodedMatch> {
1017        if distance == self.reps[0] && length == self.last_length && self.last_length != 0 {
1018            return Ok(EncodedMatch::LastLengthRepeat);
1019        }
1020        if let Some(index) = self
1021            .reps
1022            .iter()
1023            .position(|&repeat_distance| repeat_distance == distance && repeat_distance != 0)
1024        {
1025            let (length_slot, length_extra) = length_slot_for_match(length)?;
1026            return Ok(EncodedMatch::RepeatDistance {
1027                index,
1028                length_slot,
1029                length_extra,
1030            });
1031        }
1032
1033        let (distance_slot, distance_extra) = distance_slot_for_match(distance, distance_size)?;
1034        let encoded_length = length
1035            .checked_sub(length_bonus(distance))
1036            .ok_or(Error::InvalidData("RAR 5 adjusted match length underflows"))?;
1037        let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1038        let (length_slot, length_extra) = length_slot_for_match(encoded_length)?;
1039        Ok(EncodedMatch::New {
1040            length_slot,
1041            length_extra,
1042            distance_slot,
1043            distance_extra,
1044            distance_bit_count,
1045        })
1046    }
1047
1048    fn remember(&mut self, length: usize, distance: usize) {
1049        if distance == self.reps[0] && length == self.last_length {
1050            return;
1051        }
1052        if let Some(index) = self
1053            .reps
1054            .iter()
1055            .position(|&repeat_distance| repeat_distance == distance)
1056        {
1057            self.reps[..=index].rotate_right(1);
1058        } else {
1059            self.reps.rotate_right(1);
1060        }
1061        self.reps[0] = distance;
1062        self.last_length = length;
1063    }
1064}
1065
1066fn encode_tokens(
1067    input: &[u8],
1068    history: &[u8],
1069    options: EncodeOptions,
1070    distance_size: usize,
1071) -> Vec<EncodeToken> {
1072    let mut tokens = Vec::new();
1073    let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1074    let history = &history[history.len().saturating_sub(options.max_match_distance)..];
1075    let mut combined = Vec::with_capacity(history.len() + input.len());
1076    combined.extend_from_slice(history);
1077    combined.extend_from_slice(input);
1078    for history_pos in 0..history.len().saturating_sub(2) {
1079        insert_match_position(&combined, history_pos, &mut buckets);
1080    }
1081
1082    let mut pos = history.len();
1083    let end = combined.len();
1084    let mut state = EncoderMatchState::default();
1085    while pos < end {
1086        if let Some(candidate) = best_match(
1087            &combined,
1088            pos,
1089            end,
1090            &buckets,
1091            options,
1092            &state,
1093            distance_size,
1094        ) {
1095            if should_lazy_emit_literal(
1096                &combined,
1097                pos,
1098                &buckets,
1099                options,
1100                &state,
1101                distance_size,
1102                candidate,
1103            ) {
1104                tokens.push(EncodeToken::Literal(combined[pos]));
1105                insert_match_position(&combined, pos, &mut buckets);
1106                pos += 1;
1107                continue;
1108            }
1109            let MatchCandidate {
1110                length, distance, ..
1111            } = candidate;
1112            tokens.push(EncodeToken::Match { length, distance });
1113            state.remember(length, distance);
1114            for history_pos in pos..pos + length {
1115                insert_match_position(&combined, history_pos, &mut buckets);
1116            }
1117            pos += length;
1118        } else {
1119            tokens.push(EncodeToken::Literal(combined[pos]));
1120            insert_match_position(&combined, pos, &mut buckets);
1121            pos += 1;
1122        }
1123    }
1124    tokens
1125}
1126
1127fn should_lazy_emit_literal(
1128    input: &[u8],
1129    pos: usize,
1130    buckets: &[Vec<usize>],
1131    options: EncodeOptions,
1132    state: &EncoderMatchState,
1133    distance_size: usize,
1134    current: MatchCandidate,
1135) -> bool {
1136    let end = input.len();
1137    if !options.lazy_matching || pos + 1 >= end {
1138        return false;
1139    }
1140    let lookahead = options.lazy_lookahead.max(1);
1141    (1..=lookahead)
1142        .take_while(|offset| pos + offset < end)
1143        .any(|offset| {
1144            best_match(
1145                input,
1146                pos + offset,
1147                end,
1148                buckets,
1149                options,
1150                state,
1151                distance_size,
1152            )
1153            .is_some_and(|next| {
1154                let skipped_literal_score = offset as isize * 8;
1155                next.score > current.score + skipped_literal_score
1156            })
1157        })
1158}
1159
1160#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1161struct MatchCandidate {
1162    length: usize,
1163    distance: usize,
1164    score: isize,
1165    cost: usize,
1166}
1167
1168fn best_match(
1169    input: &[u8],
1170    pos: usize,
1171    end: usize,
1172    buckets: &[Vec<usize>],
1173    options: EncodeOptions,
1174    state: &EncoderMatchState,
1175    distance_size: usize,
1176) -> Option<MatchCandidate> {
1177    let max_distance = pos.min(options.max_match_distance);
1178    let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
1179    if options.max_match_candidates == 0
1180        || max_distance == 0
1181        || max_length < 4
1182        || pos + 2 >= input.len()
1183    {
1184        return None;
1185    }
1186    let bucket = &buckets[match_hash(input, pos)];
1187    let mut best = None;
1188    let mut checked = 0usize;
1189    for distance in state.reps {
1190        if distance == 0 || distance > max_distance {
1191            continue;
1192        }
1193        let length = match_length(input, pos, distance, max_length);
1194        consider_match_candidate(&mut best, state, distance_size, length, distance);
1195    }
1196    for &candidate in bucket.iter().rev() {
1197        if candidate >= pos {
1198            continue;
1199        }
1200        let distance = pos - candidate;
1201        if distance > max_distance {
1202            break;
1203        }
1204        checked += 1;
1205        let length = match_length(input, pos, distance, max_length);
1206        consider_match_candidate(&mut best, state, distance_size, length, distance);
1207        if let Some(best) = best {
1208            if best.length == max_length {
1209                break;
1210            }
1211        }
1212        if checked >= options.max_match_candidates {
1213            break;
1214        }
1215    }
1216    best
1217}
1218
1219fn match_length(input: &[u8], pos: usize, distance: usize, max_length: usize) -> usize {
1220    crate::fast::match_length(input, pos, distance, max_length)
1221}
1222
1223fn consider_match_candidate(
1224    best: &mut Option<MatchCandidate>,
1225    state: &EncoderMatchState,
1226    distance_size: usize,
1227    length: usize,
1228    distance: usize,
1229) {
1230    if length < 4 {
1231        return;
1232    }
1233    let Ok(cost) = estimated_match_cost(state, length, distance, distance_size) else {
1234        return;
1235    };
1236    let candidate = MatchCandidate {
1237        length,
1238        distance,
1239        score: (length as isize * 16) - cost as isize,
1240        cost,
1241    };
1242    if best.is_none_or(|best| {
1243        candidate.score > best.score
1244            || (candidate.score == best.score
1245                && (candidate.length > best.length
1246                    || (candidate.length == best.length && candidate.cost < best.cost)
1247                    || (candidate.length == best.length
1248                        && candidate.cost == best.cost
1249                        && candidate.distance < best.distance)))
1250    }) {
1251        *best = Some(candidate);
1252    }
1253}
1254
1255fn estimated_match_cost(
1256    state: &EncoderMatchState,
1257    length: usize,
1258    distance: usize,
1259    distance_size: usize,
1260) -> Result<usize> {
1261    if distance == state.reps[0] && length == state.last_length && state.last_length != 0 {
1262        return Ok(2);
1263    }
1264    if state
1265        .reps
1266        .iter()
1267        .any(|&repeat_distance| repeat_distance == distance && repeat_distance != 0)
1268    {
1269        let (length_slot, _) = length_slot_for_match(length)?;
1270        return Ok(5 + usize::from(length_slot_extra_bits(length_slot)?));
1271    }
1272
1273    let (distance_slot, _) = distance_slot_for_match(distance, distance_size)?;
1274    let encoded_length = length
1275        .checked_sub(length_bonus(distance))
1276        .ok_or(Error::InvalidData("RAR 5 adjusted match length underflows"))?;
1277    let (length_slot, _) = length_slot_for_match(encoded_length)?;
1278    Ok(10
1279        + usize::from(length_slot_extra_bits(length_slot)?)
1280        + distance_slot_bit_count(distance_slot)?)
1281}
1282
1283fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
1284    if pos + 2 < input.len() {
1285        buckets[match_hash(input, pos)].push(pos);
1286    }
1287}
1288
1289fn match_hash(input: &[u8], pos: usize) -> usize {
1290    let value =
1291        ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1292    value & (MATCH_HASH_BUCKETS - 1)
1293}
1294
1295fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
1296    if length < 2 {
1297        return Err(Error::InvalidData("RAR 5 match length is too short"));
1298    }
1299    for slot in 0..LENGTH_TABLE_SIZE {
1300        let bit_count = usize::from(length_slot_extra_bits(slot)?);
1301        let base = slot_to_length(slot, 0)?;
1302        let max = base
1303            + if bit_count == 0 {
1304                0
1305            } else {
1306                (1usize << bit_count) - 1
1307            };
1308        if length >= base && length <= max {
1309            return Ok((slot, length - base));
1310        }
1311    }
1312    Err(Error::InvalidData("RAR 5 match length is too long"))
1313}
1314
1315fn distance_slot_for_match(distance: usize, distance_size: usize) -> Result<(usize, usize)> {
1316    if distance == 0 {
1317        return Err(Error::InvalidData("RAR 5 match distance is zero"));
1318    }
1319    for slot in 0..distance_size {
1320        let bit_count = distance_slot_bit_count(slot)?;
1321        let base = slot_to_distance(slot, 0)?;
1322        let max = base
1323            + if bit_count == 0 {
1324                0
1325            } else {
1326                (1usize << bit_count) - 1
1327            };
1328        if distance >= base && distance <= max {
1329            return Ok((slot, distance - base));
1330        }
1331    }
1332    Err(Error::InvalidData("RAR 5 match distance is too large"))
1333}
1334
1335fn literal_presence(data: &[u8]) -> [bool; 256] {
1336    let mut present = [false; 256];
1337    for &byte in data {
1338        present[byte as usize] = true;
1339    }
1340    present
1341}
1342
1343#[derive(Debug, Clone)]
1344pub struct Unpack50Decoder {
1345    tables: Option<DecodeTables>,
1346    reps: [usize; 4],
1347    last_length: usize,
1348    history: Vec<u8>,
1349}
1350
1351impl Unpack50Decoder {
1352    pub fn new() -> Self {
1353        Self {
1354            tables: None,
1355            reps: [0; 4],
1356            last_length: 0,
1357            history: Vec::new(),
1358        }
1359    }
1360
1361    pub fn decode_member(
1362        &mut self,
1363        input: &[u8],
1364        algorithm_version: u8,
1365        output_size: usize,
1366        solid: bool,
1367        mode: DecodeMode,
1368    ) -> Result<Vec<u8>> {
1369        self.decode_member_with_dictionary(
1370            input,
1371            algorithm_version,
1372            output_size,
1373            DEFAULT_DICTIONARY_SIZE,
1374            solid,
1375            mode,
1376        )
1377    }
1378
1379    pub fn decode_member_with_dictionary(
1380        &mut self,
1381        input: &[u8],
1382        algorithm_version: u8,
1383        output_size: usize,
1384        dictionary_size: usize,
1385        solid: bool,
1386        mode: DecodeMode,
1387    ) -> Result<Vec<u8>> {
1388        let mut input = std::io::Cursor::new(input);
1389        self.decode_member_from_reader_with_dictionary(
1390            &mut input,
1391            algorithm_version,
1392            output_size,
1393            dictionary_size,
1394            solid,
1395            mode,
1396        )
1397    }
1398
1399    pub fn decode_member_from_reader(
1400        &mut self,
1401        input: &mut impl Read,
1402        algorithm_version: u8,
1403        output_size: usize,
1404        solid: bool,
1405        mode: DecodeMode,
1406    ) -> Result<Vec<u8>> {
1407        self.decode_member_from_reader_with_dictionary(
1408            input,
1409            algorithm_version,
1410            output_size,
1411            DEFAULT_DICTIONARY_SIZE,
1412            solid,
1413            mode,
1414        )
1415    }
1416
1417    pub fn decode_member_from_reader_with_dictionary(
1418        &mut self,
1419        input: &mut impl Read,
1420        algorithm_version: u8,
1421        output_size: usize,
1422        dictionary_size: usize,
1423        solid: bool,
1424        mode: DecodeMode,
1425    ) -> Result<Vec<u8>> {
1426        if dictionary_size == 0 {
1427            return Err(Error::InvalidData("RAR 5 dictionary size is zero"));
1428        }
1429        if !solid {
1430            self.reset();
1431        }
1432
1433        let mut output = Vec::with_capacity(output_size.min(MAX_INITIAL_OUTPUT_CAPACITY));
1434        let mut filters = Vec::new();
1435
1436        loop {
1437            let block = read_compressed_block(input)?;
1438            let payload = block.payload.as_slice();
1439            let mut payload_bit_pos = 0;
1440            if block.header.has_tables {
1441                let (lengths, table_bits) = read_table_lengths(payload, algorithm_version)?;
1442                self.tables = Some(DecodeTables::from_lengths(&lengths)?);
1443                payload_bit_pos = table_bits;
1444            }
1445            let tables = self
1446                .tables
1447                .take()
1448                .ok_or(Error::InvalidData("RAR 5 block reuses missing tables"))?;
1449            let mut bits = BitReader::new(payload);
1450            bits.bit_pos = payload_bit_pos;
1451
1452            while bits.bit_pos < block.header.payload_bits && output.len() < output_size {
1453                let symbol = tables.main.decode(&mut bits)?;
1454                match symbol {
1455                    0..=255 => output.push(symbol as u8),
1456                    256 if mode.uses_lz() => {
1457                        filters.push(read_filter(&mut bits, output.len())?);
1458                    }
1459                    257 if mode.uses_lz() => {
1460                        if self.last_length != 0 {
1461                            self.copy_match(
1462                                &mut output,
1463                                self.reps[0],
1464                                self.last_length,
1465                                output_size,
1466                                dictionary_size,
1467                            )?;
1468                        }
1469                    }
1470                    258..=261 if mode.uses_lz() => {
1471                        let rep_index = symbol - 258;
1472                        let distance = self.reps[rep_index];
1473                        if distance == 0 {
1474                            return Err(Error::InvalidData(
1475                                "RAR 5 repeat distance is not initialized",
1476                            ));
1477                        }
1478                        let length_slot = tables.length.decode(&mut bits)?;
1479                        let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1480                        let length = slot_to_length(length_slot, length_extra)?;
1481                        self.reps[..=rep_index].rotate_right(1);
1482                        self.reps[0] = distance;
1483                        self.last_length = length;
1484                        self.copy_match(
1485                            &mut output,
1486                            distance,
1487                            length,
1488                            output_size,
1489                            dictionary_size,
1490                        )?;
1491                    }
1492                    262.. if mode.uses_lz() => {
1493                        let length_slot = symbol - 262;
1494                        let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1495                        let mut length = slot_to_length(length_slot, length_extra)?;
1496                        let distance_slot = tables.distance.decode(&mut bits)?;
1497                        let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1498                        let distance_extra = if distance_bit_count >= 4 && tables.align_mode {
1499                            let high = bits.read_bits((distance_bit_count - 4) as u8)?;
1500                            let low = tables.align.decode(&mut bits)? as u32;
1501                            (high << 4) | low
1502                        } else {
1503                            bits.read_bits(distance_bit_count as u8)?
1504                        };
1505                        let distance = slot_to_distance(distance_slot, distance_extra)?;
1506                        length += length_bonus(distance);
1507                        self.reps.rotate_right(1);
1508                        self.reps[0] = distance;
1509                        self.last_length = length;
1510                        self.copy_match(
1511                            &mut output,
1512                            distance,
1513                            length,
1514                            output_size,
1515                            dictionary_size,
1516                        )?;
1517                    }
1518                    _ if mode == DecodeMode::LiteralOnly => {
1519                        return Err(Error::InvalidData(
1520                            "RAR 5 literal-only decoder encountered non-literal symbol",
1521                        ));
1522                    }
1523                    _ => {
1524                        return Err(Error::InvalidData(
1525                            "RAR 5 decoder encountered unsupported control symbol",
1526                        ));
1527                    }
1528                }
1529            }
1530
1531            self.tables = Some(tables);
1532            if block.header.is_last || output.len() >= output_size {
1533                break;
1534            }
1535        }
1536
1537        if output.len() == output_size {
1538            let history_output = if mode.applies_filters() && !filters.is_empty() {
1539                Some(output.clone())
1540            } else {
1541                None
1542            };
1543            if mode.applies_filters() {
1544                apply_filters(&mut output, &filters)?;
1545            }
1546            self.history
1547                .extend_from_slice(history_output.as_deref().unwrap_or(&output));
1548            if self.history.len() > dictionary_size {
1549                let discard = self.history.len() - dictionary_size;
1550                self.history.drain(..discard);
1551            }
1552            Ok(output)
1553        } else {
1554            Err(Error::NeedMoreInput)
1555        }
1556    }
1557
1558    pub fn decode_member_from_reader_with_dictionary_to_sink<E>(
1559        &mut self,
1560        input: &mut impl Read,
1561        algorithm_version: u8,
1562        output_size: usize,
1563        dictionary_size: usize,
1564        solid: bool,
1565        mut sink: impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1566    ) -> std::result::Result<(), StreamDecodeError<E>> {
1567        if dictionary_size == 0 {
1568            return Err(Error::InvalidData("RAR 5 dictionary size is zero").into());
1569        }
1570        if !solid {
1571            self.reset();
1572        }
1573
1574        let history_limit = dictionary_size.min(STREAM_HISTORY_LIMIT);
1575        if self.history.len() > history_limit {
1576            let discard = self.history.len() - history_limit;
1577            self.history.drain(..discard);
1578        }
1579        let mut output = StreamingOutput::new(
1580            std::mem::take(&mut self.history),
1581            output_size,
1582            dictionary_size,
1583            history_limit,
1584        );
1585
1586        loop {
1587            let block = read_compressed_block(input)?;
1588            let payload = block.payload.as_slice();
1589            let mut payload_bit_pos = 0;
1590            if block.header.has_tables {
1591                let (lengths, table_bits) = read_table_lengths(payload, algorithm_version)?;
1592                self.tables = Some(DecodeTables::from_lengths(&lengths)?);
1593                payload_bit_pos = table_bits;
1594            }
1595            let tables = self
1596                .tables
1597                .take()
1598                .ok_or(Error::InvalidData("RAR 5 block reuses missing tables"))?;
1599            let mut bits = BitReader::new(payload);
1600            bits.bit_pos = payload_bit_pos;
1601
1602            while bits.bit_pos < block.header.payload_bits && output.written() < output_size {
1603                let symbol = tables.main.decode(&mut bits)?;
1604                match symbol {
1605                    0..=255 => output.push(symbol as u8, &mut sink)?,
1606                    256 => {
1607                        return Err(StreamDecodeError::FilteredMember);
1608                    }
1609                    257 => {
1610                        if self.last_length != 0 {
1611                            output.copy_match(self.reps[0], self.last_length, &mut sink)?;
1612                        }
1613                    }
1614                    258..=261 => {
1615                        let rep_index = symbol - 258;
1616                        let distance = self.reps[rep_index];
1617                        if distance == 0 {
1618                            return Err(Error::InvalidData(
1619                                "RAR 5 repeat distance is not initialized",
1620                            )
1621                            .into());
1622                        }
1623                        let length_slot = tables.length.decode(&mut bits)?;
1624                        let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1625                        let length = slot_to_length(length_slot, length_extra)?;
1626                        self.reps[..=rep_index].rotate_right(1);
1627                        self.reps[0] = distance;
1628                        self.last_length = length;
1629                        output.copy_match(distance, length, &mut sink)?;
1630                    }
1631                    262.. => {
1632                        let length_slot = symbol - 262;
1633                        let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1634                        let mut length = slot_to_length(length_slot, length_extra)?;
1635                        let distance_slot = tables.distance.decode(&mut bits)?;
1636                        let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1637                        let distance_extra = if distance_bit_count >= 4 && tables.align_mode {
1638                            let high = bits.read_bits((distance_bit_count - 4) as u8)?;
1639                            let low = tables.align.decode(&mut bits)? as u32;
1640                            (high << 4) | low
1641                        } else {
1642                            bits.read_bits(distance_bit_count as u8)?
1643                        };
1644                        let distance = slot_to_distance(distance_slot, distance_extra)?;
1645                        length += length_bonus(distance);
1646                        self.reps.rotate_right(1);
1647                        self.reps[0] = distance;
1648                        self.last_length = length;
1649                        output.copy_match(distance, length, &mut sink)?;
1650                    }
1651                }
1652            }
1653
1654            self.tables = Some(tables);
1655            if block.header.is_last || output.written() >= output_size {
1656                break;
1657            }
1658        }
1659
1660        if output.written() == output_size {
1661            output.finish(&mut sink)?;
1662            self.history = output.into_history();
1663            Ok(())
1664        } else {
1665            Err(Error::NeedMoreInput.into())
1666        }
1667    }
1668
1669    fn reset(&mut self) {
1670        self.tables = None;
1671        self.reps = [0; 4];
1672        self.last_length = 0;
1673        self.history.clear();
1674    }
1675
1676    fn copy_match(
1677        &self,
1678        output: &mut Vec<u8>,
1679        distance: usize,
1680        length: usize,
1681        output_limit: usize,
1682        dictionary_size: usize,
1683    ) -> Result<()> {
1684        if distance > dictionary_size {
1685            return Err(Error::InvalidData(
1686                "RAR 5 match distance exceeds dictionary",
1687            ));
1688        }
1689        if distance == 0 || distance > self.history.len() + output.len() {
1690            return Err(Error::InvalidData("RAR 5 match distance exceeds window"));
1691        }
1692        if output
1693            .len()
1694            .checked_add(length)
1695            .is_none_or(|end| end > output_limit)
1696        {
1697            return Err(Error::InvalidData("RAR 5 match exceeds output limit"));
1698        }
1699        for _ in 0..length {
1700            if distance <= output.len() {
1701                let index = output.len() - distance;
1702                output.push(output[index]);
1703            } else {
1704                let history_distance = distance - output.len();
1705                let index = self.history.len() - history_distance;
1706                output.push(self.history[index]);
1707            }
1708        }
1709        Ok(())
1710    }
1711}
1712
1713struct StreamingOutput {
1714    history: VecDeque<u8>,
1715    pending: Vec<u8>,
1716    written: usize,
1717    output_limit: usize,
1718    dictionary_size: usize,
1719    history_limit: usize,
1720    all_zero: bool,
1721}
1722
1723impl StreamingOutput {
1724    fn new(
1725        history: Vec<u8>,
1726        output_limit: usize,
1727        dictionary_size: usize,
1728        history_limit: usize,
1729    ) -> Self {
1730        Self {
1731            all_zero: history.iter().all(|&byte| byte == 0),
1732            history: history.into(),
1733            pending: Vec::with_capacity(STREAM_FLUSH_THRESHOLD),
1734            written: 0,
1735            output_limit,
1736            dictionary_size,
1737            history_limit,
1738        }
1739    }
1740
1741    fn written(&self) -> usize {
1742        self.written
1743    }
1744
1745    fn push<E>(
1746        &mut self,
1747        byte: u8,
1748        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1749    ) -> std::result::Result<(), StreamDecodeError<E>> {
1750        if self.written >= self.output_limit {
1751            return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1752        }
1753        if byte != 0 {
1754            self.all_zero = false;
1755        }
1756        self.pending.push(byte);
1757        self.written += 1;
1758        if self.pending.len() >= STREAM_FLUSH_THRESHOLD {
1759            self.flush(sink)?;
1760        }
1761        Ok(())
1762    }
1763
1764    fn push_repeated<E>(
1765        &mut self,
1766        byte: u8,
1767        mut count: usize,
1768        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1769    ) -> std::result::Result<(), StreamDecodeError<E>> {
1770        if self
1771            .written
1772            .checked_add(count)
1773            .is_none_or(|end| end > self.output_limit)
1774        {
1775            return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1776        }
1777        if byte != 0 {
1778            self.all_zero = false;
1779        }
1780        while count > 0 {
1781            let available = STREAM_FLUSH_THRESHOLD - self.pending.len();
1782            let take = count.min(available.max(1));
1783            let old_len = self.pending.len();
1784            self.pending.resize(old_len + take, byte);
1785            self.written += take;
1786            count -= take;
1787            if self.pending.len() >= STREAM_FLUSH_THRESHOLD {
1788                self.flush(sink)?;
1789            }
1790        }
1791        Ok(())
1792    }
1793
1794    fn push_zeroes<E>(
1795        &mut self,
1796        count: usize,
1797        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1798    ) -> std::result::Result<(), StreamDecodeError<E>> {
1799        if self
1800            .written
1801            .checked_add(count)
1802            .is_none_or(|end| end > self.output_limit)
1803        {
1804            return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1805        }
1806        self.flush(sink)?;
1807        sink(DecodedChunk::Repeated {
1808            byte: 0,
1809            len: count,
1810        })
1811        .map_err(StreamDecodeError::Sink)?;
1812        self.written += count;
1813        if self.history.is_empty() && self.history_limit != 0 {
1814            self.history.push_back(0);
1815        }
1816        Ok(())
1817    }
1818
1819    fn copy_match<E>(
1820        &mut self,
1821        distance: usize,
1822        length: usize,
1823        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1824    ) -> std::result::Result<(), StreamDecodeError<E>> {
1825        if distance > self.dictionary_size {
1826            return Err(Error::InvalidData("RAR 5 match distance exceeds dictionary").into());
1827        }
1828        if self.all_zero && distance <= self.written + self.history.len() {
1829            return self.push_zeroes(length, sink);
1830        }
1831        if distance == 0 || distance > self.history.len() + self.pending.len() {
1832            return Err(Error::InvalidData("RAR 5 match distance exceeds window").into());
1833        }
1834        if self
1835            .written
1836            .checked_add(length)
1837            .is_none_or(|end| end > self.output_limit)
1838        {
1839            return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1840        }
1841        if distance == 1 {
1842            let byte = self.byte_at_distance(1)?;
1843            return self.push_repeated(byte, length, sink);
1844        }
1845        for _ in 0..length {
1846            let byte = self.byte_at_distance(distance)?;
1847            self.push(byte, sink)?;
1848        }
1849        Ok(())
1850    }
1851
1852    fn byte_at_distance(&self, distance: usize) -> Result<u8> {
1853        if distance <= self.pending.len() {
1854            Ok(self.pending[self.pending.len() - distance])
1855        } else {
1856            let history_distance = distance - self.pending.len();
1857            if history_distance > self.history.len() {
1858                return Err(Error::InvalidData("RAR 5 match distance exceeds window"));
1859            }
1860            Ok(*self
1861                .history
1862                .get(self.history.len() - history_distance)
1863                .ok_or(Error::InvalidData("RAR 5 match distance exceeds window"))?)
1864        }
1865    }
1866
1867    fn flush<E>(
1868        &mut self,
1869        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1870    ) -> std::result::Result<(), StreamDecodeError<E>> {
1871        if self.pending.is_empty() {
1872            return Ok(());
1873        }
1874        sink(DecodedChunk::Bytes(&self.pending)).map_err(StreamDecodeError::Sink)?;
1875        self.history.extend(self.pending.iter().copied());
1876        self.pending.clear();
1877        while self.history.len() > self.history_limit {
1878            self.history.pop_front();
1879        }
1880        Ok(())
1881    }
1882
1883    fn finish<E>(
1884        &mut self,
1885        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1886    ) -> std::result::Result<(), StreamDecodeError<E>> {
1887        self.flush(sink)
1888    }
1889
1890    fn into_history(self) -> Vec<u8> {
1891        self.history.into()
1892    }
1893}
1894
1895fn read_compressed_block(input: &mut impl Read) -> Result<OwnedCompressedBlock> {
1896    let mut fixed = [0u8; 2];
1897    input
1898        .read_exact(&mut fixed)
1899        .map_err(|_| Error::NeedMoreInput)?;
1900    let flags = fixed[0];
1901    let checksum = fixed[1];
1902    let size_bytes_len = match (flags >> 3) & 0x03 {
1903        0 => 1,
1904        1 => 2,
1905        2 => 3,
1906        _ => return Err(Error::InvalidData("RAR 5 block size length is invalid")),
1907    };
1908    let mut size_bytes = [0u8; 3];
1909    input
1910        .read_exact(&mut size_bytes[..size_bytes_len])
1911        .map_err(|_| Error::NeedMoreInput)?;
1912
1913    let actual = size_bytes[..size_bytes_len]
1914        .iter()
1915        .fold(checksum ^ flags, |acc, &byte| acc ^ byte);
1916    if actual != 0x5a {
1917        return Err(Error::InvalidData("RAR 5 block header checksum mismatch"));
1918    }
1919
1920    let payload_size = size_bytes[..size_bytes_len]
1921        .iter()
1922        .enumerate()
1923        .fold(0usize, |acc, (index, &byte)| {
1924            acc | (usize::from(byte) << (index * 8))
1925        });
1926    let mut payload = vec![0; payload_size];
1927    input
1928        .read_exact(&mut payload)
1929        .map_err(|_| Error::NeedMoreInput)?;
1930    let final_byte_bits = ((flags & 0x07) + 1).min(8);
1931    let payload_bits = if payload_size == 0 {
1932        0
1933    } else {
1934        (payload_size - 1) * 8 + usize::from(final_byte_bits)
1935    };
1936
1937    Ok(OwnedCompressedBlock {
1938        header: CompressedBlockHeader {
1939            flags,
1940            is_last: flags & 0x40 != 0,
1941            has_tables: flags & 0x80 != 0,
1942            final_byte_bits,
1943            payload_size,
1944            payload_bits,
1945        },
1946        payload,
1947    })
1948}
1949
1950impl Default for Unpack50Decoder {
1951    fn default() -> Self {
1952        Self::new()
1953    }
1954}
1955
1956#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1957struct PendingFilter {
1958    start: usize,
1959    length: usize,
1960    filter_type: FilterType,
1961    channels: usize,
1962}
1963
1964#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1965enum FilterType {
1966    Delta,
1967    E8,
1968    E8E9,
1969    Arm,
1970}
1971
1972fn read_filter(bits: &mut BitReader<'_>, current_pos: usize) -> Result<PendingFilter> {
1973    let offset = read_filter_data(bits)? as usize;
1974    let length = read_filter_data(bits)? as usize;
1975    let filter_type = match bits.read_bits(3)? {
1976        0 => FilterType::Delta,
1977        1 => FilterType::E8,
1978        2 => FilterType::E8E9,
1979        3 => FilterType::Arm,
1980        _ => return Err(Error::InvalidData("RAR 5 filter type is unsupported")),
1981    };
1982    let channels = if filter_type == FilterType::Delta {
1983        bits.read_bits(5)? as usize + 1
1984    } else {
1985        0
1986    };
1987    Ok(PendingFilter {
1988        start: current_pos
1989            .checked_add(offset)
1990            .ok_or(Error::InvalidData("RAR 5 filter start overflows"))?,
1991        length,
1992        filter_type,
1993        channels,
1994    })
1995}
1996
1997fn read_filter_data(bits: &mut BitReader<'_>) -> Result<u32> {
1998    let byte_count = bits.read_bits(2)? as usize + 1;
1999    let mut data = 0;
2000    for index in 0..byte_count {
2001        data |= bits.read_bits(8)? << (index * 8);
2002    }
2003    Ok(data)
2004}
2005
2006fn write_filter(writer: &mut BitWriter, filter: EncodeFilter) -> Result<()> {
2007    if filter.offset > u32::MAX as usize {
2008        return Err(Error::InvalidData("RAR 5 filter offset is too large"));
2009    }
2010    if filter.length > u32::MAX as usize {
2011        return Err(Error::InvalidData("RAR 5 filter length is too large"));
2012    }
2013    write_filter_data(writer, filter.offset as u32);
2014    write_filter_data(writer, filter.length as u32);
2015    match filter.filter_type {
2016        FilterType::Delta => {
2017            if filter.channels == 0 || filter.channels > 32 {
2018                return Err(Error::InvalidData(
2019                    "RAR 5 DELTA filter channel count is invalid",
2020                ));
2021            }
2022            writer.write_bits(0, 3);
2023            writer.write_bits(filter.channels - 1, 5);
2024        }
2025        FilterType::E8 => writer.write_bits(1, 3),
2026        FilterType::E8E9 => writer.write_bits(2, 3),
2027        FilterType::Arm => writer.write_bits(3, 3),
2028    }
2029    Ok(())
2030}
2031
2032fn write_filter_data(writer: &mut BitWriter, value: u32) {
2033    let byte_count = if value <= 0xff {
2034        1
2035    } else if value <= 0xffff {
2036        2
2037    } else if value <= 0x00ff_ffff {
2038        3
2039    } else {
2040        4
2041    };
2042    writer.write_bits(byte_count - 1, 2);
2043    for index in 0..byte_count {
2044        writer.write_bits(((value >> (index * 8)) & 0xff) as usize, 8);
2045    }
2046}
2047
2048fn apply_filters(output: &mut [u8], filters: &[PendingFilter]) -> Result<()> {
2049    for filter in filters {
2050        let end = filter
2051            .start
2052            .checked_add(filter.length)
2053            .ok_or(Error::InvalidData("RAR 5 filter range overflows"))?;
2054        let data = output
2055            .get_mut(filter.start..end)
2056            .ok_or(Error::InvalidData("RAR 5 filter range exceeds output"))?;
2057        match filter.filter_type {
2058            FilterType::Delta => {
2059                let decoded = filters::delta_decode(data, filter.channels, rar50_delta_messages())?;
2060                data.copy_from_slice(&decoded);
2061            }
2062            FilterType::E8 => e8e9_decode(data, filter.start as u32, false),
2063            FilterType::E8E9 => e8e9_decode(data, filter.start as u32, true),
2064            FilterType::Arm => arm_decode(data, filter.start as u32),
2065        }
2066    }
2067    Ok(())
2068}
2069
2070fn rar50_delta_messages() -> DeltaErrorMessages {
2071    DeltaErrorMessages {
2072        invalid_channels: "RAR 5 DELTA filter channel count is invalid",
2073        zero_channels: "RAR 5 DELTA filter has zero channels",
2074        truncated_source: "RAR 5 DELTA filter source is truncated",
2075    }
2076}
2077
2078fn e8e9_decode(data: &mut [u8], file_offset: u32, include_e9: bool) {
2079    if data.len() <= 4 {
2080        return;
2081    }
2082    let cmp_mask = if include_e9 { 0xfe } else { 0xff };
2083    let opcode_limit = data.len() - 4;
2084    let mut opcode_pos = 0usize;
2085    while let Some(pos) = crate::fast::next_x86_opcode(data, opcode_pos, opcode_limit, cmp_mask) {
2086        let cur_pos = pos + 1;
2087        let offset = file_offset.wrapping_add(cur_pos as u32) % X86_FILTER_FILE_SIZE;
2088        let addr = u32::from_le_bytes([
2089            data[cur_pos],
2090            data[cur_pos + 1],
2091            data[cur_pos + 2],
2092            data[cur_pos + 3],
2093        ]);
2094        let new_addr = if addr & 0x8000_0000 != 0 {
2095            (addr.wrapping_add(offset) & 0x8000_0000 == 0)
2096                .then(|| addr.wrapping_add(X86_FILTER_FILE_SIZE))
2097        } else {
2098            (addr.wrapping_sub(X86_FILTER_FILE_SIZE) & 0x8000_0000 != 0)
2099                .then(|| addr.wrapping_sub(offset))
2100        };
2101        if let Some(value) = new_addr {
2102            data[cur_pos..cur_pos + 4].copy_from_slice(&value.to_le_bytes());
2103        }
2104        opcode_pos = pos + 5;
2105    }
2106}
2107
2108fn e8e9_encode(data: &mut [u8], file_offset: u32, include_e9: bool) {
2109    if data.len() <= 4 {
2110        return;
2111    }
2112    let cmp_mask = if include_e9 { 0xfe } else { 0xff };
2113    let opcode_limit = data.len() - 4;
2114    let mut opcode_pos = 0usize;
2115    while let Some(pos) = crate::fast::next_x86_opcode(data, opcode_pos, opcode_limit, cmp_mask) {
2116        let cur_pos = pos + 1;
2117        let offset = file_offset.wrapping_add(cur_pos as u32) % X86_FILTER_FILE_SIZE;
2118        let addr = u32::from_le_bytes([
2119            data[cur_pos],
2120            data[cur_pos + 1],
2121            data[cur_pos + 2],
2122            data[cur_pos + 3],
2123        ]);
2124        let candidate = addr.wrapping_add(offset);
2125        let new_addr = if candidate < X86_FILTER_FILE_SIZE {
2126            Some(candidate)
2127        } else {
2128            let candidate = addr.wrapping_sub(X86_FILTER_FILE_SIZE);
2129            (candidate & 0x8000_0000 != 0 && candidate.wrapping_add(offset) & 0x8000_0000 == 0)
2130                .then_some(candidate)
2131        };
2132        if let Some(value) = new_addr {
2133            data[cur_pos..cur_pos + 4].copy_from_slice(&value.to_le_bytes());
2134        }
2135        opcode_pos = pos + 5;
2136    }
2137}
2138
2139const X86_FILTER_FILE_SIZE: u32 = 0x0100_0000;
2140
2141fn arm_decode(data: &mut [u8], file_offset: u32) {
2142    let mut pos = 0usize;
2143    while pos + 3 < data.len() {
2144        if data[pos + 3] == 0xeb {
2145            let mut offset = u32::from(data[pos])
2146                | (u32::from(data[pos + 1]) << 8)
2147                | (u32::from(data[pos + 2]) << 16);
2148            offset = offset.wrapping_sub(file_offset.wrapping_add(pos as u32) / 4);
2149            data[pos] = offset as u8;
2150            data[pos + 1] = (offset >> 8) as u8;
2151            data[pos + 2] = (offset >> 16) as u8;
2152        }
2153        pos += 4;
2154    }
2155}
2156
2157fn arm_encode(data: &mut [u8], file_offset: u32) {
2158    let mut pos = 0usize;
2159    while pos + 3 < data.len() {
2160        if data[pos + 3] == 0xeb {
2161            let mut offset = u32::from(data[pos])
2162                | (u32::from(data[pos + 1]) << 8)
2163                | (u32::from(data[pos + 2]) << 16);
2164            offset = offset.wrapping_add(file_offset.wrapping_add(pos as u32) / 4);
2165            data[pos] = offset as u8;
2166            data[pos + 1] = (offset >> 8) as u8;
2167            data[pos + 2] = (offset >> 16) as u8;
2168        }
2169        pos += 4;
2170    }
2171}
2172
2173fn length_slot_extra_bits(slot: usize) -> Result<u8> {
2174    if slot < 8 {
2175        Ok(0)
2176    } else {
2177        let bit_count = (slot >> 2) - 1;
2178        if bit_count > 24 {
2179            Err(Error::InvalidData("RAR 5 length slot is too large"))
2180        } else {
2181            Ok(bit_count as u8)
2182        }
2183    }
2184}
2185
2186fn length_bonus(distance: usize) -> usize {
2187    usize::from(distance > 0x100) + usize::from(distance > 0x2000) + usize::from(distance > 0x40000)
2188}
2189
2190pub fn slot_to_length(slot: usize, extra_bits: u32) -> Result<usize> {
2191    if slot < 8 {
2192        return Ok(slot + 2);
2193    }
2194    let bit_count = (slot >> 2) - 1;
2195    if bit_count > 24 {
2196        return Err(Error::InvalidData("RAR 5 length slot is too large"));
2197    }
2198    let max_extra = if bit_count == 32 {
2199        u32::MAX
2200    } else {
2201        (1u32 << bit_count) - 1
2202    };
2203    if extra_bits > max_extra {
2204        return Err(Error::InvalidData("RAR 5 length extra bits exceed slot"));
2205    }
2206    Ok((((4 | (slot & 3)) << bit_count) | extra_bits as usize) + 2)
2207}
2208
2209pub fn distance_slot_bit_count(slot: usize) -> Result<usize> {
2210    if slot < 4 {
2211        Ok(0)
2212    } else {
2213        let bit_count = (slot - 2) >> 1;
2214        if bit_count > 31 {
2215            Err(Error::InvalidData("RAR 5 distance slot is too large"))
2216        } else {
2217            Ok(bit_count)
2218        }
2219    }
2220}
2221
2222pub fn slot_to_distance(slot: usize, extra_bits: u32) -> Result<usize> {
2223    if slot < 4 {
2224        return Ok(slot + 1);
2225    }
2226    let bit_count = distance_slot_bit_count(slot)?;
2227    let max_extra = if bit_count == 32 {
2228        u32::MAX
2229    } else {
2230        (1u32 << bit_count) - 1
2231    };
2232    if extra_bits > max_extra {
2233        return Err(Error::InvalidData("RAR 5 distance extra bits exceed slot"));
2234    }
2235    Ok((((2 | (slot & 1)) << bit_count) | extra_bits as usize) + 1)
2236}
2237
2238#[derive(Debug, Clone)]
2239pub struct HuffmanTable {
2240    symbols: Vec<HuffmanSymbol>,
2241    first_code: [u16; 16],
2242    first_index: [usize; 16],
2243    counts: [u16; 16],
2244}
2245
2246#[derive(Debug, Clone)]
2247struct HuffmanSymbol {
2248    code: u16,
2249    len: u8,
2250    symbol: usize,
2251}
2252
2253impl HuffmanTable {
2254    pub fn from_lengths(lengths: &[u8]) -> Result<Self> {
2255        let mut count = [0u16; 16];
2256        for &length in lengths {
2257            if length > 15 {
2258                return Err(Error::InvalidData("RAR 5 Huffman length is too large"));
2259            }
2260            if length != 0 {
2261                count[length as usize] += 1;
2262            }
2263        }
2264        validate_huffman_counts(&count)?;
2265
2266        let mut first_code = [0u16; 16];
2267        let mut next_code = [0u16; 16];
2268        let mut code = 0u16;
2269        for length in 1..=15 {
2270            code = (code + count[length - 1]) << 1;
2271            first_code[length] = code;
2272            next_code[length] = code;
2273        }
2274
2275        let mut first_index = [0usize; 16];
2276        let mut index = 0usize;
2277        for length in 1..=15 {
2278            first_index[length] = index;
2279            index += usize::from(count[length]);
2280        }
2281
2282        let mut symbols = Vec::new();
2283        for (symbol, &length) in lengths.iter().enumerate() {
2284            if length == 0 {
2285                continue;
2286            }
2287            let code = next_code[length as usize];
2288            next_code[length as usize] += 1;
2289            symbols.push(HuffmanSymbol {
2290                code,
2291                len: length,
2292                symbol,
2293            });
2294        }
2295        symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
2296        Ok(Self {
2297            symbols,
2298            first_code,
2299            first_index,
2300            counts: count,
2301        })
2302    }
2303
2304    pub fn is_empty(&self) -> bool {
2305        self.symbols.is_empty()
2306    }
2307
2308    fn decode(&self, bits: &mut BitReader<'_>) -> Result<usize> {
2309        if self.symbols.is_empty() {
2310            return Err(Error::InvalidData("RAR 5 empty Huffman table"));
2311        }
2312        let mut code = 0u16;
2313        for len in 1..=15 {
2314            code = (code << 1) | bits.read_bits(1)? as u16;
2315            let count = self.counts[len];
2316            if count != 0 {
2317                let first = self.first_code[len];
2318                let offset = code.wrapping_sub(first);
2319                if offset < count {
2320                    let index = self.first_index[len] + usize::from(offset);
2321                    return Ok(self.symbols[index].symbol);
2322                }
2323            }
2324        }
2325        Err(Error::InvalidData("RAR 5 invalid Huffman code"))
2326    }
2327
2328    fn code_for_symbol(&self, symbol: usize) -> Result<(u16, u8)> {
2329        self.symbols
2330            .iter()
2331            .find(|item| item.symbol == symbol)
2332            .map(|item| (item.code, item.len))
2333            .ok_or(Error::InvalidData("RAR 5 missing Huffman symbol"))
2334    }
2335}
2336
2337struct BitReader<'a> {
2338    input: &'a [u8],
2339    bit_pos: usize,
2340}
2341
2342impl<'a> BitReader<'a> {
2343    fn new(input: &'a [u8]) -> Self {
2344        Self { input, bit_pos: 0 }
2345    }
2346
2347    fn read_bits(&mut self, count: u8) -> Result<u32> {
2348        if count > 32 {
2349            return Err(Error::InvalidData("RAR 5 bit read is too wide"));
2350        }
2351        let end = self
2352            .bit_pos
2353            .checked_add(usize::from(count))
2354            .ok_or(Error::NeedMoreInput)?;
2355        if end > self.input.len() * 8 {
2356            return Err(Error::NeedMoreInput);
2357        }
2358
2359        let mut value = 0u32;
2360        let mut remaining = usize::from(count);
2361        while remaining != 0 {
2362            let byte = self.input[self.bit_pos / 8];
2363            let bit_offset = self.bit_pos % 8;
2364            let available = 8 - bit_offset;
2365            let take = available.min(remaining);
2366            let shift = available - take;
2367            let mask = ((1u16 << take) - 1) as u8;
2368            let chunk = (byte >> shift) & mask;
2369            value = (value << take) | u32::from(chunk);
2370            self.bit_pos += take;
2371            remaining -= take;
2372        }
2373
2374        Ok(value)
2375    }
2376}
2377
2378struct BitWriter {
2379    bytes: Vec<u8>,
2380    bit_pos: usize,
2381}
2382
2383impl BitWriter {
2384    fn new() -> Self {
2385        Self {
2386            bytes: Vec::new(),
2387            bit_pos: 0,
2388        }
2389    }
2390
2391    fn write_bits(&mut self, value: usize, count: usize) {
2392        for bit in (0..count).rev() {
2393            if self.bit_pos.is_multiple_of(8) {
2394                self.bytes.push(0);
2395            }
2396            if (value >> bit) & 1 != 0 {
2397                let byte = self.bytes.last_mut().unwrap();
2398                *byte |= 1 << (7 - (self.bit_pos % 8));
2399            }
2400            self.bit_pos += 1;
2401        }
2402    }
2403
2404    fn finish(self) -> Vec<u8> {
2405        self.bytes
2406    }
2407}
2408
2409fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
2410    let mut available = 1i32;
2411    for &len_count in count.iter().skip(1) {
2412        available = (available << 1) - i32::from(len_count);
2413        if available < 0 {
2414            return Err(Error::InvalidData("RAR 5 oversubscribed Huffman table"));
2415        }
2416    }
2417    Ok(())
2418}
2419
2420#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2421struct LevelToken {
2422    symbol: usize,
2423    extra_bits: u8,
2424    extra_value: u8,
2425}
2426
2427impl LevelToken {
2428    const fn plain(symbol: usize) -> Self {
2429        Self {
2430            symbol,
2431            extra_bits: 0,
2432            extra_value: 0,
2433        }
2434    }
2435
2436    const fn repeat_previous_short(count: usize) -> Self {
2437        Self {
2438            symbol: 16,
2439            extra_bits: 3,
2440            extra_value: (count - 3) as u8,
2441        }
2442    }
2443
2444    const fn repeat_previous_long(count: usize) -> Self {
2445        Self {
2446            symbol: 17,
2447            extra_bits: 7,
2448            extra_value: (count - 11) as u8,
2449        }
2450    }
2451
2452    const fn zero_run_short(count: usize) -> Self {
2453        Self {
2454            symbol: 18,
2455            extra_bits: 3,
2456            extra_value: (count - 3) as u8,
2457        }
2458    }
2459
2460    const fn zero_run_long(count: usize) -> Self {
2461        Self {
2462            symbol: 19,
2463            extra_bits: 7,
2464            extra_value: (count - 11) as u8,
2465        }
2466    }
2467}
2468
2469fn encode_table_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
2470    let mut tokens = Vec::new();
2471    let mut pos = 0usize;
2472    let mut previous = None;
2473    while pos < lengths.len() {
2474        let value = lengths[pos];
2475        let mut run = 1usize;
2476        while pos + run < lengths.len() && lengths[pos + run] == value {
2477            run += 1;
2478        }
2479
2480        if value == 0 {
2481            emit_zero_level_run(&mut tokens, run);
2482            previous = Some(0);
2483            pos += run;
2484            continue;
2485        }
2486
2487        if previous == Some(value) && run >= 3 {
2488            emit_repeat_level_run(&mut tokens, run);
2489            pos += run;
2490            continue;
2491        }
2492
2493        tokens.push(LevelToken::plain(value as usize));
2494        previous = Some(value);
2495        pos += 1;
2496    }
2497    tokens
2498}
2499
2500fn emit_repeat_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
2501    while run != 0 {
2502        if run >= 11 {
2503            let mut chunk = run.min(138);
2504            if matches!(run - chunk, 1 | 2) && chunk >= 14 {
2505                chunk -= 3;
2506            }
2507            tokens.push(LevelToken::repeat_previous_long(chunk));
2508            run -= chunk;
2509        } else if run >= 3 {
2510            let chunk = run.min(10);
2511            tokens.push(LevelToken::repeat_previous_short(chunk));
2512            run -= chunk;
2513        } else {
2514            break;
2515        }
2516    }
2517}
2518
2519fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
2520    while run != 0 {
2521        if run >= 11 {
2522            let mut chunk = run.min(138);
2523            if matches!(run - chunk, 1 | 2) && chunk >= 14 {
2524                chunk -= 3;
2525            }
2526            tokens.push(LevelToken::zero_run_long(chunk));
2527            run -= chunk;
2528        } else if run >= 3 {
2529            let chunk = run.min(10);
2530            tokens.push(LevelToken::zero_run_short(chunk));
2531            run -= chunk;
2532        } else {
2533            tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
2534            break;
2535        }
2536    }
2537}
2538
2539fn level_code_lengths_for_tokens(tokens: &[LevelToken]) -> [u8; LEVEL_TABLE_SIZE] {
2540    let mut used = [false; LEVEL_TABLE_SIZE];
2541    for token in tokens {
2542        used[token.symbol] = true;
2543    }
2544    let used_count = used.iter().filter(|&&used| used).count();
2545    let len = huffman::bits_for_symbol_count(used_count);
2546    let mut lengths = [0u8; LEVEL_TABLE_SIZE];
2547    for (symbol, is_used) in used.into_iter().enumerate() {
2548        if is_used {
2549            lengths[symbol] = len;
2550        }
2551    }
2552    lengths
2553}
2554
2555fn write_level_lengths(writer: &mut BitWriter, lengths: &[u8; LEVEL_TABLE_SIZE]) {
2556    let mut pos = 0usize;
2557    while pos < LEVEL_TABLE_SIZE {
2558        let length = lengths[pos];
2559        if length == 0 {
2560            let mut count = 1usize;
2561            while pos + count < LEVEL_TABLE_SIZE && lengths[pos + count] == 0 {
2562                count += 1;
2563            }
2564            while count >= 3 {
2565                let chunk = count.min(17);
2566                writer.write_bits(15, 4);
2567                writer.write_bits(chunk - 2, 4);
2568                pos += chunk;
2569                count -= chunk;
2570            }
2571            for _ in 0..count {
2572                writer.write_bits(0, 4);
2573                pos += 1;
2574            }
2575        } else {
2576            writer.write_bits(usize::from(length), 4);
2577            if length == 15 {
2578                writer.write_bits(0, 4);
2579            }
2580            pos += 1;
2581        }
2582    }
2583}
2584
2585#[cfg(test)]
2586mod tests {
2587    use super::*;
2588
2589    fn checksum(flags: u8, size_bytes: &[u8]) -> u8 {
2590        size_bytes
2591            .iter()
2592            .fold(0x5a ^ flags, |acc, &byte| acc ^ byte)
2593    }
2594
2595    #[test]
2596    fn parses_one_byte_size_block_header() {
2597        let flags = 0xc7;
2598        let size = [3];
2599        let input = [flags, checksum(flags, &size), size[0], 0xaa, 0xbb, 0xcc];
2600
2601        let block = parse_compressed_block(&input).unwrap();
2602        assert_eq!(block.header_len, 3);
2603        assert_eq!(block.payload, 3..6);
2604        assert_eq!(block.header.flags, flags);
2605        assert!(block.header.is_last);
2606        assert!(block.header.has_tables);
2607        assert_eq!(block.header.final_byte_bits, 8);
2608        assert_eq!(block.header.payload_size, 3);
2609        assert_eq!(block.header.payload_bits, 24);
2610    }
2611
2612    #[test]
2613    fn parses_three_byte_size_block_header_with_partial_final_byte() {
2614        let flags = 0x94;
2615        let size = [0x34, 0x12, 0x00];
2616        let mut input = vec![flags, checksum(flags, &size), size[0], size[1], size[2]];
2617        input.resize(0x1234 + 5, 0);
2618
2619        let block = parse_compressed_block(&input).unwrap();
2620        assert_eq!(block.header_len, 5);
2621        assert_eq!(block.payload, 5..0x1239);
2622        assert!(!block.header.is_last);
2623        assert!(block.header.has_tables);
2624        assert_eq!(block.header.final_byte_bits, 5);
2625        assert_eq!(block.header.payload_size, 0x1234);
2626        assert_eq!(block.header.payload_bits, (0x1234 - 1) * 8 + 5);
2627    }
2628
2629    #[test]
2630    fn rejects_reserved_size_length_selector() {
2631        let input = [0x18, 0x42, 0x00];
2632
2633        assert_eq!(
2634            parse_compressed_block(&input),
2635            Err(Error::InvalidData("RAR 5 block size length is invalid"))
2636        );
2637    }
2638
2639    #[test]
2640    fn rejects_bad_block_header_checksum() {
2641        let input = [0xc7, 0x00, 0x03, 0xaa, 0xbb, 0xcc];
2642
2643        assert_eq!(
2644            parse_compressed_block(&input),
2645            Err(Error::InvalidData("RAR 5 block header checksum mismatch"))
2646        );
2647    }
2648
2649    #[test]
2650    fn rejects_truncated_block_payload() {
2651        let flags = 0xc7;
2652        let size = [3];
2653        let input = [flags, checksum(flags, &size), size[0], 0xaa, 0xbb];
2654
2655        assert_eq!(parse_compressed_block(&input), Err(Error::NeedMoreInput));
2656    }
2657
2658    #[test]
2659    fn reads_level_lengths_with_literal_fifteen() {
2660        let mut nibbles = vec![1, 2, 15, 0, 3, 4];
2661        nibbles.resize(LEVEL_TABLE_SIZE + 1, 0);
2662
2663        let (lengths, bits) = read_level_lengths(&pack_nibbles(&nibbles)).unwrap();
2664
2665        assert_eq!(&lengths[..6], &[1, 2, 15, 3, 4, 0]);
2666        assert_eq!(bits, LEVEL_TABLE_SIZE * 4 + 4);
2667    }
2668
2669    #[test]
2670    fn reads_level_lengths_with_zero_run_at_current_position() {
2671        let mut nibbles = vec![7, 15, 3, 2];
2672        nibbles.resize(LEVEL_TABLE_SIZE - 3, 0);
2673
2674        let (lengths, bits) = read_level_lengths(&pack_nibbles(&nibbles)).unwrap();
2675
2676        assert_eq!(lengths[0], 7);
2677        assert_eq!(&lengths[1..6], &[0, 0, 0, 0, 0]);
2678        assert_eq!(lengths[6], 2);
2679        assert_eq!(bits, (LEVEL_TABLE_SIZE - 3) * 4);
2680    }
2681
2682    fn pack_nibbles(nibbles: &[u8]) -> Vec<u8> {
2683        nibbles
2684            .chunks(2)
2685            .map(|chunk| {
2686                let high = chunk[0] & 0x0f;
2687                let low = chunk.get(1).copied().unwrap_or(0) & 0x0f;
2688                (high << 4) | low
2689            })
2690            .collect()
2691    }
2692
2693    #[test]
2694    fn reads_rar50_second_level_table_lengths() {
2695        let mut writer = BitWriter::new();
2696        for _ in 0..LEVEL_TABLE_SIZE {
2697            writer.write_bits(5, 4);
2698        }
2699        for count in [138, 138, 138, 16] {
2700            writer.write_bits(19, 5);
2701            writer.write_bits(count - 11, 7);
2702        }
2703        let input = writer.finish();
2704
2705        let (lengths, bits) = read_table_lengths(&input, 0).unwrap();
2706
2707        assert_eq!(lengths.main.len(), MAIN_TABLE_SIZE);
2708        assert_eq!(lengths.distance.len(), DISTANCE_TABLE_SIZE_50);
2709        assert_eq!(lengths.align.len(), ALIGN_TABLE_SIZE);
2710        assert_eq!(lengths.length.len(), LENGTH_TABLE_SIZE);
2711        assert!(lengths.main.iter().all(|&length| length == 0));
2712        assert!(lengths.distance.iter().all(|&length| length == 0));
2713        assert!(lengths.align.iter().all(|&length| length == 0));
2714        assert!(lengths.length.iter().all(|&length| length == 0));
2715        assert_eq!(bits, LEVEL_TABLE_SIZE * 4 + 4 * (5 + 7));
2716    }
2717
2718    #[test]
2719    fn reads_rar70_table_length_count() {
2720        assert_eq!(
2721            table_length_count(1).unwrap(),
2722            MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_70 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE
2723        );
2724    }
2725
2726    #[test]
2727    fn encoded_table_lengths_round_trip_with_bit_count() {
2728        let mut lengths = TableLengths {
2729            main: vec![0; MAIN_TABLE_SIZE],
2730            distance: vec![0; DISTANCE_TABLE_SIZE_50],
2731            align: vec![0; ALIGN_TABLE_SIZE],
2732            length: vec![0; LENGTH_TABLE_SIZE],
2733        };
2734        lengths.main[b'A' as usize] = 1;
2735        lengths.main[b'B' as usize] = 3;
2736        lengths.main[262] = 3;
2737        lengths.distance[1] = 1;
2738        lengths.align[0] = 4;
2739        lengths.length[0] = 1;
2740
2741        let (encoded, bit_count) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
2742        let (decoded, decoded_bits) = read_table_lengths(&encoded, 0).unwrap();
2743
2744        assert_eq!(decoded, lengths);
2745        assert_eq!(decoded_bits, bit_count);
2746    }
2747
2748    #[test]
2749    fn table_level_encoder_uses_rar5_run_symbols() {
2750        let mut lengths =
2751            vec![
2752                0u8;
2753                MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_50 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE
2754            ];
2755        lengths[..4].fill(6);
2756        lengths[8..21].fill(0);
2757
2758        let tokens = encode_table_level_tokens(&lengths);
2759
2760        assert!(tokens.contains(&LevelToken::repeat_previous_short(3)));
2761        assert!(tokens.iter().any(|token| token.symbol == 19));
2762    }
2763
2764    #[test]
2765    fn encoded_compressed_block_round_trips_header_fields() {
2766        let payload = [0xaa, 0xbb, 0xc0];
2767        let block = encode_compressed_block(&payload, 18, true, true).unwrap();
2768
2769        let parsed = parse_compressed_block(&block).unwrap();
2770
2771        assert_eq!(parsed.payload, 3..6);
2772        assert!(parsed.header.has_tables);
2773        assert!(parsed.header.is_last);
2774        assert_eq!(parsed.header.final_byte_bits, 2);
2775        assert_eq!(parsed.header.payload_bits, 18);
2776        assert_eq!(&block[parsed.payload], payload);
2777    }
2778
2779    #[test]
2780    fn rejects_table_repeat_without_previous_length() {
2781        let mut writer = BitWriter::new();
2782        for _ in 0..LEVEL_TABLE_SIZE {
2783            writer.write_bits(5, 4);
2784        }
2785        writer.write_bits(16, 5);
2786        writer.write_bits(0, 3);
2787
2788        assert_eq!(
2789            read_table_lengths(&writer.finish(), 0),
2790            Err(Error::InvalidData(
2791                "RAR 5 table repeats missing previous length"
2792            ))
2793        );
2794    }
2795
2796    #[test]
2797    fn rejects_invalid_encoded_block_bit_counts() {
2798        assert_eq!(
2799            encode_compressed_block(&[0], 0, true, true),
2800            Err(Error::InvalidData("RAR 5 block has unused payload bytes"))
2801        );
2802        assert_eq!(
2803            encode_compressed_block(&[], 1, true, true),
2804            Err(Error::InvalidData("RAR 5 block bit count exceeds payload"))
2805        );
2806    }
2807
2808    #[test]
2809    fn builds_named_decode_tables_from_lengths() {
2810        let lengths = TableLengths {
2811            main: vec![1, 1],
2812            distance: vec![1, 1],
2813            align: vec![4; ALIGN_TABLE_SIZE],
2814            length: vec![1, 1],
2815        };
2816
2817        let tables = DecodeTables::from_lengths(&lengths).unwrap();
2818
2819        assert!(!tables.main.is_empty());
2820        assert!(!tables.distance.is_empty());
2821        assert!(!tables.align.is_empty());
2822        assert!(!tables.length.is_empty());
2823        assert!(!tables.align_mode);
2824    }
2825
2826    #[test]
2827    fn rejects_oversubscribed_rar50_huffman_tables() {
2828        assert!(matches!(
2829            HuffmanTable::from_lengths(&[1, 1, 1]),
2830            Err(Error::InvalidData("RAR 5 oversubscribed Huffman table"))
2831        ));
2832    }
2833
2834    #[test]
2835    fn detects_rar50_align_mode_when_align_lengths_are_not_uniform_four() {
2836        let mut align = vec![4; ALIGN_TABLE_SIZE];
2837        align[0] = 0;
2838        align[3] = 3;
2839        let lengths = TableLengths {
2840            main: vec![1, 1],
2841            distance: vec![1, 1],
2842            align,
2843            length: vec![1, 1],
2844        };
2845
2846        let tables = DecodeTables::from_lengths(&lengths).unwrap();
2847
2848        assert!(tables.align_mode);
2849    }
2850
2851    #[test]
2852    fn decodes_synthetic_literal_only_block() {
2853        let payload = literal_only_payload(b"ABBA");
2854        let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
2855
2856        let output = decode_literal_only(&input, 0, 4).unwrap();
2857
2858        assert_eq!(output, b"ABBA");
2859    }
2860
2861    #[test]
2862    fn encodes_literal_only_member_that_decoder_reads() {
2863        let data = b"literal-only RAR5 codec stream\nwith repeated words words words";
2864        let input = encode_literal_only(data, 0).unwrap();
2865
2866        let output = decode_literal_only(&input, 0, data.len()).unwrap();
2867
2868        assert_eq!(output, data);
2869    }
2870
2871    #[test]
2872    fn encodes_literal_only_rar70_table_shape_that_decoder_reads() {
2873        let data = b"small RAR7-compatible literal block";
2874        let input = encode_literal_only(data, 1).unwrap();
2875
2876        let output = decode_literal_only(&input, 1, data.len()).unwrap();
2877
2878        assert_eq!(output, data);
2879    }
2880
2881    #[test]
2882    fn encodes_empty_literal_only_member() {
2883        let input = encode_literal_only(b"", 0).unwrap();
2884
2885        let output = decode_literal_only(&input, 0, 0).unwrap();
2886
2887        assert!(output.is_empty());
2888    }
2889
2890    #[test]
2891    fn encodes_lz_member_with_same_member_matches() {
2892        let data = b"RAR5 match writer phrase. RAR5 match writer phrase. RAR5 match writer phrase.";
2893        let lz = encode_lz_member(data, 0).unwrap();
2894        let literal = encode_literal_only(data, 0).unwrap();
2895
2896        let output = decode_lz(&lz, 0, data.len()).unwrap();
2897
2898        assert_eq!(output, data);
2899        assert!(lz.len() < literal.len());
2900        assert!(
2901            encode_tokens(data, &[], EncodeOptions::default(), DISTANCE_TABLE_SIZE_50)
2902                .iter()
2903                .any(|token| matches!(token, EncodeToken::Match { .. }))
2904        );
2905    }
2906
2907    #[test]
2908    fn frequency_weighted_huffman_lengths_shorten_common_symbols() {
2909        let mut frequencies = vec![1usize; 24];
2910        frequencies[3] = 1024;
2911
2912        let lengths = huffman::lengths_for_frequencies(&frequencies, 15);
2913
2914        assert!(lengths[3] < lengths[0]);
2915        assert!(lengths.iter().all(|&length| length <= 15));
2916    }
2917
2918    #[test]
2919    fn lz_encoder_uses_frequency_weighted_huffman_lengths() {
2920        let mut data = vec![b'a'; 200];
2921        data.extend_from_slice(b"bcdefghijklmnopqrstuvwxyz");
2922        let input = encode_lz_member_with_options(&data, 0, EncodeOptions::new(0)).unwrap();
2923        let block = parse_compressed_block(&input).unwrap();
2924        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
2925
2926        let output = decode_lz(&input, 0, data.len()).unwrap();
2927
2928        assert_eq!(output, data);
2929        assert!(lengths.main[b'a' as usize] < lengths.main[b'z' as usize]);
2930    }
2931
2932    #[test]
2933    fn lazy_lz_parser_defers_short_match_for_longer_next_match() {
2934        let input = b"abcdXbcdYYYYYYYYYYYYabcdYYYYYYYYYYYY";
2935        let greedy = encode_tokens(
2936            input,
2937            &[],
2938            EncodeOptions::new(MAX_MATCH_CANDIDATES),
2939            DISTANCE_TABLE_SIZE_50,
2940        );
2941        let lazy = encode_tokens(
2942            input,
2943            &[],
2944            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
2945            DISTANCE_TABLE_SIZE_50,
2946        );
2947        let packed = encode_lz_member_with_options(
2948            input,
2949            0,
2950            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
2951        )
2952        .unwrap();
2953
2954        assert!(greedy
2955            .iter()
2956            .any(|token| matches!(token, EncodeToken::Match { length: 4, .. })));
2957        assert!(lazy
2958            .iter()
2959            .any(|token| matches!(token, EncodeToken::Match { length, .. } if *length > 8)));
2960        assert_eq!(decode_lz(&packed, 0, input.len()).unwrap(), input);
2961    }
2962
2963    #[test]
2964    fn cost_aware_match_selection_prefers_repeat_distance_token() {
2965        let pos = 64;
2966        let pattern = b"abcdefgh";
2967        let mut input: Vec<u8> = (0..96u8).map(|byte| byte.wrapping_mul(37)).collect();
2968        input[pos - 30..pos - 22].copy_from_slice(pattern);
2969        input[pos - 10..pos - 2].copy_from_slice(pattern);
2970        input[pos..pos + 8].copy_from_slice(pattern);
2971        input[pos + 8] = b'X';
2972
2973        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
2974        for candidate in 0..pos {
2975            insert_match_position(&input, candidate, &mut buckets);
2976        }
2977        let state = EncoderMatchState {
2978            reps: [30, 0, 0, 0],
2979            last_length: 8,
2980        };
2981
2982        let best = best_match(
2983            &input,
2984            pos,
2985            input.len(),
2986            &buckets,
2987            EncodeOptions::default(),
2988            &state,
2989            DISTANCE_TABLE_SIZE_50,
2990        )
2991        .unwrap();
2992
2993        assert_eq!((best.length, best.distance), (8, 30));
2994    }
2995
2996    #[test]
2997    fn lazy_parser_uses_match_cost_not_only_match_length() {
2998        let pos = 600;
2999        let mut input: Vec<u8> = (0..700u16)
3000            .map(|value| value.wrapping_mul(73) as u8)
3001            .collect();
3002        input[pos - 512..pos - 504].copy_from_slice(b"ABCDEFGH");
3003        input[pos - 504] = b'Z';
3004        input[pos - 29..pos - 21].copy_from_slice(b"BCDEFGHI");
3005        input[pos - 30] = b'x';
3006        input[pos..pos + 9].copy_from_slice(b"ABCDEFGHI");
3007
3008        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3009        for candidate in 0..pos {
3010            insert_match_position(&input, candidate, &mut buckets);
3011        }
3012        let state = EncoderMatchState {
3013            reps: [30, 0, 0, 0],
3014            last_length: 8,
3015        };
3016        let current = best_match(
3017            &input,
3018            pos,
3019            input.len(),
3020            &buckets,
3021            EncodeOptions::default(),
3022            &state,
3023            DISTANCE_TABLE_SIZE_50,
3024        )
3025        .unwrap();
3026
3027        assert_eq!((current.length, current.distance), (8, 512));
3028        assert!(should_lazy_emit_literal(
3029            &input,
3030            pos,
3031            &buckets,
3032            EncodeOptions::default().with_lazy_matching(true),
3033            &state,
3034            DISTANCE_TABLE_SIZE_50,
3035            current,
3036        ));
3037    }
3038
3039    #[test]
3040    fn lazy_parser_uses_bounded_cost_lookahead() {
3041        let pos = 160;
3042        let mut input: Vec<u8> = (0..240u16)
3043            .map(|value| value.wrapping_mul(91) as u8)
3044            .collect();
3045        input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3046        input[pos - 80..pos - 70].copy_from_slice(b"CDEFGHIJKL");
3047        input[pos..pos + 12].copy_from_slice(b"ABCDEFGHIJKL");
3048
3049        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3050        for candidate in 0..pos {
3051            insert_match_position(&input, candidate, &mut buckets);
3052        }
3053        let state = EncoderMatchState::default();
3054        let current = best_match(
3055            &input,
3056            pos,
3057            input.len(),
3058            &buckets,
3059            EncodeOptions::default(),
3060            &state,
3061            DISTANCE_TABLE_SIZE_50,
3062        )
3063        .unwrap();
3064
3065        assert_eq!((current.length, current.distance), (8, 30));
3066        assert!(!should_lazy_emit_literal(
3067            &input,
3068            pos,
3069            &buckets,
3070            EncodeOptions::default()
3071                .with_lazy_matching(true)
3072                .with_lazy_lookahead(1),
3073            &state,
3074            DISTANCE_TABLE_SIZE_50,
3075            current,
3076        ));
3077        assert!(should_lazy_emit_literal(
3078            &input,
3079            pos,
3080            &buckets,
3081            EncodeOptions::default()
3082                .with_lazy_matching(true)
3083                .with_lazy_lookahead(2),
3084            &state,
3085            DISTANCE_TABLE_SIZE_50,
3086            current,
3087        ));
3088    }
3089
3090    #[test]
3091    fn lazy_parser_charges_for_skipped_literals() {
3092        let pos = 160;
3093        let mut input: Vec<u8> = (0..240u16)
3094            .map(|value| value.wrapping_mul(91) as u8)
3095            .collect();
3096        input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3097        input[pos - 80..pos - 71].copy_from_slice(b"CDEFGHIJK");
3098        input[pos..pos + 12].copy_from_slice(b"ABCDEFGHIJKL");
3099
3100        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3101        for candidate in 0..pos {
3102            insert_match_position(&input, candidate, &mut buckets);
3103        }
3104        let state = EncoderMatchState::default();
3105        let current = best_match(
3106            &input,
3107            pos,
3108            input.len(),
3109            &buckets,
3110            EncodeOptions::default(),
3111            &state,
3112            DISTANCE_TABLE_SIZE_50,
3113        )
3114        .unwrap();
3115
3116        let next = best_match(
3117            &input,
3118            pos + 2,
3119            input.len(),
3120            &buckets,
3121            EncodeOptions::default(),
3122            &state,
3123            DISTANCE_TABLE_SIZE_50,
3124        )
3125        .unwrap();
3126
3127        assert!(next.score > current.score);
3128        assert!(next.score <= current.score + 16);
3129        assert!(!should_lazy_emit_literal(
3130            &input,
3131            pos,
3132            &buckets,
3133            EncodeOptions::default()
3134                .with_lazy_matching(true)
3135                .with_lazy_lookahead(2),
3136            &state,
3137            DISTANCE_TABLE_SIZE_50,
3138            current,
3139        ));
3140    }
3141
3142    fn encode_lz_member_with_filter(data: &[u8], kind: Rar50FilterKind) -> Result<Vec<u8>> {
3143        Unpack50Encoder::new().encode_member_with_filter(data, 0, Rar50FilterSpec::new(kind))
3144    }
3145
3146    #[test]
3147    fn encodes_lz_member_with_delta_filter_record() {
3148        let data: Vec<u8> = (0..96).map(|index| (index * 7 + index / 3) as u8).collect();
3149        let input =
3150            encode_lz_member_with_filter(&data, Rar50FilterKind::Delta { channels: 3 }).unwrap();
3151        let block = parse_compressed_block(&input).unwrap();
3152        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3153
3154        let output = decode_lz(&input, 0, data.len()).unwrap();
3155
3156        assert_eq!(output, data);
3157        assert_ne!(lengths.main[256], 0);
3158    }
3159
3160    #[test]
3161    fn rejects_invalid_delta_filter_channel_count() {
3162        assert_eq!(
3163            encode_lz_member_with_filter(b"abc", Rar50FilterKind::Delta { channels: 0 }),
3164            Err(Error::InvalidData(
3165                "RAR 5 DELTA filter channel count is invalid"
3166            ))
3167        );
3168        assert_eq!(
3169            encode_lz_member_with_filter(b"abc", Rar50FilterKind::Delta { channels: 33 }),
3170            Err(Error::InvalidData(
3171                "RAR 5 DELTA filter channel count is invalid"
3172            ))
3173        );
3174    }
3175
3176    #[test]
3177    fn encodes_lz_member_with_e8_filter_record() {
3178        let mut data = b"\xe8\0\0\0\0plain text after call".to_vec();
3179        data.extend_from_slice(&[0xe8, 3, 0, 0, 0, b'X']);
3180        let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8).unwrap();
3181        let block = parse_compressed_block(&input).unwrap();
3182        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3183
3184        let output = decode_lz(&input, 0, data.len()).unwrap();
3185
3186        assert_eq!(output, data);
3187        assert_ne!(lengths.main[256], 0);
3188    }
3189
3190    #[test]
3191    fn rar50_e8_filter_wraps_file_offset_modulo_16m() {
3192        let file_offset = 0x0110_0000;
3193        let mut encoded = vec![0xe8];
3194        encoded.extend_from_slice(&0x0010_0c08u32.to_le_bytes());
3195
3196        let mut decoded = encoded.clone();
3197        e8e9_decode(&mut decoded, file_offset, false);
3198
3199        assert_eq!(&decoded[1..5], &0x0000_0c07u32.to_le_bytes());
3200        e8e9_encode(&mut decoded, file_offset, false);
3201        assert_eq!(decoded, encoded);
3202    }
3203
3204    #[test]
3205    fn streaming_decode_reports_filtered_member_with_typed_sentinel() {
3206        let data = b"\xe8\0\0\0\0plain text after call".to_vec();
3207        let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8).unwrap();
3208        let mut reader = input.as_slice();
3209        let mut decoder = Unpack50Decoder::new();
3210
3211        let error = decoder
3212            .decode_member_from_reader_with_dictionary_to_sink(
3213                &mut reader,
3214                0,
3215                data.len(),
3216                128 * 1024,
3217                false,
3218                |_chunk| Ok::<_, std::convert::Infallible>(()),
3219            )
3220            .unwrap_err();
3221
3222        assert!(matches!(error, StreamDecodeError::FilteredMember));
3223    }
3224
3225    #[test]
3226    fn encodes_lz_member_with_e8e9_filter_record() {
3227        let data = b"\xe9\0\0\0\0jump target through e9".to_vec();
3228        let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8E9).unwrap();
3229        let block = parse_compressed_block(&input).unwrap();
3230        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3231
3232        let output = decode_lz(&input, 0, data.len()).unwrap();
3233
3234        assert_eq!(output, data);
3235        assert_ne!(lengths.main[256], 0);
3236    }
3237
3238    #[test]
3239    fn encodes_lz_member_with_ranged_e8e9_filter_record() {
3240        let mut data = b"\xe8\0\0\0\0plain prefix outside filter range".to_vec();
3241        let range_start = data.len();
3242        for _ in 0..16 {
3243            let operand_pos = data.len() + 1;
3244            data.push(0xe8);
3245            let relative = 0x7000u32.wrapping_sub(operand_pos as u32);
3246            data.extend_from_slice(&relative.to_le_bytes());
3247            data.extend_from_slice(b" code ");
3248        }
3249        let range = range_start..data.len();
3250        data.extend_from_slice(b"\xe9\0\0\0\0plain suffix outside filter range");
3251
3252        let input = Unpack50Encoder::new()
3253            .encode_member_with_filter(
3254                &data,
3255                0,
3256                Rar50FilterSpec::range(Rar50FilterKind::E8E9, range),
3257            )
3258            .unwrap();
3259        let block = parse_compressed_block(&input).unwrap();
3260        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3261
3262        let output = decode_lz(&input, 0, data.len()).unwrap();
3263
3264        assert_eq!(output, data);
3265        assert_ne!(lengths.main[256], 0);
3266    }
3267
3268    #[test]
3269    fn encodes_lz_member_with_multiple_filter_records() {
3270        let mut data = b"\xe8\0\0\0\0plain prefix outside filters".to_vec();
3271        let first_start = data.len();
3272        data.extend_from_slice(b"\xe8\0\0\0\0first filtered cluster");
3273        let first_end = data.len();
3274        data.extend_from_slice(b"large plain middle outside filters");
3275        let second_start = data.len();
3276        data.extend_from_slice(b"\xe8\0\0\0\0second filtered cluster");
3277        let second_end = data.len();
3278
3279        let input = Unpack50Encoder::new()
3280            .encode_member_with_filters(
3281                &data,
3282                0,
3283                &[
3284                    Rar50FilterSpec::range(Rar50FilterKind::E8, first_start..first_end),
3285                    Rar50FilterSpec::range(Rar50FilterKind::E8, second_start..second_end),
3286                ],
3287            )
3288            .unwrap();
3289        let block = parse_compressed_block(&input).unwrap();
3290        let (lengths, table_bits) = read_table_lengths(&input[block.payload.clone()], 0).unwrap();
3291        let tables = DecodeTables::from_lengths(&lengths).unwrap();
3292        let mut bits = BitReader {
3293            input: &input[block.payload],
3294            bit_pos: table_bits,
3295        };
3296        assert_eq!(tables.main.decode(&mut bits).unwrap(), 256);
3297        let first = read_filter(&mut bits, 0).unwrap();
3298        assert_eq!(tables.main.decode(&mut bits).unwrap(), 256);
3299        let second = read_filter(&mut bits, 0).unwrap();
3300
3301        let output = decode_lz(&input, 0, data.len()).unwrap();
3302
3303        assert_eq!(output, data);
3304        assert_eq!(first.start, first_start);
3305        assert_eq!(second.start, second_start);
3306    }
3307
3308    #[test]
3309    fn encodes_lz_member_with_arm_filter_record() {
3310        let data = [0x04, 0x00, 0x00, 0xeb, b'A', b'R', b'M', b'!'];
3311        let input = encode_lz_member_with_filter(&data, Rar50FilterKind::Arm).unwrap();
3312        let block = parse_compressed_block(&input).unwrap();
3313        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3314
3315        let output = decode_lz(&input, 0, data.len()).unwrap();
3316
3317        assert_eq!(output, data);
3318        assert_ne!(lengths.main[256], 0);
3319    }
3320
3321    #[test]
3322    fn arm_filter_uses_wrapping_address_arithmetic_at_u32_boundary() {
3323        let original = [0x04, 0x00, 0x00, 0xeb, 0x08, 0x00, 0x00, 0xeb];
3324        let mut filtered = original;
3325
3326        arm_encode(&mut filtered, u32::MAX - 3);
3327        assert_ne!(filtered, original);
3328        arm_decode(&mut filtered, u32::MAX - 3);
3329
3330        assert_eq!(filtered, original);
3331    }
3332
3333    #[test]
3334    fn solid_encoder_emits_rar50_matches_against_previous_member_history() {
3335        let first = b"RAR5 solid shared phrase alpha beta gamma\n".repeat(16);
3336        let second = b"RAR5 solid shared phrase alpha beta gamma\nsecond\n".repeat(4);
3337        let solid = encode_lz_member_with_history(&second, &first, 0).unwrap();
3338        let standalone = encode_lz_member(&second, 0).unwrap();
3339        let mut decoder = Unpack50Decoder::new();
3340
3341        assert_eq!(
3342            decoder
3343                .decode_member(
3344                    &encode_lz_member(&first, 0).unwrap(),
3345                    0,
3346                    first.len(),
3347                    false,
3348                    DecodeMode::Lz
3349                )
3350                .unwrap(),
3351            first
3352        );
3353        assert_eq!(
3354            decoder
3355                .decode_member(&solid, 0, second.len(), true, DecodeMode::Lz)
3356                .unwrap(),
3357            second
3358        );
3359        assert!(solid.len() < standalone.len());
3360    }
3361
3362    #[test]
3363    fn large_lz_members_are_split_into_multiple_compressed_blocks() {
3364        let data = vec![0u8; MAX_COMPRESSED_BLOCK_OUTPUT + 1];
3365        let encoded = encode_lz_member_with_options(&data, 0, EncodeOptions::new(16)).unwrap();
3366        let mut cursor = std::io::Cursor::new(encoded.as_slice());
3367        let first = read_compressed_block(&mut cursor).unwrap();
3368        let second = read_compressed_block(&mut cursor).unwrap();
3369        let mut decoder = Unpack50Decoder::new();
3370
3371        assert!(!first.header.is_last);
3372        assert!(second.header.is_last);
3373        assert_eq!(
3374            decoder
3375                .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3376                .unwrap(),
3377            data
3378        );
3379    }
3380
3381    #[test]
3382    fn large_filtered_lz_members_split_filter_records_by_block() {
3383        let mut data: Vec<_> = (0..MAX_COMPRESSED_BLOCK_OUTPUT + 512)
3384            .map(|index| index as u8)
3385            .collect();
3386        data[256] = 0xe8;
3387        data[257..261].copy_from_slice(&0x20u32.to_le_bytes());
3388        data[MAX_COMPRESSED_BLOCK_OUTPUT + 64] = 0xe8;
3389        data[MAX_COMPRESSED_BLOCK_OUTPUT + 65..MAX_COMPRESSED_BLOCK_OUTPUT + 69]
3390            .copy_from_slice(&0x40u32.to_le_bytes());
3391
3392        let encoded = Unpack50Encoder::with_options(EncodeOptions::new(0))
3393            .encode_member_with_filter(
3394                &data,
3395                0,
3396                Rar50FilterSpec::range(Rar50FilterKind::E8, 0..data.len()),
3397            )
3398            .unwrap();
3399        let mut cursor = std::io::Cursor::new(encoded.as_slice());
3400        let first = read_compressed_block(&mut cursor).unwrap();
3401        let mut blocks = 1usize;
3402        let mut last_is_last = first.header.is_last;
3403        while cursor.position() < encoded.len() as u64 {
3404            last_is_last = read_compressed_block(&mut cursor).unwrap().header.is_last;
3405            blocks += 1;
3406        }
3407        let mut decoder = Unpack50Decoder::new();
3408
3409        assert!(!first.header.is_last);
3410        assert!(last_is_last);
3411        assert!(blocks > 2);
3412        assert_eq!(
3413            decoder
3414                .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3415                .unwrap(),
3416            data
3417        );
3418    }
3419
3420    #[test]
3421    fn filters_are_split_before_rar_reader_filter_limit() {
3422        let data = vec![0u8; MAX_FILTER_BLOCK_LENGTH + 1];
3423        let encoded = Unpack50Encoder::with_options(
3424            EncodeOptions::new(0).with_max_match_distance(128 * 1024),
3425        )
3426        .encode_member_with_filter(
3427            &data,
3428            0,
3429            Rar50FilterSpec::new(Rar50FilterKind::Delta { channels: 4 }),
3430        )
3431        .unwrap();
3432        let mut cursor = std::io::Cursor::new(encoded.as_slice());
3433        let first = read_compressed_block(&mut cursor).unwrap();
3434        let second = read_compressed_block(&mut cursor).unwrap();
3435        let mut decoder = Unpack50Decoder::new();
3436
3437        assert!(!first.header.is_last);
3438        assert!(second.header.is_last);
3439        assert_eq!(
3440            decoder
3441                .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3442                .unwrap(),
3443            data
3444        );
3445    }
3446
3447    #[test]
3448    fn solid_encoder_history_limit_follows_encode_options_dictionary() {
3449        let mut encoder = Unpack50Encoder::with_options(
3450            EncodeOptions::new(0).with_max_match_distance(DEFAULT_DICTIONARY_SIZE + 1024),
3451        );
3452        encoder.remember(&vec![0x41; DEFAULT_DICTIONARY_SIZE + 512]);
3453
3454        assert_eq!(encoder.history.len(), DEFAULT_DICTIONARY_SIZE + 512);
3455
3456        let mut capped =
3457            Unpack50Encoder::with_options(EncodeOptions::new(0).with_max_match_distance(1024));
3458        capped.remember(&vec![0x42; 4096]);
3459
3460        assert_eq!(capped.history.len(), 1024);
3461    }
3462
3463    #[test]
3464    fn encodes_lz_member_with_last_length_repeat_symbols() {
3465        let data = b"abcdXabcdYabcdZabcd";
3466        let input = encode_lz_member(data, 0).unwrap();
3467        let block = parse_compressed_block(&input).unwrap();
3468        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3469
3470        let output = decode_lz(&input, 0, data.len()).unwrap();
3471
3472        assert_eq!(output, data);
3473        assert_ne!(lengths.main[257], 0);
3474    }
3475
3476    #[test]
3477    fn encodes_lz_member_using_rar70_distance_table_shape() {
3478        let data = b"RAR7-compatible repeated phrase repeated phrase repeated phrase";
3479        let input = encode_lz_member(data, 1).unwrap();
3480
3481        let output = decode_lz(&input, 1, data.len()).unwrap();
3482
3483        assert_eq!(output, data);
3484    }
3485
3486    #[test]
3487    fn decode_member_from_reader_accepts_incremental_input() {
3488        struct OneByteReader<'a> {
3489            data: &'a [u8],
3490            pos: usize,
3491        }
3492
3493        impl Read for OneByteReader<'_> {
3494            fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
3495                if self.pos >= self.data.len() {
3496                    return Ok(0);
3497                }
3498                out[0] = self.data[self.pos];
3499                self.pos += 1;
3500                Ok(1)
3501            }
3502        }
3503
3504        let payload = literal_only_payload(b"ABBA");
3505        let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3506        let mut reader = OneByteReader {
3507            data: &input,
3508            pos: 0,
3509        };
3510        let mut decoder = Unpack50Decoder::new();
3511
3512        let output = decoder
3513            .decode_member_from_reader(&mut reader, 0, 4, false, DecodeMode::LiteralOnly)
3514            .unwrap();
3515
3516        assert_eq!(output, b"ABBA");
3517    }
3518
3519    #[test]
3520    fn decodes_synthetic_new_match_block() {
3521        let payload = new_match_payload();
3522        let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3523
3524        let output = decode_lz(&input, 0, 4).unwrap();
3525
3526        assert_eq!(output, b"ABAB");
3527    }
3528
3529    #[test]
3530    fn decodes_synthetic_last_length_match_block() {
3531        let payload = repeat_payload(257);
3532        let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3533
3534        let output = decode_lz(&input, 0, 6).unwrap();
3535
3536        assert_eq!(output, b"ABABAB");
3537    }
3538
3539    #[test]
3540    fn decodes_synthetic_repeat_distance_match_block() {
3541        let payload = repeat_payload(258);
3542        let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3543
3544        let output = decode_lz(&input, 0, 6).unwrap();
3545
3546        assert_eq!(output, b"ABABAB");
3547    }
3548
3549    #[test]
3550    fn rejects_literal_only_block_without_tables() {
3551        let input = encode_compressed_block(&[0], 8, false, true).unwrap();
3552
3553        assert_eq!(
3554            decode_literal_only(&input, 0, 1),
3555            Err(Error::InvalidData("RAR 5 block reuses missing tables"))
3556        );
3557    }
3558
3559    #[test]
3560    fn decodes_length_slots() {
3561        assert_eq!(slot_to_length(0, 0).unwrap(), 2);
3562        assert_eq!(slot_to_length(7, 0).unwrap(), 9);
3563        assert_eq!(slot_to_length(8, 0).unwrap(), 10);
3564        assert_eq!(slot_to_length(8, 1).unwrap(), 11);
3565        assert_eq!(slot_to_length(11, 1).unwrap(), 17);
3566        assert_eq!(slot_to_length(12, 3).unwrap(), 21);
3567    }
3568
3569    #[test]
3570    fn decodes_distance_slots() {
3571        assert_eq!(slot_to_distance(0, 0).unwrap(), 1);
3572        assert_eq!(slot_to_distance(3, 0).unwrap(), 4);
3573        assert_eq!(distance_slot_bit_count(4).unwrap(), 1);
3574        assert_eq!(slot_to_distance(4, 0).unwrap(), 5);
3575        assert_eq!(slot_to_distance(4, 1).unwrap(), 6);
3576        assert_eq!(distance_slot_bit_count(10).unwrap(), 4);
3577        assert_eq!(slot_to_distance(10, 15).unwrap(), 48);
3578    }
3579
3580    #[test]
3581    fn bit_reader_accepts_large_rar5_distance_extras() {
3582        let mut bits = BitReader::new(&[0xff, 0x00, 0xaa, 0x55]);
3583
3584        assert_eq!(bits.read_bits(32).unwrap(), 0xff00_aa55);
3585        assert_eq!(
3586            bits.read_bits(1),
3587            Err(Error::NeedMoreInput),
3588            "32-bit reads must not leave a partial cursor state"
3589        );
3590    }
3591
3592    #[test]
3593    fn copies_lz_matches_with_overlap() {
3594        let decoder = Unpack50Decoder::new();
3595        let mut output = b"AB".to_vec();
3596
3597        decoder
3598            .copy_match(&mut output, 2, 6, 8, DEFAULT_DICTIONARY_SIZE)
3599            .unwrap();
3600
3601        assert_eq!(output, b"ABABABAB");
3602    }
3603
3604    #[test]
3605    fn rejects_invalid_match_copy() {
3606        let decoder = Unpack50Decoder::new();
3607        let mut output = b"AB".to_vec();
3608
3609        assert_eq!(
3610            decoder.copy_match(&mut output, 3, 1, 3, DEFAULT_DICTIONARY_SIZE),
3611            Err(Error::InvalidData("RAR 5 match distance exceeds window"))
3612        );
3613        assert_eq!(
3614            decoder.copy_match(&mut output, 1, 2, 3, DEFAULT_DICTIONARY_SIZE),
3615            Err(Error::InvalidData("RAR 5 match exceeds output limit"))
3616        );
3617    }
3618
3619    #[test]
3620    fn rejects_match_distance_beyond_dictionary() {
3621        let decoder = Unpack50Decoder::new();
3622        let mut output = b"ABCD".to_vec();
3623
3624        assert_eq!(
3625            decoder.copy_match(&mut output, 4, 1, 5, 3),
3626            Err(Error::InvalidData(
3627                "RAR 5 match distance exceeds dictionary"
3628            ))
3629        );
3630    }
3631
3632    #[test]
3633    fn solid_history_is_capped_to_dictionary_size() {
3634        let mut decoder = Unpack50Decoder::new();
3635        let first_payload = literal_only_payload(b"ABBA");
3636        let first =
3637            encode_compressed_block(&first_payload, first_payload.len() * 8, true, true).unwrap();
3638        let second_payload = literal_only_payload(b"BAAB");
3639        let second =
3640            encode_compressed_block(&second_payload, second_payload.len() * 8, true, true).unwrap();
3641
3642        assert_eq!(
3643            decoder
3644                .decode_member_with_dictionary(&first, 0, 4, 6, false, DecodeMode::LiteralOnly)
3645                .unwrap(),
3646            b"ABBA"
3647        );
3648        assert_eq!(decoder.history, b"ABBA");
3649
3650        assert_eq!(
3651            decoder
3652                .decode_member_with_dictionary(&second, 0, 4, 6, true, DecodeMode::LiteralOnly)
3653                .unwrap(),
3654            b"BAAB"
3655        );
3656        assert_eq!(decoder.history, b"BABAAB");
3657    }
3658
3659    #[test]
3660    fn streaming_decoder_history_is_capped_without_reordering() {
3661        let mut decoder = Unpack50Decoder::new();
3662        let first_payload = literal_only_payload(b"ABBA");
3663        let first =
3664            encode_compressed_block(&first_payload, first_payload.len() * 8, true, true).unwrap();
3665        let second_payload = literal_only_payload(b"BAAB");
3666        let second =
3667            encode_compressed_block(&second_payload, second_payload.len() * 8, true, true).unwrap();
3668        let mut decoded = Vec::new();
3669
3670        decoder
3671            .decode_member_from_reader_with_dictionary_to_sink(
3672                &mut std::io::Cursor::new(&first),
3673                0,
3674                4,
3675                6,
3676                false,
3677                |chunk| {
3678                    match chunk {
3679                        DecodedChunk::Bytes(bytes) => decoded.extend_from_slice(bytes),
3680                        DecodedChunk::Repeated { byte, len } => {
3681                            decoded.extend(std::iter::repeat_n(byte, len));
3682                        }
3683                    }
3684                    Ok::<(), std::io::Error>(())
3685                },
3686            )
3687            .unwrap();
3688        assert_eq!(decoded, b"ABBA");
3689        assert_eq!(decoder.history, b"ABBA");
3690
3691        decoded.clear();
3692        decoder
3693            .decode_member_from_reader_with_dictionary_to_sink(
3694                &mut std::io::Cursor::new(&second),
3695                0,
3696                4,
3697                6,
3698                true,
3699                |chunk| {
3700                    match chunk {
3701                        DecodedChunk::Bytes(bytes) => decoded.extend_from_slice(bytes),
3702                        DecodedChunk::Repeated { byte, len } => {
3703                            decoded.extend(std::iter::repeat_n(byte, len));
3704                        }
3705                    }
3706                    Ok::<(), std::io::Error>(())
3707                },
3708            )
3709            .unwrap();
3710        assert_eq!(decoded, b"BAAB");
3711        assert_eq!(decoder.history, b"BABAAB");
3712    }
3713
3714    fn literal_only_payload(data: &[u8]) -> Vec<u8> {
3715        let mut lengths = TableLengths {
3716            main: vec![0; MAIN_TABLE_SIZE],
3717            distance: vec![0; DISTANCE_TABLE_SIZE_50],
3718            align: vec![0; ALIGN_TABLE_SIZE],
3719            length: vec![0; LENGTH_TABLE_SIZE],
3720        };
3721        lengths.main[b'A' as usize] = 1;
3722        lengths.main[b'B' as usize] = 1;
3723        let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3724        let mut writer = BitWriter { bytes, bit_pos };
3725        for &byte in data {
3726            match byte {
3727                b'A' => writer.write_bits(0, 1),
3728                b'B' => writer.write_bits(1, 1),
3729                _ => panic!("test helper only encodes A/B"),
3730            }
3731        }
3732        writer.finish()
3733    }
3734
3735    fn new_match_payload() -> Vec<u8> {
3736        let mut lengths = TableLengths {
3737            main: vec![0; MAIN_TABLE_SIZE],
3738            distance: vec![0; DISTANCE_TABLE_SIZE_50],
3739            align: vec![0; ALIGN_TABLE_SIZE],
3740            length: vec![0; LENGTH_TABLE_SIZE],
3741        };
3742        lengths.main[b'A' as usize] = 2;
3743        lengths.main[b'B' as usize] = 2;
3744        lengths.main[262] = 2;
3745        lengths.distance[1] = 1;
3746        let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3747        let mut writer = BitWriter { bytes, bit_pos };
3748
3749        writer.write_bits(0b00, 2); // 'A'
3750        writer.write_bits(0b01, 2); // 'B'
3751        writer.write_bits(0b10, 2); // match length 2
3752        writer.write_bits(0, 1); // distance slot 1
3753        writer.finish()
3754    }
3755
3756    fn repeat_payload(repeat_symbol: usize) -> Vec<u8> {
3757        let mut lengths = TableLengths {
3758            main: vec![0; MAIN_TABLE_SIZE],
3759            distance: vec![0; DISTANCE_TABLE_SIZE_50],
3760            align: vec![0; ALIGN_TABLE_SIZE],
3761            length: vec![0; LENGTH_TABLE_SIZE],
3762        };
3763        lengths.main[b'A' as usize] = 2;
3764        lengths.main[b'B' as usize] = 2;
3765        lengths.main[repeat_symbol] = 2;
3766        lengths.main[262] = 2;
3767        lengths.distance[1] = 1;
3768        lengths.length[0] = 1;
3769        let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3770        let mut writer = BitWriter { bytes, bit_pos };
3771
3772        writer.write_bits(0b00, 2); // 'A'
3773        writer.write_bits(0b01, 2); // 'B'
3774        writer.write_bits(0b11, 2); // match length 2
3775        writer.write_bits(0, 1); // distance slot 1
3776        writer.write_bits(0b10, 2); // repeat control symbol
3777        if repeat_symbol == 258 {
3778            writer.write_bits(0, 1); // length slot 0
3779        }
3780        writer.finish()
3781    }
3782}