Skip to main content

rars_codec/
rar50.rs

1use crate::filters::{self, DeltaErrorMessages, FilterOp};
2use crate::{huffman, Error, Result};
3use std::collections::VecDeque;
4use std::io::Read;
5use std::ops::Range;
6
7pub const LEVEL_TABLE_SIZE: usize = 20;
8pub const MAIN_TABLE_SIZE: usize = 306;
9pub const DISTANCE_TABLE_SIZE_50: usize = 64;
10pub const DISTANCE_TABLE_SIZE_70: usize = 80;
11pub const ALIGN_TABLE_SIZE: usize = 16;
12pub const LENGTH_TABLE_SIZE: usize = 44;
13const DEFAULT_DICTIONARY_SIZE: usize = 4 * 1024 * 1024;
14const MAX_INITIAL_OUTPUT_CAPACITY: usize = 1024 * 1024;
15const STREAM_FLUSH_THRESHOLD: usize = 64 * 1024;
16const STREAM_HISTORY_LIMIT: usize = 64 * 1024 * 1024;
17const MAX_ENCODER_MATCH_OFFSET: usize = DEFAULT_DICTIONARY_SIZE;
18const MAX_ENCODER_MATCH_LENGTH: usize = 4096;
19const MAX_COMPRESSED_BLOCK_OUTPUT: usize = 4 * 1024 * 1024;
20const MAX_FILTER_BLOCK_LENGTH: usize = 0x3ffff;
21const MATCH_HASH_BUCKETS: usize = 4096;
22const MAX_MATCH_CANDIDATES: usize = 256;
23
24#[derive(Debug, Clone, PartialEq, Eq)]
25pub struct CompressedBlock {
26    pub header: CompressedBlockHeader,
27    pub header_len: usize,
28    pub payload: Range<usize>,
29}
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq)]
32pub struct CompressedBlockHeader {
33    pub flags: u8,
34    pub is_last: bool,
35    pub has_tables: bool,
36    pub final_byte_bits: u8,
37    pub payload_size: usize,
38    pub payload_bits: usize,
39}
40
41struct OwnedCompressedBlock {
42    header: CompressedBlockHeader,
43    payload: Vec<u8>,
44}
45
46#[derive(Debug)]
47#[doc(hidden)]
48pub enum StreamDecodeError<E> {
49    Decode(Error),
50    FilteredMember,
51    Sink(E),
52}
53
54impl<E> From<Error> for StreamDecodeError<E> {
55    fn from(error: Error) -> Self {
56        Self::Decode(error)
57    }
58}
59
60#[derive(Debug, Clone, Copy, PartialEq, Eq)]
61#[doc(hidden)]
62pub enum DecodedChunk<'a> {
63    Bytes(&'a [u8]),
64    Repeated { byte: u8, len: usize },
65}
66
67#[derive(Debug, Clone, PartialEq, Eq)]
68pub struct TableLengths {
69    pub main: Vec<u8>,
70    pub distance: Vec<u8>,
71    pub align: Vec<u8>,
72    pub length: Vec<u8>,
73}
74
75#[derive(Debug, Clone)]
76pub struct DecodeTables {
77    pub main: HuffmanTable,
78    pub distance: HuffmanTable,
79    pub align: HuffmanTable,
80    pub length: HuffmanTable,
81    pub align_mode: bool,
82}
83
84impl DecodeTables {
85    pub fn from_lengths(lengths: &TableLengths) -> Result<Self> {
86        let align_mode = lengths
87            .align
88            .iter()
89            .any(|&length| length != 0 && length != 4);
90        Ok(Self {
91            main: HuffmanTable::from_lengths(&lengths.main)?,
92            distance: HuffmanTable::from_lengths(&lengths.distance)?,
93            align: HuffmanTable::from_lengths(&lengths.align)?,
94            length: HuffmanTable::from_lengths(&lengths.length)?,
95            align_mode,
96        })
97    }
98}
99
100#[derive(Debug, Clone, Copy, PartialEq, Eq)]
101pub enum DecodeMode {
102    LiteralOnly,
103    Lz,
104    LzNoFilters,
105}
106
107impl DecodeMode {
108    fn uses_lz(self) -> bool {
109        matches!(self, Self::Lz | Self::LzNoFilters)
110    }
111
112    fn applies_filters(self) -> bool {
113        matches!(self, Self::Lz)
114    }
115}
116
117pub fn parse_compressed_block(input: &[u8]) -> Result<CompressedBlock> {
118    if input.len() < 3 {
119        return Err(Error::NeedMoreInput);
120    }
121
122    let flags = input[0];
123    let checksum = input[1];
124    let size_bytes = match (flags >> 3) & 0x03 {
125        0 => 1,
126        1 => 2,
127        2 => 3,
128        _ => return Err(Error::InvalidData("RAR 5 block size length is invalid")),
129    };
130    let header_len = 2 + size_bytes;
131    if input.len() < header_len {
132        return Err(Error::NeedMoreInput);
133    }
134
135    let size_data = &input[2..header_len];
136    let actual = size_data
137        .iter()
138        .fold(checksum ^ flags, |acc, &byte| acc ^ byte);
139    if actual != 0x5a {
140        return Err(Error::InvalidData("RAR 5 block header checksum mismatch"));
141    }
142
143    let payload_size = size_data
144        .iter()
145        .enumerate()
146        .fold(0usize, |acc, (index, &byte)| {
147            acc | (usize::from(byte) << (index * 8))
148        });
149    let payload_end = header_len
150        .checked_add(payload_size)
151        .ok_or(Error::InvalidData("RAR 5 block size overflows"))?;
152    if input.len() < payload_end {
153        return Err(Error::NeedMoreInput);
154    }
155
156    let final_byte_bits = ((flags & 0x07) + 1).min(8);
157    let payload_bits = if payload_size == 0 {
158        0
159    } else {
160        (payload_size - 1) * 8 + usize::from(final_byte_bits)
161    };
162
163    Ok(CompressedBlock {
164        header: CompressedBlockHeader {
165            flags,
166            is_last: flags & 0x40 != 0,
167            has_tables: flags & 0x80 != 0,
168            final_byte_bits,
169            payload_size,
170            payload_bits,
171        },
172        header_len,
173        payload: header_len..payload_end,
174    })
175}
176
177pub fn read_level_lengths(input: &[u8]) -> Result<([u8; LEVEL_TABLE_SIZE], usize)> {
178    let mut bits = BitReader::new(input);
179    let mut lengths = [0; LEVEL_TABLE_SIZE];
180    let mut pos = 0;
181    while pos < LEVEL_TABLE_SIZE {
182        let length = bits.read_bits(4)? as u8;
183        if length == 15 {
184            let zero_count = bits.read_bits(4)? as usize;
185            if zero_count == 0 {
186                lengths[pos] = 15;
187                pos += 1;
188            } else {
189                let count = zero_count + 2;
190                for _ in 0..count {
191                    if pos >= LEVEL_TABLE_SIZE {
192                        break;
193                    }
194                    lengths[pos] = 0;
195                    pos += 1;
196                }
197            }
198        } else {
199            lengths[pos] = length;
200            pos += 1;
201        }
202    }
203    Ok((lengths, bits.bit_pos))
204}
205
206pub fn table_length_count(algorithm_version: u8) -> Result<usize> {
207    match algorithm_version {
208        0 => Ok(MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_50 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE),
209        1 => Ok(MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_70 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE),
210        _ => Err(Error::InvalidData(
211            "RAR 5 unknown compression algorithm version",
212        )),
213    }
214}
215
216pub fn read_table_lengths(input: &[u8], algorithm_version: u8) -> Result<(TableLengths, usize)> {
217    let table_size = table_length_count(algorithm_version)?;
218    let (level_lengths, level_bits) = read_level_lengths(input)?;
219    let level_decoder = HuffmanTable::from_lengths(&level_lengths)?;
220    let mut bits = BitReader::new(input);
221    bits.bit_pos = level_bits;
222
223    let mut lengths = Vec::with_capacity(table_size);
224    while lengths.len() < table_size {
225        let number = level_decoder.decode(&mut bits)?;
226        match number {
227            0..=15 => lengths.push(number as u8),
228            16 | 17 => {
229                if lengths.is_empty() {
230                    return Err(Error::InvalidData(
231                        "RAR 5 table repeats missing previous length",
232                    ));
233                }
234                let count = if number == 16 {
235                    3 + bits.read_bits(3)? as usize
236                } else {
237                    11 + bits.read_bits(7)? as usize
238                };
239                let previous = *lengths.last().unwrap();
240                for _ in 0..count {
241                    if lengths.len() >= table_size {
242                        break;
243                    }
244                    lengths.push(previous);
245                }
246            }
247            18 | 19 => {
248                let count = if number == 18 {
249                    3 + bits.read_bits(3)? as usize
250                } else {
251                    11 + bits.read_bits(7)? as usize
252                };
253                for _ in 0..count {
254                    if lengths.len() >= table_size {
255                        break;
256                    }
257                    lengths.push(0);
258                }
259            }
260            _ => return Err(Error::InvalidData("RAR 5 invalid level-table symbol")),
261        }
262    }
263
264    let distance_size = match algorithm_version {
265        0 => DISTANCE_TABLE_SIZE_50,
266        1 => DISTANCE_TABLE_SIZE_70,
267        _ => unreachable!("validated by table_length_count"),
268    };
269    let distance_start = MAIN_TABLE_SIZE;
270    let align_start = distance_start + distance_size;
271    let length_start = align_start + ALIGN_TABLE_SIZE;
272
273    Ok((
274        TableLengths {
275            main: lengths[..distance_start].to_vec(),
276            distance: lengths[distance_start..align_start].to_vec(),
277            align: lengths[align_start..length_start].to_vec(),
278            length: lengths[length_start..].to_vec(),
279        },
280        bits.bit_pos,
281    ))
282}
283
284pub fn encode_table_lengths(lengths: &TableLengths, algorithm_version: u8) -> Result<Vec<u8>> {
285    encode_table_lengths_with_bit_count(lengths, algorithm_version).map(|(data, _)| data)
286}
287
288pub fn encode_table_lengths_with_bit_count(
289    lengths: &TableLengths,
290    algorithm_version: u8,
291) -> Result<(Vec<u8>, usize)> {
292    let distance_size = match algorithm_version {
293        0 => DISTANCE_TABLE_SIZE_50,
294        1 => DISTANCE_TABLE_SIZE_70,
295        _ => {
296            return Err(Error::InvalidData(
297                "RAR 5 unknown compression algorithm version",
298            ))
299        }
300    };
301    if lengths.main.len() != MAIN_TABLE_SIZE
302        || lengths.distance.len() != distance_size
303        || lengths.align.len() != ALIGN_TABLE_SIZE
304        || lengths.length.len() != LENGTH_TABLE_SIZE
305    {
306        return Err(Error::InvalidData("RAR 5 table length count mismatch"));
307    }
308
309    let flattened = lengths
310        .main
311        .iter()
312        .chain(lengths.distance.iter())
313        .chain(lengths.align.iter())
314        .chain(lengths.length.iter())
315        .copied()
316        .collect::<Vec<_>>();
317    for &length in &flattened {
318        if length > 15 {
319            return Err(Error::InvalidData("RAR 5 Huffman length is too large"));
320        }
321    }
322
323    let level_tokens = encode_table_level_tokens(&flattened);
324    let level_lengths = level_code_lengths_for_tokens(&level_tokens);
325    let level_table = HuffmanTable::from_lengths(&level_lengths)?;
326    let mut writer = BitWriter::new();
327    write_level_lengths(&mut writer, &level_lengths);
328    for token in level_tokens {
329        let (code, len) = level_table.code_for_symbol(token.symbol)?;
330        writer.write_bits(usize::from(code), usize::from(len));
331        if token.extra_bits != 0 {
332            writer.write_bits(
333                usize::from(token.extra_value),
334                usize::from(token.extra_bits),
335            );
336        }
337    }
338    let bit_count = writer.bit_pos;
339    Ok((writer.finish(), bit_count))
340}
341
342pub fn encode_compressed_block(
343    payload: &[u8],
344    payload_bits: usize,
345    has_tables: bool,
346    is_last: bool,
347) -> Result<Vec<u8>> {
348    if payload_bits > payload.len() * 8 {
349        return Err(Error::InvalidData("RAR 5 block bit count exceeds payload"));
350    }
351    if payload.is_empty() && payload_bits != 0 {
352        return Err(Error::InvalidData("RAR 5 empty block has payload bits"));
353    }
354    if !payload.is_empty() && payload_bits <= (payload.len() - 1) * 8 {
355        return Err(Error::InvalidData("RAR 5 block has unused payload bytes"));
356    }
357    if payload.len() > 0x00ff_ffff {
358        return Err(Error::InvalidData("RAR 5 block payload is too large"));
359    }
360
361    let size_len = if payload.len() <= 0xff {
362        1
363    } else if payload.len() <= 0xffff {
364        2
365    } else {
366        3
367    };
368    let final_byte_bits = if payload.is_empty() {
369        1
370    } else {
371        ((payload_bits - 1) % 8) + 1
372    };
373    let mut flags = (final_byte_bits as u8) - 1;
374    flags |= match size_len {
375        1 => 0,
376        2 => 1 << 3,
377        3 => 2 << 3,
378        _ => unreachable!("size_len is constrained above"),
379    };
380    if is_last {
381        flags |= 0x40;
382    }
383    if has_tables {
384        flags |= 0x80;
385    }
386
387    let mut size_bytes = [0u8; 3];
388    let mut size = payload.len();
389    for byte in &mut size_bytes[..size_len] {
390        *byte = size as u8;
391        size >>= 8;
392    }
393    let checksum = size_bytes[..size_len]
394        .iter()
395        .fold(0x5a ^ flags, |acc, &byte| acc ^ byte);
396    let mut out = Vec::with_capacity(2 + size_len + payload.len());
397    out.push(flags);
398    out.push(checksum);
399    out.extend_from_slice(&size_bytes[..size_len]);
400    out.extend_from_slice(payload);
401    Ok(out)
402}
403
404pub fn decode_literal_only(
405    input: &[u8],
406    algorithm_version: u8,
407    output_size: usize,
408) -> Result<Vec<u8>> {
409    let mut decoder = Unpack50Decoder::new();
410    decoder.decode_member(
411        input,
412        algorithm_version,
413        output_size,
414        false,
415        DecodeMode::LiteralOnly,
416    )
417}
418
419pub fn decode_lz(input: &[u8], algorithm_version: u8, output_size: usize) -> Result<Vec<u8>> {
420    let mut decoder = Unpack50Decoder::new();
421    decoder.decode_member(input, algorithm_version, output_size, false, DecodeMode::Lz)
422}
423
424pub fn encode_literal_only(data: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
425    let distance_size = match algorithm_version {
426        0 => DISTANCE_TABLE_SIZE_50,
427        1 => DISTANCE_TABLE_SIZE_70,
428        _ => {
429            return Err(Error::InvalidData(
430                "RAR 5 unknown compression algorithm version",
431            ))
432        }
433    };
434    let mut lengths = TableLengths {
435        main: vec![0; MAIN_TABLE_SIZE],
436        distance: vec![0; distance_size],
437        align: vec![0; ALIGN_TABLE_SIZE],
438        length: vec![0; LENGTH_TABLE_SIZE],
439    };
440    let present = literal_presence(data);
441    let literal_count = present.iter().filter(|&&used| used).count();
442    let literal_length = huffman::bits_for_symbol_count(literal_count);
443    for (symbol, used) in present.into_iter().enumerate() {
444        if used {
445            lengths.main[symbol] = literal_length;
446        }
447    }
448
449    let table = HuffmanTable::from_lengths(&lengths.main)?;
450    let (table_data, table_bits) =
451        encode_table_lengths_with_bit_count(&lengths, algorithm_version)?;
452    let mut writer = BitWriter {
453        bytes: table_data,
454        bit_pos: table_bits,
455    };
456    for &byte in data {
457        let (code, len) = table.code_for_symbol(byte as usize)?;
458        writer.write_bits(usize::from(code), usize::from(len));
459    }
460    let payload_bits = writer.bit_pos;
461    encode_compressed_block(&writer.finish(), payload_bits, true, true)
462}
463
464pub fn encode_lz_member(data: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
465    encode_lz_member_with_history(data, &[], algorithm_version)
466}
467
468#[derive(Debug, Clone, Copy, PartialEq, Eq)]
469#[non_exhaustive]
470pub struct EncodeOptions {
471    pub max_match_candidates: usize,
472    pub lazy_matching: bool,
473    pub lazy_lookahead: usize,
474    pub max_match_distance: usize,
475}
476
477impl EncodeOptions {
478    pub const fn new(max_match_candidates: usize) -> Self {
479        Self {
480            max_match_candidates,
481            lazy_matching: false,
482            lazy_lookahead: 1,
483            max_match_distance: MAX_ENCODER_MATCH_OFFSET,
484        }
485    }
486
487    pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
488        self.lazy_matching = enabled;
489        self
490    }
491
492    pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
493        self.lazy_lookahead = bytes;
494        self
495    }
496
497    pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
498        self.max_match_distance = distance;
499        self
500    }
501}
502
503impl Default for EncodeOptions {
504    fn default() -> Self {
505        Self::new(MAX_MATCH_CANDIDATES)
506    }
507}
508
509pub fn encode_lz_member_with_history(
510    data: &[u8],
511    history: &[u8],
512    algorithm_version: u8,
513) -> Result<Vec<u8>> {
514    encode_lz_member_inner(
515        data,
516        history,
517        algorithm_version,
518        &[],
519        EncodeOptions::default(),
520    )
521}
522
523pub fn encode_lz_member_with_options(
524    data: &[u8],
525    algorithm_version: u8,
526    options: EncodeOptions,
527) -> Result<Vec<u8>> {
528    encode_lz_member_with_history_and_options(data, &[], algorithm_version, options)
529}
530
531pub fn encode_lz_member_with_history_and_options(
532    data: &[u8],
533    history: &[u8],
534    algorithm_version: u8,
535    options: EncodeOptions,
536) -> Result<Vec<u8>> {
537    encode_lz_member_inner(data, history, algorithm_version, &[], options)
538}
539
540#[derive(Debug, Clone, Copy, PartialEq, Eq)]
541pub enum Rar50FilterKind {
542    Delta { channels: usize },
543    E8,
544    E8E9,
545    Arm,
546}
547
548#[derive(Debug, Clone, PartialEq, Eq)]
549pub struct Rar50FilterSpec {
550    pub kind: Rar50FilterKind,
551    pub range: Option<Range<usize>>,
552}
553
554impl Rar50FilterSpec {
555    pub fn new(kind: Rar50FilterKind) -> Self {
556        Self { kind, range: None }
557    }
558
559    pub fn range(kind: Rar50FilterKind, range: Range<usize>) -> Self {
560        Self {
561            kind,
562            range: Some(range),
563        }
564    }
565}
566
567fn filtered_lz_member(
568    data: &[u8],
569    filters: &[Rar50FilterSpec],
570) -> Result<(Vec<u8>, Vec<EncodeFilter>)> {
571    let mut filtered = data.to_vec();
572    let mut records = Vec::with_capacity(filters.len());
573    for filter in filters {
574        let range = filter.range.clone().unwrap_or(0..data.len());
575        if range.start >= range.end || range.end > data.len() {
576            return Err(Error::InvalidData("RAR 5 filter range is invalid"));
577        }
578        if range.start > u32::MAX as usize {
579            return Err(Error::InvalidData("RAR 5 filter offset is too large"));
580        }
581
582        let filter_data = &mut filtered[range.clone()];
583        let (filter_type, channels) = encode_filter_data(filter.kind, filter_data, range.start)?;
584        records.push(EncodeFilter {
585            offset: range.start,
586            length: range.len(),
587            filter_type,
588            channels,
589        });
590    }
591    Ok((filtered, records))
592}
593
594fn encode_filter_data(
595    kind: Rar50FilterKind,
596    data: &mut [u8],
597    file_offset: usize,
598) -> Result<(FilterType, usize)> {
599    if file_offset > u32::MAX as usize {
600        return Err(Error::InvalidData("RAR 5 filter offset is too large"));
601    }
602    match kind {
603        Rar50FilterKind::Delta { channels } => {
604            filters::encode_in_place(
605                FilterOp::Delta { channels },
606                data,
607                0,
608                rar50_delta_messages(),
609            )?;
610            Ok((FilterType::Delta, channels))
611        }
612        Rar50FilterKind::E8 => {
613            filters::encode_in_place(
614                FilterOp::E8,
615                data,
616                file_offset as u32,
617                rar50_delta_messages(),
618            )?;
619            Ok((FilterType::E8, 0))
620        }
621        Rar50FilterKind::E8E9 => {
622            filters::encode_in_place(
623                FilterOp::E8E9,
624                data,
625                file_offset as u32,
626                rar50_delta_messages(),
627            )?;
628            Ok((FilterType::E8E9, 0))
629        }
630        Rar50FilterKind::Arm => {
631            arm_encode(data, file_offset as u32);
632            Ok((FilterType::Arm, 0))
633        }
634    }
635}
636
637fn filtered_lz_blocks(
638    data: &[u8],
639    filters: &[Rar50FilterSpec],
640    history: &[u8],
641    algorithm_version: u8,
642    options: EncodeOptions,
643) -> Result<Vec<u8>> {
644    let filters = normalized_filter_specs(data.len(), filters)?;
645    let mut out = Vec::new();
646    let mut block_history =
647        history[history.len().saturating_sub(options.max_match_distance)..].to_vec();
648    let mut chunk_start = 0usize;
649    while chunk_start < data.len() {
650        let chunk_end = (chunk_start + MAX_FILTER_BLOCK_LENGTH).min(data.len());
651        let mut chunk = data[chunk_start..chunk_end].to_vec();
652        let mut records = Vec::new();
653        for filter in &filters {
654            let start = filter.range.start.max(chunk_start);
655            let end = filter.range.end.min(chunk_end);
656            if start >= end {
657                continue;
658            }
659            let local_start = start - chunk_start;
660            let local_end = end - chunk_start;
661            let (filter_type, channels) =
662                encode_filter_data(filter.kind, &mut chunk[local_start..local_end], start)?;
663            records.push(EncodeFilter {
664                offset: local_start,
665                length: local_end - local_start,
666                filter_type,
667                channels,
668            });
669        }
670        out.extend(encode_lz_block(
671            &chunk,
672            &block_history,
673            algorithm_version,
674            &records,
675            options,
676            chunk_end == data.len(),
677        )?);
678        block_history.extend_from_slice(&chunk);
679        let keep_from = block_history
680            .len()
681            .saturating_sub(options.max_match_distance);
682        if keep_from != 0 {
683            block_history.drain(..keep_from);
684        }
685        chunk_start = chunk_end;
686    }
687    Ok(out)
688}
689
690#[derive(Debug, Clone, PartialEq, Eq)]
691struct NormalizedFilterSpec {
692    kind: Rar50FilterKind,
693    range: Range<usize>,
694}
695
696fn normalized_filter_specs(
697    data_len: usize,
698    filters: &[Rar50FilterSpec],
699) -> Result<Vec<NormalizedFilterSpec>> {
700    let mut normalized = Vec::with_capacity(filters.len());
701    for filter in filters {
702        let range = filter.range.clone().unwrap_or(0..data_len);
703        if range.start >= range.end || range.end > data_len {
704            return Err(Error::InvalidData("RAR 5 filter range is invalid"));
705        }
706        normalized.push(NormalizedFilterSpec {
707            kind: filter.kind,
708            range,
709        });
710    }
711    Ok(normalized)
712}
713
714fn encode_lz_member_inner(
715    data: &[u8],
716    history: &[u8],
717    algorithm_version: u8,
718    initial_filters: &[EncodeFilter],
719    options: EncodeOptions,
720) -> Result<Vec<u8>> {
721    if data.len() > MAX_COMPRESSED_BLOCK_OUTPUT && initial_filters.is_empty() {
722        let mut out = Vec::new();
723        let mut block_history =
724            history[history.len().saturating_sub(options.max_match_distance)..].to_vec();
725        let mut chunks = data.chunks(MAX_COMPRESSED_BLOCK_OUTPUT).peekable();
726        while let Some(chunk) = chunks.next() {
727            let is_last = chunks.peek().is_none();
728            out.extend(encode_lz_block(
729                chunk,
730                &block_history,
731                algorithm_version,
732                &[],
733                options,
734                is_last,
735            )?);
736            block_history.extend_from_slice(chunk);
737            let keep_from = block_history
738                .len()
739                .saturating_sub(options.max_match_distance);
740            if keep_from != 0 {
741                block_history.drain(..keep_from);
742            }
743        }
744        return Ok(out);
745    }
746
747    encode_lz_block(
748        data,
749        history,
750        algorithm_version,
751        initial_filters,
752        options,
753        true,
754    )
755}
756
757fn encode_lz_block(
758    data: &[u8],
759    history: &[u8],
760    algorithm_version: u8,
761    initial_filters: &[EncodeFilter],
762    options: EncodeOptions,
763    is_last: bool,
764) -> Result<Vec<u8>> {
765    let distance_size = match algorithm_version {
766        0 => DISTANCE_TABLE_SIZE_50,
767        1 => DISTANCE_TABLE_SIZE_70,
768        _ => {
769            return Err(Error::InvalidData(
770                "RAR 5 unknown compression algorithm version",
771            ))
772        }
773    };
774    let mut tokens = Vec::new();
775    tokens.extend(initial_filters.iter().copied().map(EncodeToken::Filter));
776    tokens.extend(encode_tokens(data, history, options, distance_size));
777    let mut lengths = TableLengths {
778        main: vec![0; MAIN_TABLE_SIZE],
779        distance: vec![0; distance_size],
780        align: vec![0; ALIGN_TABLE_SIZE],
781        length: vec![0; LENGTH_TABLE_SIZE],
782    };
783
784    let mut main_frequencies = vec![0usize; MAIN_TABLE_SIZE];
785    let mut distance_frequencies = vec![0usize; distance_size];
786    let mut align_frequencies = vec![0usize; ALIGN_TABLE_SIZE];
787    let mut length_frequencies = vec![0usize; LENGTH_TABLE_SIZE];
788    let mut state = EncoderMatchState::default();
789    for token in &tokens {
790        match *token {
791            EncodeToken::Filter(_) => main_frequencies[256] += 1,
792            EncodeToken::Literal(byte) => main_frequencies[byte as usize] += 1,
793            EncodeToken::Match { length, distance } => {
794                match state.encode_match(length, distance, distance_size)? {
795                    EncodedMatch::LastLengthRepeat => main_frequencies[257] += 1,
796                    EncodedMatch::RepeatDistance {
797                        index, length_slot, ..
798                    } => {
799                        main_frequencies[258 + index] += 1;
800                        length_frequencies[length_slot] += 1;
801                    }
802                    EncodedMatch::New {
803                        length_slot,
804                        distance_slot,
805                        distance_extra,
806                        distance_bit_count,
807                        ..
808                    } => {
809                        main_frequencies[262 + length_slot] += 1;
810                        distance_frequencies[distance_slot] += 1;
811                        if distance_bit_count >= 4 {
812                            align_frequencies[distance_extra & 0x0f] += 1;
813                        }
814                    }
815                }
816                state.remember(length, distance);
817            }
818        }
819    }
820
821    lengths.main = huffman::lengths_for_frequencies(&main_frequencies, 15);
822    lengths.distance = huffman::lengths_for_frequencies(&distance_frequencies, 15);
823    lengths.length = huffman::lengths_for_frequencies(&length_frequencies, 15);
824    lengths.align = huffman::lengths_for_frequencies(&align_frequencies, 15);
825
826    let main_table = HuffmanTable::from_lengths(&lengths.main)?;
827    let distance_table = HuffmanTable::from_lengths(&lengths.distance)?;
828    let align_table = HuffmanTable::from_lengths(&lengths.align)?;
829    let length_table = HuffmanTable::from_lengths(&lengths.length)?;
830    let (table_data, table_bits) =
831        encode_table_lengths_with_bit_count(&lengths, algorithm_version)?;
832    let mut writer = BitWriter {
833        bytes: table_data,
834        bit_pos: table_bits,
835    };
836    let mut state = EncoderMatchState::default();
837    for token in tokens {
838        match token {
839            EncodeToken::Filter(filter) => {
840                let (code, len) = main_table.code_for_symbol(256)?;
841                writer.write_bits(usize::from(code), usize::from(len));
842                write_filter(&mut writer, filter)?;
843            }
844            EncodeToken::Literal(byte) => {
845                let (code, len) = main_table.code_for_symbol(byte as usize)?;
846                writer.write_bits(usize::from(code), usize::from(len));
847            }
848            EncodeToken::Match { length, distance } => {
849                match state.encode_match(length, distance, distance_size)? {
850                    EncodedMatch::LastLengthRepeat => {
851                        let (code, len) = main_table.code_for_symbol(257)?;
852                        writer.write_bits(usize::from(code), usize::from(len));
853                    }
854                    EncodedMatch::RepeatDistance {
855                        index,
856                        length_slot,
857                        length_extra,
858                    } => {
859                        let (code, len) = main_table.code_for_symbol(258 + index)?;
860                        writer.write_bits(usize::from(code), usize::from(len));
861                        let (code, len) = length_table.code_for_symbol(length_slot)?;
862                        writer.write_bits(usize::from(code), usize::from(len));
863                        let length_extra_bits = length_slot_extra_bits(length_slot)?;
864                        if length_extra_bits != 0 {
865                            writer.write_bits(length_extra, usize::from(length_extra_bits));
866                        }
867                    }
868                    EncodedMatch::New {
869                        length_slot,
870                        length_extra,
871                        distance_slot,
872                        distance_extra,
873                        distance_bit_count,
874                    } => {
875                        let (code, len) = main_table.code_for_symbol(262 + length_slot)?;
876                        writer.write_bits(usize::from(code), usize::from(len));
877                        let length_extra_bits = length_slot_extra_bits(length_slot)?;
878                        if length_extra_bits != 0 {
879                            writer.write_bits(length_extra, usize::from(length_extra_bits));
880                        }
881                        let (code, len) = distance_table.code_for_symbol(distance_slot)?;
882                        writer.write_bits(usize::from(code), usize::from(len));
883                        if distance_bit_count >= 4 {
884                            if distance_bit_count > 4 {
885                                writer.write_bits(distance_extra >> 4, distance_bit_count - 4);
886                            }
887                            let (code, len) = align_table.code_for_symbol(distance_extra & 0x0f)?;
888                            writer.write_bits(usize::from(code), usize::from(len));
889                        } else if distance_bit_count != 0 {
890                            writer.write_bits(distance_extra, distance_bit_count);
891                        }
892                    }
893                }
894                state.remember(length, distance);
895            }
896        }
897    }
898
899    let payload_bits = writer.bit_pos;
900    encode_compressed_block(&writer.finish(), payload_bits, true, is_last)
901}
902
903#[derive(Debug, Clone, Default)]
904pub struct Unpack50Encoder {
905    history: Vec<u8>,
906    options: EncodeOptions,
907}
908
909impl Unpack50Encoder {
910    pub fn new() -> Self {
911        Self::default()
912    }
913
914    pub fn with_options(options: EncodeOptions) -> Self {
915        Self {
916            history: Vec::new(),
917            options,
918        }
919    }
920
921    pub fn encode_member(&mut self, input: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
922        let packed = encode_lz_member_with_history_and_options(
923            input,
924            &self.history,
925            algorithm_version,
926            self.options,
927        )?;
928        self.remember(input);
929        Ok(packed)
930    }
931
932    pub fn encode_member_with_filter(
933        &mut self,
934        input: &[u8],
935        algorithm_version: u8,
936        filter: Rar50FilterSpec,
937    ) -> Result<Vec<u8>> {
938        self.encode_member_with_filters(input, algorithm_version, &[filter])
939    }
940
941    pub fn encode_member_with_filters(
942        &mut self,
943        input: &[u8],
944        algorithm_version: u8,
945        filters: &[Rar50FilterSpec],
946    ) -> Result<Vec<u8>> {
947        if input.len() > MAX_FILTER_BLOCK_LENGTH {
948            let packed = filtered_lz_blocks(
949                input,
950                filters,
951                &self.history,
952                algorithm_version,
953                self.options,
954            )?;
955            self.remember(input);
956            return Ok(packed);
957        }
958        let (filtered, records) = filtered_lz_member(input, filters)?;
959        let packed = encode_lz_member_inner(
960            &filtered,
961            &self.history,
962            algorithm_version,
963            &records,
964            self.options,
965        )?;
966        self.remember(input);
967        Ok(packed)
968    }
969
970    fn remember(&mut self, input: &[u8]) {
971        self.history.extend_from_slice(input);
972        let keep_from = self
973            .history
974            .len()
975            .saturating_sub(self.options.max_match_distance);
976        if keep_from != 0 {
977            self.history.drain(..keep_from);
978        }
979    }
980}
981
982#[derive(Debug, Clone, Copy)]
983enum EncodeToken {
984    Filter(EncodeFilter),
985    Literal(u8),
986    Match { length: usize, distance: usize },
987}
988
989#[derive(Debug, Clone, Copy)]
990struct EncodeFilter {
991    offset: usize,
992    length: usize,
993    filter_type: FilterType,
994    channels: usize,
995}
996
997#[derive(Debug, Clone, Copy, Default)]
998struct EncoderMatchState {
999    reps: [usize; 4],
1000    last_length: usize,
1001}
1002
1003#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1004enum EncodedMatch {
1005    LastLengthRepeat,
1006    RepeatDistance {
1007        index: usize,
1008        length_slot: usize,
1009        length_extra: usize,
1010    },
1011    New {
1012        length_slot: usize,
1013        length_extra: usize,
1014        distance_slot: usize,
1015        distance_extra: usize,
1016        distance_bit_count: usize,
1017    },
1018}
1019
1020impl EncoderMatchState {
1021    fn encode_match(
1022        &self,
1023        length: usize,
1024        distance: usize,
1025        distance_size: usize,
1026    ) -> Result<EncodedMatch> {
1027        if distance == self.reps[0] && length == self.last_length && self.last_length != 0 {
1028            return Ok(EncodedMatch::LastLengthRepeat);
1029        }
1030        if let Some(index) = self
1031            .reps
1032            .iter()
1033            .position(|&repeat_distance| repeat_distance == distance && repeat_distance != 0)
1034        {
1035            let (length_slot, length_extra) = length_slot_for_match(length)?;
1036            return Ok(EncodedMatch::RepeatDistance {
1037                index,
1038                length_slot,
1039                length_extra,
1040            });
1041        }
1042
1043        let (distance_slot, distance_extra) = distance_slot_for_match(distance, distance_size)?;
1044        let encoded_length = length
1045            .checked_sub(length_bonus(distance))
1046            .ok_or(Error::InvalidData("RAR 5 adjusted match length underflows"))?;
1047        let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1048        let (length_slot, length_extra) = length_slot_for_match(encoded_length)?;
1049        Ok(EncodedMatch::New {
1050            length_slot,
1051            length_extra,
1052            distance_slot,
1053            distance_extra,
1054            distance_bit_count,
1055        })
1056    }
1057
1058    fn remember(&mut self, length: usize, distance: usize) {
1059        if distance == self.reps[0] && length == self.last_length {
1060            return;
1061        }
1062        if let Some(index) = self
1063            .reps
1064            .iter()
1065            .position(|&repeat_distance| repeat_distance == distance)
1066        {
1067            self.reps[..=index].rotate_right(1);
1068        } else {
1069            self.reps.rotate_right(1);
1070        }
1071        self.reps[0] = distance;
1072        self.last_length = length;
1073    }
1074}
1075
1076fn encode_tokens(
1077    input: &[u8],
1078    history: &[u8],
1079    options: EncodeOptions,
1080    distance_size: usize,
1081) -> Vec<EncodeToken> {
1082    let mut tokens = Vec::new();
1083    let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1084    let history = &history[history.len().saturating_sub(options.max_match_distance)..];
1085    let mut combined = Vec::with_capacity(history.len() + input.len());
1086    combined.extend_from_slice(history);
1087    combined.extend_from_slice(input);
1088    for history_pos in 0..history.len().saturating_sub(2) {
1089        insert_match_position(&combined, history_pos, &mut buckets);
1090    }
1091
1092    let mut pos = history.len();
1093    let end = combined.len();
1094    let mut state = EncoderMatchState::default();
1095    while pos < end {
1096        if let Some(candidate) = best_match(
1097            &combined,
1098            pos,
1099            end,
1100            &buckets,
1101            options,
1102            &state,
1103            distance_size,
1104        ) {
1105            if should_lazy_emit_literal(
1106                &combined,
1107                pos,
1108                &buckets,
1109                options,
1110                &state,
1111                distance_size,
1112                candidate,
1113            ) {
1114                tokens.push(EncodeToken::Literal(combined[pos]));
1115                insert_match_position(&combined, pos, &mut buckets);
1116                pos += 1;
1117                continue;
1118            }
1119            let MatchCandidate {
1120                length, distance, ..
1121            } = candidate;
1122            tokens.push(EncodeToken::Match { length, distance });
1123            state.remember(length, distance);
1124            for history_pos in pos..pos + length {
1125                insert_match_position(&combined, history_pos, &mut buckets);
1126            }
1127            pos += length;
1128        } else {
1129            tokens.push(EncodeToken::Literal(combined[pos]));
1130            insert_match_position(&combined, pos, &mut buckets);
1131            pos += 1;
1132        }
1133    }
1134    tokens
1135}
1136
1137fn should_lazy_emit_literal(
1138    input: &[u8],
1139    pos: usize,
1140    buckets: &[Vec<usize>],
1141    options: EncodeOptions,
1142    state: &EncoderMatchState,
1143    distance_size: usize,
1144    current: MatchCandidate,
1145) -> bool {
1146    let end = input.len();
1147    if !options.lazy_matching || pos + 1 >= end {
1148        return false;
1149    }
1150    let lookahead = options.lazy_lookahead.max(1);
1151    (1..=lookahead)
1152        .take_while(|offset| pos + offset < end)
1153        .any(|offset| {
1154            best_match(
1155                input,
1156                pos + offset,
1157                end,
1158                buckets,
1159                options,
1160                state,
1161                distance_size,
1162            )
1163            .is_some_and(|next| {
1164                let skipped_literal_score = offset as isize * 8;
1165                next.score > current.score + skipped_literal_score
1166            })
1167        })
1168}
1169
1170#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1171struct MatchCandidate {
1172    length: usize,
1173    distance: usize,
1174    score: isize,
1175    cost: usize,
1176}
1177
1178fn best_match(
1179    input: &[u8],
1180    pos: usize,
1181    end: usize,
1182    buckets: &[Vec<usize>],
1183    options: EncodeOptions,
1184    state: &EncoderMatchState,
1185    distance_size: usize,
1186) -> Option<MatchCandidate> {
1187    let max_distance = pos.min(options.max_match_distance);
1188    let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
1189    if options.max_match_candidates == 0
1190        || max_distance == 0
1191        || max_length < 4
1192        || pos + 2 >= input.len()
1193    {
1194        return None;
1195    }
1196    let bucket = &buckets[match_hash(input, pos)];
1197    let mut best = None;
1198    let mut checked = 0usize;
1199    for distance in state.reps {
1200        if distance == 0 || distance > max_distance {
1201            continue;
1202        }
1203        let length = match_length(input, pos, distance, max_length);
1204        consider_match_candidate(&mut best, state, distance_size, length, distance);
1205    }
1206    for &candidate in bucket.iter().rev() {
1207        if candidate >= pos {
1208            continue;
1209        }
1210        let distance = pos - candidate;
1211        if distance > max_distance {
1212            break;
1213        }
1214        checked += 1;
1215        let length = match_length(input, pos, distance, max_length);
1216        consider_match_candidate(&mut best, state, distance_size, length, distance);
1217        if let Some(best) = best {
1218            if best.length == max_length {
1219                break;
1220            }
1221        }
1222        if checked >= options.max_match_candidates {
1223            break;
1224        }
1225    }
1226    best
1227}
1228
1229fn match_length(input: &[u8], pos: usize, distance: usize, max_length: usize) -> usize {
1230    let mut length = 0usize;
1231    while length < max_length && input[pos + length] == input[pos + length - distance] {
1232        length += 1;
1233    }
1234    length
1235}
1236
1237fn consider_match_candidate(
1238    best: &mut Option<MatchCandidate>,
1239    state: &EncoderMatchState,
1240    distance_size: usize,
1241    length: usize,
1242    distance: usize,
1243) {
1244    if length < 4 {
1245        return;
1246    }
1247    let Ok(cost) = estimated_match_cost(state, length, distance, distance_size) else {
1248        return;
1249    };
1250    let candidate = MatchCandidate {
1251        length,
1252        distance,
1253        score: (length as isize * 16) - cost as isize,
1254        cost,
1255    };
1256    if best.is_none_or(|best| {
1257        candidate.score > best.score
1258            || (candidate.score == best.score
1259                && (candidate.length > best.length
1260                    || (candidate.length == best.length && candidate.cost < best.cost)
1261                    || (candidate.length == best.length
1262                        && candidate.cost == best.cost
1263                        && candidate.distance < best.distance)))
1264    }) {
1265        *best = Some(candidate);
1266    }
1267}
1268
1269fn estimated_match_cost(
1270    state: &EncoderMatchState,
1271    length: usize,
1272    distance: usize,
1273    distance_size: usize,
1274) -> Result<usize> {
1275    if distance == state.reps[0] && length == state.last_length && state.last_length != 0 {
1276        return Ok(2);
1277    }
1278    if state
1279        .reps
1280        .iter()
1281        .any(|&repeat_distance| repeat_distance == distance && repeat_distance != 0)
1282    {
1283        let (length_slot, _) = length_slot_for_match(length)?;
1284        return Ok(5 + usize::from(length_slot_extra_bits(length_slot)?));
1285    }
1286
1287    let (distance_slot, _) = distance_slot_for_match(distance, distance_size)?;
1288    let encoded_length = length
1289        .checked_sub(length_bonus(distance))
1290        .ok_or(Error::InvalidData("RAR 5 adjusted match length underflows"))?;
1291    let (length_slot, _) = length_slot_for_match(encoded_length)?;
1292    Ok(10
1293        + usize::from(length_slot_extra_bits(length_slot)?)
1294        + distance_slot_bit_count(distance_slot)?)
1295}
1296
1297fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
1298    if pos + 2 < input.len() {
1299        buckets[match_hash(input, pos)].push(pos);
1300    }
1301}
1302
1303fn match_hash(input: &[u8], pos: usize) -> usize {
1304    let value =
1305        ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1306    value & (MATCH_HASH_BUCKETS - 1)
1307}
1308
1309fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
1310    if length < 2 {
1311        return Err(Error::InvalidData("RAR 5 match length is too short"));
1312    }
1313    for slot in 0..LENGTH_TABLE_SIZE {
1314        let bit_count = usize::from(length_slot_extra_bits(slot)?);
1315        let base = slot_to_length(slot, 0)?;
1316        let max = base
1317            + if bit_count == 0 {
1318                0
1319            } else {
1320                (1usize << bit_count) - 1
1321            };
1322        if length >= base && length <= max {
1323            return Ok((slot, length - base));
1324        }
1325    }
1326    Err(Error::InvalidData("RAR 5 match length is too long"))
1327}
1328
1329fn distance_slot_for_match(distance: usize, distance_size: usize) -> Result<(usize, usize)> {
1330    if distance == 0 {
1331        return Err(Error::InvalidData("RAR 5 match distance is zero"));
1332    }
1333    for slot in 0..distance_size {
1334        let bit_count = distance_slot_bit_count(slot)?;
1335        let base = slot_to_distance(slot, 0)?;
1336        let max = base
1337            + if bit_count == 0 {
1338                0
1339            } else {
1340                (1usize << bit_count) - 1
1341            };
1342        if distance >= base && distance <= max {
1343            return Ok((slot, distance - base));
1344        }
1345    }
1346    Err(Error::InvalidData("RAR 5 match distance is too large"))
1347}
1348
1349fn literal_presence(data: &[u8]) -> [bool; 256] {
1350    let mut present = [false; 256];
1351    for &byte in data {
1352        present[byte as usize] = true;
1353    }
1354    present
1355}
1356
1357#[derive(Debug, Clone)]
1358pub struct Unpack50Decoder {
1359    tables: Option<DecodeTables>,
1360    reps: [usize; 4],
1361    last_length: usize,
1362    history: Vec<u8>,
1363}
1364
1365impl Unpack50Decoder {
1366    pub fn new() -> Self {
1367        Self {
1368            tables: None,
1369            reps: [0; 4],
1370            last_length: 0,
1371            history: Vec::new(),
1372        }
1373    }
1374
1375    pub fn decode_member(
1376        &mut self,
1377        input: &[u8],
1378        algorithm_version: u8,
1379        output_size: usize,
1380        solid: bool,
1381        mode: DecodeMode,
1382    ) -> Result<Vec<u8>> {
1383        self.decode_member_with_dictionary(
1384            input,
1385            algorithm_version,
1386            output_size,
1387            DEFAULT_DICTIONARY_SIZE,
1388            solid,
1389            mode,
1390        )
1391    }
1392
1393    pub fn decode_member_with_dictionary(
1394        &mut self,
1395        input: &[u8],
1396        algorithm_version: u8,
1397        output_size: usize,
1398        dictionary_size: usize,
1399        solid: bool,
1400        mode: DecodeMode,
1401    ) -> Result<Vec<u8>> {
1402        let mut input = std::io::Cursor::new(input);
1403        self.decode_member_from_reader_with_dictionary(
1404            &mut input,
1405            algorithm_version,
1406            output_size,
1407            dictionary_size,
1408            solid,
1409            mode,
1410        )
1411    }
1412
1413    pub fn decode_member_from_reader(
1414        &mut self,
1415        input: &mut impl Read,
1416        algorithm_version: u8,
1417        output_size: usize,
1418        solid: bool,
1419        mode: DecodeMode,
1420    ) -> Result<Vec<u8>> {
1421        self.decode_member_from_reader_with_dictionary(
1422            input,
1423            algorithm_version,
1424            output_size,
1425            DEFAULT_DICTIONARY_SIZE,
1426            solid,
1427            mode,
1428        )
1429    }
1430
1431    pub fn decode_member_from_reader_with_dictionary(
1432        &mut self,
1433        input: &mut impl Read,
1434        algorithm_version: u8,
1435        output_size: usize,
1436        dictionary_size: usize,
1437        solid: bool,
1438        mode: DecodeMode,
1439    ) -> Result<Vec<u8>> {
1440        if dictionary_size == 0 {
1441            return Err(Error::InvalidData("RAR 5 dictionary size is zero"));
1442        }
1443        if !solid {
1444            self.reset();
1445        }
1446
1447        let mut output = Vec::with_capacity(output_size.min(MAX_INITIAL_OUTPUT_CAPACITY));
1448        let mut filters = Vec::new();
1449
1450        loop {
1451            let block = read_compressed_block(input)?;
1452            let payload = block.payload.as_slice();
1453            let mut payload_bit_pos = 0;
1454            if block.header.has_tables {
1455                let (lengths, table_bits) = read_table_lengths(payload, algorithm_version)?;
1456                self.tables = Some(DecodeTables::from_lengths(&lengths)?);
1457                payload_bit_pos = table_bits;
1458            }
1459            let tables = self
1460                .tables
1461                .take()
1462                .ok_or(Error::InvalidData("RAR 5 block reuses missing tables"))?;
1463            let mut bits = BitReader::new(payload);
1464            bits.bit_pos = payload_bit_pos;
1465
1466            while bits.bit_pos < block.header.payload_bits && output.len() < output_size {
1467                let symbol = tables.main.decode(&mut bits)?;
1468                match symbol {
1469                    0..=255 => output.push(symbol as u8),
1470                    256 if mode.uses_lz() => {
1471                        filters.push(read_filter(&mut bits, output.len())?);
1472                    }
1473                    257 if mode.uses_lz() => {
1474                        if self.last_length != 0 {
1475                            self.copy_match(
1476                                &mut output,
1477                                self.reps[0],
1478                                self.last_length,
1479                                output_size,
1480                                dictionary_size,
1481                            )?;
1482                        }
1483                    }
1484                    258..=261 if mode.uses_lz() => {
1485                        let rep_index = symbol - 258;
1486                        let distance = self.reps[rep_index];
1487                        if distance == 0 {
1488                            return Err(Error::InvalidData(
1489                                "RAR 5 repeat distance is not initialized",
1490                            ));
1491                        }
1492                        let length_slot = tables.length.decode(&mut bits)?;
1493                        let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1494                        let length = slot_to_length(length_slot, length_extra)?;
1495                        self.reps[..=rep_index].rotate_right(1);
1496                        self.reps[0] = distance;
1497                        self.last_length = length;
1498                        self.copy_match(
1499                            &mut output,
1500                            distance,
1501                            length,
1502                            output_size,
1503                            dictionary_size,
1504                        )?;
1505                    }
1506                    262.. if mode.uses_lz() => {
1507                        let length_slot = symbol - 262;
1508                        let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1509                        let mut length = slot_to_length(length_slot, length_extra)?;
1510                        let distance_slot = tables.distance.decode(&mut bits)?;
1511                        let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1512                        let distance_extra = if distance_bit_count >= 4 && tables.align_mode {
1513                            let high = bits.read_bits((distance_bit_count - 4) as u8)?;
1514                            let low = tables.align.decode(&mut bits)? as u32;
1515                            (high << 4) | low
1516                        } else {
1517                            bits.read_bits(distance_bit_count as u8)?
1518                        };
1519                        let distance = slot_to_distance(distance_slot, distance_extra)?;
1520                        length += length_bonus(distance);
1521                        self.reps.rotate_right(1);
1522                        self.reps[0] = distance;
1523                        self.last_length = length;
1524                        self.copy_match(
1525                            &mut output,
1526                            distance,
1527                            length,
1528                            output_size,
1529                            dictionary_size,
1530                        )?;
1531                    }
1532                    _ if mode == DecodeMode::LiteralOnly => {
1533                        return Err(Error::InvalidData(
1534                            "RAR 5 literal-only decoder encountered non-literal symbol",
1535                        ));
1536                    }
1537                    _ => {
1538                        return Err(Error::InvalidData(
1539                            "RAR 5 decoder encountered unsupported control symbol",
1540                        ));
1541                    }
1542                }
1543            }
1544
1545            self.tables = Some(tables);
1546            if block.header.is_last || output.len() >= output_size {
1547                break;
1548            }
1549        }
1550
1551        if output.len() == output_size {
1552            let history_output = if mode.applies_filters() && !filters.is_empty() {
1553                Some(output.clone())
1554            } else {
1555                None
1556            };
1557            if mode.applies_filters() {
1558                apply_filters(&mut output, &filters)?;
1559            }
1560            self.history
1561                .extend_from_slice(history_output.as_deref().unwrap_or(&output));
1562            if self.history.len() > dictionary_size {
1563                let discard = self.history.len() - dictionary_size;
1564                self.history.drain(..discard);
1565            }
1566            Ok(output)
1567        } else {
1568            Err(Error::NeedMoreInput)
1569        }
1570    }
1571
1572    pub fn decode_member_from_reader_with_dictionary_to_sink<E>(
1573        &mut self,
1574        input: &mut impl Read,
1575        algorithm_version: u8,
1576        output_size: usize,
1577        dictionary_size: usize,
1578        solid: bool,
1579        mut sink: impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1580    ) -> std::result::Result<(), StreamDecodeError<E>> {
1581        if dictionary_size == 0 {
1582            return Err(Error::InvalidData("RAR 5 dictionary size is zero").into());
1583        }
1584        if !solid {
1585            self.reset();
1586        }
1587
1588        let history_limit = dictionary_size.min(STREAM_HISTORY_LIMIT);
1589        if self.history.len() > history_limit {
1590            let discard = self.history.len() - history_limit;
1591            self.history.drain(..discard);
1592        }
1593        let mut output = StreamingOutput::new(
1594            std::mem::take(&mut self.history),
1595            output_size,
1596            dictionary_size,
1597            history_limit,
1598        );
1599
1600        loop {
1601            let block = read_compressed_block(input)?;
1602            let payload = block.payload.as_slice();
1603            let mut payload_bit_pos = 0;
1604            if block.header.has_tables {
1605                let (lengths, table_bits) = read_table_lengths(payload, algorithm_version)?;
1606                self.tables = Some(DecodeTables::from_lengths(&lengths)?);
1607                payload_bit_pos = table_bits;
1608            }
1609            let tables = self
1610                .tables
1611                .take()
1612                .ok_or(Error::InvalidData("RAR 5 block reuses missing tables"))?;
1613            let mut bits = BitReader::new(payload);
1614            bits.bit_pos = payload_bit_pos;
1615
1616            while bits.bit_pos < block.header.payload_bits && output.written() < output_size {
1617                let symbol = tables.main.decode(&mut bits)?;
1618                match symbol {
1619                    0..=255 => output.push(symbol as u8, &mut sink)?,
1620                    256 => {
1621                        return Err(StreamDecodeError::FilteredMember);
1622                    }
1623                    257 => {
1624                        if self.last_length != 0 {
1625                            output.copy_match(self.reps[0], self.last_length, &mut sink)?;
1626                        }
1627                    }
1628                    258..=261 => {
1629                        let rep_index = symbol - 258;
1630                        let distance = self.reps[rep_index];
1631                        if distance == 0 {
1632                            return Err(Error::InvalidData(
1633                                "RAR 5 repeat distance is not initialized",
1634                            )
1635                            .into());
1636                        }
1637                        let length_slot = tables.length.decode(&mut bits)?;
1638                        let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1639                        let length = slot_to_length(length_slot, length_extra)?;
1640                        self.reps[..=rep_index].rotate_right(1);
1641                        self.reps[0] = distance;
1642                        self.last_length = length;
1643                        output.copy_match(distance, length, &mut sink)?;
1644                    }
1645                    262.. => {
1646                        let length_slot = symbol - 262;
1647                        let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1648                        let mut length = slot_to_length(length_slot, length_extra)?;
1649                        let distance_slot = tables.distance.decode(&mut bits)?;
1650                        let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1651                        let distance_extra = if distance_bit_count >= 4 && tables.align_mode {
1652                            let high = bits.read_bits((distance_bit_count - 4) as u8)?;
1653                            let low = tables.align.decode(&mut bits)? as u32;
1654                            (high << 4) | low
1655                        } else {
1656                            bits.read_bits(distance_bit_count as u8)?
1657                        };
1658                        let distance = slot_to_distance(distance_slot, distance_extra)?;
1659                        length += length_bonus(distance);
1660                        self.reps.rotate_right(1);
1661                        self.reps[0] = distance;
1662                        self.last_length = length;
1663                        output.copy_match(distance, length, &mut sink)?;
1664                    }
1665                }
1666            }
1667
1668            self.tables = Some(tables);
1669            if block.header.is_last || output.written() >= output_size {
1670                break;
1671            }
1672        }
1673
1674        if output.written() == output_size {
1675            output.finish(&mut sink)?;
1676            self.history = output.into_history();
1677            Ok(())
1678        } else {
1679            Err(Error::NeedMoreInput.into())
1680        }
1681    }
1682
1683    fn reset(&mut self) {
1684        self.tables = None;
1685        self.reps = [0; 4];
1686        self.last_length = 0;
1687        self.history.clear();
1688    }
1689
1690    fn copy_match(
1691        &self,
1692        output: &mut Vec<u8>,
1693        distance: usize,
1694        length: usize,
1695        output_limit: usize,
1696        dictionary_size: usize,
1697    ) -> Result<()> {
1698        if distance > dictionary_size {
1699            return Err(Error::InvalidData(
1700                "RAR 5 match distance exceeds dictionary",
1701            ));
1702        }
1703        if distance == 0 || distance > self.history.len() + output.len() {
1704            return Err(Error::InvalidData("RAR 5 match distance exceeds window"));
1705        }
1706        if output
1707            .len()
1708            .checked_add(length)
1709            .is_none_or(|end| end > output_limit)
1710        {
1711            return Err(Error::InvalidData("RAR 5 match exceeds output limit"));
1712        }
1713        for _ in 0..length {
1714            if distance <= output.len() {
1715                let index = output.len() - distance;
1716                output.push(output[index]);
1717            } else {
1718                let history_distance = distance - output.len();
1719                let index = self.history.len() - history_distance;
1720                output.push(self.history[index]);
1721            }
1722        }
1723        Ok(())
1724    }
1725}
1726
1727struct StreamingOutput {
1728    history: VecDeque<u8>,
1729    pending: Vec<u8>,
1730    written: usize,
1731    output_limit: usize,
1732    dictionary_size: usize,
1733    history_limit: usize,
1734    all_zero: bool,
1735}
1736
1737impl StreamingOutput {
1738    fn new(
1739        history: Vec<u8>,
1740        output_limit: usize,
1741        dictionary_size: usize,
1742        history_limit: usize,
1743    ) -> Self {
1744        Self {
1745            all_zero: history.iter().all(|&byte| byte == 0),
1746            history: history.into(),
1747            pending: Vec::with_capacity(STREAM_FLUSH_THRESHOLD),
1748            written: 0,
1749            output_limit,
1750            dictionary_size,
1751            history_limit,
1752        }
1753    }
1754
1755    fn written(&self) -> usize {
1756        self.written
1757    }
1758
1759    fn push<E>(
1760        &mut self,
1761        byte: u8,
1762        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1763    ) -> std::result::Result<(), StreamDecodeError<E>> {
1764        if self.written >= self.output_limit {
1765            return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1766        }
1767        if byte != 0 {
1768            self.all_zero = false;
1769        }
1770        self.pending.push(byte);
1771        self.written += 1;
1772        if self.pending.len() >= STREAM_FLUSH_THRESHOLD {
1773            self.flush(sink)?;
1774        }
1775        Ok(())
1776    }
1777
1778    fn push_repeated<E>(
1779        &mut self,
1780        byte: u8,
1781        mut count: usize,
1782        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1783    ) -> std::result::Result<(), StreamDecodeError<E>> {
1784        if self
1785            .written
1786            .checked_add(count)
1787            .is_none_or(|end| end > self.output_limit)
1788        {
1789            return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1790        }
1791        if byte != 0 {
1792            self.all_zero = false;
1793        }
1794        while count > 0 {
1795            let available = STREAM_FLUSH_THRESHOLD - self.pending.len();
1796            let take = count.min(available.max(1));
1797            let old_len = self.pending.len();
1798            self.pending.resize(old_len + take, byte);
1799            self.written += take;
1800            count -= take;
1801            if self.pending.len() >= STREAM_FLUSH_THRESHOLD {
1802                self.flush(sink)?;
1803            }
1804        }
1805        Ok(())
1806    }
1807
1808    fn push_zeroes<E>(
1809        &mut self,
1810        count: usize,
1811        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1812    ) -> std::result::Result<(), StreamDecodeError<E>> {
1813        if self
1814            .written
1815            .checked_add(count)
1816            .is_none_or(|end| end > self.output_limit)
1817        {
1818            return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1819        }
1820        self.flush(sink)?;
1821        sink(DecodedChunk::Repeated {
1822            byte: 0,
1823            len: count,
1824        })
1825        .map_err(StreamDecodeError::Sink)?;
1826        self.written += count;
1827        if self.history.is_empty() && self.history_limit != 0 {
1828            self.history.push_back(0);
1829        }
1830        Ok(())
1831    }
1832
1833    fn copy_match<E>(
1834        &mut self,
1835        distance: usize,
1836        length: usize,
1837        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1838    ) -> std::result::Result<(), StreamDecodeError<E>> {
1839        if distance > self.dictionary_size {
1840            return Err(Error::InvalidData("RAR 5 match distance exceeds dictionary").into());
1841        }
1842        if self.all_zero && distance <= self.written + self.history.len() {
1843            return self.push_zeroes(length, sink);
1844        }
1845        if distance == 0 || distance > self.history.len() + self.pending.len() {
1846            return Err(Error::InvalidData("RAR 5 match distance exceeds window").into());
1847        }
1848        if self
1849            .written
1850            .checked_add(length)
1851            .is_none_or(|end| end > self.output_limit)
1852        {
1853            return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1854        }
1855        if distance == 1 {
1856            let byte = self.byte_at_distance(1)?;
1857            return self.push_repeated(byte, length, sink);
1858        }
1859        for _ in 0..length {
1860            let byte = self.byte_at_distance(distance)?;
1861            self.push(byte, sink)?;
1862        }
1863        Ok(())
1864    }
1865
1866    fn byte_at_distance(&self, distance: usize) -> Result<u8> {
1867        if distance <= self.pending.len() {
1868            Ok(self.pending[self.pending.len() - distance])
1869        } else {
1870            let history_distance = distance - self.pending.len();
1871            if history_distance > self.history.len() {
1872                return Err(Error::InvalidData("RAR 5 match distance exceeds window"));
1873            }
1874            Ok(*self
1875                .history
1876                .get(self.history.len() - history_distance)
1877                .ok_or(Error::InvalidData("RAR 5 match distance exceeds window"))?)
1878        }
1879    }
1880
1881    fn flush<E>(
1882        &mut self,
1883        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1884    ) -> std::result::Result<(), StreamDecodeError<E>> {
1885        if self.pending.is_empty() {
1886            return Ok(());
1887        }
1888        sink(DecodedChunk::Bytes(&self.pending)).map_err(StreamDecodeError::Sink)?;
1889        self.history.extend(self.pending.iter().copied());
1890        self.pending.clear();
1891        while self.history.len() > self.history_limit {
1892            self.history.pop_front();
1893        }
1894        Ok(())
1895    }
1896
1897    fn finish<E>(
1898        &mut self,
1899        sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1900    ) -> std::result::Result<(), StreamDecodeError<E>> {
1901        self.flush(sink)
1902    }
1903
1904    fn into_history(self) -> Vec<u8> {
1905        self.history.into()
1906    }
1907}
1908
1909fn read_compressed_block(input: &mut impl Read) -> Result<OwnedCompressedBlock> {
1910    let mut fixed = [0u8; 2];
1911    input
1912        .read_exact(&mut fixed)
1913        .map_err(|_| Error::NeedMoreInput)?;
1914    let flags = fixed[0];
1915    let checksum = fixed[1];
1916    let size_bytes_len = match (flags >> 3) & 0x03 {
1917        0 => 1,
1918        1 => 2,
1919        2 => 3,
1920        _ => return Err(Error::InvalidData("RAR 5 block size length is invalid")),
1921    };
1922    let mut size_bytes = [0u8; 3];
1923    input
1924        .read_exact(&mut size_bytes[..size_bytes_len])
1925        .map_err(|_| Error::NeedMoreInput)?;
1926
1927    let actual = size_bytes[..size_bytes_len]
1928        .iter()
1929        .fold(checksum ^ flags, |acc, &byte| acc ^ byte);
1930    if actual != 0x5a {
1931        return Err(Error::InvalidData("RAR 5 block header checksum mismatch"));
1932    }
1933
1934    let payload_size = size_bytes[..size_bytes_len]
1935        .iter()
1936        .enumerate()
1937        .fold(0usize, |acc, (index, &byte)| {
1938            acc | (usize::from(byte) << (index * 8))
1939        });
1940    let mut payload = vec![0; payload_size];
1941    input
1942        .read_exact(&mut payload)
1943        .map_err(|_| Error::NeedMoreInput)?;
1944    let final_byte_bits = ((flags & 0x07) + 1).min(8);
1945    let payload_bits = if payload_size == 0 {
1946        0
1947    } else {
1948        (payload_size - 1) * 8 + usize::from(final_byte_bits)
1949    };
1950
1951    Ok(OwnedCompressedBlock {
1952        header: CompressedBlockHeader {
1953            flags,
1954            is_last: flags & 0x40 != 0,
1955            has_tables: flags & 0x80 != 0,
1956            final_byte_bits,
1957            payload_size,
1958            payload_bits,
1959        },
1960        payload,
1961    })
1962}
1963
1964impl Default for Unpack50Decoder {
1965    fn default() -> Self {
1966        Self::new()
1967    }
1968}
1969
1970#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1971struct PendingFilter {
1972    start: usize,
1973    length: usize,
1974    filter_type: FilterType,
1975    channels: usize,
1976}
1977
1978#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1979enum FilterType {
1980    Delta,
1981    E8,
1982    E8E9,
1983    Arm,
1984}
1985
1986fn read_filter(bits: &mut BitReader<'_>, current_pos: usize) -> Result<PendingFilter> {
1987    let offset = read_filter_data(bits)? as usize;
1988    let length = read_filter_data(bits)? as usize;
1989    let filter_type = match bits.read_bits(3)? {
1990        0 => FilterType::Delta,
1991        1 => FilterType::E8,
1992        2 => FilterType::E8E9,
1993        3 => FilterType::Arm,
1994        _ => return Err(Error::InvalidData("RAR 5 filter type is unsupported")),
1995    };
1996    let channels = if filter_type == FilterType::Delta {
1997        bits.read_bits(5)? as usize + 1
1998    } else {
1999        0
2000    };
2001    Ok(PendingFilter {
2002        start: current_pos
2003            .checked_add(offset)
2004            .ok_or(Error::InvalidData("RAR 5 filter start overflows"))?,
2005        length,
2006        filter_type,
2007        channels,
2008    })
2009}
2010
2011fn read_filter_data(bits: &mut BitReader<'_>) -> Result<u32> {
2012    let byte_count = bits.read_bits(2)? as usize + 1;
2013    let mut data = 0;
2014    for index in 0..byte_count {
2015        data |= bits.read_bits(8)? << (index * 8);
2016    }
2017    Ok(data)
2018}
2019
2020fn write_filter(writer: &mut BitWriter, filter: EncodeFilter) -> Result<()> {
2021    if filter.offset > u32::MAX as usize {
2022        return Err(Error::InvalidData("RAR 5 filter offset is too large"));
2023    }
2024    if filter.length > u32::MAX as usize {
2025        return Err(Error::InvalidData("RAR 5 filter length is too large"));
2026    }
2027    write_filter_data(writer, filter.offset as u32);
2028    write_filter_data(writer, filter.length as u32);
2029    match filter.filter_type {
2030        FilterType::Delta => {
2031            if filter.channels == 0 || filter.channels > 32 {
2032                return Err(Error::InvalidData(
2033                    "RAR 5 DELTA filter channel count is invalid",
2034                ));
2035            }
2036            writer.write_bits(0, 3);
2037            writer.write_bits(filter.channels - 1, 5);
2038        }
2039        FilterType::E8 => writer.write_bits(1, 3),
2040        FilterType::E8E9 => writer.write_bits(2, 3),
2041        FilterType::Arm => writer.write_bits(3, 3),
2042    }
2043    Ok(())
2044}
2045
2046fn write_filter_data(writer: &mut BitWriter, value: u32) {
2047    let byte_count = if value <= 0xff {
2048        1
2049    } else if value <= 0xffff {
2050        2
2051    } else if value <= 0x00ff_ffff {
2052        3
2053    } else {
2054        4
2055    };
2056    writer.write_bits(byte_count - 1, 2);
2057    for index in 0..byte_count {
2058        writer.write_bits(((value >> (index * 8)) & 0xff) as usize, 8);
2059    }
2060}
2061
2062fn apply_filters(output: &mut [u8], filters: &[PendingFilter]) -> Result<()> {
2063    for filter in filters {
2064        let end = filter
2065            .start
2066            .checked_add(filter.length)
2067            .ok_or(Error::InvalidData("RAR 5 filter range overflows"))?;
2068        let data = output
2069            .get_mut(filter.start..end)
2070            .ok_or(Error::InvalidData("RAR 5 filter range exceeds output"))?;
2071        match filter.filter_type {
2072            FilterType::Delta => {
2073                let decoded = filters::delta_decode(data, filter.channels, rar50_delta_messages())?;
2074                data.copy_from_slice(&decoded);
2075            }
2076            FilterType::E8 => filters::e8e9_decode(data, filter.start as u32, false),
2077            FilterType::E8E9 => filters::e8e9_decode(data, filter.start as u32, true),
2078            FilterType::Arm => arm_decode(data, filter.start as u32),
2079        }
2080    }
2081    Ok(())
2082}
2083
2084fn rar50_delta_messages() -> DeltaErrorMessages {
2085    DeltaErrorMessages {
2086        invalid_channels: "RAR 5 DELTA filter channel count is invalid",
2087        zero_channels: "RAR 5 DELTA filter has zero channels",
2088        truncated_source: "RAR 5 DELTA filter source is truncated",
2089    }
2090}
2091
2092fn arm_decode(data: &mut [u8], file_offset: u32) {
2093    let mut pos = 0usize;
2094    while pos + 3 < data.len() {
2095        if data[pos + 3] == 0xeb {
2096            let mut offset = u32::from(data[pos])
2097                | (u32::from(data[pos + 1]) << 8)
2098                | (u32::from(data[pos + 2]) << 16);
2099            offset = offset.wrapping_sub(file_offset.wrapping_add(pos as u32) / 4);
2100            data[pos] = offset as u8;
2101            data[pos + 1] = (offset >> 8) as u8;
2102            data[pos + 2] = (offset >> 16) as u8;
2103        }
2104        pos += 4;
2105    }
2106}
2107
2108fn arm_encode(data: &mut [u8], file_offset: u32) {
2109    let mut pos = 0usize;
2110    while pos + 3 < data.len() {
2111        if data[pos + 3] == 0xeb {
2112            let mut offset = u32::from(data[pos])
2113                | (u32::from(data[pos + 1]) << 8)
2114                | (u32::from(data[pos + 2]) << 16);
2115            offset = offset.wrapping_add(file_offset.wrapping_add(pos as u32) / 4);
2116            data[pos] = offset as u8;
2117            data[pos + 1] = (offset >> 8) as u8;
2118            data[pos + 2] = (offset >> 16) as u8;
2119        }
2120        pos += 4;
2121    }
2122}
2123
2124fn length_slot_extra_bits(slot: usize) -> Result<u8> {
2125    if slot < 8 {
2126        Ok(0)
2127    } else {
2128        let bit_count = (slot >> 2) - 1;
2129        if bit_count > 24 {
2130            Err(Error::InvalidData("RAR 5 length slot is too large"))
2131        } else {
2132            Ok(bit_count as u8)
2133        }
2134    }
2135}
2136
2137fn length_bonus(distance: usize) -> usize {
2138    usize::from(distance > 0x100) + usize::from(distance > 0x2000) + usize::from(distance > 0x40000)
2139}
2140
2141pub fn slot_to_length(slot: usize, extra_bits: u32) -> Result<usize> {
2142    if slot < 8 {
2143        return Ok(slot + 2);
2144    }
2145    let bit_count = (slot >> 2) - 1;
2146    if bit_count > 24 {
2147        return Err(Error::InvalidData("RAR 5 length slot is too large"));
2148    }
2149    let max_extra = if bit_count == 32 {
2150        u32::MAX
2151    } else {
2152        (1u32 << bit_count) - 1
2153    };
2154    if extra_bits > max_extra {
2155        return Err(Error::InvalidData("RAR 5 length extra bits exceed slot"));
2156    }
2157    Ok((((4 | (slot & 3)) << bit_count) | extra_bits as usize) + 2)
2158}
2159
2160pub fn distance_slot_bit_count(slot: usize) -> Result<usize> {
2161    if slot < 4 {
2162        Ok(0)
2163    } else {
2164        let bit_count = (slot - 2) >> 1;
2165        if bit_count > 31 {
2166            Err(Error::InvalidData("RAR 5 distance slot is too large"))
2167        } else {
2168            Ok(bit_count)
2169        }
2170    }
2171}
2172
2173pub fn slot_to_distance(slot: usize, extra_bits: u32) -> Result<usize> {
2174    if slot < 4 {
2175        return Ok(slot + 1);
2176    }
2177    let bit_count = distance_slot_bit_count(slot)?;
2178    let max_extra = if bit_count == 32 {
2179        u32::MAX
2180    } else {
2181        (1u32 << bit_count) - 1
2182    };
2183    if extra_bits > max_extra {
2184        return Err(Error::InvalidData("RAR 5 distance extra bits exceed slot"));
2185    }
2186    Ok((((2 | (slot & 1)) << bit_count) | extra_bits as usize) + 1)
2187}
2188
2189#[derive(Debug, Clone)]
2190pub struct HuffmanTable {
2191    symbols: Vec<HuffmanSymbol>,
2192    first_code: [u16; 16],
2193    first_index: [usize; 16],
2194    counts: [u16; 16],
2195}
2196
2197#[derive(Debug, Clone)]
2198struct HuffmanSymbol {
2199    code: u16,
2200    len: u8,
2201    symbol: usize,
2202}
2203
2204impl HuffmanTable {
2205    pub fn from_lengths(lengths: &[u8]) -> Result<Self> {
2206        let mut count = [0u16; 16];
2207        for &length in lengths {
2208            if length > 15 {
2209                return Err(Error::InvalidData("RAR 5 Huffman length is too large"));
2210            }
2211            if length != 0 {
2212                count[length as usize] += 1;
2213            }
2214        }
2215        validate_huffman_counts(&count)?;
2216
2217        let mut first_code = [0u16; 16];
2218        let mut next_code = [0u16; 16];
2219        let mut code = 0u16;
2220        for length in 1..=15 {
2221            code = (code + count[length - 1]) << 1;
2222            first_code[length] = code;
2223            next_code[length] = code;
2224        }
2225
2226        let mut first_index = [0usize; 16];
2227        let mut index = 0usize;
2228        for length in 1..=15 {
2229            first_index[length] = index;
2230            index += usize::from(count[length]);
2231        }
2232
2233        let mut symbols = Vec::new();
2234        for (symbol, &length) in lengths.iter().enumerate() {
2235            if length == 0 {
2236                continue;
2237            }
2238            let code = next_code[length as usize];
2239            next_code[length as usize] += 1;
2240            symbols.push(HuffmanSymbol {
2241                code,
2242                len: length,
2243                symbol,
2244            });
2245        }
2246        symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
2247        Ok(Self {
2248            symbols,
2249            first_code,
2250            first_index,
2251            counts: count,
2252        })
2253    }
2254
2255    pub fn is_empty(&self) -> bool {
2256        self.symbols.is_empty()
2257    }
2258
2259    fn decode(&self, bits: &mut BitReader<'_>) -> Result<usize> {
2260        if self.symbols.is_empty() {
2261            return Err(Error::InvalidData("RAR 5 empty Huffman table"));
2262        }
2263        let mut code = 0u16;
2264        for len in 1..=15 {
2265            code = (code << 1) | bits.read_bits(1)? as u16;
2266            let count = self.counts[len];
2267            if count != 0 {
2268                let first = self.first_code[len];
2269                let offset = code.wrapping_sub(first);
2270                if offset < count {
2271                    let index = self.first_index[len] + usize::from(offset);
2272                    return Ok(self.symbols[index].symbol);
2273                }
2274            }
2275        }
2276        Err(Error::InvalidData("RAR 5 invalid Huffman code"))
2277    }
2278
2279    fn code_for_symbol(&self, symbol: usize) -> Result<(u16, u8)> {
2280        self.symbols
2281            .iter()
2282            .find(|item| item.symbol == symbol)
2283            .map(|item| (item.code, item.len))
2284            .ok_or(Error::InvalidData("RAR 5 missing Huffman symbol"))
2285    }
2286}
2287
2288struct BitReader<'a> {
2289    input: &'a [u8],
2290    bit_pos: usize,
2291}
2292
2293impl<'a> BitReader<'a> {
2294    fn new(input: &'a [u8]) -> Self {
2295        Self { input, bit_pos: 0 }
2296    }
2297
2298    fn read_bits(&mut self, count: u8) -> Result<u32> {
2299        if count > 32 {
2300            return Err(Error::InvalidData("RAR 5 bit read is too wide"));
2301        }
2302        let end = self
2303            .bit_pos
2304            .checked_add(usize::from(count))
2305            .ok_or(Error::NeedMoreInput)?;
2306        if end > self.input.len() * 8 {
2307            return Err(Error::NeedMoreInput);
2308        }
2309
2310        let mut value = 0u32;
2311        let mut remaining = usize::from(count);
2312        while remaining != 0 {
2313            let byte = self.input[self.bit_pos / 8];
2314            let bit_offset = self.bit_pos % 8;
2315            let available = 8 - bit_offset;
2316            let take = available.min(remaining);
2317            let shift = available - take;
2318            let mask = ((1u16 << take) - 1) as u8;
2319            let chunk = (byte >> shift) & mask;
2320            value = (value << take) | u32::from(chunk);
2321            self.bit_pos += take;
2322            remaining -= take;
2323        }
2324
2325        Ok(value)
2326    }
2327}
2328
2329struct BitWriter {
2330    bytes: Vec<u8>,
2331    bit_pos: usize,
2332}
2333
2334impl BitWriter {
2335    fn new() -> Self {
2336        Self {
2337            bytes: Vec::new(),
2338            bit_pos: 0,
2339        }
2340    }
2341
2342    fn write_bits(&mut self, value: usize, count: usize) {
2343        for bit in (0..count).rev() {
2344            if self.bit_pos.is_multiple_of(8) {
2345                self.bytes.push(0);
2346            }
2347            if (value >> bit) & 1 != 0 {
2348                let byte = self.bytes.last_mut().unwrap();
2349                *byte |= 1 << (7 - (self.bit_pos % 8));
2350            }
2351            self.bit_pos += 1;
2352        }
2353    }
2354
2355    fn finish(self) -> Vec<u8> {
2356        self.bytes
2357    }
2358}
2359
2360fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
2361    let mut available = 1i32;
2362    for &len_count in count.iter().skip(1) {
2363        available = (available << 1) - i32::from(len_count);
2364        if available < 0 {
2365            return Err(Error::InvalidData("RAR 5 oversubscribed Huffman table"));
2366        }
2367    }
2368    Ok(())
2369}
2370
2371#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2372struct LevelToken {
2373    symbol: usize,
2374    extra_bits: u8,
2375    extra_value: u8,
2376}
2377
2378impl LevelToken {
2379    const fn plain(symbol: usize) -> Self {
2380        Self {
2381            symbol,
2382            extra_bits: 0,
2383            extra_value: 0,
2384        }
2385    }
2386
2387    const fn repeat_previous_short(count: usize) -> Self {
2388        Self {
2389            symbol: 16,
2390            extra_bits: 3,
2391            extra_value: (count - 3) as u8,
2392        }
2393    }
2394
2395    const fn repeat_previous_long(count: usize) -> Self {
2396        Self {
2397            symbol: 17,
2398            extra_bits: 7,
2399            extra_value: (count - 11) as u8,
2400        }
2401    }
2402
2403    const fn zero_run_short(count: usize) -> Self {
2404        Self {
2405            symbol: 18,
2406            extra_bits: 3,
2407            extra_value: (count - 3) as u8,
2408        }
2409    }
2410
2411    const fn zero_run_long(count: usize) -> Self {
2412        Self {
2413            symbol: 19,
2414            extra_bits: 7,
2415            extra_value: (count - 11) as u8,
2416        }
2417    }
2418}
2419
2420fn encode_table_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
2421    let mut tokens = Vec::new();
2422    let mut pos = 0usize;
2423    let mut previous = None;
2424    while pos < lengths.len() {
2425        let value = lengths[pos];
2426        let mut run = 1usize;
2427        while pos + run < lengths.len() && lengths[pos + run] == value {
2428            run += 1;
2429        }
2430
2431        if value == 0 {
2432            emit_zero_level_run(&mut tokens, run);
2433            previous = Some(0);
2434            pos += run;
2435            continue;
2436        }
2437
2438        if previous == Some(value) && run >= 3 {
2439            emit_repeat_level_run(&mut tokens, run);
2440            pos += run;
2441            continue;
2442        }
2443
2444        tokens.push(LevelToken::plain(value as usize));
2445        previous = Some(value);
2446        pos += 1;
2447    }
2448    tokens
2449}
2450
2451fn emit_repeat_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
2452    while run != 0 {
2453        if run >= 11 {
2454            let mut chunk = run.min(138);
2455            if matches!(run - chunk, 1 | 2) && chunk >= 14 {
2456                chunk -= 3;
2457            }
2458            tokens.push(LevelToken::repeat_previous_long(chunk));
2459            run -= chunk;
2460        } else if run >= 3 {
2461            let chunk = run.min(10);
2462            tokens.push(LevelToken::repeat_previous_short(chunk));
2463            run -= chunk;
2464        } else {
2465            break;
2466        }
2467    }
2468}
2469
2470fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
2471    while run != 0 {
2472        if run >= 11 {
2473            let mut chunk = run.min(138);
2474            if matches!(run - chunk, 1 | 2) && chunk >= 14 {
2475                chunk -= 3;
2476            }
2477            tokens.push(LevelToken::zero_run_long(chunk));
2478            run -= chunk;
2479        } else if run >= 3 {
2480            let chunk = run.min(10);
2481            tokens.push(LevelToken::zero_run_short(chunk));
2482            run -= chunk;
2483        } else {
2484            tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
2485            break;
2486        }
2487    }
2488}
2489
2490fn level_code_lengths_for_tokens(tokens: &[LevelToken]) -> [u8; LEVEL_TABLE_SIZE] {
2491    let mut used = [false; LEVEL_TABLE_SIZE];
2492    for token in tokens {
2493        used[token.symbol] = true;
2494    }
2495    let used_count = used.iter().filter(|&&used| used).count();
2496    let len = huffman::bits_for_symbol_count(used_count);
2497    let mut lengths = [0u8; LEVEL_TABLE_SIZE];
2498    for (symbol, is_used) in used.into_iter().enumerate() {
2499        if is_used {
2500            lengths[symbol] = len;
2501        }
2502    }
2503    lengths
2504}
2505
2506fn write_level_lengths(writer: &mut BitWriter, lengths: &[u8; LEVEL_TABLE_SIZE]) {
2507    let mut pos = 0usize;
2508    while pos < LEVEL_TABLE_SIZE {
2509        let length = lengths[pos];
2510        if length == 0 {
2511            let mut count = 1usize;
2512            while pos + count < LEVEL_TABLE_SIZE && lengths[pos + count] == 0 {
2513                count += 1;
2514            }
2515            while count >= 3 {
2516                let chunk = count.min(17);
2517                writer.write_bits(15, 4);
2518                writer.write_bits(chunk - 2, 4);
2519                pos += chunk;
2520                count -= chunk;
2521            }
2522            for _ in 0..count {
2523                writer.write_bits(0, 4);
2524                pos += 1;
2525            }
2526        } else {
2527            writer.write_bits(usize::from(length), 4);
2528            if length == 15 {
2529                writer.write_bits(0, 4);
2530            }
2531            pos += 1;
2532        }
2533    }
2534}
2535
2536#[cfg(test)]
2537mod tests {
2538    use super::*;
2539
2540    fn checksum(flags: u8, size_bytes: &[u8]) -> u8 {
2541        size_bytes
2542            .iter()
2543            .fold(0x5a ^ flags, |acc, &byte| acc ^ byte)
2544    }
2545
2546    #[test]
2547    fn parses_one_byte_size_block_header() {
2548        let flags = 0xc7;
2549        let size = [3];
2550        let input = [flags, checksum(flags, &size), size[0], 0xaa, 0xbb, 0xcc];
2551
2552        let block = parse_compressed_block(&input).unwrap();
2553        assert_eq!(block.header_len, 3);
2554        assert_eq!(block.payload, 3..6);
2555        assert_eq!(block.header.flags, flags);
2556        assert!(block.header.is_last);
2557        assert!(block.header.has_tables);
2558        assert_eq!(block.header.final_byte_bits, 8);
2559        assert_eq!(block.header.payload_size, 3);
2560        assert_eq!(block.header.payload_bits, 24);
2561    }
2562
2563    #[test]
2564    fn parses_three_byte_size_block_header_with_partial_final_byte() {
2565        let flags = 0x94;
2566        let size = [0x34, 0x12, 0x00];
2567        let mut input = vec![flags, checksum(flags, &size), size[0], size[1], size[2]];
2568        input.resize(0x1234 + 5, 0);
2569
2570        let block = parse_compressed_block(&input).unwrap();
2571        assert_eq!(block.header_len, 5);
2572        assert_eq!(block.payload, 5..0x1239);
2573        assert!(!block.header.is_last);
2574        assert!(block.header.has_tables);
2575        assert_eq!(block.header.final_byte_bits, 5);
2576        assert_eq!(block.header.payload_size, 0x1234);
2577        assert_eq!(block.header.payload_bits, (0x1234 - 1) * 8 + 5);
2578    }
2579
2580    #[test]
2581    fn rejects_reserved_size_length_selector() {
2582        let input = [0x18, 0x42, 0x00];
2583
2584        assert_eq!(
2585            parse_compressed_block(&input),
2586            Err(Error::InvalidData("RAR 5 block size length is invalid"))
2587        );
2588    }
2589
2590    #[test]
2591    fn rejects_bad_block_header_checksum() {
2592        let input = [0xc7, 0x00, 0x03, 0xaa, 0xbb, 0xcc];
2593
2594        assert_eq!(
2595            parse_compressed_block(&input),
2596            Err(Error::InvalidData("RAR 5 block header checksum mismatch"))
2597        );
2598    }
2599
2600    #[test]
2601    fn rejects_truncated_block_payload() {
2602        let flags = 0xc7;
2603        let size = [3];
2604        let input = [flags, checksum(flags, &size), size[0], 0xaa, 0xbb];
2605
2606        assert_eq!(parse_compressed_block(&input), Err(Error::NeedMoreInput));
2607    }
2608
2609    #[test]
2610    fn reads_level_lengths_with_literal_fifteen() {
2611        let mut nibbles = vec![1, 2, 15, 0, 3, 4];
2612        nibbles.resize(LEVEL_TABLE_SIZE + 1, 0);
2613
2614        let (lengths, bits) = read_level_lengths(&pack_nibbles(&nibbles)).unwrap();
2615
2616        assert_eq!(&lengths[..6], &[1, 2, 15, 3, 4, 0]);
2617        assert_eq!(bits, LEVEL_TABLE_SIZE * 4 + 4);
2618    }
2619
2620    #[test]
2621    fn reads_level_lengths_with_zero_run_at_current_position() {
2622        let mut nibbles = vec![7, 15, 3, 2];
2623        nibbles.resize(LEVEL_TABLE_SIZE - 3, 0);
2624
2625        let (lengths, bits) = read_level_lengths(&pack_nibbles(&nibbles)).unwrap();
2626
2627        assert_eq!(lengths[0], 7);
2628        assert_eq!(&lengths[1..6], &[0, 0, 0, 0, 0]);
2629        assert_eq!(lengths[6], 2);
2630        assert_eq!(bits, (LEVEL_TABLE_SIZE - 3) * 4);
2631    }
2632
2633    fn pack_nibbles(nibbles: &[u8]) -> Vec<u8> {
2634        nibbles
2635            .chunks(2)
2636            .map(|chunk| {
2637                let high = chunk[0] & 0x0f;
2638                let low = chunk.get(1).copied().unwrap_or(0) & 0x0f;
2639                (high << 4) | low
2640            })
2641            .collect()
2642    }
2643
2644    #[test]
2645    fn reads_rar50_second_level_table_lengths() {
2646        let mut writer = BitWriter::new();
2647        for _ in 0..LEVEL_TABLE_SIZE {
2648            writer.write_bits(5, 4);
2649        }
2650        for count in [138, 138, 138, 16] {
2651            writer.write_bits(19, 5);
2652            writer.write_bits(count - 11, 7);
2653        }
2654        let input = writer.finish();
2655
2656        let (lengths, bits) = read_table_lengths(&input, 0).unwrap();
2657
2658        assert_eq!(lengths.main.len(), MAIN_TABLE_SIZE);
2659        assert_eq!(lengths.distance.len(), DISTANCE_TABLE_SIZE_50);
2660        assert_eq!(lengths.align.len(), ALIGN_TABLE_SIZE);
2661        assert_eq!(lengths.length.len(), LENGTH_TABLE_SIZE);
2662        assert!(lengths.main.iter().all(|&length| length == 0));
2663        assert!(lengths.distance.iter().all(|&length| length == 0));
2664        assert!(lengths.align.iter().all(|&length| length == 0));
2665        assert!(lengths.length.iter().all(|&length| length == 0));
2666        assert_eq!(bits, LEVEL_TABLE_SIZE * 4 + 4 * (5 + 7));
2667    }
2668
2669    #[test]
2670    fn reads_rar70_table_length_count() {
2671        assert_eq!(
2672            table_length_count(1).unwrap(),
2673            MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_70 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE
2674        );
2675    }
2676
2677    #[test]
2678    fn encoded_table_lengths_round_trip_with_bit_count() {
2679        let mut lengths = TableLengths {
2680            main: vec![0; MAIN_TABLE_SIZE],
2681            distance: vec![0; DISTANCE_TABLE_SIZE_50],
2682            align: vec![0; ALIGN_TABLE_SIZE],
2683            length: vec![0; LENGTH_TABLE_SIZE],
2684        };
2685        lengths.main[b'A' as usize] = 1;
2686        lengths.main[b'B' as usize] = 3;
2687        lengths.main[262] = 3;
2688        lengths.distance[1] = 1;
2689        lengths.align[0] = 4;
2690        lengths.length[0] = 1;
2691
2692        let (encoded, bit_count) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
2693        let (decoded, decoded_bits) = read_table_lengths(&encoded, 0).unwrap();
2694
2695        assert_eq!(decoded, lengths);
2696        assert_eq!(decoded_bits, bit_count);
2697    }
2698
2699    #[test]
2700    fn table_level_encoder_uses_rar5_run_symbols() {
2701        let mut lengths =
2702            vec![
2703                0u8;
2704                MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_50 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE
2705            ];
2706        lengths[..4].fill(6);
2707        lengths[8..21].fill(0);
2708
2709        let tokens = encode_table_level_tokens(&lengths);
2710
2711        assert!(tokens.contains(&LevelToken::repeat_previous_short(3)));
2712        assert!(tokens.iter().any(|token| token.symbol == 19));
2713    }
2714
2715    #[test]
2716    fn encoded_compressed_block_round_trips_header_fields() {
2717        let payload = [0xaa, 0xbb, 0xc0];
2718        let block = encode_compressed_block(&payload, 18, true, true).unwrap();
2719
2720        let parsed = parse_compressed_block(&block).unwrap();
2721
2722        assert_eq!(parsed.payload, 3..6);
2723        assert!(parsed.header.has_tables);
2724        assert!(parsed.header.is_last);
2725        assert_eq!(parsed.header.final_byte_bits, 2);
2726        assert_eq!(parsed.header.payload_bits, 18);
2727        assert_eq!(&block[parsed.payload], payload);
2728    }
2729
2730    #[test]
2731    fn rejects_table_repeat_without_previous_length() {
2732        let mut writer = BitWriter::new();
2733        for _ in 0..LEVEL_TABLE_SIZE {
2734            writer.write_bits(5, 4);
2735        }
2736        writer.write_bits(16, 5);
2737        writer.write_bits(0, 3);
2738
2739        assert_eq!(
2740            read_table_lengths(&writer.finish(), 0),
2741            Err(Error::InvalidData(
2742                "RAR 5 table repeats missing previous length"
2743            ))
2744        );
2745    }
2746
2747    #[test]
2748    fn rejects_invalid_encoded_block_bit_counts() {
2749        assert_eq!(
2750            encode_compressed_block(&[0], 0, true, true),
2751            Err(Error::InvalidData("RAR 5 block has unused payload bytes"))
2752        );
2753        assert_eq!(
2754            encode_compressed_block(&[], 1, true, true),
2755            Err(Error::InvalidData("RAR 5 block bit count exceeds payload"))
2756        );
2757    }
2758
2759    #[test]
2760    fn builds_named_decode_tables_from_lengths() {
2761        let lengths = TableLengths {
2762            main: vec![1, 1],
2763            distance: vec![1, 1],
2764            align: vec![4; ALIGN_TABLE_SIZE],
2765            length: vec![1, 1],
2766        };
2767
2768        let tables = DecodeTables::from_lengths(&lengths).unwrap();
2769
2770        assert!(!tables.main.is_empty());
2771        assert!(!tables.distance.is_empty());
2772        assert!(!tables.align.is_empty());
2773        assert!(!tables.length.is_empty());
2774        assert!(!tables.align_mode);
2775    }
2776
2777    #[test]
2778    fn rejects_oversubscribed_rar50_huffman_tables() {
2779        assert!(matches!(
2780            HuffmanTable::from_lengths(&[1, 1, 1]),
2781            Err(Error::InvalidData("RAR 5 oversubscribed Huffman table"))
2782        ));
2783    }
2784
2785    #[test]
2786    fn detects_rar50_align_mode_when_align_lengths_are_not_uniform_four() {
2787        let mut align = vec![4; ALIGN_TABLE_SIZE];
2788        align[0] = 0;
2789        align[3] = 3;
2790        let lengths = TableLengths {
2791            main: vec![1, 1],
2792            distance: vec![1, 1],
2793            align,
2794            length: vec![1, 1],
2795        };
2796
2797        let tables = DecodeTables::from_lengths(&lengths).unwrap();
2798
2799        assert!(tables.align_mode);
2800    }
2801
2802    #[test]
2803    fn decodes_synthetic_literal_only_block() {
2804        let payload = literal_only_payload(b"ABBA");
2805        let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
2806
2807        let output = decode_literal_only(&input, 0, 4).unwrap();
2808
2809        assert_eq!(output, b"ABBA");
2810    }
2811
2812    #[test]
2813    fn encodes_literal_only_member_that_decoder_reads() {
2814        let data = b"literal-only RAR5 codec stream\nwith repeated words words words";
2815        let input = encode_literal_only(data, 0).unwrap();
2816
2817        let output = decode_literal_only(&input, 0, data.len()).unwrap();
2818
2819        assert_eq!(output, data);
2820    }
2821
2822    #[test]
2823    fn encodes_literal_only_rar70_table_shape_that_decoder_reads() {
2824        let data = b"small RAR7-compatible literal block";
2825        let input = encode_literal_only(data, 1).unwrap();
2826
2827        let output = decode_literal_only(&input, 1, data.len()).unwrap();
2828
2829        assert_eq!(output, data);
2830    }
2831
2832    #[test]
2833    fn encodes_empty_literal_only_member() {
2834        let input = encode_literal_only(b"", 0).unwrap();
2835
2836        let output = decode_literal_only(&input, 0, 0).unwrap();
2837
2838        assert!(output.is_empty());
2839    }
2840
2841    #[test]
2842    fn encodes_lz_member_with_same_member_matches() {
2843        let data = b"RAR5 match writer phrase. RAR5 match writer phrase. RAR5 match writer phrase.";
2844        let lz = encode_lz_member(data, 0).unwrap();
2845        let literal = encode_literal_only(data, 0).unwrap();
2846
2847        let output = decode_lz(&lz, 0, data.len()).unwrap();
2848
2849        assert_eq!(output, data);
2850        assert!(lz.len() < literal.len());
2851        assert!(
2852            encode_tokens(data, &[], EncodeOptions::default(), DISTANCE_TABLE_SIZE_50)
2853                .iter()
2854                .any(|token| matches!(token, EncodeToken::Match { .. }))
2855        );
2856    }
2857
2858    #[test]
2859    fn frequency_weighted_huffman_lengths_shorten_common_symbols() {
2860        let mut frequencies = vec![1usize; 24];
2861        frequencies[3] = 1024;
2862
2863        let lengths = huffman::lengths_for_frequencies(&frequencies, 15);
2864
2865        assert!(lengths[3] < lengths[0]);
2866        assert!(lengths.iter().all(|&length| length <= 15));
2867    }
2868
2869    #[test]
2870    fn lz_encoder_uses_frequency_weighted_huffman_lengths() {
2871        let mut data = vec![b'a'; 200];
2872        data.extend_from_slice(b"bcdefghijklmnopqrstuvwxyz");
2873        let input = encode_lz_member_with_options(&data, 0, EncodeOptions::new(0)).unwrap();
2874        let block = parse_compressed_block(&input).unwrap();
2875        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
2876
2877        let output = decode_lz(&input, 0, data.len()).unwrap();
2878
2879        assert_eq!(output, data);
2880        assert!(lengths.main[b'a' as usize] < lengths.main[b'z' as usize]);
2881    }
2882
2883    #[test]
2884    fn lazy_lz_parser_defers_short_match_for_longer_next_match() {
2885        let input = b"abcdXbcdYYYYYYYYYYYYabcdYYYYYYYYYYYY";
2886        let greedy = encode_tokens(
2887            input,
2888            &[],
2889            EncodeOptions::new(MAX_MATCH_CANDIDATES),
2890            DISTANCE_TABLE_SIZE_50,
2891        );
2892        let lazy = encode_tokens(
2893            input,
2894            &[],
2895            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
2896            DISTANCE_TABLE_SIZE_50,
2897        );
2898        let packed = encode_lz_member_with_options(
2899            input,
2900            0,
2901            EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
2902        )
2903        .unwrap();
2904
2905        assert!(greedy
2906            .iter()
2907            .any(|token| matches!(token, EncodeToken::Match { length: 4, .. })));
2908        assert!(lazy
2909            .iter()
2910            .any(|token| matches!(token, EncodeToken::Match { length, .. } if *length > 8)));
2911        assert_eq!(decode_lz(&packed, 0, input.len()).unwrap(), input);
2912    }
2913
2914    #[test]
2915    fn cost_aware_match_selection_prefers_repeat_distance_token() {
2916        let pos = 64;
2917        let pattern = b"abcdefgh";
2918        let mut input: Vec<u8> = (0..96u8).map(|byte| byte.wrapping_mul(37)).collect();
2919        input[pos - 30..pos - 22].copy_from_slice(pattern);
2920        input[pos - 10..pos - 2].copy_from_slice(pattern);
2921        input[pos..pos + 8].copy_from_slice(pattern);
2922        input[pos + 8] = b'X';
2923
2924        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
2925        for candidate in 0..pos {
2926            insert_match_position(&input, candidate, &mut buckets);
2927        }
2928        let state = EncoderMatchState {
2929            reps: [30, 0, 0, 0],
2930            last_length: 8,
2931        };
2932
2933        let best = best_match(
2934            &input,
2935            pos,
2936            input.len(),
2937            &buckets,
2938            EncodeOptions::default(),
2939            &state,
2940            DISTANCE_TABLE_SIZE_50,
2941        )
2942        .unwrap();
2943
2944        assert_eq!((best.length, best.distance), (8, 30));
2945    }
2946
2947    #[test]
2948    fn lazy_parser_uses_match_cost_not_only_match_length() {
2949        let pos = 600;
2950        let mut input: Vec<u8> = (0..700u16)
2951            .map(|value| value.wrapping_mul(73) as u8)
2952            .collect();
2953        input[pos - 512..pos - 504].copy_from_slice(b"ABCDEFGH");
2954        input[pos - 504] = b'Z';
2955        input[pos - 29..pos - 21].copy_from_slice(b"BCDEFGHI");
2956        input[pos - 30] = b'x';
2957        input[pos..pos + 9].copy_from_slice(b"ABCDEFGHI");
2958
2959        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
2960        for candidate in 0..pos {
2961            insert_match_position(&input, candidate, &mut buckets);
2962        }
2963        let state = EncoderMatchState {
2964            reps: [30, 0, 0, 0],
2965            last_length: 8,
2966        };
2967        let current = best_match(
2968            &input,
2969            pos,
2970            input.len(),
2971            &buckets,
2972            EncodeOptions::default(),
2973            &state,
2974            DISTANCE_TABLE_SIZE_50,
2975        )
2976        .unwrap();
2977
2978        assert_eq!((current.length, current.distance), (8, 512));
2979        assert!(should_lazy_emit_literal(
2980            &input,
2981            pos,
2982            &buckets,
2983            EncodeOptions::default().with_lazy_matching(true),
2984            &state,
2985            DISTANCE_TABLE_SIZE_50,
2986            current,
2987        ));
2988    }
2989
2990    #[test]
2991    fn lazy_parser_uses_bounded_cost_lookahead() {
2992        let pos = 160;
2993        let mut input: Vec<u8> = (0..240u16)
2994            .map(|value| value.wrapping_mul(91) as u8)
2995            .collect();
2996        input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
2997        input[pos - 80..pos - 70].copy_from_slice(b"CDEFGHIJKL");
2998        input[pos..pos + 12].copy_from_slice(b"ABCDEFGHIJKL");
2999
3000        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3001        for candidate in 0..pos {
3002            insert_match_position(&input, candidate, &mut buckets);
3003        }
3004        let state = EncoderMatchState::default();
3005        let current = best_match(
3006            &input,
3007            pos,
3008            input.len(),
3009            &buckets,
3010            EncodeOptions::default(),
3011            &state,
3012            DISTANCE_TABLE_SIZE_50,
3013        )
3014        .unwrap();
3015
3016        assert_eq!((current.length, current.distance), (8, 30));
3017        assert!(!should_lazy_emit_literal(
3018            &input,
3019            pos,
3020            &buckets,
3021            EncodeOptions::default()
3022                .with_lazy_matching(true)
3023                .with_lazy_lookahead(1),
3024            &state,
3025            DISTANCE_TABLE_SIZE_50,
3026            current,
3027        ));
3028        assert!(should_lazy_emit_literal(
3029            &input,
3030            pos,
3031            &buckets,
3032            EncodeOptions::default()
3033                .with_lazy_matching(true)
3034                .with_lazy_lookahead(2),
3035            &state,
3036            DISTANCE_TABLE_SIZE_50,
3037            current,
3038        ));
3039    }
3040
3041    #[test]
3042    fn lazy_parser_charges_for_skipped_literals() {
3043        let pos = 160;
3044        let mut input: Vec<u8> = (0..240u16)
3045            .map(|value| value.wrapping_mul(91) as u8)
3046            .collect();
3047        input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3048        input[pos - 80..pos - 71].copy_from_slice(b"CDEFGHIJK");
3049        input[pos..pos + 12].copy_from_slice(b"ABCDEFGHIJKL");
3050
3051        let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3052        for candidate in 0..pos {
3053            insert_match_position(&input, candidate, &mut buckets);
3054        }
3055        let state = EncoderMatchState::default();
3056        let current = best_match(
3057            &input,
3058            pos,
3059            input.len(),
3060            &buckets,
3061            EncodeOptions::default(),
3062            &state,
3063            DISTANCE_TABLE_SIZE_50,
3064        )
3065        .unwrap();
3066
3067        let next = best_match(
3068            &input,
3069            pos + 2,
3070            input.len(),
3071            &buckets,
3072            EncodeOptions::default(),
3073            &state,
3074            DISTANCE_TABLE_SIZE_50,
3075        )
3076        .unwrap();
3077
3078        assert!(next.score > current.score);
3079        assert!(next.score <= current.score + 16);
3080        assert!(!should_lazy_emit_literal(
3081            &input,
3082            pos,
3083            &buckets,
3084            EncodeOptions::default()
3085                .with_lazy_matching(true)
3086                .with_lazy_lookahead(2),
3087            &state,
3088            DISTANCE_TABLE_SIZE_50,
3089            current,
3090        ));
3091    }
3092
3093    fn encode_lz_member_with_filter(data: &[u8], kind: Rar50FilterKind) -> Result<Vec<u8>> {
3094        Unpack50Encoder::new().encode_member_with_filter(data, 0, Rar50FilterSpec::new(kind))
3095    }
3096
3097    #[test]
3098    fn encodes_lz_member_with_delta_filter_record() {
3099        let data: Vec<u8> = (0..96).map(|index| (index * 7 + index / 3) as u8).collect();
3100        let input =
3101            encode_lz_member_with_filter(&data, Rar50FilterKind::Delta { channels: 3 }).unwrap();
3102        let block = parse_compressed_block(&input).unwrap();
3103        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3104
3105        let output = decode_lz(&input, 0, data.len()).unwrap();
3106
3107        assert_eq!(output, data);
3108        assert_ne!(lengths.main[256], 0);
3109    }
3110
3111    #[test]
3112    fn rejects_invalid_delta_filter_channel_count() {
3113        assert_eq!(
3114            encode_lz_member_with_filter(b"abc", Rar50FilterKind::Delta { channels: 0 }),
3115            Err(Error::InvalidData(
3116                "RAR 5 DELTA filter channel count is invalid"
3117            ))
3118        );
3119        assert_eq!(
3120            encode_lz_member_with_filter(b"abc", Rar50FilterKind::Delta { channels: 33 }),
3121            Err(Error::InvalidData(
3122                "RAR 5 DELTA filter channel count is invalid"
3123            ))
3124        );
3125    }
3126
3127    #[test]
3128    fn encodes_lz_member_with_e8_filter_record() {
3129        let mut data = b"\xe8\0\0\0\0plain text after call".to_vec();
3130        data.extend_from_slice(&[0xe8, 3, 0, 0, 0, b'X']);
3131        let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8).unwrap();
3132        let block = parse_compressed_block(&input).unwrap();
3133        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3134
3135        let output = decode_lz(&input, 0, data.len()).unwrap();
3136
3137        assert_eq!(output, data);
3138        assert_ne!(lengths.main[256], 0);
3139    }
3140
3141    #[test]
3142    fn streaming_decode_reports_filtered_member_with_typed_sentinel() {
3143        let data = b"\xe8\0\0\0\0plain text after call".to_vec();
3144        let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8).unwrap();
3145        let mut reader = input.as_slice();
3146        let mut decoder = Unpack50Decoder::new();
3147
3148        let error = decoder
3149            .decode_member_from_reader_with_dictionary_to_sink(
3150                &mut reader,
3151                0,
3152                data.len(),
3153                128 * 1024,
3154                false,
3155                |_chunk| Ok::<_, std::convert::Infallible>(()),
3156            )
3157            .unwrap_err();
3158
3159        assert!(matches!(error, StreamDecodeError::FilteredMember));
3160    }
3161
3162    #[test]
3163    fn encodes_lz_member_with_e8e9_filter_record() {
3164        let data = b"\xe9\0\0\0\0jump target through e9".to_vec();
3165        let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8E9).unwrap();
3166        let block = parse_compressed_block(&input).unwrap();
3167        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3168
3169        let output = decode_lz(&input, 0, data.len()).unwrap();
3170
3171        assert_eq!(output, data);
3172        assert_ne!(lengths.main[256], 0);
3173    }
3174
3175    #[test]
3176    fn encodes_lz_member_with_ranged_e8e9_filter_record() {
3177        let mut data = b"\xe8\0\0\0\0plain prefix outside filter range".to_vec();
3178        let range_start = data.len();
3179        for _ in 0..16 {
3180            let operand_pos = data.len() + 1;
3181            data.push(0xe8);
3182            let relative = 0x7000u32.wrapping_sub(operand_pos as u32);
3183            data.extend_from_slice(&relative.to_le_bytes());
3184            data.extend_from_slice(b" code ");
3185        }
3186        let range = range_start..data.len();
3187        data.extend_from_slice(b"\xe9\0\0\0\0plain suffix outside filter range");
3188
3189        let input = Unpack50Encoder::new()
3190            .encode_member_with_filter(
3191                &data,
3192                0,
3193                Rar50FilterSpec::range(Rar50FilterKind::E8E9, range),
3194            )
3195            .unwrap();
3196        let block = parse_compressed_block(&input).unwrap();
3197        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3198
3199        let output = decode_lz(&input, 0, data.len()).unwrap();
3200
3201        assert_eq!(output, data);
3202        assert_ne!(lengths.main[256], 0);
3203    }
3204
3205    #[test]
3206    fn encodes_lz_member_with_multiple_filter_records() {
3207        let mut data = b"\xe8\0\0\0\0plain prefix outside filters".to_vec();
3208        let first_start = data.len();
3209        data.extend_from_slice(b"\xe8\0\0\0\0first filtered cluster");
3210        let first_end = data.len();
3211        data.extend_from_slice(b"large plain middle outside filters");
3212        let second_start = data.len();
3213        data.extend_from_slice(b"\xe8\0\0\0\0second filtered cluster");
3214        let second_end = data.len();
3215
3216        let input = Unpack50Encoder::new()
3217            .encode_member_with_filters(
3218                &data,
3219                0,
3220                &[
3221                    Rar50FilterSpec::range(Rar50FilterKind::E8, first_start..first_end),
3222                    Rar50FilterSpec::range(Rar50FilterKind::E8, second_start..second_end),
3223                ],
3224            )
3225            .unwrap();
3226        let block = parse_compressed_block(&input).unwrap();
3227        let (lengths, table_bits) = read_table_lengths(&input[block.payload.clone()], 0).unwrap();
3228        let tables = DecodeTables::from_lengths(&lengths).unwrap();
3229        let mut bits = BitReader {
3230            input: &input[block.payload],
3231            bit_pos: table_bits,
3232        };
3233        assert_eq!(tables.main.decode(&mut bits).unwrap(), 256);
3234        let first = read_filter(&mut bits, 0).unwrap();
3235        assert_eq!(tables.main.decode(&mut bits).unwrap(), 256);
3236        let second = read_filter(&mut bits, 0).unwrap();
3237
3238        let output = decode_lz(&input, 0, data.len()).unwrap();
3239
3240        assert_eq!(output, data);
3241        assert_eq!(first.start, first_start);
3242        assert_eq!(second.start, second_start);
3243    }
3244
3245    #[test]
3246    fn encodes_lz_member_with_arm_filter_record() {
3247        let data = [0x04, 0x00, 0x00, 0xeb, b'A', b'R', b'M', b'!'];
3248        let input = encode_lz_member_with_filter(&data, Rar50FilterKind::Arm).unwrap();
3249        let block = parse_compressed_block(&input).unwrap();
3250        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3251
3252        let output = decode_lz(&input, 0, data.len()).unwrap();
3253
3254        assert_eq!(output, data);
3255        assert_ne!(lengths.main[256], 0);
3256    }
3257
3258    #[test]
3259    fn arm_filter_uses_wrapping_address_arithmetic_at_u32_boundary() {
3260        let original = [0x04, 0x00, 0x00, 0xeb, 0x08, 0x00, 0x00, 0xeb];
3261        let mut filtered = original;
3262
3263        arm_encode(&mut filtered, u32::MAX - 3);
3264        assert_ne!(filtered, original);
3265        arm_decode(&mut filtered, u32::MAX - 3);
3266
3267        assert_eq!(filtered, original);
3268    }
3269
3270    #[test]
3271    fn solid_encoder_emits_rar50_matches_against_previous_member_history() {
3272        let first = b"RAR5 solid shared phrase alpha beta gamma\n".repeat(16);
3273        let second = b"RAR5 solid shared phrase alpha beta gamma\nsecond\n".repeat(4);
3274        let solid = encode_lz_member_with_history(&second, &first, 0).unwrap();
3275        let standalone = encode_lz_member(&second, 0).unwrap();
3276        let mut decoder = Unpack50Decoder::new();
3277
3278        assert_eq!(
3279            decoder
3280                .decode_member(
3281                    &encode_lz_member(&first, 0).unwrap(),
3282                    0,
3283                    first.len(),
3284                    false,
3285                    DecodeMode::Lz
3286                )
3287                .unwrap(),
3288            first
3289        );
3290        assert_eq!(
3291            decoder
3292                .decode_member(&solid, 0, second.len(), true, DecodeMode::Lz)
3293                .unwrap(),
3294            second
3295        );
3296        assert!(solid.len() < standalone.len());
3297    }
3298
3299    #[test]
3300    fn large_lz_members_are_split_into_multiple_compressed_blocks() {
3301        let data = vec![0u8; MAX_COMPRESSED_BLOCK_OUTPUT + 1];
3302        let encoded = encode_lz_member_with_options(&data, 0, EncodeOptions::new(16)).unwrap();
3303        let mut cursor = std::io::Cursor::new(encoded.as_slice());
3304        let first = read_compressed_block(&mut cursor).unwrap();
3305        let second = read_compressed_block(&mut cursor).unwrap();
3306        let mut decoder = Unpack50Decoder::new();
3307
3308        assert!(!first.header.is_last);
3309        assert!(second.header.is_last);
3310        assert_eq!(
3311            decoder
3312                .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3313                .unwrap(),
3314            data
3315        );
3316    }
3317
3318    #[test]
3319    fn large_filtered_lz_members_split_filter_records_by_block() {
3320        let mut data: Vec<_> = (0..MAX_COMPRESSED_BLOCK_OUTPUT + 512)
3321            .map(|index| index as u8)
3322            .collect();
3323        data[256] = 0xe8;
3324        data[257..261].copy_from_slice(&0x20u32.to_le_bytes());
3325        data[MAX_COMPRESSED_BLOCK_OUTPUT + 64] = 0xe8;
3326        data[MAX_COMPRESSED_BLOCK_OUTPUT + 65..MAX_COMPRESSED_BLOCK_OUTPUT + 69]
3327            .copy_from_slice(&0x40u32.to_le_bytes());
3328
3329        let encoded = Unpack50Encoder::with_options(EncodeOptions::new(0))
3330            .encode_member_with_filter(
3331                &data,
3332                0,
3333                Rar50FilterSpec::range(Rar50FilterKind::E8, 0..data.len()),
3334            )
3335            .unwrap();
3336        let mut cursor = std::io::Cursor::new(encoded.as_slice());
3337        let first = read_compressed_block(&mut cursor).unwrap();
3338        let mut blocks = 1usize;
3339        let mut last_is_last = first.header.is_last;
3340        while cursor.position() < encoded.len() as u64 {
3341            last_is_last = read_compressed_block(&mut cursor).unwrap().header.is_last;
3342            blocks += 1;
3343        }
3344        let mut decoder = Unpack50Decoder::new();
3345
3346        assert!(!first.header.is_last);
3347        assert!(last_is_last);
3348        assert!(blocks > 2);
3349        assert_eq!(
3350            decoder
3351                .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3352                .unwrap(),
3353            data
3354        );
3355    }
3356
3357    #[test]
3358    fn filters_are_split_before_rar_reader_filter_limit() {
3359        let data = vec![0u8; MAX_FILTER_BLOCK_LENGTH + 1];
3360        let encoded = Unpack50Encoder::with_options(
3361            EncodeOptions::new(0).with_max_match_distance(128 * 1024),
3362        )
3363        .encode_member_with_filter(
3364            &data,
3365            0,
3366            Rar50FilterSpec::new(Rar50FilterKind::Delta { channels: 4 }),
3367        )
3368        .unwrap();
3369        let mut cursor = std::io::Cursor::new(encoded.as_slice());
3370        let first = read_compressed_block(&mut cursor).unwrap();
3371        let second = read_compressed_block(&mut cursor).unwrap();
3372        let mut decoder = Unpack50Decoder::new();
3373
3374        assert!(!first.header.is_last);
3375        assert!(second.header.is_last);
3376        assert_eq!(
3377            decoder
3378                .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3379                .unwrap(),
3380            data
3381        );
3382    }
3383
3384    #[test]
3385    fn solid_encoder_history_limit_follows_encode_options_dictionary() {
3386        let mut encoder = Unpack50Encoder::with_options(
3387            EncodeOptions::new(0).with_max_match_distance(DEFAULT_DICTIONARY_SIZE + 1024),
3388        );
3389        encoder.remember(&vec![0x41; DEFAULT_DICTIONARY_SIZE + 512]);
3390
3391        assert_eq!(encoder.history.len(), DEFAULT_DICTIONARY_SIZE + 512);
3392
3393        let mut capped =
3394            Unpack50Encoder::with_options(EncodeOptions::new(0).with_max_match_distance(1024));
3395        capped.remember(&vec![0x42; 4096]);
3396
3397        assert_eq!(capped.history.len(), 1024);
3398    }
3399
3400    #[test]
3401    fn encodes_lz_member_with_last_length_repeat_symbols() {
3402        let data = b"abcdXabcdYabcdZabcd";
3403        let input = encode_lz_member(data, 0).unwrap();
3404        let block = parse_compressed_block(&input).unwrap();
3405        let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3406
3407        let output = decode_lz(&input, 0, data.len()).unwrap();
3408
3409        assert_eq!(output, data);
3410        assert_ne!(lengths.main[257], 0);
3411    }
3412
3413    #[test]
3414    fn encodes_lz_member_using_rar70_distance_table_shape() {
3415        let data = b"RAR7-compatible repeated phrase repeated phrase repeated phrase";
3416        let input = encode_lz_member(data, 1).unwrap();
3417
3418        let output = decode_lz(&input, 1, data.len()).unwrap();
3419
3420        assert_eq!(output, data);
3421    }
3422
3423    #[test]
3424    fn decode_member_from_reader_accepts_incremental_input() {
3425        struct OneByteReader<'a> {
3426            data: &'a [u8],
3427            pos: usize,
3428        }
3429
3430        impl Read for OneByteReader<'_> {
3431            fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
3432                if self.pos >= self.data.len() {
3433                    return Ok(0);
3434                }
3435                out[0] = self.data[self.pos];
3436                self.pos += 1;
3437                Ok(1)
3438            }
3439        }
3440
3441        let payload = literal_only_payload(b"ABBA");
3442        let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3443        let mut reader = OneByteReader {
3444            data: &input,
3445            pos: 0,
3446        };
3447        let mut decoder = Unpack50Decoder::new();
3448
3449        let output = decoder
3450            .decode_member_from_reader(&mut reader, 0, 4, false, DecodeMode::LiteralOnly)
3451            .unwrap();
3452
3453        assert_eq!(output, b"ABBA");
3454    }
3455
3456    #[test]
3457    fn decodes_synthetic_new_match_block() {
3458        let payload = new_match_payload();
3459        let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3460
3461        let output = decode_lz(&input, 0, 4).unwrap();
3462
3463        assert_eq!(output, b"ABAB");
3464    }
3465
3466    #[test]
3467    fn decodes_synthetic_last_length_match_block() {
3468        let payload = repeat_payload(257);
3469        let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3470
3471        let output = decode_lz(&input, 0, 6).unwrap();
3472
3473        assert_eq!(output, b"ABABAB");
3474    }
3475
3476    #[test]
3477    fn decodes_synthetic_repeat_distance_match_block() {
3478        let payload = repeat_payload(258);
3479        let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3480
3481        let output = decode_lz(&input, 0, 6).unwrap();
3482
3483        assert_eq!(output, b"ABABAB");
3484    }
3485
3486    #[test]
3487    fn rejects_literal_only_block_without_tables() {
3488        let input = encode_compressed_block(&[0], 8, false, true).unwrap();
3489
3490        assert_eq!(
3491            decode_literal_only(&input, 0, 1),
3492            Err(Error::InvalidData("RAR 5 block reuses missing tables"))
3493        );
3494    }
3495
3496    #[test]
3497    fn decodes_length_slots() {
3498        assert_eq!(slot_to_length(0, 0).unwrap(), 2);
3499        assert_eq!(slot_to_length(7, 0).unwrap(), 9);
3500        assert_eq!(slot_to_length(8, 0).unwrap(), 10);
3501        assert_eq!(slot_to_length(8, 1).unwrap(), 11);
3502        assert_eq!(slot_to_length(11, 1).unwrap(), 17);
3503        assert_eq!(slot_to_length(12, 3).unwrap(), 21);
3504    }
3505
3506    #[test]
3507    fn decodes_distance_slots() {
3508        assert_eq!(slot_to_distance(0, 0).unwrap(), 1);
3509        assert_eq!(slot_to_distance(3, 0).unwrap(), 4);
3510        assert_eq!(distance_slot_bit_count(4).unwrap(), 1);
3511        assert_eq!(slot_to_distance(4, 0).unwrap(), 5);
3512        assert_eq!(slot_to_distance(4, 1).unwrap(), 6);
3513        assert_eq!(distance_slot_bit_count(10).unwrap(), 4);
3514        assert_eq!(slot_to_distance(10, 15).unwrap(), 48);
3515    }
3516
3517    #[test]
3518    fn bit_reader_accepts_large_rar5_distance_extras() {
3519        let mut bits = BitReader::new(&[0xff, 0x00, 0xaa, 0x55]);
3520
3521        assert_eq!(bits.read_bits(32).unwrap(), 0xff00_aa55);
3522        assert_eq!(
3523            bits.read_bits(1),
3524            Err(Error::NeedMoreInput),
3525            "32-bit reads must not leave a partial cursor state"
3526        );
3527    }
3528
3529    #[test]
3530    fn copies_lz_matches_with_overlap() {
3531        let decoder = Unpack50Decoder::new();
3532        let mut output = b"AB".to_vec();
3533
3534        decoder
3535            .copy_match(&mut output, 2, 6, 8, DEFAULT_DICTIONARY_SIZE)
3536            .unwrap();
3537
3538        assert_eq!(output, b"ABABABAB");
3539    }
3540
3541    #[test]
3542    fn rejects_invalid_match_copy() {
3543        let decoder = Unpack50Decoder::new();
3544        let mut output = b"AB".to_vec();
3545
3546        assert_eq!(
3547            decoder.copy_match(&mut output, 3, 1, 3, DEFAULT_DICTIONARY_SIZE),
3548            Err(Error::InvalidData("RAR 5 match distance exceeds window"))
3549        );
3550        assert_eq!(
3551            decoder.copy_match(&mut output, 1, 2, 3, DEFAULT_DICTIONARY_SIZE),
3552            Err(Error::InvalidData("RAR 5 match exceeds output limit"))
3553        );
3554    }
3555
3556    #[test]
3557    fn rejects_match_distance_beyond_dictionary() {
3558        let decoder = Unpack50Decoder::new();
3559        let mut output = b"ABCD".to_vec();
3560
3561        assert_eq!(
3562            decoder.copy_match(&mut output, 4, 1, 5, 3),
3563            Err(Error::InvalidData(
3564                "RAR 5 match distance exceeds dictionary"
3565            ))
3566        );
3567    }
3568
3569    #[test]
3570    fn solid_history_is_capped_to_dictionary_size() {
3571        let mut decoder = Unpack50Decoder::new();
3572        let first_payload = literal_only_payload(b"ABBA");
3573        let first =
3574            encode_compressed_block(&first_payload, first_payload.len() * 8, true, true).unwrap();
3575        let second_payload = literal_only_payload(b"BAAB");
3576        let second =
3577            encode_compressed_block(&second_payload, second_payload.len() * 8, true, true).unwrap();
3578
3579        assert_eq!(
3580            decoder
3581                .decode_member_with_dictionary(&first, 0, 4, 6, false, DecodeMode::LiteralOnly)
3582                .unwrap(),
3583            b"ABBA"
3584        );
3585        assert_eq!(decoder.history, b"ABBA");
3586
3587        assert_eq!(
3588            decoder
3589                .decode_member_with_dictionary(&second, 0, 4, 6, true, DecodeMode::LiteralOnly)
3590                .unwrap(),
3591            b"BAAB"
3592        );
3593        assert_eq!(decoder.history, b"BABAAB");
3594    }
3595
3596    #[test]
3597    fn streaming_decoder_history_is_capped_without_reordering() {
3598        let mut decoder = Unpack50Decoder::new();
3599        let first_payload = literal_only_payload(b"ABBA");
3600        let first =
3601            encode_compressed_block(&first_payload, first_payload.len() * 8, true, true).unwrap();
3602        let second_payload = literal_only_payload(b"BAAB");
3603        let second =
3604            encode_compressed_block(&second_payload, second_payload.len() * 8, true, true).unwrap();
3605        let mut decoded = Vec::new();
3606
3607        decoder
3608            .decode_member_from_reader_with_dictionary_to_sink(
3609                &mut std::io::Cursor::new(&first),
3610                0,
3611                4,
3612                6,
3613                false,
3614                |chunk| {
3615                    match chunk {
3616                        DecodedChunk::Bytes(bytes) => decoded.extend_from_slice(bytes),
3617                        DecodedChunk::Repeated { byte, len } => {
3618                            decoded.extend(std::iter::repeat_n(byte, len));
3619                        }
3620                    }
3621                    Ok::<(), std::io::Error>(())
3622                },
3623            )
3624            .unwrap();
3625        assert_eq!(decoded, b"ABBA");
3626        assert_eq!(decoder.history, b"ABBA");
3627
3628        decoded.clear();
3629        decoder
3630            .decode_member_from_reader_with_dictionary_to_sink(
3631                &mut std::io::Cursor::new(&second),
3632                0,
3633                4,
3634                6,
3635                true,
3636                |chunk| {
3637                    match chunk {
3638                        DecodedChunk::Bytes(bytes) => decoded.extend_from_slice(bytes),
3639                        DecodedChunk::Repeated { byte, len } => {
3640                            decoded.extend(std::iter::repeat_n(byte, len));
3641                        }
3642                    }
3643                    Ok::<(), std::io::Error>(())
3644                },
3645            )
3646            .unwrap();
3647        assert_eq!(decoded, b"BAAB");
3648        assert_eq!(decoder.history, b"BABAAB");
3649    }
3650
3651    fn literal_only_payload(data: &[u8]) -> Vec<u8> {
3652        let mut lengths = TableLengths {
3653            main: vec![0; MAIN_TABLE_SIZE],
3654            distance: vec![0; DISTANCE_TABLE_SIZE_50],
3655            align: vec![0; ALIGN_TABLE_SIZE],
3656            length: vec![0; LENGTH_TABLE_SIZE],
3657        };
3658        lengths.main[b'A' as usize] = 1;
3659        lengths.main[b'B' as usize] = 1;
3660        let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3661        let mut writer = BitWriter { bytes, bit_pos };
3662        for &byte in data {
3663            match byte {
3664                b'A' => writer.write_bits(0, 1),
3665                b'B' => writer.write_bits(1, 1),
3666                _ => panic!("test helper only encodes A/B"),
3667            }
3668        }
3669        writer.finish()
3670    }
3671
3672    fn new_match_payload() -> Vec<u8> {
3673        let mut lengths = TableLengths {
3674            main: vec![0; MAIN_TABLE_SIZE],
3675            distance: vec![0; DISTANCE_TABLE_SIZE_50],
3676            align: vec![0; ALIGN_TABLE_SIZE],
3677            length: vec![0; LENGTH_TABLE_SIZE],
3678        };
3679        lengths.main[b'A' as usize] = 2;
3680        lengths.main[b'B' as usize] = 2;
3681        lengths.main[262] = 2;
3682        lengths.distance[1] = 1;
3683        let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3684        let mut writer = BitWriter { bytes, bit_pos };
3685
3686        writer.write_bits(0b00, 2); // 'A'
3687        writer.write_bits(0b01, 2); // 'B'
3688        writer.write_bits(0b10, 2); // match length 2
3689        writer.write_bits(0, 1); // distance slot 1
3690        writer.finish()
3691    }
3692
3693    fn repeat_payload(repeat_symbol: usize) -> Vec<u8> {
3694        let mut lengths = TableLengths {
3695            main: vec![0; MAIN_TABLE_SIZE],
3696            distance: vec![0; DISTANCE_TABLE_SIZE_50],
3697            align: vec![0; ALIGN_TABLE_SIZE],
3698            length: vec![0; LENGTH_TABLE_SIZE],
3699        };
3700        lengths.main[b'A' as usize] = 2;
3701        lengths.main[b'B' as usize] = 2;
3702        lengths.main[repeat_symbol] = 2;
3703        lengths.main[262] = 2;
3704        lengths.distance[1] = 1;
3705        lengths.length[0] = 1;
3706        let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3707        let mut writer = BitWriter { bytes, bit_pos };
3708
3709        writer.write_bits(0b00, 2); // 'A'
3710        writer.write_bits(0b01, 2); // 'B'
3711        writer.write_bits(0b11, 2); // match length 2
3712        writer.write_bits(0, 1); // distance slot 1
3713        writer.write_bits(0b10, 2); // repeat control symbol
3714        if repeat_symbol == 258 {
3715            writer.write_bits(0, 1); // length slot 0
3716        }
3717        writer.finish()
3718    }
3719}