1use crate::filters::{self, DeltaErrorMessages, FilterOp};
2use crate::{huffman, Error, Result};
3use std::collections::VecDeque;
4use std::io::Read;
5use std::ops::Range;
6
7pub const LEVEL_TABLE_SIZE: usize = 20;
8pub const MAIN_TABLE_SIZE: usize = 306;
9pub const DISTANCE_TABLE_SIZE_50: usize = 64;
10pub const DISTANCE_TABLE_SIZE_70: usize = 80;
11pub const ALIGN_TABLE_SIZE: usize = 16;
12pub const LENGTH_TABLE_SIZE: usize = 44;
13const DEFAULT_DICTIONARY_SIZE: usize = 4 * 1024 * 1024;
14const MAX_INITIAL_OUTPUT_CAPACITY: usize = 1024 * 1024;
15const STREAM_FLUSH_THRESHOLD: usize = 64 * 1024;
16const STREAM_HISTORY_LIMIT: usize = 64 * 1024 * 1024;
17const MAX_ENCODER_MATCH_OFFSET: usize = DEFAULT_DICTIONARY_SIZE;
18const MAX_ENCODER_MATCH_LENGTH: usize = 4096;
19const MAX_COMPRESSED_BLOCK_OUTPUT: usize = 4 * 1024 * 1024;
20const MAX_FILTER_BLOCK_LENGTH: usize = 0x3ffff;
21const MATCH_HASH_BUCKETS: usize = 4096;
22const MAX_MATCH_CANDIDATES: usize = 256;
23
24#[derive(Debug, Clone, PartialEq, Eq)]
25pub struct CompressedBlock {
26 pub header: CompressedBlockHeader,
27 pub header_len: usize,
28 pub payload: Range<usize>,
29}
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq)]
32pub struct CompressedBlockHeader {
33 pub flags: u8,
34 pub is_last: bool,
35 pub has_tables: bool,
36 pub final_byte_bits: u8,
37 pub payload_size: usize,
38 pub payload_bits: usize,
39}
40
41struct OwnedCompressedBlock {
42 header: CompressedBlockHeader,
43 payload: Vec<u8>,
44}
45
46#[derive(Debug)]
47#[doc(hidden)]
48pub enum StreamDecodeError<E> {
49 Decode(Error),
50 FilteredMember,
51 Sink(E),
52}
53
54impl<E> From<Error> for StreamDecodeError<E> {
55 fn from(error: Error) -> Self {
56 Self::Decode(error)
57 }
58}
59
60#[derive(Debug, Clone, Copy, PartialEq, Eq)]
61#[doc(hidden)]
62pub enum DecodedChunk<'a> {
63 Bytes(&'a [u8]),
64 Repeated { byte: u8, len: usize },
65}
66
67#[derive(Debug, Clone, PartialEq, Eq)]
68pub struct TableLengths {
69 pub main: Vec<u8>,
70 pub distance: Vec<u8>,
71 pub align: Vec<u8>,
72 pub length: Vec<u8>,
73}
74
75#[derive(Debug, Clone)]
76pub struct DecodeTables {
77 pub main: HuffmanTable,
78 pub distance: HuffmanTable,
79 pub align: HuffmanTable,
80 pub length: HuffmanTable,
81 pub align_mode: bool,
82}
83
84impl DecodeTables {
85 pub fn from_lengths(lengths: &TableLengths) -> Result<Self> {
86 let align_mode = lengths
87 .align
88 .iter()
89 .any(|&length| length != 0 && length != 4);
90 Ok(Self {
91 main: HuffmanTable::from_lengths(&lengths.main)?,
92 distance: HuffmanTable::from_lengths(&lengths.distance)?,
93 align: HuffmanTable::from_lengths(&lengths.align)?,
94 length: HuffmanTable::from_lengths(&lengths.length)?,
95 align_mode,
96 })
97 }
98}
99
100#[derive(Debug, Clone, Copy, PartialEq, Eq)]
101pub enum DecodeMode {
102 LiteralOnly,
103 Lz,
104 LzNoFilters,
105}
106
107impl DecodeMode {
108 fn uses_lz(self) -> bool {
109 matches!(self, Self::Lz | Self::LzNoFilters)
110 }
111
112 fn applies_filters(self) -> bool {
113 matches!(self, Self::Lz)
114 }
115}
116
117pub fn parse_compressed_block(input: &[u8]) -> Result<CompressedBlock> {
118 if input.len() < 3 {
119 return Err(Error::NeedMoreInput);
120 }
121
122 let flags = input[0];
123 let checksum = input[1];
124 let size_bytes = match (flags >> 3) & 0x03 {
125 0 => 1,
126 1 => 2,
127 2 => 3,
128 _ => return Err(Error::InvalidData("RAR 5 block size length is invalid")),
129 };
130 let header_len = 2 + size_bytes;
131 if input.len() < header_len {
132 return Err(Error::NeedMoreInput);
133 }
134
135 let size_data = &input[2..header_len];
136 let actual = size_data
137 .iter()
138 .fold(checksum ^ flags, |acc, &byte| acc ^ byte);
139 if actual != 0x5a {
140 return Err(Error::InvalidData("RAR 5 block header checksum mismatch"));
141 }
142
143 let payload_size = size_data
144 .iter()
145 .enumerate()
146 .fold(0usize, |acc, (index, &byte)| {
147 acc | (usize::from(byte) << (index * 8))
148 });
149 let payload_end = header_len
150 .checked_add(payload_size)
151 .ok_or(Error::InvalidData("RAR 5 block size overflows"))?;
152 if input.len() < payload_end {
153 return Err(Error::NeedMoreInput);
154 }
155
156 let final_byte_bits = ((flags & 0x07) + 1).min(8);
157 let payload_bits = if payload_size == 0 {
158 0
159 } else {
160 (payload_size - 1) * 8 + usize::from(final_byte_bits)
161 };
162
163 Ok(CompressedBlock {
164 header: CompressedBlockHeader {
165 flags,
166 is_last: flags & 0x40 != 0,
167 has_tables: flags & 0x80 != 0,
168 final_byte_bits,
169 payload_size,
170 payload_bits,
171 },
172 header_len,
173 payload: header_len..payload_end,
174 })
175}
176
177pub fn read_level_lengths(input: &[u8]) -> Result<([u8; LEVEL_TABLE_SIZE], usize)> {
178 let mut bits = BitReader::new(input);
179 let mut lengths = [0; LEVEL_TABLE_SIZE];
180 let mut pos = 0;
181 while pos < LEVEL_TABLE_SIZE {
182 let length = bits.read_bits(4)? as u8;
183 if length == 15 {
184 let zero_count = bits.read_bits(4)? as usize;
185 if zero_count == 0 {
186 lengths[pos] = 15;
187 pos += 1;
188 } else {
189 let count = zero_count + 2;
190 for _ in 0..count {
191 if pos >= LEVEL_TABLE_SIZE {
192 break;
193 }
194 lengths[pos] = 0;
195 pos += 1;
196 }
197 }
198 } else {
199 lengths[pos] = length;
200 pos += 1;
201 }
202 }
203 Ok((lengths, bits.bit_pos))
204}
205
206pub fn table_length_count(algorithm_version: u8) -> Result<usize> {
207 match algorithm_version {
208 0 => Ok(MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_50 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE),
209 1 => Ok(MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_70 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE),
210 _ => Err(Error::InvalidData(
211 "RAR 5 unknown compression algorithm version",
212 )),
213 }
214}
215
216pub fn read_table_lengths(input: &[u8], algorithm_version: u8) -> Result<(TableLengths, usize)> {
217 let table_size = table_length_count(algorithm_version)?;
218 let (level_lengths, level_bits) = read_level_lengths(input)?;
219 let level_decoder = HuffmanTable::from_lengths(&level_lengths)?;
220 let mut bits = BitReader::new(input);
221 bits.bit_pos = level_bits;
222
223 let mut lengths = Vec::with_capacity(table_size);
224 while lengths.len() < table_size {
225 let number = level_decoder.decode(&mut bits)?;
226 match number {
227 0..=15 => lengths.push(number as u8),
228 16 | 17 => {
229 if lengths.is_empty() {
230 return Err(Error::InvalidData(
231 "RAR 5 table repeats missing previous length",
232 ));
233 }
234 let count = if number == 16 {
235 3 + bits.read_bits(3)? as usize
236 } else {
237 11 + bits.read_bits(7)? as usize
238 };
239 let previous = *lengths.last().unwrap();
240 for _ in 0..count {
241 if lengths.len() >= table_size {
242 break;
243 }
244 lengths.push(previous);
245 }
246 }
247 18 | 19 => {
248 let count = if number == 18 {
249 3 + bits.read_bits(3)? as usize
250 } else {
251 11 + bits.read_bits(7)? as usize
252 };
253 for _ in 0..count {
254 if lengths.len() >= table_size {
255 break;
256 }
257 lengths.push(0);
258 }
259 }
260 _ => return Err(Error::InvalidData("RAR 5 invalid level-table symbol")),
261 }
262 }
263
264 let distance_size = match algorithm_version {
265 0 => DISTANCE_TABLE_SIZE_50,
266 1 => DISTANCE_TABLE_SIZE_70,
267 _ => unreachable!("validated by table_length_count"),
268 };
269 let distance_start = MAIN_TABLE_SIZE;
270 let align_start = distance_start + distance_size;
271 let length_start = align_start + ALIGN_TABLE_SIZE;
272
273 Ok((
274 TableLengths {
275 main: lengths[..distance_start].to_vec(),
276 distance: lengths[distance_start..align_start].to_vec(),
277 align: lengths[align_start..length_start].to_vec(),
278 length: lengths[length_start..].to_vec(),
279 },
280 bits.bit_pos,
281 ))
282}
283
284pub fn encode_table_lengths(lengths: &TableLengths, algorithm_version: u8) -> Result<Vec<u8>> {
285 encode_table_lengths_with_bit_count(lengths, algorithm_version).map(|(data, _)| data)
286}
287
288pub fn encode_table_lengths_with_bit_count(
289 lengths: &TableLengths,
290 algorithm_version: u8,
291) -> Result<(Vec<u8>, usize)> {
292 let distance_size = match algorithm_version {
293 0 => DISTANCE_TABLE_SIZE_50,
294 1 => DISTANCE_TABLE_SIZE_70,
295 _ => {
296 return Err(Error::InvalidData(
297 "RAR 5 unknown compression algorithm version",
298 ))
299 }
300 };
301 if lengths.main.len() != MAIN_TABLE_SIZE
302 || lengths.distance.len() != distance_size
303 || lengths.align.len() != ALIGN_TABLE_SIZE
304 || lengths.length.len() != LENGTH_TABLE_SIZE
305 {
306 return Err(Error::InvalidData("RAR 5 table length count mismatch"));
307 }
308
309 let flattened = lengths
310 .main
311 .iter()
312 .chain(lengths.distance.iter())
313 .chain(lengths.align.iter())
314 .chain(lengths.length.iter())
315 .copied()
316 .collect::<Vec<_>>();
317 for &length in &flattened {
318 if length > 15 {
319 return Err(Error::InvalidData("RAR 5 Huffman length is too large"));
320 }
321 }
322
323 let level_tokens = encode_table_level_tokens(&flattened);
324 let level_lengths = level_code_lengths_for_tokens(&level_tokens);
325 let level_table = HuffmanTable::from_lengths(&level_lengths)?;
326 let mut writer = BitWriter::new();
327 write_level_lengths(&mut writer, &level_lengths);
328 for token in level_tokens {
329 let (code, len) = level_table.code_for_symbol(token.symbol)?;
330 writer.write_bits(usize::from(code), usize::from(len));
331 if token.extra_bits != 0 {
332 writer.write_bits(
333 usize::from(token.extra_value),
334 usize::from(token.extra_bits),
335 );
336 }
337 }
338 let bit_count = writer.bit_pos;
339 Ok((writer.finish(), bit_count))
340}
341
342pub fn encode_compressed_block(
343 payload: &[u8],
344 payload_bits: usize,
345 has_tables: bool,
346 is_last: bool,
347) -> Result<Vec<u8>> {
348 if payload_bits > payload.len() * 8 {
349 return Err(Error::InvalidData("RAR 5 block bit count exceeds payload"));
350 }
351 if payload.is_empty() && payload_bits != 0 {
352 return Err(Error::InvalidData("RAR 5 empty block has payload bits"));
353 }
354 if !payload.is_empty() && payload_bits <= (payload.len() - 1) * 8 {
355 return Err(Error::InvalidData("RAR 5 block has unused payload bytes"));
356 }
357 if payload.len() > 0x00ff_ffff {
358 return Err(Error::InvalidData("RAR 5 block payload is too large"));
359 }
360
361 let size_len = if payload.len() <= 0xff {
362 1
363 } else if payload.len() <= 0xffff {
364 2
365 } else {
366 3
367 };
368 let final_byte_bits = if payload.is_empty() {
369 1
370 } else {
371 ((payload_bits - 1) % 8) + 1
372 };
373 let mut flags = (final_byte_bits as u8) - 1;
374 flags |= match size_len {
375 1 => 0,
376 2 => 1 << 3,
377 3 => 2 << 3,
378 _ => unreachable!("size_len is constrained above"),
379 };
380 if is_last {
381 flags |= 0x40;
382 }
383 if has_tables {
384 flags |= 0x80;
385 }
386
387 let mut size_bytes = [0u8; 3];
388 let mut size = payload.len();
389 for byte in &mut size_bytes[..size_len] {
390 *byte = size as u8;
391 size >>= 8;
392 }
393 let checksum = size_bytes[..size_len]
394 .iter()
395 .fold(0x5a ^ flags, |acc, &byte| acc ^ byte);
396 let mut out = Vec::with_capacity(2 + size_len + payload.len());
397 out.push(flags);
398 out.push(checksum);
399 out.extend_from_slice(&size_bytes[..size_len]);
400 out.extend_from_slice(payload);
401 Ok(out)
402}
403
404pub fn decode_literal_only(
405 input: &[u8],
406 algorithm_version: u8,
407 output_size: usize,
408) -> Result<Vec<u8>> {
409 let mut decoder = Unpack50Decoder::new();
410 decoder.decode_member(
411 input,
412 algorithm_version,
413 output_size,
414 false,
415 DecodeMode::LiteralOnly,
416 )
417}
418
419pub fn decode_lz(input: &[u8], algorithm_version: u8, output_size: usize) -> Result<Vec<u8>> {
420 let mut decoder = Unpack50Decoder::new();
421 decoder.decode_member(input, algorithm_version, output_size, false, DecodeMode::Lz)
422}
423
424pub fn encode_literal_only(data: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
425 let distance_size = match algorithm_version {
426 0 => DISTANCE_TABLE_SIZE_50,
427 1 => DISTANCE_TABLE_SIZE_70,
428 _ => {
429 return Err(Error::InvalidData(
430 "RAR 5 unknown compression algorithm version",
431 ))
432 }
433 };
434 let mut lengths = TableLengths {
435 main: vec![0; MAIN_TABLE_SIZE],
436 distance: vec![0; distance_size],
437 align: vec![0; ALIGN_TABLE_SIZE],
438 length: vec![0; LENGTH_TABLE_SIZE],
439 };
440 let present = literal_presence(data);
441 let literal_count = present.iter().filter(|&&used| used).count();
442 let literal_length = huffman::bits_for_symbol_count(literal_count);
443 for (symbol, used) in present.into_iter().enumerate() {
444 if used {
445 lengths.main[symbol] = literal_length;
446 }
447 }
448
449 let table = HuffmanTable::from_lengths(&lengths.main)?;
450 let (table_data, table_bits) =
451 encode_table_lengths_with_bit_count(&lengths, algorithm_version)?;
452 let mut writer = BitWriter {
453 bytes: table_data,
454 bit_pos: table_bits,
455 };
456 for &byte in data {
457 let (code, len) = table.code_for_symbol(byte as usize)?;
458 writer.write_bits(usize::from(code), usize::from(len));
459 }
460 let payload_bits = writer.bit_pos;
461 encode_compressed_block(&writer.finish(), payload_bits, true, true)
462}
463
464pub fn encode_lz_member(data: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
465 encode_lz_member_with_history(data, &[], algorithm_version)
466}
467
468#[derive(Debug, Clone, Copy, PartialEq, Eq)]
469#[non_exhaustive]
470pub struct EncodeOptions {
471 pub max_match_candidates: usize,
472 pub lazy_matching: bool,
473 pub lazy_lookahead: usize,
474 pub max_match_distance: usize,
475}
476
477impl EncodeOptions {
478 pub const fn new(max_match_candidates: usize) -> Self {
479 Self {
480 max_match_candidates,
481 lazy_matching: false,
482 lazy_lookahead: 1,
483 max_match_distance: MAX_ENCODER_MATCH_OFFSET,
484 }
485 }
486
487 pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
488 self.lazy_matching = enabled;
489 self
490 }
491
492 pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
493 self.lazy_lookahead = bytes;
494 self
495 }
496
497 pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
498 self.max_match_distance = distance;
499 self
500 }
501}
502
503impl Default for EncodeOptions {
504 fn default() -> Self {
505 Self::new(MAX_MATCH_CANDIDATES)
506 }
507}
508
509pub fn encode_lz_member_with_history(
510 data: &[u8],
511 history: &[u8],
512 algorithm_version: u8,
513) -> Result<Vec<u8>> {
514 encode_lz_member_inner(
515 data,
516 history,
517 algorithm_version,
518 &[],
519 EncodeOptions::default(),
520 )
521}
522
523pub fn encode_lz_member_with_options(
524 data: &[u8],
525 algorithm_version: u8,
526 options: EncodeOptions,
527) -> Result<Vec<u8>> {
528 encode_lz_member_with_history_and_options(data, &[], algorithm_version, options)
529}
530
531pub fn encode_lz_member_with_history_and_options(
532 data: &[u8],
533 history: &[u8],
534 algorithm_version: u8,
535 options: EncodeOptions,
536) -> Result<Vec<u8>> {
537 encode_lz_member_inner(data, history, algorithm_version, &[], options)
538}
539
540#[derive(Debug, Clone, Copy, PartialEq, Eq)]
541pub enum Rar50FilterKind {
542 Delta { channels: usize },
543 E8,
544 E8E9,
545 Arm,
546}
547
548#[derive(Debug, Clone, PartialEq, Eq)]
549pub struct Rar50FilterSpec {
550 pub kind: Rar50FilterKind,
551 pub range: Option<Range<usize>>,
552}
553
554impl Rar50FilterSpec {
555 pub fn new(kind: Rar50FilterKind) -> Self {
556 Self { kind, range: None }
557 }
558
559 pub fn range(kind: Rar50FilterKind, range: Range<usize>) -> Self {
560 Self {
561 kind,
562 range: Some(range),
563 }
564 }
565}
566
567fn filtered_lz_member(
568 data: &[u8],
569 filters: &[Rar50FilterSpec],
570) -> Result<(Vec<u8>, Vec<EncodeFilter>)> {
571 let mut filtered = data.to_vec();
572 let mut records = Vec::with_capacity(filters.len());
573 for filter in filters {
574 let range = filter.range.clone().unwrap_or(0..data.len());
575 if range.start >= range.end || range.end > data.len() {
576 return Err(Error::InvalidData("RAR 5 filter range is invalid"));
577 }
578 if range.start > u32::MAX as usize {
579 return Err(Error::InvalidData("RAR 5 filter offset is too large"));
580 }
581
582 let filter_data = &mut filtered[range.clone()];
583 let (filter_type, channels) = encode_filter_data(filter.kind, filter_data, range.start)?;
584 records.push(EncodeFilter {
585 offset: range.start,
586 length: range.len(),
587 filter_type,
588 channels,
589 });
590 }
591 Ok((filtered, records))
592}
593
594fn encode_filter_data(
595 kind: Rar50FilterKind,
596 data: &mut [u8],
597 file_offset: usize,
598) -> Result<(FilterType, usize)> {
599 if file_offset > u32::MAX as usize {
600 return Err(Error::InvalidData("RAR 5 filter offset is too large"));
601 }
602 match kind {
603 Rar50FilterKind::Delta { channels } => {
604 filters::encode_in_place(
605 FilterOp::Delta { channels },
606 data,
607 0,
608 rar50_delta_messages(),
609 )?;
610 Ok((FilterType::Delta, channels))
611 }
612 Rar50FilterKind::E8 => {
613 e8e9_encode(data, file_offset as u32, false);
614 Ok((FilterType::E8, 0))
615 }
616 Rar50FilterKind::E8E9 => {
617 e8e9_encode(data, file_offset as u32, true);
618 Ok((FilterType::E8E9, 0))
619 }
620 Rar50FilterKind::Arm => {
621 arm_encode(data, file_offset as u32);
622 Ok((FilterType::Arm, 0))
623 }
624 }
625}
626
627fn filtered_lz_blocks(
628 data: &[u8],
629 filters: &[Rar50FilterSpec],
630 history: &[u8],
631 algorithm_version: u8,
632 options: EncodeOptions,
633) -> Result<Vec<u8>> {
634 let filters = normalized_filter_specs(data.len(), filters)?;
635 let mut out = Vec::new();
636 let mut block_history =
637 history[history.len().saturating_sub(options.max_match_distance)..].to_vec();
638 let mut chunk_start = 0usize;
639 while chunk_start < data.len() {
640 let chunk_end = (chunk_start + MAX_FILTER_BLOCK_LENGTH).min(data.len());
641 let mut chunk = data[chunk_start..chunk_end].to_vec();
642 let mut records = Vec::new();
643 for filter in &filters {
644 let start = filter.range.start.max(chunk_start);
645 let end = filter.range.end.min(chunk_end);
646 if start >= end {
647 continue;
648 }
649 let local_start = start - chunk_start;
650 let local_end = end - chunk_start;
651 let (filter_type, channels) =
652 encode_filter_data(filter.kind, &mut chunk[local_start..local_end], start)?;
653 records.push(EncodeFilter {
654 offset: local_start,
655 length: local_end - local_start,
656 filter_type,
657 channels,
658 });
659 }
660 out.extend(encode_lz_block(
661 &chunk,
662 &block_history,
663 algorithm_version,
664 &records,
665 options,
666 chunk_end == data.len(),
667 )?);
668 block_history.extend_from_slice(&chunk);
669 let keep_from = block_history
670 .len()
671 .saturating_sub(options.max_match_distance);
672 if keep_from != 0 {
673 block_history.drain(..keep_from);
674 }
675 chunk_start = chunk_end;
676 }
677 Ok(out)
678}
679
680#[derive(Debug, Clone, PartialEq, Eq)]
681struct NormalizedFilterSpec {
682 kind: Rar50FilterKind,
683 range: Range<usize>,
684}
685
686fn normalized_filter_specs(
687 data_len: usize,
688 filters: &[Rar50FilterSpec],
689) -> Result<Vec<NormalizedFilterSpec>> {
690 let mut normalized = Vec::with_capacity(filters.len());
691 for filter in filters {
692 let range = filter.range.clone().unwrap_or(0..data_len);
693 if range.start >= range.end || range.end > data_len {
694 return Err(Error::InvalidData("RAR 5 filter range is invalid"));
695 }
696 normalized.push(NormalizedFilterSpec {
697 kind: filter.kind,
698 range,
699 });
700 }
701 Ok(normalized)
702}
703
704fn encode_lz_member_inner(
705 data: &[u8],
706 history: &[u8],
707 algorithm_version: u8,
708 initial_filters: &[EncodeFilter],
709 options: EncodeOptions,
710) -> Result<Vec<u8>> {
711 if data.len() > MAX_COMPRESSED_BLOCK_OUTPUT && initial_filters.is_empty() {
712 let mut out = Vec::new();
713 let mut block_history =
714 history[history.len().saturating_sub(options.max_match_distance)..].to_vec();
715 let mut chunks = data.chunks(MAX_COMPRESSED_BLOCK_OUTPUT).peekable();
716 while let Some(chunk) = chunks.next() {
717 let is_last = chunks.peek().is_none();
718 out.extend(encode_lz_block(
719 chunk,
720 &block_history,
721 algorithm_version,
722 &[],
723 options,
724 is_last,
725 )?);
726 block_history.extend_from_slice(chunk);
727 let keep_from = block_history
728 .len()
729 .saturating_sub(options.max_match_distance);
730 if keep_from != 0 {
731 block_history.drain(..keep_from);
732 }
733 }
734 return Ok(out);
735 }
736
737 encode_lz_block(
738 data,
739 history,
740 algorithm_version,
741 initial_filters,
742 options,
743 true,
744 )
745}
746
747fn encode_lz_block(
748 data: &[u8],
749 history: &[u8],
750 algorithm_version: u8,
751 initial_filters: &[EncodeFilter],
752 options: EncodeOptions,
753 is_last: bool,
754) -> Result<Vec<u8>> {
755 let distance_size = match algorithm_version {
756 0 => DISTANCE_TABLE_SIZE_50,
757 1 => DISTANCE_TABLE_SIZE_70,
758 _ => {
759 return Err(Error::InvalidData(
760 "RAR 5 unknown compression algorithm version",
761 ))
762 }
763 };
764 let mut tokens = Vec::new();
765 tokens.extend(initial_filters.iter().copied().map(EncodeToken::Filter));
766 tokens.extend(encode_tokens(data, history, options, distance_size));
767 let mut lengths = TableLengths {
768 main: vec![0; MAIN_TABLE_SIZE],
769 distance: vec![0; distance_size],
770 align: vec![0; ALIGN_TABLE_SIZE],
771 length: vec![0; LENGTH_TABLE_SIZE],
772 };
773
774 let mut main_frequencies = vec![0usize; MAIN_TABLE_SIZE];
775 let mut distance_frequencies = vec![0usize; distance_size];
776 let mut align_frequencies = vec![0usize; ALIGN_TABLE_SIZE];
777 let mut length_frequencies = vec![0usize; LENGTH_TABLE_SIZE];
778 let mut state = EncoderMatchState::default();
779 for token in &tokens {
780 match *token {
781 EncodeToken::Filter(_) => main_frequencies[256] += 1,
782 EncodeToken::Literal(byte) => main_frequencies[byte as usize] += 1,
783 EncodeToken::Match { length, distance } => {
784 match state.encode_match(length, distance, distance_size)? {
785 EncodedMatch::LastLengthRepeat => main_frequencies[257] += 1,
786 EncodedMatch::RepeatDistance {
787 index, length_slot, ..
788 } => {
789 main_frequencies[258 + index] += 1;
790 length_frequencies[length_slot] += 1;
791 }
792 EncodedMatch::New {
793 length_slot,
794 distance_slot,
795 distance_extra,
796 distance_bit_count,
797 ..
798 } => {
799 main_frequencies[262 + length_slot] += 1;
800 distance_frequencies[distance_slot] += 1;
801 if distance_bit_count >= 4 {
802 align_frequencies[distance_extra & 0x0f] += 1;
803 }
804 }
805 }
806 state.remember(length, distance);
807 }
808 }
809 }
810
811 lengths.main = huffman::lengths_for_frequencies(&main_frequencies, 15);
812 lengths.distance = huffman::lengths_for_frequencies(&distance_frequencies, 15);
813 lengths.length = huffman::lengths_for_frequencies(&length_frequencies, 15);
814 lengths.align = huffman::lengths_for_frequencies(&align_frequencies, 15);
815
816 let main_table = HuffmanTable::from_lengths(&lengths.main)?;
817 let distance_table = HuffmanTable::from_lengths(&lengths.distance)?;
818 let align_table = HuffmanTable::from_lengths(&lengths.align)?;
819 let length_table = HuffmanTable::from_lengths(&lengths.length)?;
820 let (table_data, table_bits) =
821 encode_table_lengths_with_bit_count(&lengths, algorithm_version)?;
822 let mut writer = BitWriter {
823 bytes: table_data,
824 bit_pos: table_bits,
825 };
826 let mut state = EncoderMatchState::default();
827 for token in tokens {
828 match token {
829 EncodeToken::Filter(filter) => {
830 let (code, len) = main_table.code_for_symbol(256)?;
831 writer.write_bits(usize::from(code), usize::from(len));
832 write_filter(&mut writer, filter)?;
833 }
834 EncodeToken::Literal(byte) => {
835 let (code, len) = main_table.code_for_symbol(byte as usize)?;
836 writer.write_bits(usize::from(code), usize::from(len));
837 }
838 EncodeToken::Match { length, distance } => {
839 match state.encode_match(length, distance, distance_size)? {
840 EncodedMatch::LastLengthRepeat => {
841 let (code, len) = main_table.code_for_symbol(257)?;
842 writer.write_bits(usize::from(code), usize::from(len));
843 }
844 EncodedMatch::RepeatDistance {
845 index,
846 length_slot,
847 length_extra,
848 } => {
849 let (code, len) = main_table.code_for_symbol(258 + index)?;
850 writer.write_bits(usize::from(code), usize::from(len));
851 let (code, len) = length_table.code_for_symbol(length_slot)?;
852 writer.write_bits(usize::from(code), usize::from(len));
853 let length_extra_bits = length_slot_extra_bits(length_slot)?;
854 if length_extra_bits != 0 {
855 writer.write_bits(length_extra, usize::from(length_extra_bits));
856 }
857 }
858 EncodedMatch::New {
859 length_slot,
860 length_extra,
861 distance_slot,
862 distance_extra,
863 distance_bit_count,
864 } => {
865 let (code, len) = main_table.code_for_symbol(262 + length_slot)?;
866 writer.write_bits(usize::from(code), usize::from(len));
867 let length_extra_bits = length_slot_extra_bits(length_slot)?;
868 if length_extra_bits != 0 {
869 writer.write_bits(length_extra, usize::from(length_extra_bits));
870 }
871 let (code, len) = distance_table.code_for_symbol(distance_slot)?;
872 writer.write_bits(usize::from(code), usize::from(len));
873 if distance_bit_count >= 4 {
874 if distance_bit_count > 4 {
875 writer.write_bits(distance_extra >> 4, distance_bit_count - 4);
876 }
877 let (code, len) = align_table.code_for_symbol(distance_extra & 0x0f)?;
878 writer.write_bits(usize::from(code), usize::from(len));
879 } else if distance_bit_count != 0 {
880 writer.write_bits(distance_extra, distance_bit_count);
881 }
882 }
883 }
884 state.remember(length, distance);
885 }
886 }
887 }
888
889 let payload_bits = writer.bit_pos;
890 encode_compressed_block(&writer.finish(), payload_bits, true, is_last)
891}
892
893#[derive(Debug, Clone, Default)]
894pub struct Unpack50Encoder {
895 history: Vec<u8>,
896 options: EncodeOptions,
897}
898
899impl Unpack50Encoder {
900 pub fn new() -> Self {
901 Self::default()
902 }
903
904 pub fn with_options(options: EncodeOptions) -> Self {
905 Self {
906 history: Vec::new(),
907 options,
908 }
909 }
910
911 pub fn encode_member(&mut self, input: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
912 let packed = encode_lz_member_with_history_and_options(
913 input,
914 &self.history,
915 algorithm_version,
916 self.options,
917 )?;
918 self.remember(input);
919 Ok(packed)
920 }
921
922 pub fn encode_member_with_filter(
923 &mut self,
924 input: &[u8],
925 algorithm_version: u8,
926 filter: Rar50FilterSpec,
927 ) -> Result<Vec<u8>> {
928 self.encode_member_with_filters(input, algorithm_version, &[filter])
929 }
930
931 pub fn encode_member_with_filters(
932 &mut self,
933 input: &[u8],
934 algorithm_version: u8,
935 filters: &[Rar50FilterSpec],
936 ) -> Result<Vec<u8>> {
937 if input.len() > MAX_FILTER_BLOCK_LENGTH {
938 let packed = filtered_lz_blocks(
939 input,
940 filters,
941 &self.history,
942 algorithm_version,
943 self.options,
944 )?;
945 self.remember(input);
946 return Ok(packed);
947 }
948 let (filtered, records) = filtered_lz_member(input, filters)?;
949 let packed = encode_lz_member_inner(
950 &filtered,
951 &self.history,
952 algorithm_version,
953 &records,
954 self.options,
955 )?;
956 self.remember(input);
957 Ok(packed)
958 }
959
960 fn remember(&mut self, input: &[u8]) {
961 self.history.extend_from_slice(input);
962 let keep_from = self
963 .history
964 .len()
965 .saturating_sub(self.options.max_match_distance);
966 if keep_from != 0 {
967 self.history.drain(..keep_from);
968 }
969 }
970}
971
972#[derive(Debug, Clone, Copy)]
973enum EncodeToken {
974 Filter(EncodeFilter),
975 Literal(u8),
976 Match { length: usize, distance: usize },
977}
978
979#[derive(Debug, Clone, Copy)]
980struct EncodeFilter {
981 offset: usize,
982 length: usize,
983 filter_type: FilterType,
984 channels: usize,
985}
986
987#[derive(Debug, Clone, Copy, Default)]
988struct EncoderMatchState {
989 reps: [usize; 4],
990 last_length: usize,
991}
992
993#[derive(Debug, Clone, Copy, PartialEq, Eq)]
994enum EncodedMatch {
995 LastLengthRepeat,
996 RepeatDistance {
997 index: usize,
998 length_slot: usize,
999 length_extra: usize,
1000 },
1001 New {
1002 length_slot: usize,
1003 length_extra: usize,
1004 distance_slot: usize,
1005 distance_extra: usize,
1006 distance_bit_count: usize,
1007 },
1008}
1009
1010impl EncoderMatchState {
1011 fn encode_match(
1012 &self,
1013 length: usize,
1014 distance: usize,
1015 distance_size: usize,
1016 ) -> Result<EncodedMatch> {
1017 if distance == self.reps[0] && length == self.last_length && self.last_length != 0 {
1018 return Ok(EncodedMatch::LastLengthRepeat);
1019 }
1020 if let Some(index) = self
1021 .reps
1022 .iter()
1023 .position(|&repeat_distance| repeat_distance == distance && repeat_distance != 0)
1024 {
1025 let (length_slot, length_extra) = length_slot_for_match(length)?;
1026 return Ok(EncodedMatch::RepeatDistance {
1027 index,
1028 length_slot,
1029 length_extra,
1030 });
1031 }
1032
1033 let (distance_slot, distance_extra) = distance_slot_for_match(distance, distance_size)?;
1034 let encoded_length = length
1035 .checked_sub(length_bonus(distance))
1036 .ok_or(Error::InvalidData("RAR 5 adjusted match length underflows"))?;
1037 let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1038 let (length_slot, length_extra) = length_slot_for_match(encoded_length)?;
1039 Ok(EncodedMatch::New {
1040 length_slot,
1041 length_extra,
1042 distance_slot,
1043 distance_extra,
1044 distance_bit_count,
1045 })
1046 }
1047
1048 fn remember(&mut self, length: usize, distance: usize) {
1049 if distance == self.reps[0] && length == self.last_length {
1050 return;
1051 }
1052 if let Some(index) = self
1053 .reps
1054 .iter()
1055 .position(|&repeat_distance| repeat_distance == distance)
1056 {
1057 self.reps[..=index].rotate_right(1);
1058 } else {
1059 self.reps.rotate_right(1);
1060 }
1061 self.reps[0] = distance;
1062 self.last_length = length;
1063 }
1064}
1065
1066fn encode_tokens(
1067 input: &[u8],
1068 history: &[u8],
1069 options: EncodeOptions,
1070 distance_size: usize,
1071) -> Vec<EncodeToken> {
1072 let mut tokens = Vec::new();
1073 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1074 let history = &history[history.len().saturating_sub(options.max_match_distance)..];
1075 let mut combined = Vec::with_capacity(history.len() + input.len());
1076 combined.extend_from_slice(history);
1077 combined.extend_from_slice(input);
1078 for history_pos in 0..history.len().saturating_sub(2) {
1079 insert_match_position(&combined, history_pos, &mut buckets);
1080 }
1081
1082 let mut pos = history.len();
1083 let end = combined.len();
1084 let mut state = EncoderMatchState::default();
1085 while pos < end {
1086 if let Some(candidate) = best_match(
1087 &combined,
1088 pos,
1089 end,
1090 &buckets,
1091 options,
1092 &state,
1093 distance_size,
1094 ) {
1095 if should_lazy_emit_literal(
1096 &combined,
1097 pos,
1098 &buckets,
1099 options,
1100 &state,
1101 distance_size,
1102 candidate,
1103 ) {
1104 tokens.push(EncodeToken::Literal(combined[pos]));
1105 insert_match_position(&combined, pos, &mut buckets);
1106 pos += 1;
1107 continue;
1108 }
1109 let MatchCandidate {
1110 length, distance, ..
1111 } = candidate;
1112 tokens.push(EncodeToken::Match { length, distance });
1113 state.remember(length, distance);
1114 for history_pos in pos..pos + length {
1115 insert_match_position(&combined, history_pos, &mut buckets);
1116 }
1117 pos += length;
1118 } else {
1119 tokens.push(EncodeToken::Literal(combined[pos]));
1120 insert_match_position(&combined, pos, &mut buckets);
1121 pos += 1;
1122 }
1123 }
1124 tokens
1125}
1126
1127fn should_lazy_emit_literal(
1128 input: &[u8],
1129 pos: usize,
1130 buckets: &[Vec<usize>],
1131 options: EncodeOptions,
1132 state: &EncoderMatchState,
1133 distance_size: usize,
1134 current: MatchCandidate,
1135) -> bool {
1136 let end = input.len();
1137 if !options.lazy_matching || pos + 1 >= end {
1138 return false;
1139 }
1140 let lookahead = options.lazy_lookahead.max(1);
1141 (1..=lookahead)
1142 .take_while(|offset| pos + offset < end)
1143 .any(|offset| {
1144 best_match(
1145 input,
1146 pos + offset,
1147 end,
1148 buckets,
1149 options,
1150 state,
1151 distance_size,
1152 )
1153 .is_some_and(|next| {
1154 let skipped_literal_score = offset as isize * 8;
1155 next.score > current.score + skipped_literal_score
1156 })
1157 })
1158}
1159
1160#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1161struct MatchCandidate {
1162 length: usize,
1163 distance: usize,
1164 score: isize,
1165 cost: usize,
1166}
1167
1168fn best_match(
1169 input: &[u8],
1170 pos: usize,
1171 end: usize,
1172 buckets: &[Vec<usize>],
1173 options: EncodeOptions,
1174 state: &EncoderMatchState,
1175 distance_size: usize,
1176) -> Option<MatchCandidate> {
1177 let max_distance = pos.min(options.max_match_distance);
1178 let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
1179 if options.max_match_candidates == 0
1180 || max_distance == 0
1181 || max_length < 4
1182 || pos + 2 >= input.len()
1183 {
1184 return None;
1185 }
1186 let bucket = &buckets[match_hash(input, pos)];
1187 let mut best = None;
1188 let mut checked = 0usize;
1189 for distance in state.reps {
1190 if distance == 0 || distance > max_distance {
1191 continue;
1192 }
1193 let length = match_length(input, pos, distance, max_length);
1194 consider_match_candidate(&mut best, state, distance_size, length, distance);
1195 }
1196 for &candidate in bucket.iter().rev() {
1197 if candidate >= pos {
1198 continue;
1199 }
1200 let distance = pos - candidate;
1201 if distance > max_distance {
1202 break;
1203 }
1204 checked += 1;
1205 let length = match_length(input, pos, distance, max_length);
1206 consider_match_candidate(&mut best, state, distance_size, length, distance);
1207 if let Some(best) = best {
1208 if best.length == max_length {
1209 break;
1210 }
1211 }
1212 if checked >= options.max_match_candidates {
1213 break;
1214 }
1215 }
1216 best
1217}
1218
1219fn match_length(input: &[u8], pos: usize, distance: usize, max_length: usize) -> usize {
1220 crate::fast::match_length(input, pos, distance, max_length)
1221}
1222
1223fn consider_match_candidate(
1224 best: &mut Option<MatchCandidate>,
1225 state: &EncoderMatchState,
1226 distance_size: usize,
1227 length: usize,
1228 distance: usize,
1229) {
1230 if length < 4 {
1231 return;
1232 }
1233 let Ok(cost) = estimated_match_cost(state, length, distance, distance_size) else {
1234 return;
1235 };
1236 let candidate = MatchCandidate {
1237 length,
1238 distance,
1239 score: (length as isize * 16) - cost as isize,
1240 cost,
1241 };
1242 if best.is_none_or(|best| {
1243 candidate.score > best.score
1244 || (candidate.score == best.score
1245 && (candidate.length > best.length
1246 || (candidate.length == best.length && candidate.cost < best.cost)
1247 || (candidate.length == best.length
1248 && candidate.cost == best.cost
1249 && candidate.distance < best.distance)))
1250 }) {
1251 *best = Some(candidate);
1252 }
1253}
1254
1255fn estimated_match_cost(
1256 state: &EncoderMatchState,
1257 length: usize,
1258 distance: usize,
1259 distance_size: usize,
1260) -> Result<usize> {
1261 if distance == state.reps[0] && length == state.last_length && state.last_length != 0 {
1262 return Ok(2);
1263 }
1264 if state
1265 .reps
1266 .iter()
1267 .any(|&repeat_distance| repeat_distance == distance && repeat_distance != 0)
1268 {
1269 let (length_slot, _) = length_slot_for_match(length)?;
1270 return Ok(5 + usize::from(length_slot_extra_bits(length_slot)?));
1271 }
1272
1273 let (distance_slot, _) = distance_slot_for_match(distance, distance_size)?;
1274 let encoded_length = length
1275 .checked_sub(length_bonus(distance))
1276 .ok_or(Error::InvalidData("RAR 5 adjusted match length underflows"))?;
1277 let (length_slot, _) = length_slot_for_match(encoded_length)?;
1278 Ok(10
1279 + usize::from(length_slot_extra_bits(length_slot)?)
1280 + distance_slot_bit_count(distance_slot)?)
1281}
1282
1283fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
1284 if pos + 2 < input.len() {
1285 buckets[match_hash(input, pos)].push(pos);
1286 }
1287}
1288
1289fn match_hash(input: &[u8], pos: usize) -> usize {
1290 let value =
1291 ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1292 value & (MATCH_HASH_BUCKETS - 1)
1293}
1294
1295fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
1296 if length < 2 {
1297 return Err(Error::InvalidData("RAR 5 match length is too short"));
1298 }
1299 for slot in 0..LENGTH_TABLE_SIZE {
1300 let bit_count = usize::from(length_slot_extra_bits(slot)?);
1301 let base = slot_to_length(slot, 0)?;
1302 let max = base
1303 + if bit_count == 0 {
1304 0
1305 } else {
1306 (1usize << bit_count) - 1
1307 };
1308 if length >= base && length <= max {
1309 return Ok((slot, length - base));
1310 }
1311 }
1312 Err(Error::InvalidData("RAR 5 match length is too long"))
1313}
1314
1315fn distance_slot_for_match(distance: usize, distance_size: usize) -> Result<(usize, usize)> {
1316 if distance == 0 {
1317 return Err(Error::InvalidData("RAR 5 match distance is zero"));
1318 }
1319 for slot in 0..distance_size {
1320 let bit_count = distance_slot_bit_count(slot)?;
1321 let base = slot_to_distance(slot, 0)?;
1322 let max = base
1323 + if bit_count == 0 {
1324 0
1325 } else {
1326 (1usize << bit_count) - 1
1327 };
1328 if distance >= base && distance <= max {
1329 return Ok((slot, distance - base));
1330 }
1331 }
1332 Err(Error::InvalidData("RAR 5 match distance is too large"))
1333}
1334
1335fn literal_presence(data: &[u8]) -> [bool; 256] {
1336 let mut present = [false; 256];
1337 for &byte in data {
1338 present[byte as usize] = true;
1339 }
1340 present
1341}
1342
1343#[derive(Debug, Clone)]
1344pub struct Unpack50Decoder {
1345 tables: Option<DecodeTables>,
1346 reps: [usize; 4],
1347 last_length: usize,
1348 history: Vec<u8>,
1349}
1350
1351impl Unpack50Decoder {
1352 pub fn new() -> Self {
1353 Self {
1354 tables: None,
1355 reps: [0; 4],
1356 last_length: 0,
1357 history: Vec::new(),
1358 }
1359 }
1360
1361 pub fn decode_member(
1362 &mut self,
1363 input: &[u8],
1364 algorithm_version: u8,
1365 output_size: usize,
1366 solid: bool,
1367 mode: DecodeMode,
1368 ) -> Result<Vec<u8>> {
1369 self.decode_member_with_dictionary(
1370 input,
1371 algorithm_version,
1372 output_size,
1373 DEFAULT_DICTIONARY_SIZE,
1374 solid,
1375 mode,
1376 )
1377 }
1378
1379 pub fn decode_member_with_dictionary(
1380 &mut self,
1381 input: &[u8],
1382 algorithm_version: u8,
1383 output_size: usize,
1384 dictionary_size: usize,
1385 solid: bool,
1386 mode: DecodeMode,
1387 ) -> Result<Vec<u8>> {
1388 let mut input = std::io::Cursor::new(input);
1389 self.decode_member_from_reader_with_dictionary(
1390 &mut input,
1391 algorithm_version,
1392 output_size,
1393 dictionary_size,
1394 solid,
1395 mode,
1396 )
1397 }
1398
1399 pub fn decode_member_from_reader(
1400 &mut self,
1401 input: &mut impl Read,
1402 algorithm_version: u8,
1403 output_size: usize,
1404 solid: bool,
1405 mode: DecodeMode,
1406 ) -> Result<Vec<u8>> {
1407 self.decode_member_from_reader_with_dictionary(
1408 input,
1409 algorithm_version,
1410 output_size,
1411 DEFAULT_DICTIONARY_SIZE,
1412 solid,
1413 mode,
1414 )
1415 }
1416
1417 pub fn decode_member_from_reader_with_dictionary(
1418 &mut self,
1419 input: &mut impl Read,
1420 algorithm_version: u8,
1421 output_size: usize,
1422 dictionary_size: usize,
1423 solid: bool,
1424 mode: DecodeMode,
1425 ) -> Result<Vec<u8>> {
1426 if dictionary_size == 0 {
1427 return Err(Error::InvalidData("RAR 5 dictionary size is zero"));
1428 }
1429 if !solid {
1430 self.reset();
1431 }
1432
1433 let mut output = Vec::with_capacity(output_size.min(MAX_INITIAL_OUTPUT_CAPACITY));
1434 let mut filters = Vec::new();
1435
1436 loop {
1437 let block = read_compressed_block(input)?;
1438 let payload = block.payload.as_slice();
1439 let mut payload_bit_pos = 0;
1440 if block.header.has_tables {
1441 let (lengths, table_bits) = read_table_lengths(payload, algorithm_version)?;
1442 self.tables = Some(DecodeTables::from_lengths(&lengths)?);
1443 payload_bit_pos = table_bits;
1444 }
1445 let tables = self
1446 .tables
1447 .take()
1448 .ok_or(Error::InvalidData("RAR 5 block reuses missing tables"))?;
1449 let mut bits = BitReader::new(payload);
1450 bits.bit_pos = payload_bit_pos;
1451
1452 while bits.bit_pos < block.header.payload_bits && output.len() < output_size {
1453 let symbol = tables.main.decode(&mut bits)?;
1454 match symbol {
1455 0..=255 => output.push(symbol as u8),
1456 256 if mode.uses_lz() => {
1457 filters.push(read_filter(&mut bits, output.len())?);
1458 }
1459 257 if mode.uses_lz() => {
1460 if self.last_length != 0 {
1461 self.copy_match(
1462 &mut output,
1463 self.reps[0],
1464 self.last_length,
1465 output_size,
1466 dictionary_size,
1467 )?;
1468 }
1469 }
1470 258..=261 if mode.uses_lz() => {
1471 let rep_index = symbol - 258;
1472 let distance = self.reps[rep_index];
1473 if distance == 0 {
1474 return Err(Error::InvalidData(
1475 "RAR 5 repeat distance is not initialized",
1476 ));
1477 }
1478 let length_slot = tables.length.decode(&mut bits)?;
1479 let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1480 let length = slot_to_length(length_slot, length_extra)?;
1481 self.reps[..=rep_index].rotate_right(1);
1482 self.reps[0] = distance;
1483 self.last_length = length;
1484 self.copy_match(
1485 &mut output,
1486 distance,
1487 length,
1488 output_size,
1489 dictionary_size,
1490 )?;
1491 }
1492 262.. if mode.uses_lz() => {
1493 let length_slot = symbol - 262;
1494 let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1495 let mut length = slot_to_length(length_slot, length_extra)?;
1496 let distance_slot = tables.distance.decode(&mut bits)?;
1497 let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1498 let distance_extra = if distance_bit_count >= 4 && tables.align_mode {
1499 let high = bits.read_bits((distance_bit_count - 4) as u8)?;
1500 let low = tables.align.decode(&mut bits)? as u32;
1501 (high << 4) | low
1502 } else {
1503 bits.read_bits(distance_bit_count as u8)?
1504 };
1505 let distance = slot_to_distance(distance_slot, distance_extra)?;
1506 length += length_bonus(distance);
1507 self.reps.rotate_right(1);
1508 self.reps[0] = distance;
1509 self.last_length = length;
1510 self.copy_match(
1511 &mut output,
1512 distance,
1513 length,
1514 output_size,
1515 dictionary_size,
1516 )?;
1517 }
1518 _ if mode == DecodeMode::LiteralOnly => {
1519 return Err(Error::InvalidData(
1520 "RAR 5 literal-only decoder encountered non-literal symbol",
1521 ));
1522 }
1523 _ => {
1524 return Err(Error::InvalidData(
1525 "RAR 5 decoder encountered unsupported control symbol",
1526 ));
1527 }
1528 }
1529 }
1530
1531 self.tables = Some(tables);
1532 if block.header.is_last || output.len() >= output_size {
1533 break;
1534 }
1535 }
1536
1537 if output.len() == output_size {
1538 let history_output = if mode.applies_filters() && !filters.is_empty() {
1539 Some(output.clone())
1540 } else {
1541 None
1542 };
1543 if mode.applies_filters() {
1544 apply_filters(&mut output, &filters)?;
1545 }
1546 self.history
1547 .extend_from_slice(history_output.as_deref().unwrap_or(&output));
1548 if self.history.len() > dictionary_size {
1549 let discard = self.history.len() - dictionary_size;
1550 self.history.drain(..discard);
1551 }
1552 Ok(output)
1553 } else {
1554 Err(Error::NeedMoreInput)
1555 }
1556 }
1557
1558 pub fn decode_member_from_reader_with_dictionary_to_sink<E>(
1559 &mut self,
1560 input: &mut impl Read,
1561 algorithm_version: u8,
1562 output_size: usize,
1563 dictionary_size: usize,
1564 solid: bool,
1565 mut sink: impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1566 ) -> std::result::Result<(), StreamDecodeError<E>> {
1567 if dictionary_size == 0 {
1568 return Err(Error::InvalidData("RAR 5 dictionary size is zero").into());
1569 }
1570 if !solid {
1571 self.reset();
1572 }
1573
1574 let history_limit = dictionary_size.min(STREAM_HISTORY_LIMIT);
1575 if self.history.len() > history_limit {
1576 let discard = self.history.len() - history_limit;
1577 self.history.drain(..discard);
1578 }
1579 let mut output = StreamingOutput::new(
1580 std::mem::take(&mut self.history),
1581 output_size,
1582 dictionary_size,
1583 history_limit,
1584 );
1585
1586 loop {
1587 let block = read_compressed_block(input)?;
1588 let payload = block.payload.as_slice();
1589 let mut payload_bit_pos = 0;
1590 if block.header.has_tables {
1591 let (lengths, table_bits) = read_table_lengths(payload, algorithm_version)?;
1592 self.tables = Some(DecodeTables::from_lengths(&lengths)?);
1593 payload_bit_pos = table_bits;
1594 }
1595 let tables = self
1596 .tables
1597 .take()
1598 .ok_or(Error::InvalidData("RAR 5 block reuses missing tables"))?;
1599 let mut bits = BitReader::new(payload);
1600 bits.bit_pos = payload_bit_pos;
1601
1602 while bits.bit_pos < block.header.payload_bits && output.written() < output_size {
1603 let symbol = tables.main.decode(&mut bits)?;
1604 match symbol {
1605 0..=255 => output.push(symbol as u8, &mut sink)?,
1606 256 => {
1607 return Err(StreamDecodeError::FilteredMember);
1608 }
1609 257 => {
1610 if self.last_length != 0 {
1611 output.copy_match(self.reps[0], self.last_length, &mut sink)?;
1612 }
1613 }
1614 258..=261 => {
1615 let rep_index = symbol - 258;
1616 let distance = self.reps[rep_index];
1617 if distance == 0 {
1618 return Err(Error::InvalidData(
1619 "RAR 5 repeat distance is not initialized",
1620 )
1621 .into());
1622 }
1623 let length_slot = tables.length.decode(&mut bits)?;
1624 let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1625 let length = slot_to_length(length_slot, length_extra)?;
1626 self.reps[..=rep_index].rotate_right(1);
1627 self.reps[0] = distance;
1628 self.last_length = length;
1629 output.copy_match(distance, length, &mut sink)?;
1630 }
1631 262.. => {
1632 let length_slot = symbol - 262;
1633 let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1634 let mut length = slot_to_length(length_slot, length_extra)?;
1635 let distance_slot = tables.distance.decode(&mut bits)?;
1636 let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1637 let distance_extra = if distance_bit_count >= 4 && tables.align_mode {
1638 let high = bits.read_bits((distance_bit_count - 4) as u8)?;
1639 let low = tables.align.decode(&mut bits)? as u32;
1640 (high << 4) | low
1641 } else {
1642 bits.read_bits(distance_bit_count as u8)?
1643 };
1644 let distance = slot_to_distance(distance_slot, distance_extra)?;
1645 length += length_bonus(distance);
1646 self.reps.rotate_right(1);
1647 self.reps[0] = distance;
1648 self.last_length = length;
1649 output.copy_match(distance, length, &mut sink)?;
1650 }
1651 }
1652 }
1653
1654 self.tables = Some(tables);
1655 if block.header.is_last || output.written() >= output_size {
1656 break;
1657 }
1658 }
1659
1660 if output.written() == output_size {
1661 output.finish(&mut sink)?;
1662 self.history = output.into_history();
1663 Ok(())
1664 } else {
1665 Err(Error::NeedMoreInput.into())
1666 }
1667 }
1668
1669 fn reset(&mut self) {
1670 self.tables = None;
1671 self.reps = [0; 4];
1672 self.last_length = 0;
1673 self.history.clear();
1674 }
1675
1676 fn copy_match(
1677 &self,
1678 output: &mut Vec<u8>,
1679 distance: usize,
1680 length: usize,
1681 output_limit: usize,
1682 dictionary_size: usize,
1683 ) -> Result<()> {
1684 if distance > dictionary_size {
1685 return Err(Error::InvalidData(
1686 "RAR 5 match distance exceeds dictionary",
1687 ));
1688 }
1689 if distance == 0 || distance > self.history.len() + output.len() {
1690 return Err(Error::InvalidData("RAR 5 match distance exceeds window"));
1691 }
1692 if output
1693 .len()
1694 .checked_add(length)
1695 .is_none_or(|end| end > output_limit)
1696 {
1697 return Err(Error::InvalidData("RAR 5 match exceeds output limit"));
1698 }
1699 for _ in 0..length {
1700 if distance <= output.len() {
1701 let index = output.len() - distance;
1702 output.push(output[index]);
1703 } else {
1704 let history_distance = distance - output.len();
1705 let index = self.history.len() - history_distance;
1706 output.push(self.history[index]);
1707 }
1708 }
1709 Ok(())
1710 }
1711}
1712
1713struct StreamingOutput {
1714 history: VecDeque<u8>,
1715 pending: Vec<u8>,
1716 written: usize,
1717 output_limit: usize,
1718 dictionary_size: usize,
1719 history_limit: usize,
1720 all_zero: bool,
1721}
1722
1723impl StreamingOutput {
1724 fn new(
1725 history: Vec<u8>,
1726 output_limit: usize,
1727 dictionary_size: usize,
1728 history_limit: usize,
1729 ) -> Self {
1730 Self {
1731 all_zero: history.iter().all(|&byte| byte == 0),
1732 history: history.into(),
1733 pending: Vec::with_capacity(STREAM_FLUSH_THRESHOLD),
1734 written: 0,
1735 output_limit,
1736 dictionary_size,
1737 history_limit,
1738 }
1739 }
1740
1741 fn written(&self) -> usize {
1742 self.written
1743 }
1744
1745 fn push<E>(
1746 &mut self,
1747 byte: u8,
1748 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1749 ) -> std::result::Result<(), StreamDecodeError<E>> {
1750 if self.written >= self.output_limit {
1751 return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1752 }
1753 if byte != 0 {
1754 self.all_zero = false;
1755 }
1756 self.pending.push(byte);
1757 self.written += 1;
1758 if self.pending.len() >= STREAM_FLUSH_THRESHOLD {
1759 self.flush(sink)?;
1760 }
1761 Ok(())
1762 }
1763
1764 fn push_repeated<E>(
1765 &mut self,
1766 byte: u8,
1767 mut count: usize,
1768 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1769 ) -> std::result::Result<(), StreamDecodeError<E>> {
1770 if self
1771 .written
1772 .checked_add(count)
1773 .is_none_or(|end| end > self.output_limit)
1774 {
1775 return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1776 }
1777 if byte != 0 {
1778 self.all_zero = false;
1779 }
1780 while count > 0 {
1781 let available = STREAM_FLUSH_THRESHOLD - self.pending.len();
1782 let take = count.min(available.max(1));
1783 let old_len = self.pending.len();
1784 self.pending.resize(old_len + take, byte);
1785 self.written += take;
1786 count -= take;
1787 if self.pending.len() >= STREAM_FLUSH_THRESHOLD {
1788 self.flush(sink)?;
1789 }
1790 }
1791 Ok(())
1792 }
1793
1794 fn push_zeroes<E>(
1795 &mut self,
1796 count: usize,
1797 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1798 ) -> std::result::Result<(), StreamDecodeError<E>> {
1799 if self
1800 .written
1801 .checked_add(count)
1802 .is_none_or(|end| end > self.output_limit)
1803 {
1804 return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1805 }
1806 self.flush(sink)?;
1807 sink(DecodedChunk::Repeated {
1808 byte: 0,
1809 len: count,
1810 })
1811 .map_err(StreamDecodeError::Sink)?;
1812 self.written += count;
1813 if self.history.is_empty() && self.history_limit != 0 {
1814 self.history.push_back(0);
1815 }
1816 Ok(())
1817 }
1818
1819 fn copy_match<E>(
1820 &mut self,
1821 distance: usize,
1822 length: usize,
1823 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1824 ) -> std::result::Result<(), StreamDecodeError<E>> {
1825 if distance > self.dictionary_size {
1826 return Err(Error::InvalidData("RAR 5 match distance exceeds dictionary").into());
1827 }
1828 if self.all_zero && distance <= self.written + self.history.len() {
1829 return self.push_zeroes(length, sink);
1830 }
1831 if distance == 0 || distance > self.history.len() + self.pending.len() {
1832 return Err(Error::InvalidData("RAR 5 match distance exceeds window").into());
1833 }
1834 if self
1835 .written
1836 .checked_add(length)
1837 .is_none_or(|end| end > self.output_limit)
1838 {
1839 return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1840 }
1841 if distance == 1 {
1842 let byte = self.byte_at_distance(1)?;
1843 return self.push_repeated(byte, length, sink);
1844 }
1845 for _ in 0..length {
1846 let byte = self.byte_at_distance(distance)?;
1847 self.push(byte, sink)?;
1848 }
1849 Ok(())
1850 }
1851
1852 fn byte_at_distance(&self, distance: usize) -> Result<u8> {
1853 if distance <= self.pending.len() {
1854 Ok(self.pending[self.pending.len() - distance])
1855 } else {
1856 let history_distance = distance - self.pending.len();
1857 if history_distance > self.history.len() {
1858 return Err(Error::InvalidData("RAR 5 match distance exceeds window"));
1859 }
1860 Ok(*self
1861 .history
1862 .get(self.history.len() - history_distance)
1863 .ok_or(Error::InvalidData("RAR 5 match distance exceeds window"))?)
1864 }
1865 }
1866
1867 fn flush<E>(
1868 &mut self,
1869 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1870 ) -> std::result::Result<(), StreamDecodeError<E>> {
1871 if self.pending.is_empty() {
1872 return Ok(());
1873 }
1874 sink(DecodedChunk::Bytes(&self.pending)).map_err(StreamDecodeError::Sink)?;
1875 self.history.extend(self.pending.iter().copied());
1876 self.pending.clear();
1877 while self.history.len() > self.history_limit {
1878 self.history.pop_front();
1879 }
1880 Ok(())
1881 }
1882
1883 fn finish<E>(
1884 &mut self,
1885 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1886 ) -> std::result::Result<(), StreamDecodeError<E>> {
1887 self.flush(sink)
1888 }
1889
1890 fn into_history(self) -> Vec<u8> {
1891 self.history.into()
1892 }
1893}
1894
1895fn read_compressed_block(input: &mut impl Read) -> Result<OwnedCompressedBlock> {
1896 let mut fixed = [0u8; 2];
1897 input
1898 .read_exact(&mut fixed)
1899 .map_err(|_| Error::NeedMoreInput)?;
1900 let flags = fixed[0];
1901 let checksum = fixed[1];
1902 let size_bytes_len = match (flags >> 3) & 0x03 {
1903 0 => 1,
1904 1 => 2,
1905 2 => 3,
1906 _ => return Err(Error::InvalidData("RAR 5 block size length is invalid")),
1907 };
1908 let mut size_bytes = [0u8; 3];
1909 input
1910 .read_exact(&mut size_bytes[..size_bytes_len])
1911 .map_err(|_| Error::NeedMoreInput)?;
1912
1913 let actual = size_bytes[..size_bytes_len]
1914 .iter()
1915 .fold(checksum ^ flags, |acc, &byte| acc ^ byte);
1916 if actual != 0x5a {
1917 return Err(Error::InvalidData("RAR 5 block header checksum mismatch"));
1918 }
1919
1920 let payload_size = size_bytes[..size_bytes_len]
1921 .iter()
1922 .enumerate()
1923 .fold(0usize, |acc, (index, &byte)| {
1924 acc | (usize::from(byte) << (index * 8))
1925 });
1926 let mut payload = vec![0; payload_size];
1927 input
1928 .read_exact(&mut payload)
1929 .map_err(|_| Error::NeedMoreInput)?;
1930 let final_byte_bits = ((flags & 0x07) + 1).min(8);
1931 let payload_bits = if payload_size == 0 {
1932 0
1933 } else {
1934 (payload_size - 1) * 8 + usize::from(final_byte_bits)
1935 };
1936
1937 Ok(OwnedCompressedBlock {
1938 header: CompressedBlockHeader {
1939 flags,
1940 is_last: flags & 0x40 != 0,
1941 has_tables: flags & 0x80 != 0,
1942 final_byte_bits,
1943 payload_size,
1944 payload_bits,
1945 },
1946 payload,
1947 })
1948}
1949
1950impl Default for Unpack50Decoder {
1951 fn default() -> Self {
1952 Self::new()
1953 }
1954}
1955
1956#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1957struct PendingFilter {
1958 start: usize,
1959 length: usize,
1960 filter_type: FilterType,
1961 channels: usize,
1962}
1963
1964#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1965enum FilterType {
1966 Delta,
1967 E8,
1968 E8E9,
1969 Arm,
1970}
1971
1972fn read_filter(bits: &mut BitReader<'_>, current_pos: usize) -> Result<PendingFilter> {
1973 let offset = read_filter_data(bits)? as usize;
1974 let length = read_filter_data(bits)? as usize;
1975 let filter_type = match bits.read_bits(3)? {
1976 0 => FilterType::Delta,
1977 1 => FilterType::E8,
1978 2 => FilterType::E8E9,
1979 3 => FilterType::Arm,
1980 _ => return Err(Error::InvalidData("RAR 5 filter type is unsupported")),
1981 };
1982 let channels = if filter_type == FilterType::Delta {
1983 bits.read_bits(5)? as usize + 1
1984 } else {
1985 0
1986 };
1987 Ok(PendingFilter {
1988 start: current_pos
1989 .checked_add(offset)
1990 .ok_or(Error::InvalidData("RAR 5 filter start overflows"))?,
1991 length,
1992 filter_type,
1993 channels,
1994 })
1995}
1996
1997fn read_filter_data(bits: &mut BitReader<'_>) -> Result<u32> {
1998 let byte_count = bits.read_bits(2)? as usize + 1;
1999 let mut data = 0;
2000 for index in 0..byte_count {
2001 data |= bits.read_bits(8)? << (index * 8);
2002 }
2003 Ok(data)
2004}
2005
2006fn write_filter(writer: &mut BitWriter, filter: EncodeFilter) -> Result<()> {
2007 if filter.offset > u32::MAX as usize {
2008 return Err(Error::InvalidData("RAR 5 filter offset is too large"));
2009 }
2010 if filter.length > u32::MAX as usize {
2011 return Err(Error::InvalidData("RAR 5 filter length is too large"));
2012 }
2013 write_filter_data(writer, filter.offset as u32);
2014 write_filter_data(writer, filter.length as u32);
2015 match filter.filter_type {
2016 FilterType::Delta => {
2017 if filter.channels == 0 || filter.channels > 32 {
2018 return Err(Error::InvalidData(
2019 "RAR 5 DELTA filter channel count is invalid",
2020 ));
2021 }
2022 writer.write_bits(0, 3);
2023 writer.write_bits(filter.channels - 1, 5);
2024 }
2025 FilterType::E8 => writer.write_bits(1, 3),
2026 FilterType::E8E9 => writer.write_bits(2, 3),
2027 FilterType::Arm => writer.write_bits(3, 3),
2028 }
2029 Ok(())
2030}
2031
2032fn write_filter_data(writer: &mut BitWriter, value: u32) {
2033 let byte_count = if value <= 0xff {
2034 1
2035 } else if value <= 0xffff {
2036 2
2037 } else if value <= 0x00ff_ffff {
2038 3
2039 } else {
2040 4
2041 };
2042 writer.write_bits(byte_count - 1, 2);
2043 for index in 0..byte_count {
2044 writer.write_bits(((value >> (index * 8)) & 0xff) as usize, 8);
2045 }
2046}
2047
2048fn apply_filters(output: &mut [u8], filters: &[PendingFilter]) -> Result<()> {
2049 for filter in filters {
2050 let end = filter
2051 .start
2052 .checked_add(filter.length)
2053 .ok_or(Error::InvalidData("RAR 5 filter range overflows"))?;
2054 let data = output
2055 .get_mut(filter.start..end)
2056 .ok_or(Error::InvalidData("RAR 5 filter range exceeds output"))?;
2057 match filter.filter_type {
2058 FilterType::Delta => {
2059 let decoded = filters::delta_decode(data, filter.channels, rar50_delta_messages())?;
2060 data.copy_from_slice(&decoded);
2061 }
2062 FilterType::E8 => e8e9_decode(data, filter.start as u32, false),
2063 FilterType::E8E9 => e8e9_decode(data, filter.start as u32, true),
2064 FilterType::Arm => arm_decode(data, filter.start as u32),
2065 }
2066 }
2067 Ok(())
2068}
2069
2070fn rar50_delta_messages() -> DeltaErrorMessages {
2071 DeltaErrorMessages {
2072 invalid_channels: "RAR 5 DELTA filter channel count is invalid",
2073 zero_channels: "RAR 5 DELTA filter has zero channels",
2074 truncated_source: "RAR 5 DELTA filter source is truncated",
2075 }
2076}
2077
2078fn e8e9_decode(data: &mut [u8], file_offset: u32, include_e9: bool) {
2079 if data.len() <= 4 {
2080 return;
2081 }
2082 let cmp_mask = if include_e9 { 0xfe } else { 0xff };
2083 let opcode_limit = data.len() - 4;
2084 let mut opcode_pos = 0usize;
2085 while let Some(pos) = crate::fast::next_x86_opcode(data, opcode_pos, opcode_limit, cmp_mask) {
2086 let cur_pos = pos + 1;
2087 let offset = file_offset.wrapping_add(cur_pos as u32) % X86_FILTER_FILE_SIZE;
2088 let addr = u32::from_le_bytes([
2089 data[cur_pos],
2090 data[cur_pos + 1],
2091 data[cur_pos + 2],
2092 data[cur_pos + 3],
2093 ]);
2094 let new_addr = if addr & 0x8000_0000 != 0 {
2095 (addr.wrapping_add(offset) & 0x8000_0000 == 0)
2096 .then(|| addr.wrapping_add(X86_FILTER_FILE_SIZE))
2097 } else {
2098 (addr.wrapping_sub(X86_FILTER_FILE_SIZE) & 0x8000_0000 != 0)
2099 .then(|| addr.wrapping_sub(offset))
2100 };
2101 if let Some(value) = new_addr {
2102 data[cur_pos..cur_pos + 4].copy_from_slice(&value.to_le_bytes());
2103 }
2104 opcode_pos = pos + 5;
2105 }
2106}
2107
2108fn e8e9_encode(data: &mut [u8], file_offset: u32, include_e9: bool) {
2109 if data.len() <= 4 {
2110 return;
2111 }
2112 let cmp_mask = if include_e9 { 0xfe } else { 0xff };
2113 let opcode_limit = data.len() - 4;
2114 let mut opcode_pos = 0usize;
2115 while let Some(pos) = crate::fast::next_x86_opcode(data, opcode_pos, opcode_limit, cmp_mask) {
2116 let cur_pos = pos + 1;
2117 let offset = file_offset.wrapping_add(cur_pos as u32) % X86_FILTER_FILE_SIZE;
2118 let addr = u32::from_le_bytes([
2119 data[cur_pos],
2120 data[cur_pos + 1],
2121 data[cur_pos + 2],
2122 data[cur_pos + 3],
2123 ]);
2124 let candidate = addr.wrapping_add(offset);
2125 let new_addr = if candidate < X86_FILTER_FILE_SIZE {
2126 Some(candidate)
2127 } else {
2128 let candidate = addr.wrapping_sub(X86_FILTER_FILE_SIZE);
2129 (candidate & 0x8000_0000 != 0 && candidate.wrapping_add(offset) & 0x8000_0000 == 0)
2130 .then_some(candidate)
2131 };
2132 if let Some(value) = new_addr {
2133 data[cur_pos..cur_pos + 4].copy_from_slice(&value.to_le_bytes());
2134 }
2135 opcode_pos = pos + 5;
2136 }
2137}
2138
2139const X86_FILTER_FILE_SIZE: u32 = 0x0100_0000;
2140
2141fn arm_decode(data: &mut [u8], file_offset: u32) {
2142 let mut pos = 0usize;
2143 while pos + 3 < data.len() {
2144 if data[pos + 3] == 0xeb {
2145 let mut offset = u32::from(data[pos])
2146 | (u32::from(data[pos + 1]) << 8)
2147 | (u32::from(data[pos + 2]) << 16);
2148 offset = offset.wrapping_sub(file_offset.wrapping_add(pos as u32) / 4);
2149 data[pos] = offset as u8;
2150 data[pos + 1] = (offset >> 8) as u8;
2151 data[pos + 2] = (offset >> 16) as u8;
2152 }
2153 pos += 4;
2154 }
2155}
2156
2157fn arm_encode(data: &mut [u8], file_offset: u32) {
2158 let mut pos = 0usize;
2159 while pos + 3 < data.len() {
2160 if data[pos + 3] == 0xeb {
2161 let mut offset = u32::from(data[pos])
2162 | (u32::from(data[pos + 1]) << 8)
2163 | (u32::from(data[pos + 2]) << 16);
2164 offset = offset.wrapping_add(file_offset.wrapping_add(pos as u32) / 4);
2165 data[pos] = offset as u8;
2166 data[pos + 1] = (offset >> 8) as u8;
2167 data[pos + 2] = (offset >> 16) as u8;
2168 }
2169 pos += 4;
2170 }
2171}
2172
2173fn length_slot_extra_bits(slot: usize) -> Result<u8> {
2174 if slot < 8 {
2175 Ok(0)
2176 } else {
2177 let bit_count = (slot >> 2) - 1;
2178 if bit_count > 24 {
2179 Err(Error::InvalidData("RAR 5 length slot is too large"))
2180 } else {
2181 Ok(bit_count as u8)
2182 }
2183 }
2184}
2185
2186fn length_bonus(distance: usize) -> usize {
2187 usize::from(distance > 0x100) + usize::from(distance > 0x2000) + usize::from(distance > 0x40000)
2188}
2189
2190pub fn slot_to_length(slot: usize, extra_bits: u32) -> Result<usize> {
2191 if slot < 8 {
2192 return Ok(slot + 2);
2193 }
2194 let bit_count = (slot >> 2) - 1;
2195 if bit_count > 24 {
2196 return Err(Error::InvalidData("RAR 5 length slot is too large"));
2197 }
2198 let max_extra = if bit_count == 32 {
2199 u32::MAX
2200 } else {
2201 (1u32 << bit_count) - 1
2202 };
2203 if extra_bits > max_extra {
2204 return Err(Error::InvalidData("RAR 5 length extra bits exceed slot"));
2205 }
2206 Ok((((4 | (slot & 3)) << bit_count) | extra_bits as usize) + 2)
2207}
2208
2209pub fn distance_slot_bit_count(slot: usize) -> Result<usize> {
2210 if slot < 4 {
2211 Ok(0)
2212 } else {
2213 let bit_count = (slot - 2) >> 1;
2214 if bit_count > 31 {
2215 Err(Error::InvalidData("RAR 5 distance slot is too large"))
2216 } else {
2217 Ok(bit_count)
2218 }
2219 }
2220}
2221
2222pub fn slot_to_distance(slot: usize, extra_bits: u32) -> Result<usize> {
2223 if slot < 4 {
2224 return Ok(slot + 1);
2225 }
2226 let bit_count = distance_slot_bit_count(slot)?;
2227 let max_extra = if bit_count == 32 {
2228 u32::MAX
2229 } else {
2230 (1u32 << bit_count) - 1
2231 };
2232 if extra_bits > max_extra {
2233 return Err(Error::InvalidData("RAR 5 distance extra bits exceed slot"));
2234 }
2235 Ok((((2 | (slot & 1)) << bit_count) | extra_bits as usize) + 1)
2236}
2237
2238#[derive(Debug, Clone)]
2239pub struct HuffmanTable {
2240 symbols: Vec<HuffmanSymbol>,
2241 first_code: [u16; 16],
2242 first_index: [usize; 16],
2243 counts: [u16; 16],
2244}
2245
2246#[derive(Debug, Clone)]
2247struct HuffmanSymbol {
2248 code: u16,
2249 len: u8,
2250 symbol: usize,
2251}
2252
2253impl HuffmanTable {
2254 pub fn from_lengths(lengths: &[u8]) -> Result<Self> {
2255 let mut count = [0u16; 16];
2256 for &length in lengths {
2257 if length > 15 {
2258 return Err(Error::InvalidData("RAR 5 Huffman length is too large"));
2259 }
2260 if length != 0 {
2261 count[length as usize] += 1;
2262 }
2263 }
2264 validate_huffman_counts(&count)?;
2265
2266 let mut first_code = [0u16; 16];
2267 let mut next_code = [0u16; 16];
2268 let mut code = 0u16;
2269 for length in 1..=15 {
2270 code = (code + count[length - 1]) << 1;
2271 first_code[length] = code;
2272 next_code[length] = code;
2273 }
2274
2275 let mut first_index = [0usize; 16];
2276 let mut index = 0usize;
2277 for length in 1..=15 {
2278 first_index[length] = index;
2279 index += usize::from(count[length]);
2280 }
2281
2282 let mut symbols = Vec::new();
2283 for (symbol, &length) in lengths.iter().enumerate() {
2284 if length == 0 {
2285 continue;
2286 }
2287 let code = next_code[length as usize];
2288 next_code[length as usize] += 1;
2289 symbols.push(HuffmanSymbol {
2290 code,
2291 len: length,
2292 symbol,
2293 });
2294 }
2295 symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
2296 Ok(Self {
2297 symbols,
2298 first_code,
2299 first_index,
2300 counts: count,
2301 })
2302 }
2303
2304 pub fn is_empty(&self) -> bool {
2305 self.symbols.is_empty()
2306 }
2307
2308 fn decode(&self, bits: &mut BitReader<'_>) -> Result<usize> {
2309 if self.symbols.is_empty() {
2310 return Err(Error::InvalidData("RAR 5 empty Huffman table"));
2311 }
2312 let mut code = 0u16;
2313 for len in 1..=15 {
2314 code = (code << 1) | bits.read_bits(1)? as u16;
2315 let count = self.counts[len];
2316 if count != 0 {
2317 let first = self.first_code[len];
2318 let offset = code.wrapping_sub(first);
2319 if offset < count {
2320 let index = self.first_index[len] + usize::from(offset);
2321 return Ok(self.symbols[index].symbol);
2322 }
2323 }
2324 }
2325 Err(Error::InvalidData("RAR 5 invalid Huffman code"))
2326 }
2327
2328 fn code_for_symbol(&self, symbol: usize) -> Result<(u16, u8)> {
2329 self.symbols
2330 .iter()
2331 .find(|item| item.symbol == symbol)
2332 .map(|item| (item.code, item.len))
2333 .ok_or(Error::InvalidData("RAR 5 missing Huffman symbol"))
2334 }
2335}
2336
2337struct BitReader<'a> {
2338 input: &'a [u8],
2339 bit_pos: usize,
2340}
2341
2342impl<'a> BitReader<'a> {
2343 fn new(input: &'a [u8]) -> Self {
2344 Self { input, bit_pos: 0 }
2345 }
2346
2347 fn read_bits(&mut self, count: u8) -> Result<u32> {
2348 if count > 32 {
2349 return Err(Error::InvalidData("RAR 5 bit read is too wide"));
2350 }
2351 let end = self
2352 .bit_pos
2353 .checked_add(usize::from(count))
2354 .ok_or(Error::NeedMoreInput)?;
2355 if end > self.input.len() * 8 {
2356 return Err(Error::NeedMoreInput);
2357 }
2358
2359 let mut value = 0u32;
2360 let mut remaining = usize::from(count);
2361 while remaining != 0 {
2362 let byte = self.input[self.bit_pos / 8];
2363 let bit_offset = self.bit_pos % 8;
2364 let available = 8 - bit_offset;
2365 let take = available.min(remaining);
2366 let shift = available - take;
2367 let mask = ((1u16 << take) - 1) as u8;
2368 let chunk = (byte >> shift) & mask;
2369 value = (value << take) | u32::from(chunk);
2370 self.bit_pos += take;
2371 remaining -= take;
2372 }
2373
2374 Ok(value)
2375 }
2376}
2377
2378struct BitWriter {
2379 bytes: Vec<u8>,
2380 bit_pos: usize,
2381}
2382
2383impl BitWriter {
2384 fn new() -> Self {
2385 Self {
2386 bytes: Vec::new(),
2387 bit_pos: 0,
2388 }
2389 }
2390
2391 fn write_bits(&mut self, value: usize, count: usize) {
2392 for bit in (0..count).rev() {
2393 if self.bit_pos.is_multiple_of(8) {
2394 self.bytes.push(0);
2395 }
2396 if (value >> bit) & 1 != 0 {
2397 let byte = self.bytes.last_mut().unwrap();
2398 *byte |= 1 << (7 - (self.bit_pos % 8));
2399 }
2400 self.bit_pos += 1;
2401 }
2402 }
2403
2404 fn finish(self) -> Vec<u8> {
2405 self.bytes
2406 }
2407}
2408
2409fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
2410 let mut available = 1i32;
2411 for &len_count in count.iter().skip(1) {
2412 available = (available << 1) - i32::from(len_count);
2413 if available < 0 {
2414 return Err(Error::InvalidData("RAR 5 oversubscribed Huffman table"));
2415 }
2416 }
2417 Ok(())
2418}
2419
2420#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2421struct LevelToken {
2422 symbol: usize,
2423 extra_bits: u8,
2424 extra_value: u8,
2425}
2426
2427impl LevelToken {
2428 const fn plain(symbol: usize) -> Self {
2429 Self {
2430 symbol,
2431 extra_bits: 0,
2432 extra_value: 0,
2433 }
2434 }
2435
2436 const fn repeat_previous_short(count: usize) -> Self {
2437 Self {
2438 symbol: 16,
2439 extra_bits: 3,
2440 extra_value: (count - 3) as u8,
2441 }
2442 }
2443
2444 const fn repeat_previous_long(count: usize) -> Self {
2445 Self {
2446 symbol: 17,
2447 extra_bits: 7,
2448 extra_value: (count - 11) as u8,
2449 }
2450 }
2451
2452 const fn zero_run_short(count: usize) -> Self {
2453 Self {
2454 symbol: 18,
2455 extra_bits: 3,
2456 extra_value: (count - 3) as u8,
2457 }
2458 }
2459
2460 const fn zero_run_long(count: usize) -> Self {
2461 Self {
2462 symbol: 19,
2463 extra_bits: 7,
2464 extra_value: (count - 11) as u8,
2465 }
2466 }
2467}
2468
2469fn encode_table_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
2470 let mut tokens = Vec::new();
2471 let mut pos = 0usize;
2472 let mut previous = None;
2473 while pos < lengths.len() {
2474 let value = lengths[pos];
2475 let mut run = 1usize;
2476 while pos + run < lengths.len() && lengths[pos + run] == value {
2477 run += 1;
2478 }
2479
2480 if value == 0 {
2481 emit_zero_level_run(&mut tokens, run);
2482 previous = Some(0);
2483 pos += run;
2484 continue;
2485 }
2486
2487 if previous == Some(value) && run >= 3 {
2488 emit_repeat_level_run(&mut tokens, run);
2489 pos += run;
2490 continue;
2491 }
2492
2493 tokens.push(LevelToken::plain(value as usize));
2494 previous = Some(value);
2495 pos += 1;
2496 }
2497 tokens
2498}
2499
2500fn emit_repeat_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
2501 while run != 0 {
2502 if run >= 11 {
2503 let mut chunk = run.min(138);
2504 if matches!(run - chunk, 1 | 2) && chunk >= 14 {
2505 chunk -= 3;
2506 }
2507 tokens.push(LevelToken::repeat_previous_long(chunk));
2508 run -= chunk;
2509 } else if run >= 3 {
2510 let chunk = run.min(10);
2511 tokens.push(LevelToken::repeat_previous_short(chunk));
2512 run -= chunk;
2513 } else {
2514 break;
2515 }
2516 }
2517}
2518
2519fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
2520 while run != 0 {
2521 if run >= 11 {
2522 let mut chunk = run.min(138);
2523 if matches!(run - chunk, 1 | 2) && chunk >= 14 {
2524 chunk -= 3;
2525 }
2526 tokens.push(LevelToken::zero_run_long(chunk));
2527 run -= chunk;
2528 } else if run >= 3 {
2529 let chunk = run.min(10);
2530 tokens.push(LevelToken::zero_run_short(chunk));
2531 run -= chunk;
2532 } else {
2533 tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
2534 break;
2535 }
2536 }
2537}
2538
2539fn level_code_lengths_for_tokens(tokens: &[LevelToken]) -> [u8; LEVEL_TABLE_SIZE] {
2540 let mut used = [false; LEVEL_TABLE_SIZE];
2541 for token in tokens {
2542 used[token.symbol] = true;
2543 }
2544 let used_count = used.iter().filter(|&&used| used).count();
2545 let len = huffman::bits_for_symbol_count(used_count);
2546 let mut lengths = [0u8; LEVEL_TABLE_SIZE];
2547 for (symbol, is_used) in used.into_iter().enumerate() {
2548 if is_used {
2549 lengths[symbol] = len;
2550 }
2551 }
2552 lengths
2553}
2554
2555fn write_level_lengths(writer: &mut BitWriter, lengths: &[u8; LEVEL_TABLE_SIZE]) {
2556 let mut pos = 0usize;
2557 while pos < LEVEL_TABLE_SIZE {
2558 let length = lengths[pos];
2559 if length == 0 {
2560 let mut count = 1usize;
2561 while pos + count < LEVEL_TABLE_SIZE && lengths[pos + count] == 0 {
2562 count += 1;
2563 }
2564 while count >= 3 {
2565 let chunk = count.min(17);
2566 writer.write_bits(15, 4);
2567 writer.write_bits(chunk - 2, 4);
2568 pos += chunk;
2569 count -= chunk;
2570 }
2571 for _ in 0..count {
2572 writer.write_bits(0, 4);
2573 pos += 1;
2574 }
2575 } else {
2576 writer.write_bits(usize::from(length), 4);
2577 if length == 15 {
2578 writer.write_bits(0, 4);
2579 }
2580 pos += 1;
2581 }
2582 }
2583}
2584
2585#[cfg(test)]
2586mod tests {
2587 use super::*;
2588
2589 fn checksum(flags: u8, size_bytes: &[u8]) -> u8 {
2590 size_bytes
2591 .iter()
2592 .fold(0x5a ^ flags, |acc, &byte| acc ^ byte)
2593 }
2594
2595 #[test]
2596 fn parses_one_byte_size_block_header() {
2597 let flags = 0xc7;
2598 let size = [3];
2599 let input = [flags, checksum(flags, &size), size[0], 0xaa, 0xbb, 0xcc];
2600
2601 let block = parse_compressed_block(&input).unwrap();
2602 assert_eq!(block.header_len, 3);
2603 assert_eq!(block.payload, 3..6);
2604 assert_eq!(block.header.flags, flags);
2605 assert!(block.header.is_last);
2606 assert!(block.header.has_tables);
2607 assert_eq!(block.header.final_byte_bits, 8);
2608 assert_eq!(block.header.payload_size, 3);
2609 assert_eq!(block.header.payload_bits, 24);
2610 }
2611
2612 #[test]
2613 fn parses_three_byte_size_block_header_with_partial_final_byte() {
2614 let flags = 0x94;
2615 let size = [0x34, 0x12, 0x00];
2616 let mut input = vec![flags, checksum(flags, &size), size[0], size[1], size[2]];
2617 input.resize(0x1234 + 5, 0);
2618
2619 let block = parse_compressed_block(&input).unwrap();
2620 assert_eq!(block.header_len, 5);
2621 assert_eq!(block.payload, 5..0x1239);
2622 assert!(!block.header.is_last);
2623 assert!(block.header.has_tables);
2624 assert_eq!(block.header.final_byte_bits, 5);
2625 assert_eq!(block.header.payload_size, 0x1234);
2626 assert_eq!(block.header.payload_bits, (0x1234 - 1) * 8 + 5);
2627 }
2628
2629 #[test]
2630 fn rejects_reserved_size_length_selector() {
2631 let input = [0x18, 0x42, 0x00];
2632
2633 assert_eq!(
2634 parse_compressed_block(&input),
2635 Err(Error::InvalidData("RAR 5 block size length is invalid"))
2636 );
2637 }
2638
2639 #[test]
2640 fn rejects_bad_block_header_checksum() {
2641 let input = [0xc7, 0x00, 0x03, 0xaa, 0xbb, 0xcc];
2642
2643 assert_eq!(
2644 parse_compressed_block(&input),
2645 Err(Error::InvalidData("RAR 5 block header checksum mismatch"))
2646 );
2647 }
2648
2649 #[test]
2650 fn rejects_truncated_block_payload() {
2651 let flags = 0xc7;
2652 let size = [3];
2653 let input = [flags, checksum(flags, &size), size[0], 0xaa, 0xbb];
2654
2655 assert_eq!(parse_compressed_block(&input), Err(Error::NeedMoreInput));
2656 }
2657
2658 #[test]
2659 fn reads_level_lengths_with_literal_fifteen() {
2660 let mut nibbles = vec![1, 2, 15, 0, 3, 4];
2661 nibbles.resize(LEVEL_TABLE_SIZE + 1, 0);
2662
2663 let (lengths, bits) = read_level_lengths(&pack_nibbles(&nibbles)).unwrap();
2664
2665 assert_eq!(&lengths[..6], &[1, 2, 15, 3, 4, 0]);
2666 assert_eq!(bits, LEVEL_TABLE_SIZE * 4 + 4);
2667 }
2668
2669 #[test]
2670 fn reads_level_lengths_with_zero_run_at_current_position() {
2671 let mut nibbles = vec![7, 15, 3, 2];
2672 nibbles.resize(LEVEL_TABLE_SIZE - 3, 0);
2673
2674 let (lengths, bits) = read_level_lengths(&pack_nibbles(&nibbles)).unwrap();
2675
2676 assert_eq!(lengths[0], 7);
2677 assert_eq!(&lengths[1..6], &[0, 0, 0, 0, 0]);
2678 assert_eq!(lengths[6], 2);
2679 assert_eq!(bits, (LEVEL_TABLE_SIZE - 3) * 4);
2680 }
2681
2682 fn pack_nibbles(nibbles: &[u8]) -> Vec<u8> {
2683 nibbles
2684 .chunks(2)
2685 .map(|chunk| {
2686 let high = chunk[0] & 0x0f;
2687 let low = chunk.get(1).copied().unwrap_or(0) & 0x0f;
2688 (high << 4) | low
2689 })
2690 .collect()
2691 }
2692
2693 #[test]
2694 fn reads_rar50_second_level_table_lengths() {
2695 let mut writer = BitWriter::new();
2696 for _ in 0..LEVEL_TABLE_SIZE {
2697 writer.write_bits(5, 4);
2698 }
2699 for count in [138, 138, 138, 16] {
2700 writer.write_bits(19, 5);
2701 writer.write_bits(count - 11, 7);
2702 }
2703 let input = writer.finish();
2704
2705 let (lengths, bits) = read_table_lengths(&input, 0).unwrap();
2706
2707 assert_eq!(lengths.main.len(), MAIN_TABLE_SIZE);
2708 assert_eq!(lengths.distance.len(), DISTANCE_TABLE_SIZE_50);
2709 assert_eq!(lengths.align.len(), ALIGN_TABLE_SIZE);
2710 assert_eq!(lengths.length.len(), LENGTH_TABLE_SIZE);
2711 assert!(lengths.main.iter().all(|&length| length == 0));
2712 assert!(lengths.distance.iter().all(|&length| length == 0));
2713 assert!(lengths.align.iter().all(|&length| length == 0));
2714 assert!(lengths.length.iter().all(|&length| length == 0));
2715 assert_eq!(bits, LEVEL_TABLE_SIZE * 4 + 4 * (5 + 7));
2716 }
2717
2718 #[test]
2719 fn reads_rar70_table_length_count() {
2720 assert_eq!(
2721 table_length_count(1).unwrap(),
2722 MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_70 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE
2723 );
2724 }
2725
2726 #[test]
2727 fn encoded_table_lengths_round_trip_with_bit_count() {
2728 let mut lengths = TableLengths {
2729 main: vec![0; MAIN_TABLE_SIZE],
2730 distance: vec![0; DISTANCE_TABLE_SIZE_50],
2731 align: vec![0; ALIGN_TABLE_SIZE],
2732 length: vec![0; LENGTH_TABLE_SIZE],
2733 };
2734 lengths.main[b'A' as usize] = 1;
2735 lengths.main[b'B' as usize] = 3;
2736 lengths.main[262] = 3;
2737 lengths.distance[1] = 1;
2738 lengths.align[0] = 4;
2739 lengths.length[0] = 1;
2740
2741 let (encoded, bit_count) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
2742 let (decoded, decoded_bits) = read_table_lengths(&encoded, 0).unwrap();
2743
2744 assert_eq!(decoded, lengths);
2745 assert_eq!(decoded_bits, bit_count);
2746 }
2747
2748 #[test]
2749 fn table_level_encoder_uses_rar5_run_symbols() {
2750 let mut lengths =
2751 vec![
2752 0u8;
2753 MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_50 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE
2754 ];
2755 lengths[..4].fill(6);
2756 lengths[8..21].fill(0);
2757
2758 let tokens = encode_table_level_tokens(&lengths);
2759
2760 assert!(tokens.contains(&LevelToken::repeat_previous_short(3)));
2761 assert!(tokens.iter().any(|token| token.symbol == 19));
2762 }
2763
2764 #[test]
2765 fn encoded_compressed_block_round_trips_header_fields() {
2766 let payload = [0xaa, 0xbb, 0xc0];
2767 let block = encode_compressed_block(&payload, 18, true, true).unwrap();
2768
2769 let parsed = parse_compressed_block(&block).unwrap();
2770
2771 assert_eq!(parsed.payload, 3..6);
2772 assert!(parsed.header.has_tables);
2773 assert!(parsed.header.is_last);
2774 assert_eq!(parsed.header.final_byte_bits, 2);
2775 assert_eq!(parsed.header.payload_bits, 18);
2776 assert_eq!(&block[parsed.payload], payload);
2777 }
2778
2779 #[test]
2780 fn rejects_table_repeat_without_previous_length() {
2781 let mut writer = BitWriter::new();
2782 for _ in 0..LEVEL_TABLE_SIZE {
2783 writer.write_bits(5, 4);
2784 }
2785 writer.write_bits(16, 5);
2786 writer.write_bits(0, 3);
2787
2788 assert_eq!(
2789 read_table_lengths(&writer.finish(), 0),
2790 Err(Error::InvalidData(
2791 "RAR 5 table repeats missing previous length"
2792 ))
2793 );
2794 }
2795
2796 #[test]
2797 fn rejects_invalid_encoded_block_bit_counts() {
2798 assert_eq!(
2799 encode_compressed_block(&[0], 0, true, true),
2800 Err(Error::InvalidData("RAR 5 block has unused payload bytes"))
2801 );
2802 assert_eq!(
2803 encode_compressed_block(&[], 1, true, true),
2804 Err(Error::InvalidData("RAR 5 block bit count exceeds payload"))
2805 );
2806 }
2807
2808 #[test]
2809 fn builds_named_decode_tables_from_lengths() {
2810 let lengths = TableLengths {
2811 main: vec![1, 1],
2812 distance: vec![1, 1],
2813 align: vec![4; ALIGN_TABLE_SIZE],
2814 length: vec![1, 1],
2815 };
2816
2817 let tables = DecodeTables::from_lengths(&lengths).unwrap();
2818
2819 assert!(!tables.main.is_empty());
2820 assert!(!tables.distance.is_empty());
2821 assert!(!tables.align.is_empty());
2822 assert!(!tables.length.is_empty());
2823 assert!(!tables.align_mode);
2824 }
2825
2826 #[test]
2827 fn rejects_oversubscribed_rar50_huffman_tables() {
2828 assert!(matches!(
2829 HuffmanTable::from_lengths(&[1, 1, 1]),
2830 Err(Error::InvalidData("RAR 5 oversubscribed Huffman table"))
2831 ));
2832 }
2833
2834 #[test]
2835 fn detects_rar50_align_mode_when_align_lengths_are_not_uniform_four() {
2836 let mut align = vec![4; ALIGN_TABLE_SIZE];
2837 align[0] = 0;
2838 align[3] = 3;
2839 let lengths = TableLengths {
2840 main: vec![1, 1],
2841 distance: vec![1, 1],
2842 align,
2843 length: vec![1, 1],
2844 };
2845
2846 let tables = DecodeTables::from_lengths(&lengths).unwrap();
2847
2848 assert!(tables.align_mode);
2849 }
2850
2851 #[test]
2852 fn decodes_synthetic_literal_only_block() {
2853 let payload = literal_only_payload(b"ABBA");
2854 let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
2855
2856 let output = decode_literal_only(&input, 0, 4).unwrap();
2857
2858 assert_eq!(output, b"ABBA");
2859 }
2860
2861 #[test]
2862 fn encodes_literal_only_member_that_decoder_reads() {
2863 let data = b"literal-only RAR5 codec stream\nwith repeated words words words";
2864 let input = encode_literal_only(data, 0).unwrap();
2865
2866 let output = decode_literal_only(&input, 0, data.len()).unwrap();
2867
2868 assert_eq!(output, data);
2869 }
2870
2871 #[test]
2872 fn encodes_literal_only_rar70_table_shape_that_decoder_reads() {
2873 let data = b"small RAR7-compatible literal block";
2874 let input = encode_literal_only(data, 1).unwrap();
2875
2876 let output = decode_literal_only(&input, 1, data.len()).unwrap();
2877
2878 assert_eq!(output, data);
2879 }
2880
2881 #[test]
2882 fn encodes_empty_literal_only_member() {
2883 let input = encode_literal_only(b"", 0).unwrap();
2884
2885 let output = decode_literal_only(&input, 0, 0).unwrap();
2886
2887 assert!(output.is_empty());
2888 }
2889
2890 #[test]
2891 fn encodes_lz_member_with_same_member_matches() {
2892 let data = b"RAR5 match writer phrase. RAR5 match writer phrase. RAR5 match writer phrase.";
2893 let lz = encode_lz_member(data, 0).unwrap();
2894 let literal = encode_literal_only(data, 0).unwrap();
2895
2896 let output = decode_lz(&lz, 0, data.len()).unwrap();
2897
2898 assert_eq!(output, data);
2899 assert!(lz.len() < literal.len());
2900 assert!(
2901 encode_tokens(data, &[], EncodeOptions::default(), DISTANCE_TABLE_SIZE_50)
2902 .iter()
2903 .any(|token| matches!(token, EncodeToken::Match { .. }))
2904 );
2905 }
2906
2907 #[test]
2908 fn frequency_weighted_huffman_lengths_shorten_common_symbols() {
2909 let mut frequencies = vec![1usize; 24];
2910 frequencies[3] = 1024;
2911
2912 let lengths = huffman::lengths_for_frequencies(&frequencies, 15);
2913
2914 assert!(lengths[3] < lengths[0]);
2915 assert!(lengths.iter().all(|&length| length <= 15));
2916 }
2917
2918 #[test]
2919 fn lz_encoder_uses_frequency_weighted_huffman_lengths() {
2920 let mut data = vec![b'a'; 200];
2921 data.extend_from_slice(b"bcdefghijklmnopqrstuvwxyz");
2922 let input = encode_lz_member_with_options(&data, 0, EncodeOptions::new(0)).unwrap();
2923 let block = parse_compressed_block(&input).unwrap();
2924 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
2925
2926 let output = decode_lz(&input, 0, data.len()).unwrap();
2927
2928 assert_eq!(output, data);
2929 assert!(lengths.main[b'a' as usize] < lengths.main[b'z' as usize]);
2930 }
2931
2932 #[test]
2933 fn lazy_lz_parser_defers_short_match_for_longer_next_match() {
2934 let input = b"abcdXbcdYYYYYYYYYYYYabcdYYYYYYYYYYYY";
2935 let greedy = encode_tokens(
2936 input,
2937 &[],
2938 EncodeOptions::new(MAX_MATCH_CANDIDATES),
2939 DISTANCE_TABLE_SIZE_50,
2940 );
2941 let lazy = encode_tokens(
2942 input,
2943 &[],
2944 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
2945 DISTANCE_TABLE_SIZE_50,
2946 );
2947 let packed = encode_lz_member_with_options(
2948 input,
2949 0,
2950 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
2951 )
2952 .unwrap();
2953
2954 assert!(greedy
2955 .iter()
2956 .any(|token| matches!(token, EncodeToken::Match { length: 4, .. })));
2957 assert!(lazy
2958 .iter()
2959 .any(|token| matches!(token, EncodeToken::Match { length, .. } if *length > 8)));
2960 assert_eq!(decode_lz(&packed, 0, input.len()).unwrap(), input);
2961 }
2962
2963 #[test]
2964 fn cost_aware_match_selection_prefers_repeat_distance_token() {
2965 let pos = 64;
2966 let pattern = b"abcdefgh";
2967 let mut input: Vec<u8> = (0..96u8).map(|byte| byte.wrapping_mul(37)).collect();
2968 input[pos - 30..pos - 22].copy_from_slice(pattern);
2969 input[pos - 10..pos - 2].copy_from_slice(pattern);
2970 input[pos..pos + 8].copy_from_slice(pattern);
2971 input[pos + 8] = b'X';
2972
2973 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
2974 for candidate in 0..pos {
2975 insert_match_position(&input, candidate, &mut buckets);
2976 }
2977 let state = EncoderMatchState {
2978 reps: [30, 0, 0, 0],
2979 last_length: 8,
2980 };
2981
2982 let best = best_match(
2983 &input,
2984 pos,
2985 input.len(),
2986 &buckets,
2987 EncodeOptions::default(),
2988 &state,
2989 DISTANCE_TABLE_SIZE_50,
2990 )
2991 .unwrap();
2992
2993 assert_eq!((best.length, best.distance), (8, 30));
2994 }
2995
2996 #[test]
2997 fn lazy_parser_uses_match_cost_not_only_match_length() {
2998 let pos = 600;
2999 let mut input: Vec<u8> = (0..700u16)
3000 .map(|value| value.wrapping_mul(73) as u8)
3001 .collect();
3002 input[pos - 512..pos - 504].copy_from_slice(b"ABCDEFGH");
3003 input[pos - 504] = b'Z';
3004 input[pos - 29..pos - 21].copy_from_slice(b"BCDEFGHI");
3005 input[pos - 30] = b'x';
3006 input[pos..pos + 9].copy_from_slice(b"ABCDEFGHI");
3007
3008 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3009 for candidate in 0..pos {
3010 insert_match_position(&input, candidate, &mut buckets);
3011 }
3012 let state = EncoderMatchState {
3013 reps: [30, 0, 0, 0],
3014 last_length: 8,
3015 };
3016 let current = best_match(
3017 &input,
3018 pos,
3019 input.len(),
3020 &buckets,
3021 EncodeOptions::default(),
3022 &state,
3023 DISTANCE_TABLE_SIZE_50,
3024 )
3025 .unwrap();
3026
3027 assert_eq!((current.length, current.distance), (8, 512));
3028 assert!(should_lazy_emit_literal(
3029 &input,
3030 pos,
3031 &buckets,
3032 EncodeOptions::default().with_lazy_matching(true),
3033 &state,
3034 DISTANCE_TABLE_SIZE_50,
3035 current,
3036 ));
3037 }
3038
3039 #[test]
3040 fn lazy_parser_uses_bounded_cost_lookahead() {
3041 let pos = 160;
3042 let mut input: Vec<u8> = (0..240u16)
3043 .map(|value| value.wrapping_mul(91) as u8)
3044 .collect();
3045 input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3046 input[pos - 80..pos - 70].copy_from_slice(b"CDEFGHIJKL");
3047 input[pos..pos + 12].copy_from_slice(b"ABCDEFGHIJKL");
3048
3049 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3050 for candidate in 0..pos {
3051 insert_match_position(&input, candidate, &mut buckets);
3052 }
3053 let state = EncoderMatchState::default();
3054 let current = best_match(
3055 &input,
3056 pos,
3057 input.len(),
3058 &buckets,
3059 EncodeOptions::default(),
3060 &state,
3061 DISTANCE_TABLE_SIZE_50,
3062 )
3063 .unwrap();
3064
3065 assert_eq!((current.length, current.distance), (8, 30));
3066 assert!(!should_lazy_emit_literal(
3067 &input,
3068 pos,
3069 &buckets,
3070 EncodeOptions::default()
3071 .with_lazy_matching(true)
3072 .with_lazy_lookahead(1),
3073 &state,
3074 DISTANCE_TABLE_SIZE_50,
3075 current,
3076 ));
3077 assert!(should_lazy_emit_literal(
3078 &input,
3079 pos,
3080 &buckets,
3081 EncodeOptions::default()
3082 .with_lazy_matching(true)
3083 .with_lazy_lookahead(2),
3084 &state,
3085 DISTANCE_TABLE_SIZE_50,
3086 current,
3087 ));
3088 }
3089
3090 #[test]
3091 fn lazy_parser_charges_for_skipped_literals() {
3092 let pos = 160;
3093 let mut input: Vec<u8> = (0..240u16)
3094 .map(|value| value.wrapping_mul(91) as u8)
3095 .collect();
3096 input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3097 input[pos - 80..pos - 71].copy_from_slice(b"CDEFGHIJK");
3098 input[pos..pos + 12].copy_from_slice(b"ABCDEFGHIJKL");
3099
3100 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3101 for candidate in 0..pos {
3102 insert_match_position(&input, candidate, &mut buckets);
3103 }
3104 let state = EncoderMatchState::default();
3105 let current = best_match(
3106 &input,
3107 pos,
3108 input.len(),
3109 &buckets,
3110 EncodeOptions::default(),
3111 &state,
3112 DISTANCE_TABLE_SIZE_50,
3113 )
3114 .unwrap();
3115
3116 let next = best_match(
3117 &input,
3118 pos + 2,
3119 input.len(),
3120 &buckets,
3121 EncodeOptions::default(),
3122 &state,
3123 DISTANCE_TABLE_SIZE_50,
3124 )
3125 .unwrap();
3126
3127 assert!(next.score > current.score);
3128 assert!(next.score <= current.score + 16);
3129 assert!(!should_lazy_emit_literal(
3130 &input,
3131 pos,
3132 &buckets,
3133 EncodeOptions::default()
3134 .with_lazy_matching(true)
3135 .with_lazy_lookahead(2),
3136 &state,
3137 DISTANCE_TABLE_SIZE_50,
3138 current,
3139 ));
3140 }
3141
3142 fn encode_lz_member_with_filter(data: &[u8], kind: Rar50FilterKind) -> Result<Vec<u8>> {
3143 Unpack50Encoder::new().encode_member_with_filter(data, 0, Rar50FilterSpec::new(kind))
3144 }
3145
3146 #[test]
3147 fn encodes_lz_member_with_delta_filter_record() {
3148 let data: Vec<u8> = (0..96).map(|index| (index * 7 + index / 3) as u8).collect();
3149 let input =
3150 encode_lz_member_with_filter(&data, Rar50FilterKind::Delta { channels: 3 }).unwrap();
3151 let block = parse_compressed_block(&input).unwrap();
3152 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3153
3154 let output = decode_lz(&input, 0, data.len()).unwrap();
3155
3156 assert_eq!(output, data);
3157 assert_ne!(lengths.main[256], 0);
3158 }
3159
3160 #[test]
3161 fn rejects_invalid_delta_filter_channel_count() {
3162 assert_eq!(
3163 encode_lz_member_with_filter(b"abc", Rar50FilterKind::Delta { channels: 0 }),
3164 Err(Error::InvalidData(
3165 "RAR 5 DELTA filter channel count is invalid"
3166 ))
3167 );
3168 assert_eq!(
3169 encode_lz_member_with_filter(b"abc", Rar50FilterKind::Delta { channels: 33 }),
3170 Err(Error::InvalidData(
3171 "RAR 5 DELTA filter channel count is invalid"
3172 ))
3173 );
3174 }
3175
3176 #[test]
3177 fn encodes_lz_member_with_e8_filter_record() {
3178 let mut data = b"\xe8\0\0\0\0plain text after call".to_vec();
3179 data.extend_from_slice(&[0xe8, 3, 0, 0, 0, b'X']);
3180 let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8).unwrap();
3181 let block = parse_compressed_block(&input).unwrap();
3182 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3183
3184 let output = decode_lz(&input, 0, data.len()).unwrap();
3185
3186 assert_eq!(output, data);
3187 assert_ne!(lengths.main[256], 0);
3188 }
3189
3190 #[test]
3191 fn rar50_e8_filter_wraps_file_offset_modulo_16m() {
3192 let file_offset = 0x0110_0000;
3193 let mut encoded = vec![0xe8];
3194 encoded.extend_from_slice(&0x0010_0c08u32.to_le_bytes());
3195
3196 let mut decoded = encoded.clone();
3197 e8e9_decode(&mut decoded, file_offset, false);
3198
3199 assert_eq!(&decoded[1..5], &0x0000_0c07u32.to_le_bytes());
3200 e8e9_encode(&mut decoded, file_offset, false);
3201 assert_eq!(decoded, encoded);
3202 }
3203
3204 #[test]
3205 fn streaming_decode_reports_filtered_member_with_typed_sentinel() {
3206 let data = b"\xe8\0\0\0\0plain text after call".to_vec();
3207 let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8).unwrap();
3208 let mut reader = input.as_slice();
3209 let mut decoder = Unpack50Decoder::new();
3210
3211 let error = decoder
3212 .decode_member_from_reader_with_dictionary_to_sink(
3213 &mut reader,
3214 0,
3215 data.len(),
3216 128 * 1024,
3217 false,
3218 |_chunk| Ok::<_, std::convert::Infallible>(()),
3219 )
3220 .unwrap_err();
3221
3222 assert!(matches!(error, StreamDecodeError::FilteredMember));
3223 }
3224
3225 #[test]
3226 fn encodes_lz_member_with_e8e9_filter_record() {
3227 let data = b"\xe9\0\0\0\0jump target through e9".to_vec();
3228 let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8E9).unwrap();
3229 let block = parse_compressed_block(&input).unwrap();
3230 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3231
3232 let output = decode_lz(&input, 0, data.len()).unwrap();
3233
3234 assert_eq!(output, data);
3235 assert_ne!(lengths.main[256], 0);
3236 }
3237
3238 #[test]
3239 fn encodes_lz_member_with_ranged_e8e9_filter_record() {
3240 let mut data = b"\xe8\0\0\0\0plain prefix outside filter range".to_vec();
3241 let range_start = data.len();
3242 for _ in 0..16 {
3243 let operand_pos = data.len() + 1;
3244 data.push(0xe8);
3245 let relative = 0x7000u32.wrapping_sub(operand_pos as u32);
3246 data.extend_from_slice(&relative.to_le_bytes());
3247 data.extend_from_slice(b" code ");
3248 }
3249 let range = range_start..data.len();
3250 data.extend_from_slice(b"\xe9\0\0\0\0plain suffix outside filter range");
3251
3252 let input = Unpack50Encoder::new()
3253 .encode_member_with_filter(
3254 &data,
3255 0,
3256 Rar50FilterSpec::range(Rar50FilterKind::E8E9, range),
3257 )
3258 .unwrap();
3259 let block = parse_compressed_block(&input).unwrap();
3260 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3261
3262 let output = decode_lz(&input, 0, data.len()).unwrap();
3263
3264 assert_eq!(output, data);
3265 assert_ne!(lengths.main[256], 0);
3266 }
3267
3268 #[test]
3269 fn encodes_lz_member_with_multiple_filter_records() {
3270 let mut data = b"\xe8\0\0\0\0plain prefix outside filters".to_vec();
3271 let first_start = data.len();
3272 data.extend_from_slice(b"\xe8\0\0\0\0first filtered cluster");
3273 let first_end = data.len();
3274 data.extend_from_slice(b"large plain middle outside filters");
3275 let second_start = data.len();
3276 data.extend_from_slice(b"\xe8\0\0\0\0second filtered cluster");
3277 let second_end = data.len();
3278
3279 let input = Unpack50Encoder::new()
3280 .encode_member_with_filters(
3281 &data,
3282 0,
3283 &[
3284 Rar50FilterSpec::range(Rar50FilterKind::E8, first_start..first_end),
3285 Rar50FilterSpec::range(Rar50FilterKind::E8, second_start..second_end),
3286 ],
3287 )
3288 .unwrap();
3289 let block = parse_compressed_block(&input).unwrap();
3290 let (lengths, table_bits) = read_table_lengths(&input[block.payload.clone()], 0).unwrap();
3291 let tables = DecodeTables::from_lengths(&lengths).unwrap();
3292 let mut bits = BitReader {
3293 input: &input[block.payload],
3294 bit_pos: table_bits,
3295 };
3296 assert_eq!(tables.main.decode(&mut bits).unwrap(), 256);
3297 let first = read_filter(&mut bits, 0).unwrap();
3298 assert_eq!(tables.main.decode(&mut bits).unwrap(), 256);
3299 let second = read_filter(&mut bits, 0).unwrap();
3300
3301 let output = decode_lz(&input, 0, data.len()).unwrap();
3302
3303 assert_eq!(output, data);
3304 assert_eq!(first.start, first_start);
3305 assert_eq!(second.start, second_start);
3306 }
3307
3308 #[test]
3309 fn encodes_lz_member_with_arm_filter_record() {
3310 let data = [0x04, 0x00, 0x00, 0xeb, b'A', b'R', b'M', b'!'];
3311 let input = encode_lz_member_with_filter(&data, Rar50FilterKind::Arm).unwrap();
3312 let block = parse_compressed_block(&input).unwrap();
3313 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3314
3315 let output = decode_lz(&input, 0, data.len()).unwrap();
3316
3317 assert_eq!(output, data);
3318 assert_ne!(lengths.main[256], 0);
3319 }
3320
3321 #[test]
3322 fn arm_filter_uses_wrapping_address_arithmetic_at_u32_boundary() {
3323 let original = [0x04, 0x00, 0x00, 0xeb, 0x08, 0x00, 0x00, 0xeb];
3324 let mut filtered = original;
3325
3326 arm_encode(&mut filtered, u32::MAX - 3);
3327 assert_ne!(filtered, original);
3328 arm_decode(&mut filtered, u32::MAX - 3);
3329
3330 assert_eq!(filtered, original);
3331 }
3332
3333 #[test]
3334 fn solid_encoder_emits_rar50_matches_against_previous_member_history() {
3335 let first = b"RAR5 solid shared phrase alpha beta gamma\n".repeat(16);
3336 let second = b"RAR5 solid shared phrase alpha beta gamma\nsecond\n".repeat(4);
3337 let solid = encode_lz_member_with_history(&second, &first, 0).unwrap();
3338 let standalone = encode_lz_member(&second, 0).unwrap();
3339 let mut decoder = Unpack50Decoder::new();
3340
3341 assert_eq!(
3342 decoder
3343 .decode_member(
3344 &encode_lz_member(&first, 0).unwrap(),
3345 0,
3346 first.len(),
3347 false,
3348 DecodeMode::Lz
3349 )
3350 .unwrap(),
3351 first
3352 );
3353 assert_eq!(
3354 decoder
3355 .decode_member(&solid, 0, second.len(), true, DecodeMode::Lz)
3356 .unwrap(),
3357 second
3358 );
3359 assert!(solid.len() < standalone.len());
3360 }
3361
3362 #[test]
3363 fn large_lz_members_are_split_into_multiple_compressed_blocks() {
3364 let data = vec![0u8; MAX_COMPRESSED_BLOCK_OUTPUT + 1];
3365 let encoded = encode_lz_member_with_options(&data, 0, EncodeOptions::new(16)).unwrap();
3366 let mut cursor = std::io::Cursor::new(encoded.as_slice());
3367 let first = read_compressed_block(&mut cursor).unwrap();
3368 let second = read_compressed_block(&mut cursor).unwrap();
3369 let mut decoder = Unpack50Decoder::new();
3370
3371 assert!(!first.header.is_last);
3372 assert!(second.header.is_last);
3373 assert_eq!(
3374 decoder
3375 .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3376 .unwrap(),
3377 data
3378 );
3379 }
3380
3381 #[test]
3382 fn large_filtered_lz_members_split_filter_records_by_block() {
3383 let mut data: Vec<_> = (0..MAX_COMPRESSED_BLOCK_OUTPUT + 512)
3384 .map(|index| index as u8)
3385 .collect();
3386 data[256] = 0xe8;
3387 data[257..261].copy_from_slice(&0x20u32.to_le_bytes());
3388 data[MAX_COMPRESSED_BLOCK_OUTPUT + 64] = 0xe8;
3389 data[MAX_COMPRESSED_BLOCK_OUTPUT + 65..MAX_COMPRESSED_BLOCK_OUTPUT + 69]
3390 .copy_from_slice(&0x40u32.to_le_bytes());
3391
3392 let encoded = Unpack50Encoder::with_options(EncodeOptions::new(0))
3393 .encode_member_with_filter(
3394 &data,
3395 0,
3396 Rar50FilterSpec::range(Rar50FilterKind::E8, 0..data.len()),
3397 )
3398 .unwrap();
3399 let mut cursor = std::io::Cursor::new(encoded.as_slice());
3400 let first = read_compressed_block(&mut cursor).unwrap();
3401 let mut blocks = 1usize;
3402 let mut last_is_last = first.header.is_last;
3403 while cursor.position() < encoded.len() as u64 {
3404 last_is_last = read_compressed_block(&mut cursor).unwrap().header.is_last;
3405 blocks += 1;
3406 }
3407 let mut decoder = Unpack50Decoder::new();
3408
3409 assert!(!first.header.is_last);
3410 assert!(last_is_last);
3411 assert!(blocks > 2);
3412 assert_eq!(
3413 decoder
3414 .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3415 .unwrap(),
3416 data
3417 );
3418 }
3419
3420 #[test]
3421 fn filters_are_split_before_rar_reader_filter_limit() {
3422 let data = vec![0u8; MAX_FILTER_BLOCK_LENGTH + 1];
3423 let encoded = Unpack50Encoder::with_options(
3424 EncodeOptions::new(0).with_max_match_distance(128 * 1024),
3425 )
3426 .encode_member_with_filter(
3427 &data,
3428 0,
3429 Rar50FilterSpec::new(Rar50FilterKind::Delta { channels: 4 }),
3430 )
3431 .unwrap();
3432 let mut cursor = std::io::Cursor::new(encoded.as_slice());
3433 let first = read_compressed_block(&mut cursor).unwrap();
3434 let second = read_compressed_block(&mut cursor).unwrap();
3435 let mut decoder = Unpack50Decoder::new();
3436
3437 assert!(!first.header.is_last);
3438 assert!(second.header.is_last);
3439 assert_eq!(
3440 decoder
3441 .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3442 .unwrap(),
3443 data
3444 );
3445 }
3446
3447 #[test]
3448 fn solid_encoder_history_limit_follows_encode_options_dictionary() {
3449 let mut encoder = Unpack50Encoder::with_options(
3450 EncodeOptions::new(0).with_max_match_distance(DEFAULT_DICTIONARY_SIZE + 1024),
3451 );
3452 encoder.remember(&vec![0x41; DEFAULT_DICTIONARY_SIZE + 512]);
3453
3454 assert_eq!(encoder.history.len(), DEFAULT_DICTIONARY_SIZE + 512);
3455
3456 let mut capped =
3457 Unpack50Encoder::with_options(EncodeOptions::new(0).with_max_match_distance(1024));
3458 capped.remember(&vec![0x42; 4096]);
3459
3460 assert_eq!(capped.history.len(), 1024);
3461 }
3462
3463 #[test]
3464 fn encodes_lz_member_with_last_length_repeat_symbols() {
3465 let data = b"abcdXabcdYabcdZabcd";
3466 let input = encode_lz_member(data, 0).unwrap();
3467 let block = parse_compressed_block(&input).unwrap();
3468 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3469
3470 let output = decode_lz(&input, 0, data.len()).unwrap();
3471
3472 assert_eq!(output, data);
3473 assert_ne!(lengths.main[257], 0);
3474 }
3475
3476 #[test]
3477 fn encodes_lz_member_using_rar70_distance_table_shape() {
3478 let data = b"RAR7-compatible repeated phrase repeated phrase repeated phrase";
3479 let input = encode_lz_member(data, 1).unwrap();
3480
3481 let output = decode_lz(&input, 1, data.len()).unwrap();
3482
3483 assert_eq!(output, data);
3484 }
3485
3486 #[test]
3487 fn decode_member_from_reader_accepts_incremental_input() {
3488 struct OneByteReader<'a> {
3489 data: &'a [u8],
3490 pos: usize,
3491 }
3492
3493 impl Read for OneByteReader<'_> {
3494 fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
3495 if self.pos >= self.data.len() {
3496 return Ok(0);
3497 }
3498 out[0] = self.data[self.pos];
3499 self.pos += 1;
3500 Ok(1)
3501 }
3502 }
3503
3504 let payload = literal_only_payload(b"ABBA");
3505 let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3506 let mut reader = OneByteReader {
3507 data: &input,
3508 pos: 0,
3509 };
3510 let mut decoder = Unpack50Decoder::new();
3511
3512 let output = decoder
3513 .decode_member_from_reader(&mut reader, 0, 4, false, DecodeMode::LiteralOnly)
3514 .unwrap();
3515
3516 assert_eq!(output, b"ABBA");
3517 }
3518
3519 #[test]
3520 fn decodes_synthetic_new_match_block() {
3521 let payload = new_match_payload();
3522 let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3523
3524 let output = decode_lz(&input, 0, 4).unwrap();
3525
3526 assert_eq!(output, b"ABAB");
3527 }
3528
3529 #[test]
3530 fn decodes_synthetic_last_length_match_block() {
3531 let payload = repeat_payload(257);
3532 let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3533
3534 let output = decode_lz(&input, 0, 6).unwrap();
3535
3536 assert_eq!(output, b"ABABAB");
3537 }
3538
3539 #[test]
3540 fn decodes_synthetic_repeat_distance_match_block() {
3541 let payload = repeat_payload(258);
3542 let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3543
3544 let output = decode_lz(&input, 0, 6).unwrap();
3545
3546 assert_eq!(output, b"ABABAB");
3547 }
3548
3549 #[test]
3550 fn rejects_literal_only_block_without_tables() {
3551 let input = encode_compressed_block(&[0], 8, false, true).unwrap();
3552
3553 assert_eq!(
3554 decode_literal_only(&input, 0, 1),
3555 Err(Error::InvalidData("RAR 5 block reuses missing tables"))
3556 );
3557 }
3558
3559 #[test]
3560 fn decodes_length_slots() {
3561 assert_eq!(slot_to_length(0, 0).unwrap(), 2);
3562 assert_eq!(slot_to_length(7, 0).unwrap(), 9);
3563 assert_eq!(slot_to_length(8, 0).unwrap(), 10);
3564 assert_eq!(slot_to_length(8, 1).unwrap(), 11);
3565 assert_eq!(slot_to_length(11, 1).unwrap(), 17);
3566 assert_eq!(slot_to_length(12, 3).unwrap(), 21);
3567 }
3568
3569 #[test]
3570 fn decodes_distance_slots() {
3571 assert_eq!(slot_to_distance(0, 0).unwrap(), 1);
3572 assert_eq!(slot_to_distance(3, 0).unwrap(), 4);
3573 assert_eq!(distance_slot_bit_count(4).unwrap(), 1);
3574 assert_eq!(slot_to_distance(4, 0).unwrap(), 5);
3575 assert_eq!(slot_to_distance(4, 1).unwrap(), 6);
3576 assert_eq!(distance_slot_bit_count(10).unwrap(), 4);
3577 assert_eq!(slot_to_distance(10, 15).unwrap(), 48);
3578 }
3579
3580 #[test]
3581 fn bit_reader_accepts_large_rar5_distance_extras() {
3582 let mut bits = BitReader::new(&[0xff, 0x00, 0xaa, 0x55]);
3583
3584 assert_eq!(bits.read_bits(32).unwrap(), 0xff00_aa55);
3585 assert_eq!(
3586 bits.read_bits(1),
3587 Err(Error::NeedMoreInput),
3588 "32-bit reads must not leave a partial cursor state"
3589 );
3590 }
3591
3592 #[test]
3593 fn copies_lz_matches_with_overlap() {
3594 let decoder = Unpack50Decoder::new();
3595 let mut output = b"AB".to_vec();
3596
3597 decoder
3598 .copy_match(&mut output, 2, 6, 8, DEFAULT_DICTIONARY_SIZE)
3599 .unwrap();
3600
3601 assert_eq!(output, b"ABABABAB");
3602 }
3603
3604 #[test]
3605 fn rejects_invalid_match_copy() {
3606 let decoder = Unpack50Decoder::new();
3607 let mut output = b"AB".to_vec();
3608
3609 assert_eq!(
3610 decoder.copy_match(&mut output, 3, 1, 3, DEFAULT_DICTIONARY_SIZE),
3611 Err(Error::InvalidData("RAR 5 match distance exceeds window"))
3612 );
3613 assert_eq!(
3614 decoder.copy_match(&mut output, 1, 2, 3, DEFAULT_DICTIONARY_SIZE),
3615 Err(Error::InvalidData("RAR 5 match exceeds output limit"))
3616 );
3617 }
3618
3619 #[test]
3620 fn rejects_match_distance_beyond_dictionary() {
3621 let decoder = Unpack50Decoder::new();
3622 let mut output = b"ABCD".to_vec();
3623
3624 assert_eq!(
3625 decoder.copy_match(&mut output, 4, 1, 5, 3),
3626 Err(Error::InvalidData(
3627 "RAR 5 match distance exceeds dictionary"
3628 ))
3629 );
3630 }
3631
3632 #[test]
3633 fn solid_history_is_capped_to_dictionary_size() {
3634 let mut decoder = Unpack50Decoder::new();
3635 let first_payload = literal_only_payload(b"ABBA");
3636 let first =
3637 encode_compressed_block(&first_payload, first_payload.len() * 8, true, true).unwrap();
3638 let second_payload = literal_only_payload(b"BAAB");
3639 let second =
3640 encode_compressed_block(&second_payload, second_payload.len() * 8, true, true).unwrap();
3641
3642 assert_eq!(
3643 decoder
3644 .decode_member_with_dictionary(&first, 0, 4, 6, false, DecodeMode::LiteralOnly)
3645 .unwrap(),
3646 b"ABBA"
3647 );
3648 assert_eq!(decoder.history, b"ABBA");
3649
3650 assert_eq!(
3651 decoder
3652 .decode_member_with_dictionary(&second, 0, 4, 6, true, DecodeMode::LiteralOnly)
3653 .unwrap(),
3654 b"BAAB"
3655 );
3656 assert_eq!(decoder.history, b"BABAAB");
3657 }
3658
3659 #[test]
3660 fn streaming_decoder_history_is_capped_without_reordering() {
3661 let mut decoder = Unpack50Decoder::new();
3662 let first_payload = literal_only_payload(b"ABBA");
3663 let first =
3664 encode_compressed_block(&first_payload, first_payload.len() * 8, true, true).unwrap();
3665 let second_payload = literal_only_payload(b"BAAB");
3666 let second =
3667 encode_compressed_block(&second_payload, second_payload.len() * 8, true, true).unwrap();
3668 let mut decoded = Vec::new();
3669
3670 decoder
3671 .decode_member_from_reader_with_dictionary_to_sink(
3672 &mut std::io::Cursor::new(&first),
3673 0,
3674 4,
3675 6,
3676 false,
3677 |chunk| {
3678 match chunk {
3679 DecodedChunk::Bytes(bytes) => decoded.extend_from_slice(bytes),
3680 DecodedChunk::Repeated { byte, len } => {
3681 decoded.extend(std::iter::repeat_n(byte, len));
3682 }
3683 }
3684 Ok::<(), std::io::Error>(())
3685 },
3686 )
3687 .unwrap();
3688 assert_eq!(decoded, b"ABBA");
3689 assert_eq!(decoder.history, b"ABBA");
3690
3691 decoded.clear();
3692 decoder
3693 .decode_member_from_reader_with_dictionary_to_sink(
3694 &mut std::io::Cursor::new(&second),
3695 0,
3696 4,
3697 6,
3698 true,
3699 |chunk| {
3700 match chunk {
3701 DecodedChunk::Bytes(bytes) => decoded.extend_from_slice(bytes),
3702 DecodedChunk::Repeated { byte, len } => {
3703 decoded.extend(std::iter::repeat_n(byte, len));
3704 }
3705 }
3706 Ok::<(), std::io::Error>(())
3707 },
3708 )
3709 .unwrap();
3710 assert_eq!(decoded, b"BAAB");
3711 assert_eq!(decoder.history, b"BABAAB");
3712 }
3713
3714 fn literal_only_payload(data: &[u8]) -> Vec<u8> {
3715 let mut lengths = TableLengths {
3716 main: vec![0; MAIN_TABLE_SIZE],
3717 distance: vec![0; DISTANCE_TABLE_SIZE_50],
3718 align: vec![0; ALIGN_TABLE_SIZE],
3719 length: vec![0; LENGTH_TABLE_SIZE],
3720 };
3721 lengths.main[b'A' as usize] = 1;
3722 lengths.main[b'B' as usize] = 1;
3723 let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3724 let mut writer = BitWriter { bytes, bit_pos };
3725 for &byte in data {
3726 match byte {
3727 b'A' => writer.write_bits(0, 1),
3728 b'B' => writer.write_bits(1, 1),
3729 _ => panic!("test helper only encodes A/B"),
3730 }
3731 }
3732 writer.finish()
3733 }
3734
3735 fn new_match_payload() -> Vec<u8> {
3736 let mut lengths = TableLengths {
3737 main: vec![0; MAIN_TABLE_SIZE],
3738 distance: vec![0; DISTANCE_TABLE_SIZE_50],
3739 align: vec![0; ALIGN_TABLE_SIZE],
3740 length: vec![0; LENGTH_TABLE_SIZE],
3741 };
3742 lengths.main[b'A' as usize] = 2;
3743 lengths.main[b'B' as usize] = 2;
3744 lengths.main[262] = 2;
3745 lengths.distance[1] = 1;
3746 let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3747 let mut writer = BitWriter { bytes, bit_pos };
3748
3749 writer.write_bits(0b00, 2); writer.write_bits(0b01, 2); writer.write_bits(0b10, 2); writer.write_bits(0, 1); writer.finish()
3754 }
3755
3756 fn repeat_payload(repeat_symbol: usize) -> Vec<u8> {
3757 let mut lengths = TableLengths {
3758 main: vec![0; MAIN_TABLE_SIZE],
3759 distance: vec![0; DISTANCE_TABLE_SIZE_50],
3760 align: vec![0; ALIGN_TABLE_SIZE],
3761 length: vec![0; LENGTH_TABLE_SIZE],
3762 };
3763 lengths.main[b'A' as usize] = 2;
3764 lengths.main[b'B' as usize] = 2;
3765 lengths.main[repeat_symbol] = 2;
3766 lengths.main[262] = 2;
3767 lengths.distance[1] = 1;
3768 lengths.length[0] = 1;
3769 let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3770 let mut writer = BitWriter { bytes, bit_pos };
3771
3772 writer.write_bits(0b00, 2); writer.write_bits(0b01, 2); writer.write_bits(0b11, 2); writer.write_bits(0, 1); writer.write_bits(0b10, 2); if repeat_symbol == 258 {
3778 writer.write_bits(0, 1); }
3780 writer.finish()
3781 }
3782}