1use crate::filters::{self, DeltaErrorMessages, FilterOp};
2use crate::{huffman, Error, Result};
3use std::collections::VecDeque;
4use std::io::Read;
5use std::ops::Range;
6
7pub const LEVEL_TABLE_SIZE: usize = 20;
8pub const MAIN_TABLE_SIZE: usize = 306;
9pub const DISTANCE_TABLE_SIZE_50: usize = 64;
10pub const DISTANCE_TABLE_SIZE_70: usize = 80;
11pub const ALIGN_TABLE_SIZE: usize = 16;
12pub const LENGTH_TABLE_SIZE: usize = 44;
13const DEFAULT_DICTIONARY_SIZE: usize = 4 * 1024 * 1024;
14const MAX_INITIAL_OUTPUT_CAPACITY: usize = 1024 * 1024;
15const STREAM_FLUSH_THRESHOLD: usize = 64 * 1024;
16const STREAM_HISTORY_LIMIT: usize = 64 * 1024 * 1024;
17const MAX_ENCODER_MATCH_OFFSET: usize = DEFAULT_DICTIONARY_SIZE;
18const MAX_ENCODER_MATCH_LENGTH: usize = 4096;
19const MAX_COMPRESSED_BLOCK_OUTPUT: usize = 4 * 1024 * 1024;
20const MAX_FILTER_BLOCK_LENGTH: usize = 0x3ffff;
21const MATCH_HASH_BUCKETS: usize = 4096;
22const MAX_MATCH_CANDIDATES: usize = 256;
23
24#[derive(Debug, Clone, PartialEq, Eq)]
25pub struct CompressedBlock {
26 pub header: CompressedBlockHeader,
27 pub header_len: usize,
28 pub payload: Range<usize>,
29}
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq)]
32pub struct CompressedBlockHeader {
33 pub flags: u8,
34 pub is_last: bool,
35 pub has_tables: bool,
36 pub final_byte_bits: u8,
37 pub payload_size: usize,
38 pub payload_bits: usize,
39}
40
41struct OwnedCompressedBlock {
42 header: CompressedBlockHeader,
43 payload: Vec<u8>,
44}
45
46#[derive(Debug)]
47#[doc(hidden)]
48pub enum StreamDecodeError<E> {
49 Decode(Error),
50 FilteredMember,
51 Sink(E),
52}
53
54impl<E> From<Error> for StreamDecodeError<E> {
55 fn from(error: Error) -> Self {
56 Self::Decode(error)
57 }
58}
59
60#[derive(Debug, Clone, Copy, PartialEq, Eq)]
61#[doc(hidden)]
62pub enum DecodedChunk<'a> {
63 Bytes(&'a [u8]),
64 Repeated { byte: u8, len: usize },
65}
66
67#[derive(Debug, Clone, PartialEq, Eq)]
68pub struct TableLengths {
69 pub main: Vec<u8>,
70 pub distance: Vec<u8>,
71 pub align: Vec<u8>,
72 pub length: Vec<u8>,
73}
74
75#[derive(Debug, Clone)]
76pub struct DecodeTables {
77 pub main: HuffmanTable,
78 pub distance: HuffmanTable,
79 pub align: HuffmanTable,
80 pub length: HuffmanTable,
81 pub align_mode: bool,
82}
83
84impl DecodeTables {
85 pub fn from_lengths(lengths: &TableLengths) -> Result<Self> {
86 let align_mode = lengths
87 .align
88 .iter()
89 .any(|&length| length != 0 && length != 4);
90 Ok(Self {
91 main: HuffmanTable::from_lengths(&lengths.main)?,
92 distance: HuffmanTable::from_lengths(&lengths.distance)?,
93 align: HuffmanTable::from_lengths(&lengths.align)?,
94 length: HuffmanTable::from_lengths(&lengths.length)?,
95 align_mode,
96 })
97 }
98}
99
100#[derive(Debug, Clone, Copy, PartialEq, Eq)]
101pub enum DecodeMode {
102 LiteralOnly,
103 Lz,
104 LzNoFilters,
105}
106
107impl DecodeMode {
108 fn uses_lz(self) -> bool {
109 matches!(self, Self::Lz | Self::LzNoFilters)
110 }
111
112 fn applies_filters(self) -> bool {
113 matches!(self, Self::Lz)
114 }
115}
116
117pub fn parse_compressed_block(input: &[u8]) -> Result<CompressedBlock> {
118 if input.len() < 3 {
119 return Err(Error::NeedMoreInput);
120 }
121
122 let flags = input[0];
123 let checksum = input[1];
124 let size_bytes = match (flags >> 3) & 0x03 {
125 0 => 1,
126 1 => 2,
127 2 => 3,
128 _ => return Err(Error::InvalidData("RAR 5 block size length is invalid")),
129 };
130 let header_len = 2 + size_bytes;
131 if input.len() < header_len {
132 return Err(Error::NeedMoreInput);
133 }
134
135 let size_data = &input[2..header_len];
136 let actual = size_data
137 .iter()
138 .fold(checksum ^ flags, |acc, &byte| acc ^ byte);
139 if actual != 0x5a {
140 return Err(Error::InvalidData("RAR 5 block header checksum mismatch"));
141 }
142
143 let payload_size = size_data
144 .iter()
145 .enumerate()
146 .fold(0usize, |acc, (index, &byte)| {
147 acc | (usize::from(byte) << (index * 8))
148 });
149 let payload_end = header_len
150 .checked_add(payload_size)
151 .ok_or(Error::InvalidData("RAR 5 block size overflows"))?;
152 if input.len() < payload_end {
153 return Err(Error::NeedMoreInput);
154 }
155
156 let final_byte_bits = ((flags & 0x07) + 1).min(8);
157 let payload_bits = if payload_size == 0 {
158 0
159 } else {
160 (payload_size - 1) * 8 + usize::from(final_byte_bits)
161 };
162
163 Ok(CompressedBlock {
164 header: CompressedBlockHeader {
165 flags,
166 is_last: flags & 0x40 != 0,
167 has_tables: flags & 0x80 != 0,
168 final_byte_bits,
169 payload_size,
170 payload_bits,
171 },
172 header_len,
173 payload: header_len..payload_end,
174 })
175}
176
177pub fn read_level_lengths(input: &[u8]) -> Result<([u8; LEVEL_TABLE_SIZE], usize)> {
178 let mut bits = BitReader::new(input);
179 let mut lengths = [0; LEVEL_TABLE_SIZE];
180 let mut pos = 0;
181 while pos < LEVEL_TABLE_SIZE {
182 let length = bits.read_bits(4)? as u8;
183 if length == 15 {
184 let zero_count = bits.read_bits(4)? as usize;
185 if zero_count == 0 {
186 lengths[pos] = 15;
187 pos += 1;
188 } else {
189 let count = zero_count + 2;
190 for _ in 0..count {
191 if pos >= LEVEL_TABLE_SIZE {
192 break;
193 }
194 lengths[pos] = 0;
195 pos += 1;
196 }
197 }
198 } else {
199 lengths[pos] = length;
200 pos += 1;
201 }
202 }
203 Ok((lengths, bits.bit_pos))
204}
205
206pub fn table_length_count(algorithm_version: u8) -> Result<usize> {
207 match algorithm_version {
208 0 => Ok(MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_50 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE),
209 1 => Ok(MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_70 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE),
210 _ => Err(Error::InvalidData(
211 "RAR 5 unknown compression algorithm version",
212 )),
213 }
214}
215
216pub fn read_table_lengths(input: &[u8], algorithm_version: u8) -> Result<(TableLengths, usize)> {
217 let table_size = table_length_count(algorithm_version)?;
218 let (level_lengths, level_bits) = read_level_lengths(input)?;
219 let level_decoder = HuffmanTable::from_lengths(&level_lengths)?;
220 let mut bits = BitReader::new(input);
221 bits.bit_pos = level_bits;
222
223 let mut lengths = Vec::with_capacity(table_size);
224 while lengths.len() < table_size {
225 let number = level_decoder.decode(&mut bits)?;
226 match number {
227 0..=15 => lengths.push(number as u8),
228 16 | 17 => {
229 if lengths.is_empty() {
230 return Err(Error::InvalidData(
231 "RAR 5 table repeats missing previous length",
232 ));
233 }
234 let count = if number == 16 {
235 3 + bits.read_bits(3)? as usize
236 } else {
237 11 + bits.read_bits(7)? as usize
238 };
239 let previous = *lengths.last().unwrap();
240 for _ in 0..count {
241 if lengths.len() >= table_size {
242 break;
243 }
244 lengths.push(previous);
245 }
246 }
247 18 | 19 => {
248 let count = if number == 18 {
249 3 + bits.read_bits(3)? as usize
250 } else {
251 11 + bits.read_bits(7)? as usize
252 };
253 for _ in 0..count {
254 if lengths.len() >= table_size {
255 break;
256 }
257 lengths.push(0);
258 }
259 }
260 _ => return Err(Error::InvalidData("RAR 5 invalid level-table symbol")),
261 }
262 }
263
264 let distance_size = match algorithm_version {
265 0 => DISTANCE_TABLE_SIZE_50,
266 1 => DISTANCE_TABLE_SIZE_70,
267 _ => unreachable!("validated by table_length_count"),
268 };
269 let distance_start = MAIN_TABLE_SIZE;
270 let align_start = distance_start + distance_size;
271 let length_start = align_start + ALIGN_TABLE_SIZE;
272
273 Ok((
274 TableLengths {
275 main: lengths[..distance_start].to_vec(),
276 distance: lengths[distance_start..align_start].to_vec(),
277 align: lengths[align_start..length_start].to_vec(),
278 length: lengths[length_start..].to_vec(),
279 },
280 bits.bit_pos,
281 ))
282}
283
284pub fn encode_table_lengths(lengths: &TableLengths, algorithm_version: u8) -> Result<Vec<u8>> {
285 encode_table_lengths_with_bit_count(lengths, algorithm_version).map(|(data, _)| data)
286}
287
288pub fn encode_table_lengths_with_bit_count(
289 lengths: &TableLengths,
290 algorithm_version: u8,
291) -> Result<(Vec<u8>, usize)> {
292 let distance_size = match algorithm_version {
293 0 => DISTANCE_TABLE_SIZE_50,
294 1 => DISTANCE_TABLE_SIZE_70,
295 _ => {
296 return Err(Error::InvalidData(
297 "RAR 5 unknown compression algorithm version",
298 ))
299 }
300 };
301 if lengths.main.len() != MAIN_TABLE_SIZE
302 || lengths.distance.len() != distance_size
303 || lengths.align.len() != ALIGN_TABLE_SIZE
304 || lengths.length.len() != LENGTH_TABLE_SIZE
305 {
306 return Err(Error::InvalidData("RAR 5 table length count mismatch"));
307 }
308
309 let flattened = lengths
310 .main
311 .iter()
312 .chain(lengths.distance.iter())
313 .chain(lengths.align.iter())
314 .chain(lengths.length.iter())
315 .copied()
316 .collect::<Vec<_>>();
317 for &length in &flattened {
318 if length > 15 {
319 return Err(Error::InvalidData("RAR 5 Huffman length is too large"));
320 }
321 }
322
323 let level_tokens = encode_table_level_tokens(&flattened);
324 let level_lengths = level_code_lengths_for_tokens(&level_tokens);
325 let level_table = HuffmanTable::from_lengths(&level_lengths)?;
326 let mut writer = BitWriter::new();
327 write_level_lengths(&mut writer, &level_lengths);
328 for token in level_tokens {
329 let (code, len) = level_table.code_for_symbol(token.symbol)?;
330 writer.write_bits(usize::from(code), usize::from(len));
331 if token.extra_bits != 0 {
332 writer.write_bits(
333 usize::from(token.extra_value),
334 usize::from(token.extra_bits),
335 );
336 }
337 }
338 let bit_count = writer.bit_pos;
339 Ok((writer.finish(), bit_count))
340}
341
342pub fn encode_compressed_block(
343 payload: &[u8],
344 payload_bits: usize,
345 has_tables: bool,
346 is_last: bool,
347) -> Result<Vec<u8>> {
348 if payload_bits > payload.len() * 8 {
349 return Err(Error::InvalidData("RAR 5 block bit count exceeds payload"));
350 }
351 if payload.is_empty() && payload_bits != 0 {
352 return Err(Error::InvalidData("RAR 5 empty block has payload bits"));
353 }
354 if !payload.is_empty() && payload_bits <= (payload.len() - 1) * 8 {
355 return Err(Error::InvalidData("RAR 5 block has unused payload bytes"));
356 }
357 if payload.len() > 0x00ff_ffff {
358 return Err(Error::InvalidData("RAR 5 block payload is too large"));
359 }
360
361 let size_len = if payload.len() <= 0xff {
362 1
363 } else if payload.len() <= 0xffff {
364 2
365 } else {
366 3
367 };
368 let final_byte_bits = if payload.is_empty() {
369 1
370 } else {
371 ((payload_bits - 1) % 8) + 1
372 };
373 let mut flags = (final_byte_bits as u8) - 1;
374 flags |= match size_len {
375 1 => 0,
376 2 => 1 << 3,
377 3 => 2 << 3,
378 _ => unreachable!("size_len is constrained above"),
379 };
380 if is_last {
381 flags |= 0x40;
382 }
383 if has_tables {
384 flags |= 0x80;
385 }
386
387 let mut size_bytes = [0u8; 3];
388 let mut size = payload.len();
389 for byte in &mut size_bytes[..size_len] {
390 *byte = size as u8;
391 size >>= 8;
392 }
393 let checksum = size_bytes[..size_len]
394 .iter()
395 .fold(0x5a ^ flags, |acc, &byte| acc ^ byte);
396 let mut out = Vec::with_capacity(2 + size_len + payload.len());
397 out.push(flags);
398 out.push(checksum);
399 out.extend_from_slice(&size_bytes[..size_len]);
400 out.extend_from_slice(payload);
401 Ok(out)
402}
403
404pub fn decode_literal_only(
405 input: &[u8],
406 algorithm_version: u8,
407 output_size: usize,
408) -> Result<Vec<u8>> {
409 let mut decoder = Unpack50Decoder::new();
410 decoder.decode_member(
411 input,
412 algorithm_version,
413 output_size,
414 false,
415 DecodeMode::LiteralOnly,
416 )
417}
418
419pub fn decode_lz(input: &[u8], algorithm_version: u8, output_size: usize) -> Result<Vec<u8>> {
420 let mut decoder = Unpack50Decoder::new();
421 decoder.decode_member(input, algorithm_version, output_size, false, DecodeMode::Lz)
422}
423
424pub fn encode_literal_only(data: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
425 let distance_size = match algorithm_version {
426 0 => DISTANCE_TABLE_SIZE_50,
427 1 => DISTANCE_TABLE_SIZE_70,
428 _ => {
429 return Err(Error::InvalidData(
430 "RAR 5 unknown compression algorithm version",
431 ))
432 }
433 };
434 let mut lengths = TableLengths {
435 main: vec![0; MAIN_TABLE_SIZE],
436 distance: vec![0; distance_size],
437 align: vec![0; ALIGN_TABLE_SIZE],
438 length: vec![0; LENGTH_TABLE_SIZE],
439 };
440 let present = literal_presence(data);
441 let literal_count = present.iter().filter(|&&used| used).count();
442 let literal_length = huffman::bits_for_symbol_count(literal_count);
443 for (symbol, used) in present.into_iter().enumerate() {
444 if used {
445 lengths.main[symbol] = literal_length;
446 }
447 }
448
449 let table = HuffmanTable::from_lengths(&lengths.main)?;
450 let (table_data, table_bits) =
451 encode_table_lengths_with_bit_count(&lengths, algorithm_version)?;
452 let mut writer = BitWriter {
453 bytes: table_data,
454 bit_pos: table_bits,
455 };
456 for &byte in data {
457 let (code, len) = table.code_for_symbol(byte as usize)?;
458 writer.write_bits(usize::from(code), usize::from(len));
459 }
460 let payload_bits = writer.bit_pos;
461 encode_compressed_block(&writer.finish(), payload_bits, true, true)
462}
463
464pub fn encode_lz_member(data: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
465 encode_lz_member_with_history(data, &[], algorithm_version)
466}
467
468#[derive(Debug, Clone, Copy, PartialEq, Eq)]
469#[non_exhaustive]
470pub struct EncodeOptions {
471 pub max_match_candidates: usize,
472 pub lazy_matching: bool,
473 pub lazy_lookahead: usize,
474 pub max_match_distance: usize,
475}
476
477impl EncodeOptions {
478 pub const fn new(max_match_candidates: usize) -> Self {
479 Self {
480 max_match_candidates,
481 lazy_matching: false,
482 lazy_lookahead: 1,
483 max_match_distance: MAX_ENCODER_MATCH_OFFSET,
484 }
485 }
486
487 pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
488 self.lazy_matching = enabled;
489 self
490 }
491
492 pub const fn with_lazy_lookahead(mut self, bytes: usize) -> Self {
493 self.lazy_lookahead = bytes;
494 self
495 }
496
497 pub const fn with_max_match_distance(mut self, distance: usize) -> Self {
498 self.max_match_distance = distance;
499 self
500 }
501}
502
503impl Default for EncodeOptions {
504 fn default() -> Self {
505 Self::new(MAX_MATCH_CANDIDATES)
506 }
507}
508
509pub fn encode_lz_member_with_history(
510 data: &[u8],
511 history: &[u8],
512 algorithm_version: u8,
513) -> Result<Vec<u8>> {
514 encode_lz_member_inner(
515 data,
516 history,
517 algorithm_version,
518 &[],
519 EncodeOptions::default(),
520 )
521}
522
523pub fn encode_lz_member_with_options(
524 data: &[u8],
525 algorithm_version: u8,
526 options: EncodeOptions,
527) -> Result<Vec<u8>> {
528 encode_lz_member_with_history_and_options(data, &[], algorithm_version, options)
529}
530
531pub fn encode_lz_member_with_history_and_options(
532 data: &[u8],
533 history: &[u8],
534 algorithm_version: u8,
535 options: EncodeOptions,
536) -> Result<Vec<u8>> {
537 encode_lz_member_inner(data, history, algorithm_version, &[], options)
538}
539
540#[derive(Debug, Clone, Copy, PartialEq, Eq)]
541pub enum Rar50FilterKind {
542 Delta { channels: usize },
543 E8,
544 E8E9,
545 Arm,
546}
547
548#[derive(Debug, Clone, PartialEq, Eq)]
549pub struct Rar50FilterSpec {
550 pub kind: Rar50FilterKind,
551 pub range: Option<Range<usize>>,
552}
553
554impl Rar50FilterSpec {
555 pub fn new(kind: Rar50FilterKind) -> Self {
556 Self { kind, range: None }
557 }
558
559 pub fn range(kind: Rar50FilterKind, range: Range<usize>) -> Self {
560 Self {
561 kind,
562 range: Some(range),
563 }
564 }
565}
566
567fn filtered_lz_member(
568 data: &[u8],
569 filters: &[Rar50FilterSpec],
570) -> Result<(Vec<u8>, Vec<EncodeFilter>)> {
571 let mut filtered = data.to_vec();
572 let mut records = Vec::with_capacity(filters.len());
573 for filter in filters {
574 let range = filter.range.clone().unwrap_or(0..data.len());
575 if range.start >= range.end || range.end > data.len() {
576 return Err(Error::InvalidData("RAR 5 filter range is invalid"));
577 }
578 if range.start > u32::MAX as usize {
579 return Err(Error::InvalidData("RAR 5 filter offset is too large"));
580 }
581
582 let filter_data = &mut filtered[range.clone()];
583 let (filter_type, channels) = encode_filter_data(filter.kind, filter_data, range.start)?;
584 records.push(EncodeFilter {
585 offset: range.start,
586 length: range.len(),
587 filter_type,
588 channels,
589 });
590 }
591 Ok((filtered, records))
592}
593
594fn encode_filter_data(
595 kind: Rar50FilterKind,
596 data: &mut [u8],
597 file_offset: usize,
598) -> Result<(FilterType, usize)> {
599 if file_offset > u32::MAX as usize {
600 return Err(Error::InvalidData("RAR 5 filter offset is too large"));
601 }
602 match kind {
603 Rar50FilterKind::Delta { channels } => {
604 filters::encode_in_place(
605 FilterOp::Delta { channels },
606 data,
607 0,
608 rar50_delta_messages(),
609 )?;
610 Ok((FilterType::Delta, channels))
611 }
612 Rar50FilterKind::E8 => {
613 filters::encode_in_place(
614 FilterOp::E8,
615 data,
616 file_offset as u32,
617 rar50_delta_messages(),
618 )?;
619 Ok((FilterType::E8, 0))
620 }
621 Rar50FilterKind::E8E9 => {
622 filters::encode_in_place(
623 FilterOp::E8E9,
624 data,
625 file_offset as u32,
626 rar50_delta_messages(),
627 )?;
628 Ok((FilterType::E8E9, 0))
629 }
630 Rar50FilterKind::Arm => {
631 arm_encode(data, file_offset as u32);
632 Ok((FilterType::Arm, 0))
633 }
634 }
635}
636
637fn filtered_lz_blocks(
638 data: &[u8],
639 filters: &[Rar50FilterSpec],
640 history: &[u8],
641 algorithm_version: u8,
642 options: EncodeOptions,
643) -> Result<Vec<u8>> {
644 let filters = normalized_filter_specs(data.len(), filters)?;
645 let mut out = Vec::new();
646 let mut block_history =
647 history[history.len().saturating_sub(options.max_match_distance)..].to_vec();
648 let mut chunk_start = 0usize;
649 while chunk_start < data.len() {
650 let chunk_end = (chunk_start + MAX_FILTER_BLOCK_LENGTH).min(data.len());
651 let mut chunk = data[chunk_start..chunk_end].to_vec();
652 let mut records = Vec::new();
653 for filter in &filters {
654 let start = filter.range.start.max(chunk_start);
655 let end = filter.range.end.min(chunk_end);
656 if start >= end {
657 continue;
658 }
659 let local_start = start - chunk_start;
660 let local_end = end - chunk_start;
661 let (filter_type, channels) =
662 encode_filter_data(filter.kind, &mut chunk[local_start..local_end], start)?;
663 records.push(EncodeFilter {
664 offset: local_start,
665 length: local_end - local_start,
666 filter_type,
667 channels,
668 });
669 }
670 out.extend(encode_lz_block(
671 &chunk,
672 &block_history,
673 algorithm_version,
674 &records,
675 options,
676 chunk_end == data.len(),
677 )?);
678 block_history.extend_from_slice(&chunk);
679 let keep_from = block_history
680 .len()
681 .saturating_sub(options.max_match_distance);
682 if keep_from != 0 {
683 block_history.drain(..keep_from);
684 }
685 chunk_start = chunk_end;
686 }
687 Ok(out)
688}
689
690#[derive(Debug, Clone, PartialEq, Eq)]
691struct NormalizedFilterSpec {
692 kind: Rar50FilterKind,
693 range: Range<usize>,
694}
695
696fn normalized_filter_specs(
697 data_len: usize,
698 filters: &[Rar50FilterSpec],
699) -> Result<Vec<NormalizedFilterSpec>> {
700 let mut normalized = Vec::with_capacity(filters.len());
701 for filter in filters {
702 let range = filter.range.clone().unwrap_or(0..data_len);
703 if range.start >= range.end || range.end > data_len {
704 return Err(Error::InvalidData("RAR 5 filter range is invalid"));
705 }
706 normalized.push(NormalizedFilterSpec {
707 kind: filter.kind,
708 range,
709 });
710 }
711 Ok(normalized)
712}
713
714fn encode_lz_member_inner(
715 data: &[u8],
716 history: &[u8],
717 algorithm_version: u8,
718 initial_filters: &[EncodeFilter],
719 options: EncodeOptions,
720) -> Result<Vec<u8>> {
721 if data.len() > MAX_COMPRESSED_BLOCK_OUTPUT && initial_filters.is_empty() {
722 let mut out = Vec::new();
723 let mut block_history =
724 history[history.len().saturating_sub(options.max_match_distance)..].to_vec();
725 let mut chunks = data.chunks(MAX_COMPRESSED_BLOCK_OUTPUT).peekable();
726 while let Some(chunk) = chunks.next() {
727 let is_last = chunks.peek().is_none();
728 out.extend(encode_lz_block(
729 chunk,
730 &block_history,
731 algorithm_version,
732 &[],
733 options,
734 is_last,
735 )?);
736 block_history.extend_from_slice(chunk);
737 let keep_from = block_history
738 .len()
739 .saturating_sub(options.max_match_distance);
740 if keep_from != 0 {
741 block_history.drain(..keep_from);
742 }
743 }
744 return Ok(out);
745 }
746
747 encode_lz_block(
748 data,
749 history,
750 algorithm_version,
751 initial_filters,
752 options,
753 true,
754 )
755}
756
757fn encode_lz_block(
758 data: &[u8],
759 history: &[u8],
760 algorithm_version: u8,
761 initial_filters: &[EncodeFilter],
762 options: EncodeOptions,
763 is_last: bool,
764) -> Result<Vec<u8>> {
765 let distance_size = match algorithm_version {
766 0 => DISTANCE_TABLE_SIZE_50,
767 1 => DISTANCE_TABLE_SIZE_70,
768 _ => {
769 return Err(Error::InvalidData(
770 "RAR 5 unknown compression algorithm version",
771 ))
772 }
773 };
774 let mut tokens = Vec::new();
775 tokens.extend(initial_filters.iter().copied().map(EncodeToken::Filter));
776 tokens.extend(encode_tokens(data, history, options, distance_size));
777 let mut lengths = TableLengths {
778 main: vec![0; MAIN_TABLE_SIZE],
779 distance: vec![0; distance_size],
780 align: vec![0; ALIGN_TABLE_SIZE],
781 length: vec![0; LENGTH_TABLE_SIZE],
782 };
783
784 let mut main_frequencies = vec![0usize; MAIN_TABLE_SIZE];
785 let mut distance_frequencies = vec![0usize; distance_size];
786 let mut align_frequencies = vec![0usize; ALIGN_TABLE_SIZE];
787 let mut length_frequencies = vec![0usize; LENGTH_TABLE_SIZE];
788 let mut state = EncoderMatchState::default();
789 for token in &tokens {
790 match *token {
791 EncodeToken::Filter(_) => main_frequencies[256] += 1,
792 EncodeToken::Literal(byte) => main_frequencies[byte as usize] += 1,
793 EncodeToken::Match { length, distance } => {
794 match state.encode_match(length, distance, distance_size)? {
795 EncodedMatch::LastLengthRepeat => main_frequencies[257] += 1,
796 EncodedMatch::RepeatDistance {
797 index, length_slot, ..
798 } => {
799 main_frequencies[258 + index] += 1;
800 length_frequencies[length_slot] += 1;
801 }
802 EncodedMatch::New {
803 length_slot,
804 distance_slot,
805 distance_extra,
806 distance_bit_count,
807 ..
808 } => {
809 main_frequencies[262 + length_slot] += 1;
810 distance_frequencies[distance_slot] += 1;
811 if distance_bit_count >= 4 {
812 align_frequencies[distance_extra & 0x0f] += 1;
813 }
814 }
815 }
816 state.remember(length, distance);
817 }
818 }
819 }
820
821 lengths.main = huffman::lengths_for_frequencies(&main_frequencies, 15);
822 lengths.distance = huffman::lengths_for_frequencies(&distance_frequencies, 15);
823 lengths.length = huffman::lengths_for_frequencies(&length_frequencies, 15);
824 lengths.align = huffman::lengths_for_frequencies(&align_frequencies, 15);
825
826 let main_table = HuffmanTable::from_lengths(&lengths.main)?;
827 let distance_table = HuffmanTable::from_lengths(&lengths.distance)?;
828 let align_table = HuffmanTable::from_lengths(&lengths.align)?;
829 let length_table = HuffmanTable::from_lengths(&lengths.length)?;
830 let (table_data, table_bits) =
831 encode_table_lengths_with_bit_count(&lengths, algorithm_version)?;
832 let mut writer = BitWriter {
833 bytes: table_data,
834 bit_pos: table_bits,
835 };
836 let mut state = EncoderMatchState::default();
837 for token in tokens {
838 match token {
839 EncodeToken::Filter(filter) => {
840 let (code, len) = main_table.code_for_symbol(256)?;
841 writer.write_bits(usize::from(code), usize::from(len));
842 write_filter(&mut writer, filter)?;
843 }
844 EncodeToken::Literal(byte) => {
845 let (code, len) = main_table.code_for_symbol(byte as usize)?;
846 writer.write_bits(usize::from(code), usize::from(len));
847 }
848 EncodeToken::Match { length, distance } => {
849 match state.encode_match(length, distance, distance_size)? {
850 EncodedMatch::LastLengthRepeat => {
851 let (code, len) = main_table.code_for_symbol(257)?;
852 writer.write_bits(usize::from(code), usize::from(len));
853 }
854 EncodedMatch::RepeatDistance {
855 index,
856 length_slot,
857 length_extra,
858 } => {
859 let (code, len) = main_table.code_for_symbol(258 + index)?;
860 writer.write_bits(usize::from(code), usize::from(len));
861 let (code, len) = length_table.code_for_symbol(length_slot)?;
862 writer.write_bits(usize::from(code), usize::from(len));
863 let length_extra_bits = length_slot_extra_bits(length_slot)?;
864 if length_extra_bits != 0 {
865 writer.write_bits(length_extra, usize::from(length_extra_bits));
866 }
867 }
868 EncodedMatch::New {
869 length_slot,
870 length_extra,
871 distance_slot,
872 distance_extra,
873 distance_bit_count,
874 } => {
875 let (code, len) = main_table.code_for_symbol(262 + length_slot)?;
876 writer.write_bits(usize::from(code), usize::from(len));
877 let length_extra_bits = length_slot_extra_bits(length_slot)?;
878 if length_extra_bits != 0 {
879 writer.write_bits(length_extra, usize::from(length_extra_bits));
880 }
881 let (code, len) = distance_table.code_for_symbol(distance_slot)?;
882 writer.write_bits(usize::from(code), usize::from(len));
883 if distance_bit_count >= 4 {
884 if distance_bit_count > 4 {
885 writer.write_bits(distance_extra >> 4, distance_bit_count - 4);
886 }
887 let (code, len) = align_table.code_for_symbol(distance_extra & 0x0f)?;
888 writer.write_bits(usize::from(code), usize::from(len));
889 } else if distance_bit_count != 0 {
890 writer.write_bits(distance_extra, distance_bit_count);
891 }
892 }
893 }
894 state.remember(length, distance);
895 }
896 }
897 }
898
899 let payload_bits = writer.bit_pos;
900 encode_compressed_block(&writer.finish(), payload_bits, true, is_last)
901}
902
903#[derive(Debug, Clone, Default)]
904pub struct Unpack50Encoder {
905 history: Vec<u8>,
906 options: EncodeOptions,
907}
908
909impl Unpack50Encoder {
910 pub fn new() -> Self {
911 Self::default()
912 }
913
914 pub fn with_options(options: EncodeOptions) -> Self {
915 Self {
916 history: Vec::new(),
917 options,
918 }
919 }
920
921 pub fn encode_member(&mut self, input: &[u8], algorithm_version: u8) -> Result<Vec<u8>> {
922 let packed = encode_lz_member_with_history_and_options(
923 input,
924 &self.history,
925 algorithm_version,
926 self.options,
927 )?;
928 self.remember(input);
929 Ok(packed)
930 }
931
932 pub fn encode_member_with_filter(
933 &mut self,
934 input: &[u8],
935 algorithm_version: u8,
936 filter: Rar50FilterSpec,
937 ) -> Result<Vec<u8>> {
938 self.encode_member_with_filters(input, algorithm_version, &[filter])
939 }
940
941 pub fn encode_member_with_filters(
942 &mut self,
943 input: &[u8],
944 algorithm_version: u8,
945 filters: &[Rar50FilterSpec],
946 ) -> Result<Vec<u8>> {
947 if input.len() > MAX_FILTER_BLOCK_LENGTH {
948 let packed = filtered_lz_blocks(
949 input,
950 filters,
951 &self.history,
952 algorithm_version,
953 self.options,
954 )?;
955 self.remember(input);
956 return Ok(packed);
957 }
958 let (filtered, records) = filtered_lz_member(input, filters)?;
959 let packed = encode_lz_member_inner(
960 &filtered,
961 &self.history,
962 algorithm_version,
963 &records,
964 self.options,
965 )?;
966 self.remember(input);
967 Ok(packed)
968 }
969
970 fn remember(&mut self, input: &[u8]) {
971 self.history.extend_from_slice(input);
972 let keep_from = self
973 .history
974 .len()
975 .saturating_sub(self.options.max_match_distance);
976 if keep_from != 0 {
977 self.history.drain(..keep_from);
978 }
979 }
980}
981
982#[derive(Debug, Clone, Copy)]
983enum EncodeToken {
984 Filter(EncodeFilter),
985 Literal(u8),
986 Match { length: usize, distance: usize },
987}
988
989#[derive(Debug, Clone, Copy)]
990struct EncodeFilter {
991 offset: usize,
992 length: usize,
993 filter_type: FilterType,
994 channels: usize,
995}
996
997#[derive(Debug, Clone, Copy, Default)]
998struct EncoderMatchState {
999 reps: [usize; 4],
1000 last_length: usize,
1001}
1002
1003#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1004enum EncodedMatch {
1005 LastLengthRepeat,
1006 RepeatDistance {
1007 index: usize,
1008 length_slot: usize,
1009 length_extra: usize,
1010 },
1011 New {
1012 length_slot: usize,
1013 length_extra: usize,
1014 distance_slot: usize,
1015 distance_extra: usize,
1016 distance_bit_count: usize,
1017 },
1018}
1019
1020impl EncoderMatchState {
1021 fn encode_match(
1022 &self,
1023 length: usize,
1024 distance: usize,
1025 distance_size: usize,
1026 ) -> Result<EncodedMatch> {
1027 if distance == self.reps[0] && length == self.last_length && self.last_length != 0 {
1028 return Ok(EncodedMatch::LastLengthRepeat);
1029 }
1030 if let Some(index) = self
1031 .reps
1032 .iter()
1033 .position(|&repeat_distance| repeat_distance == distance && repeat_distance != 0)
1034 {
1035 let (length_slot, length_extra) = length_slot_for_match(length)?;
1036 return Ok(EncodedMatch::RepeatDistance {
1037 index,
1038 length_slot,
1039 length_extra,
1040 });
1041 }
1042
1043 let (distance_slot, distance_extra) = distance_slot_for_match(distance, distance_size)?;
1044 let encoded_length = length
1045 .checked_sub(length_bonus(distance))
1046 .ok_or(Error::InvalidData("RAR 5 adjusted match length underflows"))?;
1047 let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1048 let (length_slot, length_extra) = length_slot_for_match(encoded_length)?;
1049 Ok(EncodedMatch::New {
1050 length_slot,
1051 length_extra,
1052 distance_slot,
1053 distance_extra,
1054 distance_bit_count,
1055 })
1056 }
1057
1058 fn remember(&mut self, length: usize, distance: usize) {
1059 if distance == self.reps[0] && length == self.last_length {
1060 return;
1061 }
1062 if let Some(index) = self
1063 .reps
1064 .iter()
1065 .position(|&repeat_distance| repeat_distance == distance)
1066 {
1067 self.reps[..=index].rotate_right(1);
1068 } else {
1069 self.reps.rotate_right(1);
1070 }
1071 self.reps[0] = distance;
1072 self.last_length = length;
1073 }
1074}
1075
1076fn encode_tokens(
1077 input: &[u8],
1078 history: &[u8],
1079 options: EncodeOptions,
1080 distance_size: usize,
1081) -> Vec<EncodeToken> {
1082 let mut tokens = Vec::new();
1083 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1084 let history = &history[history.len().saturating_sub(options.max_match_distance)..];
1085 let mut combined = Vec::with_capacity(history.len() + input.len());
1086 combined.extend_from_slice(history);
1087 combined.extend_from_slice(input);
1088 for history_pos in 0..history.len().saturating_sub(2) {
1089 insert_match_position(&combined, history_pos, &mut buckets);
1090 }
1091
1092 let mut pos = history.len();
1093 let end = combined.len();
1094 let mut state = EncoderMatchState::default();
1095 while pos < end {
1096 if let Some(candidate) = best_match(
1097 &combined,
1098 pos,
1099 end,
1100 &buckets,
1101 options,
1102 &state,
1103 distance_size,
1104 ) {
1105 if should_lazy_emit_literal(
1106 &combined,
1107 pos,
1108 &buckets,
1109 options,
1110 &state,
1111 distance_size,
1112 candidate,
1113 ) {
1114 tokens.push(EncodeToken::Literal(combined[pos]));
1115 insert_match_position(&combined, pos, &mut buckets);
1116 pos += 1;
1117 continue;
1118 }
1119 let MatchCandidate {
1120 length, distance, ..
1121 } = candidate;
1122 tokens.push(EncodeToken::Match { length, distance });
1123 state.remember(length, distance);
1124 for history_pos in pos..pos + length {
1125 insert_match_position(&combined, history_pos, &mut buckets);
1126 }
1127 pos += length;
1128 } else {
1129 tokens.push(EncodeToken::Literal(combined[pos]));
1130 insert_match_position(&combined, pos, &mut buckets);
1131 pos += 1;
1132 }
1133 }
1134 tokens
1135}
1136
1137fn should_lazy_emit_literal(
1138 input: &[u8],
1139 pos: usize,
1140 buckets: &[Vec<usize>],
1141 options: EncodeOptions,
1142 state: &EncoderMatchState,
1143 distance_size: usize,
1144 current: MatchCandidate,
1145) -> bool {
1146 let end = input.len();
1147 if !options.lazy_matching || pos + 1 >= end {
1148 return false;
1149 }
1150 let lookahead = options.lazy_lookahead.max(1);
1151 (1..=lookahead)
1152 .take_while(|offset| pos + offset < end)
1153 .any(|offset| {
1154 best_match(
1155 input,
1156 pos + offset,
1157 end,
1158 buckets,
1159 options,
1160 state,
1161 distance_size,
1162 )
1163 .is_some_and(|next| {
1164 let skipped_literal_score = offset as isize * 8;
1165 next.score > current.score + skipped_literal_score
1166 })
1167 })
1168}
1169
1170#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1171struct MatchCandidate {
1172 length: usize,
1173 distance: usize,
1174 score: isize,
1175 cost: usize,
1176}
1177
1178fn best_match(
1179 input: &[u8],
1180 pos: usize,
1181 end: usize,
1182 buckets: &[Vec<usize>],
1183 options: EncodeOptions,
1184 state: &EncoderMatchState,
1185 distance_size: usize,
1186) -> Option<MatchCandidate> {
1187 let max_distance = pos.min(options.max_match_distance);
1188 let max_length = (end - pos).min(MAX_ENCODER_MATCH_LENGTH);
1189 if options.max_match_candidates == 0
1190 || max_distance == 0
1191 || max_length < 4
1192 || pos + 2 >= input.len()
1193 {
1194 return None;
1195 }
1196 let bucket = &buckets[match_hash(input, pos)];
1197 let mut best = None;
1198 let mut checked = 0usize;
1199 for distance in state.reps {
1200 if distance == 0 || distance > max_distance {
1201 continue;
1202 }
1203 let length = match_length(input, pos, distance, max_length);
1204 consider_match_candidate(&mut best, state, distance_size, length, distance);
1205 }
1206 for &candidate in bucket.iter().rev() {
1207 if candidate >= pos {
1208 continue;
1209 }
1210 let distance = pos - candidate;
1211 if distance > max_distance {
1212 break;
1213 }
1214 checked += 1;
1215 let length = match_length(input, pos, distance, max_length);
1216 consider_match_candidate(&mut best, state, distance_size, length, distance);
1217 if let Some(best) = best {
1218 if best.length == max_length {
1219 break;
1220 }
1221 }
1222 if checked >= options.max_match_candidates {
1223 break;
1224 }
1225 }
1226 best
1227}
1228
1229fn match_length(input: &[u8], pos: usize, distance: usize, max_length: usize) -> usize {
1230 let mut length = 0usize;
1231 while length < max_length && input[pos + length] == input[pos + length - distance] {
1232 length += 1;
1233 }
1234 length
1235}
1236
1237fn consider_match_candidate(
1238 best: &mut Option<MatchCandidate>,
1239 state: &EncoderMatchState,
1240 distance_size: usize,
1241 length: usize,
1242 distance: usize,
1243) {
1244 if length < 4 {
1245 return;
1246 }
1247 let Ok(cost) = estimated_match_cost(state, length, distance, distance_size) else {
1248 return;
1249 };
1250 let candidate = MatchCandidate {
1251 length,
1252 distance,
1253 score: (length as isize * 16) - cost as isize,
1254 cost,
1255 };
1256 if best.is_none_or(|best| {
1257 candidate.score > best.score
1258 || (candidate.score == best.score
1259 && (candidate.length > best.length
1260 || (candidate.length == best.length && candidate.cost < best.cost)
1261 || (candidate.length == best.length
1262 && candidate.cost == best.cost
1263 && candidate.distance < best.distance)))
1264 }) {
1265 *best = Some(candidate);
1266 }
1267}
1268
1269fn estimated_match_cost(
1270 state: &EncoderMatchState,
1271 length: usize,
1272 distance: usize,
1273 distance_size: usize,
1274) -> Result<usize> {
1275 if distance == state.reps[0] && length == state.last_length && state.last_length != 0 {
1276 return Ok(2);
1277 }
1278 if state
1279 .reps
1280 .iter()
1281 .any(|&repeat_distance| repeat_distance == distance && repeat_distance != 0)
1282 {
1283 let (length_slot, _) = length_slot_for_match(length)?;
1284 return Ok(5 + usize::from(length_slot_extra_bits(length_slot)?));
1285 }
1286
1287 let (distance_slot, _) = distance_slot_for_match(distance, distance_size)?;
1288 let encoded_length = length
1289 .checked_sub(length_bonus(distance))
1290 .ok_or(Error::InvalidData("RAR 5 adjusted match length underflows"))?;
1291 let (length_slot, _) = length_slot_for_match(encoded_length)?;
1292 Ok(10
1293 + usize::from(length_slot_extra_bits(length_slot)?)
1294 + distance_slot_bit_count(distance_slot)?)
1295}
1296
1297fn insert_match_position(input: &[u8], pos: usize, buckets: &mut [Vec<usize>]) {
1298 if pos + 2 < input.len() {
1299 buckets[match_hash(input, pos)].push(pos);
1300 }
1301}
1302
1303fn match_hash(input: &[u8], pos: usize) -> usize {
1304 let value =
1305 ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1306 value & (MATCH_HASH_BUCKETS - 1)
1307}
1308
1309fn length_slot_for_match(length: usize) -> Result<(usize, usize)> {
1310 if length < 2 {
1311 return Err(Error::InvalidData("RAR 5 match length is too short"));
1312 }
1313 for slot in 0..LENGTH_TABLE_SIZE {
1314 let bit_count = usize::from(length_slot_extra_bits(slot)?);
1315 let base = slot_to_length(slot, 0)?;
1316 let max = base
1317 + if bit_count == 0 {
1318 0
1319 } else {
1320 (1usize << bit_count) - 1
1321 };
1322 if length >= base && length <= max {
1323 return Ok((slot, length - base));
1324 }
1325 }
1326 Err(Error::InvalidData("RAR 5 match length is too long"))
1327}
1328
1329fn distance_slot_for_match(distance: usize, distance_size: usize) -> Result<(usize, usize)> {
1330 if distance == 0 {
1331 return Err(Error::InvalidData("RAR 5 match distance is zero"));
1332 }
1333 for slot in 0..distance_size {
1334 let bit_count = distance_slot_bit_count(slot)?;
1335 let base = slot_to_distance(slot, 0)?;
1336 let max = base
1337 + if bit_count == 0 {
1338 0
1339 } else {
1340 (1usize << bit_count) - 1
1341 };
1342 if distance >= base && distance <= max {
1343 return Ok((slot, distance - base));
1344 }
1345 }
1346 Err(Error::InvalidData("RAR 5 match distance is too large"))
1347}
1348
1349fn literal_presence(data: &[u8]) -> [bool; 256] {
1350 let mut present = [false; 256];
1351 for &byte in data {
1352 present[byte as usize] = true;
1353 }
1354 present
1355}
1356
1357#[derive(Debug, Clone)]
1358pub struct Unpack50Decoder {
1359 tables: Option<DecodeTables>,
1360 reps: [usize; 4],
1361 last_length: usize,
1362 history: Vec<u8>,
1363}
1364
1365impl Unpack50Decoder {
1366 pub fn new() -> Self {
1367 Self {
1368 tables: None,
1369 reps: [0; 4],
1370 last_length: 0,
1371 history: Vec::new(),
1372 }
1373 }
1374
1375 pub fn decode_member(
1376 &mut self,
1377 input: &[u8],
1378 algorithm_version: u8,
1379 output_size: usize,
1380 solid: bool,
1381 mode: DecodeMode,
1382 ) -> Result<Vec<u8>> {
1383 self.decode_member_with_dictionary(
1384 input,
1385 algorithm_version,
1386 output_size,
1387 DEFAULT_DICTIONARY_SIZE,
1388 solid,
1389 mode,
1390 )
1391 }
1392
1393 pub fn decode_member_with_dictionary(
1394 &mut self,
1395 input: &[u8],
1396 algorithm_version: u8,
1397 output_size: usize,
1398 dictionary_size: usize,
1399 solid: bool,
1400 mode: DecodeMode,
1401 ) -> Result<Vec<u8>> {
1402 let mut input = std::io::Cursor::new(input);
1403 self.decode_member_from_reader_with_dictionary(
1404 &mut input,
1405 algorithm_version,
1406 output_size,
1407 dictionary_size,
1408 solid,
1409 mode,
1410 )
1411 }
1412
1413 pub fn decode_member_from_reader(
1414 &mut self,
1415 input: &mut impl Read,
1416 algorithm_version: u8,
1417 output_size: usize,
1418 solid: bool,
1419 mode: DecodeMode,
1420 ) -> Result<Vec<u8>> {
1421 self.decode_member_from_reader_with_dictionary(
1422 input,
1423 algorithm_version,
1424 output_size,
1425 DEFAULT_DICTIONARY_SIZE,
1426 solid,
1427 mode,
1428 )
1429 }
1430
1431 pub fn decode_member_from_reader_with_dictionary(
1432 &mut self,
1433 input: &mut impl Read,
1434 algorithm_version: u8,
1435 output_size: usize,
1436 dictionary_size: usize,
1437 solid: bool,
1438 mode: DecodeMode,
1439 ) -> Result<Vec<u8>> {
1440 if dictionary_size == 0 {
1441 return Err(Error::InvalidData("RAR 5 dictionary size is zero"));
1442 }
1443 if !solid {
1444 self.reset();
1445 }
1446
1447 let mut output = Vec::with_capacity(output_size.min(MAX_INITIAL_OUTPUT_CAPACITY));
1448 let mut filters = Vec::new();
1449
1450 loop {
1451 let block = read_compressed_block(input)?;
1452 let payload = block.payload.as_slice();
1453 let mut payload_bit_pos = 0;
1454 if block.header.has_tables {
1455 let (lengths, table_bits) = read_table_lengths(payload, algorithm_version)?;
1456 self.tables = Some(DecodeTables::from_lengths(&lengths)?);
1457 payload_bit_pos = table_bits;
1458 }
1459 let tables = self
1460 .tables
1461 .take()
1462 .ok_or(Error::InvalidData("RAR 5 block reuses missing tables"))?;
1463 let mut bits = BitReader::new(payload);
1464 bits.bit_pos = payload_bit_pos;
1465
1466 while bits.bit_pos < block.header.payload_bits && output.len() < output_size {
1467 let symbol = tables.main.decode(&mut bits)?;
1468 match symbol {
1469 0..=255 => output.push(symbol as u8),
1470 256 if mode.uses_lz() => {
1471 filters.push(read_filter(&mut bits, output.len())?);
1472 }
1473 257 if mode.uses_lz() => {
1474 if self.last_length != 0 {
1475 self.copy_match(
1476 &mut output,
1477 self.reps[0],
1478 self.last_length,
1479 output_size,
1480 dictionary_size,
1481 )?;
1482 }
1483 }
1484 258..=261 if mode.uses_lz() => {
1485 let rep_index = symbol - 258;
1486 let distance = self.reps[rep_index];
1487 if distance == 0 {
1488 return Err(Error::InvalidData(
1489 "RAR 5 repeat distance is not initialized",
1490 ));
1491 }
1492 let length_slot = tables.length.decode(&mut bits)?;
1493 let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1494 let length = slot_to_length(length_slot, length_extra)?;
1495 self.reps[..=rep_index].rotate_right(1);
1496 self.reps[0] = distance;
1497 self.last_length = length;
1498 self.copy_match(
1499 &mut output,
1500 distance,
1501 length,
1502 output_size,
1503 dictionary_size,
1504 )?;
1505 }
1506 262.. if mode.uses_lz() => {
1507 let length_slot = symbol - 262;
1508 let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1509 let mut length = slot_to_length(length_slot, length_extra)?;
1510 let distance_slot = tables.distance.decode(&mut bits)?;
1511 let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1512 let distance_extra = if distance_bit_count >= 4 && tables.align_mode {
1513 let high = bits.read_bits((distance_bit_count - 4) as u8)?;
1514 let low = tables.align.decode(&mut bits)? as u32;
1515 (high << 4) | low
1516 } else {
1517 bits.read_bits(distance_bit_count as u8)?
1518 };
1519 let distance = slot_to_distance(distance_slot, distance_extra)?;
1520 length += length_bonus(distance);
1521 self.reps.rotate_right(1);
1522 self.reps[0] = distance;
1523 self.last_length = length;
1524 self.copy_match(
1525 &mut output,
1526 distance,
1527 length,
1528 output_size,
1529 dictionary_size,
1530 )?;
1531 }
1532 _ if mode == DecodeMode::LiteralOnly => {
1533 return Err(Error::InvalidData(
1534 "RAR 5 literal-only decoder encountered non-literal symbol",
1535 ));
1536 }
1537 _ => {
1538 return Err(Error::InvalidData(
1539 "RAR 5 decoder encountered unsupported control symbol",
1540 ));
1541 }
1542 }
1543 }
1544
1545 self.tables = Some(tables);
1546 if block.header.is_last || output.len() >= output_size {
1547 break;
1548 }
1549 }
1550
1551 if output.len() == output_size {
1552 let history_output = if mode.applies_filters() && !filters.is_empty() {
1553 Some(output.clone())
1554 } else {
1555 None
1556 };
1557 if mode.applies_filters() {
1558 apply_filters(&mut output, &filters)?;
1559 }
1560 self.history
1561 .extend_from_slice(history_output.as_deref().unwrap_or(&output));
1562 if self.history.len() > dictionary_size {
1563 let discard = self.history.len() - dictionary_size;
1564 self.history.drain(..discard);
1565 }
1566 Ok(output)
1567 } else {
1568 Err(Error::NeedMoreInput)
1569 }
1570 }
1571
1572 pub fn decode_member_from_reader_with_dictionary_to_sink<E>(
1573 &mut self,
1574 input: &mut impl Read,
1575 algorithm_version: u8,
1576 output_size: usize,
1577 dictionary_size: usize,
1578 solid: bool,
1579 mut sink: impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1580 ) -> std::result::Result<(), StreamDecodeError<E>> {
1581 if dictionary_size == 0 {
1582 return Err(Error::InvalidData("RAR 5 dictionary size is zero").into());
1583 }
1584 if !solid {
1585 self.reset();
1586 }
1587
1588 let history_limit = dictionary_size.min(STREAM_HISTORY_LIMIT);
1589 if self.history.len() > history_limit {
1590 let discard = self.history.len() - history_limit;
1591 self.history.drain(..discard);
1592 }
1593 let mut output = StreamingOutput::new(
1594 std::mem::take(&mut self.history),
1595 output_size,
1596 dictionary_size,
1597 history_limit,
1598 );
1599
1600 loop {
1601 let block = read_compressed_block(input)?;
1602 let payload = block.payload.as_slice();
1603 let mut payload_bit_pos = 0;
1604 if block.header.has_tables {
1605 let (lengths, table_bits) = read_table_lengths(payload, algorithm_version)?;
1606 self.tables = Some(DecodeTables::from_lengths(&lengths)?);
1607 payload_bit_pos = table_bits;
1608 }
1609 let tables = self
1610 .tables
1611 .take()
1612 .ok_or(Error::InvalidData("RAR 5 block reuses missing tables"))?;
1613 let mut bits = BitReader::new(payload);
1614 bits.bit_pos = payload_bit_pos;
1615
1616 while bits.bit_pos < block.header.payload_bits && output.written() < output_size {
1617 let symbol = tables.main.decode(&mut bits)?;
1618 match symbol {
1619 0..=255 => output.push(symbol as u8, &mut sink)?,
1620 256 => {
1621 return Err(StreamDecodeError::FilteredMember);
1622 }
1623 257 => {
1624 if self.last_length != 0 {
1625 output.copy_match(self.reps[0], self.last_length, &mut sink)?;
1626 }
1627 }
1628 258..=261 => {
1629 let rep_index = symbol - 258;
1630 let distance = self.reps[rep_index];
1631 if distance == 0 {
1632 return Err(Error::InvalidData(
1633 "RAR 5 repeat distance is not initialized",
1634 )
1635 .into());
1636 }
1637 let length_slot = tables.length.decode(&mut bits)?;
1638 let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1639 let length = slot_to_length(length_slot, length_extra)?;
1640 self.reps[..=rep_index].rotate_right(1);
1641 self.reps[0] = distance;
1642 self.last_length = length;
1643 output.copy_match(distance, length, &mut sink)?;
1644 }
1645 262.. => {
1646 let length_slot = symbol - 262;
1647 let length_extra = bits.read_bits(length_slot_extra_bits(length_slot)?)?;
1648 let mut length = slot_to_length(length_slot, length_extra)?;
1649 let distance_slot = tables.distance.decode(&mut bits)?;
1650 let distance_bit_count = distance_slot_bit_count(distance_slot)?;
1651 let distance_extra = if distance_bit_count >= 4 && tables.align_mode {
1652 let high = bits.read_bits((distance_bit_count - 4) as u8)?;
1653 let low = tables.align.decode(&mut bits)? as u32;
1654 (high << 4) | low
1655 } else {
1656 bits.read_bits(distance_bit_count as u8)?
1657 };
1658 let distance = slot_to_distance(distance_slot, distance_extra)?;
1659 length += length_bonus(distance);
1660 self.reps.rotate_right(1);
1661 self.reps[0] = distance;
1662 self.last_length = length;
1663 output.copy_match(distance, length, &mut sink)?;
1664 }
1665 }
1666 }
1667
1668 self.tables = Some(tables);
1669 if block.header.is_last || output.written() >= output_size {
1670 break;
1671 }
1672 }
1673
1674 if output.written() == output_size {
1675 output.finish(&mut sink)?;
1676 self.history = output.into_history();
1677 Ok(())
1678 } else {
1679 Err(Error::NeedMoreInput.into())
1680 }
1681 }
1682
1683 fn reset(&mut self) {
1684 self.tables = None;
1685 self.reps = [0; 4];
1686 self.last_length = 0;
1687 self.history.clear();
1688 }
1689
1690 fn copy_match(
1691 &self,
1692 output: &mut Vec<u8>,
1693 distance: usize,
1694 length: usize,
1695 output_limit: usize,
1696 dictionary_size: usize,
1697 ) -> Result<()> {
1698 if distance > dictionary_size {
1699 return Err(Error::InvalidData(
1700 "RAR 5 match distance exceeds dictionary",
1701 ));
1702 }
1703 if distance == 0 || distance > self.history.len() + output.len() {
1704 return Err(Error::InvalidData("RAR 5 match distance exceeds window"));
1705 }
1706 if output
1707 .len()
1708 .checked_add(length)
1709 .is_none_or(|end| end > output_limit)
1710 {
1711 return Err(Error::InvalidData("RAR 5 match exceeds output limit"));
1712 }
1713 for _ in 0..length {
1714 if distance <= output.len() {
1715 let index = output.len() - distance;
1716 output.push(output[index]);
1717 } else {
1718 let history_distance = distance - output.len();
1719 let index = self.history.len() - history_distance;
1720 output.push(self.history[index]);
1721 }
1722 }
1723 Ok(())
1724 }
1725}
1726
1727struct StreamingOutput {
1728 history: VecDeque<u8>,
1729 pending: Vec<u8>,
1730 written: usize,
1731 output_limit: usize,
1732 dictionary_size: usize,
1733 history_limit: usize,
1734 all_zero: bool,
1735}
1736
1737impl StreamingOutput {
1738 fn new(
1739 history: Vec<u8>,
1740 output_limit: usize,
1741 dictionary_size: usize,
1742 history_limit: usize,
1743 ) -> Self {
1744 Self {
1745 all_zero: history.iter().all(|&byte| byte == 0),
1746 history: history.into(),
1747 pending: Vec::with_capacity(STREAM_FLUSH_THRESHOLD),
1748 written: 0,
1749 output_limit,
1750 dictionary_size,
1751 history_limit,
1752 }
1753 }
1754
1755 fn written(&self) -> usize {
1756 self.written
1757 }
1758
1759 fn push<E>(
1760 &mut self,
1761 byte: u8,
1762 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1763 ) -> std::result::Result<(), StreamDecodeError<E>> {
1764 if self.written >= self.output_limit {
1765 return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1766 }
1767 if byte != 0 {
1768 self.all_zero = false;
1769 }
1770 self.pending.push(byte);
1771 self.written += 1;
1772 if self.pending.len() >= STREAM_FLUSH_THRESHOLD {
1773 self.flush(sink)?;
1774 }
1775 Ok(())
1776 }
1777
1778 fn push_repeated<E>(
1779 &mut self,
1780 byte: u8,
1781 mut count: usize,
1782 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1783 ) -> std::result::Result<(), StreamDecodeError<E>> {
1784 if self
1785 .written
1786 .checked_add(count)
1787 .is_none_or(|end| end > self.output_limit)
1788 {
1789 return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1790 }
1791 if byte != 0 {
1792 self.all_zero = false;
1793 }
1794 while count > 0 {
1795 let available = STREAM_FLUSH_THRESHOLD - self.pending.len();
1796 let take = count.min(available.max(1));
1797 let old_len = self.pending.len();
1798 self.pending.resize(old_len + take, byte);
1799 self.written += take;
1800 count -= take;
1801 if self.pending.len() >= STREAM_FLUSH_THRESHOLD {
1802 self.flush(sink)?;
1803 }
1804 }
1805 Ok(())
1806 }
1807
1808 fn push_zeroes<E>(
1809 &mut self,
1810 count: usize,
1811 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1812 ) -> std::result::Result<(), StreamDecodeError<E>> {
1813 if self
1814 .written
1815 .checked_add(count)
1816 .is_none_or(|end| end > self.output_limit)
1817 {
1818 return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1819 }
1820 self.flush(sink)?;
1821 sink(DecodedChunk::Repeated {
1822 byte: 0,
1823 len: count,
1824 })
1825 .map_err(StreamDecodeError::Sink)?;
1826 self.written += count;
1827 if self.history.is_empty() && self.history_limit != 0 {
1828 self.history.push_back(0);
1829 }
1830 Ok(())
1831 }
1832
1833 fn copy_match<E>(
1834 &mut self,
1835 distance: usize,
1836 length: usize,
1837 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1838 ) -> std::result::Result<(), StreamDecodeError<E>> {
1839 if distance > self.dictionary_size {
1840 return Err(Error::InvalidData("RAR 5 match distance exceeds dictionary").into());
1841 }
1842 if self.all_zero && distance <= self.written + self.history.len() {
1843 return self.push_zeroes(length, sink);
1844 }
1845 if distance == 0 || distance > self.history.len() + self.pending.len() {
1846 return Err(Error::InvalidData("RAR 5 match distance exceeds window").into());
1847 }
1848 if self
1849 .written
1850 .checked_add(length)
1851 .is_none_or(|end| end > self.output_limit)
1852 {
1853 return Err(Error::InvalidData("RAR 5 match exceeds output limit").into());
1854 }
1855 if distance == 1 {
1856 let byte = self.byte_at_distance(1)?;
1857 return self.push_repeated(byte, length, sink);
1858 }
1859 for _ in 0..length {
1860 let byte = self.byte_at_distance(distance)?;
1861 self.push(byte, sink)?;
1862 }
1863 Ok(())
1864 }
1865
1866 fn byte_at_distance(&self, distance: usize) -> Result<u8> {
1867 if distance <= self.pending.len() {
1868 Ok(self.pending[self.pending.len() - distance])
1869 } else {
1870 let history_distance = distance - self.pending.len();
1871 if history_distance > self.history.len() {
1872 return Err(Error::InvalidData("RAR 5 match distance exceeds window"));
1873 }
1874 Ok(*self
1875 .history
1876 .get(self.history.len() - history_distance)
1877 .ok_or(Error::InvalidData("RAR 5 match distance exceeds window"))?)
1878 }
1879 }
1880
1881 fn flush<E>(
1882 &mut self,
1883 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1884 ) -> std::result::Result<(), StreamDecodeError<E>> {
1885 if self.pending.is_empty() {
1886 return Ok(());
1887 }
1888 sink(DecodedChunk::Bytes(&self.pending)).map_err(StreamDecodeError::Sink)?;
1889 self.history.extend(self.pending.iter().copied());
1890 self.pending.clear();
1891 while self.history.len() > self.history_limit {
1892 self.history.pop_front();
1893 }
1894 Ok(())
1895 }
1896
1897 fn finish<E>(
1898 &mut self,
1899 sink: &mut impl FnMut(DecodedChunk<'_>) -> std::result::Result<(), E>,
1900 ) -> std::result::Result<(), StreamDecodeError<E>> {
1901 self.flush(sink)
1902 }
1903
1904 fn into_history(self) -> Vec<u8> {
1905 self.history.into()
1906 }
1907}
1908
1909fn read_compressed_block(input: &mut impl Read) -> Result<OwnedCompressedBlock> {
1910 let mut fixed = [0u8; 2];
1911 input
1912 .read_exact(&mut fixed)
1913 .map_err(|_| Error::NeedMoreInput)?;
1914 let flags = fixed[0];
1915 let checksum = fixed[1];
1916 let size_bytes_len = match (flags >> 3) & 0x03 {
1917 0 => 1,
1918 1 => 2,
1919 2 => 3,
1920 _ => return Err(Error::InvalidData("RAR 5 block size length is invalid")),
1921 };
1922 let mut size_bytes = [0u8; 3];
1923 input
1924 .read_exact(&mut size_bytes[..size_bytes_len])
1925 .map_err(|_| Error::NeedMoreInput)?;
1926
1927 let actual = size_bytes[..size_bytes_len]
1928 .iter()
1929 .fold(checksum ^ flags, |acc, &byte| acc ^ byte);
1930 if actual != 0x5a {
1931 return Err(Error::InvalidData("RAR 5 block header checksum mismatch"));
1932 }
1933
1934 let payload_size = size_bytes[..size_bytes_len]
1935 .iter()
1936 .enumerate()
1937 .fold(0usize, |acc, (index, &byte)| {
1938 acc | (usize::from(byte) << (index * 8))
1939 });
1940 let mut payload = vec![0; payload_size];
1941 input
1942 .read_exact(&mut payload)
1943 .map_err(|_| Error::NeedMoreInput)?;
1944 let final_byte_bits = ((flags & 0x07) + 1).min(8);
1945 let payload_bits = if payload_size == 0 {
1946 0
1947 } else {
1948 (payload_size - 1) * 8 + usize::from(final_byte_bits)
1949 };
1950
1951 Ok(OwnedCompressedBlock {
1952 header: CompressedBlockHeader {
1953 flags,
1954 is_last: flags & 0x40 != 0,
1955 has_tables: flags & 0x80 != 0,
1956 final_byte_bits,
1957 payload_size,
1958 payload_bits,
1959 },
1960 payload,
1961 })
1962}
1963
1964impl Default for Unpack50Decoder {
1965 fn default() -> Self {
1966 Self::new()
1967 }
1968}
1969
1970#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1971struct PendingFilter {
1972 start: usize,
1973 length: usize,
1974 filter_type: FilterType,
1975 channels: usize,
1976}
1977
1978#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1979enum FilterType {
1980 Delta,
1981 E8,
1982 E8E9,
1983 Arm,
1984}
1985
1986fn read_filter(bits: &mut BitReader<'_>, current_pos: usize) -> Result<PendingFilter> {
1987 let offset = read_filter_data(bits)? as usize;
1988 let length = read_filter_data(bits)? as usize;
1989 let filter_type = match bits.read_bits(3)? {
1990 0 => FilterType::Delta,
1991 1 => FilterType::E8,
1992 2 => FilterType::E8E9,
1993 3 => FilterType::Arm,
1994 _ => return Err(Error::InvalidData("RAR 5 filter type is unsupported")),
1995 };
1996 let channels = if filter_type == FilterType::Delta {
1997 bits.read_bits(5)? as usize + 1
1998 } else {
1999 0
2000 };
2001 Ok(PendingFilter {
2002 start: current_pos
2003 .checked_add(offset)
2004 .ok_or(Error::InvalidData("RAR 5 filter start overflows"))?,
2005 length,
2006 filter_type,
2007 channels,
2008 })
2009}
2010
2011fn read_filter_data(bits: &mut BitReader<'_>) -> Result<u32> {
2012 let byte_count = bits.read_bits(2)? as usize + 1;
2013 let mut data = 0;
2014 for index in 0..byte_count {
2015 data |= bits.read_bits(8)? << (index * 8);
2016 }
2017 Ok(data)
2018}
2019
2020fn write_filter(writer: &mut BitWriter, filter: EncodeFilter) -> Result<()> {
2021 if filter.offset > u32::MAX as usize {
2022 return Err(Error::InvalidData("RAR 5 filter offset is too large"));
2023 }
2024 if filter.length > u32::MAX as usize {
2025 return Err(Error::InvalidData("RAR 5 filter length is too large"));
2026 }
2027 write_filter_data(writer, filter.offset as u32);
2028 write_filter_data(writer, filter.length as u32);
2029 match filter.filter_type {
2030 FilterType::Delta => {
2031 if filter.channels == 0 || filter.channels > 32 {
2032 return Err(Error::InvalidData(
2033 "RAR 5 DELTA filter channel count is invalid",
2034 ));
2035 }
2036 writer.write_bits(0, 3);
2037 writer.write_bits(filter.channels - 1, 5);
2038 }
2039 FilterType::E8 => writer.write_bits(1, 3),
2040 FilterType::E8E9 => writer.write_bits(2, 3),
2041 FilterType::Arm => writer.write_bits(3, 3),
2042 }
2043 Ok(())
2044}
2045
2046fn write_filter_data(writer: &mut BitWriter, value: u32) {
2047 let byte_count = if value <= 0xff {
2048 1
2049 } else if value <= 0xffff {
2050 2
2051 } else if value <= 0x00ff_ffff {
2052 3
2053 } else {
2054 4
2055 };
2056 writer.write_bits(byte_count - 1, 2);
2057 for index in 0..byte_count {
2058 writer.write_bits(((value >> (index * 8)) & 0xff) as usize, 8);
2059 }
2060}
2061
2062fn apply_filters(output: &mut [u8], filters: &[PendingFilter]) -> Result<()> {
2063 for filter in filters {
2064 let end = filter
2065 .start
2066 .checked_add(filter.length)
2067 .ok_or(Error::InvalidData("RAR 5 filter range overflows"))?;
2068 let data = output
2069 .get_mut(filter.start..end)
2070 .ok_or(Error::InvalidData("RAR 5 filter range exceeds output"))?;
2071 match filter.filter_type {
2072 FilterType::Delta => {
2073 let decoded = filters::delta_decode(data, filter.channels, rar50_delta_messages())?;
2074 data.copy_from_slice(&decoded);
2075 }
2076 FilterType::E8 => filters::e8e9_decode(data, filter.start as u32, false),
2077 FilterType::E8E9 => filters::e8e9_decode(data, filter.start as u32, true),
2078 FilterType::Arm => arm_decode(data, filter.start as u32),
2079 }
2080 }
2081 Ok(())
2082}
2083
2084fn rar50_delta_messages() -> DeltaErrorMessages {
2085 DeltaErrorMessages {
2086 invalid_channels: "RAR 5 DELTA filter channel count is invalid",
2087 zero_channels: "RAR 5 DELTA filter has zero channels",
2088 truncated_source: "RAR 5 DELTA filter source is truncated",
2089 }
2090}
2091
2092fn arm_decode(data: &mut [u8], file_offset: u32) {
2093 let mut pos = 0usize;
2094 while pos + 3 < data.len() {
2095 if data[pos + 3] == 0xeb {
2096 let mut offset = u32::from(data[pos])
2097 | (u32::from(data[pos + 1]) << 8)
2098 | (u32::from(data[pos + 2]) << 16);
2099 offset = offset.wrapping_sub(file_offset.wrapping_add(pos as u32) / 4);
2100 data[pos] = offset as u8;
2101 data[pos + 1] = (offset >> 8) as u8;
2102 data[pos + 2] = (offset >> 16) as u8;
2103 }
2104 pos += 4;
2105 }
2106}
2107
2108fn arm_encode(data: &mut [u8], file_offset: u32) {
2109 let mut pos = 0usize;
2110 while pos + 3 < data.len() {
2111 if data[pos + 3] == 0xeb {
2112 let mut offset = u32::from(data[pos])
2113 | (u32::from(data[pos + 1]) << 8)
2114 | (u32::from(data[pos + 2]) << 16);
2115 offset = offset.wrapping_add(file_offset.wrapping_add(pos as u32) / 4);
2116 data[pos] = offset as u8;
2117 data[pos + 1] = (offset >> 8) as u8;
2118 data[pos + 2] = (offset >> 16) as u8;
2119 }
2120 pos += 4;
2121 }
2122}
2123
2124fn length_slot_extra_bits(slot: usize) -> Result<u8> {
2125 if slot < 8 {
2126 Ok(0)
2127 } else {
2128 let bit_count = (slot >> 2) - 1;
2129 if bit_count > 24 {
2130 Err(Error::InvalidData("RAR 5 length slot is too large"))
2131 } else {
2132 Ok(bit_count as u8)
2133 }
2134 }
2135}
2136
2137fn length_bonus(distance: usize) -> usize {
2138 usize::from(distance > 0x100) + usize::from(distance > 0x2000) + usize::from(distance > 0x40000)
2139}
2140
2141pub fn slot_to_length(slot: usize, extra_bits: u32) -> Result<usize> {
2142 if slot < 8 {
2143 return Ok(slot + 2);
2144 }
2145 let bit_count = (slot >> 2) - 1;
2146 if bit_count > 24 {
2147 return Err(Error::InvalidData("RAR 5 length slot is too large"));
2148 }
2149 let max_extra = if bit_count == 32 {
2150 u32::MAX
2151 } else {
2152 (1u32 << bit_count) - 1
2153 };
2154 if extra_bits > max_extra {
2155 return Err(Error::InvalidData("RAR 5 length extra bits exceed slot"));
2156 }
2157 Ok((((4 | (slot & 3)) << bit_count) | extra_bits as usize) + 2)
2158}
2159
2160pub fn distance_slot_bit_count(slot: usize) -> Result<usize> {
2161 if slot < 4 {
2162 Ok(0)
2163 } else {
2164 let bit_count = (slot - 2) >> 1;
2165 if bit_count > 31 {
2166 Err(Error::InvalidData("RAR 5 distance slot is too large"))
2167 } else {
2168 Ok(bit_count)
2169 }
2170 }
2171}
2172
2173pub fn slot_to_distance(slot: usize, extra_bits: u32) -> Result<usize> {
2174 if slot < 4 {
2175 return Ok(slot + 1);
2176 }
2177 let bit_count = distance_slot_bit_count(slot)?;
2178 let max_extra = if bit_count == 32 {
2179 u32::MAX
2180 } else {
2181 (1u32 << bit_count) - 1
2182 };
2183 if extra_bits > max_extra {
2184 return Err(Error::InvalidData("RAR 5 distance extra bits exceed slot"));
2185 }
2186 Ok((((2 | (slot & 1)) << bit_count) | extra_bits as usize) + 1)
2187}
2188
2189#[derive(Debug, Clone)]
2190pub struct HuffmanTable {
2191 symbols: Vec<HuffmanSymbol>,
2192 first_code: [u16; 16],
2193 first_index: [usize; 16],
2194 counts: [u16; 16],
2195}
2196
2197#[derive(Debug, Clone)]
2198struct HuffmanSymbol {
2199 code: u16,
2200 len: u8,
2201 symbol: usize,
2202}
2203
2204impl HuffmanTable {
2205 pub fn from_lengths(lengths: &[u8]) -> Result<Self> {
2206 let mut count = [0u16; 16];
2207 for &length in lengths {
2208 if length > 15 {
2209 return Err(Error::InvalidData("RAR 5 Huffman length is too large"));
2210 }
2211 if length != 0 {
2212 count[length as usize] += 1;
2213 }
2214 }
2215 validate_huffman_counts(&count)?;
2216
2217 let mut first_code = [0u16; 16];
2218 let mut next_code = [0u16; 16];
2219 let mut code = 0u16;
2220 for length in 1..=15 {
2221 code = (code + count[length - 1]) << 1;
2222 first_code[length] = code;
2223 next_code[length] = code;
2224 }
2225
2226 let mut first_index = [0usize; 16];
2227 let mut index = 0usize;
2228 for length in 1..=15 {
2229 first_index[length] = index;
2230 index += usize::from(count[length]);
2231 }
2232
2233 let mut symbols = Vec::new();
2234 for (symbol, &length) in lengths.iter().enumerate() {
2235 if length == 0 {
2236 continue;
2237 }
2238 let code = next_code[length as usize];
2239 next_code[length as usize] += 1;
2240 symbols.push(HuffmanSymbol {
2241 code,
2242 len: length,
2243 symbol,
2244 });
2245 }
2246 symbols.sort_by_key(|item| (item.len, item.code, item.symbol));
2247 Ok(Self {
2248 symbols,
2249 first_code,
2250 first_index,
2251 counts: count,
2252 })
2253 }
2254
2255 pub fn is_empty(&self) -> bool {
2256 self.symbols.is_empty()
2257 }
2258
2259 fn decode(&self, bits: &mut BitReader<'_>) -> Result<usize> {
2260 if self.symbols.is_empty() {
2261 return Err(Error::InvalidData("RAR 5 empty Huffman table"));
2262 }
2263 let mut code = 0u16;
2264 for len in 1..=15 {
2265 code = (code << 1) | bits.read_bits(1)? as u16;
2266 let count = self.counts[len];
2267 if count != 0 {
2268 let first = self.first_code[len];
2269 let offset = code.wrapping_sub(first);
2270 if offset < count {
2271 let index = self.first_index[len] + usize::from(offset);
2272 return Ok(self.symbols[index].symbol);
2273 }
2274 }
2275 }
2276 Err(Error::InvalidData("RAR 5 invalid Huffman code"))
2277 }
2278
2279 fn code_for_symbol(&self, symbol: usize) -> Result<(u16, u8)> {
2280 self.symbols
2281 .iter()
2282 .find(|item| item.symbol == symbol)
2283 .map(|item| (item.code, item.len))
2284 .ok_or(Error::InvalidData("RAR 5 missing Huffman symbol"))
2285 }
2286}
2287
2288struct BitReader<'a> {
2289 input: &'a [u8],
2290 bit_pos: usize,
2291}
2292
2293impl<'a> BitReader<'a> {
2294 fn new(input: &'a [u8]) -> Self {
2295 Self { input, bit_pos: 0 }
2296 }
2297
2298 fn read_bits(&mut self, count: u8) -> Result<u32> {
2299 if count > 32 {
2300 return Err(Error::InvalidData("RAR 5 bit read is too wide"));
2301 }
2302 let end = self
2303 .bit_pos
2304 .checked_add(usize::from(count))
2305 .ok_or(Error::NeedMoreInput)?;
2306 if end > self.input.len() * 8 {
2307 return Err(Error::NeedMoreInput);
2308 }
2309
2310 let mut value = 0u32;
2311 let mut remaining = usize::from(count);
2312 while remaining != 0 {
2313 let byte = self.input[self.bit_pos / 8];
2314 let bit_offset = self.bit_pos % 8;
2315 let available = 8 - bit_offset;
2316 let take = available.min(remaining);
2317 let shift = available - take;
2318 let mask = ((1u16 << take) - 1) as u8;
2319 let chunk = (byte >> shift) & mask;
2320 value = (value << take) | u32::from(chunk);
2321 self.bit_pos += take;
2322 remaining -= take;
2323 }
2324
2325 Ok(value)
2326 }
2327}
2328
2329struct BitWriter {
2330 bytes: Vec<u8>,
2331 bit_pos: usize,
2332}
2333
2334impl BitWriter {
2335 fn new() -> Self {
2336 Self {
2337 bytes: Vec::new(),
2338 bit_pos: 0,
2339 }
2340 }
2341
2342 fn write_bits(&mut self, value: usize, count: usize) {
2343 for bit in (0..count).rev() {
2344 if self.bit_pos.is_multiple_of(8) {
2345 self.bytes.push(0);
2346 }
2347 if (value >> bit) & 1 != 0 {
2348 let byte = self.bytes.last_mut().unwrap();
2349 *byte |= 1 << (7 - (self.bit_pos % 8));
2350 }
2351 self.bit_pos += 1;
2352 }
2353 }
2354
2355 fn finish(self) -> Vec<u8> {
2356 self.bytes
2357 }
2358}
2359
2360fn validate_huffman_counts(count: &[u16; 16]) -> Result<()> {
2361 let mut available = 1i32;
2362 for &len_count in count.iter().skip(1) {
2363 available = (available << 1) - i32::from(len_count);
2364 if available < 0 {
2365 return Err(Error::InvalidData("RAR 5 oversubscribed Huffman table"));
2366 }
2367 }
2368 Ok(())
2369}
2370
2371#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2372struct LevelToken {
2373 symbol: usize,
2374 extra_bits: u8,
2375 extra_value: u8,
2376}
2377
2378impl LevelToken {
2379 const fn plain(symbol: usize) -> Self {
2380 Self {
2381 symbol,
2382 extra_bits: 0,
2383 extra_value: 0,
2384 }
2385 }
2386
2387 const fn repeat_previous_short(count: usize) -> Self {
2388 Self {
2389 symbol: 16,
2390 extra_bits: 3,
2391 extra_value: (count - 3) as u8,
2392 }
2393 }
2394
2395 const fn repeat_previous_long(count: usize) -> Self {
2396 Self {
2397 symbol: 17,
2398 extra_bits: 7,
2399 extra_value: (count - 11) as u8,
2400 }
2401 }
2402
2403 const fn zero_run_short(count: usize) -> Self {
2404 Self {
2405 symbol: 18,
2406 extra_bits: 3,
2407 extra_value: (count - 3) as u8,
2408 }
2409 }
2410
2411 const fn zero_run_long(count: usize) -> Self {
2412 Self {
2413 symbol: 19,
2414 extra_bits: 7,
2415 extra_value: (count - 11) as u8,
2416 }
2417 }
2418}
2419
2420fn encode_table_level_tokens(lengths: &[u8]) -> Vec<LevelToken> {
2421 let mut tokens = Vec::new();
2422 let mut pos = 0usize;
2423 let mut previous = None;
2424 while pos < lengths.len() {
2425 let value = lengths[pos];
2426 let mut run = 1usize;
2427 while pos + run < lengths.len() && lengths[pos + run] == value {
2428 run += 1;
2429 }
2430
2431 if value == 0 {
2432 emit_zero_level_run(&mut tokens, run);
2433 previous = Some(0);
2434 pos += run;
2435 continue;
2436 }
2437
2438 if previous == Some(value) && run >= 3 {
2439 emit_repeat_level_run(&mut tokens, run);
2440 pos += run;
2441 continue;
2442 }
2443
2444 tokens.push(LevelToken::plain(value as usize));
2445 previous = Some(value);
2446 pos += 1;
2447 }
2448 tokens
2449}
2450
2451fn emit_repeat_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
2452 while run != 0 {
2453 if run >= 11 {
2454 let mut chunk = run.min(138);
2455 if matches!(run - chunk, 1 | 2) && chunk >= 14 {
2456 chunk -= 3;
2457 }
2458 tokens.push(LevelToken::repeat_previous_long(chunk));
2459 run -= chunk;
2460 } else if run >= 3 {
2461 let chunk = run.min(10);
2462 tokens.push(LevelToken::repeat_previous_short(chunk));
2463 run -= chunk;
2464 } else {
2465 break;
2466 }
2467 }
2468}
2469
2470fn emit_zero_level_run(tokens: &mut Vec<LevelToken>, mut run: usize) {
2471 while run != 0 {
2472 if run >= 11 {
2473 let mut chunk = run.min(138);
2474 if matches!(run - chunk, 1 | 2) && chunk >= 14 {
2475 chunk -= 3;
2476 }
2477 tokens.push(LevelToken::zero_run_long(chunk));
2478 run -= chunk;
2479 } else if run >= 3 {
2480 let chunk = run.min(10);
2481 tokens.push(LevelToken::zero_run_short(chunk));
2482 run -= chunk;
2483 } else {
2484 tokens.extend(std::iter::repeat_n(LevelToken::plain(0), run));
2485 break;
2486 }
2487 }
2488}
2489
2490fn level_code_lengths_for_tokens(tokens: &[LevelToken]) -> [u8; LEVEL_TABLE_SIZE] {
2491 let mut used = [false; LEVEL_TABLE_SIZE];
2492 for token in tokens {
2493 used[token.symbol] = true;
2494 }
2495 let used_count = used.iter().filter(|&&used| used).count();
2496 let len = huffman::bits_for_symbol_count(used_count);
2497 let mut lengths = [0u8; LEVEL_TABLE_SIZE];
2498 for (symbol, is_used) in used.into_iter().enumerate() {
2499 if is_used {
2500 lengths[symbol] = len;
2501 }
2502 }
2503 lengths
2504}
2505
2506fn write_level_lengths(writer: &mut BitWriter, lengths: &[u8; LEVEL_TABLE_SIZE]) {
2507 let mut pos = 0usize;
2508 while pos < LEVEL_TABLE_SIZE {
2509 let length = lengths[pos];
2510 if length == 0 {
2511 let mut count = 1usize;
2512 while pos + count < LEVEL_TABLE_SIZE && lengths[pos + count] == 0 {
2513 count += 1;
2514 }
2515 while count >= 3 {
2516 let chunk = count.min(17);
2517 writer.write_bits(15, 4);
2518 writer.write_bits(chunk - 2, 4);
2519 pos += chunk;
2520 count -= chunk;
2521 }
2522 for _ in 0..count {
2523 writer.write_bits(0, 4);
2524 pos += 1;
2525 }
2526 } else {
2527 writer.write_bits(usize::from(length), 4);
2528 if length == 15 {
2529 writer.write_bits(0, 4);
2530 }
2531 pos += 1;
2532 }
2533 }
2534}
2535
2536#[cfg(test)]
2537mod tests {
2538 use super::*;
2539
2540 fn checksum(flags: u8, size_bytes: &[u8]) -> u8 {
2541 size_bytes
2542 .iter()
2543 .fold(0x5a ^ flags, |acc, &byte| acc ^ byte)
2544 }
2545
2546 #[test]
2547 fn parses_one_byte_size_block_header() {
2548 let flags = 0xc7;
2549 let size = [3];
2550 let input = [flags, checksum(flags, &size), size[0], 0xaa, 0xbb, 0xcc];
2551
2552 let block = parse_compressed_block(&input).unwrap();
2553 assert_eq!(block.header_len, 3);
2554 assert_eq!(block.payload, 3..6);
2555 assert_eq!(block.header.flags, flags);
2556 assert!(block.header.is_last);
2557 assert!(block.header.has_tables);
2558 assert_eq!(block.header.final_byte_bits, 8);
2559 assert_eq!(block.header.payload_size, 3);
2560 assert_eq!(block.header.payload_bits, 24);
2561 }
2562
2563 #[test]
2564 fn parses_three_byte_size_block_header_with_partial_final_byte() {
2565 let flags = 0x94;
2566 let size = [0x34, 0x12, 0x00];
2567 let mut input = vec![flags, checksum(flags, &size), size[0], size[1], size[2]];
2568 input.resize(0x1234 + 5, 0);
2569
2570 let block = parse_compressed_block(&input).unwrap();
2571 assert_eq!(block.header_len, 5);
2572 assert_eq!(block.payload, 5..0x1239);
2573 assert!(!block.header.is_last);
2574 assert!(block.header.has_tables);
2575 assert_eq!(block.header.final_byte_bits, 5);
2576 assert_eq!(block.header.payload_size, 0x1234);
2577 assert_eq!(block.header.payload_bits, (0x1234 - 1) * 8 + 5);
2578 }
2579
2580 #[test]
2581 fn rejects_reserved_size_length_selector() {
2582 let input = [0x18, 0x42, 0x00];
2583
2584 assert_eq!(
2585 parse_compressed_block(&input),
2586 Err(Error::InvalidData("RAR 5 block size length is invalid"))
2587 );
2588 }
2589
2590 #[test]
2591 fn rejects_bad_block_header_checksum() {
2592 let input = [0xc7, 0x00, 0x03, 0xaa, 0xbb, 0xcc];
2593
2594 assert_eq!(
2595 parse_compressed_block(&input),
2596 Err(Error::InvalidData("RAR 5 block header checksum mismatch"))
2597 );
2598 }
2599
2600 #[test]
2601 fn rejects_truncated_block_payload() {
2602 let flags = 0xc7;
2603 let size = [3];
2604 let input = [flags, checksum(flags, &size), size[0], 0xaa, 0xbb];
2605
2606 assert_eq!(parse_compressed_block(&input), Err(Error::NeedMoreInput));
2607 }
2608
2609 #[test]
2610 fn reads_level_lengths_with_literal_fifteen() {
2611 let mut nibbles = vec![1, 2, 15, 0, 3, 4];
2612 nibbles.resize(LEVEL_TABLE_SIZE + 1, 0);
2613
2614 let (lengths, bits) = read_level_lengths(&pack_nibbles(&nibbles)).unwrap();
2615
2616 assert_eq!(&lengths[..6], &[1, 2, 15, 3, 4, 0]);
2617 assert_eq!(bits, LEVEL_TABLE_SIZE * 4 + 4);
2618 }
2619
2620 #[test]
2621 fn reads_level_lengths_with_zero_run_at_current_position() {
2622 let mut nibbles = vec![7, 15, 3, 2];
2623 nibbles.resize(LEVEL_TABLE_SIZE - 3, 0);
2624
2625 let (lengths, bits) = read_level_lengths(&pack_nibbles(&nibbles)).unwrap();
2626
2627 assert_eq!(lengths[0], 7);
2628 assert_eq!(&lengths[1..6], &[0, 0, 0, 0, 0]);
2629 assert_eq!(lengths[6], 2);
2630 assert_eq!(bits, (LEVEL_TABLE_SIZE - 3) * 4);
2631 }
2632
2633 fn pack_nibbles(nibbles: &[u8]) -> Vec<u8> {
2634 nibbles
2635 .chunks(2)
2636 .map(|chunk| {
2637 let high = chunk[0] & 0x0f;
2638 let low = chunk.get(1).copied().unwrap_or(0) & 0x0f;
2639 (high << 4) | low
2640 })
2641 .collect()
2642 }
2643
2644 #[test]
2645 fn reads_rar50_second_level_table_lengths() {
2646 let mut writer = BitWriter::new();
2647 for _ in 0..LEVEL_TABLE_SIZE {
2648 writer.write_bits(5, 4);
2649 }
2650 for count in [138, 138, 138, 16] {
2651 writer.write_bits(19, 5);
2652 writer.write_bits(count - 11, 7);
2653 }
2654 let input = writer.finish();
2655
2656 let (lengths, bits) = read_table_lengths(&input, 0).unwrap();
2657
2658 assert_eq!(lengths.main.len(), MAIN_TABLE_SIZE);
2659 assert_eq!(lengths.distance.len(), DISTANCE_TABLE_SIZE_50);
2660 assert_eq!(lengths.align.len(), ALIGN_TABLE_SIZE);
2661 assert_eq!(lengths.length.len(), LENGTH_TABLE_SIZE);
2662 assert!(lengths.main.iter().all(|&length| length == 0));
2663 assert!(lengths.distance.iter().all(|&length| length == 0));
2664 assert!(lengths.align.iter().all(|&length| length == 0));
2665 assert!(lengths.length.iter().all(|&length| length == 0));
2666 assert_eq!(bits, LEVEL_TABLE_SIZE * 4 + 4 * (5 + 7));
2667 }
2668
2669 #[test]
2670 fn reads_rar70_table_length_count() {
2671 assert_eq!(
2672 table_length_count(1).unwrap(),
2673 MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_70 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE
2674 );
2675 }
2676
2677 #[test]
2678 fn encoded_table_lengths_round_trip_with_bit_count() {
2679 let mut lengths = TableLengths {
2680 main: vec![0; MAIN_TABLE_SIZE],
2681 distance: vec![0; DISTANCE_TABLE_SIZE_50],
2682 align: vec![0; ALIGN_TABLE_SIZE],
2683 length: vec![0; LENGTH_TABLE_SIZE],
2684 };
2685 lengths.main[b'A' as usize] = 1;
2686 lengths.main[b'B' as usize] = 3;
2687 lengths.main[262] = 3;
2688 lengths.distance[1] = 1;
2689 lengths.align[0] = 4;
2690 lengths.length[0] = 1;
2691
2692 let (encoded, bit_count) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
2693 let (decoded, decoded_bits) = read_table_lengths(&encoded, 0).unwrap();
2694
2695 assert_eq!(decoded, lengths);
2696 assert_eq!(decoded_bits, bit_count);
2697 }
2698
2699 #[test]
2700 fn table_level_encoder_uses_rar5_run_symbols() {
2701 let mut lengths =
2702 vec![
2703 0u8;
2704 MAIN_TABLE_SIZE + DISTANCE_TABLE_SIZE_50 + ALIGN_TABLE_SIZE + LENGTH_TABLE_SIZE
2705 ];
2706 lengths[..4].fill(6);
2707 lengths[8..21].fill(0);
2708
2709 let tokens = encode_table_level_tokens(&lengths);
2710
2711 assert!(tokens.contains(&LevelToken::repeat_previous_short(3)));
2712 assert!(tokens.iter().any(|token| token.symbol == 19));
2713 }
2714
2715 #[test]
2716 fn encoded_compressed_block_round_trips_header_fields() {
2717 let payload = [0xaa, 0xbb, 0xc0];
2718 let block = encode_compressed_block(&payload, 18, true, true).unwrap();
2719
2720 let parsed = parse_compressed_block(&block).unwrap();
2721
2722 assert_eq!(parsed.payload, 3..6);
2723 assert!(parsed.header.has_tables);
2724 assert!(parsed.header.is_last);
2725 assert_eq!(parsed.header.final_byte_bits, 2);
2726 assert_eq!(parsed.header.payload_bits, 18);
2727 assert_eq!(&block[parsed.payload], payload);
2728 }
2729
2730 #[test]
2731 fn rejects_table_repeat_without_previous_length() {
2732 let mut writer = BitWriter::new();
2733 for _ in 0..LEVEL_TABLE_SIZE {
2734 writer.write_bits(5, 4);
2735 }
2736 writer.write_bits(16, 5);
2737 writer.write_bits(0, 3);
2738
2739 assert_eq!(
2740 read_table_lengths(&writer.finish(), 0),
2741 Err(Error::InvalidData(
2742 "RAR 5 table repeats missing previous length"
2743 ))
2744 );
2745 }
2746
2747 #[test]
2748 fn rejects_invalid_encoded_block_bit_counts() {
2749 assert_eq!(
2750 encode_compressed_block(&[0], 0, true, true),
2751 Err(Error::InvalidData("RAR 5 block has unused payload bytes"))
2752 );
2753 assert_eq!(
2754 encode_compressed_block(&[], 1, true, true),
2755 Err(Error::InvalidData("RAR 5 block bit count exceeds payload"))
2756 );
2757 }
2758
2759 #[test]
2760 fn builds_named_decode_tables_from_lengths() {
2761 let lengths = TableLengths {
2762 main: vec![1, 1],
2763 distance: vec![1, 1],
2764 align: vec![4; ALIGN_TABLE_SIZE],
2765 length: vec![1, 1],
2766 };
2767
2768 let tables = DecodeTables::from_lengths(&lengths).unwrap();
2769
2770 assert!(!tables.main.is_empty());
2771 assert!(!tables.distance.is_empty());
2772 assert!(!tables.align.is_empty());
2773 assert!(!tables.length.is_empty());
2774 assert!(!tables.align_mode);
2775 }
2776
2777 #[test]
2778 fn rejects_oversubscribed_rar50_huffman_tables() {
2779 assert!(matches!(
2780 HuffmanTable::from_lengths(&[1, 1, 1]),
2781 Err(Error::InvalidData("RAR 5 oversubscribed Huffman table"))
2782 ));
2783 }
2784
2785 #[test]
2786 fn detects_rar50_align_mode_when_align_lengths_are_not_uniform_four() {
2787 let mut align = vec![4; ALIGN_TABLE_SIZE];
2788 align[0] = 0;
2789 align[3] = 3;
2790 let lengths = TableLengths {
2791 main: vec![1, 1],
2792 distance: vec![1, 1],
2793 align,
2794 length: vec![1, 1],
2795 };
2796
2797 let tables = DecodeTables::from_lengths(&lengths).unwrap();
2798
2799 assert!(tables.align_mode);
2800 }
2801
2802 #[test]
2803 fn decodes_synthetic_literal_only_block() {
2804 let payload = literal_only_payload(b"ABBA");
2805 let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
2806
2807 let output = decode_literal_only(&input, 0, 4).unwrap();
2808
2809 assert_eq!(output, b"ABBA");
2810 }
2811
2812 #[test]
2813 fn encodes_literal_only_member_that_decoder_reads() {
2814 let data = b"literal-only RAR5 codec stream\nwith repeated words words words";
2815 let input = encode_literal_only(data, 0).unwrap();
2816
2817 let output = decode_literal_only(&input, 0, data.len()).unwrap();
2818
2819 assert_eq!(output, data);
2820 }
2821
2822 #[test]
2823 fn encodes_literal_only_rar70_table_shape_that_decoder_reads() {
2824 let data = b"small RAR7-compatible literal block";
2825 let input = encode_literal_only(data, 1).unwrap();
2826
2827 let output = decode_literal_only(&input, 1, data.len()).unwrap();
2828
2829 assert_eq!(output, data);
2830 }
2831
2832 #[test]
2833 fn encodes_empty_literal_only_member() {
2834 let input = encode_literal_only(b"", 0).unwrap();
2835
2836 let output = decode_literal_only(&input, 0, 0).unwrap();
2837
2838 assert!(output.is_empty());
2839 }
2840
2841 #[test]
2842 fn encodes_lz_member_with_same_member_matches() {
2843 let data = b"RAR5 match writer phrase. RAR5 match writer phrase. RAR5 match writer phrase.";
2844 let lz = encode_lz_member(data, 0).unwrap();
2845 let literal = encode_literal_only(data, 0).unwrap();
2846
2847 let output = decode_lz(&lz, 0, data.len()).unwrap();
2848
2849 assert_eq!(output, data);
2850 assert!(lz.len() < literal.len());
2851 assert!(
2852 encode_tokens(data, &[], EncodeOptions::default(), DISTANCE_TABLE_SIZE_50)
2853 .iter()
2854 .any(|token| matches!(token, EncodeToken::Match { .. }))
2855 );
2856 }
2857
2858 #[test]
2859 fn frequency_weighted_huffman_lengths_shorten_common_symbols() {
2860 let mut frequencies = vec![1usize; 24];
2861 frequencies[3] = 1024;
2862
2863 let lengths = huffman::lengths_for_frequencies(&frequencies, 15);
2864
2865 assert!(lengths[3] < lengths[0]);
2866 assert!(lengths.iter().all(|&length| length <= 15));
2867 }
2868
2869 #[test]
2870 fn lz_encoder_uses_frequency_weighted_huffman_lengths() {
2871 let mut data = vec![b'a'; 200];
2872 data.extend_from_slice(b"bcdefghijklmnopqrstuvwxyz");
2873 let input = encode_lz_member_with_options(&data, 0, EncodeOptions::new(0)).unwrap();
2874 let block = parse_compressed_block(&input).unwrap();
2875 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
2876
2877 let output = decode_lz(&input, 0, data.len()).unwrap();
2878
2879 assert_eq!(output, data);
2880 assert!(lengths.main[b'a' as usize] < lengths.main[b'z' as usize]);
2881 }
2882
2883 #[test]
2884 fn lazy_lz_parser_defers_short_match_for_longer_next_match() {
2885 let input = b"abcdXbcdYYYYYYYYYYYYabcdYYYYYYYYYYYY";
2886 let greedy = encode_tokens(
2887 input,
2888 &[],
2889 EncodeOptions::new(MAX_MATCH_CANDIDATES),
2890 DISTANCE_TABLE_SIZE_50,
2891 );
2892 let lazy = encode_tokens(
2893 input,
2894 &[],
2895 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
2896 DISTANCE_TABLE_SIZE_50,
2897 );
2898 let packed = encode_lz_member_with_options(
2899 input,
2900 0,
2901 EncodeOptions::new(MAX_MATCH_CANDIDATES).with_lazy_matching(true),
2902 )
2903 .unwrap();
2904
2905 assert!(greedy
2906 .iter()
2907 .any(|token| matches!(token, EncodeToken::Match { length: 4, .. })));
2908 assert!(lazy
2909 .iter()
2910 .any(|token| matches!(token, EncodeToken::Match { length, .. } if *length > 8)));
2911 assert_eq!(decode_lz(&packed, 0, input.len()).unwrap(), input);
2912 }
2913
2914 #[test]
2915 fn cost_aware_match_selection_prefers_repeat_distance_token() {
2916 let pos = 64;
2917 let pattern = b"abcdefgh";
2918 let mut input: Vec<u8> = (0..96u8).map(|byte| byte.wrapping_mul(37)).collect();
2919 input[pos - 30..pos - 22].copy_from_slice(pattern);
2920 input[pos - 10..pos - 2].copy_from_slice(pattern);
2921 input[pos..pos + 8].copy_from_slice(pattern);
2922 input[pos + 8] = b'X';
2923
2924 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
2925 for candidate in 0..pos {
2926 insert_match_position(&input, candidate, &mut buckets);
2927 }
2928 let state = EncoderMatchState {
2929 reps: [30, 0, 0, 0],
2930 last_length: 8,
2931 };
2932
2933 let best = best_match(
2934 &input,
2935 pos,
2936 input.len(),
2937 &buckets,
2938 EncodeOptions::default(),
2939 &state,
2940 DISTANCE_TABLE_SIZE_50,
2941 )
2942 .unwrap();
2943
2944 assert_eq!((best.length, best.distance), (8, 30));
2945 }
2946
2947 #[test]
2948 fn lazy_parser_uses_match_cost_not_only_match_length() {
2949 let pos = 600;
2950 let mut input: Vec<u8> = (0..700u16)
2951 .map(|value| value.wrapping_mul(73) as u8)
2952 .collect();
2953 input[pos - 512..pos - 504].copy_from_slice(b"ABCDEFGH");
2954 input[pos - 504] = b'Z';
2955 input[pos - 29..pos - 21].copy_from_slice(b"BCDEFGHI");
2956 input[pos - 30] = b'x';
2957 input[pos..pos + 9].copy_from_slice(b"ABCDEFGHI");
2958
2959 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
2960 for candidate in 0..pos {
2961 insert_match_position(&input, candidate, &mut buckets);
2962 }
2963 let state = EncoderMatchState {
2964 reps: [30, 0, 0, 0],
2965 last_length: 8,
2966 };
2967 let current = best_match(
2968 &input,
2969 pos,
2970 input.len(),
2971 &buckets,
2972 EncodeOptions::default(),
2973 &state,
2974 DISTANCE_TABLE_SIZE_50,
2975 )
2976 .unwrap();
2977
2978 assert_eq!((current.length, current.distance), (8, 512));
2979 assert!(should_lazy_emit_literal(
2980 &input,
2981 pos,
2982 &buckets,
2983 EncodeOptions::default().with_lazy_matching(true),
2984 &state,
2985 DISTANCE_TABLE_SIZE_50,
2986 current,
2987 ));
2988 }
2989
2990 #[test]
2991 fn lazy_parser_uses_bounded_cost_lookahead() {
2992 let pos = 160;
2993 let mut input: Vec<u8> = (0..240u16)
2994 .map(|value| value.wrapping_mul(91) as u8)
2995 .collect();
2996 input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
2997 input[pos - 80..pos - 70].copy_from_slice(b"CDEFGHIJKL");
2998 input[pos..pos + 12].copy_from_slice(b"ABCDEFGHIJKL");
2999
3000 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3001 for candidate in 0..pos {
3002 insert_match_position(&input, candidate, &mut buckets);
3003 }
3004 let state = EncoderMatchState::default();
3005 let current = best_match(
3006 &input,
3007 pos,
3008 input.len(),
3009 &buckets,
3010 EncodeOptions::default(),
3011 &state,
3012 DISTANCE_TABLE_SIZE_50,
3013 )
3014 .unwrap();
3015
3016 assert_eq!((current.length, current.distance), (8, 30));
3017 assert!(!should_lazy_emit_literal(
3018 &input,
3019 pos,
3020 &buckets,
3021 EncodeOptions::default()
3022 .with_lazy_matching(true)
3023 .with_lazy_lookahead(1),
3024 &state,
3025 DISTANCE_TABLE_SIZE_50,
3026 current,
3027 ));
3028 assert!(should_lazy_emit_literal(
3029 &input,
3030 pos,
3031 &buckets,
3032 EncodeOptions::default()
3033 .with_lazy_matching(true)
3034 .with_lazy_lookahead(2),
3035 &state,
3036 DISTANCE_TABLE_SIZE_50,
3037 current,
3038 ));
3039 }
3040
3041 #[test]
3042 fn lazy_parser_charges_for_skipped_literals() {
3043 let pos = 160;
3044 let mut input: Vec<u8> = (0..240u16)
3045 .map(|value| value.wrapping_mul(91) as u8)
3046 .collect();
3047 input[pos - 30..pos - 22].copy_from_slice(b"ABCDEFGH");
3048 input[pos - 80..pos - 71].copy_from_slice(b"CDEFGHIJK");
3049 input[pos..pos + 12].copy_from_slice(b"ABCDEFGHIJKL");
3050
3051 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
3052 for candidate in 0..pos {
3053 insert_match_position(&input, candidate, &mut buckets);
3054 }
3055 let state = EncoderMatchState::default();
3056 let current = best_match(
3057 &input,
3058 pos,
3059 input.len(),
3060 &buckets,
3061 EncodeOptions::default(),
3062 &state,
3063 DISTANCE_TABLE_SIZE_50,
3064 )
3065 .unwrap();
3066
3067 let next = best_match(
3068 &input,
3069 pos + 2,
3070 input.len(),
3071 &buckets,
3072 EncodeOptions::default(),
3073 &state,
3074 DISTANCE_TABLE_SIZE_50,
3075 )
3076 .unwrap();
3077
3078 assert!(next.score > current.score);
3079 assert!(next.score <= current.score + 16);
3080 assert!(!should_lazy_emit_literal(
3081 &input,
3082 pos,
3083 &buckets,
3084 EncodeOptions::default()
3085 .with_lazy_matching(true)
3086 .with_lazy_lookahead(2),
3087 &state,
3088 DISTANCE_TABLE_SIZE_50,
3089 current,
3090 ));
3091 }
3092
3093 fn encode_lz_member_with_filter(data: &[u8], kind: Rar50FilterKind) -> Result<Vec<u8>> {
3094 Unpack50Encoder::new().encode_member_with_filter(data, 0, Rar50FilterSpec::new(kind))
3095 }
3096
3097 #[test]
3098 fn encodes_lz_member_with_delta_filter_record() {
3099 let data: Vec<u8> = (0..96).map(|index| (index * 7 + index / 3) as u8).collect();
3100 let input =
3101 encode_lz_member_with_filter(&data, Rar50FilterKind::Delta { channels: 3 }).unwrap();
3102 let block = parse_compressed_block(&input).unwrap();
3103 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3104
3105 let output = decode_lz(&input, 0, data.len()).unwrap();
3106
3107 assert_eq!(output, data);
3108 assert_ne!(lengths.main[256], 0);
3109 }
3110
3111 #[test]
3112 fn rejects_invalid_delta_filter_channel_count() {
3113 assert_eq!(
3114 encode_lz_member_with_filter(b"abc", Rar50FilterKind::Delta { channels: 0 }),
3115 Err(Error::InvalidData(
3116 "RAR 5 DELTA filter channel count is invalid"
3117 ))
3118 );
3119 assert_eq!(
3120 encode_lz_member_with_filter(b"abc", Rar50FilterKind::Delta { channels: 33 }),
3121 Err(Error::InvalidData(
3122 "RAR 5 DELTA filter channel count is invalid"
3123 ))
3124 );
3125 }
3126
3127 #[test]
3128 fn encodes_lz_member_with_e8_filter_record() {
3129 let mut data = b"\xe8\0\0\0\0plain text after call".to_vec();
3130 data.extend_from_slice(&[0xe8, 3, 0, 0, 0, b'X']);
3131 let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8).unwrap();
3132 let block = parse_compressed_block(&input).unwrap();
3133 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3134
3135 let output = decode_lz(&input, 0, data.len()).unwrap();
3136
3137 assert_eq!(output, data);
3138 assert_ne!(lengths.main[256], 0);
3139 }
3140
3141 #[test]
3142 fn streaming_decode_reports_filtered_member_with_typed_sentinel() {
3143 let data = b"\xe8\0\0\0\0plain text after call".to_vec();
3144 let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8).unwrap();
3145 let mut reader = input.as_slice();
3146 let mut decoder = Unpack50Decoder::new();
3147
3148 let error = decoder
3149 .decode_member_from_reader_with_dictionary_to_sink(
3150 &mut reader,
3151 0,
3152 data.len(),
3153 128 * 1024,
3154 false,
3155 |_chunk| Ok::<_, std::convert::Infallible>(()),
3156 )
3157 .unwrap_err();
3158
3159 assert!(matches!(error, StreamDecodeError::FilteredMember));
3160 }
3161
3162 #[test]
3163 fn encodes_lz_member_with_e8e9_filter_record() {
3164 let data = b"\xe9\0\0\0\0jump target through e9".to_vec();
3165 let input = encode_lz_member_with_filter(&data, Rar50FilterKind::E8E9).unwrap();
3166 let block = parse_compressed_block(&input).unwrap();
3167 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3168
3169 let output = decode_lz(&input, 0, data.len()).unwrap();
3170
3171 assert_eq!(output, data);
3172 assert_ne!(lengths.main[256], 0);
3173 }
3174
3175 #[test]
3176 fn encodes_lz_member_with_ranged_e8e9_filter_record() {
3177 let mut data = b"\xe8\0\0\0\0plain prefix outside filter range".to_vec();
3178 let range_start = data.len();
3179 for _ in 0..16 {
3180 let operand_pos = data.len() + 1;
3181 data.push(0xe8);
3182 let relative = 0x7000u32.wrapping_sub(operand_pos as u32);
3183 data.extend_from_slice(&relative.to_le_bytes());
3184 data.extend_from_slice(b" code ");
3185 }
3186 let range = range_start..data.len();
3187 data.extend_from_slice(b"\xe9\0\0\0\0plain suffix outside filter range");
3188
3189 let input = Unpack50Encoder::new()
3190 .encode_member_with_filter(
3191 &data,
3192 0,
3193 Rar50FilterSpec::range(Rar50FilterKind::E8E9, range),
3194 )
3195 .unwrap();
3196 let block = parse_compressed_block(&input).unwrap();
3197 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3198
3199 let output = decode_lz(&input, 0, data.len()).unwrap();
3200
3201 assert_eq!(output, data);
3202 assert_ne!(lengths.main[256], 0);
3203 }
3204
3205 #[test]
3206 fn encodes_lz_member_with_multiple_filter_records() {
3207 let mut data = b"\xe8\0\0\0\0plain prefix outside filters".to_vec();
3208 let first_start = data.len();
3209 data.extend_from_slice(b"\xe8\0\0\0\0first filtered cluster");
3210 let first_end = data.len();
3211 data.extend_from_slice(b"large plain middle outside filters");
3212 let second_start = data.len();
3213 data.extend_from_slice(b"\xe8\0\0\0\0second filtered cluster");
3214 let second_end = data.len();
3215
3216 let input = Unpack50Encoder::new()
3217 .encode_member_with_filters(
3218 &data,
3219 0,
3220 &[
3221 Rar50FilterSpec::range(Rar50FilterKind::E8, first_start..first_end),
3222 Rar50FilterSpec::range(Rar50FilterKind::E8, second_start..second_end),
3223 ],
3224 )
3225 .unwrap();
3226 let block = parse_compressed_block(&input).unwrap();
3227 let (lengths, table_bits) = read_table_lengths(&input[block.payload.clone()], 0).unwrap();
3228 let tables = DecodeTables::from_lengths(&lengths).unwrap();
3229 let mut bits = BitReader {
3230 input: &input[block.payload],
3231 bit_pos: table_bits,
3232 };
3233 assert_eq!(tables.main.decode(&mut bits).unwrap(), 256);
3234 let first = read_filter(&mut bits, 0).unwrap();
3235 assert_eq!(tables.main.decode(&mut bits).unwrap(), 256);
3236 let second = read_filter(&mut bits, 0).unwrap();
3237
3238 let output = decode_lz(&input, 0, data.len()).unwrap();
3239
3240 assert_eq!(output, data);
3241 assert_eq!(first.start, first_start);
3242 assert_eq!(second.start, second_start);
3243 }
3244
3245 #[test]
3246 fn encodes_lz_member_with_arm_filter_record() {
3247 let data = [0x04, 0x00, 0x00, 0xeb, b'A', b'R', b'M', b'!'];
3248 let input = encode_lz_member_with_filter(&data, Rar50FilterKind::Arm).unwrap();
3249 let block = parse_compressed_block(&input).unwrap();
3250 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3251
3252 let output = decode_lz(&input, 0, data.len()).unwrap();
3253
3254 assert_eq!(output, data);
3255 assert_ne!(lengths.main[256], 0);
3256 }
3257
3258 #[test]
3259 fn arm_filter_uses_wrapping_address_arithmetic_at_u32_boundary() {
3260 let original = [0x04, 0x00, 0x00, 0xeb, 0x08, 0x00, 0x00, 0xeb];
3261 let mut filtered = original;
3262
3263 arm_encode(&mut filtered, u32::MAX - 3);
3264 assert_ne!(filtered, original);
3265 arm_decode(&mut filtered, u32::MAX - 3);
3266
3267 assert_eq!(filtered, original);
3268 }
3269
3270 #[test]
3271 fn solid_encoder_emits_rar50_matches_against_previous_member_history() {
3272 let first = b"RAR5 solid shared phrase alpha beta gamma\n".repeat(16);
3273 let second = b"RAR5 solid shared phrase alpha beta gamma\nsecond\n".repeat(4);
3274 let solid = encode_lz_member_with_history(&second, &first, 0).unwrap();
3275 let standalone = encode_lz_member(&second, 0).unwrap();
3276 let mut decoder = Unpack50Decoder::new();
3277
3278 assert_eq!(
3279 decoder
3280 .decode_member(
3281 &encode_lz_member(&first, 0).unwrap(),
3282 0,
3283 first.len(),
3284 false,
3285 DecodeMode::Lz
3286 )
3287 .unwrap(),
3288 first
3289 );
3290 assert_eq!(
3291 decoder
3292 .decode_member(&solid, 0, second.len(), true, DecodeMode::Lz)
3293 .unwrap(),
3294 second
3295 );
3296 assert!(solid.len() < standalone.len());
3297 }
3298
3299 #[test]
3300 fn large_lz_members_are_split_into_multiple_compressed_blocks() {
3301 let data = vec![0u8; MAX_COMPRESSED_BLOCK_OUTPUT + 1];
3302 let encoded = encode_lz_member_with_options(&data, 0, EncodeOptions::new(16)).unwrap();
3303 let mut cursor = std::io::Cursor::new(encoded.as_slice());
3304 let first = read_compressed_block(&mut cursor).unwrap();
3305 let second = read_compressed_block(&mut cursor).unwrap();
3306 let mut decoder = Unpack50Decoder::new();
3307
3308 assert!(!first.header.is_last);
3309 assert!(second.header.is_last);
3310 assert_eq!(
3311 decoder
3312 .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3313 .unwrap(),
3314 data
3315 );
3316 }
3317
3318 #[test]
3319 fn large_filtered_lz_members_split_filter_records_by_block() {
3320 let mut data: Vec<_> = (0..MAX_COMPRESSED_BLOCK_OUTPUT + 512)
3321 .map(|index| index as u8)
3322 .collect();
3323 data[256] = 0xe8;
3324 data[257..261].copy_from_slice(&0x20u32.to_le_bytes());
3325 data[MAX_COMPRESSED_BLOCK_OUTPUT + 64] = 0xe8;
3326 data[MAX_COMPRESSED_BLOCK_OUTPUT + 65..MAX_COMPRESSED_BLOCK_OUTPUT + 69]
3327 .copy_from_slice(&0x40u32.to_le_bytes());
3328
3329 let encoded = Unpack50Encoder::with_options(EncodeOptions::new(0))
3330 .encode_member_with_filter(
3331 &data,
3332 0,
3333 Rar50FilterSpec::range(Rar50FilterKind::E8, 0..data.len()),
3334 )
3335 .unwrap();
3336 let mut cursor = std::io::Cursor::new(encoded.as_slice());
3337 let first = read_compressed_block(&mut cursor).unwrap();
3338 let mut blocks = 1usize;
3339 let mut last_is_last = first.header.is_last;
3340 while cursor.position() < encoded.len() as u64 {
3341 last_is_last = read_compressed_block(&mut cursor).unwrap().header.is_last;
3342 blocks += 1;
3343 }
3344 let mut decoder = Unpack50Decoder::new();
3345
3346 assert!(!first.header.is_last);
3347 assert!(last_is_last);
3348 assert!(blocks > 2);
3349 assert_eq!(
3350 decoder
3351 .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3352 .unwrap(),
3353 data
3354 );
3355 }
3356
3357 #[test]
3358 fn filters_are_split_before_rar_reader_filter_limit() {
3359 let data = vec![0u8; MAX_FILTER_BLOCK_LENGTH + 1];
3360 let encoded = Unpack50Encoder::with_options(
3361 EncodeOptions::new(0).with_max_match_distance(128 * 1024),
3362 )
3363 .encode_member_with_filter(
3364 &data,
3365 0,
3366 Rar50FilterSpec::new(Rar50FilterKind::Delta { channels: 4 }),
3367 )
3368 .unwrap();
3369 let mut cursor = std::io::Cursor::new(encoded.as_slice());
3370 let first = read_compressed_block(&mut cursor).unwrap();
3371 let second = read_compressed_block(&mut cursor).unwrap();
3372 let mut decoder = Unpack50Decoder::new();
3373
3374 assert!(!first.header.is_last);
3375 assert!(second.header.is_last);
3376 assert_eq!(
3377 decoder
3378 .decode_member(&encoded, 0, data.len(), false, DecodeMode::Lz)
3379 .unwrap(),
3380 data
3381 );
3382 }
3383
3384 #[test]
3385 fn solid_encoder_history_limit_follows_encode_options_dictionary() {
3386 let mut encoder = Unpack50Encoder::with_options(
3387 EncodeOptions::new(0).with_max_match_distance(DEFAULT_DICTIONARY_SIZE + 1024),
3388 );
3389 encoder.remember(&vec![0x41; DEFAULT_DICTIONARY_SIZE + 512]);
3390
3391 assert_eq!(encoder.history.len(), DEFAULT_DICTIONARY_SIZE + 512);
3392
3393 let mut capped =
3394 Unpack50Encoder::with_options(EncodeOptions::new(0).with_max_match_distance(1024));
3395 capped.remember(&vec![0x42; 4096]);
3396
3397 assert_eq!(capped.history.len(), 1024);
3398 }
3399
3400 #[test]
3401 fn encodes_lz_member_with_last_length_repeat_symbols() {
3402 let data = b"abcdXabcdYabcdZabcd";
3403 let input = encode_lz_member(data, 0).unwrap();
3404 let block = parse_compressed_block(&input).unwrap();
3405 let (lengths, _) = read_table_lengths(&input[block.payload], 0).unwrap();
3406
3407 let output = decode_lz(&input, 0, data.len()).unwrap();
3408
3409 assert_eq!(output, data);
3410 assert_ne!(lengths.main[257], 0);
3411 }
3412
3413 #[test]
3414 fn encodes_lz_member_using_rar70_distance_table_shape() {
3415 let data = b"RAR7-compatible repeated phrase repeated phrase repeated phrase";
3416 let input = encode_lz_member(data, 1).unwrap();
3417
3418 let output = decode_lz(&input, 1, data.len()).unwrap();
3419
3420 assert_eq!(output, data);
3421 }
3422
3423 #[test]
3424 fn decode_member_from_reader_accepts_incremental_input() {
3425 struct OneByteReader<'a> {
3426 data: &'a [u8],
3427 pos: usize,
3428 }
3429
3430 impl Read for OneByteReader<'_> {
3431 fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
3432 if self.pos >= self.data.len() {
3433 return Ok(0);
3434 }
3435 out[0] = self.data[self.pos];
3436 self.pos += 1;
3437 Ok(1)
3438 }
3439 }
3440
3441 let payload = literal_only_payload(b"ABBA");
3442 let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3443 let mut reader = OneByteReader {
3444 data: &input,
3445 pos: 0,
3446 };
3447 let mut decoder = Unpack50Decoder::new();
3448
3449 let output = decoder
3450 .decode_member_from_reader(&mut reader, 0, 4, false, DecodeMode::LiteralOnly)
3451 .unwrap();
3452
3453 assert_eq!(output, b"ABBA");
3454 }
3455
3456 #[test]
3457 fn decodes_synthetic_new_match_block() {
3458 let payload = new_match_payload();
3459 let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3460
3461 let output = decode_lz(&input, 0, 4).unwrap();
3462
3463 assert_eq!(output, b"ABAB");
3464 }
3465
3466 #[test]
3467 fn decodes_synthetic_last_length_match_block() {
3468 let payload = repeat_payload(257);
3469 let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3470
3471 let output = decode_lz(&input, 0, 6).unwrap();
3472
3473 assert_eq!(output, b"ABABAB");
3474 }
3475
3476 #[test]
3477 fn decodes_synthetic_repeat_distance_match_block() {
3478 let payload = repeat_payload(258);
3479 let input = encode_compressed_block(&payload, payload.len() * 8, true, true).unwrap();
3480
3481 let output = decode_lz(&input, 0, 6).unwrap();
3482
3483 assert_eq!(output, b"ABABAB");
3484 }
3485
3486 #[test]
3487 fn rejects_literal_only_block_without_tables() {
3488 let input = encode_compressed_block(&[0], 8, false, true).unwrap();
3489
3490 assert_eq!(
3491 decode_literal_only(&input, 0, 1),
3492 Err(Error::InvalidData("RAR 5 block reuses missing tables"))
3493 );
3494 }
3495
3496 #[test]
3497 fn decodes_length_slots() {
3498 assert_eq!(slot_to_length(0, 0).unwrap(), 2);
3499 assert_eq!(slot_to_length(7, 0).unwrap(), 9);
3500 assert_eq!(slot_to_length(8, 0).unwrap(), 10);
3501 assert_eq!(slot_to_length(8, 1).unwrap(), 11);
3502 assert_eq!(slot_to_length(11, 1).unwrap(), 17);
3503 assert_eq!(slot_to_length(12, 3).unwrap(), 21);
3504 }
3505
3506 #[test]
3507 fn decodes_distance_slots() {
3508 assert_eq!(slot_to_distance(0, 0).unwrap(), 1);
3509 assert_eq!(slot_to_distance(3, 0).unwrap(), 4);
3510 assert_eq!(distance_slot_bit_count(4).unwrap(), 1);
3511 assert_eq!(slot_to_distance(4, 0).unwrap(), 5);
3512 assert_eq!(slot_to_distance(4, 1).unwrap(), 6);
3513 assert_eq!(distance_slot_bit_count(10).unwrap(), 4);
3514 assert_eq!(slot_to_distance(10, 15).unwrap(), 48);
3515 }
3516
3517 #[test]
3518 fn bit_reader_accepts_large_rar5_distance_extras() {
3519 let mut bits = BitReader::new(&[0xff, 0x00, 0xaa, 0x55]);
3520
3521 assert_eq!(bits.read_bits(32).unwrap(), 0xff00_aa55);
3522 assert_eq!(
3523 bits.read_bits(1),
3524 Err(Error::NeedMoreInput),
3525 "32-bit reads must not leave a partial cursor state"
3526 );
3527 }
3528
3529 #[test]
3530 fn copies_lz_matches_with_overlap() {
3531 let decoder = Unpack50Decoder::new();
3532 let mut output = b"AB".to_vec();
3533
3534 decoder
3535 .copy_match(&mut output, 2, 6, 8, DEFAULT_DICTIONARY_SIZE)
3536 .unwrap();
3537
3538 assert_eq!(output, b"ABABABAB");
3539 }
3540
3541 #[test]
3542 fn rejects_invalid_match_copy() {
3543 let decoder = Unpack50Decoder::new();
3544 let mut output = b"AB".to_vec();
3545
3546 assert_eq!(
3547 decoder.copy_match(&mut output, 3, 1, 3, DEFAULT_DICTIONARY_SIZE),
3548 Err(Error::InvalidData("RAR 5 match distance exceeds window"))
3549 );
3550 assert_eq!(
3551 decoder.copy_match(&mut output, 1, 2, 3, DEFAULT_DICTIONARY_SIZE),
3552 Err(Error::InvalidData("RAR 5 match exceeds output limit"))
3553 );
3554 }
3555
3556 #[test]
3557 fn rejects_match_distance_beyond_dictionary() {
3558 let decoder = Unpack50Decoder::new();
3559 let mut output = b"ABCD".to_vec();
3560
3561 assert_eq!(
3562 decoder.copy_match(&mut output, 4, 1, 5, 3),
3563 Err(Error::InvalidData(
3564 "RAR 5 match distance exceeds dictionary"
3565 ))
3566 );
3567 }
3568
3569 #[test]
3570 fn solid_history_is_capped_to_dictionary_size() {
3571 let mut decoder = Unpack50Decoder::new();
3572 let first_payload = literal_only_payload(b"ABBA");
3573 let first =
3574 encode_compressed_block(&first_payload, first_payload.len() * 8, true, true).unwrap();
3575 let second_payload = literal_only_payload(b"BAAB");
3576 let second =
3577 encode_compressed_block(&second_payload, second_payload.len() * 8, true, true).unwrap();
3578
3579 assert_eq!(
3580 decoder
3581 .decode_member_with_dictionary(&first, 0, 4, 6, false, DecodeMode::LiteralOnly)
3582 .unwrap(),
3583 b"ABBA"
3584 );
3585 assert_eq!(decoder.history, b"ABBA");
3586
3587 assert_eq!(
3588 decoder
3589 .decode_member_with_dictionary(&second, 0, 4, 6, true, DecodeMode::LiteralOnly)
3590 .unwrap(),
3591 b"BAAB"
3592 );
3593 assert_eq!(decoder.history, b"BABAAB");
3594 }
3595
3596 #[test]
3597 fn streaming_decoder_history_is_capped_without_reordering() {
3598 let mut decoder = Unpack50Decoder::new();
3599 let first_payload = literal_only_payload(b"ABBA");
3600 let first =
3601 encode_compressed_block(&first_payload, first_payload.len() * 8, true, true).unwrap();
3602 let second_payload = literal_only_payload(b"BAAB");
3603 let second =
3604 encode_compressed_block(&second_payload, second_payload.len() * 8, true, true).unwrap();
3605 let mut decoded = Vec::new();
3606
3607 decoder
3608 .decode_member_from_reader_with_dictionary_to_sink(
3609 &mut std::io::Cursor::new(&first),
3610 0,
3611 4,
3612 6,
3613 false,
3614 |chunk| {
3615 match chunk {
3616 DecodedChunk::Bytes(bytes) => decoded.extend_from_slice(bytes),
3617 DecodedChunk::Repeated { byte, len } => {
3618 decoded.extend(std::iter::repeat_n(byte, len));
3619 }
3620 }
3621 Ok::<(), std::io::Error>(())
3622 },
3623 )
3624 .unwrap();
3625 assert_eq!(decoded, b"ABBA");
3626 assert_eq!(decoder.history, b"ABBA");
3627
3628 decoded.clear();
3629 decoder
3630 .decode_member_from_reader_with_dictionary_to_sink(
3631 &mut std::io::Cursor::new(&second),
3632 0,
3633 4,
3634 6,
3635 true,
3636 |chunk| {
3637 match chunk {
3638 DecodedChunk::Bytes(bytes) => decoded.extend_from_slice(bytes),
3639 DecodedChunk::Repeated { byte, len } => {
3640 decoded.extend(std::iter::repeat_n(byte, len));
3641 }
3642 }
3643 Ok::<(), std::io::Error>(())
3644 },
3645 )
3646 .unwrap();
3647 assert_eq!(decoded, b"BAAB");
3648 assert_eq!(decoder.history, b"BABAAB");
3649 }
3650
3651 fn literal_only_payload(data: &[u8]) -> Vec<u8> {
3652 let mut lengths = TableLengths {
3653 main: vec![0; MAIN_TABLE_SIZE],
3654 distance: vec![0; DISTANCE_TABLE_SIZE_50],
3655 align: vec![0; ALIGN_TABLE_SIZE],
3656 length: vec![0; LENGTH_TABLE_SIZE],
3657 };
3658 lengths.main[b'A' as usize] = 1;
3659 lengths.main[b'B' as usize] = 1;
3660 let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3661 let mut writer = BitWriter { bytes, bit_pos };
3662 for &byte in data {
3663 match byte {
3664 b'A' => writer.write_bits(0, 1),
3665 b'B' => writer.write_bits(1, 1),
3666 _ => panic!("test helper only encodes A/B"),
3667 }
3668 }
3669 writer.finish()
3670 }
3671
3672 fn new_match_payload() -> Vec<u8> {
3673 let mut lengths = TableLengths {
3674 main: vec![0; MAIN_TABLE_SIZE],
3675 distance: vec![0; DISTANCE_TABLE_SIZE_50],
3676 align: vec![0; ALIGN_TABLE_SIZE],
3677 length: vec![0; LENGTH_TABLE_SIZE],
3678 };
3679 lengths.main[b'A' as usize] = 2;
3680 lengths.main[b'B' as usize] = 2;
3681 lengths.main[262] = 2;
3682 lengths.distance[1] = 1;
3683 let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3684 let mut writer = BitWriter { bytes, bit_pos };
3685
3686 writer.write_bits(0b00, 2); writer.write_bits(0b01, 2); writer.write_bits(0b10, 2); writer.write_bits(0, 1); writer.finish()
3691 }
3692
3693 fn repeat_payload(repeat_symbol: usize) -> Vec<u8> {
3694 let mut lengths = TableLengths {
3695 main: vec![0; MAIN_TABLE_SIZE],
3696 distance: vec![0; DISTANCE_TABLE_SIZE_50],
3697 align: vec![0; ALIGN_TABLE_SIZE],
3698 length: vec![0; LENGTH_TABLE_SIZE],
3699 };
3700 lengths.main[b'A' as usize] = 2;
3701 lengths.main[b'B' as usize] = 2;
3702 lengths.main[repeat_symbol] = 2;
3703 lengths.main[262] = 2;
3704 lengths.distance[1] = 1;
3705 lengths.length[0] = 1;
3706 let (bytes, bit_pos) = encode_table_lengths_with_bit_count(&lengths, 0).unwrap();
3707 let mut writer = BitWriter { bytes, bit_pos };
3708
3709 writer.write_bits(0b00, 2); writer.write_bits(0b01, 2); writer.write_bits(0b11, 2); writer.write_bits(0, 1); writer.write_bits(0b10, 2); if repeat_symbol == 258 {
3715 writer.write_bits(0, 1); }
3717 writer.finish()
3718 }
3719}