rar_stream/decompress/mod.rs
1//! RAR decompression algorithms.
2//!
3//! This module provides decompression support for RAR archives, implementing
4//! the LZSS + Huffman and PPMd algorithms used by RAR 2.9-5.x.
5//!
6//! ## Decoders
7//!
8//! | Decoder | Format | Algorithms |
9//! |---------|--------|------------|
10//! | [`Rar29Decoder`] | RAR 2.9-4.x | LZSS + Huffman, PPMd, VM filters |
11//! | [`Rar5Decoder`] | RAR 5.0+ | LZSS + Huffman, byte filters |
12//!
13//! ## Compression Methods
14//!
15//! RAR uses a single byte to identify the compression method:
16//!
17//! | Value | Name | Description |
18//! |-------|------|-------------|
19//! | `0x30` | Store | No compression (data is stored as-is) |
20//! | `0x31` | Fastest | LZSS with minimal dictionary |
21//! | `0x32` | Fast | LZSS with small dictionary |
22//! | `0x33` | Normal | LZSS with medium dictionary (default) |
23//! | `0x34` | Good | LZSS with large dictionary |
24//! | `0x35` | Best | LZSS with maximum dictionary |
25//!
26//! ## Filter Support
27//!
28//! RAR applies preprocessing filters before compression to improve ratios:
29//!
30//! | Filter | RAR4 | RAR5 | Description |
31//! |--------|------|------|-------------|
32//! | Delta | ✅ | ✅ | Byte delta encoding (audio, images) |
33//! | E8/E8E9 | ✅ | ✅ | x86 CALL/JMP instruction preprocessing |
34//! | ARM | — | ✅ | ARM branch instruction preprocessing |
35//! | Audio | ✅ | — | Multi-channel audio predictor |
36//! | RGB | ✅ | — | Predictive color filter (images) |
37//!
38//! ## Example
39//!
40//! ```rust
41//! use rar_stream::Rar29Decoder;
42//!
43//! // Create a new decoder (reusable for multiple files)
44//! let mut decoder = Rar29Decoder::new();
45//!
46//! // Decompress data (compressed_data from file header's data area)
47//! // let decompressed = decoder.decompress(&compressed_data, expected_size)?;
48//! ```
49//!
50//! ## Architecture
51//!
52//! The decompression pipeline:
53//!
54//! ```text
55//! Compressed Data
56//! ↓
57//! ┌─────────────┐
58//! │ BitReader │ ← Bit-level access to compressed stream
59//! └─────────────┘
60//! ↓
61//! ┌─────────────┐
62//! │ Huffman │ ← Decode variable-length symbols
63//! └─────────────┘
64//! ↓
65//! ┌─────────────┐
66//! │ LZSS/PPMd │ ← Expand literals and back-references
67//! └─────────────┘
68//! ↓
69//! ┌─────────────┐
70//! │ Filters │ ← Apply inverse preprocessing (E8, Delta, etc.)
71//! └─────────────┘
72//! ↓
73//! Decompressed Data
74//! ```
75//!
76//! ## Performance Notes
77//!
78//! - Decoders are reusable and maintain internal state
79//! - Window size is 4MB for RAR4, up to 4GB for RAR5
80//! - PPMd uses significant memory (~100MB for order-8 model)
81
82// Work-in-progress: Some filters not fully integrated yet
83#![allow(dead_code)]
84
85mod bit_reader;
86mod huffman;
87mod lzss;
88mod ppm;
89mod rar29;
90pub mod rar5;
91mod vm;
92
93#[cfg(test)]
94mod tests;
95
96pub use bit_reader::BitReader;
97pub use huffman::{HuffmanDecoder, HuffmanTable};
98pub use lzss::LzssDecoder;
99pub use ppm::PpmModel;
100pub use rar29::Rar29Decoder;
101pub use rar5::Rar5Decoder;
102pub use vm::RarVM;
103
104use std::fmt;
105use std::io;
106
/// Failure modes reported by the decompression code in this module.
///
/// Most variants are plain markers; `InvalidBackReference`,
/// `UnsupportedMethod` and `Io` carry extra detail about what went wrong.
#[derive(Debug)]
pub enum DecompressError {
    /// The compressed data ended unexpectedly.
    UnexpectedEof,

    /// An invalid Huffman code was encountered.
    InvalidHuffmanCode,

    /// A back reference whose `offset` exceeds the window `position`.
    InvalidBackReference {
        /// Offset requested by the back reference.
        offset: u32,
        /// Window position the offset was checked against.
        position: u32,
    },

    /// The decompression buffer overflowed.
    BufferOverflow,

    /// The compression method is not supported; holds the raw method value.
    UnsupportedMethod(u8),

    /// The compressed data is incomplete.
    IncompleteData,

    /// An I/O error, wrapped unchanged.
    Io(io::Error),
}
118
119impl fmt::Display for DecompressError {
120 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
121 match self {
122 Self::UnexpectedEof => write!(f, "Unexpected end of data"),
123 Self::InvalidHuffmanCode => write!(f, "Invalid Huffman code"),
124 Self::InvalidBackReference { offset, position } => {
125 write!(
126 f,
127 "Invalid back reference: offset {} exceeds window position {}",
128 offset, position
129 )
130 }
131 Self::BufferOverflow => write!(f, "Decompression buffer overflow"),
132 Self::UnsupportedMethod(m) => write!(f, "Unsupported compression method: {}", m),
133 Self::IncompleteData => write!(f, "Incomplete compressed data"),
134 Self::Io(e) => write!(f, "I/O error: {}", e),
135 }
136 }
137}
138
139impl std::error::Error for DecompressError {
140 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
141 match self {
142 Self::Io(e) => Some(e),
143 _ => None,
144 }
145 }
146}
147
148impl From<io::Error> for DecompressError {
149 fn from(e: io::Error) -> Self {
150 Self::Io(e)
151 }
152}
153
/// Convenience alias for `std::result::Result` with the error type fixed to
/// [`DecompressError`].
pub type Result<T> = std::result::Result<T, DecompressError>;
155
/// The compression method of a RAR entry, identified by a single byte.
///
/// Each variant's discriminant is that raw byte value (`0x30`..=`0x35`), and
/// the enum is stored as a `u8`.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CompressionMethod {
    /// `0x30`: data is stored as-is, with no compression.
    Store = 0x30,

    /// `0x31`: fastest compression.
    Fastest = 0x31,

    /// `0x32`: fast compression.
    Fast = 0x32,

    /// `0x33`: normal compression.
    Normal = 0x33,

    /// `0x34`: good compression.
    Good = 0x34,

    /// `0x35`: best compression.
    Best = 0x35,
}
173
174impl CompressionMethod {
175 pub fn from_u8(v: u8) -> Option<Self> {
176 match v {
177 0x30 => Some(Self::Store),
178 0x31 => Some(Self::Fastest),
179 0x32 => Some(Self::Fast),
180 0x33 => Some(Self::Normal),
181 0x34 => Some(Self::Good),
182 0x35 => Some(Self::Best),
183 _ => None,
184 }
185 }
186
187 /// Whether this method requires decompression.
188 pub fn needs_decompression(&self) -> bool {
189 *self != Self::Store
190 }
191}