Skip to main content

rar_stream/decompress/
mod.rs

1//! RAR decompression algorithms.
2//!
3//! This module provides decompression support for RAR archives, implementing
4//! the LZSS + Huffman and PPMd algorithms used by RAR 2.9-5.x.
5//!
6//! ## Decoders
7//!
8//! | Decoder | Format | Algorithms |
9//! |---------|--------|------------|
10//! | [`Rar29Decoder`] | RAR 2.9-4.x | LZSS + Huffman, PPMd, VM filters |
11//! | [`Rar5Decoder`] | RAR 5.0+ | LZSS + Huffman, byte filters |
12//!
13//! ## Compression Methods
14//!
15//! RAR uses a single byte to identify the compression method:
16//!
17//! | Value | Name | Description |
18//! |-------|------|-------------|
19//! | `0x30` | Store | No compression (data is stored as-is) |
20//! | `0x31` | Fastest | LZSS with minimal dictionary |
21//! | `0x32` | Fast | LZSS with small dictionary |
22//! | `0x33` | Normal | LZSS with medium dictionary (default) |
23//! | `0x34` | Good | LZSS with large dictionary |
24//! | `0x35` | Best | LZSS with maximum dictionary |
25//!
26//! ## Filter Support
27//!
28//! RAR applies preprocessing filters before compression to improve ratios:
29//!
30//! | Filter | RAR4 | RAR5 | Description |
31//! |--------|------|------|-------------|
32//! | Delta | ✅ | ✅ | Byte delta encoding (audio, images) |
33//! | E8/E8E9 | ✅ | ✅ | x86 CALL/JMP instruction preprocessing |
34//! | ARM | — | ✅ | ARM branch instruction preprocessing |
35//! | Audio | ✅ | — | Multi-channel audio predictor |
36//! | RGB | ✅ | — | Predictive color filter (images) |
37//!
38//! ## Example
39//!
40//! ```rust
41//! use rar_stream::Rar29Decoder;
42//!
43//! // Create a new decoder (reusable for multiple files)
44//! let mut decoder = Rar29Decoder::new();
45//!
46//! // Decompress data (compressed_data from file header's data area)
47//! // let decompressed = decoder.decompress(&compressed_data, expected_size)?;
48//! ```
49//!
50//! ## Architecture
51//!
52//! The decompression pipeline:
53//!
54//! ```text
55//! Compressed Data
56//!       ↓
57//! ┌─────────────┐
58//! │ BitReader   │ ← Bit-level access to compressed stream
59//! └─────────────┘
60//!       ↓
61//! ┌─────────────┐
62//! │ Huffman     │ ← Decode variable-length symbols
63//! └─────────────┘
64//!       ↓
65//! ┌─────────────┐
66//! │ LZSS/PPMd   │ ← Expand literals and back-references
67//! └─────────────┘
68//!       ↓
69//! ┌─────────────┐
70//! │ Filters     │ ← Apply inverse preprocessing (E8, Delta, etc.)
71//! └─────────────┘
72//!       ↓
73//! Decompressed Data
74//! ```
75//!
76//! ## Performance Notes
77//!
78//! - Decoders are reusable and maintain internal state
79//! - Window size is 4MB for RAR4, up to 4GB for RAR5
80//! - PPMd uses significant memory (~100MB for order-8 model)
81
82// Work-in-progress: Some filters not fully integrated yet
83#![allow(dead_code)]
84
85mod bit_reader;
86mod huffman;
87mod lzss;
88mod ppm;
89mod rar29;
90pub mod rar5;
91mod vm;
92
93#[cfg(test)]
94mod tests;
95
96pub use bit_reader::BitReader;
97pub use huffman::{HuffmanDecoder, HuffmanTable};
98pub use lzss::LzssDecoder;
99pub use ppm::PpmModel;
100pub use rar29::Rar29Decoder;
101pub use rar5::Rar5Decoder;
102pub use vm::RarVM;
103
104use std::fmt;
105use std::io;
106
/// Failure modes reported by the decompression code in this module.
///
/// Every variant has a human-readable message through its [`fmt::Display`]
/// implementation. Only [`DecompressError::Io`] exposes an underlying
/// [`source`](std::error::Error::source).
#[derive(Debug)]
pub enum DecompressError {
    /// The data ended unexpectedly.
    UnexpectedEof,
    /// An invalid Huffman code was found.
    InvalidHuffmanCode,
    /// A back reference whose `offset` exceeds the window `position`.
    InvalidBackReference { offset: u32, position: u32 },
    /// The decompression buffer overflowed.
    BufferOverflow,
    /// The compression method (the wrapped value) is not supported.
    UnsupportedMethod(u8),
    /// The compressed data is incomplete.
    IncompleteData,
    /// An I/O error; it is kept and returned as the error source.
    Io(io::Error),
}

impl fmt::Display for DecompressError {
    /// Writes the message for this error.
    ///
    /// Variants carrying a payload are formatted (and returned) directly;
    /// the remaining variants resolve to a fixed message written at the end.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let fixed_message = match self {
            Self::InvalidBackReference { offset, position } => {
                return write!(
                    f,
                    "Invalid back reference: offset {} exceeds window position {}",
                    offset, position
                );
            }
            Self::UnsupportedMethod(method) => {
                return write!(f, "Unsupported compression method: {}", method);
            }
            Self::Io(inner) => return write!(f, "I/O error: {}", inner),
            Self::UnexpectedEof => "Unexpected end of data",
            Self::InvalidHuffmanCode => "Invalid Huffman code",
            Self::BufferOverflow => "Decompression buffer overflow",
            Self::IncompleteData => "Incomplete compressed data",
        };
        f.write_str(fixed_message)
    }
}

impl std::error::Error for DecompressError {
    /// Returns the wrapped [`io::Error`] for [`DecompressError::Io`] and
    /// `None` for every other variant.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        if let Self::Io(inner) = self {
            return Some(inner);
        }
        None
    }
}

impl From<io::Error> for DecompressError {
    /// Wraps an [`io::Error`] in [`DecompressError::Io`], which lets `?`
    /// convert I/O failures in functions returning this error type.
    fn from(io_error: io::Error) -> Self {
        DecompressError::Io(io_error)
    }
}
153
/// Convenience alias for [`std::result::Result`] with [`DecompressError`] as the error type.
pub type Result<T> = std::result::Result<T, DecompressError>;
155
/// Identifies how data was compressed inside a RAR archive.
///
/// Each variant's discriminant is the raw method byte (`0x30` through `0x35`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum CompressionMethod {
    /// Data is stored without compression.
    Store = 0x30,
    /// "Fastest" compression.
    Fastest = 0x31,
    /// "Fast" compression.
    Fast = 0x32,
    /// "Normal" compression.
    Normal = 0x33,
    /// "Good" compression.
    Good = 0x34,
    /// "Best" compression.
    Best = 0x35,
}

impl CompressionMethod {
    /// Every known method; `from_u8` searches this table by discriminant.
    const ALL: [Self; 6] = [
        Self::Store,
        Self::Fastest,
        Self::Fast,
        Self::Normal,
        Self::Good,
        Self::Best,
    ];

    /// Converts a raw method byte into a [`CompressionMethod`].
    ///
    /// Returns `None` when `v` is not one of the bytes `0x30..=0x35`.
    pub fn from_u8(v: u8) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|method| *method as u8 == v)
    }

    /// Reports whether data using this method must be decompressed, which is
    /// the case for every method except [`CompressionMethod::Store`].
    pub fn needs_decompression(&self) -> bool {
        !matches!(self, Self::Store)
    }
}