Skip to main content

rar_stream/decompress/
lzss.rs

1//! LZSS sliding window decoder.
2//!
3//! Implements the dictionary-based decompression used in RAR.
4
5use super::{DecompressError, Result};
6
7/// Window size for RAR29 (2MB).
8pub const WINDOW_SIZE_29: usize = 0x200000;
9
10/// Window size for RAR50 (up to 4GB, but we use 64MB max for memory).
11pub const WINDOW_SIZE_50: usize = 0x4000000;
12
13/// LZSS sliding window decoder.
14pub struct LzssDecoder {
15    /// Sliding window buffer
16    window: Vec<u8>,
17    /// Window size mask for wrap-around
18    mask: usize,
19    /// Current write position in window
20    pos: usize,
21    /// Total bytes written
22    total_written: u64,
23}
24
25impl LzssDecoder {
26    /// Create a new LZSS decoder with the specified window size.
27    pub fn new(window_size: usize) -> Self {
28        debug_assert!(window_size.is_power_of_two());
29        Self {
30            window: vec![0; window_size],
31            mask: window_size - 1,
32            pos: 0,
33            total_written: 0,
34        }
35    }
36
37    /// Create decoder for RAR 2.9 format.
38    pub fn rar29() -> Self {
39        Self::new(WINDOW_SIZE_29)
40    }
41
42    /// Create decoder for RAR 5.0 format.
43    pub fn rar50() -> Self {
44        Self::new(WINDOW_SIZE_50)
45    }
46
47    /// Write a literal byte to the output.
48    #[inline]
49    pub fn write_literal(&mut self, byte: u8) {
50        self.window[self.pos] = byte;
51        self.pos = (self.pos + 1) & self.mask;
52        self.total_written += 1;
53    }
54
55    /// Copy bytes from a previous position in the window.
56    #[inline]
57    pub fn copy_match(&mut self, distance: u32, length: u32) -> Result<()> {
58        if distance == 0 || distance as usize > self.window.len() {
59            return Err(DecompressError::InvalidBackReference {
60                offset: distance,
61                position: self.pos as u32,
62            });
63        }
64
65        let src_pos = (self.pos.wrapping_sub(distance as usize)) & self.mask;
66        
67        for i in 0..length as usize {
68            let src_idx = (src_pos + i) & self.mask;
69            let byte = self.window[src_idx];
70            self.window[self.pos] = byte;
71            self.pos = (self.pos + 1) & self.mask;
72        }
73        
74        self.total_written += length as u64;
75        Ok(())
76    }
77
78    /// Get the current window position.
79    pub fn position(&self) -> usize {
80        self.pos
81    }
82
83    /// Get total bytes written.
84    pub fn total_written(&self) -> u64 {
85        self.total_written
86    }
87
88    /// Get a byte at the specified offset from current position (going back).
89    #[inline]
90    pub fn get_byte_at_offset(&self, offset: usize) -> u8 {
91        let idx = (self.pos.wrapping_sub(offset)) & self.mask;
92        self.window[idx]
93    }
94
95    /// Extract decompressed data from the window.
96    /// Call this after decompression to get the output.
97    pub fn get_output(&self, start: u64, len: usize) -> Vec<u8> {
98        let mut output = Vec::with_capacity(len);
99        let window_len = self.window.len();
100        
101        // Calculate start position in window
102        let start_pos = if self.total_written <= window_len as u64 {
103            start as usize
104        } else {
105            // Window has wrapped
106            let _written_in_window = self.total_written as usize % window_len;
107            let offset = (self.total_written - start) as usize;
108            if offset > window_len {
109                return output; // Data no longer in window
110            }
111            (self.pos.wrapping_sub(offset)) & self.mask
112        };
113
114        for i in 0..len {
115            let idx = (start_pos + i) & self.mask;
116            output.push(self.window[idx]);
117        }
118
119        output
120    }
121
122    /// Get the most recent `len` bytes from the window.
123    pub fn get_recent(&self, len: usize) -> Vec<u8> {
124        let actual_len = len.min(self.total_written as usize);
125        let mut output = Vec::with_capacity(actual_len);
126        
127        let start = (self.pos.wrapping_sub(actual_len)) & self.mask;
128        for i in 0..actual_len {
129            let idx = (start + i) & self.mask;
130            output.push(self.window[idx]);
131        }
132        
133        output
134    }
135
136    /// Reset the decoder state.
137    pub fn reset(&mut self) {
138        self.window.fill(0);
139        self.pos = 0;
140        self.total_written = 0;
141    }
142}
143
144#[cfg(test)]
145mod tests {
146    use super::*;
147
148    #[test]
149    fn test_literal_output() {
150        let mut decoder = LzssDecoder::new(256);
151        
152        decoder.write_literal(b'H');
153        decoder.write_literal(b'e');
154        decoder.write_literal(b'l');
155        decoder.write_literal(b'l');
156        decoder.write_literal(b'o');
157        
158        assert_eq!(decoder.total_written(), 5);
159        assert_eq!(decoder.get_recent(5), b"Hello");
160    }
161
162    #[test]
163    fn test_copy_match() {
164        let mut decoder = LzssDecoder::new(256);
165        
166        // Write "abc"
167        decoder.write_literal(b'a');
168        decoder.write_literal(b'b');
169        decoder.write_literal(b'c');
170        
171        // Copy from distance 3, length 6 -> "abcabc"
172        decoder.copy_match(3, 6).unwrap();
173        
174        assert_eq!(decoder.total_written(), 9);
175        assert_eq!(decoder.get_recent(9), b"abcabcabc");
176    }
177
178    #[test]
179    fn test_overlapping_copy() {
180        let mut decoder = LzssDecoder::new(256);
181        
182        // Write "a"
183        decoder.write_literal(b'a');
184        
185        // Copy from distance 1, length 5 -> "aaaaa"
186        decoder.copy_match(1, 5).unwrap();
187        
188        assert_eq!(decoder.get_recent(6), b"aaaaaa");
189    }
190
191    #[test]
192    fn test_invalid_distance() {
193        let mut decoder = LzssDecoder::new(256);
194        decoder.write_literal(b'a');
195        
196        // Distance 0 is invalid
197        assert!(decoder.copy_match(0, 1).is_err());
198    }
199}