rar_stream/decompress/
lzss.rs1use super::{DecompressError, Result};
6
7pub const WINDOW_SIZE_29: usize = 0x200000;
9
10pub const WINDOW_SIZE_50: usize = 0x4000000;
12
13pub struct LzssDecoder {
15 window: Vec<u8>,
17 mask: usize,
19 pos: usize,
21 total_written: u64,
23 flushed_pos: u64,
25 output: Vec<u8>,
27}
28
29impl LzssDecoder {
30 pub fn new(window_size: usize) -> Self {
32 debug_assert!(window_size.is_power_of_two());
33 Self {
34 window: vec![0; window_size],
35 mask: window_size - 1,
36 pos: 0,
37 total_written: 0,
38 flushed_pos: 0,
39 output: Vec::new(),
40 }
41 }
42
43 pub fn rar29() -> Self {
45 Self::new(WINDOW_SIZE_29)
46 }
47
48 pub fn rar50() -> Self {
50 Self::new(WINDOW_SIZE_50)
51 }
52
53 #[inline]
56 pub fn reset(&mut self) {
57 self.pos = 0;
58 self.total_written = 0;
59 self.output.clear();
60 }
62
63 pub fn enable_output(&mut self, capacity: usize) {
65 self.output = Vec::with_capacity(capacity);
66 }
67
68 #[inline(always)]
70 pub fn write_literal(&mut self, byte: u8) {
71 unsafe {
73 *self.window.get_unchecked_mut(self.pos) = byte;
74 }
75 self.pos = (self.pos + 1) & self.mask;
76 self.total_written += 1;
77 }
78
79 pub fn flush_to_output(&mut self, up_to: u64) {
82 let current_output_len = self.output.len() as u64;
83 if up_to <= current_output_len {
84 return; }
86
87 let flush_start = current_output_len as usize;
88 let flush_end = up_to as usize;
89 let flush_len = flush_end - flush_start;
90 let window_start = flush_start & self.mask;
91
92 self.output.reserve(flush_len);
94
95 if window_start + flush_len <= self.window.len() {
97 self.output
99 .extend_from_slice(&self.window[window_start..window_start + flush_len]);
100 } else {
101 let first_part = self.window.len() - window_start;
103 self.output.extend_from_slice(&self.window[window_start..]);
104 self.output
105 .extend_from_slice(&self.window[..flush_len - first_part]);
106 }
107
108 self.flushed_pos = up_to;
109 }
110
111 pub fn window_mut(&mut self) -> &mut [u8] {
113 &mut self.window
114 }
115
116 pub fn window_mask(&self) -> u32 {
118 self.mask as u32
119 }
120
121 pub fn flushed_pos(&self) -> u64 {
123 self.flushed_pos
124 }
125
126 pub fn write_filtered_to_output(&mut self, data: &[u8], position: u64) {
129 let current_len = self.output.len() as u64;
131 if current_len < position {
132 let window_start = current_len as usize;
135 let flush_len = (position - current_len) as usize;
136 self.output.reserve(flush_len);
137 for i in 0..flush_len {
138 let window_idx = (window_start + i) & self.mask;
139 self.output.push(self.window[window_idx]);
140 }
141 }
142 self.output.extend_from_slice(data);
143 self.flushed_pos = position + data.len() as u64;
144 }
145
146 pub fn window(&self) -> &[u8] {
148 &self.window
149 }
150
151 #[inline(always)]
154 pub fn copy_match(&mut self, distance: u32, length: u32) -> Result<()> {
155 if distance == 0 || distance as u64 > self.total_written {
157 return self.copy_match_error(distance);
158 }
159
160 let len = length as usize;
161 let dist = distance as usize;
162
163 if dist >= len && self.pos + len <= self.window.len() && self.pos >= dist {
165 let src_start = self.pos - dist;
167 self.window
168 .copy_within(src_start..src_start + len, self.pos);
169 self.pos += len;
170 self.total_written += length as u64;
171 return Ok(());
172 }
173
174 if self.pos + len <= self.window.len() && self.pos >= dist && dist >= 8 {
177 let src_start = self.pos - dist;
178 let mut copied = 0;
179 while copied < len {
180 let chunk = (len - copied).min(dist);
181 self.window
182 .copy_within(src_start..src_start + chunk, self.pos + copied);
183 copied += chunk;
184 }
185 self.pos += len;
186 self.total_written += length as u64;
187 return Ok(());
188 }
189
190 let src_pos = (self.pos.wrapping_sub(dist)) & self.mask;
193 let window_ptr = self.window.as_mut_ptr();
194
195 for i in 0..len {
196 let src_idx = (src_pos + i) & self.mask;
197 let dest_idx = (self.pos + i) & self.mask;
198 unsafe {
200 let byte = *window_ptr.add(src_idx);
201 *window_ptr.add(dest_idx) = byte;
202 }
203 }
204 self.pos = (self.pos + len) & self.mask;
205
206 self.total_written += length as u64;
207 Ok(())
208 }
209
210 #[cold]
212 #[inline(never)]
213 fn copy_match_error(&self, distance: u32) -> Result<()> {
214 Err(DecompressError::InvalidBackReference {
215 offset: distance,
216 position: self.pos as u32,
217 })
218 }
219
220 pub fn position(&self) -> usize {
222 self.pos
223 }
224
225 pub fn total_written(&self) -> u64 {
227 self.total_written
228 }
229
230 pub fn get_output(&self, start: u64, len: usize) -> Vec<u8> {
233 if !self.output.is_empty() {
235 let start = start as usize;
236 let end = (start + len).min(self.output.len());
237 return self.output[start..end].to_vec();
238 }
239
240 let mut output = Vec::with_capacity(len);
241 let window_len = self.window.len();
242
243 let start_pos = if self.total_written <= window_len as u64 {
245 start as usize
246 } else {
247 let _written_in_window = self.total_written as usize % window_len;
249 let offset = (self.total_written - start) as usize;
250 if offset > window_len {
251 return output; }
253 (self.pos.wrapping_sub(offset)) & self.mask
254 };
255
256 for i in 0..len {
257 let idx = (start_pos + i) & self.mask;
258 output.push(self.window[idx]);
259 }
260
261 output
262 }
263
264 pub fn take_output(&mut self) -> Vec<u8> {
267 std::mem::take(&mut self.output)
268 }
269
270 pub fn output(&self) -> &[u8] {
272 &self.output
273 }
274
275 pub fn output_mut(&mut self) -> &mut [u8] {
277 &mut self.output
278 }
279
280 pub fn get_recent(&self, len: usize) -> Vec<u8> {
282 let actual_len = len.min(self.total_written as usize);
283 let mut output = Vec::with_capacity(actual_len);
284
285 let start = (self.pos.wrapping_sub(actual_len)) & self.mask;
286 for i in 0..actual_len {
287 let idx = (start + i) & self.mask;
288 output.push(self.window[idx]);
289 }
290
291 output
292 }
293}
294
295#[cfg(test)]
296mod tests {
297 use super::*;
298
299 #[test]
300 fn test_literal_output() {
301 let mut decoder = LzssDecoder::new(256);
302
303 decoder.write_literal(b'H');
304 decoder.write_literal(b'e');
305 decoder.write_literal(b'l');
306 decoder.write_literal(b'l');
307 decoder.write_literal(b'o');
308
309 assert_eq!(decoder.total_written(), 5);
310 assert_eq!(decoder.get_recent(5), b"Hello");
311 }
312
313 #[test]
314 fn test_copy_match() {
315 let mut decoder = LzssDecoder::new(256);
316
317 decoder.write_literal(b'a');
319 decoder.write_literal(b'b');
320 decoder.write_literal(b'c');
321
322 decoder.copy_match(3, 6).unwrap();
324
325 assert_eq!(decoder.total_written(), 9);
326 assert_eq!(decoder.get_recent(9), b"abcabcabc");
327 }
328
329 #[test]
330 fn test_overlapping_copy() {
331 let mut decoder = LzssDecoder::new(256);
332
333 decoder.write_literal(b'a');
335
336 decoder.copy_match(1, 5).unwrap();
338
339 assert_eq!(decoder.get_recent(6), b"aaaaaa");
340 }
341
342 #[test]
343 fn test_invalid_distance() {
344 let mut decoder = LzssDecoder::new(256);
345 decoder.write_literal(b'a');
346
347 assert!(decoder.copy_match(0, 1).is_err());
349 }
350}