refpack/data/decompression.rs
1////////////////////////////////////////////////////////////////////////////////
2// This Source Code Form is subject to the terms of the Mozilla Public /
3// License, v. 2.0. If a copy of the MPL was not distributed with this /
4// file, You can obtain one at https://mozilla.org/MPL/2.0/. /
5// /
6////////////////////////////////////////////////////////////////////////////////
7
8//! Decompression parsing, algorithms, and functionality. Exact decompression
9//! algorithm is subject to change.
10//!
11//! Basic concept is to parse the header, identify key information such as
12//! decompressed header, then parse as a repeating stream of "command" blocks,
13//! consisting of a control code and any (if any) following literal bytes.
14//!
15//! Literal bytes should always be written before performing the control code
16//! operation.
17//!
18//! Control code operations are "run length encoded", which is a format of
19//! encoding where the "length" of a pointer-style control can be longer than
20//! the lookback length. This indicates that the section within the lookback
21//! region should repeat until the length is fulfilled
22//!
23//! # Example Byte-by-byte Algorithm
24//!
25//! This is *normally* handled via byte-by-byte decoding, where a lookback
26//! position and write position counter are incremented at the same time,
27//! however other faster implementations may be possible. This algorithm
28//! explanation is purely intended to illustrate the concept.
29//!
30//! ## Algorithm steps
31//!
32//! while in reality the possible allowed values are any arbitrary byte,
33//! characters are used to indicate whole bytes to simplify illustration
34//!
35//! Given the current decoded output of `DEADBEEF`, the next control encountered
36//! has a lookback of `4`, and a length of `16`.
37//!
38//! ### 1. Create Pointers
39//! Create pointers/get indexes for the current lookback position and current
40//! output position
41//!
42//! ```text
43//! DEADBEEF
44//! ^ ^
45//! LB O
46//! ```
47//!
48//! ### 2. Copy from lookback to output
49//! Take the current byte at the lookback position, and copy it to the output
50//! position
51//!
52//! ```text
53//! DEADBEEFB
54//! ^ ^
55//! LB O
56//! ```
57//!
58//! ### 3. Advance Pointers
59//! Increment both the lookback position and the output position
60//!
61//! ```text
62//! DEADBEEFB
63//! ^ ^
64//! LB O
65//! ```
66//!
67//! ### 4. Repeat `length` times
68//! Steps 2 and 3 should repeat a total of times equal to the length of the
69//! control block.
70//!
71//! #### 4.A "What about when the lookback reaches already output?"
72//! In a case where the length overruns into the already written output, the
73//! process continues as normal and the output starts to repeat.
74//!
75//! in this example output, On the 5th iteration you will encounter this state:
76//! ```text
77//! DEADBEEFBEEF
78//! ^ ^
79//! LB O
80//! ```
81//! The lookback is currently pointing at the first "B" that has been written.
82//! This isn't actually a problem, and step 2 and 3 should be followed as normal:
83//!
84//! ```text
85//! DEADBEEFBEEFB
86//! ^ ^
87//! LB O
88//! ```
89//!
90//! ```text
91//! DEADBEEFBEEFB
92//! ^ ^
93//! LB O
94//! ```
95//!
96//! ## Final Result
97//! Given that the algorithm was followed properly, the final output of the
98//! example input should be
99//! ```text
100//! DEADBEEFBEEFBEEFBEEFBEEF
101//! ```
102
103use std::cmp::max;
104use std::io::{Cursor, Read, Seek, Write};
105
106use crate::RefPackError;
107use crate::data::control::{Command, CommandKind};
108use crate::data::{copy_from_reader, rle_decode_fixed};
109use crate::format::Format;
110use crate::header::Header;
111
112// Returning the internal buffer is the fastest way to return the data
113// since that way the buffer doesn't have to be copied,
114// this function is used to reach optimal performance
115fn decompress_internal<F: Format>(
116 reader: &mut (impl Read + Seek),
117) -> Result<Vec<u8>, RefPackError> {
118 let Header {
119 decompressed_length,
120 ..
121 } = Header::read::<F::HeaderMode>(reader)?;
122
123 let mut decompression_buffer = vec![0; decompressed_length as usize];
124 let mut position = 0usize;
125
126 loop {
127 let command = Command::read(reader)?;
128
129 while position + command.literal as usize + command.length as usize
130 > decompression_buffer.len()
131 {
132 decompression_buffer.resize(
133 max(
134 command.literal as usize + command.length as usize,
135 decompression_buffer.len() * 2,
136 ),
137 0,
138 );
139 }
140
141 match command.kind {
142 CommandKind::Short | CommandKind::Medium | CommandKind::Long => {
143 if command.literal > 0 {
144 position = copy_from_reader(
145 &mut decompression_buffer,
146 reader,
147 position,
148 command.literal as usize,
149 )?;
150 }
151 position = rle_decode_fixed(
152 &mut decompression_buffer,
153 position,
154 command.offset as usize,
155 command.length as usize,
156 )
157 .map_err(|error| RefPackError::ControlError { error, position })?;
158 }
159 CommandKind::Literal => {
160 position = copy_from_reader(
161 &mut decompression_buffer,
162 reader,
163 position,
164 command.literal as usize,
165 )?;
166 }
167 CommandKind::Stop => {
168 position = copy_from_reader(
169 &mut decompression_buffer,
170 reader,
171 position,
172 command.literal as usize,
173 )?;
174 decompression_buffer.resize(position, 0);
175 break;
176 }
177 }
178 }
179
180 Ok(decompression_buffer)
181}
182
183/// Decompress `refpack` data. Accepts arbitrary `Read`s and `Write`s.
184///
185/// # Example
186///
187/// ```Rust
188/// use std::io::Cursor;
189///
190/// let mut input = Cursor::new(/* some refpack data */);
191/// let mut output = Cursor::new(Vec::new());
192///
193/// // decompress the input into the output
194/// refpack::compress(&mut input, &mut output);
195/// // output now contains the decompressed version of the input
196/// ```
197/// # Errors
198/// - [RefPackError::BadMagic]: Header magic was malformed, likely indicating
199/// either uncompressed data or attempting to decompress data in an incorrect
200/// format
201/// - [RefPackError::BadFlags]: Header magic was malformed, likely indicating
202/// either uncompressed data or attempting to decompress data in an incorrect
203/// format
204/// - [RefPackError::ControlError]: Invalid control code operation was attempted
205/// to be performed. This normally indicated corrupted or invalid refpack
206/// data
207/// - [RefPackError::Io]: Generic IO error occured while attempting to read or
208/// write data
209pub fn decompress<F: Format>(
210 reader: &mut (impl Read + Seek),
211 writer: &mut impl Write,
212) -> Result<(), RefPackError> {
213 let data = decompress_internal::<F>(reader)?;
214
215 writer.write_all(data.as_slice())?;
216 writer.flush()?;
217
218 Ok(())
219}
220
221/// Wrapped decompress function with a bit easier and cleaner of an API.
222/// Takes a slice of bytes and returns a Vec of byes
223/// In implementation this just creates `Cursor`s for the reader and writer and
224/// calls `decompress`
225///
226/// # Returns
227///
228/// A Result containing either `Vec<u8>` of the decompressed data or a
229/// `RefPackError`.
230///
231/// # Errors
232/// - [RefPackError::BadMagic]: Header was malformed, likely indicating either
233/// uncompressed data or attempting to decompress data in an incorrect format
234/// - [RefPackError::BadFlags]: Header magic was malformed, likely indicating
235/// either uncompressed data or attempting to decompress data in an incorrect
236/// format
237/// - [RefPackError::ControlError]: Invalid control code operation was attempted
238/// to be performed. This normally indicated corrupted or invalid refpack
239/// data
240/// - [RefPackError::Io]: Generic IO error occured while attempting to read or
241/// write data
242#[inline]
243pub fn easy_decompress<F: Format>(input: &[u8]) -> Result<Vec<u8>, RefPackError> {
244 let mut reader = Cursor::new(input);
245 decompress_internal::<F>(&mut reader)
246}