refpack/data/
decompression.rs

1////////////////////////////////////////////////////////////////////////////////
2// This Source Code Form is subject to the terms of the Mozilla Public         /
3// License, v. 2.0. If a copy of the MPL was not distributed with this         /
4// file, You can obtain one at https://mozilla.org/MPL/2.0/.                   /
5//                                                                             /
6////////////////////////////////////////////////////////////////////////////////
7
8//! Decompression parsing, algorithms, and functionality. Exact decompression
9//! algorithm is subject to change.
10//!
11//! Basic concept is to parse the header, identify key information such as
12//! decompressed header, then parse as a repeating stream of "command" blocks,
13//! consisting of a control code and any (if any) following literal bytes.
14//!
15//! Literal bytes should always be written before performing the control code
16//! operation.
17//!
18//! Control code operations are "run length encoded", which is a format of
19//! encoding where the "length" of a pointer-style control can be longer than
20//! the lookback length. This indicates that the section within the lookback
21//! region should repeat until the length is fulfilled
22//!
23//! # Example Byte-by-byte Algorithm
24//!
25//! This is *normally* handled via byte-by-byte decoding, where a lookback
26//! position and write position counter are incremented at the same time,
27//! however other faster implementations may be possible. This algorithm
28//! explanation is purely intended to illustrate the concept.
29//!
30//! ## Algorithm steps
31//!
32//! while in reality the possible allowed values are any arbitrary byte,
33//! characters are used to indicate whole bytes to simplify illustration
34//!
35//! Given the current decoded output of `DEADBEEF`, the next control encountered
36//! has a lookback of `4`, and a length of `16`.
37//!
38//! ### 1. Create Pointers
39//! Create pointers/get indexes for the current lookback position and current
40//! output position
41//!
42//! ```text
43//! DEADBEEF
44//!     ^   ^
45//!     LB  O
46//! ```
47//!
48//! ### 2. Copy from lookback to output
49//! Take the current byte at the lookback position, and copy it to the output
50//! position
51//!
52//! ```text
53//! DEADBEEFB
54//!     ^   ^
55//!     LB  O
56//! ```
57//!
58//! ### 3. Advance Pointers
59//! Increment both the lookback position and the output position
60//!
61//! ```text
62//! DEADBEEFB
63//!      ^   ^
64//!      LB  O
65//! ```
66//!
67//! ### 4. Repeat `length` times
68//! Steps 2 and 3 should repeat a total of times equal to the length of the
69//! control block.
70//!
71//! #### 4.A "What about when the lookback reaches already output?"
72//! In a case where the length overruns into the already written output, the
73//! process continues as normal and the output starts to repeat.
74//!
75//! in this example output, On the 5th iteration you will encounter this state:
76//! ```text
77//! DEADBEEFBEEF
78//!         ^   ^
79//!         LB  O
80//! ```
81//! The lookback is currently pointing at the first "B" that has been written.
82//! This isn't actually a problem, and step 2 and 3 should be followed as normal:
83//!
84//! ```text
85//! DEADBEEFBEEFB
86//!         ^   ^
87//!         LB  O
88//! ```
89//!
90//! ```text
91//! DEADBEEFBEEFB
92//!          ^   ^
93//!          LB  O
94//! ```
95//!
96//! ## Final Result
97//! Given that the algorithm was followed properly, the final output of the
98//! example input should be
99//! ```text
100//! DEADBEEFBEEFBEEFBEEFBEEF
101//! ```
102
103use std::cmp::max;
104use std::io::{Cursor, Read, Seek, Write};
105
106use crate::RefPackError;
107use crate::data::control::{Command, CommandKind};
108use crate::data::{copy_from_reader, rle_decode_fixed};
109use crate::format::Format;
110use crate::header::Header;
111
112// Returning the internal buffer is the fastest way to return the data
113// since that way the buffer doesn't have to be copied,
114// this function is used to reach optimal performance
115fn decompress_internal<F: Format>(
116    reader: &mut (impl Read + Seek),
117) -> Result<Vec<u8>, RefPackError> {
118    let Header {
119        decompressed_length,
120        ..
121    } = Header::read::<F::HeaderMode>(reader)?;
122
123    let mut decompression_buffer = vec![0; decompressed_length as usize];
124    let mut position = 0usize;
125
126    loop {
127        let command = Command::read(reader)?;
128
129        while position + command.literal as usize + command.length as usize
130            > decompression_buffer.len()
131        {
132            decompression_buffer.resize(
133                max(
134                    command.literal as usize + command.length as usize,
135                    decompression_buffer.len() * 2,
136                ),
137                0,
138            );
139        }
140
141        match command.kind {
142            CommandKind::Short | CommandKind::Medium | CommandKind::Long => {
143                if command.literal > 0 {
144                    position = copy_from_reader(
145                        &mut decompression_buffer,
146                        reader,
147                        position,
148                        command.literal as usize,
149                    )?;
150                }
151                position = rle_decode_fixed(
152                    &mut decompression_buffer,
153                    position,
154                    command.offset as usize,
155                    command.length as usize,
156                )
157                .map_err(|error| RefPackError::ControlError { error, position })?;
158            }
159            CommandKind::Literal => {
160                position = copy_from_reader(
161                    &mut decompression_buffer,
162                    reader,
163                    position,
164                    command.literal as usize,
165                )?;
166            }
167            CommandKind::Stop => {
168                position = copy_from_reader(
169                    &mut decompression_buffer,
170                    reader,
171                    position,
172                    command.literal as usize,
173                )?;
174                decompression_buffer.resize(position, 0);
175                break;
176            }
177        }
178    }
179
180    Ok(decompression_buffer)
181}
182
183/// Decompress `refpack` data. Accepts arbitrary `Read`s and `Write`s.
184///
185/// # Example
186///
187/// ```Rust
188/// use std::io::Cursor;
189///
190/// let mut input = Cursor::new(/* some refpack data */);
191/// let mut output = Cursor::new(Vec::new());
192///
193/// // decompress the input into the output
194/// refpack::compress(&mut input, &mut output);
195/// // output now contains the decompressed version of the input
196/// ```
197/// # Errors
198/// - [RefPackError::BadMagic]: Header magic was malformed, likely indicating
199///   either uncompressed data or attempting to decompress data in an incorrect
200///   format
201/// - [RefPackError::BadFlags]: Header magic was malformed, likely indicating
202///   either uncompressed data or attempting to decompress data in an incorrect
203///   format
204/// - [RefPackError::ControlError]: Invalid control code operation was attempted
205///   to be performed. This normally indicated corrupted or invalid refpack
206///   data
207/// - [RefPackError::Io]: Generic IO error occured while attempting to read or
208///   write data
209pub fn decompress<F: Format>(
210    reader: &mut (impl Read + Seek),
211    writer: &mut impl Write,
212) -> Result<(), RefPackError> {
213    let data = decompress_internal::<F>(reader)?;
214
215    writer.write_all(data.as_slice())?;
216    writer.flush()?;
217
218    Ok(())
219}
220
221/// Wrapped decompress function with a bit easier and cleaner of an API.
222/// Takes a slice of bytes and returns a Vec of byes
223/// In implementation this just creates `Cursor`s for the reader and writer and
224/// calls `decompress`
225///
226/// # Returns
227///
228/// A Result containing either `Vec<u8>` of the decompressed data or a
229/// `RefPackError`.
230///
231/// # Errors
232/// - [RefPackError::BadMagic]: Header was malformed, likely indicating either
233///   uncompressed data or attempting to decompress data in an incorrect format
234/// - [RefPackError::BadFlags]: Header magic was malformed, likely indicating
235///   either uncompressed data or attempting to decompress data in an incorrect
236///   format
237/// - [RefPackError::ControlError]: Invalid control code operation was attempted
238///   to be performed. This normally indicated corrupted or invalid refpack
239///   data
240/// - [RefPackError::Io]: Generic IO error occured while attempting to read or
241///   write data
242#[inline]
243pub fn easy_decompress<F: Format>(input: &[u8]) -> Result<Vec<u8>, RefPackError> {
244    let mut reader = Cursor::new(input);
245    decompress_internal::<F>(&mut reader)
246}