refpack/data/decompression.rs
1////////////////////////////////////////////////////////////////////////////////
2// This Source Code Form is subject to the terms of the Mozilla Public /
3// License, v. 2.0. If a copy of the MPL was not distributed with this /
4// file, You can obtain one at https://mozilla.org/MPL/2.0/. /
5// /
6////////////////////////////////////////////////////////////////////////////////
7
8//! Decompression parsing, algorithms, and functionality. Exact decompression
9//! algorithm is subject to change.
10//!
11//! Basic concept is to parse the header, identify key information such as
12//! decompressed header, then parse as a repeating stream of "command" blocks,
13//! consisting of a control code and any (if any) following literal bytes.
14//!
15//! Literal bytes should always be written before performing the control code
16//! operation.
17//!
18//! Control code operations are "run length encoded", which is a format of
19//! encoding where the "length" of a pointer-style control can be longer than
20//! the lookback length. This indicates that the section within the lookback
21//! region should repeat until the length is fulfilled
22//!
23//! # Example Byte-by-byte Algorithm
24//!
25//! This is *normally* handled via byte-by-byte decoding, where a lookback
26//! position and write position counter are incremented at the same time,
27//! however other faster implementations may be possible. This algorithm
28//! explanation is purely intended to illustrate the concept.
29//!
30//! ## Algorithm steps
31//!
32//! while in reality the possible allowed values are any arbitrary byte,
33//! characters are used to indicate whole bytes to simplify illustration
34//!
35//! Given the current decoded output of `DEADBEEF`, the next control encountered
36//! has a lookback of `4`, and a length of `16`.
37//!
38//! ### 1. Create Pointers
39//! Create pointers/get indexes for the current lookback position and current
40//! output position
41//!
42//! ```text
43//! DEADBEEF
44//! ^ ^
45//! LB O
46//! ```
47//!
48//! ### 2. Copy from lookback to output
49//! Take the current byte at the lookback position, and copy it to the output
50//! position
51//!
52//! ```text
53//! DEADBEEFB
54//! ^ ^
55//! LB O
56//! ```
57//!
58//! ### 3. Advance Pointers
59//! Increment both the lookback position and the output position
60//!
61//! ```text
62//! DEADBEEFB
63//! ^ ^
64//! LB O
65//! ```
66//!
67//! ### 4. Repeat `length` times
68//! Steps 2 and 3 should repeat a total of times equal to the length of the
69//! control block.
70//!
71//! #### 4.A "What about when the lookback reaches already output?"
72//! In a case where the length overruns into the already written output, the
73//! process continues as normal and the output starts to repeat.
74//!
75//! in this example output, On the 5th iteration you will encounter this state:
76//! ```text
77//! DEADBEEFBEEF
78//! ^ ^
79//! LB O
80//! ```
81//! The lookback is currently pointing at the first "B" that has been written.
82//! This isn't actually a problem, and step 2 and 3 should be followed as normal:
83//!
84//! ```text
85//! DEADBEEFBEEFB
86//! ^ ^
87//! LB O
88//! ```
89//!
90//! ```text
91//! DEADBEEFBEEFB
92//! ^ ^
93//! LB O
94//! ```
95//!
96//! ## Final Result
97//! Given that the algorithm was followed properly, the final output of the
98//! example input should be
99//! ```text
100//! DEADBEEFBEEFBEEFBEEFBEEF
101//! ```
102use std::io::{Cursor, Read, Seek, Write};
103
104use crate::RefPackError;
105use crate::data::control::{Command, CommandKind};
106use crate::data::{copy_from_reader, rle_decode_fixed};
107use crate::format::Format;
108use crate::header::Header;
109
110// Returning the internal buffer is the fastest way to return the data
111// since that way the buffer doesn't have to be copied,
112// this function is used to reach optimal performance
113fn decompress_internal<F: Format>(
114 reader: &mut (impl Read + Seek),
115) -> Result<Vec<u8>, RefPackError> {
116 let Header {
117 decompressed_length,
118 ..
119 } = Header::read::<F::HeaderMode>(reader)?;
120
121 let mut decompression_buffer = vec![0; decompressed_length as usize];
122 let mut position = 0usize;
123
124 loop {
125 let command = Command::read(reader)?;
126
127 match command.kind {
128 CommandKind::Short | CommandKind::Medium | CommandKind::Long => {
129 if command.literal > 0 {
130 position = copy_from_reader(
131 &mut decompression_buffer,
132 reader,
133 position,
134 command.literal as usize,
135 )?;
136 }
137 while position + command.length as usize > decompression_buffer.len() {
138 decompression_buffer.resize(decompression_buffer.len() * 2, 0);
139 }
140 position = rle_decode_fixed(
141 &mut decompression_buffer,
142 position,
143 command.offset as usize,
144 command.length as usize,
145 )
146 .map_err(|error| RefPackError::ControlError { error, position })?;
147 }
148 CommandKind::Literal => {
149 position = copy_from_reader(
150 &mut decompression_buffer,
151 reader,
152 position,
153 command.literal as usize,
154 )?;
155 }
156 CommandKind::Stop => {
157 position = copy_from_reader(
158 &mut decompression_buffer,
159 reader,
160 position,
161 command.literal as usize,
162 )?;
163 decompression_buffer.resize(position, 0);
164 break;
165 }
166 }
167 }
168
169 Ok(decompression_buffer)
170}
171
172/// Decompress `refpack` data. Accepts arbitrary `Read`s and `Write`s.
173///
174/// # Example
175///
176/// ```Rust
177/// use std::io::Cursor;
178///
179/// let mut input = Cursor::new(/* some refpack data */);
180/// let mut output = Cursor::new(Vec::new());
181///
182/// // decompress the input into the output
183/// refpack::compress(&mut input, &mut output);
184/// // output now contains the decompressed version of the input
185/// ```
186/// # Errors
187/// - [RefPackError::BadMagic]: Header magic was malformed, likely indicating
188/// either uncompressed data or attempting to decompress data in an incorrect
189/// format
190/// - [RefPackError::BadFlags]: Header magic was malformed, likely indicating
191/// either uncompressed data or attempting to decompress data in an incorrect
192/// format
193/// - [RefPackError::ControlError]: Invalid control code operation was attempted
194/// to be performed. This normally indicated corrupted or invalid refpack
195/// data
196/// - [RefPackError::Io]: Generic IO error occured while attempting to read or
197/// write data
198pub fn decompress<F: Format>(
199 reader: &mut (impl Read + Seek),
200 writer: &mut impl Write,
201) -> Result<(), RefPackError> {
202 let data = decompress_internal::<F>(reader)?;
203
204 writer.write_all(data.as_slice())?;
205 writer.flush()?;
206
207 Ok(())
208}
209
210/// Wrapped decompress function with a bit easier and cleaner of an API.
211/// Takes a slice of bytes and returns a Vec of byes
212/// In implementation this just creates `Cursor`s for the reader and writer and
213/// calls `decompress`
214///
215/// # Returns
216///
217/// A Result containing either `Vec<u8>` of the decompressed data or a
218/// `RefPackError`.
219///
220/// # Errors
221/// - [RefPackError::BadMagic]: Header was malformed, likely indicating either
222/// uncompressed data or attempting to decompress data in an incorrect format
223/// - [RefPackError::BadFlags]: Header magic was malformed, likely indicating
224/// either uncompressed data or attempting to decompress data in an incorrect
225/// format
226/// - [RefPackError::ControlError]: Invalid control code operation was attempted
227/// to be performed. This normally indicated corrupted or invalid refpack
228/// data
229/// - [RefPackError::Io]: Generic IO error occured while attempting to read or
230/// write data
231#[inline]
232pub fn easy_decompress<F: Format>(input: &[u8]) -> Result<Vec<u8>, RefPackError> {
233 let mut reader = Cursor::new(input);
234 decompress_internal::<F>(&mut reader)
235}