explode/explode.rs
1use super::codes::{DecodeResult, Decoder};
2use super::{tables, Error, Result};
3
4use arraydeque::ArrayDeque;
5
6/// Low-level decompression interface.
7///
8/// This provides low-level access to the decompression algorithm. If
9/// possible, prefer using [`explode`](fn.explode.html) or
10/// [`ExplodeReader`](struct.ExplodeReader.html) as they are simpler
11/// to use.
12///
13/// The usual control flow with this interface is to provide a buffer
14/// to decompress into with [`with_buffer`](#method.with_buffer), and
15/// then to feed the resulting
16/// [`ExplodeBuffer`](struct.ExplodeBuffer.html) handle with bytes
17/// until it returns `Ok`. Then you can retrieve the filled portion of
18/// the buffer containing your decompressed data.
19///
20/// ```
21/// # fn main() -> explode::Result<()> {
22/// use explode::{Error, Explode};
23///
24/// // some test data to decompress
25/// let input = vec![0x00, 0x04, 0x82, 0x24, 0x25, 0x8f, 0x80, 0x7f];
26/// // which byte we are currently feeding in
27/// let mut i = 0;
28/// // our output buffer
29/// let mut outbuf: [u8; 256] = [0; 256];
30///
31/// // decompress
32/// let mut ex = explode::Explode::new();
33/// let mut exbuf = ex.with_buffer(&mut outbuf);
34/// // loop while we have more input, and decompression is not done
35/// while i < input.len() && !exbuf.done() {
36/// // note we feed exbuf the *same byte* every loop, until it requests
37/// // more input with Error::IncompleteInput.
38/// match exbuf.feed(input[i]) {
39/// Ok(()) => {
40/// // buffer is full. use exbuf.get() to get the filled portion
41/// println!("{:?}", exbuf.get());
42/// // compression may not be finished, so reset and loop again
43/// exbuf.reset();
44/// }
45///
46/// Err(Error::IncompleteInput) => {
47/// // advance our input cursor
48/// i += 1;
49/// }
50///
51/// Err(e) => {
52/// // any other error is a sign the input is invalid
53/// panic!("{:?}", e);
54/// }
55/// }
56/// }
57///
58/// if !exbuf.done() {
59/// // we ran out of input, but decompression isn't done!
60/// panic!("unexpected end of input");
61/// }
62/// # Ok(()) }
63/// ```
64///
65/// Be careful that the input byte you provide to
66/// [`ExplodeBuffer::feed`](struct.ExplodeBuffer.html#method.feed)
67/// only changes when requested by
68/// [`Error::IncompleteInput`](enum.Error.html#variant.IncompleteInput). If
69/// the input changes at any other time, decompression will fail or
70/// produce incorrect output.
71#[derive(Debug)]
72pub struct Explode {
73 state: ExplodeState<Decoder<'static, &'static [u8]>>,
74
75 // header info
76 lit: Option<u8>,
77 dict: Option<u8>,
78
79 // input management
80 input: ExplodeInput,
81
82 // store our window (which cannot exceed 4096 bytes)
83 window: ArrayDeque<[u8; 4096], arraydeque::behavior::Wrapping>,
84}
85
86// hold a byte until it's ready to use
87#[derive(Debug)]
88enum ExplodeInputState {
89 Available(u8),
90 Taken,
91 Waiting,
92}
93
94// help manage the bitstream input
95#[derive(Debug)]
96struct ExplodeInput {
97 next: ExplodeInputState,
98
99 // store unused bits read in
100 bitbuf: u32,
101 bitcount: u8,
102}
103
104// explode state. D is the Huffman decoder type
105#[derive(Debug)]
106enum ExplodeState<D> {
107 Start,
108 Length { decoder: D },
109 LengthExtra { symbol: usize },
110 Distance { len: usize, decoder: D },
111 DistanceExtra { len: usize, symbol: usize },
112 Copy { idx: usize, len: usize },
113 Literal,
114 LiteralCoded { decoder: D },
115 End,
116}
117
118/// A handle to feed input to the decompressor.
119///
120/// This is the primary interface for low-level decompression. You can
121/// get an instance of this by providing an output buffer to
122/// [`Explode::with_buffer`](struct.Explode.html#method.with_buffer).
123///
124/// For a high-level example of how to use this interface, see
125/// [`Explode`](struct.Explode.html).
126#[derive(Debug)]
127pub struct ExplodeBuffer<'a> {
128 parent: &'a mut Explode,
129 buf: &'a mut [u8],
130 pos: usize,
131}
132
133impl ExplodeInputState {
134 fn feed(&mut self, value: u8) {
135 if let ExplodeInputState::Waiting = self {
136 *self = ExplodeInputState::Available(value);
137 }
138 }
139
140 fn take(&mut self) -> Result<u8> {
141 match self {
142 ExplodeInputState::Available(value) => {
143 let v = *value;
144 *self = ExplodeInputState::Taken;
145 Ok(v)
146 }
147 ExplodeInputState::Taken => {
148 *self = ExplodeInputState::Waiting;
149 Err(Error::IncompleteInput)
150 }
151 ExplodeInputState::Waiting => {
152 panic!("double take");
153 }
154 }
155 }
156}
157
158impl ExplodeInput {
159 // read n bits
160 fn bits(&mut self, n: u8) -> Result<u32> {
161 while self.bitcount < n {
162 self.bitbuf |= (self.next.take()? as u32) << self.bitcount;
163 self.bitcount += 8;
164 }
165
166 let val = self.bitbuf;
167 self.bitbuf >>= n;
168 self.bitcount -= n;
169
170 Ok(val & ((1 << n) - 1))
171 }
172
173 // decode using a table
174 fn decode(&mut self, d: &mut Decoder<&'static [u8]>) -> Result<u8> {
175 loop {
176 // codes in this format are inverted from canonical
177 let bit = self.bits(1)? != 1;
178 match d.feed(bit) {
179 DecodeResult::Incomplete => continue,
180 DecodeResult::Invalid => panic!(
181 "Codebooks are under-subscribed but should not be!"
182 ),
183 DecodeResult::Ok(v) => return Ok(v),
184 }
185 }
186 }
187}
188
189impl<'a> ExplodeBuffer<'a> {
190 /// Feed in a byte `input` to decompress.
191 ///
192 /// Signals a full output buffer by returning `Ok(())`. You can
193 /// then get a reference to the full buffer with
194 /// [`get`](#method.get), and reset the output buffer to empty
195 /// with [`reset`](#method.reset).
196 ///
197 /// Note that you should feed in the same byte *repeatedly* to
198 /// this function, until it signals it is ready for more input by
199 /// returning
200 /// [`Error::IncompleteInput`](enum.Error.html#variant.IncompleteInput).
201 /// Doing anything else will result in a decompression failure or
202 /// bad output.
203 pub fn feed(&mut self, input: u8) -> Result<()> {
204 // lengths are funny -- base val + extra bits
205 static LEN_BASE: &[usize] =
206 &[3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 16, 24, 40, 72, 136, 264];
207 static LEN_EXTRA: &[u8] =
208 &[0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8];
209
210 self.parent.input.next.feed(input);
211
212 // first byte is 0 if literals are uncoded, or 1 if coded
213 let lit = if let Some(lit) = self.parent.lit {
214 lit
215 } else {
216 let lit = self.parent.input.bits(8)? as u8;
217 if lit > 1 {
218 return Err(Error::BadLiteralFlag);
219 }
220 self.parent.lit = Some(lit);
221 lit
222 };
223
224 // second byte is 4, 5, or 6 for # extra bits in distance code
225 // (distance code is 6 + this bits total)
226 let dict = if let Some(dict) = self.parent.dict {
227 dict
228 } else {
229 let dict = self.parent.input.bits(8)? as u8;
230 if dict < 4 || dict > 6 {
231 return Err(Error::BadDictionary);
232 }
233 self.parent.dict = Some(dict);
234 dict
235 };
236
237 // decode literals and length/distance pairs
238 // state machine rules:
239 // each state may only call bits() once
240 // and decode() must store the HuffmanExplode in the state
241 loop {
242 use ExplodeState::*;
243 match self.parent.state {
244 Start => {
245 if self.parent.input.bits(1)? > 0 {
246 // this is a length/distance pair. length first.
247 self.parent.state = Length {
248 decoder: tables::LENGTH.decoder(),
249 };
250 } else {
251 // this is a literal
252 if lit > 0 {
253 self.parent.state = LiteralCoded {
254 decoder: tables::LITERAL.decoder(),
255 };
256 } else {
257 self.parent.state = Literal;
258 }
259 }
260 }
261
262 Length { ref mut decoder } => {
263 let symbol = self.parent.input.decode(decoder)? as usize;
264 self.parent.state = LengthExtra { symbol };
265 }
266
267 LengthExtra { symbol } => {
268 let len = LEN_BASE[symbol]
269 + self.parent.input.bits(LEN_EXTRA[symbol])? as usize;
270 if len == 519 {
271 // end code
272 self.parent.state = End;
273 } else {
274 // distance next
275 self.parent.state = Distance {
276 len,
277 decoder: tables::DISTANCE.decoder(),
278 };
279 }
280 }
281
282 Distance {
283 len,
284 ref mut decoder,
285 } => {
286 let symbol = self.parent.input.decode(decoder)? as usize;
287 self.parent.state = DistanceExtra { len, symbol };
288 }
289
290 DistanceExtra { len, symbol } => {
291 let extra_bits = if len == 2 { 2 } else { dict };
292 let mut dist =
293 self.parent.input.bits(extra_bits)? as usize + 1;
294 dist += symbol << extra_bits;
295
296 if dist > self.parent.window.len() {
297 // too far back
298 return Err(Error::BadDistance);
299 }
300
301 self.parent.state = Copy {
302 idx: self.parent.window.len() - dist,
303 len,
304 };
305 }
306
307 Copy {
308 ref mut idx,
309 ref mut len,
310 } => {
311 while *len > 0 {
312 if self.pos >= self.buf.len() {
313 // not enough room
314 return Ok(());
315 }
316
317 let value = self.parent.window[*idx];
318 *len -= 1;
319 if !self.parent.window.is_full() {
320 *idx += 1;
321 }
322
323 self.parent.window.push_back(value);
324 self.buf[self.pos] = value;
325 self.pos += 1;
326 }
327 self.parent.state = Start;
328 }
329
330 Literal => {
331 if self.pos >= self.buf.len() {
332 // not enough room
333 return Ok(());
334 }
335 let value = self.parent.input.bits(8)? as u8;
336 self.parent.window.push_back(value);
337 self.buf[self.pos] = value;
338 self.pos += 1;
339 self.parent.state = Start;
340 }
341
342 LiteralCoded { ref mut decoder } => {
343 if self.pos >= self.buf.len() {
344 // not enough room
345 return Ok(());
346 }
347 let value = self.parent.input.decode(decoder)?;
348 self.parent.window.push_back(value);
349 self.buf[self.pos] = value;
350 self.pos += 1;
351 self.parent.state = Start;
352 }
353
354 End => {
355 return Ok(());
356 }
357 }
358 }
359 }
360
361 /// Get a reference to the filled portion of the output buffer.
362 ///
363 /// This is usually called after [`feed`](#method.feed) returns `Ok(())`.
364 pub fn get(&self) -> &[u8] {
365 &self.buf[..self.pos]
366 }
367
368 /// Return the amount of output produced so far.
369 pub fn len(&self) -> usize {
370 self.pos
371 }
372
373 /// Reset the output buffer to empty.
374 ///
375 /// Note that this does *not* reset the entire decompressor state.
376 pub fn reset(&mut self) {
377 self.pos = 0;
378 }
379
380 /// Returns true if decompression is finished.
381 ///
382 /// This does the same thing as
383 /// [`Explode::done`](struct.Explode.html#method.done) but is
384 /// usable while a `ExplodeBuffer` is still in scope.
385 pub fn done(&self) -> bool {
386 self.parent.done()
387 }
388}
389
390impl Explode {
391 /// Create a new Explode decompression state.
392 pub fn new() -> Self {
393 Explode {
394 state: ExplodeState::Start,
395 lit: None,
396 dict: None,
397 input: ExplodeInput {
398 next: ExplodeInputState::Waiting,
399 bitbuf: 0,
400 bitcount: 0,
401 },
402 window: ArrayDeque::new(),
403 }
404 }
405
406 /// Provide a buffer to decompress into.
407 ///
408 /// This returns a [`ExplodeBuffer`](struct.ExplodeBuffer.html)
409 /// handle that is used for feeding input to decompress and other
410 /// operations.
411 pub fn with_buffer<'a>(
412 &'a mut self,
413 buf: &'a mut [u8],
414 ) -> ExplodeBuffer<'a> {
415 ExplodeBuffer {
416 parent: self,
417 buf,
418 pos: 0,
419 }
420 }
421
422 /// Returns true if decompression is finished.
423 ///
424 /// If this function can't be used because a
425 /// [`ExplodeBuffer`](struct.ExplodeBuffer.html) is currently
426 /// borrowing this object mutably, you can use
427 /// [`ExplodeBuffer::done`](struct.ExplodeBuffer.html#method.done)
428 /// instead.
429 pub fn done(&self) -> bool {
430 if let ExplodeState::End = self.state {
431 true
432 } else {
433 false
434 }
435 }
436}
437
438/// Decompress a block of `data` in memory, using the given auxiliary
439/// buffer `buf`.
440///
441/// This gives you control over the size of the internal buffer
442/// used. If you do not need that control, use
443/// [`explode`](fn.explode.html) instead.
444///
445/// ```
446/// # fn main() -> explode::Result<()> {
447/// let mut buf: [u8; 1] = [0; 1];
448/// let bytes = vec![0x00, 0x04, 0x82, 0x24, 0x25, 0x8f, 0x80, 0x7f];
449/// let result = explode::explode_with_buffer(&bytes, &mut buf)?;
450/// assert_eq!(result, "AIAIAIAIAIAIA".as_bytes());
451/// # Ok(()) }
452/// ```
453pub fn explode_with_buffer(data: &[u8], buf: &mut [u8]) -> Result<Vec<u8>> {
454 let mut dec = Explode::new();
455 let mut i = 0;
456 let mut out = Vec::with_capacity(buf.len());
457 loop {
458 let mut decbuf = dec.with_buffer(buf);
459 while i < data.len() {
460 match decbuf.feed(data[i]) {
461 Ok(()) => {
462 let decompressed = decbuf.get();
463 out.extend_from_slice(decompressed);
464 if decbuf.done() {
465 // we're done
466 return Ok(out);
467 }
468 decbuf.reset();
469 }
470
471 Err(Error::IncompleteInput) => {
472 i += 1;
473 continue;
474 }
475
476 Err(e) => return Err(e),
477 }
478 }
479
480 // out of input
481 return Err(Error::IncompleteInput);
482 }
483}
484
485/// Decompress a block of `data` in memory.
486///
487/// ```
488/// # fn main() -> explode::Result<()> {
489/// let bytes = vec![0x00, 0x04, 0x82, 0x24, 0x25, 0x8f, 0x80, 0x7f];
490/// let result = explode::explode(&bytes)?;
491/// assert_eq!(result, "AIAIAIAIAIAIA".as_bytes());
492/// # Ok(()) }
493/// ```
494///
495/// This function will internally decompress the given memory in
496/// blocks of 4096 bytes. If you wish to use a different block size,
497/// see [`explode_with_buffer`](fn.explode_with_buffer.html).
498pub fn explode(data: &[u8]) -> Result<Vec<u8>> {
499 let mut buf = [0; 4096];
500 explode_with_buffer(data, &mut buf)
501}
502
503#[cfg(test)]
504mod tests {
505 use super::{explode, explode_with_buffer, Error};
506 use crate::examples::EXAMPLES;
507
508 #[test]
509 fn explode_simple() {
510 for (encoded, decoded) in EXAMPLES {
511 let ours = explode(encoded).unwrap();
512 assert_eq!(*decoded, &ours[..]);
513 }
514 }
515
516 #[test]
517 fn explode_small() {
518 let mut buf = [0; 1];
519 for (encoded, decoded) in EXAMPLES {
520 let ours = explode_with_buffer(encoded, &mut buf).unwrap();
521 assert_eq!(*decoded, &ours[..]);
522 }
523 }
524
525 #[test]
526 fn explode_incomplete() {
527 for (encoded, _) in EXAMPLES {
528 let ours = explode(&encoded[..encoded.len() - 1]);
529 match ours {
530 Err(Error::IncompleteInput) => (),
531 _ => panic!("incorrectly parsed incomplete input"),
532 }
533 }
534 }
535
536 #[test]
537 fn explode_extra() {
538 for (encoded, decoded) in EXAMPLES {
539 let mut encodedplus: Vec<u8> = encoded.iter().cloned().collect();
540 encodedplus.push(42);
541 let ours = explode(&encodedplus).unwrap();
542 assert_eq!(*decoded, &ours[..]);
543 }
544 }
545}