Skip to main content

explode/
explode.rs

1use super::codes::{DecodeResult, Decoder};
2use super::{tables, Error, Result};
3
4use arraydeque::ArrayDeque;
5
6/// Low-level decompression interface.
7///
8/// This provides low-level access to the decompression algorithm. If
9/// possible, prefer using [`explode`](fn.explode.html) or
10/// [`ExplodeReader`](struct.ExplodeReader.html) as they are simpler
11/// to use.
12///
13/// The usual control flow with this interface is to provide a buffer
14/// to decompress into with [`with_buffer`](#method.with_buffer), and
15/// then to feed the resulting
16/// [`ExplodeBuffer`](struct.ExplodeBuffer.html) handle with bytes
17/// until it returns `Ok`. Then you can retrieve the filled portion of
18/// the buffer containing your decompressed data.
19///
20/// ```
21/// # fn main() -> explode::Result<()> {
22/// use explode::{Error, Explode};
23///
24/// // some test data to decompress
25/// let input = vec![0x00, 0x04, 0x82, 0x24, 0x25, 0x8f, 0x80, 0x7f];
26/// // which byte we are currently feeding in
27/// let mut i = 0;
28/// // our output buffer
29/// let mut outbuf: [u8; 256] = [0; 256];
30///
31/// // decompress
32/// let mut ex = explode::Explode::new();
33/// let mut exbuf = ex.with_buffer(&mut outbuf);
34/// // loop while we have more input, and decompression is not done
35/// while i < input.len() && !exbuf.done() {
36///     // note we feed exbuf the *same byte* every loop, until it requests
37///     // more input with Error::IncompleteInput.
38///     match exbuf.feed(input[i]) {
39///         Ok(()) => {
40///             // buffer is full. use exbuf.get() to get the filled portion
41///             println!("{:?}", exbuf.get());
42///             // compression may not be finished, so reset and loop again
43///             exbuf.reset();
44///         }
45///
46///         Err(Error::IncompleteInput) => {
47///             // advance our input cursor
48///             i += 1;
49///         }
50///
51///         Err(e) => {
52///             // any other error is a sign the input is invalid
53///             panic!("{:?}", e);
54///         }
55///     }
56/// }
57///
58/// if !exbuf.done() {
59///     // we ran out of input, but decompression isn't done!
60///     panic!("unexpected end of input");
61/// }
62/// # Ok(()) }
63/// ```
64///
65/// Be careful that the input byte you provide to
66/// [`ExplodeBuffer::feed`](struct.ExplodeBuffer.html#method.feed)
67/// only changes when requested by
68/// [`Error::IncompleteInput`](enum.Error.html#variant.IncompleteInput). If
69/// the input changes at any other time, decompression will fail or
70/// produce incorrect output.
71#[derive(Debug)]
72pub struct Explode {
73    state: ExplodeState<Decoder<'static, &'static [u8]>>,
74
75    // header info
76    lit: Option<u8>,
77    dict: Option<u8>,
78
79    // input management
80    input: ExplodeInput,
81
82    // store our window (which cannot exceed 4096 bytes)
83    window: ArrayDeque<[u8; 4096], arraydeque::behavior::Wrapping>,
84}
85
86// hold a byte until it's ready to use
87#[derive(Debug)]
88enum ExplodeInputState {
89    Available(u8),
90    Taken,
91    Waiting,
92}
93
94// help manage the bitstream input
95#[derive(Debug)]
96struct ExplodeInput {
97    next: ExplodeInputState,
98
99    // store unused bits read in
100    bitbuf: u32,
101    bitcount: u8,
102}
103
104// explode state. D is the Huffman decoder type
105#[derive(Debug)]
106enum ExplodeState<D> {
107    Start,
108    Length { decoder: D },
109    LengthExtra { symbol: usize },
110    Distance { len: usize, decoder: D },
111    DistanceExtra { len: usize, symbol: usize },
112    Copy { idx: usize, len: usize },
113    Literal,
114    LiteralCoded { decoder: D },
115    End,
116}
117
118/// A handle to feed input to the decompressor.
119///
120/// This is the primary interface for low-level decompression. You can
121/// get an instance of this by providing an output buffer to
122/// [`Explode::with_buffer`](struct.Explode.html#method.with_buffer).
123///
124/// For a high-level example of how to use this interface, see
125/// [`Explode`](struct.Explode.html).
126#[derive(Debug)]
127pub struct ExplodeBuffer<'a> {
128    parent: &'a mut Explode,
129    buf: &'a mut [u8],
130    pos: usize,
131}
132
133impl ExplodeInputState {
134    fn feed(&mut self, value: u8) {
135        if let ExplodeInputState::Waiting = self {
136            *self = ExplodeInputState::Available(value);
137        }
138    }
139
140    fn take(&mut self) -> Result<u8> {
141        match self {
142            ExplodeInputState::Available(value) => {
143                let v = *value;
144                *self = ExplodeInputState::Taken;
145                Ok(v)
146            }
147            ExplodeInputState::Taken => {
148                *self = ExplodeInputState::Waiting;
149                Err(Error::IncompleteInput)
150            }
151            ExplodeInputState::Waiting => {
152                panic!("double take");
153            }
154        }
155    }
156}
157
158impl ExplodeInput {
159    // read n bits
160    fn bits(&mut self, n: u8) -> Result<u32> {
161        while self.bitcount < n {
162            self.bitbuf |= (self.next.take()? as u32) << self.bitcount;
163            self.bitcount += 8;
164        }
165
166        let val = self.bitbuf;
167        self.bitbuf >>= n;
168        self.bitcount -= n;
169
170        Ok(val & ((1 << n) - 1))
171    }
172
173    // decode using a table
174    fn decode(&mut self, d: &mut Decoder<&'static [u8]>) -> Result<u8> {
175        loop {
176            // codes in this format are inverted from canonical
177            let bit = self.bits(1)? != 1;
178            match d.feed(bit) {
179                DecodeResult::Incomplete => continue,
180                DecodeResult::Invalid => panic!(
181                    "Codebooks are under-subscribed but should not be!"
182                ),
183                DecodeResult::Ok(v) => return Ok(v),
184            }
185        }
186    }
187}
188
189impl<'a> ExplodeBuffer<'a> {
190    /// Feed in a byte `input` to decompress.
191    ///
192    /// Signals a full output buffer by returning `Ok(())`. You can
193    /// then get a reference to the full buffer with
194    /// [`get`](#method.get), and reset the output buffer to empty
195    /// with [`reset`](#method.reset).
196    ///
197    /// Note that you should feed in the same byte *repeatedly* to
198    /// this function, until it signals it is ready for more input by
199    /// returning
200    /// [`Error::IncompleteInput`](enum.Error.html#variant.IncompleteInput).
201    /// Doing anything else will result in a decompression failure or
202    /// bad output.
203    pub fn feed(&mut self, input: u8) -> Result<()> {
204        // lengths are funny -- base val + extra bits
205        static LEN_BASE: &[usize] =
206            &[3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 16, 24, 40, 72, 136, 264];
207        static LEN_EXTRA: &[u8] =
208            &[0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8];
209
210        self.parent.input.next.feed(input);
211
212        // first byte is 0 if literals are uncoded, or 1 if coded
213        let lit = if let Some(lit) = self.parent.lit {
214            lit
215        } else {
216            let lit = self.parent.input.bits(8)? as u8;
217            if lit > 1 {
218                return Err(Error::BadLiteralFlag);
219            }
220            self.parent.lit = Some(lit);
221            lit
222        };
223
224        // second byte is 4, 5, or 6 for # extra bits in distance code
225        // (distance code is 6 + this bits total)
226        let dict = if let Some(dict) = self.parent.dict {
227            dict
228        } else {
229            let dict = self.parent.input.bits(8)? as u8;
230            if dict < 4 || dict > 6 {
231                return Err(Error::BadDictionary);
232            }
233            self.parent.dict = Some(dict);
234            dict
235        };
236
237        // decode literals and length/distance pairs
238        // state machine rules:
239        // each state may only call bits() once
240        // and decode() must store the HuffmanExplode in the state
241        loop {
242            use ExplodeState::*;
243            match self.parent.state {
244                Start => {
245                    if self.parent.input.bits(1)? > 0 {
246                        // this is a length/distance pair. length first.
247                        self.parent.state = Length {
248                            decoder: tables::LENGTH.decoder(),
249                        };
250                    } else {
251                        // this is a literal
252                        if lit > 0 {
253                            self.parent.state = LiteralCoded {
254                                decoder: tables::LITERAL.decoder(),
255                            };
256                        } else {
257                            self.parent.state = Literal;
258                        }
259                    }
260                }
261
262                Length { ref mut decoder } => {
263                    let symbol = self.parent.input.decode(decoder)? as usize;
264                    self.parent.state = LengthExtra { symbol };
265                }
266
267                LengthExtra { symbol } => {
268                    let len = LEN_BASE[symbol]
269                        + self.parent.input.bits(LEN_EXTRA[symbol])? as usize;
270                    if len == 519 {
271                        // end code
272                        self.parent.state = End;
273                    } else {
274                        // distance next
275                        self.parent.state = Distance {
276                            len,
277                            decoder: tables::DISTANCE.decoder(),
278                        };
279                    }
280                }
281
282                Distance {
283                    len,
284                    ref mut decoder,
285                } => {
286                    let symbol = self.parent.input.decode(decoder)? as usize;
287                    self.parent.state = DistanceExtra { len, symbol };
288                }
289
290                DistanceExtra { len, symbol } => {
291                    let extra_bits = if len == 2 { 2 } else { dict };
292                    let mut dist =
293                        self.parent.input.bits(extra_bits)? as usize + 1;
294                    dist += symbol << extra_bits;
295
296                    if dist > self.parent.window.len() {
297                        // too far back
298                        return Err(Error::BadDistance);
299                    }
300
301                    self.parent.state = Copy {
302                        idx: self.parent.window.len() - dist,
303                        len,
304                    };
305                }
306
307                Copy {
308                    ref mut idx,
309                    ref mut len,
310                } => {
311                    while *len > 0 {
312                        if self.pos >= self.buf.len() {
313                            // not enough room
314                            return Ok(());
315                        }
316
317                        let value = self.parent.window[*idx];
318                        *len -= 1;
319                        if !self.parent.window.is_full() {
320                            *idx += 1;
321                        }
322
323                        self.parent.window.push_back(value);
324                        self.buf[self.pos] = value;
325                        self.pos += 1;
326                    }
327                    self.parent.state = Start;
328                }
329
330                Literal => {
331                    if self.pos >= self.buf.len() {
332                        // not enough room
333                        return Ok(());
334                    }
335                    let value = self.parent.input.bits(8)? as u8;
336                    self.parent.window.push_back(value);
337                    self.buf[self.pos] = value;
338                    self.pos += 1;
339                    self.parent.state = Start;
340                }
341
342                LiteralCoded { ref mut decoder } => {
343                    if self.pos >= self.buf.len() {
344                        // not enough room
345                        return Ok(());
346                    }
347                    let value = self.parent.input.decode(decoder)?;
348                    self.parent.window.push_back(value);
349                    self.buf[self.pos] = value;
350                    self.pos += 1;
351                    self.parent.state = Start;
352                }
353
354                End => {
355                    return Ok(());
356                }
357            }
358        }
359    }
360
361    /// Get a reference to the filled portion of the output buffer.
362    ///
363    /// This is usually called after [`feed`](#method.feed) returns `Ok(())`.
364    pub fn get(&self) -> &[u8] {
365        &self.buf[..self.pos]
366    }
367
368    /// Return the amount of output produced so far.
369    pub fn len(&self) -> usize {
370        self.pos
371    }
372
373    /// Reset the output buffer to empty.
374    ///
375    /// Note that this does *not* reset the entire decompressor state.
376    pub fn reset(&mut self) {
377        self.pos = 0;
378    }
379
380    /// Returns true if decompression is finished.
381    ///
382    /// This does the same thing as
383    /// [`Explode::done`](struct.Explode.html#method.done) but is
384    /// usable while a `ExplodeBuffer` is still in scope.
385    pub fn done(&self) -> bool {
386        self.parent.done()
387    }
388}
389
390impl Explode {
391    /// Create a new Explode decompression state.
392    pub fn new() -> Self {
393        Explode {
394            state: ExplodeState::Start,
395            lit: None,
396            dict: None,
397            input: ExplodeInput {
398                next: ExplodeInputState::Waiting,
399                bitbuf: 0,
400                bitcount: 0,
401            },
402            window: ArrayDeque::new(),
403        }
404    }
405
406    /// Provide a buffer to decompress into.
407    ///
408    /// This returns a [`ExplodeBuffer`](struct.ExplodeBuffer.html)
409    /// handle that is used for feeding input to decompress and other
410    /// operations.
411    pub fn with_buffer<'a>(
412        &'a mut self,
413        buf: &'a mut [u8],
414    ) -> ExplodeBuffer<'a> {
415        ExplodeBuffer {
416            parent: self,
417            buf,
418            pos: 0,
419        }
420    }
421
422    /// Returns true if decompression is finished.
423    ///
424    /// If this function can't be used because a
425    /// [`ExplodeBuffer`](struct.ExplodeBuffer.html) is currently
426    /// borrowing this object mutably, you can use
427    /// [`ExplodeBuffer::done`](struct.ExplodeBuffer.html#method.done)
428    /// instead.
429    pub fn done(&self) -> bool {
430        if let ExplodeState::End = self.state {
431            true
432        } else {
433            false
434        }
435    }
436}
437
438/// Decompress a block of `data` in memory, using the given auxiliary
439/// buffer `buf`.
440///
441/// This gives you control over the size of the internal buffer
442/// used. If you do not need that control, use
443/// [`explode`](fn.explode.html) instead.
444///
445/// ```
446/// # fn main() -> explode::Result<()> {
447/// let mut buf: [u8; 1] = [0; 1];
448/// let bytes = vec![0x00, 0x04, 0x82, 0x24, 0x25, 0x8f, 0x80, 0x7f];
449/// let result = explode::explode_with_buffer(&bytes, &mut buf)?;
450/// assert_eq!(result, "AIAIAIAIAIAIA".as_bytes());
451/// # Ok(()) }
452/// ```
453pub fn explode_with_buffer(data: &[u8], buf: &mut [u8]) -> Result<Vec<u8>> {
454    let mut dec = Explode::new();
455    let mut i = 0;
456    let mut out = Vec::with_capacity(buf.len());
457    loop {
458        let mut decbuf = dec.with_buffer(buf);
459        while i < data.len() {
460            match decbuf.feed(data[i]) {
461                Ok(()) => {
462                    let decompressed = decbuf.get();
463                    out.extend_from_slice(decompressed);
464                    if decbuf.done() {
465                        // we're done
466                        return Ok(out);
467                    }
468                    decbuf.reset();
469                }
470
471                Err(Error::IncompleteInput) => {
472                    i += 1;
473                    continue;
474                }
475
476                Err(e) => return Err(e),
477            }
478        }
479
480        // out of input
481        return Err(Error::IncompleteInput);
482    }
483}
484
485/// Decompress a block of `data` in memory.
486///
487/// ```
488/// # fn main() -> explode::Result<()> {
489/// let bytes = vec![0x00, 0x04, 0x82, 0x24, 0x25, 0x8f, 0x80, 0x7f];
490/// let result = explode::explode(&bytes)?;
491/// assert_eq!(result, "AIAIAIAIAIAIA".as_bytes());
492/// # Ok(()) }
493/// ```
494///
495/// This function will internally decompress the given memory in
496/// blocks of 4096 bytes. If you wish to use a different block size,
497/// see [`explode_with_buffer`](fn.explode_with_buffer.html).
498pub fn explode(data: &[u8]) -> Result<Vec<u8>> {
499    let mut buf = [0; 4096];
500    explode_with_buffer(data, &mut buf)
501}
502
503#[cfg(test)]
504mod tests {
505    use super::{explode, explode_with_buffer, Error};
506    use crate::examples::EXAMPLES;
507
508    #[test]
509    fn explode_simple() {
510        for (encoded, decoded) in EXAMPLES {
511            let ours = explode(encoded).unwrap();
512            assert_eq!(*decoded, &ours[..]);
513        }
514    }
515
516    #[test]
517    fn explode_small() {
518        let mut buf = [0; 1];
519        for (encoded, decoded) in EXAMPLES {
520            let ours = explode_with_buffer(encoded, &mut buf).unwrap();
521            assert_eq!(*decoded, &ours[..]);
522        }
523    }
524
525    #[test]
526    fn explode_incomplete() {
527        for (encoded, _) in EXAMPLES {
528            let ours = explode(&encoded[..encoded.len() - 1]);
529            match ours {
530                Err(Error::IncompleteInput) => (),
531                _ => panic!("incorrectly parsed incomplete input"),
532            }
533        }
534    }
535
536    #[test]
537    fn explode_extra() {
538        for (encoded, decoded) in EXAMPLES {
539            let mut encodedplus: Vec<u8> = encoded.iter().cloned().collect();
540            encodedplus.push(42);
541            let ours = explode(&encodedplus).unwrap();
542            assert_eq!(*decoded, &ours[..]);
543        }
544    }
545}