bzip2_rs/decoder/
mod.rs

1//! bzip2 decoding APIs
2
3use std::convert::TryInto;
4
5pub use self::error::DecoderError;
6pub use self::reader::DecoderReader;
7use crate::bitreader::BitReader;
8use crate::block::Block;
9use crate::header::Header;
10
11mod error;
12mod reader;
13
14/// A low-level decoder implementation
15///
16/// This decoder does no IO by itself, instead enough data
17/// has to be written to it in order for it to be able
18/// to decode the next block. After that the decompressed content
19/// for the block can be read until all of the data from the block
20/// has been exhausted.
21/// Repeating this process for every block in sequence will result
22/// into the entire file being decompressed.
23///
24/// ```rust
25/// use bzip2_rs::decoder::{Decoder, ReadState, WriteState};
26///
27/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
28/// let mut compressed_file: &[u8] = include_bytes!("../../tests/samplefiles/sample1.bz2").as_ref();
29/// let mut output = Vec::new();
30///
31/// let mut decoder = Decoder::new();
32///
33/// assert!(
34///     !compressed_file.is_empty(),
35///     "empty files will cause the following loop to spin forever"
36/// );
37///
38/// let mut buf = [0; 1024];
39/// loop {
40///     match decoder.read(&mut buf)? {
41///         ReadState::NeedsWrite(space) => {
42///             // `Decoder` needs more data to be written to it before it
43///             // can decode the next block.
44///             // If we reached the end of the file `compressed_file.len()` will be 0,
45///             // signaling to the `Decoder` that the last block is smaller and it can
46///             // proceed with reading.
47///             match decoder.write(&compressed_file)? {
48///                 WriteState::NeedsRead => unreachable!(),
49///                 WriteState::Written(written) => compressed_file = &compressed_file[written..],
50///             };
51///         }
52///         ReadState::Read(n) => {
53///             // `n` uncompressed bytes have been read into `buf`
54///             output.extend_from_slice(&buf[..n]);
55///         }
56///         ReadState::Eof => {
57///             // we reached the end of the file
58///             break;
59///         }
60///     }
61/// }
62///
63/// // `output` contains the decompressed file
64/// let decompressed_file: &[u8] = include_bytes!("../../tests/samplefiles/sample1.ref").as_ref();
65/// assert_eq!(output, decompressed_file);
66/// #
67/// # Ok(())
68/// # }
69/// ```
70pub struct Decoder {
71    header_block: Option<(Header, Block)>,
72
73    skip_bits: usize,
74    in_buf: Vec<u8>,
75
76    eof: bool,
77}
78
/// State returned by [`Decoder::write`]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WriteState {
    /// Enough data has already been written to [`Decoder`]
    /// in order for it to be able to decode the next block.
    /// Now call [`Decoder::read`] to read the decompressed data.
    NeedsRead,
    /// The given number of bytes from the start of the supplied
    /// buffer have been accepted by the [`Decoder`].
    Written(usize),
}
88
/// State returned by [`Decoder::read`]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReadState {
    /// Not enough data has been written to the underlying [`Decoder`]
    /// in order to allow the next block to be decoded. Call
    /// [`Decoder::write`] to write more data. If the end of the file
    /// has been reached, call [`Decoder::write`] with an empty buffer.
    ///
    /// The contained value is the number of bytes the [`Decoder`]
    /// reported having room for at the time of the call.
    NeedsWrite(usize),
    /// The given number of decompressed bytes have been read
    /// into the start of the supplied buffer.
    Read(usize),
    /// The end of the compressed file has been reached and
    /// there is no more data to read
    Eof,
}
102
103impl Decoder {
104    /// Construct a new [`Decoder`], ready to decompress a new bzip2 file
105    pub fn new() -> Self {
106        Self {
107            header_block: None,
108
109            skip_bits: 0,
110            in_buf: Vec::new(),
111
112            eof: false,
113        }
114    }
115
116    fn space(&self) -> usize {
117        match &self.header_block {
118            Some((_, block)) if block.is_reading() => 0,
119            Some((header, _)) => {
120                let max_length = header.max_blocksize() as usize + (self.skip_bits / 8) + 1;
121                max_length - self.in_buf.len()
122            }
123            None => {
124                Header::from_raw_blocksize(1)
125                    .expect("blocksize is valid")
126                    .max_blocksize() as usize
127                    + 4
128            }
129        }
130    }
131
132    /// Write more compressed data into this [`Decoder`]
133    ///
134    /// See the documentation for [`WriteState`] to decide
135    /// what to do next.
136    pub fn write(&mut self, buf: &[u8]) -> Result<WriteState, DecoderError> {
137        let space = self.space();
138
139        match &mut self.header_block {
140            Some((_, block)) if block.is_reading() => Ok(WriteState::NeedsRead),
141            Some((header, block)) => {
142                let written = space.min(buf.len());
143
144                self.in_buf.extend_from_slice(&buf[..written]);
145
146                let minimum = (self.skip_bits / 8) + header.max_blocksize() as usize;
147                if buf.is_empty() || self.in_buf.len() >= minimum {
148                    block.set_ready_for_read();
149                }
150
151                Ok(WriteState::Written(written))
152            }
153            None => {
154                let written = space.min(buf.len());
155                self.in_buf.extend_from_slice(&buf[..written]);
156
157                if self.in_buf.len() < 4 {
158                    return Ok(WriteState::Written(buf.len()));
159                }
160
161                let header = Header::parse(self.in_buf[..4].try_into().unwrap())?;
162                let block = Block::new(header.clone());
163                self.header_block = Some((header, block));
164
165                self.skip_bits = 4 * 8;
166
167                if written == buf.len() {
168                    return Ok(WriteState::Written(written));
169                }
170
171                match self.write(&buf[written..])? {
172                    WriteState::NeedsRead => unreachable!(),
173                    WriteState::Written(n) => Ok(WriteState::Written(n + written)),
174                }
175            }
176        }
177    }
178
179    /// Read more decompressed data from this [`Decoder`]
180    ///
181    /// See the documentation for [`ReadState`] to decide
182    /// what to do next.
183    pub fn read(&mut self, buf: &mut [u8]) -> Result<ReadState, DecoderError> {
184        match &mut self.header_block {
185            Some(_) if self.eof => Ok(ReadState::Eof),
186            Some((_, block)) if block.is_not_ready() => Ok(ReadState::NeedsWrite(self.space())),
187            Some((_, block)) => {
188                let mut reader = BitReader::new(&self.in_buf);
189                reader.advance_by(self.skip_bits);
190
191                let ready_for_read = block.is_ready_for_read();
192
193                let read = block.read(&mut reader, buf)?;
194
195                if read == 0 {
196                    if !buf.is_empty() {
197                        self.eof = ready_for_read;
198                    }
199
200                    return Ok(ReadState::NeedsWrite(self.space()));
201                }
202
203                if read == 0 && !buf.is_empty() {
204                    self.eof = true;
205                }
206
207                self.skip_bits = reader.position();
208
209                if block.is_not_ready() {
210                    let bytes = self.skip_bits / 8;
211
212                    self.in_buf.drain(..bytes);
213
214                    self.skip_bits -= bytes * 8;
215                }
216
217                Ok(ReadState::Read(read))
218            }
219            None => Ok(ReadState::NeedsWrite(self.space())),
220        }
221    }
222}
223
224impl Default for Decoder {
225    fn default() -> Self {
226        Self::new()
227    }
228}