bzip2_rs/decoder/mod.rs
1//! bzip2 decoding APIs
2
3use std::convert::TryInto;
4
5pub use self::error::DecoderError;
6pub use self::reader::DecoderReader;
7use crate::bitreader::BitReader;
8use crate::block::Block;
9use crate::header::Header;
10
11mod error;
12mod reader;
13
14/// A low-level decoder implementation
15///
16/// This decoder does no IO by itself, instead enough data
17/// has to be written to it in order for it to be able
18/// to decode the next block. After that the decompressed content
19/// for the block can be read until all of the data from the block
20/// has been exhausted.
21/// Repeating this process for every block in sequence will result
22/// into the entire file being decompressed.
23///
24/// ```rust
25/// use bzip2_rs::decoder::{Decoder, ReadState, WriteState};
26///
27/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
28/// let mut compressed_file: &[u8] = include_bytes!("../../tests/samplefiles/sample1.bz2").as_ref();
29/// let mut output = Vec::new();
30///
31/// let mut decoder = Decoder::new();
32///
33/// assert!(
34/// !compressed_file.is_empty(),
35/// "empty files will cause the following loop to spin forever"
36/// );
37///
38/// let mut buf = [0; 1024];
39/// loop {
40/// match decoder.read(&mut buf)? {
41/// ReadState::NeedsWrite(space) => {
42/// // `Decoder` needs more data to be written to it before it
43/// // can decode the next block.
44/// // If we reached the end of the file `compressed_file.len()` will be 0,
45/// // signaling to the `Decoder` that the last block is smaller and it can
46/// // proceed with reading.
47/// match decoder.write(&compressed_file)? {
48/// WriteState::NeedsRead => unreachable!(),
49/// WriteState::Written(written) => compressed_file = &compressed_file[written..],
50/// };
51/// }
52/// ReadState::Read(n) => {
53/// // `n` uncompressed bytes have been read into `buf`
54/// output.extend_from_slice(&buf[..n]);
55/// }
56/// ReadState::Eof => {
57/// // we reached the end of the file
58/// break;
59/// }
60/// }
61/// }
62///
63/// // `output` contains the decompressed file
64/// let decompressed_file: &[u8] = include_bytes!("../../tests/samplefiles/sample1.ref").as_ref();
65/// assert_eq!(output, decompressed_file);
66/// #
67/// # Ok(())
68/// # }
69/// ```
70pub struct Decoder {
71 header_block: Option<(Header, Block)>,
72
73 skip_bits: usize,
74 in_buf: Vec<u8>,
75
76 eof: bool,
77}
78
/// State returned by [`Decoder::write`]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WriteState {
    /// Enough data has already been written to the [`Decoder`]
    /// for it to be able to decode the next block.
    /// Call [`Decoder::read`] to read the decompressed data.
    NeedsRead,
    /// The given number of bytes has been accepted from the input
    /// buffer. It may be smaller than the length of the buffer that
    /// was passed in, in which case the remainder has to be written
    /// again later.
    Written(usize),
}
88
/// State returned by [`Decoder::read`]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReadState {
    /// Not enough data has been written to the underlying [`Decoder`]
    /// in order to allow the next block to be decoded. Call
    /// [`Decoder::write`] to write more data. If the end of the file
    /// has been reached, call [`Decoder::write`] with an empty buffer.
    ///
    /// The payload is the number of bytes the [`Decoder`] is currently
    /// able to accept.
    NeedsWrite(usize),
    /// The given number of decompressed bytes has been read into the
    /// output buffer.
    Read(usize),
    /// The end of the compressed file has been reached and
    /// there is no more data to read
    Eof,
}
102
103impl Decoder {
104 /// Construct a new [`Decoder`], ready to decompress a new bzip2 file
105 pub fn new() -> Self {
106 Self {
107 header_block: None,
108
109 skip_bits: 0,
110 in_buf: Vec::new(),
111
112 eof: false,
113 }
114 }
115
116 fn space(&self) -> usize {
117 match &self.header_block {
118 Some((_, block)) if block.is_reading() => 0,
119 Some((header, _)) => {
120 let max_length = header.max_blocksize() as usize + (self.skip_bits / 8) + 1;
121 max_length - self.in_buf.len()
122 }
123 None => {
124 Header::from_raw_blocksize(1)
125 .expect("blocksize is valid")
126 .max_blocksize() as usize
127 + 4
128 }
129 }
130 }
131
132 /// Write more compressed data into this [`Decoder`]
133 ///
134 /// See the documentation for [`WriteState`] to decide
135 /// what to do next.
136 pub fn write(&mut self, buf: &[u8]) -> Result<WriteState, DecoderError> {
137 let space = self.space();
138
139 match &mut self.header_block {
140 Some((_, block)) if block.is_reading() => Ok(WriteState::NeedsRead),
141 Some((header, block)) => {
142 let written = space.min(buf.len());
143
144 self.in_buf.extend_from_slice(&buf[..written]);
145
146 let minimum = (self.skip_bits / 8) + header.max_blocksize() as usize;
147 if buf.is_empty() || self.in_buf.len() >= minimum {
148 block.set_ready_for_read();
149 }
150
151 Ok(WriteState::Written(written))
152 }
153 None => {
154 let written = space.min(buf.len());
155 self.in_buf.extend_from_slice(&buf[..written]);
156
157 if self.in_buf.len() < 4 {
158 return Ok(WriteState::Written(buf.len()));
159 }
160
161 let header = Header::parse(self.in_buf[..4].try_into().unwrap())?;
162 let block = Block::new(header.clone());
163 self.header_block = Some((header, block));
164
165 self.skip_bits = 4 * 8;
166
167 if written == buf.len() {
168 return Ok(WriteState::Written(written));
169 }
170
171 match self.write(&buf[written..])? {
172 WriteState::NeedsRead => unreachable!(),
173 WriteState::Written(n) => Ok(WriteState::Written(n + written)),
174 }
175 }
176 }
177 }
178
179 /// Read more decompressed data from this [`Decoder`]
180 ///
181 /// See the documentation for [`ReadState`] to decide
182 /// what to do next.
183 pub fn read(&mut self, buf: &mut [u8]) -> Result<ReadState, DecoderError> {
184 match &mut self.header_block {
185 Some(_) if self.eof => Ok(ReadState::Eof),
186 Some((_, block)) if block.is_not_ready() => Ok(ReadState::NeedsWrite(self.space())),
187 Some((_, block)) => {
188 let mut reader = BitReader::new(&self.in_buf);
189 reader.advance_by(self.skip_bits);
190
191 let ready_for_read = block.is_ready_for_read();
192
193 let read = block.read(&mut reader, buf)?;
194
195 if read == 0 {
196 if !buf.is_empty() {
197 self.eof = ready_for_read;
198 }
199
200 return Ok(ReadState::NeedsWrite(self.space()));
201 }
202
203 if read == 0 && !buf.is_empty() {
204 self.eof = true;
205 }
206
207 self.skip_bits = reader.position();
208
209 if block.is_not_ready() {
210 let bytes = self.skip_bits / 8;
211
212 self.in_buf.drain(..bytes);
213
214 self.skip_bits -= bytes * 8;
215 }
216
217 Ok(ReadState::Read(read))
218 }
219 None => Ok(ReadState::NeedsWrite(self.space())),
220 }
221 }
222}
223
224impl Default for Decoder {
225 fn default() -> Self {
226 Self::new()
227 }
228}