// ai_flate2/gz/bufread.rs

1use alloc::vec::Vec;
2use core::cmp;
3use core::mem;
4
5use crate::io;
6use crate::io::prelude::*;
7
8use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser};
9use crate::crc::CrcReader;
10use crate::deflate;
11use crate::Compression;
12
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `*pos` is advanced past the copied bytes and the number of bytes copied is
/// returned. `*pos` must not be larger than `from.len()`.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    let start = *pos;
    // Limited by whichever is smaller: the unread tail of `from` or `into`.
    let count = (from.len() - start).min(into.len());
    let end = start + count;
    into[..count].copy_from_slice(&from[start..end]);
    *pos = end;
    count
}
19
20/// A gzip streaming encoder
21///
22/// This structure implements a [`Read`] interface. When read from, it reads
23/// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
24///
25/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
26/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
27///
28/// # Examples
29///
30#[cfg_attr(not(feature = "std"), doc = "```ignore")]
31#[cfg_attr(feature = "std", doc = "```")]
32/// use std::io::prelude::*;
33/// use std::io;
34/// use ai_flate2::Compression;
35/// use ai_flate2::bufread::GzEncoder;
36/// use std::fs::File;
37/// use std::io::BufReader;
38///
39/// // Opens sample file, compresses the contents and returns a Vector or error
40/// // File wrapped in a BufReader implements BufRead
41///
42/// fn open_hello_world() -> io::Result<Vec<u8>> {
43///     let f = File::open("examples/hello_world.txt")?;
44///     let b = BufReader::new(f);
45///     let mut gz = GzEncoder::new(b, Compression::fast());
46///     let mut buffer = Vec::new();
47///     gz.read_to_end(&mut buffer)?;
48///     Ok(buffer)
49/// }
50/// ```
51#[derive(Debug)]
52pub struct GzEncoder<R> {
53    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
54    header: Vec<u8>,
55    pos: usize,
56    eof: bool,
57}
58
59pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
60    let crc = CrcReader::new(r);
61    GzEncoder {
62        inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
63        header,
64        pos: 0,
65        eof: false,
66    }
67}
68
69impl<R: BufRead> GzEncoder<R> {
70    /// Creates a new encoder which will use the given compression level.
71    ///
72    /// The encoder is not configured specially for the emitted header. For
73    /// header configuration, see the `GzBuilder` type.
74    ///
75    /// The data read from the stream `r` will be compressed and available
76    /// through the returned reader.
77    pub fn new(r: R, level: Compression) -> GzEncoder<R> {
78        GzBuilder::new().buf_read(r, level)
79    }
80
81    fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
82        if self.pos == 8 {
83            return Ok(0);
84        }
85        let crc = self.inner.get_ref().crc();
86        let calced_crc_bytes = crc.sum().to_le_bytes();
87        let arr = [
88            calced_crc_bytes[0],
89            calced_crc_bytes[1],
90            calced_crc_bytes[2],
91            calced_crc_bytes[3],
92            crc.amount() as u8,
93            (crc.amount() >> 8) as u8,
94            (crc.amount() >> 16) as u8,
95            (crc.amount() >> 24) as u8,
96        ];
97        Ok(copy(into, &arr, &mut self.pos))
98    }
99}
100
101impl<R> GzEncoder<R> {
102    /// Acquires a reference to the underlying reader.
103    pub fn get_ref(&self) -> &R {
104        self.inner.get_ref().get_ref()
105    }
106
107    /// Acquires a mutable reference to the underlying reader.
108    ///
109    /// Note that mutation of the reader may result in surprising results if
110    /// this encoder is continued to be used.
111    pub fn get_mut(&mut self) -> &mut R {
112        self.inner.get_mut().get_mut()
113    }
114
115    /// Returns the underlying stream, consuming this encoder
116    pub fn into_inner(self) -> R {
117        self.inner.into_inner().into_inner()
118    }
119}
120
/// Splits an 8-byte trailer into its two little-endian `u32` fields.
///
/// Returns `(crc, amt)`, where `crc` is decoded from bytes `0..4` and `amt`
/// from bytes `4..8`.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let [c0, c1, c2, c3, a0, a1, a2, a3] = *buf;
    (
        u32::from_le_bytes([c0, c1, c2, c3]),
        u32::from_le_bytes([a0, a1, a2, a3]),
    )
}
133
134impl<R: BufRead> Read for GzEncoder<R> {
135    fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
136        let mut amt = 0;
137        if self.eof {
138            return self.read_footer(into);
139        } else if self.pos < self.header.len() {
140            amt += copy(into, &self.header, &mut self.pos);
141            if amt == into.len() {
142                return Ok(amt);
143            }
144            let tmp = into;
145            into = &mut tmp[amt..];
146        }
147        match self.inner.read(into)? {
148            0 => {
149                self.eof = true;
150                self.pos = 0;
151                self.read_footer(into)
152            }
153            n => Ok(amt + n),
154        }
155    }
156}
157
158impl<R: BufRead + Write> Write for GzEncoder<R> {
159    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
160        self.get_mut().write(buf)
161    }
162
163    fn flush(&mut self) -> io::Result<()> {
164        self.get_mut().flush()
165    }
166}
167
168/// A decoder for a single member of a [gzip file].
169///
170/// This structure implements a [`Read`] interface. When read from, it reads
171/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
172///
173/// After reading a single member of the gzip data this reader will return
174/// Ok(0) even if there are more bytes available in the underlying reader.
175/// If you need the following bytes, call `into_inner()` after Ok(0) to
176/// recover the underlying reader.
177///
178/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
179/// or read more
180/// [in the introduction](../index.html#about-multi-member-gzip-files).
181///
182/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
183/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
184/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
185///
186/// # Examples
187///
188#[cfg_attr(not(feature = "std"), doc = "```ignore")]
189#[cfg_attr(feature = "std", doc = "```")]
190/// use std::io::prelude::*;
191/// use std::io;
192/// # use ai_flate2::Compression;
193/// # use ai_flate2::write::GzEncoder;
194/// use ai_flate2::bufread::GzDecoder;
195///
196/// # fn main() {
197/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
198/// #   e.write_all(b"Hello World").unwrap();
199/// #   let bytes = e.finish().unwrap();
200/// #   println!("{}", decode_reader(bytes).unwrap());
201/// # }
202/// #
203/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
204/// // Here &[u8] implements BufRead
205///
206/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
207///    let mut gz = GzDecoder::new(&bytes[..]);
208///    let mut s = String::new();
209///    gz.read_to_string(&mut s)?;
210///    Ok(s)
211/// }
212/// ```
213#[derive(Debug)]
214pub struct GzDecoder<R> {
215    state: GzState,
216    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
217    multi: bool,
218}
219
220#[derive(Debug)]
221enum GzState {
222    Header(GzHeaderParser),
223    Body(GzHeader),
224    Finished(GzHeader, usize, [u8; 8]),
225    Err(io::Error),
226    End(Option<GzHeader>),
227}
228
229pub fn reset_decoder_data<R>(decoder: &mut GzDecoder<R>) {
230    decoder.state = GzState::Header(GzHeaderParser::new());
231    decoder.reader.reset(); // reset CrcReader
232    decoder.reader.get_mut().reset_data(); // reset DeflateDecoder
233}
234
235impl<R: BufRead> GzDecoder<R> {
236    /// Creates a new decoder from the given reader, immediately parsing the
237    /// gzip header.
238    pub fn new(mut r: R) -> GzDecoder<R> {
239        let mut header_parser = GzHeaderParser::new();
240
241        let state = match header_parser.parse(&mut r) {
242            Ok(_) => GzState::Body(GzHeader::from(header_parser)),
243            Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => {
244                GzState::Header(header_parser)
245            }
246            Err(err) => GzState::Err(err),
247        };
248
249        GzDecoder {
250            state,
251            reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
252            multi: false,
253        }
254    }
255
256    fn multi(mut self, flag: bool) -> GzDecoder<R> {
257        self.multi = flag;
258        self
259    }
260}
261
262impl<R> GzDecoder<R> {
263    /// Returns the header associated with this stream, if it was valid
264    pub fn header(&self) -> Option<&GzHeader> {
265        match &self.state {
266            GzState::Body(header) | GzState::Finished(header, _, _) => Some(header),
267            GzState::End(header) => header.as_ref(),
268            _ => None,
269        }
270    }
271
272    /// Acquires a reference to the underlying reader.
273    pub fn get_ref(&self) -> &R {
274        self.reader.get_ref().get_ref()
275    }
276
277    /// Acquires a mutable reference to the underlying stream.
278    ///
279    /// Note that mutation of the stream may result in surprising results if
280    /// this decoder is continued to be used.
281    pub fn get_mut(&mut self) -> &mut R {
282        self.reader.get_mut().get_mut()
283    }
284
285    /// Consumes this decoder, returning the underlying reader.
286    pub fn into_inner(self) -> R {
287        self.reader.into_inner().into_inner()
288    }
289
290    /// Resets the state of this decoder entirely, swapping out the input
291    /// stream for another.
292    ///
293    /// This will reset the internal state of this decoder and replace the
294    /// input stream with the one provided, returning the previous input
295    /// stream. Future data read from this decoder will be the decompressed
296    /// version of `r`'s data.
297    pub fn reset(&mut self, r: R) -> R {
298        reset_decoder_data(self);
299        self.reader.get_mut().reset(r)
300    }
301}
302
303impl<R: BufRead> Read for GzDecoder<R> {
304    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
305        loop {
306            match &mut self.state {
307                GzState::Header(parser) => {
308                    parser.parse(self.reader.get_mut().get_mut())?;
309                    self.state = GzState::Body(GzHeader::from(mem::take(parser)));
310                }
311                GzState::Body(header) => {
312                    if into.is_empty() {
313                        return Ok(0);
314                    }
315                    match self.reader.read(into)? {
316                        0 => {
317                            self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
318                        }
319                        n => {
320                            return Ok(n);
321                        }
322                    }
323                }
324                GzState::Finished(header, pos, buf) => {
325                    if *pos < buf.len() {
326                        *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
327                    } else {
328                        let (crc, amt) = finish(buf);
329
330                        if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
331                            self.state = GzState::End(Some(mem::take(header)));
332                            return Err(corrupt());
333                        } else if self.multi {
334                            let is_eof = self
335                                .reader
336                                .get_mut()
337                                .get_mut()
338                                .fill_buf()
339                                .map(|buf| buf.is_empty())?;
340
341                            if is_eof {
342                                self.state = GzState::End(Some(mem::take(header)));
343                            } else {
344                                self.reader.reset();
345                                self.reader.get_mut().reset_data();
346                                self.state = GzState::Header(GzHeaderParser::new())
347                            }
348                        } else {
349                            self.state = GzState::End(Some(mem::take(header)));
350                        }
351                    }
352                }
353                GzState::Err(err) => {
354                    let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
355                    self.state = GzState::End(None);
356                    return result;
357                }
358                GzState::End(_) => return Ok(0),
359            }
360        }
361    }
362}
363
364impl<R: BufRead + Write> Write for GzDecoder<R> {
365    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
366        self.get_mut().write(buf)
367    }
368
369    fn flush(&mut self) -> io::Result<()> {
370        self.get_mut().flush()
371    }
372}
373
374/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
375///
376/// This structure implements a [`Read`] interface. When read from, it reads
377/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
378///
379/// A gzip file consists of a series of *members* concatenated one after another.
380/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the
381/// underlying reader does. For a file, this reads to the end of the file.
382///
383/// To handle members separately, see [GzDecoder] or read more
384/// [in the introduction](../index.html#about-multi-member-gzip-files).
385///
386/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
387/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
388/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
389///
390/// # Examples
391///
392#[cfg_attr(not(feature = "std"), doc = "```ignore")]
393#[cfg_attr(feature = "std", doc = "```")]
394/// use std::io::prelude::*;
395/// use std::io;
396/// # use ai_flate2::Compression;
397/// # use ai_flate2::write::GzEncoder;
398/// use ai_flate2::bufread::MultiGzDecoder;
399///
400/// # fn main() {
401/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
402/// #   e.write_all(b"Hello World").unwrap();
403/// #   let bytes = e.finish().unwrap();
404/// #   println!("{}", decode_reader(bytes).unwrap());
405/// # }
406/// #
407/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
408/// // Here &[u8] implements BufRead
409///
410/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
411///    let mut gz = MultiGzDecoder::new(&bytes[..]);
412///    let mut s = String::new();
413///    gz.read_to_string(&mut s)?;
414///    Ok(s)
415/// }
416/// ```
417#[derive(Debug)]
418pub struct MultiGzDecoder<R>(GzDecoder<R>);
419
420impl<R: BufRead> MultiGzDecoder<R> {
421    /// Creates a new decoder from the given reader, immediately parsing the
422    /// (first) gzip header. If the gzip stream contains multiple members all will
423    /// be decoded.
424    pub fn new(r: R) -> MultiGzDecoder<R> {
425        MultiGzDecoder(GzDecoder::new(r).multi(true))
426    }
427}
428
429impl<R> MultiGzDecoder<R> {
430    /// Returns the current header associated with this stream, if it's valid
431    pub fn header(&self) -> Option<&GzHeader> {
432        self.0.header()
433    }
434
435    /// Acquires a reference to the underlying reader.
436    pub fn get_ref(&self) -> &R {
437        self.0.get_ref()
438    }
439
440    /// Acquires a mutable reference to the underlying stream.
441    ///
442    /// Note that mutation of the stream may result in surprising results if
443    /// this decoder is continued to be used.
444    pub fn get_mut(&mut self) -> &mut R {
445        self.0.get_mut()
446    }
447
448    /// Consumes this decoder, returning the underlying reader.
449    pub fn into_inner(self) -> R {
450        self.0.into_inner()
451    }
452}
453
454impl<R: BufRead> Read for MultiGzDecoder<R> {
455    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
456        self.0.read(into)
457    }
458}
459
#[cfg(all(test, feature = "std"))]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            e.write_all(expected.as_ref()).unwrap();
            let mut b = e.finish().unwrap();
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Read into a non-empty buffer: a zero-length buffer would yield 0
        // regardless of the decoder's state and prove nothing.
        let mut scratch = [0u8; 8];
        assert_eq!(
            decoder.read(&mut scratch).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }

    // Compresses `data` into a single gzip member with the default level.
    fn compress_data(data: &[u8]) -> Vec<u8> {
        use crate::write::GzEncoder;

        let mut e = GzEncoder::new(Vec::new(), Compression::default());
        e.write_all(data).unwrap();
        e.finish().unwrap()
    }

    // A decoder that finished one stream can be reset onto a second stream.
    #[test]
    fn decode_with_reset() {
        let data1 = b"Hello World";
        let data2 = b"Goodbye World";

        let compressed1 = compress_data(data1);
        let compressed2 = compress_data(data2);

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed1.as_slice());
        decoder.read_to_end(&mut output).unwrap();
        assert_eq!(output, data1);

        output.clear();
        decoder.reset(compressed2.as_slice());
        decoder.read_to_end(&mut output).unwrap();
        assert_eq!(output, data2);
    }

    // A decoder that failed on corrupted input is usable again after reset.
    #[test]
    fn decode_with_reset_after_corruption() {
        let valid_data = b"Hello World";
        let valid_compressed = compress_data(valid_data);

        // Create a corrupted payload (valid gzip header but corrupted body)
        let mut corrupted = valid_compressed.clone();
        assert!(corrupted.len() >= 14);
        corrupted[12] ^= 0xFF;
        corrupted[13] ^= 0xFF;

        // Try to decode corrupted data
        let mut decoder = GzDecoder::new(corrupted.as_slice());
        let mut output = Vec::new();
        let _ = decoder.read_to_end(&mut output).unwrap_err();

        // Reset with valid payload and decode
        decoder.reset(valid_compressed.as_slice());
        output.clear();
        decoder.read_to_end(&mut output).unwrap();
        assert_eq!(output, valid_data);
    }
}