egzreader/
lib.rs

1//! Read gzip/non-gzip stream easily.
2//!
3//! [EgzReader](EgzReader) decodes the underlying reader when it is a gzipped stream, and
4//! reads it as is when it is not gzipped.
5//!
6//! # Examples
7//! ```
8//! use std::io::prelude::*;
9//! use std::io;
10//! use std::fs::File;
11//! use egzreader::EgzReader;
12//!
13//! # fn main() {
14//! #     read_hello().unwrap();
15//! # }
16//! fn read_hello() -> io::Result<()> {
17//!     // text file
18//!     let mut r1 = EgzReader::new(
19//!         File::open("examples/hello.txt")?
20//!     );
21//!     // gzip encoded text file
22//!     let mut r2 = EgzReader::new(
23//!         File::open("examples/hello.txt.gz")?
24//!     );
25//!
26//!     let mut s1 = String::new();
27//!     let mut s2 = String::new();
28//!
29//!     r1.read_to_string(&mut s1)?;
30//!     r2.read_to_string(&mut s2)?;
31//!
32//!     assert_eq!(s1, "Hello!");
33//!     assert_eq!(s2, "Hello!");
34//!
35//!     Ok(())
36//! }
37//! ```
38use flate2::read::GzDecoder;
39use std::io::Read;
40use std::io::Result;
41use std::mem;
42
/// Reader that first replays bytes already taken from the underlying reader
/// and then continues with the underlying reader itself.
#[derive(Debug)]
struct RawReader<R: Read> {
    /// Bytes consumed from `reader` ahead of time; only `preread[..size]` is replayed.
    preread: [u8; 11],
    /// Number of pre-read bytes that have already been handed out.
    pos: usize,
    /// Number of pre-read bytes to replay.
    size: usize,

    /// Source of everything that follows the pre-read bytes.
    reader: R,
}

impl<R: Read> RawReader<R> {
    /// Creates a reader yielding `preread[..size]` followed by the contents of `r`.
    fn new(preread: [u8; 11], size: usize, r: R) -> Self {
        debug_assert!(size <= preread.len());
        Self {
            preread,
            pos: 0,
            size,
            reader: r,
        }
    }
}

impl<R: Read> Read for RawReader<R> {
    /// Serves the remaining pre-read bytes first; once they are exhausted,
    /// every call is forwarded to the underlying reader.
    ///
    /// A single call never mixes pre-read bytes with bytes from the
    /// underlying reader.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        debug_assert!(self.pos <= self.preread.len());

        // All pre-read bytes were delivered: delegate to the wrapped reader.
        if self.pos >= self.size {
            return self.reader.read(buf);
        }

        // Copy as many pending pre-read bytes as fit into `buf`.
        let pending = &self.preread[self.pos..self.size];
        let count = pending.len().min(buf.len());
        buf[..count].copy_from_slice(&pending[..count]);
        self.pos += count;
        Ok(count)
    }
}
76
77// Wrapper for flate2::GzDecoder
78#[derive(Debug)]
79struct GzReader<R: Read>(GzDecoder<RawReader<R>>);
80
81impl<R: Read> GzReader<R> {
82    fn new(preread: [u8; 11], r: R) -> GzReader<R> {
83        GzReader(GzDecoder::new(RawReader::new(preread, 11, r)))
84    }
85}
86impl<R: Read> Read for GzReader<R> {
87    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
88        self.0.read(buf)
89    }
90}
91
92#[derive(Debug)]
93enum ReaderType<R: Read> {
94    // Initial state
95    Init(R),
96
97    // Actual reader states
98    Zero,
99    Raw(RawReader<R>), // non-gzip stream
100    Gz(GzReader<R>),   // gzip stream
101}
102
103impl<R: Read> ReaderType<R> {
104    fn is_init(&self) -> bool {
105        matches!(self, ReaderType::Init(_))
106    }
107
108    fn make_reader(mut reader: R) -> Result<ReaderType<R>> {
109        let mut buf = [0; 11];
110
111        let n = {
112            let mut nread = 0;
113            loop {
114                let bytes = reader.read(&mut buf[nread..])?;
115                if bytes == 0 {
116                    break;
117                }
118
119                nread += bytes;
120                if buf.len() <= nread {
121                    break;
122                }
123            }
124            debug_assert!(nread <= buf.len());
125            nread
126        };
127
128        if n == 0 {
129            Ok(ReaderType::Zero)
130        } else if n == 11 && buf[..2] == [0x1f, 0x8b] && buf[2] <= 0x08 {
131            // The underlying stream is assumed as gzip when
132            // - more than 10 bytes (=header size) can be read.
133            // - it begins with magic number '0x1f0x8b'.
134            // - its third byte, specifying compression method, would be '0x08'.
135            Ok(ReaderType::Gz(GzReader::new(buf, reader)))
136        } else {
137            Ok(ReaderType::Raw(RawReader::new(buf, n, reader)))
138        }
139    }
140
141    // Determine actual type of reader.
142    // This method is called at first read().
143    fn into_actual_reader(self) -> Result<Self> {
144        debug_assert!(self.is_init());
145        if let ReaderType::Init(r) = self {
146            Self::make_reader(r)
147        } else {
148            Ok(self)
149        }
150    }
151}
152
153impl<R: Read> Read for ReaderType<R> {
154    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
155        match self {
156            ReaderType::Init(_) => {
157                // Update reader state.
158                let init = mem::replace(self, ReaderType::Zero);
159                *self = init.into_actual_reader()?;
160
161                // Then, call read().
162                debug_assert!(!self.is_init());
163                self.read(buf)
164            }
165            ReaderType::Zero => Ok(0),
166            ReaderType::Raw(raw) => raw.read(buf),
167            ReaderType::Gz(gz) => gz.read(buf),
168        }
169    }
170}
171
172/// A gzip and non-gzip pholymorphic reader.
173#[derive(Debug)]
174pub struct EgzReader<R: Read>(ReaderType<R>);
175
176impl<R: Read> EgzReader<R> {
177    pub fn new(r: R) -> EgzReader<R> {
178        EgzReader(ReaderType::Init(r))
179    }
180}
181impl<R: Read> Read for EgzReader<R> {
182    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
183        self.0.read(buf)
184    }
185}
186
#[cfg(test)]
mod tests {
    use std::io::Read;

    use super::EgzReader;

    /// Plain-text fixture: "Hello!".
    const HELLO: &[u8] = b"Hello!";

    /// "Hello!" encoded by gzip.
    const HELLO_GZ: &[u8] = &[
        0x1f, 0x8b, 0x08, 0x00, 0xeb, 0x47, 0x74, 0x60, 0x00, 0x03, 0xf3, 0x48, 0xcd, 0xc9, 0xc9,
        0x57, 0x04, 0x00, 0x56, 0xcc, 0x2a, 0x9d, 0x06, 0x00, 0x00, 0x00,
    ];

    /// Reads `data` through an `EgzReader` until end of stream and returns the text.
    fn read_text(data: &[u8]) -> String {
        let mut text = String::new();
        EgzReader::new(data).read_to_string(&mut text).unwrap();
        text
    }

    #[test]
    fn read_zero() {
        assert_eq!(read_text(&[]), "");
    }

    #[test]
    fn read_long() {
        // Longer than the pre-read buffer and not gzip.
        assert_eq!(read_text(&[0x41; 20]), "AAAAAAAAAAAAAAAAAAAA");
    }

    #[test]
    fn read_hello_txt() {
        assert_eq!(read_text(HELLO), "Hello!");
    }

    #[test]
    fn read_hello_gz() {
        assert_eq!(read_text(HELLO_GZ), "Hello!");
    }

    #[test]
    fn read_fake_gz() {
        // Starts with the gzip magic number but is too short to be treated as gzip.
        let truncated = &HELLO_GZ[..10];
        let mut reader = EgzReader::new(truncated);
        let mut buf = [0; 11];
        let n = reader.read(&mut buf).unwrap();
        assert_eq!(&buf[..n], truncated);
    }
}
239}