stream_zipper/
lib.rs

1extern crate core;
2extern crate miniz_oxide;
3extern crate nom;
4
5pub mod deflate;
6pub mod gzip;
7pub mod input_helper;
8pub mod utils;
9pub mod zip;
10
11trait CompressedStream: Sized {
12    fn feed_input(&mut self, input: &[u8]) -> State<Self>;
13}
14
15/// Represents a state of a compressed input stream.
16/// Generic over the actual stream type (gzip or zip).
17pub enum State<'i, 's, File>
18where
19    'i: 's,
20{
21    NeedsInputOrEof(gzip::GZipFile),
22    NeedsInput,
23    HasOutput {
24        unparsed_input: &'i [u8],
25        output: &'s [u8],
26    },
27    NextFile {
28        unparsed_input: &'i [u8],
29        next_file: File,
30    },
31    EndOfFile,
32}
33
34impl<'i, 's, F> State<'i, 's, F> {
35    pub fn assert_no_output(self) -> State<'i, 'i, F> {
36        use State::*;
37        match self {
38            NeedsInputOrEof(f) => NeedsInputOrEof(f),
39            NeedsInput => NeedsInput,
40            HasOutput { .. } => panic!("Assertion failed: self was HasOutput"),
41            NextFile {
42                unparsed_input,
43                next_file,
44            } => NextFile {
45                unparsed_input,
46                next_file,
47            },
48            EndOfFile => EndOfFile,
49        }
50    }
51}
52
53impl<'i, 's, File> std::fmt::Debug for State<'i, 's, File> {
54    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
55        use State::*;
56        match self {
57            NeedsInputOrEof(_) => writeln!(f, "State::NeedsInputOrEof"),
58            NeedsInput => writeln!(f, "State::NeedsInput"),
59            HasOutput { .. } => writeln!(f, "State::HasOutput"),
60            NextFile { .. } => writeln!(f, "State::NextFile"),
61            EndOfFile => writeln!(f, "State::EndOfFile"),
62        }
63    }
64}
65
/// Outcome of reading the headers of a compressed file.
///
/// The derived traits mirror those of [`Error`], so that a
/// `Result<ReadHeadersResult, Error>` can be debug-printed, compared and
/// used with `unwrap_err`/`expect_err`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ReadHeadersResult<'i> {
    /// The given input was not enough; call again with more data.
    NeedsInput,
    /// The headers have been read.
    Done {
        /// The part of the input that has not been consumed yet.
        unparsed: &'i [u8],
    },
}
70
71impl<'i, 's> From<State<'i, 's, zip::ZipFile>> for State<'i, 's, File> {
72    fn from(from: State<'i, 's, zip::ZipFile>) -> State<'i, 's, File> {
73        use State::*;
74
75        match from {
76            NeedsInputOrEof(_) => unreachable!(
77                "Zip files have always directory at end so we know if we have reached the end."
78            ),
79            NeedsInput => NeedsInput,
80            HasOutput {
81                unparsed_input,
82                output,
83            } => HasOutput {
84                unparsed_input,
85                output,
86            },
87            NextFile {
88                unparsed_input,
89                next_file,
90            } => NextFile {
91                unparsed_input,
92                next_file: next_file.into(),
93            },
94            EndOfFile => EndOfFile,
95        }
96    }
97}
98
99impl<'i, 's> From<State<'i, 's, gzip::GZipFile>> for State<'i, 's, File> {
100    fn from(from: State<'i, 's, gzip::GZipFile>) -> State<'i, 's, File> {
101        use State::*;
102
103        match from {
104            NeedsInputOrEof(file) => NeedsInputOrEof(file),
105            NeedsInput => NeedsInput,
106            HasOutput {
107                unparsed_input,
108                output,
109            } => HasOutput {
110                unparsed_input,
111                output,
112            },
113            NextFile {
114                unparsed_input,
115                next_file,
116            } => NextFile {
117                unparsed_input,
118                next_file: next_file.into(),
119            },
120            EndOfFile => EndOfFile,
121        }
122    }
123}
124
125/// Corresponds to a zipped or gzipped file/stream.
126/// Can be in one of three states: not-yet-detected type, a zip file or a gzip file.
127pub enum File {
128    Zip(zip::ZipFile),
129    GZip(gzip::GZipFile),
130    Init(Vec<u8>),
131}
132
133enum AutodetectResult {
134    NeedsMoreData,
135    UnknownFormat,
136    Detected(File),
137}
138
139impl File {
140    pub fn name(&self) -> Option<&[u8]> {
141        use File::*;
142
143        match self {
144            Zip(zip) => zip.filename(),
145            GZip(gzip) => gzip.filename(),
146            Init(_) => None,
147        }
148    }
149
150    /// Reads the fist 4 bytes of the input and tries to autodetect the stream format.
151    /// Consumes and retains the amount of bytes read from input in `unparsed` buffer.
152    /// Once the detection succeeds, constructs a stream object of the detected format
153    /// and feeds it the consumed first bytes.
154    /// In case where there is no enough data for detection,
155    /// it consumes the input it can and returns,
156    /// expecting to be called again with more data.
157    fn autodetect_format(unparsed: &mut Vec<u8>, input: &mut &[u8]) -> AutodetectResult {
158        const NEEDED_BYTES: usize = 4;
159        if unparsed.len() + input.len() < NEEDED_BYTES {
160            unparsed.extend_from_slice(input);
161            *input = &[][..];
162            return AutodetectResult::NeedsMoreData;
163        } else {
164            // Byte count from start of input that are used for autodetection
165            let bytes_to_consume = NEEDED_BYTES - unparsed.len();
166            unparsed.extend_from_slice(&input[..bytes_to_consume]);
167            *input = &input[bytes_to_consume..];
168        }
169
170        // Bytes needed for detection are now in `unparsed`!
171
172        // Start a stream according to a detected stream type
173        // and feed in the first bytes
174        // that where used for detection.
175        if unparsed.starts_with(b"\x50\x4b\x03\x04") {
176            let mut stream = zip::start_stream();
177            stream
178                .read(unparsed)
179                .expect("No errors will happen with the 4 first input bytes.");
180            AutodetectResult::Detected(File::Zip(stream))
181        } else if unparsed.starts_with(b"\x1f\x8b\x08") {
182            let mut stream = gzip::start_stream();
183            stream
184                .read(unparsed)
185                .expect("No errors will happen with the 4 first input bytes.");
186            AutodetectResult::Detected(File::GZip(stream))
187        } else {
188            return AutodetectResult::UnknownFormat;
189        }
190    }
191
192    pub fn get_output(&self) -> &[u8] {
193        use File::*;
194        match self {
195            Zip(file) => file.get_output(),
196            GZip(file) => file.get_output(),
197            Init(file) => panic!("This shouldn't be called before autodetect!"),
198        }
199    }
200
201    pub fn read_headers<'i>(
202        &mut self,
203        mut input: &'i [u8],
204    ) -> Result<ReadHeadersResult<'i>, Error> {
205        use File::*;
206
207        // Format detection will run only when the stream has started (the Init state)
208        if let Init(ref mut unparsed) = self {
209            // Set self to the corresponding format
210            *self = match Self::autodetect_format(unparsed, &mut input) {
211                AutodetectResult::NeedsMoreData => return Ok(ReadHeadersResult::NeedsInput),
212                AutodetectResult::UnknownFormat => return Err(Error::UnknownFileFormat),
213                AutodetectResult::Detected(file) => file,
214            };
215        };
216
217        match self {
218            Zip(ref mut file) => Ok(file.read_headers(input)?.into()),
219            GZip(ref mut file) => unimplemented!("TODO"),
220            Init(_) => {
221                unreachable!("The File::Init state is never set after autodetect has succeeded.")
222            }
223        }
224    }
225
226    pub fn read_internal_iter<'i>(
227        &mut self,
228        mut input: &'i [u8],
229        mut callback: impl FnMut(&[u8]),
230    ) -> Result<State<'i, 'i, File>, Error> {
231        loop {
232            let state = self.read(input)?;
233            if let State::HasOutput {
234                unparsed_input,
235                output,
236            } = state
237            {
238                input = unparsed_input;
239                callback(output);
240            } else {
241                return Ok(state.assert_no_output());
242            }
243        }
244    }
245
246    pub fn read<'i, 's>(&'s mut self, mut input: &'i [u8]) -> Result<State<'i, 's, File>, Error> {
247        use File::*;
248
249        // Format detection will run only when the stream has started (the Init state)
250        if let Init(ref mut unparsed) = self {
251            // Set self to the corresponding format
252            *self = match Self::autodetect_format(unparsed, &mut input) {
253                AutodetectResult::NeedsMoreData => return Ok(State::NeedsInput),
254                AutodetectResult::UnknownFormat => return Err(Error::UnknownFileFormat),
255                AutodetectResult::Detected(file) => file,
256            };
257        };
258
259        match self {
260            Zip(ref mut file) => Ok(file.read(input)?.into()),
261            GZip(ref mut file) => Ok(file.read(input)?.into()),
262            Init(_) => {
263                unreachable!("The File::Init state is never set after autodetect has succeeded.")
264            }
265        }
266    }
267}
268
269impl From<zip::ZipFile> for File {
270    fn from(f: zip::ZipFile) -> File {
271        File::Zip(f)
272    }
273}
274
275impl From<gzip::GZipFile> for File {
276    fn from(f: gzip::GZipFile) -> File {
277        File::GZip(f)
278    }
279}
280
281impl std::fmt::Display for Error {
282    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
283        use Error::*;
284        match self {
285            Zip(e) => {
286                write!(f, "zip error:")?;
287                e.fmt(f)?
288            }
289            GZip(e) => {
290                write!(f, "gzip error:")?;
291                e.fmt(f)?
292            }
293            UnknownFileFormat => write!(f, "no known fileformat (zip or gzip) detected")?,
294        }
295        Ok(())
296    }
297}
298
299impl std::error::Error for Error {
300    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
301        match self {
302            Self::Zip(e) => Some(e),
303            Self::GZip(e) => Some(e),
304            Self::UnknownFileFormat => None,
305        }
306    }
307}
308
309/// An error type that delegates to ZipError or GzipError.
310/// In case the file format detection fails, there's a third
311/// error state for that.
312#[derive(Debug, Copy, Clone, Eq, PartialEq)]
313pub enum Error {
314    Zip(zip::ZipError),
315    GZip(gzip::GZipError),
316    UnknownFileFormat,
317}
318
319impl From<zip::ZipError> for Error {
320    fn from(err: zip::ZipError) -> Error {
321        Error::Zip(err)
322    }
323}
324
325impl From<gzip::GZipError> for Error {
326    fn from(err: gzip::GZipError) -> Error {
327        Error::GZip(err)
328    }
329}
330
331/// Initialises a File that starts in a state that is agnostic
332/// about the whether the input
333/// stream is in zip format or gzip format.
334/// Use this function to initialise the stream if you want to
335/// auto-detect the input format.
336pub fn start_stream() -> File {
337    File::Init(Vec::new())
338}