binstall_zip/read/
stream.rs

1use std::fs;
2use std::io::{self, Read};
3use std::path::Path;
4
5use super::{
6    central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile,
7    ZipFileData, ZipResult,
8};
9
10use byteorder::{LittleEndian, ReadBytesExt};
11
/// Stream decoder for zip.
///
/// Wraps a reader `R` and consumes it sequentially. When `R` implements
/// [`Read`], use [`ZipStreamReader::visit`] to walk the entries with a
/// [`ZipStreamVisitor`], or [`ZipStreamReader::extract`] to unpack them into
/// a directory.
#[derive(Debug)]
pub struct ZipStreamReader<R>(R);
15
16impl<R> ZipStreamReader<R> {
17    /// Create a new ZipStreamReader
18    pub fn new(reader: R) -> Self {
19        Self(reader)
20    }
21}
22
23impl<R: Read> ZipStreamReader<R> {
24    fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> {
25        // Give archive_offset and central_header_start dummy value 0, since
26        // they are not used in the output.
27        let archive_offset = 0;
28        let central_header_start = 0;
29
30        // Parse central header
31        let signature = self.0.read_u32::<LittleEndian>()?;
32        if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
33            Ok(None)
34        } else {
35            central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start)
36                .map(ZipStreamFileMetadata)
37                .map(Some)
38        }
39    }
40
41    /// Iteraate over the stream and extract all file and their
42    /// metadata.
43    pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
44        while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
45            visitor.visit_file(&mut file)?;
46        }
47
48        while let Some(metadata) = self.parse_central_directory()? {
49            visitor.visit_additional_metadata(&metadata)?;
50        }
51
52        Ok(())
53    }
54
55    /// Extract a Zip archive into a directory, overwriting files if they
56    /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
57    ///
58    /// Extraction is not atomic; If an error is encountered, some of the files
59    /// may be left on disk.
60    pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> {
61        struct Extracter<'a>(&'a Path);
62        impl ZipStreamVisitor for Extracter<'_> {
63            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
64                let filepath = file
65                    .enclosed_name()
66                    .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
67
68                let outpath = self.0.join(filepath);
69
70                if file.name().ends_with('/') {
71                    fs::create_dir_all(&outpath)?;
72                } else {
73                    if let Some(p) = outpath.parent() {
74                        fs::create_dir_all(&p)?;
75                    }
76                    let mut outfile = fs::File::create(&outpath)?;
77                    io::copy(file, &mut outfile)?;
78                }
79
80                Ok(())
81            }
82
83            #[allow(unused)]
84            fn visit_additional_metadata(
85                &mut self,
86                metadata: &ZipStreamFileMetadata,
87            ) -> ZipResult<()> {
88                #[cfg(unix)]
89                {
90                    let filepath = metadata
91                        .enclosed_name()
92                        .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
93
94                    let outpath = self.0.join(filepath);
95
96                    use std::os::unix::fs::PermissionsExt;
97                    if let Some(mode) = metadata.unix_mode() {
98                        fs::set_permissions(&outpath, fs::Permissions::from_mode(mode))?;
99                    }
100                }
101
102                Ok(())
103            }
104        }
105
106        self.visit(&mut Extracter(directory.as_ref()))
107    }
108}
109
/// Visitor for ZipStreamReader
///
/// [`ZipStreamReader::visit`] drives an implementation of this trait. An
/// `Err` returned from either method stops the traversal and is returned to
/// the caller of `visit`.
pub trait ZipStreamVisitor {
    /// Called once for every file entry read from the stream.
    ///
    ///  * `file` - contains the content of the file and most of the metadata,
    ///    except:
    ///     - `comment`: set to an empty string
    ///     - `data_start`: set to 0
    ///     - `external_attributes`: `unix_mode()`: will return None
    fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>;

    /// Called once for every central directory header parsed from the stream.
    ///
    /// This function is guaranteed to be called after all `visit_file`s.
    ///
    ///  * `metadata` - Provides missing metadata in `visit_file`.
    fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>;
}
124
/// Additional metadata for the file.
///
/// Wraps the [`ZipFileData`] parsed from a central directory header and is
/// handed to [`ZipStreamVisitor::visit_additional_metadata`].
#[derive(Debug)]
pub struct ZipStreamFileMetadata(ZipFileData);
128
129impl ZipStreamFileMetadata {
130    /// Get the name of the file
131    ///
132    /// # Warnings
133    ///
134    /// It is dangerous to use this name directly when extracting an archive.
135    /// It may contain an absolute path (`/etc/shadow`), or break out of the
136    /// current directory (`../runtime`). Carelessly writing to these paths
137    /// allows an attacker to craft a ZIP archive that will overwrite critical
138    /// files.
139    ///
140    /// You can use the [`ZipFile::enclosed_name`] method to validate the name
141    /// as a safe path.
142    pub fn name(&self) -> &str {
143        &self.0.file_name
144    }
145
146    /// Get the name of the file, in the raw (internal) byte representation.
147    ///
148    /// The encoding of this data is currently undefined.
149    pub fn name_raw(&self) -> &[u8] {
150        &self.0.file_name_raw
151    }
152
153    /// Rewrite the path, ignoring any path components with special meaning.
154    ///
155    /// - Absolute paths are made relative
156    /// - [`ParentDir`]s are ignored
157    /// - Truncates the filename at a NULL byte
158    ///
159    /// This is appropriate if you need to be able to extract *something* from
160    /// any archive, but will easily misrepresent trivial paths like
161    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
162    /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
163    ///
164    /// [`ParentDir`]: `Component::ParentDir`
165    pub fn mangled_name(&self) -> ::std::path::PathBuf {
166        self.0.file_name_sanitized()
167    }
168
169    /// Ensure the file path is safe to use as a [`Path`].
170    ///
171    /// - It can't contain NULL bytes
172    /// - It can't resolve to a path outside the current directory
173    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
174    /// - It can't be an absolute path
175    ///
176    /// This will read well-formed ZIP files correctly, and is resistant
177    /// to path-based exploits. It is recommended over
178    /// [`ZipFile::mangled_name`].
179    pub fn enclosed_name(&self) -> Option<&Path> {
180        self.0.enclosed_name()
181    }
182
183    /// Returns whether the file is actually a directory
184    pub fn is_dir(&self) -> bool {
185        self.name()
186            .chars()
187            .rev()
188            .next()
189            .map_or(false, |c| c == '/' || c == '\\')
190    }
191
192    /// Returns whether the file is a regular file
193    pub fn is_file(&self) -> bool {
194        !self.is_dir()
195    }
196
197    /// Get the comment of the file
198    pub fn comment(&self) -> &str {
199        &self.0.file_comment
200    }
201
202    /// Get the starting offset of the data of the compressed file
203    pub fn data_start(&self) -> u64 {
204        self.0.data_start.load()
205    }
206
207    /// Get unix mode for the file
208    pub fn unix_mode(&self) -> Option<u32> {
209        self.0.unix_mode()
210    }
211}
212
#[cfg(test)]
mod test {
    use super::*;
    use std::collections::BTreeSet;
    use std::io;

    /// Wrap an in-memory archive in a [`ZipStreamReader`].
    fn stream_reader(bytes: &'static [u8]) -> ZipStreamReader<io::Cursor<&'static [u8]>> {
        ZipStreamReader::new(io::Cursor::new(bytes))
    }

    /// Assert that visiting `bytes` with a no-op visitor reports an error.
    fn assert_visit_fails(bytes: &'static [u8]) {
        stream_reader(bytes).visit(&mut DummyVisitor).unwrap_err();
    }

    /// Visitor that accepts every callback and does nothing.
    struct DummyVisitor;
    impl ZipStreamVisitor for DummyVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            Ok(())
        }
    }

    /// Visitor that counts `visit_file` calls in field 0 and
    /// `visit_additional_metadata` calls in field 1.
    #[derive(Default, Debug, Eq, PartialEq)]
    struct CounterVisitor(u64, u64);
    impl ZipStreamVisitor for CounterVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            self.0 += 1;
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            self.1 += 1;
            Ok(())
        }
    }

    /// Visitor that records the name of every regular file it is shown and
    /// later checks that each regular file reported by the metadata pass was
    /// recorded.
    #[derive(Default)]
    struct SeenFiles {
        filenames: BTreeSet<Box<str>>,
    }
    impl ZipStreamVisitor for SeenFiles {
        fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
            if file.is_file() {
                self.filenames.insert(file.name().into());
            }

            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            if metadata.is_file() {
                assert!(
                    self.filenames.contains(metadata.name()),
                    "{} is missing its file content",
                    metadata.name()
                );
            }

            Ok(())
        }
    }

    #[test]
    fn invalid_offset() {
        assert_visit_fails(include_bytes!("../../tests/data/invalid_offset.zip"));
    }

    #[test]
    fn invalid_offset2() {
        assert_visit_fails(include_bytes!("../../tests/data/invalid_offset2.zip"));
    }

    #[test]
    fn zip_read_streaming() {
        stream_reader(include_bytes!("../../tests/data/mimetype.zip"))
            .visit(&mut SeenFiles::default())
            .unwrap();
    }

    #[test]
    fn file_and_dir_predicates() {
        /// Checks the `dir*` / `file*` naming convention of the fixture
        /// against `is_dir` / `is_file`, then tracks names like `SeenFiles`.
        #[derive(Default)]
        struct V(SeenFiles);
        impl ZipStreamVisitor for V {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                let full_name = file.enclosed_name().unwrap();
                let file_name = full_name.file_name().unwrap().to_str().unwrap();
                assert!(
                    (file_name.starts_with("dir") && file.is_dir())
                        || (file_name.starts_with("file") && file.is_file())
                );

                self.0.visit_file(file)
            }

            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                self.0.visit_additional_metadata(metadata)
            }
        }

        stream_reader(include_bytes!("../../tests/data/files_and_dirs.zip"))
            .visit(&mut V::default())
            .unwrap();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        assert_visit_fails(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        ));
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        assert_visit_fails(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        ));
    }
}