zip_rs/
lib.rs

1/*!
2 * Zip file parsing library
3 *
4 * ```no_run
5 * # use zip_rs::ZipArchive;
6 * let mut archive = ZipArchive::from_path("./foo.zip").unwrap();
7 *
8 * for file in archive.files() {
9 *   // ...
10 * }
11 * ```
12 */
13
14#![deny(missing_debug_implementations)]
15
16use std::{
17    borrow::Cow,
18    ffi::OsStr,
19    fs::File,
20    io::{Read, Write},
21    ops::Deref,
22    path::Path,
23};
24
25#[cfg(target_family = "unix")]
26use std::os::unix::ffi::OsStrExt;
27
28#[cfg(target_family = "windows")]
29use std::os::unix::ffi::OsStrExt;
30
31pub use common::*;
32pub use error::ZipParseError;
33use flate2::read::DeflateDecoder;
34use parse::Parser;
35
36mod common;
37mod error;
38mod parse;
39
// Binary size units, expressed in bytes. Each unit is 2^10 times the previous one.
const KB: usize = 1 << 10;
const MB: usize = KB << 10;
const GB: usize = MB << 10;
43
44/// An entire ZIP archive file
45#[derive(Debug)]
46pub struct ZipArchive<'a, B: Deref<Target = [u8]>> {
47    pub central_directory: CentralDirectory<'a>,
48    parser: Parser<B>,
49}
50
51impl<'a> ZipArchive<'a, memmap::Mmap> {
52    /// Parse a [`ZipArchive`] from a file path. The contents are memory mapped.
53    pub fn from_path(path: impl AsRef<Path>) -> Result<Self, ZipParseError> {
54        let file = File::open(path)?;
55        let buffer = unsafe { memmap::Mmap::map(&file) }?;
56
57        Self::from_buffer(buffer)
58    }
59}
60
61impl<'a, B: Deref<Target = [u8]>> ZipArchive<'a, B> {
62    /// Parse a [`ZipArchive`] from an existing buffer in memory
63    pub fn from_buffer(buffer: B) -> Result<Self, ZipParseError> {
64        let mut parser = Parser::new(buffer);
65
66        let central_directory = parser.parse_central_directory()?;
67
68        Ok(ZipArchive {
69            central_directory,
70            parser,
71        })
72    }
73
74    /// Lazily iterate over the files in this archive
75    pub fn files<'b>(
76        &'b mut self,
77    ) -> impl Iterator<Item = Result<CompressedZipFile<'a>, ZipParseError>> + 'b {
78        // todo: can we avoid this clone by wrapping self.parser in RefCell?
79        let files = self.central_directory.files.clone();
80
81        files
82            .into_iter()
83            .map(move |file_header| self.parser.read_file(&file_header))
84    }
85}
86
87#[derive(Debug, Clone)]
88pub struct CentralDirectoryFileHeader<'a> {
89    pub os: Os,
90    pub metadata: Metadata<'a>,
91    pub disk_num_start: u16,
92    pub internal_attributes: InternalAttributes,
93    pub external_attributes: ExternalAttributes,
94    pub zip_specification_version: u8,
95    pub local_header_offset: u32,
96    pub comment: &'a [u8],
97}
98
/// Trailer data stored alongside the central directory (see
/// `CentralDirectory::end`).
///
/// NOTE(review): the field names appear to mirror the ZIP
/// end-of-central-directory record; the parser that fills them is not visible
/// here, so confirm exact semantics there.
#[derive(Debug)]
pub struct EndCentralDirectory {
    pub disk_num: u16,
    pub disk_central_dir_num: u16,
    // "entires" (sic): the spelling is part of the public field name and is
    // kept as-is so existing callers continue to compile.
    pub disk_entires: u16,
    // "entires" (sic): see above.
    pub total_entires: u16,
    pub central_dir_size: u32,
    pub central_dir_offset: u32,
}
108
109#[derive(Debug, Clone)]
110pub struct Metadata<'a> {
111    pub version_needed: u16,
112    pub compression_method: CompressionMethod,
113    pub date_time_modified: DateTimeModified,
114    pub flags: ZipFlags,
115    pub name: &'a [u8],
116    pub extra_field: &'a [u8],
117    pub compressed_size: u64,
118    pub uncompressed_size: u64,
119    pub crc: u32,
120}
121
122/// A single compressed ZIP file
123#[derive(Debug)]
124pub struct CompressedZipFile<'a> {
125    pub metadata: Metadata<'a>,
126    contents: &'a [u8],
127}
128
129impl<'a> CompressedZipFile<'a> {
130    pub fn compressed_contents(&self) -> &[u8] {
131        self.contents
132    }
133
134    /// Efficiently writes decompressed contents to sink without loading full
135    /// decompressed contents into memory
136    ///
137    /// `limit` controls the max uncompressed file size that will be accepted. A
138    /// `limit` of `None` implies no limit. Note that setting too high of a limit
139    /// can make decoders susceptible to DoS through ZIP bombs or other means.
140    pub fn write_with_limit(
141        &self,
142        w: &mut dyn Write,
143        limit: Option<usize>,
144    ) -> Result<(), ZipParseError> {
145        if Some(self.metadata.uncompressed_size as usize) >= limit {
146            return Err(ZipParseError::FileTooLarge(self.metadata.uncompressed_size));
147        }
148
149        match self.metadata.compression_method.name() {
150            CompressionMethodName::None => {
151                w.write_all(self.contents)?;
152            }
153            CompressionMethodName::Deflate => {
154                let mut decoder = DeflateDecoder::new(self.contents);
155
156                let amt_read = std::io::copy(&mut decoder, w)?;
157
158                if amt_read != self.metadata.uncompressed_size {
159                    return Err(ZipParseError::Generic("failed to write full buffer"));
160                }
161            }
162            method => todo!("unimplemented compression method {:?}", method),
163        }
164
165        Ok(())
166    }
167
168    /// Efficiently writes decompressed contents to sink without loading full
169    /// decompressed contents into memory.
170    ///
171    /// This method uses the default limit of 8 gigabytes. See
172    /// [CompressedZipFile::write_with_limit] to configure this limit.
173    pub fn write(&self, w: &mut dyn Write) -> Result<(), ZipParseError> {
174        self.write_with_limit(w, Some(8 * GB))
175    }
176
177    /// Decompress full contents into memory
178    ///
179    /// `limit` controls the max uncompressed file size that will be accepted. A
180    /// `limit` of `None` implies no limit. Note that setting too high of a limit
181    /// can make decoders susceptible to DoS through ZIP bombs or other means.
182    pub fn decompressed_contents_with_limit(
183        &self,
184        limit: Option<usize>,
185    ) -> Result<Cow<[u8]>, ZipParseError> {
186        if Some(self.metadata.uncompressed_size as usize) >= limit {
187            return Err(ZipParseError::FileTooLarge(self.metadata.uncompressed_size));
188        }
189
190        match self.metadata.compression_method.name() {
191            CompressionMethodName::None => return Ok(Cow::Borrowed(self.contents)),
192            CompressionMethodName::Deflate => {
193                let mut out = vec![0; self.metadata.uncompressed_size as usize];
194
195                DeflateDecoder::new(self.contents).read_exact(&mut out)?;
196
197                Ok(Cow::Owned(out))
198            }
199            method => todo!("unimplemented compression method {:?}", method),
200        }
201    }
202
203    /// Decompress full contents into memory
204    ///
205    /// This method uses the default limit of 8 gigabytes. See
206    /// [CompressedZipFile::decompressed_contents_with_limit] to configure this
207    /// limit.
208    pub fn decompressed_contents(&self) -> Result<Cow<[u8]>, ZipParseError> {
209        self.decompressed_contents_with_limit(Some(8 * GB))
210    }
211
212    /// This file's `Path` inside the ZIP archive.
213    ///
214    /// Note that this path may reference file paths outside the archive through
215    /// the use of absolute paths or the parent directory (`..`). The full file path
216    /// should not be used when interacting with the host file system if the ZIP
217    /// file is untrusted.
218    pub fn file_path(&self) -> &Path {
219        &Path::new(OsStr::from_bytes(self.metadata.name))
220    }
221
222    /// The raw bytes of this file's path inside the ZIP archive.
223    ///
224    /// Note that this path may reference file paths outside the archive through
225    /// the use of absolute paths or the parent directory (`..`). The full file path
226    /// should not be used when interacting with the host file system if the ZIP
227    /// file is untrusted.
228    pub fn file_path_bytes(&self) -> &'a [u8] {
229        self.metadata.name
230    }
231
232    /// The algorithm used to compress this file.
233    ///
234    /// This is typically [`CompressionMethodName::None`] or
235    /// [`CompressionMethodName::Deflate`].
236    pub fn compression_method(&self) -> CompressionMethod {
237        self.metadata.compression_method
238    }
239}
240
241#[derive(Debug)]
242pub struct CentralDirectory<'a> {
243    pub files: Vec<CentralDirectoryFileHeader<'a>>,
244    pub end: EndCentralDirectory,
245}
246
#[cfg(test)]
mod test {
    use crate::{ZipArchive, MB};

    /// Decompressing the entries of `files/bomb.zip` with a 20 MB limit is
    /// expected to panic (through `unwrap` on a returned error).
    ///
    /// NOTE(review): `#[should_panic]` accepts any panic, so this test also
    /// passes if the fixture is missing or fails to parse. Consider asserting
    /// on the specific error instead.
    #[test]
    #[should_panic]
    fn zip_bomb() {
        let mut archive = ZipArchive::from_path("files/bomb.zip").unwrap();
        let limit = Some(20 * MB);

        archive.files().for_each(|entry| {
            entry
                .unwrap()
                .decompressed_contents_with_limit(limit)
                .unwrap();
        });
    }
}