rc_zip/parse/archive.rs
1use chrono::{offset::Utc, DateTime, TimeZone};
2use ownable::{IntoOwned, ToOwned};
3use winnow::{binary::le_u16, PResult, Partial};
4
5use crate::{
6 encoding::Encoding,
7 parse::{Mode, Version},
8};
9
10use super::{zero_datetime, ExtraField, NtfsAttr};
11
12/// An Archive contains general information about a zip file, along with a list
13/// of [entries][Entry].
14///
15/// It is obtained through a state machine like
16/// [ArchiveFsm](crate::fsm::ArchiveFsm), although end-users tend to use
17/// higher-level interfaces like
18/// [rc-zip-sync](https://crates.io/crates/rc-zip-sync) or
19/// [rc-zip-tokio](https://crates.io/crates/rc-zip-tokio).
20pub struct Archive {
21 pub(crate) size: u64,
22 pub(crate) encoding: Encoding,
23 pub(crate) entries: Vec<Entry>,
24 pub(crate) comment: String,
25}
26
27impl Archive {
28 /// The size of .zip file that was read, in bytes.
29 #[inline(always)]
30 pub fn size(&self) -> u64 {
31 self.size
32 }
33
34 /// Iterate over all files in this zip, read from the central directory.
35 pub fn entries(&self) -> impl Iterator<Item = &Entry> {
36 self.entries.iter()
37 }
38
39 /// Attempts to look up an entry by name. This is usually a bad idea,
40 /// as names aren't necessarily normalized in zip archives.
41 pub fn by_name<N: AsRef<str>>(&self, name: N) -> Option<&Entry> {
42 self.entries.iter().find(|&x| x.name == name.as_ref())
43 }
44
45 /// Returns the detected character encoding for text fields
46 /// (names, comments) inside this zip archive.
47 #[inline(always)]
48 pub fn encoding(&self) -> Encoding {
49 self.encoding
50 }
51
52 /// Returns the comment for this archive, if any. When reading
53 /// a zip file with an empty comment field, this will return None.
54 #[inline(always)]
55 pub fn comment(&self) -> &str {
56 &self.comment
57 }
58}
59
60/// Describes a zip archive entry (a file, a directory, a symlink)
61#[derive(Clone)]
62pub struct Entry {
63 /// Name of the file
64 ///
65 /// This should be a relative path, separated by `/`. However, there are zip
66 /// files in the wild with all sorts of evil variants, so, be conservative
67 /// in what you accept.
68 ///
69 /// See also [Self::sanitized_name], which returns a sanitized version of
70 /// the name, working around zip slip vulnerabilities.
71 pub name: String,
72
73 /// Compression method: Store, Deflate, Bzip2, etc.
74 pub method: Method,
75
76 /// Comment is any arbitrary user-defined string shorter than 64KiB
77 pub comment: String,
78
79 /// This entry's "last modified" timestamp - with caveats
80 ///
81 /// Due to the history of the ZIP file format, this may be inaccurate. It may be offset
82 /// by a few hours, if there is no extended timestamp information. It may have a resolution
83 /// as low as two seconds, if only MSDOS timestamps are present. It may default to the Unix
84 /// epoch, if something went really wrong.
85 ///
86 /// If you're reading this after the year 2038, or after the year 2108, godspeed.
87 pub modified: DateTime<Utc>,
88
89 /// This entry's "created" timestamp, if available.
90 ///
91 /// See [Self::modified] for caveats.
92 pub created: Option<DateTime<Utc>>,
93
94 /// This entry's "last accessed" timestamp, if available.
95 ///
96 /// See [Self::accessed] for caveats.
97 pub accessed: Option<DateTime<Utc>>,
98
99 /// Offset of the local file header in the zip file
100 ///
101 /// ```text
102 /// [optional non-zip data]
103 /// [local file header 1] <------ header_offset points here
104 /// [encryption header 1]
105 /// [file data 1]
106 /// [data descriptor 1]
107 /// ...
108 /// [central directory]
109 /// [optional zip64 end of central directory info]
110 /// [end of central directory record]
111 /// ```
112 pub header_offset: u64,
113
114 /// Version of zip needed to extract this archive.
115 pub reader_version: Version,
116
117 /// General purpose bit flag
118 ///
119 /// In the zip format, the most noteworthy flag (bit 11) is for UTF-8 names.
120 /// Other flags can indicate: encryption (unsupported), various compression
121 /// settings (depending on the [Method] used).
122 ///
123 /// For LZMA, general-purpose bit 1 denotes the EOS marker.
124 pub flags: u16,
125
126 /// Unix user ID
127 ///
128 /// Only present if a Unix extra field or New Unix extra field was found.
129 pub uid: Option<u32>,
130
131 /// Unix group ID
132 ///
133 /// Only present if a Unix extra field or New Unix extra field was found.
134 pub gid: Option<u32>,
135
136 /// CRC-32 hash as found in the central directory.
137 ///
138 /// Note that this may be zero, and the actual CRC32 might be in the local header, or (more
139 /// commonly) in the data descriptor instead.
140 pub crc32: u32,
141
142 /// Size in bytes, after compression
143 pub compressed_size: u64,
144
145 /// Size in bytes, before compression
146 ///
147 /// This will be zero for directories.
148 pub uncompressed_size: u64,
149
150 /// File mode.
151 pub mode: Mode,
152}
153
154impl Entry {
155 /// Returns a sanitized version of the entry's name, if it
156 /// seems safe. In particular, if this method feels like the
157 /// entry name is trying to do a zip slip (cf.
158 /// <https://snyk.io/research/zip-slip-vulnerability>), it'll return
159 /// None.
160 ///
161 /// Other than that, it will strip any leading slashes on non-Windows OSes.
162 pub fn sanitized_name(&self) -> Option<&str> {
163 let name = self.name.as_str();
164
165 // refuse entries with traversed/absolute path to mitigate zip slip
166 if name.contains("..") {
167 return None;
168 }
169
170 #[cfg(windows)]
171 {
172 if name.contains(":\\") || name.starts_with("\\") {
173 return None;
174 }
175 Some(name)
176 }
177
178 #[cfg(not(windows))]
179 {
180 // strip absolute prefix on entries pointing to root path
181 let mut entry_chars = name.chars();
182 let mut name = name;
183 while name.starts_with('/') {
184 entry_chars.next();
185 name = entry_chars.as_str()
186 }
187 Some(name)
188 }
189 }
190
191 /// Apply the extra field to the entry, updating its metadata.
192 pub(crate) fn set_extra_field(&mut self, ef: &ExtraField) {
193 match &ef {
194 ExtraField::Zip64(z64) => {
195 self.uncompressed_size = z64.uncompressed_size;
196 self.compressed_size = z64.compressed_size;
197 self.header_offset = z64.header_offset;
198 }
199 ExtraField::Timestamp(ts) => {
200 self.modified = Utc
201 .timestamp_opt(ts.mtime as i64, 0)
202 .single()
203 .unwrap_or_else(zero_datetime);
204 }
205 ExtraField::Ntfs(nf) => {
206 for attr in &nf.attrs {
207 // note: other attributes are unsupported
208 if let NtfsAttr::Attr1(attr) = attr {
209 self.modified = attr.mtime.to_datetime().unwrap_or_else(zero_datetime);
210 self.created = attr.ctime.to_datetime();
211 self.accessed = attr.atime.to_datetime();
212 }
213 }
214 }
215 ExtraField::Unix(uf) => {
216 self.modified = Utc
217 .timestamp_opt(uf.mtime as i64, 0)
218 .single()
219 .unwrap_or_else(zero_datetime);
220
221 if self.uid.is_none() {
222 self.uid = Some(uf.uid as u32);
223 }
224
225 if self.gid.is_none() {
226 self.gid = Some(uf.gid as u32);
227 }
228 }
229 ExtraField::NewUnix(uf) => {
230 self.uid = Some(uf.uid as u32);
231 self.gid = Some(uf.uid as u32);
232 }
233 _ => {}
234 };
235 }
236}
237
/// The entry's file type: a directory, a file, or a symbolic link.
///
/// This is a plain field-less enum, so it is cheap to copy and can be
/// compared, hashed and used as a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EntryKind {
    /// The entry is a directory
    Directory,

    /// The entry is a file
    File,

    /// The entry is a symbolic link
    Symlink,
}
250
251impl Entry {
252 /// Determine the kind of this entry based on its mode.
253 pub fn kind(&self) -> EntryKind {
254 if self.mode.has(Mode::SYMLINK) {
255 EntryKind::Symlink
256 } else if self.mode.has(Mode::DIR) {
257 EntryKind::Directory
258 } else {
259 EntryKind::File
260 }
261 }
262}
263
264/// Compression method used for a file entry.
265///
266/// In archives that follow [ISO/IEC 21320-1:2015](https://www.iso.org/standard/60101.html), only
267/// [Store][Method::Store] and [Deflate][Method::Deflate] should be used.
268///
269/// However, in the wild, it is not too uncommon to encounter [Bzip2][Method::Bzip2],
270/// [Lzma][Method::Lzma] or others.
271#[derive(
272 Debug, Clone, Copy, PartialEq, Eq, Hash, IntoOwned, ToOwned,
273)]
274#[repr(u16)]
275pub enum Method {
276 /// No compression is applied
277 Store = Self::STORE,
278
279 /// [DEFLATE (RFC 1951)](https://www.ietf.org/rfc/rfc1951.txt)
280 Deflate = Self::DEFLATE,
281
282 /// [DEFLATE64](https://deflate64.com/)
283 Deflate64 = Self::DEFLATE64,
284
285 /// [BZIP-2](https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf)
286 Bzip2 = Self::BZIP2,
287
288 /// [LZMA](https://github.com/jljusten/LZMA-SDK/blob/master/DOC/lzma-specification.txt)
289 Lzma = Self::LZMA,
290
291 /// [zstd](https://datatracker.ietf.org/doc/html/rfc8878)
292 Zstd = Self::ZSTD,
293
294 /// [MP3](https://www.iso.org/obp/ui/#iso:std:iso-iec:11172:-3:ed-1:v1:en)
295 Mp3 = Self::MP3,
296
297 /// [XZ](https://tukaani.org/xz/xz-file-format.txt)
298 Xz = Self::XZ,
299
300 /// [JPEG](https://jpeg.org/jpeg/)
301 Jpeg = Self::JPEG,
302
303 /// [WavPack](https://www.wavpack.com/)
304 WavPack = Self::WAV_PACK,
305
306 /// [PPMd](https://en.wikipedia.org/wiki/Prediction_by_partial_matching)
307 Ppmd = Self::PPMD,
308
309 /// AE-x encryption marker (see Appendix E of appnote)
310 Aex = Self::AEX,
311
312 /// A compression method that isn't recognized by this crate.
313 Unrecognized(u16),
314}
315
316impl Method {
317 const STORE: u16 = 0;
318 const DEFLATE: u16 = 8;
319 const DEFLATE64: u16 = 9;
320 const BZIP2: u16 = 12;
321 const LZMA: u16 = 14;
322 const ZSTD: u16 = 93;
323 const MP3: u16 = 94;
324 const XZ: u16 = 95;
325 const JPEG: u16 = 96;
326 const WAV_PACK: u16 = 97;
327 const PPMD: u16 = 98;
328 const AEX: u16 = 99;
329
330 /// Parse a method from a byte slice
331 pub fn parser(i: &mut Partial<&[u8]>) -> PResult<Self> {
332 le_u16(i).map(From::from)
333 }
334}
335
336impl From<u16> for Method {
337 fn from(u: u16) -> Self {
338 match u {
339 Self::STORE => Self::Store,
340 Self::DEFLATE => Self::Deflate,
341 Self::DEFLATE64 => Self::Deflate64,
342 Self::BZIP2 => Self::Bzip2,
343 Self::LZMA => Self::Lzma,
344 Self::ZSTD => Self::Zstd,
345 Self::MP3 => Self::Mp3,
346 Self::XZ => Self::Xz,
347 Self::JPEG => Self::Jpeg,
348 Self::WAV_PACK => Self::WavPack,
349 Self::PPMD => Self::Ppmd,
350 Self::AEX => Self::Aex,
351 u => Self::Unrecognized(u),
352 }
353 }
354}
355
356impl From<Method> for u16 {
357 fn from(method: Method) -> Self {
358 match method {
359 Method::Store => Method::STORE,
360 Method::Deflate => Method::DEFLATE,
361 Method::Deflate64 => Method::DEFLATE64,
362 Method::Bzip2 => Method::BZIP2,
363 Method::Lzma => Method::LZMA,
364 Method::Zstd => Method::ZSTD,
365 Method::Mp3 => Method::MP3,
366 Method::Xz => Method::XZ,
367 Method::Jpeg => Method::JPEG,
368 Method::WavPack => Method::WAV_PACK,
369 Method::Ppmd => Method::PPMD,
370 Method::Aex => Method::AEX,
371 Method::Unrecognized(u) => u,
372 }
373 }
374}