binstall_zip/
read.rs

1//! Types for reading ZIP archives
2
3#[cfg(feature = "aes-crypto")]
4use crate::aes::{AesReader, AesReaderValid};
5use crate::compression::CompressionMethod;
6use crate::cp437::FromCp437;
7use crate::crc32::Crc32Reader;
8use crate::result::{InvalidPassword, ZipError, ZipResult};
9use crate::spec;
10use crate::types::{AesMode, AesVendorVersion, AtomicU64, DateTime, System, ZipFileData};
11use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
12use byteorder::{LittleEndian, ReadBytesExt};
13use std::borrow::Cow;
14use std::collections::HashMap;
15use std::io::{self, prelude::*};
16use std::path::Path;
17use std::sync::Arc;
18
19#[cfg(any(
20    feature = "deflate",
21    feature = "deflate-miniz",
22    feature = "deflate-zlib"
23))]
24use flate2::read::DeflateDecoder;
25
26#[cfg(feature = "bzip2")]
27use bzip2::read::BzDecoder;
28
29#[cfg(feature = "zstd")]
30use zstd::stream::read::Decoder as ZstdDecoder;
31
32/// Provides high level API for reading from a stream.
33pub mod stream;
34
// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
pub(crate) mod zip_archive {
    /// Extract immutable data from `ZipArchive` to make it cheap to clone
    #[derive(Debug)]
    pub(crate) struct Shared {
        /// Metadata of every entry, in the order the entries were read from
        /// the central directory.
        pub(super) files: Vec<super::ZipFileData>,
        /// Maps an entry name to its index in `files`.
        pub(super) names_map: super::HashMap<String, usize>,
        /// Number of bytes that precede the archive in the underlying reader.
        pub(super) offset: u64,
        /// The archive-level comment, as raw bytes.
        pub(super) comment: Vec<u8>,
    }

    /// ZIP archive reader
    ///
    /// At the moment, this type is cheap to clone if this is the case for the
    /// reader it uses. However, this is not guaranteed by this crate and it may
    /// change in the future.
    ///
    /// ```no_run
    /// use std::io::prelude::*;
    /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
    ///     let mut zip = zip::ZipArchive::new(reader)?;
    ///
    ///     for i in 0..zip.len() {
    ///         let mut file = zip.by_index(i)?;
    ///         println!("Filename: {}", file.name());
    ///         std::io::copy(&mut file, &mut std::io::stdout())?;
    ///     }
    ///
    ///     Ok(())
    /// }
    /// ```
    #[derive(Clone, Debug)]
    pub struct ZipArchive<R> {
        /// The underlying reader the archive data is read from.
        pub(super) reader: R,
        /// Parsed archive metadata, shared between clones of the archive.
        pub(super) shared: super::Arc<Shared>,
    }
}
72
73pub use zip_archive::ZipArchive;
/// Decryption layer that sits directly on top of the size-limited reader of a
/// single entry.
// NOTE(review): the lint is presumably silenced because the variants differ
// noticeably in size — confirm before boxing anything.
#[allow(clippy::large_enum_variant)]
enum CryptoReader<'a> {
    /// The entry is read without any decryption.
    Plaintext(io::Take<&'a mut dyn Read>),
    /// ZipCrypto reader obtained after the password passed validation.
    ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut dyn Read>>),
    /// AES reader obtained after the password passed validation.
    #[cfg(feature = "aes-crypto")]
    Aes {
        /// The validated AES reader.
        reader: AesReaderValid<io::Take<&'a mut dyn Read>>,
        /// Vendor version taken from the entry's AES extra data field.
        vendor_version: AesVendorVersion,
    },
}
84
85impl<'a> Read for CryptoReader<'a> {
86    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
87        match self {
88            CryptoReader::Plaintext(r) => r.read(buf),
89            CryptoReader::ZipCrypto(r) => r.read(buf),
90            #[cfg(feature = "aes-crypto")]
91            CryptoReader::Aes { reader: r, .. } => r.read(buf),
92        }
93    }
94}
95
96impl<'a> CryptoReader<'a> {
97    /// Consumes this decoder, returning the underlying reader.
98    pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
99        match self {
100            CryptoReader::Plaintext(r) => r,
101            CryptoReader::ZipCrypto(r) => r.into_inner(),
102            #[cfg(feature = "aes-crypto")]
103            CryptoReader::Aes { reader: r, .. } => r.into_inner(),
104        }
105    }
106
107    /// Returns `true` if the data is encrypted using AE2.
108    pub fn is_ae2_encrypted(&self) -> bool {
109        #[cfg(feature = "aes-crypto")]
110        return matches!(
111            self,
112            CryptoReader::Aes {
113                vendor_version: AesVendorVersion::Ae2,
114                ..
115            }
116        );
117        #[cfg(not(feature = "aes-crypto"))]
118        false
119    }
120}
121
/// The reader stack used to serve the contents of a single entry.
enum ZipFileReader<'a> {
    /// Placeholder used until a real reader has been built; reading from it
    /// panics.
    NoReader,
    /// The size-limited entry reader with no decompression layer on top.
    Raw(io::Take<&'a mut dyn io::Read>),
    /// Entry stored without compression, wrapped in a `Crc32Reader`.
    Stored(Crc32Reader<CryptoReader<'a>>),
    /// Deflate-compressed entry, wrapped in a `Crc32Reader`.
    #[cfg(any(
        feature = "deflate",
        feature = "deflate-miniz",
        feature = "deflate-zlib"
    ))]
    Deflated(Crc32Reader<flate2::read::DeflateDecoder<CryptoReader<'a>>>),
    /// Bzip2-compressed entry, wrapped in a `Crc32Reader`.
    #[cfg(feature = "bzip2")]
    Bzip2(Crc32Reader<BzDecoder<CryptoReader<'a>>>),
    /// Zstd-compressed entry, wrapped in a `Crc32Reader`.
    #[cfg(feature = "zstd")]
    Zstd(Crc32Reader<ZstdDecoder<'a, io::BufReader<CryptoReader<'a>>>>),
}
137
138impl<'a> Read for ZipFileReader<'a> {
139    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
140        match self {
141            ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
142            ZipFileReader::Raw(r) => r.read(buf),
143            ZipFileReader::Stored(r) => r.read(buf),
144            #[cfg(any(
145                feature = "deflate",
146                feature = "deflate-miniz",
147                feature = "deflate-zlib"
148            ))]
149            ZipFileReader::Deflated(r) => r.read(buf),
150            #[cfg(feature = "bzip2")]
151            ZipFileReader::Bzip2(r) => r.read(buf),
152            #[cfg(feature = "zstd")]
153            ZipFileReader::Zstd(r) => r.read(buf),
154        }
155    }
156}
157
158impl<'a> ZipFileReader<'a> {
159    /// Consumes this decoder, returning the underlying reader.
160    pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
161        match self {
162            ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
163            ZipFileReader::Raw(r) => r,
164            ZipFileReader::Stored(r) => r.into_inner().into_inner(),
165            #[cfg(any(
166                feature = "deflate",
167                feature = "deflate-miniz",
168                feature = "deflate-zlib"
169            ))]
170            ZipFileReader::Deflated(r) => r.into_inner().into_inner().into_inner(),
171            #[cfg(feature = "bzip2")]
172            ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(),
173            #[cfg(feature = "zstd")]
174            ZipFileReader::Zstd(r) => r.into_inner().finish().into_inner().into_inner(),
175        }
176    }
177}
178
/// A struct for reading a zip file
pub struct ZipFile<'a> {
    /// Metadata of the entry being read.
    data: Cow<'a, ZipFileData>,
    /// Decryption layer that has not yet been turned into `reader`; it is
    /// taken (left as `None`) once a reader is built from it.
    crypto_reader: Option<CryptoReader<'a>>,
    /// The active reader stack, or `ZipFileReader::NoReader` until one is built.
    reader: ZipFileReader<'a>,
}
185
186fn find_content<'a>(
187    data: &ZipFileData,
188    reader: &'a mut (impl Read + Seek),
189) -> ZipResult<io::Take<&'a mut dyn Read>> {
190    // Parse local header
191    reader.seek(io::SeekFrom::Start(data.header_start))?;
192    let signature = reader.read_u32::<LittleEndian>()?;
193    if signature != spec::LOCAL_FILE_HEADER_SIGNATURE {
194        return Err(ZipError::InvalidArchive("Invalid local file header"));
195    }
196
197    reader.seek(io::SeekFrom::Current(22))?;
198    let file_name_length = reader.read_u16::<LittleEndian>()? as u64;
199    let extra_field_length = reader.read_u16::<LittleEndian>()? as u64;
200    let magic_and_header = 4 + 22 + 2 + 2;
201    let data_start = data.header_start + magic_and_header + file_name_length + extra_field_length;
202    data.data_start.store(data_start);
203
204    reader.seek(io::SeekFrom::Start(data_start))?;
205    Ok((reader as &mut dyn Read).take(data.compressed_size))
206}
207
208#[allow(clippy::too_many_arguments)]
209fn make_crypto_reader<'a>(
210    compression_method: crate::compression::CompressionMethod,
211    crc32: u32,
212    last_modified_time: DateTime,
213    using_data_descriptor: bool,
214    reader: io::Take<&'a mut dyn io::Read>,
215    password: Option<&[u8]>,
216    aes_info: Option<(AesMode, AesVendorVersion)>,
217    #[cfg(feature = "aes-crypto")] compressed_size: u64,
218) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>> {
219    #[allow(deprecated)]
220    {
221        if let CompressionMethod::Unsupported(_) = compression_method {
222            return unsupported_zip_error("Compression method not supported");
223        }
224    }
225
226    let reader = match (password, aes_info) {
227        #[cfg(not(feature = "aes-crypto"))]
228        (Some(_), Some(_)) => {
229            return Err(ZipError::UnsupportedArchive(
230                "AES encrypted files cannot be decrypted without the aes-crypto feature.",
231            ))
232        }
233        #[cfg(feature = "aes-crypto")]
234        (Some(password), Some((aes_mode, vendor_version))) => {
235            match AesReader::new(reader, aes_mode, compressed_size).validate(password)? {
236                None => return Ok(Err(InvalidPassword)),
237                Some(r) => CryptoReader::Aes {
238                    reader: r,
239                    vendor_version,
240                },
241            }
242        }
243        (Some(password), None) => {
244            let validator = if using_data_descriptor {
245                ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart())
246            } else {
247                ZipCryptoValidator::PkzipCrc32(crc32)
248            };
249            match ZipCryptoReader::new(reader, password).validate(validator)? {
250                None => return Ok(Err(InvalidPassword)),
251                Some(r) => CryptoReader::ZipCrypto(r),
252            }
253        }
254        (None, Some(_)) => return Ok(Err(InvalidPassword)),
255        (None, None) => CryptoReader::Plaintext(reader),
256    };
257    Ok(Ok(reader))
258}
259
260fn make_reader(
261    compression_method: CompressionMethod,
262    crc32: u32,
263    reader: CryptoReader,
264) -> ZipFileReader {
265    let ae2_encrypted = reader.is_ae2_encrypted();
266
267    match compression_method {
268        CompressionMethod::Stored => {
269            ZipFileReader::Stored(Crc32Reader::new(reader, crc32, ae2_encrypted))
270        }
271        #[cfg(any(
272            feature = "deflate",
273            feature = "deflate-miniz",
274            feature = "deflate-zlib"
275        ))]
276        CompressionMethod::Deflated => {
277            let deflate_reader = DeflateDecoder::new(reader);
278            ZipFileReader::Deflated(Crc32Reader::new(deflate_reader, crc32, ae2_encrypted))
279        }
280        #[cfg(feature = "bzip2")]
281        CompressionMethod::Bzip2 => {
282            let bzip2_reader = BzDecoder::new(reader);
283            ZipFileReader::Bzip2(Crc32Reader::new(bzip2_reader, crc32, ae2_encrypted))
284        }
285        #[cfg(feature = "zstd")]
286        CompressionMethod::Zstd => {
287            let zstd_reader = ZstdDecoder::new(reader).unwrap();
288            ZipFileReader::Zstd(Crc32Reader::new(zstd_reader, crc32, ae2_encrypted))
289        }
290        _ => panic!("Compression method not supported"),
291    }
292}
293
294impl<R: Read + io::Seek> ZipArchive<R> {
295    /// Get the directory start offset and number of files. This is done in a
296    /// separate function to ease the control flow design.
297    pub(crate) fn get_directory_counts(
298        reader: &mut R,
299        footer: &spec::CentralDirectoryEnd,
300        cde_start_pos: u64,
301    ) -> ZipResult<(u64, u64, usize)> {
302        // See if there's a ZIP64 footer. The ZIP64 locator if present will
303        // have its signature 20 bytes in front of the standard footer. The
304        // standard footer, in turn, is 22+N bytes large, where N is the
305        // comment length. Therefore:
306        let zip64locator = if reader
307            .seek(io::SeekFrom::End(
308                -(20 + 22 + footer.zip_file_comment.len() as i64),
309            ))
310            .is_ok()
311        {
312            match spec::Zip64CentralDirectoryEndLocator::parse(reader) {
313                Ok(loc) => Some(loc),
314                Err(ZipError::InvalidArchive(_)) => {
315                    // No ZIP64 header; that's actually fine. We're done here.
316                    None
317                }
318                Err(e) => {
319                    // Yikes, a real problem
320                    return Err(e);
321                }
322            }
323        } else {
324            // Empty Zip files will have nothing else so this error might be fine. If
325            // not, we'll find out soon.
326            None
327        };
328
329        match zip64locator {
330            None => {
331                // Some zip files have data prepended to them, resulting in the
332                // offsets all being too small. Get the amount of error by comparing
333                // the actual file position we found the CDE at with the offset
334                // recorded in the CDE.
335                let archive_offset = cde_start_pos
336                    .checked_sub(footer.central_directory_size as u64)
337                    .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
338                    .ok_or(ZipError::InvalidArchive(
339                        "Invalid central directory size or offset",
340                    ))?;
341
342                let directory_start = footer.central_directory_offset as u64 + archive_offset;
343                let number_of_files = footer.number_of_files_on_this_disk as usize;
344                Ok((archive_offset, directory_start, number_of_files))
345            }
346            Some(locator64) => {
347                // If we got here, this is indeed a ZIP64 file.
348
349                if footer.disk_number as u32 != locator64.disk_with_central_directory {
350                    return unsupported_zip_error(
351                        "Support for multi-disk files is not implemented",
352                    );
353                }
354
355                // We need to reassess `archive_offset`. We know where the ZIP64
356                // central-directory-end structure *should* be, but unfortunately we
357                // don't know how to precisely relate that location to our current
358                // actual offset in the file, since there may be junk at its
359                // beginning. Therefore we need to perform another search, as in
360                // read::CentralDirectoryEnd::find_and_parse, except now we search
361                // forward.
362
363                let search_upper_bound = cde_start_pos
364                    .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
365                    .ok_or(ZipError::InvalidArchive(
366                        "File cannot contain ZIP64 central directory end",
367                    ))?;
368                let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
369                    reader,
370                    locator64.end_of_central_directory_offset,
371                    search_upper_bound,
372                )?;
373
374                if footer.disk_number != footer.disk_with_central_directory {
375                    return unsupported_zip_error(
376                        "Support for multi-disk files is not implemented",
377                    );
378                }
379
380                let directory_start = footer
381                    .central_directory_offset
382                    .checked_add(archive_offset)
383                    .ok_or({
384                        ZipError::InvalidArchive("Invalid central directory size or offset")
385                    })?;
386
387                Ok((
388                    archive_offset,
389                    directory_start,
390                    footer.number_of_files as usize,
391                ))
392            }
393        }
394    }
395
396    /// Read a ZIP archive, collecting the files it contains
397    ///
398    /// This uses the central directory record of the ZIP file, and ignores local file headers
399    pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
400        let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?;
401
402        if footer.disk_number != footer.disk_with_central_directory {
403            return unsupported_zip_error("Support for multi-disk files is not implemented");
404        }
405
406        let (archive_offset, directory_start, number_of_files) =
407            Self::get_directory_counts(&mut reader, &footer, cde_start_pos)?;
408
409        // If the parsed number of files is greater than the offset then
410        // something fishy is going on and we shouldn't trust number_of_files.
411        let file_capacity = if number_of_files > cde_start_pos as usize {
412            0
413        } else {
414            number_of_files
415        };
416
417        let mut files = Vec::with_capacity(file_capacity);
418        let mut names_map = HashMap::with_capacity(file_capacity);
419
420        if reader.seek(io::SeekFrom::Start(directory_start)).is_err() {
421            return Err(ZipError::InvalidArchive(
422                "Could not seek to start of central directory",
423            ));
424        }
425
426        for _ in 0..number_of_files {
427            let file = central_header_to_zip_file(&mut reader, archive_offset)?;
428            names_map.insert(file.file_name.clone(), files.len());
429            files.push(file);
430        }
431
432        let shared = Arc::new(zip_archive::Shared {
433            files,
434            names_map,
435            offset: archive_offset,
436            comment: footer.zip_file_comment,
437        });
438
439        Ok(ZipArchive { reader, shared })
440    }
441    /// Extract a Zip archive into a directory, overwriting files if they
442    /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
443    ///
444    /// Extraction is not atomic; If an error is encountered, some of the files
445    /// may be left on disk.
446    pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
447        use std::fs;
448
449        for i in 0..self.len() {
450            let mut file = self.by_index(i)?;
451            let filepath = file
452                .enclosed_name()
453                .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
454
455            let outpath = directory.as_ref().join(filepath);
456
457            if file.name().ends_with('/') {
458                fs::create_dir_all(&outpath)?;
459            } else {
460                if let Some(p) = outpath.parent() {
461                    if !p.exists() {
462                        fs::create_dir_all(&p)?;
463                    }
464                }
465                let mut outfile = fs::File::create(&outpath)?;
466                io::copy(&mut file, &mut outfile)?;
467            }
468            // Get and Set permissions
469            #[cfg(unix)]
470            {
471                use std::os::unix::fs::PermissionsExt;
472                if let Some(mode) = file.unix_mode() {
473                    fs::set_permissions(&outpath, fs::Permissions::from_mode(mode))?;
474                }
475            }
476        }
477        Ok(())
478    }
479
480    /// Number of files contained in this zip.
481    pub fn len(&self) -> usize {
482        self.shared.files.len()
483    }
484
485    /// Whether this zip archive contains no files
486    pub fn is_empty(&self) -> bool {
487        self.len() == 0
488    }
489
490    /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
491    ///
492    /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
493    /// of that prepended data.
494    pub fn offset(&self) -> u64 {
495        self.shared.offset
496    }
497
498    /// Get the comment of the zip archive.
499    pub fn comment(&self) -> &[u8] {
500        &self.shared.comment
501    }
502
503    /// Returns an iterator over all the file and directory names in this archive.
504    pub fn file_names(&self) -> impl Iterator<Item = &str> {
505        self.shared.names_map.keys().map(|s| s.as_str())
506    }
507
508    /// Search for a file entry by name, decrypt with given password
509    ///
510    /// # Warning
511    ///
512    /// The implementation of the cryptographic algorithms has not
513    /// gone through a correctness review, and you should assume it is insecure:
514    /// passwords used with this API may be compromised.
515    ///
516    /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us
517    /// to check for a 1/256 chance that the password is correct.
518    /// There are many passwords out there that will also pass the validity checks
519    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
520    /// due to its fairly primitive approach to cryptography.
521    pub fn by_name_decrypt<'a>(
522        &'a mut self,
523        name: &str,
524        password: &[u8],
525    ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
526        self.by_name_with_optional_password(name, Some(password))
527    }
528
529    /// Search for a file entry by name
530    pub fn by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>> {
531        Ok(self.by_name_with_optional_password(name, None)?.unwrap())
532    }
533
534    fn by_name_with_optional_password<'a>(
535        &'a mut self,
536        name: &str,
537        password: Option<&[u8]>,
538    ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
539        let index = match self.shared.names_map.get(name) {
540            Some(index) => *index,
541            None => {
542                return Err(ZipError::FileNotFound);
543            }
544        };
545        self.by_index_with_optional_password(index, password)
546    }
547
548    /// Get a contained file by index, decrypt with given password
549    ///
550    /// # Warning
551    ///
552    /// The implementation of the cryptographic algorithms has not
553    /// gone through a correctness review, and you should assume it is insecure:
554    /// passwords used with this API may be compromised.
555    ///
556    /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us
557    /// to check for a 1/256 chance that the password is correct.
558    /// There are many passwords out there that will also pass the validity checks
559    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
560    /// due to its fairly primitive approach to cryptography.
561    pub fn by_index_decrypt<'a>(
562        &'a mut self,
563        file_number: usize,
564        password: &[u8],
565    ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
566        self.by_index_with_optional_password(file_number, Some(password))
567    }
568
569    /// Get a contained file by index
570    pub fn by_index(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
571        Ok(self
572            .by_index_with_optional_password(file_number, None)?
573            .unwrap())
574    }
575
576    /// Get a contained file by index without decompressing it
577    pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
578        let reader = &mut self.reader;
579        self.shared
580            .files
581            .get(file_number)
582            .ok_or(ZipError::FileNotFound)
583            .and_then(move |data| {
584                Ok(ZipFile {
585                    crypto_reader: None,
586                    reader: ZipFileReader::Raw(find_content(data, reader)?),
587                    data: Cow::Borrowed(data),
588                })
589            })
590    }
591
592    fn by_index_with_optional_password<'a>(
593        &'a mut self,
594        file_number: usize,
595        mut password: Option<&[u8]>,
596    ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
597        let data = self
598            .shared
599            .files
600            .get(file_number)
601            .ok_or(ZipError::FileNotFound)?;
602
603        match (password, data.encrypted) {
604            (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)),
605            (Some(_), false) => password = None, //Password supplied, but none needed! Discard.
606            _ => {}
607        }
608        let limit_reader = find_content(data, &mut self.reader)?;
609
610        match make_crypto_reader(
611            data.compression_method,
612            data.crc32,
613            data.last_modified_time,
614            data.using_data_descriptor,
615            limit_reader,
616            password,
617            data.aes_mode,
618            #[cfg(feature = "aes-crypto")]
619            data.compressed_size,
620        ) {
621            Ok(Ok(crypto_reader)) => Ok(Ok(ZipFile {
622                crypto_reader: Some(crypto_reader),
623                reader: ZipFileReader::NoReader,
624                data: Cow::Borrowed(data),
625            })),
626            Err(e) => Err(e),
627            Ok(Err(e)) => Ok(Err(e)),
628        }
629    }
630
631    /// Unwrap and return the inner reader object
632    ///
633    /// The position of the reader is undefined.
634    pub fn into_inner(self) -> R {
635        self.reader
636    }
637}
638
639fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
640    Err(ZipError::UnsupportedArchive(detail))
641}
642
643/// Parse a central directory entry to collect the information for the file.
644pub(crate) fn central_header_to_zip_file<R: Read + io::Seek>(
645    reader: &mut R,
646    archive_offset: u64,
647) -> ZipResult<ZipFileData> {
648    let central_header_start = reader.stream_position()?;
649
650    // Parse central header
651    let signature = reader.read_u32::<LittleEndian>()?;
652    if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
653        Err(ZipError::InvalidArchive("Invalid Central Directory header"))
654    } else {
655        central_header_to_zip_file_inner(reader, archive_offset, central_header_start)
656    }
657}
658
659/// Parse a central directory entry to collect the information for the file.
660fn central_header_to_zip_file_inner<R: Read>(
661    reader: &mut R,
662    archive_offset: u64,
663    central_header_start: u64,
664) -> ZipResult<ZipFileData> {
665    let version_made_by = reader.read_u16::<LittleEndian>()?;
666    let _version_to_extract = reader.read_u16::<LittleEndian>()?;
667    let flags = reader.read_u16::<LittleEndian>()?;
668    let encrypted = flags & 1 == 1;
669    let is_utf8 = flags & (1 << 11) != 0;
670    let using_data_descriptor = flags & (1 << 3) != 0;
671    let compression_method = reader.read_u16::<LittleEndian>()?;
672    let last_mod_time = reader.read_u16::<LittleEndian>()?;
673    let last_mod_date = reader.read_u16::<LittleEndian>()?;
674    let crc32 = reader.read_u32::<LittleEndian>()?;
675    let compressed_size = reader.read_u32::<LittleEndian>()?;
676    let uncompressed_size = reader.read_u32::<LittleEndian>()?;
677    let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
678    let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
679    let file_comment_length = reader.read_u16::<LittleEndian>()? as usize;
680    let _disk_number = reader.read_u16::<LittleEndian>()?;
681    let _internal_file_attributes = reader.read_u16::<LittleEndian>()?;
682    let external_file_attributes = reader.read_u32::<LittleEndian>()?;
683    let offset = reader.read_u32::<LittleEndian>()? as u64;
684    let mut file_name_raw = vec![0; file_name_length];
685    reader.read_exact(&mut file_name_raw)?;
686    let mut extra_field = vec![0; extra_field_length];
687    reader.read_exact(&mut extra_field)?;
688    let mut file_comment_raw = vec![0; file_comment_length];
689    reader.read_exact(&mut file_comment_raw)?;
690
691    let file_name = match is_utf8 {
692        true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
693        false => file_name_raw.clone().from_cp437(),
694    };
695    let file_comment = match is_utf8 {
696        true => String::from_utf8_lossy(&*file_comment_raw).into_owned(),
697        false => file_comment_raw.from_cp437(),
698    };
699
700    // Construct the result
701    let mut result = ZipFileData {
702        system: System::from_u8((version_made_by >> 8) as u8),
703        version_made_by: version_made_by as u8,
704        encrypted,
705        using_data_descriptor,
706        compression_method: {
707            #[allow(deprecated)]
708            CompressionMethod::from_u16(compression_method)
709        },
710        compression_level: None,
711        last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
712        crc32,
713        compressed_size: compressed_size as u64,
714        uncompressed_size: uncompressed_size as u64,
715        file_name,
716        file_name_raw,
717        extra_field,
718        file_comment,
719        header_start: offset,
720        central_header_start,
721        data_start: AtomicU64::new(0),
722        external_attributes: external_file_attributes,
723        large_file: false,
724        aes_mode: None,
725    };
726
727    match parse_extra_field(&mut result) {
728        Ok(..) | Err(ZipError::Io(..)) => {}
729        Err(e) => return Err(e),
730    }
731
732    let aes_enabled = result.compression_method == CompressionMethod::AES;
733    if aes_enabled && result.aes_mode.is_none() {
734        return Err(ZipError::InvalidArchive(
735            "AES encryption without AES extra data field",
736        ));
737    }
738
739    // Account for shifted zip offsets.
740    result.header_start = result
741        .header_start
742        .checked_add(archive_offset)
743        .ok_or(ZipError::InvalidArchive("Archive header is too large"))?;
744
745    Ok(result)
746}
747
748fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
749    let mut reader = io::Cursor::new(&file.extra_field);
750
751    while (reader.position() as usize) < file.extra_field.len() {
752        let kind = reader.read_u16::<LittleEndian>()?;
753        let len = reader.read_u16::<LittleEndian>()?;
754        let mut len_left = len as i64;
755        match kind {
756            // Zip64 extended information extra field
757            0x0001 => {
758                if file.uncompressed_size == spec::ZIP64_BYTES_THR {
759                    file.large_file = true;
760                    file.uncompressed_size = reader.read_u64::<LittleEndian>()?;
761                    len_left -= 8;
762                }
763                if file.compressed_size == spec::ZIP64_BYTES_THR {
764                    file.large_file = true;
765                    file.compressed_size = reader.read_u64::<LittleEndian>()?;
766                    len_left -= 8;
767                }
768                if file.header_start == spec::ZIP64_BYTES_THR {
769                    file.header_start = reader.read_u64::<LittleEndian>()?;
770                    len_left -= 8;
771                }
772            }
773            0x9901 => {
774                // AES
775                if len != 7 {
776                    return Err(ZipError::UnsupportedArchive(
777                        "AES extra data field has an unsupported length",
778                    ));
779                }
780                let vendor_version = reader.read_u16::<LittleEndian>()?;
781                let vendor_id = reader.read_u16::<LittleEndian>()?;
782                let aes_mode = reader.read_u8()?;
783                let compression_method = reader.read_u16::<LittleEndian>()?;
784
785                if vendor_id != 0x4541 {
786                    return Err(ZipError::InvalidArchive("Invalid AES vendor"));
787                }
788                let vendor_version = match vendor_version {
789                    0x0001 => AesVendorVersion::Ae1,
790                    0x0002 => AesVendorVersion::Ae2,
791                    _ => return Err(ZipError::InvalidArchive("Invalid AES vendor version")),
792                };
793                match aes_mode {
794                    0x01 => file.aes_mode = Some((AesMode::Aes128, vendor_version)),
795                    0x02 => file.aes_mode = Some((AesMode::Aes192, vendor_version)),
796                    0x03 => file.aes_mode = Some((AesMode::Aes256, vendor_version)),
797                    _ => return Err(ZipError::InvalidArchive("Invalid AES encryption strength")),
798                };
799                file.compression_method = {
800                    #[allow(deprecated)]
801                    CompressionMethod::from_u16(compression_method)
802                };
803            }
804            _ => {
805                // Other fields are ignored
806            }
807        }
808
809        // We could also check for < 0 to check for errors
810        if len_left > 0 {
811            reader.seek(io::SeekFrom::Current(len_left))?;
812        }
813    }
814    Ok(())
815}
816
817/// Methods for retrieving information on zip files
818impl<'a> ZipFile<'a> {
819    fn get_reader(&mut self) -> &mut ZipFileReader<'a> {
820        if let ZipFileReader::NoReader = self.reader {
821            let data = &self.data;
822            let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
823            self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)
824        }
825        &mut self.reader
826    }
827
828    pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read {
829        if let ZipFileReader::NoReader = self.reader {
830            let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
831            self.reader = ZipFileReader::Raw(crypto_reader.into_inner())
832        }
833        &mut self.reader
834    }
835
836    /// Get the version of the file
837    pub fn version_made_by(&self) -> (u8, u8) {
838        (
839            self.data.version_made_by / 10,
840            self.data.version_made_by % 10,
841        )
842    }
843
844    /// Get the name of the file
845    ///
846    /// # Warnings
847    ///
848    /// It is dangerous to use this name directly when extracting an archive.
849    /// It may contain an absolute path (`/etc/shadow`), or break out of the
850    /// current directory (`../runtime`). Carelessly writing to these paths
851    /// allows an attacker to craft a ZIP archive that will overwrite critical
852    /// files.
853    ///
854    /// You can use the [`ZipFile::enclosed_name`] method to validate the name
855    /// as a safe path.
856    pub fn name(&self) -> &str {
857        &self.data.file_name
858    }
859
860    /// Get the name of the file, in the raw (internal) byte representation.
861    ///
862    /// The encoding of this data is currently undefined.
863    pub fn name_raw(&self) -> &[u8] {
864        &self.data.file_name_raw
865    }
866
867    /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
868    /// removes a leading '/' and removes '..' parts.
869    #[deprecated(
870        since = "0.5.7",
871        note = "by stripping `..`s from the path, the meaning of paths can change.
872                `mangled_name` can be used if this behaviour is desirable"
873    )]
874    pub fn sanitized_name(&self) -> ::std::path::PathBuf {
875        self.mangled_name()
876    }
877
878    /// Rewrite the path, ignoring any path components with special meaning.
879    ///
880    /// - Absolute paths are made relative
881    /// - [`ParentDir`]s are ignored
882    /// - Truncates the filename at a NULL byte
883    ///
884    /// This is appropriate if you need to be able to extract *something* from
885    /// any archive, but will easily misrepresent trivial paths like
886    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
887    /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
888    ///
889    /// [`ParentDir`]: `Component::ParentDir`
890    pub fn mangled_name(&self) -> ::std::path::PathBuf {
891        self.data.file_name_sanitized()
892    }
893
894    /// Ensure the file path is safe to use as a [`Path`].
895    ///
896    /// - It can't contain NULL bytes
897    /// - It can't resolve to a path outside the current directory
898    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
899    /// - It can't be an absolute path
900    ///
901    /// This will read well-formed ZIP files correctly, and is resistant
902    /// to path-based exploits. It is recommended over
903    /// [`ZipFile::mangled_name`].
904    pub fn enclosed_name(&self) -> Option<&Path> {
905        self.data.enclosed_name()
906    }
907
908    /// Get the comment of the file
909    pub fn comment(&self) -> &str {
910        &self.data.file_comment
911    }
912
913    /// Get the compression method used to store the file
914    pub fn compression(&self) -> CompressionMethod {
915        self.data.compression_method
916    }
917
918    /// Get the size of the file in the archive
919    pub fn compressed_size(&self) -> u64 {
920        self.data.compressed_size
921    }
922
923    /// Get the size of the file when uncompressed
924    pub fn size(&self) -> u64 {
925        self.data.uncompressed_size
926    }
927
928    /// Get the time the file was last modified
929    pub fn last_modified(&self) -> DateTime {
930        self.data.last_modified_time
931    }
932    /// Returns whether the file is actually a directory
933    pub fn is_dir(&self) -> bool {
934        self.name()
935            .chars()
936            .rev()
937            .next()
938            .map_or(false, |c| c == '/' || c == '\\')
939    }
940
941    /// Returns whether the file is a regular file
942    pub fn is_file(&self) -> bool {
943        !self.is_dir()
944    }
945
946    /// Get unix mode for the file
947    pub fn unix_mode(&self) -> Option<u32> {
948        self.data.unix_mode()
949    }
950
951    /// Get the CRC32 hash of the original file
952    pub fn crc32(&self) -> u32 {
953        self.data.crc32
954    }
955
956    /// Get the extra data of the zip header for this file
957    pub fn extra_data(&self) -> &[u8] {
958        &self.data.extra_field
959    }
960
961    /// Get the starting offset of the data of the compressed file
962    pub fn data_start(&self) -> u64 {
963        self.data.data_start.load()
964    }
965
966    /// Get the starting offset of the zip header for this file
967    pub fn header_start(&self) -> u64 {
968        self.data.header_start
969    }
970    /// Get the starting offset of the zip header in the central directory for this file
971    pub fn central_header_start(&self) -> u64 {
972        self.data.central_header_start
973    }
974}
975
976impl<'a> Read for ZipFile<'a> {
977    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
978        self.get_reader().read(buf)
979    }
980}
981
982impl<'a> Drop for ZipFile<'a> {
983    fn drop(&mut self) {
984        // self.data is Owned, this reader is constructed by a streaming reader.
985        // In this case, we want to exhaust the reader so that the next file is accessible.
986        if let Cow::Owned(_) = self.data {
987            let mut buffer = [0; 1 << 16];
988
989            // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
990            let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader {
991                ZipFileReader::NoReader => {
992                    let innerreader = ::std::mem::replace(&mut self.crypto_reader, None);
993                    innerreader.expect("Invalid reader state").into_inner()
994                }
995                reader => {
996                    let innerreader = ::std::mem::replace(reader, ZipFileReader::NoReader);
997                    innerreader.into_inner()
998                }
999            };
1000
1001            loop {
1002                match reader.read(&mut buffer) {
1003                    Ok(0) => break,
1004                    Ok(_) => (),
1005                    Err(e) => panic!(
1006                        "Could not consume all of the output of the current ZipFile: {:?}",
1007                        e
1008                    ),
1009                }
1010            }
1011        }
1012    }
1013}
1014
1015/// **It is recommended to use [`stream`] for its highlevel API instead.**
1016///
1017/// Read ZipFile structures from a non-seekable reader.
1018///
1019/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
1020/// as some information will be missing when reading this manner.
1021///
1022/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
1023/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
1024/// is encountered. No more files should be read after this.
1025///
1026/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after
1027/// the structure is done.
1028///
1029/// Missing fields are:
1030/// * `comment`: set to an empty string
1031/// * `data_start`: set to 0
1032/// * `external_attributes`: `unix_mode()`: will return None
1033pub fn read_zipfile_from_stream<'a, R: io::Read>(
1034    reader: &'a mut R,
1035) -> ZipResult<Option<ZipFile<'_>>> {
1036    let signature = reader.read_u32::<LittleEndian>()?;
1037
1038    match signature {
1039        spec::LOCAL_FILE_HEADER_SIGNATURE => (),
1040        spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
1041        _ => return Err(ZipError::InvalidArchive("Invalid local file header")),
1042    }
1043
1044    let version_made_by = reader.read_u16::<LittleEndian>()?;
1045    let flags = reader.read_u16::<LittleEndian>()?;
1046    let encrypted = flags & 1 == 1;
1047    let is_utf8 = flags & (1 << 11) != 0;
1048    let using_data_descriptor = flags & (1 << 3) != 0;
1049    #[allow(deprecated)]
1050    let compression_method = CompressionMethod::from_u16(reader.read_u16::<LittleEndian>()?);
1051    let last_mod_time = reader.read_u16::<LittleEndian>()?;
1052    let last_mod_date = reader.read_u16::<LittleEndian>()?;
1053    let crc32 = reader.read_u32::<LittleEndian>()?;
1054    let compressed_size = reader.read_u32::<LittleEndian>()?;
1055    let uncompressed_size = reader.read_u32::<LittleEndian>()?;
1056    let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
1057    let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
1058
1059    let mut file_name_raw = vec![0; file_name_length];
1060    reader.read_exact(&mut file_name_raw)?;
1061    let mut extra_field = vec![0; extra_field_length];
1062    reader.read_exact(&mut extra_field)?;
1063
1064    let file_name = match is_utf8 {
1065        true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
1066        false => file_name_raw.clone().from_cp437(),
1067    };
1068
1069    let mut result = ZipFileData {
1070        system: System::from_u8((version_made_by >> 8) as u8),
1071        version_made_by: version_made_by as u8,
1072        encrypted,
1073        using_data_descriptor,
1074        compression_method,
1075        compression_level: None,
1076        last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
1077        crc32,
1078        compressed_size: compressed_size as u64,
1079        uncompressed_size: uncompressed_size as u64,
1080        file_name,
1081        file_name_raw,
1082        extra_field,
1083        file_comment: String::new(), // file comment is only available in the central directory
1084        // header_start and data start are not available, but also don't matter, since seeking is
1085        // not available.
1086        header_start: 0,
1087        data_start: AtomicU64::new(0),
1088        central_header_start: 0,
1089        // The external_attributes field is only available in the central directory.
1090        // We set this to zero, which should be valid as the docs state 'If input came
1091        // from standard input, this field is set to zero.'
1092        external_attributes: 0,
1093        large_file: false,
1094        aes_mode: None,
1095    };
1096
1097    match parse_extra_field(&mut result) {
1098        Ok(..) | Err(ZipError::Io(..)) => {}
1099        Err(e) => return Err(e),
1100    }
1101
1102    if encrypted {
1103        return unsupported_zip_error("Encrypted files are not supported");
1104    }
1105    if using_data_descriptor {
1106        return unsupported_zip_error("The file length is not available in the local header");
1107    }
1108
1109    let limit_reader = (reader as &'a mut dyn io::Read).take(result.compressed_size as u64);
1110
1111    let result_crc32 = result.crc32;
1112    let result_compression_method = result.compression_method;
1113    let crypto_reader = make_crypto_reader(
1114        result_compression_method,
1115        result_crc32,
1116        result.last_modified_time,
1117        result.using_data_descriptor,
1118        limit_reader,
1119        None,
1120        None,
1121        #[cfg(feature = "aes-crypto")]
1122        result.compressed_size,
1123    )?
1124    .unwrap();
1125
1126    Ok(Some(ZipFile {
1127        data: Cow::Owned(result),
1128        crypto_reader: None,
1129        reader: make_reader(result_compression_method, result_crc32, crypto_reader),
1130    }))
1131}
1132
#[cfg(test)]
mod test {
    use super::{read_zipfile_from_stream, ZipArchive};
    use std::io::{self, Read};

    /// Opening the `invalid_offset.zip` fixture must fail.
    #[test]
    fn invalid_offset() {
        let bytes = include_bytes!("../tests/data/invalid_offset.zip").to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes));
        assert!(archive.is_err());
    }

    /// Opening the `invalid_offset2.zip` fixture must fail.
    #[test]
    fn invalid_offset2() {
        let bytes = include_bytes!("../tests/data/invalid_offset2.zip").to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes));
        assert!(archive.is_err());
    }

    /// The `zip64_demo.zip` fixture opens and reports exactly one entry.
    #[test]
    fn zip64_with_leading_junk() {
        let bytes = include_bytes!("../tests/data/zip64_demo.zip").to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes)).unwrap();
        assert_eq!(archive.len(), 1);
    }

    /// Checks the archive comment and the central header offset of the first entry.
    #[test]
    fn zip_contents() {
        let bytes = include_bytes!("../tests/data/mimetype.zip").to_vec();
        let mut archive = ZipArchive::new(io::Cursor::new(bytes)).unwrap();
        assert_eq!(archive.comment(), b"");
        assert_eq!(archive.by_index(0).unwrap().central_header_start(), 77);
    }

    /// Reads entries from the stream one by one until `None` is returned.
    #[test]
    fn zip_read_streaming() {
        let bytes = include_bytes!("../tests/data/mimetype.zip").to_vec();
        let mut stream = io::Cursor::new(bytes);
        // Each returned `ZipFile` is dropped right away before the next call.
        while read_zipfile_from_stream(&mut stream).unwrap().is_some() {}
    }

    /// A cloned archive reads independently of the archive it was cloned from.
    #[test]
    fn zip_clone() {
        let bytes = include_bytes!("../tests/data/mimetype.zip").to_vec();
        let mut first = ZipArchive::new(io::Cursor::new(bytes)).unwrap();
        let mut second = first.clone();

        let mut file1 = first.by_index(0).unwrap();
        let mut file2 = second.by_index(0).unwrap();

        let stamp = file1.last_modified();
        let observed = (
            stamp.year(),
            stamp.month(),
            stamp.day(),
            stamp.hour(),
            stamp.minute(),
            stamp.second(),
        );
        assert_eq!(observed, (1980, 1, 1, 0, 0, 0));

        let mut buf1 = [0; 5];
        let mut buf2 = [0; 5];
        let mut buf3 = [0; 5];
        let mut buf4 = [0; 5];

        // Interleave the reads so a shared cursor position would be noticed.
        file1.read_exact(&mut buf1).unwrap();
        file2.read_exact(&mut buf2).unwrap();
        file1.read_exact(&mut buf3).unwrap();
        file2.read_exact(&mut buf4).unwrap();

        assert_eq!(buf1, buf2);
        assert_eq!(buf3, buf4);
        assert_ne!(buf1, buf3);
    }

    /// Entries named `dir*` must be directories and entries named `file*` must be files.
    #[test]
    fn file_and_dir_predicates() {
        let bytes = include_bytes!("../tests/data/files_and_dirs.zip").to_vec();
        let mut archive = ZipArchive::new(io::Cursor::new(bytes)).unwrap();

        for index in 0..archive.len() {
            let entry = archive.by_index(index).unwrap();
            let path = entry.enclosed_name().unwrap();
            let leaf = path.file_name().unwrap().to_str().unwrap();
            assert!(
                (leaf.starts_with("dir") && entry.is_dir())
                    || (leaf.starts_with("file") && entry.is_file())
            );
        }
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        let bytes = include_bytes!(
            "../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        )
        .to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes));
        assert!(archive.is_err());
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        let bytes = include_bytes!(
            "../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        )
        .to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes));
        assert!(archive.is_err());
    }
}