// zip/read.rs

1//! Types for reading ZIP archives
2
3#[cfg(feature = "aes-crypto")]
4use crate::aes::{AesReader, AesReaderValid};
5use crate::compression::{CompressionMethod, Decompressor};
6use crate::cp437::FromCp437;
7use crate::crc32::Crc32Reader;
8use crate::extra_fields::{ExtendedTimestamp, ExtraField, Ntfs};
9use crate::read::zip_archive::{Shared, SharedBuilder};
10use crate::result::invalid;
11use crate::result::{ZipError, ZipResult};
12use crate::spec::{self, CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, Pod};
13use crate::types::{
14    AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData,
15    ZipLocalEntryBlock,
16};
17use crate::write::SimpleFileOptions;
18use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
19use crate::ZIP64_BYTES_THR;
20use indexmap::IndexMap;
21use std::borrow::Cow;
22use std::ffi::OsStr;
23use std::fs::create_dir_all;
24use std::io::{self, copy, prelude::*, sink, SeekFrom};
25use std::mem;
26use std::mem::size_of;
27use std::ops::{Deref, Range};
28use std::path::{Component, Path, PathBuf};
29use std::sync::{Arc, OnceLock};
30
31mod config;
32
33pub use config::*;
34
35/// Provides high level API for reading from a stream.
36pub(crate) mod stream;
37
38pub(crate) mod magic_finder;
39
40// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
41pub(crate) mod zip_archive {
42    use indexmap::IndexMap;
43    use std::sync::Arc;
44
45    /// Extract immutable data from `ZipArchive` to make it cheap to clone
46    #[derive(Debug)]
47    pub(crate) struct Shared {
48        pub(crate) files: IndexMap<Box<str>, super::ZipFileData>,
49        pub(super) offset: u64,
50        pub(super) dir_start: u64,
51        // This isn't yet used anywhere, but it is here for use cases in the future.
52        #[allow(dead_code)]
53        pub(super) config: super::Config,
54        pub(crate) comment: Box<[u8]>,
55        pub(crate) zip64_comment: Option<Box<[u8]>>,
56    }
57
58    #[derive(Debug)]
59    pub(crate) struct SharedBuilder {
60        pub(crate) files: Vec<super::ZipFileData>,
61        pub(super) offset: u64,
62        pub(super) dir_start: u64,
63        // This isn't yet used anywhere, but it is here for use cases in the future.
64        #[allow(dead_code)]
65        pub(super) config: super::Config,
66    }
67
68    impl SharedBuilder {
69        pub fn build(self, comment: Box<[u8]>, zip64_comment: Option<Box<[u8]>>) -> Shared {
70            let mut index_map = IndexMap::with_capacity(self.files.len());
71            self.files.into_iter().for_each(|file| {
72                index_map.insert(file.file_name.clone(), file);
73            });
74            Shared {
75                files: index_map,
76                offset: self.offset,
77                dir_start: self.dir_start,
78                config: self.config,
79                comment,
80                zip64_comment,
81            }
82        }
83    }
84
85    /// ZIP archive reader
86    ///
87    /// At the moment, this type is cheap to clone if this is the case for the
88    /// reader it uses. However, this is not guaranteed by this crate and it may
89    /// change in the future.
90    ///
91    /// ```no_run
92    /// use std::io::prelude::*;
93    /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
94    ///     use zip::HasZipMetadata;
95    ///     let mut zip = zip::ZipArchive::new(reader)?;
96    ///
97    ///     for i in 0..zip.len() {
98    ///         let mut file = zip.by_index(i)?;
99    ///         println!("Filename: {}", file.name());
100    ///         std::io::copy(&mut file, &mut std::io::stdout())?;
101    ///     }
102    ///
103    ///     Ok(())
104    /// }
105    /// ```
106    #[derive(Clone, Debug)]
107    pub struct ZipArchive<R> {
108        pub(super) reader: R,
109        pub(super) shared: Arc<Shared>,
110    }
111}
112
113#[cfg(feature = "aes-crypto")]
114use crate::aes::PWD_VERIFY_LENGTH;
115use crate::extra_fields::UnicodeExtraField;
116use crate::result::ZipError::InvalidPassword;
117use crate::spec::is_dir;
118use crate::types::ffi::{S_IFLNK, S_IFREG};
119use crate::unstable::{path_to_string, LittleEndianReadExt};
120pub use zip_archive::ZipArchive;
121
/// Decryption layer over the length-limited, still-compressed bytes of one
/// archive entry.
#[allow(clippy::large_enum_variant)]
pub(crate) enum CryptoReader<'a, R: Read> {
    /// No encryption: bytes pass through unchanged.
    Plaintext(io::Take<&'a mut R>),
    /// Legacy ("traditional" PKWARE) ZipCrypto stream cipher, already
    /// validated against its password check.
    ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut R>>),
    /// WinZip AES decryption (only available with the `aes-crypto` feature);
    /// the vendor version is kept so AE-2 entries can be recognized later.
    #[cfg(feature = "aes-crypto")]
    Aes {
        reader: AesReaderValid<io::Take<&'a mut R>>,
        vendor_version: AesVendorVersion,
    },
}
132
impl<R: Read> Read for CryptoReader<'_, R> {
    /// Delegates to whichever decryption variant is active.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self {
            CryptoReader::Plaintext(r) => r.read(buf),
            CryptoReader::ZipCrypto(r) => r.read(buf),
            #[cfg(feature = "aes-crypto")]
            CryptoReader::Aes { reader: r, .. } => r.read(buf),
        }
    }

    // `read_to_end`/`read_to_string` are forwarded explicitly so the inner
    // readers' own implementations are used rather than the default
    // read-in-a-loop versions.
    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        match self {
            CryptoReader::Plaintext(r) => r.read_to_end(buf),
            CryptoReader::ZipCrypto(r) => r.read_to_end(buf),
            #[cfg(feature = "aes-crypto")]
            CryptoReader::Aes { reader: r, .. } => r.read_to_end(buf),
        }
    }

    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
        match self {
            CryptoReader::Plaintext(r) => r.read_to_string(buf),
            CryptoReader::ZipCrypto(r) => r.read_to_string(buf),
            #[cfg(feature = "aes-crypto")]
            CryptoReader::Aes { reader: r, .. } => r.read_to_string(buf),
        }
    }
}
161
impl<'a, R: Read> CryptoReader<'a, R> {
    /// Consumes this decoder, returning the underlying reader.
    pub fn into_inner(self) -> io::Take<&'a mut R> {
        match self {
            CryptoReader::Plaintext(r) => r,
            CryptoReader::ZipCrypto(r) => r.into_inner(),
            #[cfg(feature = "aes-crypto")]
            CryptoReader::Aes { reader: r, .. } => r.into_inner(),
        }
    }

    /// Returns `true` if the data is encrypted using AE2.
    ///
    /// Always `false` when the `aes-crypto` feature is disabled, since AES
    /// variants cannot be constructed in that configuration.
    pub const fn is_ae2_encrypted(&self) -> bool {
        #[cfg(feature = "aes-crypto")]
        return matches!(
            self,
            CryptoReader::Aes {
                vendor_version: AesVendorVersion::Ae2,
                ..
            }
        );
        #[cfg(not(feature = "aes-crypto"))]
        false
    }
}
187
/// Builds the error reported when a `ZipFileReader` is used while in its
/// unusable (`NoReader`) state.
#[cold]
fn invalid_state<T>() -> io::Result<T> {
    let message = "ZipFileReader was in an invalid state";
    Err(io::Error::other(message))
}
192
/// State machine for one open entry's data stream.
pub(crate) enum ZipFileReader<'a, R: Read> {
    /// Placeholder state; any read attempt reports an invalid-state error.
    NoReader,
    /// Length-limited raw bytes, with no decryption/decompression applied.
    Raw(io::Take<&'a mut R>),
    /// Full stack: CRC32 checking over decompression over buffered decryption.
    Compressed(Box<Crc32Reader<Decompressor<io::BufReader<CryptoReader<'a, R>>>>>),
}
198
impl<R: Read> Read for ZipFileReader<'_, R> {
    /// Delegates to the active reader; `NoReader` yields an error.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self {
            ZipFileReader::NoReader => invalid_state(),
            ZipFileReader::Raw(r) => r.read(buf),
            ZipFileReader::Compressed(r) => r.read(buf),
        }
    }

    // The remaining methods are forwarded explicitly so the inner readers'
    // specialized implementations are used instead of the trait defaults.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        match self {
            ZipFileReader::NoReader => invalid_state(),
            ZipFileReader::Raw(r) => r.read_exact(buf),
            ZipFileReader::Compressed(r) => r.read_exact(buf),
        }
    }

    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        match self {
            ZipFileReader::NoReader => invalid_state(),
            ZipFileReader::Raw(r) => r.read_to_end(buf),
            ZipFileReader::Compressed(r) => r.read_to_end(buf),
        }
    }

    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
        match self {
            ZipFileReader::NoReader => invalid_state(),
            ZipFileReader::Raw(r) => r.read_to_string(buf),
            ZipFileReader::Compressed(r) => r.read_to_string(buf),
        }
    }
}
232
impl<'a, R: Read> ZipFileReader<'a, R> {
    /// Unwraps the reader stack down to the raw length-limited reader over
    /// the archive.
    ///
    /// Fails in the `NoReader` state, or if the decompressor cannot release
    /// its inner reader (the `?` below).
    fn into_inner(self) -> io::Result<io::Take<&'a mut R>> {
        match self {
            ZipFileReader::NoReader => invalid_state(),
            ZipFileReader::Raw(r) => Ok(r),
            ZipFileReader::Compressed(r) => {
                // Peel Crc32Reader -> Decompressor -> BufReader -> CryptoReader
                // to recover the underlying `Take`.
                Ok(r.into_inner().into_inner()?.into_inner().into_inner())
            }
        }
    }
}
244
/// A struct for reading a zip file
pub struct ZipFile<'a, R: Read> {
    /// Entry metadata, either borrowed from the archive or owned.
    pub(crate) data: Cow<'a, ZipFileData>,
    /// Layered reader producing the entry's decrypted, decompressed contents.
    pub(crate) reader: ZipFileReader<'a, R>,
}
250
/// A struct for reading and seeking a zip file
pub struct ZipFileSeek<'a, R> {
    /// Entry metadata, either borrowed from the archive or owned.
    data: Cow<'a, ZipFileData>,
    /// Seekable reader over the entry's bytes.
    reader: ZipFileSeekReader<'a, R>,
}
256
/// Reader backing [`ZipFileSeek`]; currently only raw access to the stored
/// bytes is represented.
enum ZipFileSeekReader<'a, R> {
    Raw(SeekableTake<'a, R>),
}
260
/// A bounded, seekable window of `length` bytes over `inner`, anchored at the
/// position `inner` had when the window was created.
struct SeekableTake<'a, R> {
    inner: &'a mut R,
    /// Absolute position in `inner` where the window begins.
    inner_starting_offset: u64,
    /// Size of the window in bytes.
    length: u64,
    /// Current position relative to the start of the window.
    current_offset: u64,
}
267
268impl<'a, R: Seek> SeekableTake<'a, R> {
269    pub fn new(inner: &'a mut R, length: u64) -> io::Result<Self> {
270        let inner_starting_offset = inner.stream_position()?;
271        Ok(Self {
272            inner,
273            inner_starting_offset,
274            length,
275            current_offset: 0,
276        })
277    }
278}
279
impl<R: Seek> Seek for SeekableTake<'_, R> {
    /// Seeks within the window; positions past the end are clamped to the
    /// window's length, and negative/overflowing targets are rejected.
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        // Translate `pos` into a window-relative offset; `None` marks a
        // negative or overflowing target.
        let offset = match pos {
            SeekFrom::Start(offset) => Some(offset),
            SeekFrom::End(offset) => self.length.checked_add_signed(offset),
            SeekFrom::Current(offset) => self.current_offset.checked_add_signed(offset),
        };
        match offset {
            None => Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "invalid seek to a negative or overflowing position",
            )),
            Some(offset) => {
                // Clamp to the window, then seek the inner reader to the
                // corresponding absolute position.
                let clamped_offset = std::cmp::min(self.length, offset);
                let new_inner_offset = self
                    .inner
                    .seek(SeekFrom::Start(self.inner_starting_offset + clamped_offset))?;
                // Recompute from the inner reader's reported position in case
                // it landed somewhere other than requested.
                self.current_offset = new_inner_offset - self.inner_starting_offset;
                Ok(self.current_offset)
            }
        }
    }
}
303
impl<R: Read> Read for SeekableTake<'_, R> {
    /// Reads at most the bytes remaining in the window, advancing the
    /// window-relative cursor by the amount actually read.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let written = self
            .inner
            .take(self.length - self.current_offset)
            .read(buf)?;
        self.current_offset += written as u64;
        Ok(written)
    }
}
314
315pub(crate) fn make_writable_dir_all<T: AsRef<Path>>(outpath: T) -> Result<(), ZipError> {
316    create_dir_all(outpath.as_ref())?;
317    #[cfg(unix)]
318    {
319        // Dirs must be writable until all normal files are extracted
320        use std::os::unix::fs::PermissionsExt;
321        std::fs::set_permissions(
322            outpath.as_ref(),
323            std::fs::Permissions::from_mode(
324                0o700 | std::fs::metadata(outpath.as_ref())?.permissions().mode(),
325            ),
326        )?;
327    }
328    Ok(())
329}
330
331pub(crate) fn find_content<'a, R: Read + Seek>(
332    data: &ZipFileData,
333    reader: &'a mut R,
334) -> ZipResult<io::Take<&'a mut R>> {
335    // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result!
336    let data_start = data.data_start(reader)?;
337
338    reader.seek(SeekFrom::Start(data_start))?;
339    Ok(reader.take(data.compressed_size))
340}
341
342fn find_content_seek<'a, R: Read + Seek>(
343    data: &ZipFileData,
344    reader: &'a mut R,
345) -> ZipResult<SeekableTake<'a, R>> {
346    // Parse local header
347    let data_start = data.data_start(reader)?;
348    reader.seek(SeekFrom::Start(data_start))?;
349
350    // Explicit Ok and ? are needed to convert io::Error to ZipError
351    Ok(SeekableTake::new(reader, data.compressed_size)?)
352}
353
354pub(crate) fn find_data_start(
355    data: &ZipFileData,
356    reader: &mut (impl Read + Seek + Sized),
357) -> Result<u64, ZipError> {
358    // Go to start of data.
359    reader.seek(SeekFrom::Start(data.header_start))?;
360
361    // Parse static-sized fields and check the magic value.
362    let block = ZipLocalEntryBlock::parse(reader)?;
363
364    // Calculate the end of the local header from the fields we just parsed.
365    let variable_fields_len =
366        // Each of these fields must be converted to u64 before adding, as the result may
367        // easily overflow a u16.
368        block.file_name_length as u64 + block.extra_field_length as u64;
369    let data_start =
370        data.header_start + size_of::<ZipLocalEntryBlock>() as u64 + variable_fields_len;
371
372    // Set the value so we don't have to read it again.
373    match data.data_start.set(data_start) {
374        Ok(()) => (),
375        // If the value was already set in the meantime, ensure it matches (this is probably
376        // unnecessary).
377        Err(_) => {
378            debug_assert_eq!(*data.data_start.get().unwrap(), data_start);
379        }
380    }
381
382    Ok(data_start)
383}
384
/// Builds the decryption layer for one entry from the supplied password and
/// the entry's AES metadata.
///
/// Returns `InvalidPassword` when the entry has AES info but no password was
/// supplied; validator failures from the underlying crypto readers also
/// surface as errors.
#[allow(clippy::too_many_arguments)]
pub(crate) fn make_crypto_reader<'a, R: Read>(
    data: &ZipFileData,
    reader: io::Take<&'a mut R>,
    password: Option<&[u8]>,
    aes_info: Option<(AesMode, AesVendorVersion, CompressionMethod)>,
) -> ZipResult<CryptoReader<'a, R>> {
    #[allow(deprecated)]
    {
        if let CompressionMethod::Unsupported(_) = data.compression_method {
            return unsupported_zip_error("Compression method not supported");
        }
    }

    let reader = match (password, aes_info) {
        // AES entries can only be decrypted when the feature is compiled in.
        #[cfg(not(feature = "aes-crypto"))]
        (Some(_), Some(_)) => {
            return Err(ZipError::UnsupportedArchive(
                "AES encrypted files cannot be decrypted without the aes-crypto feature.",
            ))
        }
        #[cfg(feature = "aes-crypto")]
        (Some(password), Some((aes_mode, vendor_version, _))) => CryptoReader::Aes {
            reader: AesReader::new(reader, aes_mode, data.compressed_size).validate(password)?,
            vendor_version,
        },
        (Some(password), None) => {
            // With a data descriptor the CRC may not be known up front, so
            // ZipCrypto is validated against the MS-DOS timestamp instead.
            let validator = if data.using_data_descriptor {
                ZipCryptoValidator::InfoZipMsdosTime(
                    data.last_modified_time.map_or(0, |x| x.timepart()),
                )
            } else {
                ZipCryptoValidator::PkzipCrc32(data.crc32)
            };
            CryptoReader::ZipCrypto(ZipCryptoReader::new(reader, password).validate(validator)?)
        }
        (None, Some(_)) => return Err(InvalidPassword),
        (None, None) => CryptoReader::Plaintext(reader),
    };
    Ok(reader)
}
426
/// Stacks decompression and CRC32 checking on top of an (already validated)
/// `CryptoReader`.
pub(crate) fn make_reader<R: Read>(
    compression_method: CompressionMethod,
    uncompressed_size: u64,
    crc32: u32,
    reader: CryptoReader<R>,
    flags: u16,
) -> ZipResult<ZipFileReader<R>> {
    // NOTE(review): the AE-2 flag is forwarded to Crc32Reader; per APPNOTE,
    // AE-2 entries store no real CRC, so presumably the CRC layer skips
    // verification for them — confirm in Crc32Reader.
    let ae2_encrypted = reader.is_ae2_encrypted();

    Ok(ZipFileReader::Compressed(Box::new(Crc32Reader::new(
        Decompressor::new(
            io::BufReader::new(reader),
            compression_method,
            uncompressed_size,
            flags,
        )?,
        crc32,
        ae2_encrypted,
    ))))
}
447
/// Creates a symlink at `outpath` pointing at `target`.
///
/// Rejects non-UTF-8 targets. On platforms without symlink support the target
/// path is written out as a plain file; on Windows the link kind (dir vs.
/// file) is chosen from the archive listing or, failing that, the on-disk
/// metadata of the target.
pub(crate) fn make_symlink<T>(
    outpath: &Path,
    target: &[u8],
    #[allow(unused)] existing_files: &IndexMap<Box<str>, T>,
) -> ZipResult<()> {
    let Ok(target_str) = std::str::from_utf8(target) else {
        return Err(invalid!("Invalid UTF-8 as symlink target"));
    };

    #[cfg(not(any(unix, windows)))]
    {
        // No symlink support: store the target path as the file's contents.
        use std::fs::File;
        let output = File::create(outpath);
        output?.write_all(target)?;
    }
    #[cfg(unix)]
    {
        std::os::unix::fs::symlink(Path::new(&target_str), outpath)?;
    }
    #[cfg(windows)]
    {
        // Windows distinguishes file and directory symlinks, so determine
        // which one the target is. Prefer what the archive itself says; fall
        // back to probing the filesystem.
        let target = Path::new(OsStr::new(&target_str));
        let target_is_dir_from_archive =
            existing_files.contains_key(target_str) && is_dir(target_str);
        let target_is_dir = if target_is_dir_from_archive {
            true
        } else if let Ok(meta) = std::fs::metadata(target) {
            meta.is_dir()
        } else {
            false
        };
        if target_is_dir {
            std::os::windows::fs::symlink_dir(target, outpath)?;
        } else {
            std::os::windows::fs::symlink_file(target, outpath)?;
        }
    }
    Ok(())
}
487
/// Summary of the end-of-central-directory data needed to locate and walk the
/// central directory.
#[derive(Debug)]
pub(crate) struct CentralDirectoryInfo {
    /// Base offset of the archive within the reader; recorded positions are
    /// relative to this.
    pub(crate) archive_offset: u64,
    /// Absolute offset of the first central directory entry.
    pub(crate) directory_start: u64,
    /// Number of entries expected in the central directory.
    pub(crate) number_of_files: usize,
    pub(crate) disk_number: u32,
    pub(crate) disk_with_central_directory: u32,
}
496
impl<'a> TryFrom<&'a CentralDirectoryEndInfo> for CentralDirectoryInfo {
    type Error = ZipError;

    /// Converts the parsed EOCD (and optional ZIP64 EOCD) records into the
    /// internal summary, preferring the ZIP64 values when present.
    fn try_from(value: &'a CentralDirectoryEndInfo) -> Result<Self, Self::Error> {
        let (relative_cd_offset, number_of_files, disk_number, disk_with_central_directory) =
            match &value.eocd64 {
                Some(DataAndPosition { data: eocd64, .. }) => {
                    // Sanity check: one disk cannot hold more files than the
                    // whole archive contains.
                    if eocd64.number_of_files_on_this_disk > eocd64.number_of_files {
                        return Err(invalid!("ZIP64 footer indicates more files on this disk than in the whole archive"));
                    }
                    (
                        eocd64.central_directory_offset,
                        eocd64.number_of_files as usize,
                        eocd64.disk_number,
                        eocd64.disk_with_central_directory,
                    )
                }
                // No ZIP64 record: fall back to the legacy 16/32-bit EOCD
                // fields, widened.
                _ => (
                    value.eocd.data.central_directory_offset as u64,
                    value.eocd.data.number_of_files_on_this_disk as usize,
                    value.eocd.data.disk_number as u32,
                    value.eocd.data.disk_with_central_directory as u32,
                ),
            };

        // The recorded offset is relative to the archive start; rebase it to
        // an absolute position, rejecting overflow.
        let directory_start = relative_cd_offset
            .checked_add(value.archive_offset)
            .ok_or(invalid!("Invalid central directory size or offset"))?;

        Ok(Self {
            archive_offset: value.archive_offset,
            directory_start,
            number_of_files,
            disk_number,
            disk_with_central_directory,
        })
    }
}
535
536impl<R> ZipArchive<R> {
537    pub(crate) fn from_finalized_writer(
538        files: IndexMap<Box<str>, ZipFileData>,
539        comment: Box<[u8]>,
540        zip64_comment: Option<Box<[u8]>>,
541        reader: R,
542        central_start: u64,
543    ) -> ZipResult<Self> {
544        let initial_offset = match files.first() {
545            Some((_, file)) => file.header_start,
546            None => central_start,
547        };
548        let shared = Arc::new(Shared {
549            files,
550            offset: initial_offset,
551            dir_start: central_start,
552            config: Config {
553                archive_offset: ArchiveOffset::Known(initial_offset),
554            },
555            comment,
556            zip64_comment,
557        });
558        Ok(Self { reader, shared })
559    }
560
561    /// Total size of the files in the archive, if it can be known. Doesn't include directories or
562    /// metadata.
563    pub fn decompressed_size(&self) -> Option<u128> {
564        let mut total = 0u128;
565        for file in self.shared.files.values() {
566            if file.using_data_descriptor {
567                return None;
568            }
569            total = total.checked_add(file.uncompressed_size as u128)?;
570        }
571        Some(total)
572    }
573}
574
575impl<R: Read + Seek> ZipArchive<R> {
    /// Copies all of this archive's pre-central-directory bytes (i.e. every
    /// entry's local header and data) into `w`, returning the entry metadata
    /// rebased onto `w`'s current position — suitable for appending these
    /// entries to another archive.
    pub(crate) fn merge_contents<W: Write + Seek>(
        &mut self,
        mut w: W,
    ) -> ZipResult<IndexMap<Box<str>, ZipFileData>> {
        if self.shared.files.is_empty() {
            return Ok(IndexMap::new());
        }
        let mut new_files = self.shared.files.clone();
        /* The first file header will probably start at the beginning of the file, but zip doesn't
         * enforce that, and executable zips like PEX files will have a shebang line so will
         * definitely be greater than 0.
         *
         * assert_eq!(0, new_files[0].header_start); // Avoid this.
         */

        let first_new_file_header_start = w.stream_position()?;

        /* Push back file header starts for all entries in the covered files. */
        new_files.values_mut().try_for_each(|f| {
            /* This is probably the only really important thing to change. */
            f.header_start = f
                .header_start
                .checked_add(first_new_file_header_start)
                .ok_or(invalid!(
                    "new header start from merge would have been too large"
                ))?;
            /* This is only ever used internally to cache metadata lookups (it's not part of the
             * zip spec), and 0 is the sentinel value. */
            f.central_header_start = 0;
            /* `data_start` is a once-initialized cell, so it can be safely updated from another
             * thread in the implementation (which is good!). Rebase any cached value. */
            if let Some(old_data_start) = f.data_start.take() {
                let new_data_start = old_data_start
                    .checked_add(first_new_file_header_start)
                    .ok_or(invalid!(
                        "new data start from merge would have been too large"
                    ))?;
                f.data_start.get_or_init(|| new_data_start);
            }
            Ok::<_, ZipError>(())
        })?;

        /* Rewind to the beginning of the file.
         *
         * NB: we *could* decide to start copying from new_files[0].header_start instead, which
         * would avoid copying over e.g. any pex shebangs or other file contents that start before
         * the first zip file entry. However, zip files actually shouldn't care about garbage data
         * in *between* real entries, since the central directory header records the correct start
         * location of each, and keeping track of that math is more complicated logic that will only
         * rarely be used, since most zips that get merged together are likely to be produced
         * specifically for that purpose (and therefore are unlikely to have a shebang or other
         * preface). Finally, this preserves any data that might actually be useful.
         */
        self.reader.rewind()?;
        /* Find the end of the file data. */
        let length_to_read = self.shared.dir_start;
        /* Produce a Read that reads bytes up until the start of the central directory header.
         * This "as &mut dyn Read" trick is used elsewhere to avoid having to clone the underlying
         * handle, which it really shouldn't need to anyway. */
        let mut limited_raw = (&mut self.reader as &mut dyn Read).take(length_to_read);
        /* Copy over file data from source archive directly. */
        io::copy(&mut limited_raw, &mut w)?;

        /* Return the files we've just written to the data stream. */
        Ok(new_files)
    }
642
    /// Get the directory start offset and number of files. This is done in a
    /// separate function to ease the control flow design.
    ///
    /// Candidate EOCD records are tried from the end of the file backwards:
    /// if one yields an unreadable central directory, the search resumes
    /// before that candidate.
    pub(crate) fn get_metadata(config: Config, reader: &mut R) -> ZipResult<Shared> {
        // End of the probed region, initially set to the end of the file
        let file_len = reader.seek(io::SeekFrom::End(0))?;
        let mut end_exclusive = file_len;

        loop {
            // Find the EOCD and possibly EOCD64 entries and determine the archive offset.
            let cde = spec::find_central_directory(
                reader,
                config.archive_offset,
                end_exclusive,
                file_len,
            )?;

            // Turn EOCD into internal representation.
            let Ok(shared) = CentralDirectoryInfo::try_from(&cde)
                .and_then(|info| Self::read_central_header(info, config, reader))
            else {
                // The next EOCD candidate should start before the current one.
                end_exclusive = cde.eocd.position;
                continue;
            };

            return Ok(shared.build(
                cde.eocd.data.zip_file_comment,
                cde.eocd64.map(|v| v.data.extensible_data_sector),
            ));
        }
    }
674
    /// Walks the central directory described by `dir_info` and collects every
    /// entry into a [`SharedBuilder`].
    ///
    /// Rejects multi-disk archives and implausibly large directories.
    fn read_central_header(
        dir_info: CentralDirectoryInfo,
        config: Config,
        reader: &mut R,
    ) -> Result<SharedBuilder, ZipError> {
        // If the parsed number of files is greater than the offset then
        // something fishy is going on and we shouldn't trust number_of_files.
        let file_capacity = if dir_info.number_of_files > dir_info.directory_start as usize {
            0
        } else {
            dir_info.number_of_files
        };

        if dir_info.disk_number != dir_info.disk_with_central_directory {
            return unsupported_zip_error("Support for multi-disk files is not implemented");
        }

        // Guard the upcoming Vec allocation against an absurd entry count.
        if file_capacity.saturating_mul(size_of::<ZipFileData>()) > isize::MAX as usize {
            return unsupported_zip_error("Oversized central directory");
        }

        let mut files = Vec::with_capacity(file_capacity);
        reader.seek(SeekFrom::Start(dir_info.directory_start))?;
        for _ in 0..dir_info.number_of_files {
            let file = central_header_to_zip_file(reader, &dir_info)?;
            files.push(file);
        }

        Ok(SharedBuilder {
            files,
            offset: dir_info.archive_offset,
            dir_start: dir_info.directory_start,
            config,
        })
    }
710
711    /// Returns the verification value and salt for the AES encryption of the file
712    ///
713    /// It fails if the file number is invalid.
714    ///
715    /// # Returns
716    ///
717    /// - None if the file is not encrypted with AES
718    #[cfg(feature = "aes-crypto")]
719    pub fn get_aes_verification_key_and_salt(
720        &mut self,
721        file_number: usize,
722    ) -> ZipResult<Option<AesInfo>> {
723        let (_, data) = self
724            .shared
725            .files
726            .get_index(file_number)
727            .ok_or(ZipError::FileNotFound)?;
728
729        let limit_reader = find_content(data, &mut self.reader)?;
730        match data.aes_mode {
731            None => Ok(None),
732            Some((aes_mode, _, _)) => {
733                let (verification_value, salt) =
734                    AesReader::new(limit_reader, aes_mode, data.compressed_size)
735                        .get_verification_value_and_salt()?;
736                let aes_info = AesInfo {
737                    aes_mode,
738                    verification_value,
739                    salt,
740                };
741                Ok(Some(aes_info))
742            }
743        }
744    }
745
746    /// Read a ZIP archive, collecting the files it contains.
747    ///
748    /// This uses the central directory record of the ZIP file, and ignores local file headers.
749    ///
750    /// A default [`Config`] is used.
751    pub fn new(reader: R) -> ZipResult<ZipArchive<R>> {
752        Self::with_config(Default::default(), reader)
753    }
754
755    /// Read a ZIP archive providing a read configuration, collecting the files it contains.
756    ///
757    /// This uses the central directory record of the ZIP file, and ignores local file headers.
758    pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
759        let shared = Self::get_metadata(config, &mut reader)?;
760
761        Ok(ZipArchive {
762            reader,
763            shared: shared.into(),
764        })
765    }
766
    /// Extract a Zip archive into a directory, overwriting files if they
    /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. Symbolic links are only
    /// created and followed if the target is within the destination directory (this is checked
    /// conservatively using [`std::fs::canonicalize`]).
    ///
    /// Extraction is not atomic. If an error is encountered, some of the files
    /// may be left on disk. However, on Unix targets, no newly-created directories with part but
    /// not all of their contents extracted will be readable, writable or usable as process working
    /// directories by any non-root user except you.
    ///
    /// On Unix and Windows, symbolic links are extracted correctly. On other platforms such as
    /// WebAssembly, symbolic links aren't supported, so they're extracted as normal files
    /// containing the target path in UTF-8.
    pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
        // `None::<fn(&Path) -> bool>` pins the generic filter type; no
        // root-dir unwrapping is performed here.
        self.extract_internal(directory, None::<fn(&Path) -> bool>)
    }
783
    /// Extracts a Zip archive into a directory in the same fashion as
    /// [`ZipArchive::extract`], but detects a "root" directory in the archive
    /// (a single top-level directory that contains the rest of the archive's
    /// entries) and extracts its contents directly.
    ///
    /// For a sensible default `filter`, you can use [`root_dir_common_filter`].
    /// For a custom `filter`, see [`RootDirFilter`].
    ///
    /// See [`ZipArchive::root_dir`] for more information on how the root
    /// directory is detected and the meaning of the `filter` parameter.
    ///
    /// ## Example
    ///
    /// Imagine a Zip archive with the following structure:
    ///
    /// ```text
    /// root/file1.txt
    /// root/file2.txt
    /// root/sub/file3.txt
    /// root/sub/subsub/file4.txt
    /// ```
    ///
    /// If the archive is extracted to `foo` using [`ZipArchive::extract`],
    /// the resulting directory structure will be:
    ///
    /// ```text
    /// foo/root/file1.txt
    /// foo/root/file2.txt
    /// foo/root/sub/file3.txt
    /// foo/root/sub/subsub/file4.txt
    /// ```
    ///
    /// If the archive is extracted to `foo` using
    /// [`ZipArchive::extract_unwrapped_root_dir`], the resulting directory
    /// structure will be:
    ///
    /// ```text
    /// foo/file1.txt
    /// foo/file2.txt
    /// foo/sub/file3.txt
    /// foo/sub/subsub/file4.txt
    /// ```
    ///
    /// ## Example - No Root Directory
    ///
    /// Imagine a Zip archive with the following structure:
    ///
    /// ```text
    /// root/file1.txt
    /// root/file2.txt
    /// root/sub/file3.txt
    /// root/sub/subsub/file4.txt
    /// other/file5.txt
    /// ```
    ///
    /// Due to the presence of the `other` directory,
    /// [`ZipArchive::extract_unwrapped_root_dir`] will extract this in the same
    /// fashion as [`ZipArchive::extract`] as there is now no "root directory."
    pub fn extract_unwrapped_root_dir<P: AsRef<Path>>(
        &mut self,
        directory: P,
        root_dir_filter: impl RootDirFilter,
    ) -> ZipResult<()> {
        // Same machinery as `extract`, but with root-dir detection enabled.
        self.extract_internal(directory, Some(root_dir_filter))
    }
849
    /// Shared implementation of [`ZipArchive::extract`] and
    /// [`ZipArchive::extract_unwrapped_root_dir`].
    ///
    /// When `root_dir_filter` is `Some`, a detected root directory (see
    /// [`ZipArchive::root_dir`]) is stripped from every extracted path.
    fn extract_internal<P: AsRef<Path>>(
        &mut self,
        directory: P,
        root_dir_filter: Option<impl RootDirFilter>,
    ) -> ZipResult<()> {
        use std::fs;

        create_dir_all(&directory)?;
        // Canonicalize so `safe_prepare_path` can compare against a stable base.
        let directory = directory.as_ref().canonicalize()?;

        // Detect the root directory (if a filter was supplied), keeping the
        // filter alongside it for the debug check in `safe_prepare_path`.
        let root_dir = root_dir_filter
            .and_then(|filter| {
                self.root_dir(&filter)
                    .transpose()
                    .map(|root_dir| root_dir.map(|root_dir| (root_dir, filter)))
            })
            .transpose()?;

        // If we have a root dir, simplify the path components to be more
        // appropriate for passing to `safe_prepare_path`
        let root_dir = root_dir
            .as_ref()
            .map(|(root_dir, filter)| {
                crate::path::simplified_components(root_dir)
                    .ok_or_else(|| {
                        // Should be unreachable
                        debug_assert!(false, "Invalid root dir path");

                        invalid!("Invalid root dir path")
                    })
                    .map(|root_dir| (root_dir, filter))
            })
            .transpose()?;

        #[cfg(unix)]
        let mut files_by_unix_mode = Vec::new();

        for i in 0..self.len() {
            let mut file = self.by_index(i)?;

            let mut outpath = directory.clone();
            file.safe_prepare_path(directory.as_ref(), &mut outpath, root_dir.as_ref())?;

            // A symlink entry stores its target as the entry's file contents.
            let symlink_target = if file.is_symlink() && (cfg!(unix) || cfg!(windows)) {
                let mut target = Vec::with_capacity(file.size() as usize);
                file.read_to_end(&mut target)?;
                Some(target)
            } else {
                if file.is_dir() {
                    crate::read::make_writable_dir_all(&outpath)?;
                    continue;
                }
                None
            };

            // Release the borrow on `self` so `by_index`/`make_symlink` can
            // borrow it again below.
            drop(file);

            if let Some(target) = symlink_target {
                make_symlink(&outpath, &target, &self.shared.files)?;
                continue;
            }
            let mut file = self.by_index(i)?;
            let mut outfile = fs::File::create(&outpath)?;

            io::copy(&mut file, &mut outfile)?;
            #[cfg(unix)]
            {
                // Check for real permissions, which we'll set in a second pass
                if let Some(mode) = file.unix_mode() {
                    files_by_unix_mode.push((outpath.clone(), mode));
                }
            }
            #[cfg(feature = "chrono")]
            {
                // Set original timestamp.
                if let Some(last_modified) = file.last_modified() {
                    if let Some(t) = datetime_to_systemtime(&last_modified) {
                        outfile.set_modified(t)?;
                    }
                }
            }
        }
        #[cfg(unix)]
        {
            use std::cmp::Reverse;
            use std::os::unix::fs::PermissionsExt;

            if files_by_unix_mode.len() > 1 {
                // Ensure we update children's permissions before making a parent unwritable
                files_by_unix_mode.sort_by_key(|(path, _)| Reverse(path.clone()));
            }
            for (path, mode) in files_by_unix_mode.into_iter() {
                fs::set_permissions(&path, fs::Permissions::from_mode(mode))?;
            }
        }
        Ok(())
    }
947
948    /// Number of files contained in this zip.
949    pub fn len(&self) -> usize {
950        self.shared.files.len()
951    }
952
953    /// Get the starting offset of the zip central directory.
954    pub fn central_directory_start(&self) -> u64 {
955        self.shared.dir_start
956    }
957
958    /// Whether this zip archive contains no files
959    pub fn is_empty(&self) -> bool {
960        self.len() == 0
961    }
962
963    /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
964    ///
965    /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
966    /// of that prepended data.
967    pub fn offset(&self) -> u64 {
968        self.shared.offset
969    }
970
971    /// Get the comment of the zip archive.
972    pub fn comment(&self) -> &[u8] {
973        &self.shared.comment
974    }
975
976    /// Get the ZIP64 comment of the zip archive, if it is ZIP64.
977    pub fn zip64_comment(&self) -> Option<&[u8]> {
978        self.shared.zip64_comment.as_deref()
979    }
980
981    /// Returns an iterator over all the file and directory names in this archive.
982    pub fn file_names(&self) -> impl Iterator<Item = &str> {
983        self.shared.files.keys().map(|s| s.as_ref())
984    }
985
986    /// Returns Ok(true) if any compressed data in this archive belongs to more than one file. This
987    /// doesn't make the archive invalid, but some programs will refuse to decompress it because the
988    /// copies would take up space independently in the destination.
989    pub fn has_overlapping_files(&mut self) -> ZipResult<bool> {
990        let mut ranges = Vec::<Range<u64>>::with_capacity(self.shared.files.len());
991        for file in self.shared.files.values() {
992            if file.compressed_size == 0 {
993                continue;
994            }
995            let start = file.data_start(&mut self.reader)?;
996            let end = start + file.compressed_size;
997            if ranges
998                .iter()
999                .any(|range| range.start <= end && start <= range.end)
1000            {
1001                return Ok(true);
1002            }
1003            ranges.push(start..end);
1004        }
1005        Ok(false)
1006    }
1007
1008    /// Search for a file entry by name, decrypt with given password
1009    ///
1010    /// # Warning
1011    ///
1012    /// The implementation of the cryptographic algorithms has not
1013    /// gone through a correctness review, and you should assume it is insecure:
1014    /// passwords used with this API may be compromised.
1015    ///
1016    /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us
1017    /// to check for a 1/256 chance that the password is correct.
1018    /// There are many passwords out there that will also pass the validity checks
1019    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1020    /// due to its fairly primitive approach to cryptography.
1021    pub fn by_name_decrypt(&mut self, name: &str, password: &[u8]) -> ZipResult<ZipFile<'_, R>> {
1022        self.by_name_with_optional_password(name, Some(password))
1023    }
1024
1025    /// Search for a file entry by name
1026    pub fn by_name(&mut self, name: &str) -> ZipResult<ZipFile<'_, R>> {
1027        self.by_name_with_optional_password(name, None)
1028    }
1029
1030    /// Get the index of a file entry by name, if it's present.
1031    #[inline(always)]
1032    pub fn index_for_name(&self, name: &str) -> Option<usize> {
1033        self.shared.files.get_index_of(name)
1034    }
1035
1036    /// Search for a file entry by path, decrypt with given password
1037    ///
1038    /// # Warning
1039    ///
1040    /// The implementation of the cryptographic algorithms has not
1041    /// gone through a correctness review, and you should assume it is insecure:
1042    /// passwords used with this API may be compromised.
1043    ///
1044    /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us
1045    /// to check for a 1/256 chance that the password is correct.
1046    /// There are many passwords out there that will also pass the validity checks
1047    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1048    /// due to its fairly primitive approach to cryptography.
1049    pub fn by_path_decrypt<T: AsRef<Path>>(
1050        &mut self,
1051        path: T,
1052        password: &[u8],
1053    ) -> ZipResult<ZipFile<'_, R>> {
1054        self.index_for_path(path)
1055            .ok_or(ZipError::FileNotFound)
1056            .and_then(|index| self.by_index_with_optional_password(index, Some(password)))
1057    }
1058
1059    /// Search for a file entry by path
1060    pub fn by_path<T: AsRef<Path>>(&mut self, path: T) -> ZipResult<ZipFile<'_, R>> {
1061        self.index_for_path(path)
1062            .ok_or(ZipError::FileNotFound)
1063            .and_then(|index| self.by_index_with_optional_password(index, None))
1064    }
1065
1066    /// Get the index of a file entry by path, if it's present.
1067    #[inline(always)]
1068    pub fn index_for_path<T: AsRef<Path>>(&self, path: T) -> Option<usize> {
1069        self.index_for_name(&path_to_string(path))
1070    }
1071
1072    /// Get the name of a file entry, if it's present.
1073    #[inline(always)]
1074    pub fn name_for_index(&self, index: usize) -> Option<&str> {
1075        self.shared
1076            .files
1077            .get_index(index)
1078            .map(|(name, _)| name.as_ref())
1079    }
1080
1081    /// Search for a file entry by name and return a seekable object.
1082    pub fn by_name_seek(&mut self, name: &str) -> ZipResult<ZipFileSeek<'_, R>> {
1083        self.by_index_seek(self.index_for_name(name).ok_or(ZipError::FileNotFound)?)
1084    }
1085
1086    /// Search for a file entry by index and return a seekable object.
1087    pub fn by_index_seek(&mut self, index: usize) -> ZipResult<ZipFileSeek<'_, R>> {
1088        let reader = &mut self.reader;
1089        self.shared
1090            .files
1091            .get_index(index)
1092            .ok_or(ZipError::FileNotFound)
1093            .and_then(move |(_, data)| {
1094                let seek_reader = match data.compression_method {
1095                    CompressionMethod::Stored => {
1096                        ZipFileSeekReader::Raw(find_content_seek(data, reader)?)
1097                    }
1098                    _ => {
1099                        return Err(ZipError::UnsupportedArchive(
1100                            "Seekable compressed files are not yet supported",
1101                        ))
1102                    }
1103                };
1104                Ok(ZipFileSeek {
1105                    reader: seek_reader,
1106                    data: Cow::Borrowed(data),
1107                })
1108            })
1109    }
1110
1111    fn by_name_with_optional_password<'a>(
1112        &'a mut self,
1113        name: &str,
1114        password: Option<&[u8]>,
1115    ) -> ZipResult<ZipFile<'a, R>> {
1116        let Some(index) = self.shared.files.get_index_of(name) else {
1117            return Err(ZipError::FileNotFound);
1118        };
1119        self.by_index_with_optional_password(index, password)
1120    }
1121
1122    /// Get a contained file by index, decrypt with given password
1123    ///
1124    /// # Warning
1125    ///
1126    /// The implementation of the cryptographic algorithms has not
1127    /// gone through a correctness review, and you should assume it is insecure:
1128    /// passwords used with this API may be compromised.
1129    ///
1130    /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us
1131    /// to check for a 1/256 chance that the password is correct.
1132    /// There are many passwords out there that will also pass the validity checks
1133    /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
1134    /// due to its fairly primitive approach to cryptography.
1135    pub fn by_index_decrypt(
1136        &mut self,
1137        file_number: usize,
1138        password: &[u8],
1139    ) -> ZipResult<ZipFile<'_, R>> {
1140        self.by_index_with_optional_password(file_number, Some(password))
1141    }
1142
1143    /// Get a contained file by index
1144    pub fn by_index(&mut self, file_number: usize) -> ZipResult<ZipFile<'_, R>> {
1145        self.by_index_with_optional_password(file_number, None)
1146    }
1147
1148    /// Get a contained file by index without decompressing it
1149    pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_, R>> {
1150        let reader = &mut self.reader;
1151        let (_, data) = self
1152            .shared
1153            .files
1154            .get_index(file_number)
1155            .ok_or(ZipError::FileNotFound)?;
1156        Ok(ZipFile {
1157            reader: ZipFileReader::Raw(find_content(data, reader)?),
1158            data: Cow::Borrowed(data),
1159        })
1160    }
1161
1162    fn by_index_with_optional_password(
1163        &mut self,
1164        file_number: usize,
1165        mut password: Option<&[u8]>,
1166    ) -> ZipResult<ZipFile<'_, R>> {
1167        let (_, data) = self
1168            .shared
1169            .files
1170            .get_index(file_number)
1171            .ok_or(ZipError::FileNotFound)?;
1172
1173        match (password, data.encrypted) {
1174            (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)),
1175            (Some(_), false) => password = None, //Password supplied, but none needed! Discard.
1176            _ => {}
1177        }
1178        let limit_reader = find_content(data, &mut self.reader)?;
1179
1180        let crypto_reader = make_crypto_reader(data, limit_reader, password, data.aes_mode)?;
1181
1182        Ok(ZipFile {
1183            data: Cow::Borrowed(data),
1184            reader: make_reader(
1185                data.compression_method,
1186                data.uncompressed_size,
1187                data.crc32,
1188                crypto_reader,
1189                data.flags,
1190            )?,
1191        })
1192    }
1193
    /// Find the "root directory" of an archive if it exists, filtering out
    /// irrelevant entries when searching.
    ///
    /// Our definition of a "root directory" is a single top-level directory
    /// that contains the rest of the archive's entries. This is useful for
    /// extracting archives that contain a single top-level directory that
    /// you want to "unwrap" and extract directly.
    ///
    /// For a sensible default filter, you can use [`root_dir_common_filter`].
    /// For a custom filter, see [`RootDirFilter`].
    pub fn root_dir(&self, filter: impl RootDirFilter) -> ZipResult<Option<PathBuf>> {
        let mut root_dir: Option<PathBuf> = None;

        for i in 0..self.len() {
            let (_, file) = self
                .shared
                .files
                .get_index(i)
                .ok_or(ZipError::FileNotFound)?;

            // An entry whose name can't be validated as a safe enclosed path
            // means we can't reason about the layout: report "no root dir".
            let path = match file.enclosed_name() {
                Some(path) => path,
                None => return Ok(None),
            };

            if !filter(&path) {
                continue;
            }

            // Record `$path` as the root-dir candidate. Every arm either
            // returns `Ok(None)` (conflicting roots) or `continue`s the
            // enclosing `for` loop.
            macro_rules! replace_root_dir {
                ($path:ident) => {
                    match &mut root_dir {
                        Some(root_dir) => {
                            if *root_dir != $path {
                                // We've found multiple root directories,
                                // abort.
                                return Ok(None);
                            } else {
                                continue;
                            }
                        }

                        None => {
                            root_dir = Some($path.into());
                            continue;
                        }
                    }
                };
            }

            // If this entry is located at the root of the archive...
            if path.components().count() == 1 {
                if file.is_dir() {
                    // If it's a directory, it could be the root directory.
                    replace_root_dir!(path);
                } else {
                    // If it's anything else, this archive does not have a
                    // root directory.
                    return Ok(None);
                }
            }

            // Find the root directory for this entry.
            let mut path = path.as_path();
            while let Some(parent) = path.parent().filter(|path| *path != Path::new("")) {
                path = parent;
            }

            replace_root_dir!(path);
        }

        Ok(root_dir)
    }
1267
1268    /// Unwrap and return the inner reader object
1269    ///
1270    /// The position of the reader is undefined.
1271    pub fn into_inner(self) -> R {
1272        self.reader
1273    }
1274}
1275
/// Holds the AES information of a file in the zip archive
#[derive(Debug)]
#[cfg(feature = "aes-crypto")]
pub struct AesInfo {
    /// The AES encryption mode (key strength)
    pub aes_mode: AesMode,
    /// The verification key
    // NOTE(review): presumably the short password-verification value stored
    // in the AES-encrypted data — confirm against the `aes` module.
    pub verification_value: [u8; PWD_VERIFY_LENGTH],
    /// The salt used for key derivation
    pub salt: Vec<u8>,
}
1287
/// Convenience helper: wrap a static message in [`ZipError::UnsupportedArchive`].
const fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
    Err(ZipError::UnsupportedArchive(detail))
}
1291
1292/// Parse a central directory entry to collect the information for the file.
1293pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
1294    reader: &mut R,
1295    central_directory: &CentralDirectoryInfo,
1296) -> ZipResult<ZipFileData> {
1297    let central_header_start = reader.stream_position()?;
1298
1299    // Parse central header
1300    let block = ZipCentralEntryBlock::parse(reader)?;
1301
1302    let file = central_header_to_zip_file_inner(
1303        reader,
1304        central_directory.archive_offset,
1305        central_header_start,
1306        block,
1307    )?;
1308
1309    let central_header_end = reader.stream_position()?;
1310
1311    reader.seek(SeekFrom::Start(central_header_end))?;
1312    Ok(file)
1313}
1314
/// Read exactly `len` bytes from `reader` into a freshly allocated boxed slice.
#[inline]
fn read_variable_length_byte_field<R: Read>(reader: &mut R, len: usize) -> io::Result<Box<[u8]>> {
    let mut data = vec![0u8; len];
    reader.read_exact(&mut data)?;
    Ok(data.into_boxed_slice())
}
1321
/// Parse a central directory entry to collect the information for the file.
fn central_header_to_zip_file_inner<R: Read>(
    reader: &mut R,
    archive_offset: u64,
    central_header_start: u64,
    block: ZipCentralEntryBlock,
) -> ZipResult<ZipFileData> {
    let ZipCentralEntryBlock {
        // magic,
        version_made_by,
        // version_to_extract,
        flags,
        compression_method,
        last_mod_time,
        last_mod_date,
        crc32,
        compressed_size,
        uncompressed_size,
        file_name_length,
        extra_field_length,
        file_comment_length,
        // disk_number,
        // internal_file_attributes,
        external_file_attributes,
        offset,
        ..
    } = block;

    // General-purpose bit flags: bit 0 = encrypted, bit 3 = data descriptor
    // present, bit 11 = name/comment encoded as UTF-8.
    let encrypted = flags & 1 == 1;
    let is_utf8 = flags & (1 << 11) != 0;
    let using_data_descriptor = flags & (1 << 3) != 0;

    let file_name_raw = read_variable_length_byte_field(reader, file_name_length as usize)?;
    let extra_field = read_variable_length_byte_field(reader, extra_field_length as usize)?;
    let file_comment_raw = read_variable_length_byte_field(reader, file_comment_length as usize)?;
    // Non-UTF-8 names and comments are decoded as CP437.
    let file_name: Box<str> = match is_utf8 {
        true => String::from_utf8_lossy(&file_name_raw).into(),
        false => file_name_raw.clone().from_cp437(),
    };
    let file_comment: Box<str> = match is_utf8 {
        true => String::from_utf8_lossy(&file_comment_raw).into(),
        false => file_comment_raw.from_cp437(),
    };

    // Construct the result
    let mut result = ZipFileData {
        system: System::from((version_made_by >> 8) as u8),
        /* NB: this strips the top 8 bits! */
        version_made_by: version_made_by as u8,
        encrypted,
        using_data_descriptor,
        is_utf8,
        compression_method: CompressionMethod::parse_from_u16(compression_method),
        compression_level: None,
        last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
        crc32,
        compressed_size: compressed_size.into(),
        uncompressed_size: uncompressed_size.into(),
        flags,
        file_name,
        file_name_raw,
        extra_field: Some(Arc::new(extra_field.to_vec())),
        central_extra_field: None,
        file_comment,
        header_start: offset.into(),
        extra_data_start: None,
        central_header_start,
        data_start: OnceLock::new(),
        external_attributes: external_file_attributes,
        large_file: false,
        aes_mode: None,
        aes_extra_data_start: 0,
        extra_fields: Vec::new(),
    };
    // I/O failures while parsing the extra field are tolerated (best effort);
    // any other error is fatal.
    match parse_extra_field(&mut result) {
        Ok(stripped_extra_field) => {
            result.extra_field = stripped_extra_field;
        }
        Err(ZipError::Io(..)) => {}
        Err(e) => return Err(e),
    }

    // An AES-marked entry must have carried the 0x9901 extra-data field.
    let aes_enabled = result.compression_method == CompressionMethod::AES;
    if aes_enabled && result.aes_mode.is_none() {
        return Err(invalid!("AES encryption without AES extra data field"));
    }

    // Account for shifted zip offsets.
    result.header_start = result
        .header_start
        .checked_add(archive_offset)
        .ok_or(invalid!("Archive header is too large"))?;

    Ok(result)
}
1417
/// Walk every extra-data field attached to `file`, applying each one via
/// [`parse_single_extra_field`].
///
/// Returns the (possibly rewritten) extra-field bytes with removable fields
/// stripped out, or `None` when nothing remains.
pub(crate) fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<Option<Arc<Vec<u8>>>> {
    let Some(ref extra_field) = file.extra_field else {
        return Ok(None);
    };
    let extra_field = extra_field.clone();
    let mut processed_extra_field = extra_field.clone();
    let len = extra_field.len();
    let mut reader = io::Cursor::new(&**extra_field);

    /* TODO: codify this structure into Zip64ExtraFieldBlock fields! */
    let mut position = reader.position() as usize;
    while (position) < len {
        let old_position = position;
        let remove = parse_single_extra_field(file, &mut reader, position as u64, false)?;
        position = reader.position() as usize;
        if remove {
            let remaining = len - (position - old_position);
            if remaining == 0 {
                return Ok(None);
            }
            // Splice the removed field out by copying the bytes before and
            // after it from the original buffer.
            // NOTE(review): the rewrite always starts from the *original*
            // bytes, so this assumes at most one removable field per entry —
            // confirm whether duplicate ZIP64 fields need handling.
            let mut new_extra_field = Vec::with_capacity(remaining);
            new_extra_field.extend_from_slice(&extra_field[0..old_position]);
            new_extra_field.extend_from_slice(&extra_field[position..]);
            processed_extra_field = Arc::new(new_extra_field);
        }
    }
    Ok(Some(processed_extra_field))
}
1446
/// Parse one extra-data field (tag + length + payload) from `reader`,
/// updating `file` in place.
///
/// `bytes_already_read` is the offset of this field within the whole extra
/// data (recorded for AES fields). Returns `Ok(true)` when the field should
/// be stripped from the stored extra data (currently only the ZIP64 field).
pub(crate) fn parse_single_extra_field<R: Read>(
    file: &mut ZipFileData,
    reader: &mut R,
    bytes_already_read: u64,
    disallow_zip64: bool,
) -> ZipResult<bool> {
    let kind = reader.read_u16_le()?;
    let len = reader.read_u16_le()?;
    match kind {
        // Zip64 extended information extra field
        0x0001 => {
            if disallow_zip64 {
                return Err(invalid!("Can't write a custom field using the ZIP64 ID"));
            }
            file.large_file = true;
            let mut consumed_len = 0;
            // Each 64-bit value is present only when the corresponding 32-bit
            // header field was saturated (or the field is long enough to hold
            // all three values unconditionally).
            if len >= 24 || file.uncompressed_size == spec::ZIP64_BYTES_THR {
                file.uncompressed_size = reader.read_u64_le()?;
                consumed_len += size_of::<u64>();
            }
            if len >= 24 || file.compressed_size == spec::ZIP64_BYTES_THR {
                file.compressed_size = reader.read_u64_le()?;
                consumed_len += size_of::<u64>();
            }
            if len >= 24 || file.header_start == spec::ZIP64_BYTES_THR {
                file.header_start = reader.read_u64_le()?;
                consumed_len += size_of::<u64>();
            }
            let Some(leftover_len) = (len as usize).checked_sub(consumed_len) else {
                return Err(invalid!("ZIP64 extra-data field is the wrong length"));
            };
            // Skip any declared bytes we did not consume.
            reader.read_exact(&mut vec![0u8; leftover_len])?;
            return Ok(true);
        }
        0x000a => {
            // NTFS extra field
            file.extra_fields
                .push(ExtraField::Ntfs(Ntfs::try_from_reader(reader, len)?));
        }
        0x9901 => {
            // AES
            if len != 7 {
                return Err(ZipError::UnsupportedArchive(
                    "AES extra data field has an unsupported length",
                ));
            }
            let vendor_version = reader.read_u16_le()?;
            let vendor_id = reader.read_u16_le()?;
            let mut out = [0u8];
            reader.read_exact(&mut out)?;
            let aes_mode = out[0];
            let compression_method = CompressionMethod::parse_from_u16(reader.read_u16_le()?);

            // Vendor ID must be 0x4541 ("AE" in little-endian ASCII).
            if vendor_id != 0x4541 {
                return Err(invalid!("Invalid AES vendor"));
            }
            let vendor_version = match vendor_version {
                0x0001 => AesVendorVersion::Ae1,
                0x0002 => AesVendorVersion::Ae2,
                _ => return Err(invalid!("Invalid AES vendor version")),
            };
            match aes_mode {
                0x01 => file.aes_mode = Some((AesMode::Aes128, vendor_version, compression_method)),
                0x02 => file.aes_mode = Some((AesMode::Aes192, vendor_version, compression_method)),
                0x03 => file.aes_mode = Some((AesMode::Aes256, vendor_version, compression_method)),
                _ => return Err(invalid!("Invalid AES encryption strength")),
            };
            // The real compression method is carried inside the AES field.
            file.compression_method = compression_method;
            file.aes_extra_data_start = bytes_already_read;
        }
        0x5455 => {
            // extended timestamp
            // https://libzip.org/specifications/extrafld.txt

            file.extra_fields.push(ExtraField::ExtendedTimestamp(
                ExtendedTimestamp::try_from_reader(reader, len)?,
            ));
        }
        0x6375 => {
            // Info-ZIP Unicode Comment Extra Field
            // APPNOTE 4.6.8 and https://libzip.org/specifications/extrafld.txt
            file.file_comment = String::from_utf8(
                UnicodeExtraField::try_from_reader(reader, len)?
                    .unwrap_valid(file.file_comment.as_bytes())?
                    .into_vec(),
            )?
            .into();
        }
        0x7075 => {
            // Info-ZIP Unicode Path Extra Field
            // APPNOTE 4.6.9 and https://libzip.org/specifications/extrafld.txt
            file.file_name_raw = UnicodeExtraField::try_from_reader(reader, len)?
                .unwrap_valid(&file.file_name_raw)?;
            file.file_name =
                String::from_utf8(file.file_name_raw.clone().into_vec())?.into_boxed_str();
            file.is_utf8 = true;
        }
        _ => {
            reader.read_exact(&mut vec![0u8; len as usize])?;
            // Other fields are ignored
        }
    }
    Ok(false)
}
1551
/// A trait for exposing file metadata inside the zip.
///
/// The accessor methods on [`ZipFile`] (name, paths, version) read their
/// values through this trait.
pub trait HasZipMetadata {
    /// Get the file metadata
    fn get_metadata(&self) -> &ZipFileData;
}
1557
1558/// Methods for retrieving information on zip files
1559impl<'a, R: Read> ZipFile<'a, R> {
1560    pub(crate) fn take_raw_reader(&mut self) -> io::Result<io::Take<&'a mut R>> {
1561        mem::replace(&mut self.reader, ZipFileReader::NoReader).into_inner()
1562    }
1563
1564    /// Get the version of the file
1565    pub fn version_made_by(&self) -> (u8, u8) {
1566        (
1567            self.get_metadata().version_made_by / 10,
1568            self.get_metadata().version_made_by % 10,
1569        )
1570    }
1571
1572    /// Get the name of the file
1573    ///
1574    /// # Warnings
1575    ///
1576    /// It is dangerous to use this name directly when extracting an archive.
1577    /// It may contain an absolute path (`/etc/shadow`), or break out of the
1578    /// current directory (`../runtime`). Carelessly writing to these paths
1579    /// allows an attacker to craft a ZIP archive that will overwrite critical
1580    /// files.
1581    ///
1582    /// You can use the [`ZipFile::enclosed_name`] method to validate the name
1583    /// as a safe path.
1584    pub fn name(&self) -> &str {
1585        &self.get_metadata().file_name
1586    }
1587
1588    /// Get the name of the file, in the raw (internal) byte representation.
1589    ///
1590    /// The encoding of this data is currently undefined.
1591    pub fn name_raw(&self) -> &[u8] {
1592        &self.get_metadata().file_name_raw
1593    }
1594
    /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
    /// removes a leading '/' and removes '..' parts.
    #[deprecated(
        since = "0.5.7",
        note = "by stripping `..`s from the path, the meaning of paths can change.
                `mangled_name` can be used if this behaviour is desirable"
    )]
    pub fn sanitized_name(&self) -> PathBuf {
        // Deprecated alias: delegates directly to `mangled_name`.
        self.mangled_name()
    }
1605
1606    /// Rewrite the path, ignoring any path components with special meaning.
1607    ///
1608    /// - Absolute paths are made relative
1609    /// - [`ParentDir`]s are ignored
1610    /// - Truncates the filename at a NULL byte
1611    ///
1612    /// This is appropriate if you need to be able to extract *something* from
1613    /// any archive, but will easily misrepresent trivial paths like
1614    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
1615    /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
1616    ///
1617    /// [`ParentDir`]: `PathBuf::Component::ParentDir`
1618    pub fn mangled_name(&self) -> PathBuf {
1619        self.get_metadata().file_name_sanitized()
1620    }
1621
1622    /// Ensure the file path is safe to use as a [`Path`].
1623    ///
1624    /// - It can't contain NULL bytes
1625    /// - It can't resolve to a path outside the current directory
1626    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
1627    /// - It can't be an absolute path
1628    ///
1629    /// This will read well-formed ZIP files correctly, and is resistant
1630    /// to path-based exploits. It is recommended over
1631    /// [`ZipFile::mangled_name`].
1632    pub fn enclosed_name(&self) -> Option<PathBuf> {
1633        self.get_metadata().enclosed_name()
1634    }
1635
    /// Simplified path components of this entry's file name.
    ///
    /// `None` is treated as an invalid file path by callers
    /// (see `safe_prepare_path`).
    pub(crate) fn simplified_components(&self) -> Option<Vec<&OsStr>> {
        self.get_metadata().simplified_components()
    }
1639
    /// Prepare the path for extraction by creating necessary missing directories and checking for symlinks to be contained within the base path.
    ///
    /// `base_path` parameter is assumed to be canonicalized.
    ///
    /// On success, every component of the (possibly root-stripped) entry path
    /// has been appended to `outpath`, with intermediate symlinks resolved and
    /// verified to stay inside `base_path`.
    ///
    /// # Errors
    ///
    /// Fails when the entry path cannot be simplified, when a symlink chain is
    /// deeper than 5 levels, when a symlink target cannot be shown to resolve
    /// inside `base_path`, or on any underlying I/O error.
    pub(crate) fn safe_prepare_path(
        &self,
        base_path: &Path,
        outpath: &mut PathBuf,
        root_dir: Option<&(Vec<&OsStr>, impl RootDirFilter)>,
    ) -> ZipResult<()> {
        let components = self
            .simplified_components()
            .ok_or(invalid!("Invalid file path"))?;

        // When a root directory was detected, strip it so the archive
        // contents land directly under `base_path`.
        let components = match root_dir {
            Some((root_dir, filter)) => match components.strip_prefix(&**root_dir) {
                Some(components) => components,

                // In this case, we expect that the file was not in the root
                // directory, but was filtered out when searching for the
                // root directory.
                None => {
                    // We could technically find ourselves at this code
                    // path if the user provides an unstable or
                    // non-deterministic `filter` function.
                    //
                    // If debug assertions are on, we should panic here.
                    // Otherwise, the safest thing to do here is to just
                    // extract as-is.
                    debug_assert!(
                        !filter(&PathBuf::from_iter(components.iter())),
                        "Root directory filter should not match at this point"
                    );

                    // Extract as-is.
                    &components[..]
                }
            },

            None => &components[..],
        };

        let components_len = components.len();

        for (is_last, component) in components
            .iter()
            .copied()
            .enumerate()
            .map(|(i, c)| (i == components_len - 1, c))
        {
            // we can skip the target directory itself because the base path is assumed to be "trusted" (if the user say extract to a symlink we can follow it)
            outpath.push(component);

            // check if the path is a symlink, the target must be _inherently_ within the directory
            // (at most 5 levels of symlink indirection are followed)
            for limit in (0..5u8).rev() {
                let meta = match std::fs::symlink_metadata(&outpath) {
                    Ok(meta) => meta,
                    Err(e) if e.kind() == io::ErrorKind::NotFound => {
                        // Nothing exists here yet: create intermediate
                        // directories, but not the final component itself.
                        if !is_last {
                            crate::read::make_writable_dir_all(&outpath)?;
                        }
                        break;
                    }
                    Err(e) => return Err(e.into()),
                };

                if !meta.is_symlink() {
                    break;
                }

                if limit == 0 {
                    return Err(invalid!("Extraction followed a symlink too deep"));
                }

                // note that we cannot accept links that do not inherently resolve to a path inside the directory to prevent:
                // - disclosure of unrelated path exists (no check for a path exist and then ../ out)
                // - issues with file-system specific path resolution (case sensitivity, etc)
                let target = std::fs::read_link(&outpath)?;

                // First check: the target's simplified components must start
                // with those of `base_path`.
                if !crate::path::simplified_components(&target)
                    .ok_or(invalid!("Invalid symlink target path"))?
                    .starts_with(
                        &crate::path::simplified_components(base_path)
                            .ok_or(invalid!("Invalid base path"))?,
                    )
                {
                    // Fallback check: accept the target only when a
                    // component-wise comparison shows it lexically stays
                    // within `base_path`.
                    let is_absolute_enclosed = base_path
                        .components()
                        .map(Some)
                        .chain(std::iter::once(None))
                        .zip(target.components().map(Some).chain(std::iter::repeat(None)))
                        .all(|(a, b)| match (a, b) {
                            // both components are normal
                            (Some(Component::Normal(a)), Some(Component::Normal(b))) => a == b,
                            // both components consumed fully
                            (None, None) => true,
                            // target consumed fully but base path is not
                            (Some(_), None) => false,
                            // base path consumed fully but target is not (and normal)
                            (None, Some(Component::CurDir | Component::Normal(_))) => true,
                            _ => false,
                        });

                    if !is_absolute_enclosed {
                        return Err(invalid!("Symlink is not inherently safe"));
                    }
                }

                // Re-run the loop with the link target appended so chained
                // symlinks are checked again.
                outpath.push(target);
            }
        }
        Ok(())
    }
1752
1753    /// Get the comment of the file
1754    pub fn comment(&self) -> &str {
1755        &self.get_metadata().file_comment
1756    }
1757
1758    /// Get the compression method used to store the file
1759    pub fn compression(&self) -> CompressionMethod {
1760        self.get_metadata().compression_method
1761    }
1762
1763    /// Get if the files is encrypted or not
1764    pub fn encrypted(&self) -> bool {
1765        self.data.encrypted
1766    }
1767
1768    /// Get the size of the file, in bytes, in the archive
1769    pub fn compressed_size(&self) -> u64 {
1770        self.get_metadata().compressed_size
1771    }
1772
1773    /// Get the size of the file, in bytes, when uncompressed
1774    pub fn size(&self) -> u64 {
1775        self.get_metadata().uncompressed_size
1776    }
1777
1778    /// Get the time the file was last modified
1779    pub fn last_modified(&self) -> Option<DateTime> {
1780        self.data.last_modified_time
1781    }
1782    /// Returns whether the file is actually a directory
1783    pub fn is_dir(&self) -> bool {
1784        is_dir(self.name())
1785    }
1786
    /// Returns whether the file is actually a symbolic link
    pub fn is_symlink(&self) -> bool {
        // NOTE(review): this tests that the `S_IFLNK` bits are set, not that
        // the file-type field matches exactly (`mode & S_IFMT == S_IFLNK`);
        // confirm the looser bit-test is intentional.
        self.unix_mode()
            .is_some_and(|mode| mode & S_IFLNK == S_IFLNK)
    }
1792
1793    /// Returns whether the file is a normal file (i.e. not a directory or symlink)
1794    pub fn is_file(&self) -> bool {
1795        !self.is_dir() && !self.is_symlink()
1796    }
1797
1798    /// Get unix mode for the file
1799    pub fn unix_mode(&self) -> Option<u32> {
1800        self.get_metadata().unix_mode()
1801    }
1802
1803    /// Get the CRC32 hash of the original file
1804    pub fn crc32(&self) -> u32 {
1805        self.get_metadata().crc32
1806    }
1807
    /// Get the extra data of the zip header for this file
    ///
    /// Returns `None` when the entry carries no extra field.
    pub fn extra_data(&self) -> Option<&[u8]> {
        // The two derefs peel the owning wrapper(s) around the stored bytes
        // to yield a plain `&[u8]` borrow.
        self.get_metadata()
            .extra_field
            .as_ref()
            .map(|v| v.deref().deref())
    }
1815
    /// Get the starting offset of the data of the compressed file
    ///
    /// # Panics
    ///
    /// Panics if the lazily-set `data_start` cell has not been initialized
    /// yet (`get()` returns `None` and the `unwrap` fires).
    pub fn data_start(&self) -> u64 {
        *self.data.data_start.get().unwrap()
    }
1820
1821    /// Get the starting offset of the zip header for this file
1822    pub fn header_start(&self) -> u64 {
1823        self.get_metadata().header_start
1824    }
1825    /// Get the starting offset of the zip header in the central directory for this file
1826    pub fn central_header_start(&self) -> u64 {
1827        self.get_metadata().central_header_start
1828    }
1829
    /// Get the [`SimpleFileOptions`] that would be used to write this file to
    /// a new zip archive.
    pub fn options(&self) -> SimpleFileOptions {
        // NOTE(review): `S_IFREG` is OR'd into the permissions unconditionally,
        // even when this entry is a directory or symlink — confirm that is
        // intended for non-regular entries.
        let mut options = SimpleFileOptions::default()
            .large_file(self.compressed_size().max(self.size()) > ZIP64_BYTES_THR)
            .compression_method(self.compression())
            .unix_permissions(self.unix_mode().unwrap_or(0o644) | S_IFREG)
            .last_modified_time(
                // Missing or invalid timestamps fall back to the library's
                // default write timestamp.
                self.last_modified()
                    .filter(|m| m.is_valid())
                    .unwrap_or_else(DateTime::default_for_write),
            );

        options.normalize();
        #[cfg(feature = "aes-crypto")]
        if let Some(aes) = self.get_metadata().aes_mode {
            // Preserve AES metadata in options for downstream writers.
            // This is metadata-only and does not trigger encryption.
            options.aes_mode = Some(aes);
        }
        options
    }
1852}
1853
/// Methods for retrieving information on zip files
impl<R: Read> ZipFile<'_, R> {
    /// iterate through all extra fields
    pub fn extra_data_fields(&self) -> impl Iterator<Item = &ExtraField> {
        // Borrowing iterator over the parsed extra-field entries.
        self.data.extra_fields.iter()
    }
}
1861
impl<R: Read> HasZipMetadata for ZipFile<'_, R> {
    fn get_metadata(&self) -> &ZipFileData {
        // `data` is a `Cow`; `as_ref` borrows it whether owned or borrowed.
        self.data.as_ref()
    }
}
1867
impl<R: Read> Read for ZipFile<'_, R> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.reader.read(buf)
    }

    // The methods below have default implementations on the `Read` trait;
    // they are forwarded explicitly so the inner reader's own versions are
    // used instead of the generic defaults.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        self.reader.read_exact(buf)
    }

    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        self.reader.read_to_end(buf)
    }

    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
        self.reader.read_to_string(buf)
    }
}
1885
impl<R: Read> Read for ZipFileSeek<'_, R> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Dispatch on the reader variant; `Raw` is the only one handled here.
        match &mut self.reader {
            ZipFileSeekReader::Raw(r) => r.read(buf),
        }
    }
}
1893
impl<R: Seek> Seek for ZipFileSeek<'_, R> {
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        // Seeking is delegated to the raw inner reader.
        match &mut self.reader {
            ZipFileSeekReader::Raw(r) => r.seek(pos),
        }
    }
}
1901
impl<R> HasZipMetadata for ZipFileSeek<'_, R> {
    fn get_metadata(&self) -> &ZipFileData {
        // Borrow the stored metadata regardless of how `data` owns it.
        self.data.as_ref()
    }
}
1907
1908impl<R: Read> Drop for ZipFile<'_, R> {
1909    fn drop(&mut self) {
1910        // self.data is Owned, this reader is constructed by a streaming reader.
1911        // In this case, we want to exhaust the reader so that the next file is accessible.
1912        if let Cow::Owned(_) = self.data {
1913            // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
1914            if let Ok(mut inner) = self.take_raw_reader() {
1915                let _ = copy(&mut inner, &mut sink());
1916            }
1917        }
1918    }
1919}
1920
/// Read ZipFile structures from a non-seekable reader.
///
/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
/// as some information will be missing when reading this manner.
///
/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
/// is encountered. No more files should be read after this.
///
/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after
/// the structure is done.
///
/// Missing fields are:
/// * `comment`: set to an empty string
/// * `data_start`: set to 0
/// * `external_attributes`: `unix_mode()`: will return None
pub fn read_zipfile_from_stream<R: Read>(reader: &mut R) -> ZipResult<Option<ZipFile<'_, R>>> {
    // We can't use the typical ::parse() method, as we follow separate code paths depending on the
    // "magic" value (since the magic value will be from the central directory header if we've
    // finished iterating over all the actual files).
    /* TODO: smallvec? */

    let mut block = ZipLocalEntryBlock::zeroed();
    reader.read_exact(block.as_bytes_mut())?;

    // Only the magic is inspected first; the remaining fields are converted
    // below once we know this really is a local file header.
    match block.magic().from_le() {
        spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (),
        spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
        _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR),
    }

    // Convert all multi-byte header fields from little-endian.
    let block = block.from_le();

    let mut result = ZipFileData::from_local_block(block, reader)?;

    // I/O errors while parsing the extra field are deliberately tolerated;
    // any other error is fatal.
    match parse_extra_field(&mut result) {
        Ok(..) | Err(ZipError::Io(..)) => {}
        Err(e) => return Err(e),
    }

    // Bound further reads to the declared compressed size of this entry.
    let limit_reader = reader.take(result.compressed_size);

    let result_flags = result.flags;
    let crypto_reader = make_crypto_reader(&result, limit_reader, None, None)?;
    let ZipFileData {
        crc32,
        uncompressed_size,
        compression_method,
        ..
    } = result;

    Ok(Some(ZipFile {
        data: Cow::Owned(result),
        reader: make_reader(
            compression_method,
            uncompressed_size,
            crc32,
            crypto_reader,
            result_flags,
        )?,
    }))
}
1983
/// A filter that determines whether an entry should be ignored when searching
/// for the root directory of a Zip archive.
///
/// Returns `true` if the entry should be considered, and `false` if it should
/// be ignored.
///
/// See [`root_dir_common_filter`] for a sensible default filter.
pub trait RootDirFilter: Fn(&Path) -> bool {}
// Blanket impl: any `Fn(&Path) -> bool` closure or function is a filter.
impl<F: Fn(&Path) -> bool> RootDirFilter for F {}
1993
/// Common filters when finding the root directory of a Zip archive.
///
/// This filter is a sensible default for most use cases and filters out common
/// system files that are usually irrelevant to the contents of the archive.
///
/// Currently, the filter ignores:
/// - `/__MACOSX/`
/// - `/.DS_Store`
/// - `/Thumbs.db`
///
/// **This function is not guaranteed to be stable and may change in future versions.**
///
/// # Example
///
/// ```rust
/// # use std::path::Path;
/// assert!(zip::read::root_dir_common_filter(Path::new("foo.txt")));
/// assert!(!zip::read::root_dir_common_filter(Path::new(".DS_Store")));
/// assert!(!zip::read::root_dir_common_filter(Path::new("Thumbs.db")));
/// assert!(!zip::read::root_dir_common_filter(Path::new("__MACOSX")));
/// assert!(!zip::read::root_dir_common_filter(Path::new("__MACOSX/foo.txt")));
/// ```
pub fn root_dir_common_filter(path: &Path) -> bool {
    const COMMON_FILTER_ROOT_FILES: &[&str] = &[".DS_Store", "Thumbs.db"];

    // Everything at or below `__MACOSX` is ignored outright.
    if path.starts_with("__MACOSX") {
        return false;
    }

    // Well-known junk files are ignored, but only at the archive root.
    let at_root = path.components().count() == 1;
    let is_junk_file = path.file_name().is_some_and(|file_name| {
        COMMON_FILTER_ROOT_FILES
            .iter()
            .any(|candidate| OsStr::new(candidate) == file_name)
    });

    !(at_root && is_junk_file)
}
2036
#[cfg(feature = "chrono")]
/// Generate a `SystemTime` from a `DateTime`.
///
/// Returns `None` when the `DateTime` does not describe a valid calendar
/// date/time; the naive value is interpreted as UTC.
fn datetime_to_systemtime(datetime: &DateTime) -> Option<std::time::SystemTime> {
    generate_chrono_datetime(datetime).map(|naive| {
        chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(naive, chrono::Utc).into()
    })
}
2046
#[cfg(feature = "chrono")]
/// Generate a `NaiveDateTime` from a `DateTime`.
///
/// Returns `None` when either the date or the time fields are out of range.
fn generate_chrono_datetime(datetime: &DateTime) -> Option<chrono::NaiveDateTime> {
    // `?` short-circuits on an invalid date; `and_hms_opt` handles an
    // invalid time, so the nested `if let` chain is unnecessary.
    chrono::NaiveDate::from_ymd_opt(
        datetime.year().into(),
        datetime.month().into(),
        datetime.day().into(),
    )?
    .and_hms_opt(
        datetime.hour().into(),
        datetime.minute().into(),
        datetime.second().into(),
    )
}
2065
#[cfg(test)]
mod test {
    use crate::result::ZipResult;
    use crate::write::SimpleFileOptions;
    use crate::CompressionMethod::Stored;
    use crate::{ZipArchive, ZipWriter};
    use std::io::{Cursor, Read, Write};
    use tempfile::TempDir;

    /// An archive with a corrupt central-directory offset must fail to open.
    #[test]
    fn invalid_offset() {
        use super::ZipArchive;

        let reader = ZipArchive::new(Cursor::new(include_bytes!(
            "../tests/data/invalid_offset.zip"
        )));
        assert!(reader.is_err());
    }

    /// A second corrupt-offset variant must also fail to open.
    #[test]
    fn invalid_offset2() {
        use super::ZipArchive;

        let reader = ZipArchive::new(Cursor::new(include_bytes!(
            "../tests/data/invalid_offset2.zip"
        )));
        assert!(reader.is_err());
    }

    /// A ZIP64 archive with junk before the actual data still opens and
    /// reports the correct entry count.
    #[test]
    fn zip64_with_leading_junk() {
        use super::ZipArchive;

        let reader =
            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/zip64_demo.zip"))).unwrap();
        assert_eq!(reader.len(), 1);
    }

    /// Basic sanity check of archive comment and central-header offsets.
    #[test]
    fn zip_contents() {
        use super::ZipArchive;

        let mut reader =
            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/mimetype.zip"))).unwrap();
        assert_eq!(reader.comment(), b"");
        assert_eq!(reader.by_index(0).unwrap().central_header_start(), 77);
    }

    /// The streaming reader iterates all entries and terminates cleanly at
    /// the central directory.
    #[test]
    fn zip_read_streaming() {
        use super::read_zipfile_from_stream;

        let mut reader = Cursor::new(include_bytes!("../tests/data/mimetype.zip"));
        loop {
            if read_zipfile_from_stream(&mut reader).unwrap().is_none() {
                break;
            }
        }
    }

    /// Cloned archives read independently: interleaved reads on two clones
    /// of the same archive must not interfere with each other.
    #[test]
    fn zip_clone() {
        use super::ZipArchive;
        use std::io::Read;

        let mut reader1 =
            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/mimetype.zip"))).unwrap();
        let mut reader2 = reader1.clone();

        let mut file1 = reader1.by_index(0).unwrap();
        let mut file2 = reader2.by_index(0).unwrap();

        let t = file1.last_modified().unwrap();
        assert_eq!(
            (
                t.year(),
                t.month(),
                t.day(),
                t.hour(),
                t.minute(),
                t.second()
            ),
            (1980, 1, 1, 0, 0, 0)
        );

        let mut buf1 = [0; 5];
        let mut buf2 = [0; 5];
        let mut buf3 = [0; 5];
        let mut buf4 = [0; 5];

        file1.read_exact(&mut buf1).unwrap();
        file2.read_exact(&mut buf2).unwrap();
        file1.read_exact(&mut buf3).unwrap();
        file2.read_exact(&mut buf4).unwrap();

        assert_eq!(buf1, buf2);
        assert_eq!(buf3, buf4);
        assert_ne!(buf1, buf3);
    }

    /// `is_dir`/`is_file` agree with the naming convention used in the
    /// fixture archive (entries prefixed "dir"/"file").
    #[test]
    fn file_and_dir_predicates() {
        use super::ZipArchive;

        let mut zip = ZipArchive::new(Cursor::new(include_bytes!(
            "../tests/data/files_and_dirs.zip"
        )))
        .unwrap();

        for i in 0..zip.len() {
            let zip_file = zip.by_index(i).unwrap();
            let full_name = zip_file.enclosed_name().unwrap();
            let file_name = full_name.file_name().unwrap().to_str().unwrap();
            assert!(
                (file_name.starts_with("dir") && zip_file.is_dir())
                    || (file_name.starts_with("file") && zip_file.is_file())
            );
        }
    }

    #[test]
    fn zip64_magic_in_filenames() {
        let files = vec![
            include_bytes!("../tests/data/zip64_magic_in_filename_1.zip").to_vec(),
            include_bytes!("../tests/data/zip64_magic_in_filename_2.zip").to_vec(),
            include_bytes!("../tests/data/zip64_magic_in_filename_3.zip").to_vec(),
            include_bytes!("../tests/data/zip64_magic_in_filename_4.zip").to_vec(),
            include_bytes!("../tests/data/zip64_magic_in_filename_5.zip").to_vec(),
        ];
        // Although we don't allow adding files whose names contain the ZIP64 CDB-end or
        // CDB-end-locator signatures, we still read them when they aren't genuinely ambiguous.
        for file in files {
            ZipArchive::new(Cursor::new(file)).unwrap();
        }
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        use super::ZipArchive;

        let reader = ZipArchive::new(Cursor::new(include_bytes!(
            "../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        )));
        // Either opening fails outright or the bogus entries are dropped.
        assert!(reader.is_err() || reader.unwrap().is_empty());
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        use super::ZipArchive;

        let reader = ZipArchive::new(Cursor::new(include_bytes!(
            "../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        )));
        assert!(reader.is_err());
    }

    /// A malformed deflate64 stream must error during read, not panic.
    #[cfg(feature = "deflate64")]
    #[test]
    fn deflate64_index_out_of_bounds() -> std::io::Result<()> {
        let mut reader = ZipArchive::new(Cursor::new(include_bytes!(
            "../tests/data/raw_deflate64_index_out_of_bounds.zip"
        )))?;
        std::io::copy(&mut reader.by_index(0)?, &mut std::io::sink()).expect_err("Invalid file");
        Ok(())
    }

    /// A truncated deflate64 archive must be rejected at open time.
    #[cfg(feature = "deflate64")]
    #[test]
    fn deflate64_not_enough_space() {
        ZipArchive::new(Cursor::new(include_bytes!(
            "../tests/data/deflate64_issue_25.zip"
        )))
        .expect_err("Invalid file");
    }

    /// Entries whose sizes live in a trailing data descriptor are readable.
    #[cfg(feature = "deflate-flate2")]
    #[test]
    fn test_read_with_data_descriptor() {
        use std::io::Read;

        let mut reader = ZipArchive::new(Cursor::new(include_bytes!(
            "../tests/data/data_descriptor.zip"
        )))
        .unwrap();
        let mut decompressed = [0u8; 16];
        let mut file = reader.by_index(0).unwrap();
        assert_eq!(file.read(&mut decompressed).unwrap(), 12);
    }

    /// Symlink entries are detected and extracted as real symlinks.
    #[test]
    fn test_is_symlink() -> std::io::Result<()> {
        let mut reader = ZipArchive::new(Cursor::new(include_bytes!("../tests/data/symlink.zip")))?;
        assert!(reader.by_index(0)?.is_symlink());
        let tempdir = TempDir::with_prefix("test_is_symlink")?;
        reader.extract(&tempdir)?;
        assert!(tempdir.path().join("bar").is_symlink());
        Ok(())
    }

    /// UTF-8 names stored via the Unicode-path extra field are resolvable.
    #[test]
    #[cfg(feature = "deflate-flate2")]
    fn test_utf8_extra_field() {
        let mut reader =
            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/chinese.zip"))).unwrap();
        reader.by_name("七个房间.txt").unwrap();
    }

    /// UTF-8 names flagged in the general-purpose bits are resolvable.
    #[test]
    fn test_utf8() {
        let mut reader =
            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/linux-7z.zip"))).unwrap();
        reader.by_name("你好.txt").unwrap();
    }

    /// Same as `test_utf8`, but for an archive produced by Windows 7-Zip.
    #[test]
    fn test_utf8_2() {
        let mut reader = ZipArchive::new(Cursor::new(include_bytes!(
            "../tests/data/windows-7zip.zip"
        )))
        .unwrap();
        reader.by_name("你好.txt").unwrap();
    }

    /// Round-trips an archive with 65536 entries (ZIP64 entry-count
    /// territory) and verifies both by-name and by-index lookups.
    #[test]
    fn test_64k_files() -> ZipResult<()> {
        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
        let options = SimpleFileOptions {
            compression_method: Stored,
            ..Default::default()
        };
        for i in 0..=u16::MAX {
            let file_name = format!("{i}.txt");
            writer.start_file(&*file_name, options)?;
            writer.write_all(i.to_string().as_bytes())?;
        }

        let mut reader = ZipArchive::new(writer.finish()?)?;
        for i in 0..=u16::MAX {
            let expected_name = format!("{i}.txt");
            let expected_contents = i.to_string();
            let expected_contents = expected_contents.as_bytes();
            let mut file = reader.by_name(&expected_name)?;
            let mut contents = Vec::with_capacity(expected_contents.len());
            file.read_to_end(&mut contents)?;
            assert_eq!(contents, expected_contents);
            drop(file);
            contents.clear();
            let mut file = reader.by_index(i as usize)?;
            file.read_to_end(&mut contents)?;
            assert_eq!(contents, expected_contents);
        }
        Ok(())
    }

    /// Symlinks being extracted shouldn't be followed out of the destination directory.
    #[test]
    fn test_cannot_symlink_outside_destination() -> ZipResult<()> {
        use std::fs::create_dir;

        let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
        writer.add_symlink("symlink/", "../dest-sibling/", SimpleFileOptions::default())?;
        writer.start_file("symlink/dest-file", SimpleFileOptions::default())?;
        let mut reader = writer.finish_into_readable()?;
        let dest_parent = TempDir::with_prefix("read__test_cannot_symlink_outside_destination")?;
        let dest_sibling = dest_parent.path().join("dest-sibling");
        create_dir(&dest_sibling)?;
        let dest = dest_parent.path().join("dest");
        create_dir(&dest)?;
        // Extraction must fail and nothing may be written via the symlink.
        assert!(reader.extract(dest).is_err());
        assert!(!dest_sibling.join("dest-file").exists());
        Ok(())
    }

    /// `extract` succeeds into a pre-created destination directory.
    #[test]
    fn test_can_create_destination() -> ZipResult<()> {
        let mut reader =
            ZipArchive::new(Cursor::new(include_bytes!("../tests/data/mimetype.zip")))?;
        let dest = TempDir::with_prefix("read__test_can_create_destination")?;
        reader.extract(&dest)?;
        assert!(dest.path().join("mimetype").exists());
        Ok(())
    }

    /// An archive whose central directory is not at the end of the file is
    /// still fully readable, entry by entry.
    #[test]
    fn test_central_directory_not_at_end() -> ZipResult<()> {
        let mut reader = ZipArchive::new(Cursor::new(include_bytes!("../tests/data/omni.ja")))?;
        let mut file = reader.by_name("chrome.manifest")?;
        let mut contents = String::new();
        file.read_to_string(&mut contents)?; // ensures valid UTF-8
        assert!(!contents.is_empty(), "chrome.manifest should not be empty");
        drop(file);
        for i in 0..reader.len() {
            let mut file = reader.by_index(i)?;
            // Attempt to read a small portion or all of each file to ensure it's accessible
            let mut buffer = Vec::new();
            file.read_to_end(&mut buffer)?;
            assert_eq!(
                buffer.len(),
                file.size() as usize,
                "File size mismatch for {}",
                file.name()
            );
        }
        Ok(())
    }
}