Skip to main content

zip_core/
archive.rs

1//! Pure-Rust ZIP container parser: EOCD + central directory + local file headers,
2//! with a decoding entry reader that verifies CRC-32 on EOF.
3//!
4//! Mirrors the zip-rs surface (`ZipArchive::new` / `by_index` / `by_name` /
5//! `ZipFile` with `name()`/`compression()`/`size()`/`data_start()`) so fleet
6//! consumers migrate with a near-mechanical `zip::` -> `zip_core::` rename.
7
8use std::io::{self, Read, Seek, SeekFrom};
9use std::path::PathBuf;
10
11use crate::bytes::Reader;
12use crate::codec::Decoder;
13use crate::crypto::{AesInfo, AesReader, ZipCryptoReader};
14use crate::{FormatError, ZipCoreError};
15
/// End-of-central-directory (EOCD) record signature; searched for in its
/// little-endian byte form when scanning back from EOF.
const EOCD_SIG: u32 = 0x0605_4b50;
/// Central-directory file header signature.
const CD_HEADER_SIG: u32 = 0x0201_4b50;
/// Local file header signature.
const LFH_SIG: u32 = 0x0403_4b50;
/// Zip64 EOCD record signature.
const ZIP64_EOCD_SIG: u32 = 0x0606_4b50;
/// Central-directory digital-signature record header.
const ARCHIVE_SIG_SIG: u32 = 0x0505_4b50;
/// Zip64 EOCD locator signature (the record expected directly before the
/// 32-bit EOCD).
const ZIP64_LOCATOR_SIG: u32 = 0x0706_4b50;
/// Header id of the Zip64 extended-information extra field.
const ZIP64_EXTRA_ID: u16 = 0x0001;
/// 32-bit sentinel: the real value lives in a Zip64 record/extra field.
const U32_SENTINEL: u32 = 0xFFFF_FFFF;
/// 16-bit sentinel for counts.
const U16_SENTINEL: u16 = 0xFFFF;

/// Minimum EOCD record length (no comment).
const EOCD_MIN: usize = 22;
/// Largest region we scan back from EOF for the EOCD (record + max comment).
const EOCD_SCAN_MAX: usize = EOCD_MIN + u16::MAX as usize;
/// Zip64 EOCD locator record length.
const ZIP64_LOCATOR_LEN: usize = 20;
/// Fixed portion of a local file header.
const LFH_FIXED: usize = 30;
/// Ceiling on entries we will parse, guarding against a lying EOCD count.
const MAX_ENTRIES: usize = 16_000_000;
40
/// Compression method id recorded in a ZIP header.
///
/// Follows the shape of zip-rs `CompressionMethod` for the common methods, adds
/// names for legacy/vendor ids that are recognized but not decoded, and keeps
/// every other id verbatim in [`CompressionMethod::Unknown`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionMethod {
    /// Id 0: stored, i.e. no compression (raw passthrough / in-place window).
    Stored,
    /// Id 8: classic DEFLATE.
    Deflated,
    /// Id 9: Deflate64 ("enhanced deflate").
    Deflate64,
    /// Id 12: bzip2.
    Bzip2,
    /// Id 14: LZMA, carrying the 4-byte ZIP wrapper prefix.
    Lzma,
    /// Id 93: Zstandard.
    Zstd,
    /// Id 95: XZ.
    Xz,
    /// Id 1: legacy Shrink. Named only; not decoded.
    Shrunk,
    /// Ids 2 through 5: legacy Reduce. Not decoded.
    Reduced,
    /// Id 6: legacy Implode. Not decoded.
    Imploded,
    /// Id 10: PKWARE DCL Implode. Not decoded.
    DclImploded,
    /// Id 16: IBM z/OS CMPSC. Not decoded.
    IbmCmpsc,
    /// Id 18: IBM TERSE. Not decoded.
    IbmTerse,
    /// Id 19: IBM LZ77 / PFS. Not decoded.
    IbmLz77,
    /// Id 94: MP3. Not decoded.
    Mp3,
    /// Id 96: JPEG variant. Not decoded.
    Jpeg,
    /// Id 97: WavPack. Not decoded.
    WavPack,
    /// Id 98: PPMd. Not decoded.
    Ppmd,
    /// Every other id; the raw value is kept so callers can report it.
    Unknown(u16),
}

impl CompressionMethod {
    /// Translate a raw header method id into its variant.
    ///
    /// Arms are listed in ascending id order. Ids without a named variant come
    /// back as `Unknown(raw)`, so the conversion never fails.
    pub(crate) fn from_u16(raw: u16) -> Self {
        match raw {
            0 => Self::Stored,
            1 => Self::Shrunk,
            2..=5 => Self::Reduced,
            6 => Self::Imploded,
            8 => Self::Deflated,
            9 => Self::Deflate64,
            10 => Self::DclImploded,
            12 => Self::Bzip2,
            14 => Self::Lzma,
            16 => Self::IbmCmpsc,
            18 => Self::IbmTerse,
            19 => Self::IbmLz77,
            93 => Self::Zstd,
            94 => Self::Mp3,
            95 => Self::Xz,
            96 => Self::Jpeg,
            97 => Self::WavPack,
            98 => Self::Ppmd,
            unrecognized => Self::Unknown(unrecognized),
        }
    }
}
110
111/// Parsed central-directory metadata for one entry.
112#[derive(Debug, Clone)]
113pub(crate) struct CentralEntry {
114    pub(crate) name: String,
115    pub(crate) method: CompressionMethod,
116    pub(crate) flags: u16,
117    pub(crate) crc32: u32,
118    pub(crate) compressed_size: u64,
119    pub(crate) uncompressed_size: u64,
120    pub(crate) lfh_offset: u64,
121    /// DOS mod-time (the ZipCrypto password-check byte when a data descriptor is
122    /// used).
123    pub(crate) last_mod_time: u16,
124    /// WinZip AES parameters when this entry is method-99 encrypted.
125    pub(crate) aes: Option<AesInfo>,
126    /// Disk number holding this entry's local header (0 = this disk).
127    pub(crate) disk_start: u16,
128    /// Parsed common extra fields.
129    pub(crate) extra: ExtraFields,
130}
131
132impl CentralEntry {
133    fn is_dir(&self) -> bool {
134        self.name.ends_with('/') || self.name.ends_with('\\')
135    }
136}
137
/// Container-level offsets/counts, for the forensic analyzer's structural audits
/// (trailing data, spanning, etc.). Returned by [`ZipArchive::summary`].
///
/// Populated once, while the EOCD and central directory are parsed.
#[derive(Debug, Clone)]
pub struct ArchiveSummary {
    /// Total file length.
    pub file_len: u64,
    /// Absolute offset of the central directory: the offset recorded in the
    /// EOCD plus any prepended-data prefix detected during parsing.
    pub central_dir_offset: u64,
    /// Declared central-directory size in bytes.
    pub central_dir_size: u64,
    /// Absolute offset just past the end of the 32-bit EOCD record (incl. its
    /// comment) — bytes beyond this are trailing data. Derived from the declared
    /// comment length, so it is not clamped to `file_len`.
    pub eocd_end_offset: u64,
    /// EOCD archive-comment length.
    pub comment_len: u16,
    /// Disk number recorded in the EOCD (0 for a single-file archive). Taken
    /// from the Zip64 EOCD record when the archive is Zip64.
    pub disk_number: u32,
    /// Disk on which the central directory starts (0 for a single-file archive).
    /// Taken from the Zip64 EOCD record when the archive is Zip64.
    pub cd_start_disk: u32,
    /// Length of the central-directory digital-signature record (header
    /// 0x05054b50) if present, else `None`. The value is the 2-byte size field
    /// that follows the record header. The signature is not verified.
    pub archive_signature_len: Option<u16>,
}
161
162/// A parsed ZIP archive over a seekable reader.
163pub struct ZipArchive<R> {
164    reader: R,
165    entries: Vec<CentralEntry>,
166    summary: ArchiveSummary,
167}
168
169impl<R: Read + Seek> ZipArchive<R> {
170    /// Parse the EOCD and central directory of `reader`.
171    pub fn new(mut reader: R) -> Result<Self, ZipCoreError> {
172        let file_len = reader.seek(SeekFrom::End(0))?;
173        let (entries, summary) = parse_central_directory(&mut reader, file_len)?;
174        Ok(Self {
175            reader,
176            entries,
177            summary,
178        })
179    }
180
181    /// Container-level offsets/counts for structural audits.
182    pub fn summary(&self) -> &ArchiveSummary {
183        &self.summary
184    }
185
186    /// Number of entries in the central directory.
187    pub fn len(&self) -> usize {
188        self.entries.len()
189    }
190
191    /// Whether the archive has no entries.
192    pub fn is_empty(&self) -> bool {
193        self.entries.is_empty()
194    }
195
196    /// Iterate entry names in central-directory order.
197    pub fn file_names(&self) -> impl Iterator<Item = &str> {
198        self.entries.iter().map(|e| e.name.as_str())
199    }
200
201    /// Open the entry at index `i` for decoding (mirrors zip-rs `by_index`).
202    pub fn by_index(&mut self, i: usize) -> Result<ZipFile<'_>, ZipCoreError> {
203        let meta = self
204            .entries
205            .get(i)
206            .ok_or(ZipCoreError::IndexOutOfBounds(i))?
207            .clone();
208        self.open(meta)
209    }
210
211    /// Open the named entry for decoding (mirrors zip-rs `by_name`).
212    pub fn by_name(&mut self, name: &str) -> Result<ZipFile<'_>, ZipCoreError> {
213        let meta = self
214            .entries
215            .iter()
216            .find(|e| e.name == name)
217            .ok_or_else(|| ZipCoreError::EntryNotFound(name.to_string()))?
218            .clone();
219        self.open(meta)
220    }
221
222    /// Open the entry at index `i`, decrypting it with `password` (ZipCrypto or
223    /// WinZip AES). Errors with `WrongPassword` if the password fails the check.
224    pub fn by_index_decrypt(
225        &mut self,
226        i: usize,
227        password: &[u8],
228    ) -> Result<ZipFile<'_>, ZipCoreError> {
229        let meta = self
230            .entries
231            .get(i)
232            .ok_or(ZipCoreError::IndexOutOfBounds(i))?
233            .clone();
234        self.open_decrypt(meta, password)
235    }
236
237    /// Open the named entry, decrypting it with `password`.
238    pub fn by_name_decrypt(
239        &mut self,
240        name: &str,
241        password: &[u8],
242    ) -> Result<ZipFile<'_>, ZipCoreError> {
243        let meta = self
244            .entries
245            .iter()
246            .find(|e| e.name == name)
247            .ok_or_else(|| ZipCoreError::EntryNotFound(name.to_string()))?
248            .clone();
249        self.open_decrypt(meta, password)
250    }
251
252    /// Raw structural view for the forensic analyzer: per entry, the header
253    /// fields as recorded in BOTH the central directory and the local file
254    /// header, plus offsets. This is the seam that lets `zip-forensic` compare
255    /// the two copies (tamper signal) without re-implementing a second parser.
256    pub fn structural_view(&mut self) -> Result<Vec<EntryLayout>, ZipCoreError> {
257        let metas = self.entries.clone();
258        let mut out = Vec::with_capacity(metas.len());
259        for (index, m) in metas.iter().enumerate() {
260            let (local, data_start) = read_lfh_fields(&mut self.reader, m.lfh_offset)?;
261            out.push(EntryLayout {
262                index,
263                lfh_offset: m.lfh_offset,
264                data_start,
265                central: HeaderFields {
266                    name: m.name.clone(),
267                    method: m.method,
268                    flags: m.flags,
269                    crc32: m.crc32,
270                    compressed_size: m.compressed_size,
271                    uncompressed_size: m.uncompressed_size,
272                },
273                local,
274                extra: m.extra.clone(),
275            });
276        }
277        Ok(out)
278    }
279
280    fn open(&mut self, meta: CentralEntry) -> Result<ZipFile<'_>, ZipCoreError> {
281        check_local_disk(&meta, self.summary.disk_number, self.summary.cd_start_disk)?;
282        if meta.flags & 0x0001 != 0 {
283            return Err(ZipCoreError::EncryptedNoPassword(meta.name.clone()));
284        }
285        let (_local, data_start) = read_lfh_fields(&mut self.reader, meta.lfh_offset)?;
286        self.reader.seek(SeekFrom::Start(data_start))?;
287        let limited: Box<dyn Read + '_> = Box::new((&mut self.reader).take(meta.compressed_size));
288        let decoder = Decoder::new(meta.method, meta.uncompressed_size, limited)?;
289        Ok(ZipFile {
290            data_start,
291            decoder,
292            hasher: crc32fast::Hasher::new(),
293            bytes_out: 0,
294            verified: false,
295            verify_crc: true,
296            meta,
297        })
298    }
299
300    fn open_decrypt(
301        &mut self,
302        meta: CentralEntry,
303        password: &[u8],
304    ) -> Result<ZipFile<'_>, ZipCoreError> {
305        check_local_disk(&meta, self.summary.disk_number, self.summary.cd_start_disk)?;
306        // Not encrypted -> the password is irrelevant; read normally.
307        if meta.flags & 0x0001 == 0 && meta.aes.is_none() {
308            return self.open(meta);
309        }
310        // PKWARE Strong Encryption (GP bit 6) and masked/central-directory
311        // encryption (GP bit 13) are unsupported — fail loud rather than misread
312        // the stream as traditional ZipCrypto. Only ZipCrypto + WinZip AES decode.
313        if meta.aes.is_none() && meta.flags & 0x0040 != 0 {
314            return Err(ZipCoreError::UnsupportedEncryption {
315                entry: meta.name,
316                reason: "PKWARE strong encryption (GP flag bit 6)".to_string(),
317            });
318        }
319        if meta.flags & 0x2000 != 0 {
320            return Err(ZipCoreError::UnsupportedEncryption {
321                entry: meta.name,
322                reason: "masked / central-directory encryption (GP flag bit 13)".to_string(),
323            });
324        }
325        let (_local, data_start) = read_lfh_fields(&mut self.reader, meta.lfh_offset)?;
326        self.reader.seek(SeekFrom::Start(data_start))?;
327        let take = (&mut self.reader).take(meta.compressed_size);
328        let (reader, method, verify_crc): (Box<dyn Read + '_>, CompressionMethod, bool) =
329            if let Some(aes) = meta.aes {
330                let r = AesReader::new(take, password, aes, meta.compressed_size, &meta.name)?;
331                // AE-2 zeroes the CRC field; its integrity is the HMAC (checked by
332                // AesReader). AE-1 keeps the CRC, so verify it.
333                (
334                    Box::new(r),
335                    CompressionMethod::from_u16(aes.actual_method),
336                    !aes.is_ae2,
337                )
338            } else {
339                // Traditional ZipCrypto: the check byte is the CRC high byte, or the
340                // mod-time high byte when a data descriptor is used (bit 3).
341                let check = zipcrypto_check_byte(meta.flags, meta.crc32, meta.last_mod_time);
342                let r = ZipCryptoReader::new(take, password, check, &meta.name)?;
343                (Box::new(r), meta.method, true)
344            };
345        let decoder = Decoder::new(method, meta.uncompressed_size, reader)?;
346        Ok(ZipFile {
347            data_start,
348            decoder,
349            hasher: crc32fast::Hasher::new(),
350            bytes_out: 0,
351            verified: false,
352            verify_crc,
353            meta,
354        })
355    }
356}
357
/// Header fields as recorded in one header copy (central directory OR local file
/// header). Exposed via [`ZipArchive::structural_view`] for the forensic seam.
///
/// Derives `PartialEq`/`Eq`, so two copies can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HeaderFields {
    /// Entry name (decoded).
    pub name: String,
    /// Compression method.
    pub method: CompressionMethod,
    /// General-purpose flag bits.
    pub flags: u16,
    /// CRC-32 as recorded in this header copy.
    pub crc32: u32,
    /// Compressed size as recorded in this header copy.
    pub compressed_size: u64,
    /// Uncompressed size as recorded in this header copy.
    pub uncompressed_size: u64,
}
375
/// One entry's raw structural layout: the central-directory and local-file-header
/// copies of its fields plus offsets, for cross-checking (tamper detection).
/// Produced by [`ZipArchive::structural_view`].
#[derive(Debug, Clone)]
pub struct EntryLayout {
    /// Index in central-directory order.
    pub index: usize,
    /// Absolute offset of the local file header.
    pub lfh_offset: u64,
    /// Absolute offset of the entry's first data byte, computed from the local
    /// header as `lfh_offset + 30 + name_len + extra_len`.
    pub data_start: u64,
    /// Fields as recorded in the central directory. Sizes here have already had
    /// any Zip64 extra-field override applied.
    pub central: HeaderFields,
    /// Fields as recorded in the local file header. Sizes are the raw 32-bit
    /// header values widened to `u64`; a Zip64 sentinel (`0xFFFF_FFFF`) is
    /// reported as-is rather than resolved.
    pub local: HeaderFields,
    /// Parsed common extra fields from the central-directory header.
    pub extra: ExtraFields,
}
393
/// Parsed common ZIP extra fields (central-directory copy). Unset fields are
/// `None`. Timestamps are surfaced verbatim: NTFS times are Windows FILETIME
/// (100 ns ticks since 1601-01-01 UTC); Unix times are signed seconds since the
/// epoch.
///
/// `Default` yields a value with every field `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExtraFields {
    /// NTFS last-modified time (FILETIME), extra id 0x000a.
    pub ntfs_mtime: Option<u64>,
    /// NTFS last-access time (FILETIME).
    pub ntfs_atime: Option<u64>,
    /// NTFS creation time (FILETIME).
    pub ntfs_ctime: Option<u64>,
    /// Unix modified time (seconds), Info-ZIP extended timestamp id 0x5455.
    pub unix_mtime: Option<i32>,
    /// Unix access time (seconds).
    pub unix_atime: Option<i32>,
    /// Unix creation time (seconds).
    pub unix_ctime: Option<i32>,
    /// Info-ZIP Unicode path override (id 0x7075), UTF-8.
    pub unicode_path: Option<String>,
    /// Info-ZIP Unicode comment override (id 0x6375), UTF-8.
    pub unicode_comment: Option<String>,
}
417
418/// Read and parse the local file header at `lfh_offset`, returning its fields and
419/// the absolute offset of the entry's first data byte
420/// (`lfh_offset + 30 + name_len + extra_len`).
421fn read_lfh_fields<R: Read + Seek>(
422    reader: &mut R,
423    lfh_offset: u64,
424) -> Result<(HeaderFields, u64), ZipCoreError> {
425    reader.seek(SeekFrom::Start(lfh_offset))?;
426    let mut fixed = [0u8; LFH_FIXED];
427    reader.read_exact(&mut fixed)?;
428    let mut r = Reader::new(&fixed);
429    if r.u32()? != LFH_SIG {
430        return Err(FormatError::BadSignature {
431            what: "local file header",
432            offset: lfh_offset,
433        }
434        .into());
435    }
436    let _version_needed = r.u16()?;
437    let flags = r.u16()?;
438    let method = CompressionMethod::from_u16(r.u16()?);
439    let _mod_time = r.u16()?;
440    let _mod_date = r.u16()?;
441    let crc32 = r.u32()?;
442    let compressed_size = u64::from(r.u32()?);
443    let uncompressed_size = u64::from(r.u32()?);
444    let name_len = usize::from(r.u16()?);
445    let extra_len = usize::from(r.u16()?);
446
447    let mut name_buf = vec![0u8; name_len];
448    reader.read_exact(&mut name_buf)?;
449    let name = decode_name(&name_buf, flags);
450    let data_start = lfh_offset + LFH_FIXED as u64 + name_len as u64 + extra_len as u64;
451
452    Ok((
453        HeaderFields {
454            name,
455            method,
456            flags,
457            crc32,
458            compressed_size,
459            uncompressed_size,
460        },
461        data_start,
462    ))
463}
464
/// The 32-bit EOCD fields, as produced by `parse_eocd`. Any size/offset/count
/// may be a sentinel for Zip64, in which case the real value lives in the Zip64
/// EOCD record.
struct Eocd32 {
    /// Number of this disk.
    disk_number: u16,
    /// Disk on which the central directory starts.
    cd_start_disk: u16,
    /// Total number of central-directory entries (`0xFFFF` = Zip64 sentinel).
    total_entries: u16,
    /// Central-directory size in bytes (`0xFFFF_FFFF` = Zip64 sentinel).
    cd_size: u32,
    /// Recorded central-directory offset (`0xFFFF_FFFF` = Zip64 sentinel).
    cd_offset: u32,
    /// Length of the archive comment that follows the fixed record.
    comment_len: u16,
}
475
476fn parse_central_directory<R: Read + Seek>(
477    reader: &mut R,
478    file_len: u64,
479) -> Result<(Vec<CentralEntry>, ArchiveSummary), ZipCoreError> {
480    let scan_len = file_len.min(EOCD_SCAN_MAX as u64);
481    if scan_len < EOCD_MIN as u64 {
482        return Err(FormatError::NoEocd.into());
483    }
484    let scan_start = file_len - scan_len;
485    reader.seek(SeekFrom::Start(scan_start))?;
486    let mut tail = vec![0u8; scan_len as usize];
487    reader.read_exact(&mut tail)?;
488
489    let eocd_rel = find_eocd(&tail).ok_or(FormatError::NoEocd)?;
490    let eocd = parse_eocd(&tail[eocd_rel..])?;
491    // Absolute end of the 32-bit EOCD record incl. its comment; the EOCD is always
492    // the last structure, so anything past this is trailing data.
493    let eocd_end_offset =
494        scan_start + eocd_rel as u64 + EOCD_MIN as u64 + u64::from(eocd.comment_len);
495
496    // Promote to Zip64 when any base field is a sentinel: the real 64-bit
497    // offset/size/count/disk live in the Zip64 EOCD record reached via its locator.
498    let is_zip64 = eocd.cd_offset == U32_SENTINEL
499        || eocd.cd_size == U32_SENTINEL
500        || eocd.total_entries == U16_SENTINEL;
501    let (cd_offset, cd_size, total_entries, disk_number, cd_start_disk) = if is_zip64 {
502        resolve_zip64_eocd(reader, &tail, eocd_rel)?
503    } else {
504        (
505            u64::from(eocd.cd_offset),
506            u64::from(eocd.cd_size),
507            usize::from(eocd.total_entries),
508            u32::from(eocd.disk_number),
509            u32::from(eocd.cd_start_disk),
510        )
511    };
512
513    // Detect data prepended before the archive (SFX stub / polyglot prefix). A
514    // normal central directory ends exactly at the EOCD; if the recorded
515    // `cd_offset` does not point at a CD header but `cd_offset + N` does — where
516    // `N = eocd_pos - (cd_offset + cd_size)` — the file carries an N-byte prefix
517    // and every recorded offset is relative to the archive start, not the file.
518    // The header check disambiguates a real prefix from a digital-signature
519    // record sitting between the CD and EOCD. Not attempted for Zip64, whose
520    // offsets live in a separately-located record.
521    let eocd_pos = scan_start + eocd_rel as u64;
522    let prefix = if is_zip64 {
523        0
524    } else {
525        match eocd_pos.checked_sub(cd_offset.saturating_add(cd_size)) {
526            Some(n)
527                if n > 0
528                    && !cd_header_at(reader, cd_offset)
529                    && cd_header_at(reader, cd_offset + n) =>
530            {
531                n
532            }
533            _ => 0,
534        }
535    };
536    let actual_cd_offset = cd_offset + prefix;
537
538    match actual_cd_offset.checked_add(cd_size) {
539        Some(end) if end <= file_len => {}
540        _ => return Err(FormatError::CentralDirOutOfRange { cd_offset, cd_size }.into()),
541    }
542    if total_entries > MAX_ENTRIES {
543        return Err(FormatError::TooManyEntries(total_entries).into());
544    }
545
546    reader.seek(SeekFrom::Start(actual_cd_offset))?;
547    let mut cd = vec![0u8; cd_size as usize];
548    reader.read_exact(&mut cd)?;
549
550    let (mut entries, cd_consumed) = parse_cd_entries(&cd, total_entries)?;
551    // Recorded LFH offsets are relative to the archive start; make them absolute
552    // by shifting past any detected prefix so reads land at the right bytes.
553    if prefix > 0 {
554        for e in &mut entries {
555            e.lfh_offset += prefix;
556        }
557    }
558
559    // A CD digital-signature record (header 0x05054b50) sits between the last
560    // central-directory header and the EOCD. Producers disagree on whether its
561    // bytes count toward the EOCD's cd_size: Info-ZIP places it *after* the
562    // cd_size span, while PKWARE SecureZIP includes it *within* cd_size. Detect it
563    // at the point where the headers actually ended, which covers both layouts:
564    // first check the trailing bytes inside the CD buffer, then the bytes that
565    // follow it. The signature is recognized, not verified.
566    let archive_signature_len = {
567        let sig = ARCHIVE_SIG_SIG.to_le_bytes();
568        let trailing = &cd[cd_consumed..];
569        if trailing.len() >= 6 && trailing[..4] == sig {
570            Some(u16::from_le_bytes([trailing[4], trailing[5]]))
571        } else if trailing.is_empty() {
572            // cd_size covered only the headers; the record (if any) follows the
573            // CD block, where the reader is now positioned.
574            let mut hdr = [0u8; 6];
575            match reader.read_exact(&mut hdr) {
576                Ok(()) if hdr[..4] == sig => Some(u16::from_le_bytes([hdr[4], hdr[5]])),
577                _ => None,
578            }
579        } else {
580            None
581        }
582    };
583    let summary = ArchiveSummary {
584        file_len,
585        central_dir_offset: actual_cd_offset,
586        central_dir_size: cd_size,
587        eocd_end_offset,
588        comment_len: eocd.comment_len,
589        disk_number,
590        cd_start_disk,
591        archive_signature_len,
592    };
593    Ok((entries, summary))
594}
595
596/// Whether a central-directory file header signature sits at absolute `offset`.
597/// Used to disambiguate a prepended-data prefix from other inter-record bytes.
598fn cd_header_at<R: Read + Seek>(reader: &mut R, offset: u64) -> bool {
599    if reader.seek(SeekFrom::Start(offset)).is_err() {
600        return false; // cov:unreachable: Cursor/File seek to a u64 offset does not fail
601    }
602    let mut sig = [0u8; 4];
603    match reader.read_exact(&mut sig) {
604        Ok(()) => u32::from_le_bytes(sig) == CD_HEADER_SIG,
605        Err(_) => false, // cov:unreachable: the sole caller only passes offsets < file_len (n>0 ⇒ in-bounds)
606    }
607}
608
609/// Scan backward for the EOCD signature, returning its offset within `tail`.
610fn find_eocd(tail: &[u8]) -> Option<usize> {
611    if tail.len() < EOCD_MIN {
612        return None; // cov:unreachable: parse_central_directory guards scan_len >= EOCD_MIN
613    }
614    let sig = EOCD_SIG.to_le_bytes();
615    // The EOCD starts at most EOCD_MIN bytes before EOF; scan from the latest.
616    (0..=tail.len() - EOCD_MIN)
617        .rev()
618        .find(|&i| tail[i..i + 4] == sig)
619}
620
621/// Parse the fixed EOCD fields. Any size/offset/count may be a Zip64 sentinel.
622fn parse_eocd(buf: &[u8]) -> Result<Eocd32, ZipCoreError> {
623    let mut r = Reader::new(buf);
624    if r.u32()? != EOCD_SIG {
625        return Err(FormatError::NoEocd.into()); // cov:unreachable: find_eocd matched this signature
626    }
627    let disk_number = r.u16()?;
628    let cd_start_disk = r.u16()?;
629    let _entries_this_disk = r.u16()?;
630    let total_entries = r.u16()?;
631    let cd_size = r.u32()?;
632    let cd_offset = r.u32()?;
633    let comment_len = r.u16()?;
634    Ok(Eocd32 {
635        disk_number,
636        cd_start_disk,
637        total_entries,
638        cd_size,
639        cd_offset,
640        comment_len,
641    })
642}
643
644/// Resolve the real central-directory location from the Zip64 EOCD record. The
645/// Zip64 EOCD locator sits immediately before the 32-bit EOCD; it points at the
646/// Zip64 EOCD record holding the true 64-bit offset/size/count.
647fn resolve_zip64_eocd<R: Read + Seek>(
648    reader: &mut R,
649    tail: &[u8],
650    eocd_rel: usize,
651) -> Result<(u64, u64, usize, u32, u32), ZipCoreError> {
652    if eocd_rel < ZIP64_LOCATOR_LEN {
653        return Err(FormatError::Zip64Unsupported.into());
654    }
655    let mut loc = Reader::new(&tail[eocd_rel - ZIP64_LOCATOR_LEN..eocd_rel]);
656    if loc.u32()? != ZIP64_LOCATOR_SIG {
657        return Err(FormatError::Zip64Unsupported.into());
658    }
659    let _disk = loc.u32()?;
660    let z64_eocd_offset = loc.u64()?;
661
662    reader.seek(SeekFrom::Start(z64_eocd_offset))?;
663    let mut rec = [0u8; 56];
664    reader.read_exact(&mut rec)?;
665    let mut r = Reader::new(&rec);
666    if r.u32()? != ZIP64_EOCD_SIG {
667        return Err(FormatError::BadSignature {
668            what: "Zip64 EOCD record",
669            offset: z64_eocd_offset,
670        }
671        .into());
672    }
673    let _record_size = r.u64()?;
674    let _version_made_by = r.u16()?;
675    let _version_needed = r.u16()?;
676    let disk_number = r.u32()?;
677    let cd_start_disk = r.u32()?;
678    let _entries_this_disk = r.u64()?;
679    let total_entries = r.u64()?;
680    let cd_size = r.u64()?;
681    let cd_offset = r.u64()?;
682    let total =
683        usize::try_from(total_entries).map_err(|_| FormatError::TooManyEntries(usize::MAX))?;
684    Ok((cd_offset, cd_size, total, disk_number, cd_start_disk))
685}
686
687/// Parse `total_entries` central-directory file headers from `cd`.
688/// Parse the central-directory headers, returning the entries and the number of
689/// bytes the headers consumed (so the caller can locate a trailing digital
690/// signature record that some producers place inside the `cd_size` span).
691fn parse_cd_entries(
692    cd: &[u8],
693    total_entries: usize,
694) -> Result<(Vec<CentralEntry>, usize), ZipCoreError> {
695    let mut r = Reader::new(cd);
696    let mut entries = Vec::new();
697    for _ in 0..total_entries {
698        if r.remaining() < 46 {
699            return Err(FormatError::Truncated.into());
700        }
701        if r.u32()? != CD_HEADER_SIG {
702            return Err(FormatError::BadSignature {
703                what: "central directory header",
704                offset: (cd.len() - r.remaining()) as u64,
705            }
706            .into());
707        }
708        let _version_made_by = r.u16()?;
709        let _version_needed = r.u16()?;
710        let flags = r.u16()?;
711        let method_raw = r.u16()?;
712        let method = CompressionMethod::from_u16(method_raw);
713        let last_mod_time = r.u16()?;
714        let _mod_date = r.u16()?;
715        let crc32 = r.u32()?;
716        let compressed_size32 = r.u32()?;
717        let uncompressed_size32 = r.u32()?;
718        let name_len = usize::from(r.u16()?);
719        let extra_len = usize::from(r.u16()?);
720        let comment_len = usize::from(r.u16()?);
721        let disk_start = r.u16()?;
722        let _internal_attrs = r.u16()?;
723        let _external_attrs = r.u32()?;
724        let lfh_offset32 = r.u32()?;
725
726        let name_bytes = r.take(name_len)?;
727        let extra = r.take(extra_len)?;
728        let _comment = r.take(comment_len)?;
729
730        // Resolve any 0xFFFFFFFF sentinels from the Zip64 extended-information
731        // extra field (header id 0x0001). Fields appear in a FIXED order and only
732        // when their base field is a sentinel.
733        let mut uncompressed_size = u64::from(uncompressed_size32);
734        let mut compressed_size = u64::from(compressed_size32);
735        let mut lfh_offset = u64::from(lfh_offset32);
736        if uncompressed_size32 == U32_SENTINEL
737            || compressed_size32 == U32_SENTINEL
738            || lfh_offset32 == U32_SENTINEL
739        {
740            apply_zip64_extra(
741                extra,
742                uncompressed_size32 == U32_SENTINEL,
743                compressed_size32 == U32_SENTINEL,
744                lfh_offset32 == U32_SENTINEL,
745                &mut uncompressed_size,
746                &mut compressed_size,
747                &mut lfh_offset,
748            )?;
749        }
750
751        // Filename: UTF-8 when GP flag bit 11 is set, else CP437. We accept either
752        // as best-effort UTF-8 here; a full CP437 table is a follow-up (it only
753        // affects display of non-ASCII names, not entry location).
754        let name = decode_name(name_bytes, flags);
755        // Method 99 = WinZip AES; the AE-x extra field (0x9901) carries the real
756        // method + key strength.
757        let aes = if method_raw == 99 {
758            parse_aes_extra(extra)
759        } else {
760            None
761        };
762
763        entries.push(CentralEntry {
764            name,
765            method,
766            flags,
767            crc32,
768            compressed_size,
769            uncompressed_size,
770            lfh_offset,
771            last_mod_time,
772            aes,
773            disk_start,
774            extra: parse_extra_fields(extra),
775        });
776    }
777    let consumed = cd.len() - r.remaining();
778    Ok((entries, consumed))
779}
780
781/// Override sentinel CD fields from the Zip64 extended-information extra field
782/// (header id 0x0001). The 64-bit fields appear in a fixed order — original size,
783/// compressed size, relative header offset — and ONLY when their base field is a
784/// sentinel. A sentinel with no matching extra field is a malformed Zip64 archive.
785fn apply_zip64_extra(
786    extra: &[u8],
787    need_uncompressed: bool,
788    need_compressed: bool,
789    need_offset: bool,
790    uncompressed_size: &mut u64,
791    compressed_size: &mut u64,
792    lfh_offset: &mut u64,
793) -> Result<(), ZipCoreError> {
794    let mut r = Reader::new(extra);
795    while r.remaining() >= 4 {
796        let id = r.u16()?;
797        let size = usize::from(r.u16()?);
798        if id == ZIP64_EXTRA_ID {
799            let mut z = Reader::new(r.take(size)?);
800            if need_uncompressed {
801                *uncompressed_size = z.u64()?;
802            }
803            if need_compressed {
804                *compressed_size = z.u64()?;
805            }
806            if need_offset {
807                *lfh_offset = z.u64()?;
808            }
809            return Ok(());
810        }
811        r.skip(size)?;
812    }
813    Err(FormatError::Zip64Inconsistent.into())
814}
815
816/// Parse the WinZip AE-x extra field (header id 0x9901) from an entry's extra
817/// data: version (AE-1/AE-2), vendor "AE", AES strength, and the real method.
818fn parse_aes_extra(extra: &[u8]) -> Option<AesInfo> {
819    let mut r = Reader::new(extra);
820    while r.remaining() >= 4 {
821        let id = r.u16().ok()?;
822        let size = usize::from(r.u16().ok()?);
823        if id == 0x9901 {
824            let data = r.take(size).ok()?;
825            let mut d = Reader::new(data);
826            let version = d.u16().ok()?; // 1 = AE-1, 2 = AE-2
827            let _vendor = d.u16().ok()?; // "AE"
828            let strength = d.take(1).ok()?[0];
829            let actual_method = d.u16().ok()?;
830            return Some(AesInfo {
831                strength,
832                actual_method,
833                is_ae2: version == 2,
834            });
835        }
836        r.skip(size).ok()?;
837    }
838    None
839}
840
841/// Parse the common ZIP extra fields from an entry's extra block.
842fn parse_extra_fields(extra: &[u8]) -> ExtraFields {
843    let mut out = ExtraFields::default();
844    let mut r = Reader::new(extra);
845    while r.remaining() >= 4 {
846        let (Ok(id), Ok(size)) = (r.u16(), r.u16()) else {
847            break; // cov:unreachable: the >= 4 guard guarantees two u16 reads succeed
848        };
849        let Ok(data) = r.take(usize::from(size)) else {
850            break;
851        };
852        match id {
853            0x000a => parse_ntfs_times(data, &mut out),
854            0x5455 => parse_unix_times(data, &mut out),
855            0x7075 => out.unicode_path = parse_unicode_extra(data),
856            0x6375 => out.unicode_comment = parse_unicode_extra(data),
857            _ => {}
858        }
859    }
860    out
861}
862
863/// NTFS extra field (0x000a): reserved(4) then tagged attributes; tag 0x0001
864/// carries mtime/atime/ctime as 8-byte FILETIMEs.
865fn parse_ntfs_times(data: &[u8], out: &mut ExtraFields) {
866    let mut r = Reader::new(data);
867    let _ = r.u32(); // reserved
868    while r.remaining() >= 4 {
869        let (Ok(tag), Ok(tsize)) = (r.u16(), r.u16()) else {
870            break; // cov:unreachable: the >= 4 guard guarantees two u16 reads succeed
871        };
872        let Ok(tdata) = r.take(usize::from(tsize)) else {
873            break;
874        };
875        if tag == 0x0001 {
876            let mut s = Reader::new(tdata);
877            if let (Ok(m), Ok(a), Ok(c)) = (s.u64(), s.u64(), s.u64()) {
878                out.ntfs_mtime = Some(m);
879                out.ntfs_atime = Some(a);
880                out.ntfs_ctime = Some(c);
881            }
882        }
883    }
884}
885
886/// Info-ZIP extended timestamp (0x5455): a flags byte then present mtime/atime/
887/// ctime as signed 32-bit seconds, in that order.
888fn parse_unix_times(data: &[u8], out: &mut ExtraFields) {
889    let mut r = Reader::new(data);
890    let Ok(flag_byte) = r.take(1) else {
891        return;
892    };
893    let flags = flag_byte[0];
894    if flags & 0x01 != 0 {
895        out.unix_mtime = take_i32le(&mut r);
896    }
897    if flags & 0x02 != 0 {
898        out.unix_atime = take_i32le(&mut r);
899    }
900    if flags & 0x04 != 0 {
901        out.unix_ctime = take_i32le(&mut r);
902    }
903}
904
905fn take_i32le(r: &mut Reader) -> Option<i32> {
906    r.take(4)
907        .ok()
908        .map(|b| i32::from_le_bytes([b[0], b[1], b[2], b[3]]))
909}
910
/// Info-ZIP Unicode path/comment (0x7075 / 0x6375): version(1) + name-CRC(4) +
/// UTF-8 bytes.
///
/// Returns the UTF-8 string, or `None` when the field is shorter than its
/// 5-byte header, carries a version other than 1, or the payload is not valid
/// UTF-8. The CRC linking the field to the legacy name is not re-checked here.
fn parse_unicode_extra(data: &[u8]) -> Option<String> {
    let (&version, rest) = data.split_first()?;
    // APPNOTE 4.6.8/4.6.9: the only defined version is 1, and the field should
    // not be used when the version is not recognised (layout may differ).
    if version != 1 {
        return None;
    }
    // Skip the 4-byte CRC of the legacy name; `get` rejects a short header.
    let text = rest.get(4..)?;
    // Validate on the borrowed slice so invalid input never allocates.
    std::str::from_utf8(text).ok().map(str::to_owned)
}
920
/// Select the ZipCrypto password-verification byte: the most significant byte
/// of the CRC-32, or — when the entry uses a data descriptor (GP flag bit 3) —
/// the most significant byte of the DOS mod-time, matching what the encrypter
/// used (PKWARE APPNOTE 6.1.6).
fn zipcrypto_check_byte(flags: u16, crc32: u32, last_mod_time: u16) -> u8 {
    let uses_data_descriptor = flags & 0x0008 != 0;
    // Big-endian byte order puts the most significant byte first.
    let [crc_high, ..] = crc32.to_be_bytes();
    let [time_high, _] = last_mod_time.to_be_bytes();
    match uses_data_descriptor {
        true => time_high,
        false => crc_high,
    }
}
931
932/// Decode an entry filename. UTF-8 (flag bit 11) is taken verbatim; otherwise we
933/// map the CP437 high range so non-ASCII names are still legible.
934fn decode_name(bytes: &[u8], flags: u16) -> String {
935    // UTF-8 flag (bit 11) set, or pure ASCII: take the bytes as UTF-8 (lossy).
936    if flags & 0x0800 != 0 || bytes.is_ascii() {
937        return String::from_utf8_lossy(bytes).into_owned();
938    }
939    bytes.iter().map(|&b| crate::cp437::decode(b)).collect()
940}
941
/// A decoding reader over one ZIP entry. Implements `Read`, yielding decompressed
/// bytes and verifying CRC-32 at EOF (fail loud on mismatch).
pub struct ZipFile<'a> {
    /// Central-directory metadata for this entry (name, method, sizes, CRC, flags).
    meta: CentralEntry,
    /// Absolute archive offset of the entry's first data byte.
    data_start: u64,
    /// Decoder the `Read` impl pulls decoded bytes from.
    decoder: Decoder<Box<dyn Read + 'a>>,
    /// Running CRC-32 over every byte handed out by `read`.
    hasher: crc32fast::Hasher,
    /// Count of decoded bytes handed out by `read` so far.
    bytes_out: u64,
    /// Set once the decoder has reported EOF, so the CRC comparison runs at most once.
    verified: bool,
    /// Whether to verify CRC-32 at EOF. False for `WinZip` AE-2, whose integrity
    /// is the HMAC (checked by the AES reader) and whose CD CRC field is zero.
    verify_crc: bool,
}
955
956impl ZipFile<'_> {
957    /// Entry name (path within the archive).
958    pub fn name(&self) -> &str {
959        &self.meta.name
960    }
961
962    /// Compression method.
963    pub fn compression(&self) -> CompressionMethod {
964        self.meta.method
965    }
966
967    /// Uncompressed size in bytes (from the central directory).
968    pub fn size(&self) -> u64 {
969        self.meta.uncompressed_size
970    }
971
972    /// Compressed size in bytes (from the central directory).
973    pub fn compressed_size(&self) -> u64 {
974        self.meta.compressed_size
975    }
976
977    /// Stored CRC-32 (from the central directory).
978    pub fn crc32(&self) -> u32 {
979        self.meta.crc32
980    }
981
982    /// Absolute offset of the entry's first data byte in the archive. For a
983    /// `Stored` entry this is the start of the in-place, zero-copy window.
984    pub fn data_start(&self) -> u64 {
985        self.data_start
986    }
987
988    /// General-purpose flag bits (bit 0 encryption, bit 3 data descriptor, ...).
989    pub fn flags(&self) -> u16 {
990        self.meta.flags
991    }
992
993    /// Whether the entry names a directory.
994    pub fn is_dir(&self) -> bool {
995        self.meta.is_dir()
996    }
997
998    /// A safe relative path for extraction, or `None` if the entry name escapes
999    /// the destination (parent-dir traversal, absolute, or drive-letter path).
1000    /// The raw [`name`](Self::name) is always preserved as evidence; this is the
1001    /// secure-by-default view a caller should join onto an output directory.
1002    pub fn enclosed_name(&self) -> Option<PathBuf> {
1003        enclosed_name(&self.meta.name)
1004    }
1005}
1006
1007/// Fail loud if the entry's data is not wholly resolvable from the single
1008/// segment we hold — we don't reassemble split volumes, so reading would return
1009/// the wrong bytes. An archive is spanned when the EOCD marks the central
1010/// directory on a later disk (`this_disk`/`cd_start_disk` != 0) *or* the entry's
1011/// own `disk_start` is non-zero. Real Info-ZIP split archives set the former
1012/// while leaving data entries on disk 0, so the per-entry check alone misses them.
1013fn check_local_disk(
1014    meta: &CentralEntry,
1015    this_disk: u32,
1016    cd_start_disk: u32,
1017) -> Result<(), ZipCoreError> {
1018    let disk = if meta.disk_start != 0 {
1019        u32::from(meta.disk_start)
1020    } else if cd_start_disk != 0 {
1021        cd_start_disk
1022    } else if this_disk != 0 {
1023        this_disk
1024    } else {
1025        return Ok(());
1026    };
1027    Err(ZipCoreError::SpannedArchive {
1028        entry: meta.name.clone(),
1029        disk,
1030    })
1031}
1032
/// Compute a traversal-safe relative path from a ZIP entry name, treating both
/// `/` and `\` as separators (ZIP names may use either) so the check holds on
/// every platform regardless of `std::path` separator conventions.
///
/// Returns `None` when the name is empty, contains a NUL, is absolute
/// (leading `/` or `\`), contains a `..` component, has a drive-letter prefix
/// (`X:`) on ANY component, or reduces to nothing once empty and `.`
/// components are dropped.
fn enclosed_name(name: &str) -> Option<PathBuf> {
    /// True for a component starting with an ASCII letter followed by `:`.
    fn has_drive_prefix(component: &str) -> bool {
        matches!(
            component.as_bytes(),
            [letter, b':', ..] if letter.is_ascii_alphabetic()
        )
    }

    if name.is_empty() || name.contains('\0') {
        return None;
    }
    if name.starts_with(['/', '\\']) {
        return None; // absolute / UNC-style
    }
    let mut out = PathBuf::new();
    for comp in name.split(['/', '\\']) {
        match comp {
            "" | "." => {}
            ".." => return None,
            // The drive check must cover every component, not just the start of
            // the name: on Windows `PathBuf::push("C:")` REPLACES the path built
            // so far, so `foo/C:/bar` or `./C:x` would yield a drive-relative
            // path that escapes the destination when joined.
            other if has_drive_prefix(other) => return None,
            other => out.push(other),
        }
    }
    if out.as_os_str().is_empty() {
        return None;
    }
    Some(out)
}
1060
1061impl Read for ZipFile<'_> {
1062    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1063        let n = self.decoder.read(buf)?;
1064        if n == 0 {
1065            if !self.verified {
1066                self.verified = true;
1067                let actual = self.hasher.clone().finalize();
1068                if self.verify_crc && actual != self.meta.crc32 {
1069                    return Err(io::Error::other(ZipCoreError::CrcMismatch {
1070                        entry: self.meta.name.clone(),
1071                        expected: self.meta.crc32,
1072                        actual,
1073                    }));
1074                }
1075            }
1076            return Ok(0);
1077        }
1078        self.hasher.update(&buf[..n]);
1079        self.bytes_out += n as u64;
1080        Ok(n)
1081    }
1082}
1083
#[cfg(test)]
mod tests {
    use super::zipcrypto_check_byte;

    /// The check byte is the CRC-32 high byte unless GP flag bit 3 (data
    /// descriptor) is set, in which case it is the mod-time high byte.
    #[test]
    fn check_byte_selects_crc_or_modtime() {
        // (flags, crc32, last_mod_time, expected check byte)
        let cases: [(u16, u32, u16, u8); 2] = [
            // No data descriptor (bit 3 clear) -> CRC-32 high byte.
            (0x0000, 0xAB12_3456, 0x7890, 0xAB),
            // Data descriptor (bit 3 set) -> mod-time high byte.
            (0x0008, 0xAB12_3456, 0xCD90, 0xCD),
        ];
        for (flags, crc32, last_mod_time, expected) in cases {
            assert_eq!(zipcrypto_check_byte(flags, crc32, last_mod_time), expected);
        }
    }
}