Skip to main content

zip_core/
archive.rs

1//! Pure-Rust ZIP container parser: EOCD + central directory + local file headers,
2//! with a decoding entry reader that verifies CRC-32 on EOF.
3//!
4//! Mirrors the zip-rs surface (`ZipArchive::new` / `by_index` / `by_name` /
5//! `ZipFile` with `name()`/`compression()`/`size()`/`data_start()`) so fleet
6//! consumers migrate with a near-mechanical `zip::` -> `zip_core::` rename.
7
8use std::io::{self, Read, Seek, SeekFrom};
9use std::path::PathBuf;
10
11use crate::bytes::Reader;
12use crate::codec::Decoder;
13use crate::crypto::{AesInfo, AesReader, ZipCryptoReader};
14use crate::{FormatError, ZipCoreError};
15
/// Signature of the 32-bit end-of-central-directory (EOCD) record.
const EOCD_SIG: u32 = 0x0605_4b50;
/// Signature of a central-directory file header.
const CD_HEADER_SIG: u32 = 0x0201_4b50;
/// Signature of a local file header.
const LFH_SIG: u32 = 0x0403_4b50;
/// Signature of the Zip64 EOCD record.
const ZIP64_EOCD_SIG: u32 = 0x0606_4b50;
/// Signature of the Zip64 EOCD locator.
const ZIP64_LOCATOR_SIG: u32 = 0x0706_4b50;
/// Header id of the Zip64 extended-information extra field.
const ZIP64_EXTRA_ID: u16 = 0x0001;
/// 32-bit sentinel: the real value lives in a Zip64 record/extra field.
const U32_SENTINEL: u32 = 0xFFFF_FFFF;
/// 16-bit sentinel for counts.
const U16_SENTINEL: u16 = 0xFFFF;

/// Minimum EOCD record length (no comment).
const EOCD_MIN: usize = 22;
/// Largest region we scan back from EOF for the EOCD (record + max comment).
const EOCD_SCAN_MAX: usize = EOCD_MIN + u16::MAX as usize;
/// Zip64 EOCD locator record length.
const ZIP64_LOCATOR_LEN: usize = 20;
/// Fixed portion of a local file header (everything before the name bytes).
const LFH_FIXED: usize = 30;
/// Ceiling on entries we will parse, guarding against a lying EOCD count.
const MAX_ENTRIES: usize = 16_000_000;
38
/// ZIP compression method, mirroring zip-rs `CompressionMethod` for the common
/// methods plus an `Unknown(raw)` that preserves the offending value.
///
/// The numeric ids quoted on each variant are the ones `from_u16` maps from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionMethod {
    /// Method 0 — no compression (raw passthrough / in-place window).
    Stored,
    /// Method 8 — classic DEFLATE.
    Deflated,
    /// Method 9 — Deflate64 / "enhanced deflate".
    Deflate64,
    /// Method 12 — bzip2.
    Bzip2,
    /// Method 14 — LZMA (with the 4-byte ZIP wrapper prefix).
    Lzma,
    /// Method 93 — Zstandard.
    Zstd,
    /// Method 95 — XZ.
    Xz,
    /// Any other method id — value preserved so callers can report it.
    Unknown(u16),
}
60
61impl CompressionMethod {
62    pub(crate) fn from_u16(raw: u16) -> Self {
63        match raw {
64            0 => Self::Stored,
65            8 => Self::Deflated,
66            9 => Self::Deflate64,
67            12 => Self::Bzip2,
68            14 => Self::Lzma,
69            93 => Self::Zstd,
70            95 => Self::Xz,
71            other => Self::Unknown(other),
72        }
73    }
74}
75
/// Parsed central-directory metadata for one entry.
#[derive(Debug, Clone)]
pub(crate) struct CentralEntry {
    /// Entry name, already decoded to a `String`.
    pub(crate) name: String,
    /// Compression method recorded in the central directory.
    pub(crate) method: CompressionMethod,
    /// General-purpose flag bits (bit 0 encryption, bit 3 data descriptor,
    /// bit 11 UTF-8 name).
    pub(crate) flags: u16,
    /// CRC-32 recorded in the central directory.
    pub(crate) crc32: u32,
    /// Compressed size, after any Zip64 sentinel has been resolved.
    pub(crate) compressed_size: u64,
    /// Uncompressed size, after any Zip64 sentinel has been resolved.
    pub(crate) uncompressed_size: u64,
    /// Offset of the entry's local file header, after any Zip64 sentinel has
    /// been resolved.
    pub(crate) lfh_offset: u64,
    /// DOS mod-time (the ZipCrypto password-check byte when a data descriptor is
    /// used).
    pub(crate) last_mod_time: u16,
    /// WinZip AES parameters when this entry is method-99 encrypted.
    pub(crate) aes: Option<AesInfo>,
}
92
93impl CentralEntry {
94    fn is_dir(&self) -> bool {
95        self.name.ends_with('/') || self.name.ends_with('\\')
96    }
97}
98
/// Container-level offsets/counts, for the forensic analyzer's structural audits
/// (trailing data, spanning, etc.). Returned by [`ZipArchive::summary`].
#[derive(Debug, Clone)]
pub struct ArchiveSummary {
    /// Total file length.
    pub file_len: u64,
    /// Absolute offset of the central directory (taken from the Zip64 EOCD
    /// record when the 32-bit EOCD holds a sentinel).
    pub central_dir_offset: u64,
    /// Declared central-directory size in bytes.
    pub central_dir_size: u64,
    /// Absolute offset just past the end of the 32-bit EOCD record (incl. its
    /// comment) — bytes beyond this are trailing data.
    pub eocd_end_offset: u64,
    /// EOCD archive-comment length.
    pub comment_len: u16,
    /// Disk number recorded in the EOCD (0 for a single-file archive).
    pub disk_number: u32,
    /// Disk on which the central directory starts (0 for a single-file archive).
    pub cd_start_disk: u32,
}
119
/// A parsed ZIP archive over a seekable reader.
pub struct ZipArchive<R> {
    /// Underlying archive bytes; entry readers borrow it mutably while open.
    reader: R,
    /// Central-directory entries, in central-directory order.
    entries: Vec<CentralEntry>,
    /// Container-level offsets/counts captured while parsing.
    summary: ArchiveSummary,
}
126
127impl<R: Read + Seek> ZipArchive<R> {
128    /// Parse the EOCD and central directory of `reader`.
129    pub fn new(mut reader: R) -> Result<Self, ZipCoreError> {
130        let file_len = reader.seek(SeekFrom::End(0))?;
131        let (entries, summary) = parse_central_directory(&mut reader, file_len)?;
132        Ok(Self {
133            reader,
134            entries,
135            summary,
136        })
137    }
138
139    /// Container-level offsets/counts for structural audits.
140    pub fn summary(&self) -> &ArchiveSummary {
141        &self.summary
142    }
143
144    /// Number of entries in the central directory.
145    pub fn len(&self) -> usize {
146        self.entries.len()
147    }
148
149    /// Whether the archive has no entries.
150    pub fn is_empty(&self) -> bool {
151        self.entries.is_empty()
152    }
153
154    /// Iterate entry names in central-directory order.
155    pub fn file_names(&self) -> impl Iterator<Item = &str> {
156        self.entries.iter().map(|e| e.name.as_str())
157    }
158
159    /// Open the entry at index `i` for decoding (mirrors zip-rs `by_index`).
160    pub fn by_index(&mut self, i: usize) -> Result<ZipFile<'_>, ZipCoreError> {
161        let meta = self
162            .entries
163            .get(i)
164            .ok_or(ZipCoreError::IndexOutOfBounds(i))?
165            .clone();
166        self.open(meta)
167    }
168
169    /// Open the named entry for decoding (mirrors zip-rs `by_name`).
170    pub fn by_name(&mut self, name: &str) -> Result<ZipFile<'_>, ZipCoreError> {
171        let meta = self
172            .entries
173            .iter()
174            .find(|e| e.name == name)
175            .ok_or_else(|| ZipCoreError::EntryNotFound(name.to_string()))?
176            .clone();
177        self.open(meta)
178    }
179
180    /// Open the entry at index `i`, decrypting it with `password` (ZipCrypto or
181    /// WinZip AES). Errors with `WrongPassword` if the password fails the check.
182    pub fn by_index_decrypt(
183        &mut self,
184        i: usize,
185        password: &[u8],
186    ) -> Result<ZipFile<'_>, ZipCoreError> {
187        let meta = self
188            .entries
189            .get(i)
190            .ok_or(ZipCoreError::IndexOutOfBounds(i))?
191            .clone();
192        self.open_decrypt(meta, password)
193    }
194
195    /// Open the named entry, decrypting it with `password`.
196    pub fn by_name_decrypt(
197        &mut self,
198        name: &str,
199        password: &[u8],
200    ) -> Result<ZipFile<'_>, ZipCoreError> {
201        let meta = self
202            .entries
203            .iter()
204            .find(|e| e.name == name)
205            .ok_or_else(|| ZipCoreError::EntryNotFound(name.to_string()))?
206            .clone();
207        self.open_decrypt(meta, password)
208    }
209
210    /// Raw structural view for the forensic analyzer: per entry, the header
211    /// fields as recorded in BOTH the central directory and the local file
212    /// header, plus offsets. This is the seam that lets `zip-forensic` compare
213    /// the two copies (tamper signal) without re-implementing a second parser.
214    pub fn structural_view(&mut self) -> Result<Vec<EntryLayout>, ZipCoreError> {
215        let metas = self.entries.clone();
216        let mut out = Vec::with_capacity(metas.len());
217        for (index, m) in metas.iter().enumerate() {
218            let (local, data_start) = read_lfh_fields(&mut self.reader, m.lfh_offset)?;
219            out.push(EntryLayout {
220                index,
221                lfh_offset: m.lfh_offset,
222                data_start,
223                central: HeaderFields {
224                    name: m.name.clone(),
225                    method: m.method,
226                    flags: m.flags,
227                    crc32: m.crc32,
228                    compressed_size: m.compressed_size,
229                    uncompressed_size: m.uncompressed_size,
230                },
231                local,
232            });
233        }
234        Ok(out)
235    }
236
237    fn open(&mut self, meta: CentralEntry) -> Result<ZipFile<'_>, ZipCoreError> {
238        if meta.flags & 0x0001 != 0 {
239            return Err(ZipCoreError::EncryptedNoPassword(meta.name.clone()));
240        }
241        let (_local, data_start) = read_lfh_fields(&mut self.reader, meta.lfh_offset)?;
242        self.reader.seek(SeekFrom::Start(data_start))?;
243        let limited: Box<dyn Read + '_> = Box::new((&mut self.reader).take(meta.compressed_size));
244        let decoder = Decoder::new(meta.method, meta.uncompressed_size, limited)?;
245        Ok(ZipFile {
246            data_start,
247            decoder,
248            hasher: crc32fast::Hasher::new(),
249            bytes_out: 0,
250            verified: false,
251            verify_crc: true,
252            meta,
253        })
254    }
255
256    fn open_decrypt(
257        &mut self,
258        meta: CentralEntry,
259        password: &[u8],
260    ) -> Result<ZipFile<'_>, ZipCoreError> {
261        // Not encrypted -> the password is irrelevant; read normally.
262        if meta.flags & 0x0001 == 0 && meta.aes.is_none() {
263            return self.open(meta);
264        }
265        let (_local, data_start) = read_lfh_fields(&mut self.reader, meta.lfh_offset)?;
266        self.reader.seek(SeekFrom::Start(data_start))?;
267        let take = (&mut self.reader).take(meta.compressed_size);
268        let (reader, method, verify_crc): (Box<dyn Read + '_>, CompressionMethod, bool) =
269            if let Some(aes) = meta.aes {
270                let r = AesReader::new(take, password, aes, meta.compressed_size, &meta.name)?;
271                // AE-2 zeroes the CRC field; its integrity is the HMAC (checked by
272                // AesReader). AE-1 keeps the CRC, so verify it.
273                (
274                    Box::new(r),
275                    CompressionMethod::from_u16(aes.actual_method),
276                    !aes.is_ae2,
277                )
278            } else {
279                // Traditional ZipCrypto: the check byte is the CRC high byte, or the
280                // mod-time high byte when a data descriptor is used (bit 3).
281                let check = zipcrypto_check_byte(meta.flags, meta.crc32, meta.last_mod_time);
282                let r = ZipCryptoReader::new(take, password, check, &meta.name)?;
283                (Box::new(r), meta.method, true)
284            };
285        let decoder = Decoder::new(method, meta.uncompressed_size, reader)?;
286        Ok(ZipFile {
287            data_start,
288            decoder,
289            hasher: crc32fast::Hasher::new(),
290            bytes_out: 0,
291            verified: false,
292            verify_crc,
293            meta,
294        })
295    }
296}
297
/// Header fields as recorded in one header copy (central directory OR local file
/// header). Exposed via [`ZipArchive::structural_view`] for the forensic seam.
///
/// `PartialEq`/`Eq` are derived, so two copies can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HeaderFields {
    /// Entry name (decoded).
    pub name: String,
    /// Compression method.
    pub method: CompressionMethod,
    /// General-purpose flag bits.
    pub flags: u16,
    /// CRC-32 as recorded in this header copy.
    pub crc32: u32,
    /// Compressed size as recorded in this header copy.
    pub compressed_size: u64,
    /// Uncompressed size as recorded in this header copy.
    pub uncompressed_size: u64,
}
315
/// One entry's raw structural layout: the central-directory and local-file-header
/// copies of its fields plus offsets, for cross-checking (tamper detection).
#[derive(Debug, Clone)]
pub struct EntryLayout {
    /// Index in central-directory order.
    pub index: usize,
    /// Absolute offset of the local file header.
    pub lfh_offset: u64,
    /// Absolute offset of the entry's first data byte.
    pub data_start: u64,
    /// Fields as recorded in the central directory.
    pub central: HeaderFields,
    /// Fields as recorded in the local file header (32-bit size fields are
    /// widened as-is, without consulting any Zip64 extra field).
    pub local: HeaderFields,
}
331
332/// Read and parse the local file header at `lfh_offset`, returning its fields and
333/// the absolute offset of the entry's first data byte
334/// (`lfh_offset + 30 + name_len + extra_len`).
335fn read_lfh_fields<R: Read + Seek>(
336    reader: &mut R,
337    lfh_offset: u64,
338) -> Result<(HeaderFields, u64), ZipCoreError> {
339    reader.seek(SeekFrom::Start(lfh_offset))?;
340    let mut fixed = [0u8; LFH_FIXED];
341    reader.read_exact(&mut fixed)?;
342    let mut r = Reader::new(&fixed);
343    if r.u32()? != LFH_SIG {
344        return Err(FormatError::BadSignature {
345            what: "local file header",
346            offset: lfh_offset,
347        }
348        .into());
349    }
350    let _version_needed = r.u16()?;
351    let flags = r.u16()?;
352    let method = CompressionMethod::from_u16(r.u16()?);
353    let _mod_time = r.u16()?;
354    let _mod_date = r.u16()?;
355    let crc32 = r.u32()?;
356    let compressed_size = u64::from(r.u32()?);
357    let uncompressed_size = u64::from(r.u32()?);
358    let name_len = usize::from(r.u16()?);
359    let extra_len = usize::from(r.u16()?);
360
361    let mut name_buf = vec![0u8; name_len];
362    reader.read_exact(&mut name_buf)?;
363    let name = decode_name(&name_buf, flags);
364    let data_start = lfh_offset + LFH_FIXED as u64 + name_len as u64 + extra_len as u64;
365
366    Ok((
367        HeaderFields {
368            name,
369            method,
370            flags,
371            crc32,
372            compressed_size,
373            uncompressed_size,
374        },
375        data_start,
376    ))
377}
378
/// The 32-bit EOCD fields. Any size/offset/count may be a sentinel for Zip64.
struct Eocd32 {
    /// Number of this disk.
    disk_number: u16,
    /// Disk on which the central directory starts.
    cd_start_disk: u16,
    /// Total number of central-directory entries.
    total_entries: u16,
    /// Central-directory size in bytes.
    cd_size: u32,
    /// Central-directory offset.
    cd_offset: u32,
    /// Length of the archive comment that follows the record.
    comment_len: u16,
}
389
390fn parse_central_directory<R: Read + Seek>(
391    reader: &mut R,
392    file_len: u64,
393) -> Result<(Vec<CentralEntry>, ArchiveSummary), ZipCoreError> {
394    let scan_len = file_len.min(EOCD_SCAN_MAX as u64);
395    if scan_len < EOCD_MIN as u64 {
396        return Err(FormatError::NoEocd.into());
397    }
398    let scan_start = file_len - scan_len;
399    reader.seek(SeekFrom::Start(scan_start))?;
400    let mut tail = vec![0u8; scan_len as usize];
401    reader.read_exact(&mut tail)?;
402
403    let eocd_rel = find_eocd(&tail).ok_or(FormatError::NoEocd)?;
404    let eocd = parse_eocd(&tail[eocd_rel..])?;
405    // Absolute end of the 32-bit EOCD record incl. its comment; the EOCD is always
406    // the last structure, so anything past this is trailing data.
407    let eocd_end_offset =
408        scan_start + eocd_rel as u64 + EOCD_MIN as u64 + u64::from(eocd.comment_len);
409
410    // Promote to Zip64 when any base field is a sentinel: the real 64-bit
411    // offset/size/count/disk live in the Zip64 EOCD record reached via its locator.
412    let (cd_offset, cd_size, total_entries, disk_number, cd_start_disk) = if eocd.cd_offset
413        == U32_SENTINEL
414        || eocd.cd_size == U32_SENTINEL
415        || eocd.total_entries == U16_SENTINEL
416    {
417        resolve_zip64_eocd(reader, &tail, eocd_rel)?
418    } else {
419        (
420            u64::from(eocd.cd_offset),
421            u64::from(eocd.cd_size),
422            usize::from(eocd.total_entries),
423            u32::from(eocd.disk_number),
424            u32::from(eocd.cd_start_disk),
425        )
426    };
427
428    match cd_offset.checked_add(cd_size) {
429        Some(end) if end <= file_len => {}
430        _ => return Err(FormatError::CentralDirOutOfRange { cd_offset, cd_size }.into()),
431    }
432    if total_entries > MAX_ENTRIES {
433        return Err(FormatError::TooManyEntries(total_entries).into());
434    }
435
436    reader.seek(SeekFrom::Start(cd_offset))?;
437    let mut cd = vec![0u8; cd_size as usize];
438    reader.read_exact(&mut cd)?;
439
440    let entries = parse_cd_entries(&cd, total_entries)?;
441    let summary = ArchiveSummary {
442        file_len,
443        central_dir_offset: cd_offset,
444        central_dir_size: cd_size,
445        eocd_end_offset,
446        comment_len: eocd.comment_len,
447        disk_number,
448        cd_start_disk,
449    };
450    Ok((entries, summary))
451}
452
453/// Scan backward for the EOCD signature, returning its offset within `tail`.
454fn find_eocd(tail: &[u8]) -> Option<usize> {
455    if tail.len() < EOCD_MIN {
456        return None; // cov:unreachable: parse_central_directory guards scan_len >= EOCD_MIN
457    }
458    let sig = EOCD_SIG.to_le_bytes();
459    // The EOCD starts at most EOCD_MIN bytes before EOF; scan from the latest.
460    (0..=tail.len() - EOCD_MIN)
461        .rev()
462        .find(|&i| tail[i..i + 4] == sig)
463}
464
465/// Parse the fixed EOCD fields. Any size/offset/count may be a Zip64 sentinel.
466fn parse_eocd(buf: &[u8]) -> Result<Eocd32, ZipCoreError> {
467    let mut r = Reader::new(buf);
468    if r.u32()? != EOCD_SIG {
469        return Err(FormatError::NoEocd.into()); // cov:unreachable: find_eocd matched this signature
470    }
471    let disk_number = r.u16()?;
472    let cd_start_disk = r.u16()?;
473    let _entries_this_disk = r.u16()?;
474    let total_entries = r.u16()?;
475    let cd_size = r.u32()?;
476    let cd_offset = r.u32()?;
477    let comment_len = r.u16()?;
478    Ok(Eocd32 {
479        disk_number,
480        cd_start_disk,
481        total_entries,
482        cd_size,
483        cd_offset,
484        comment_len,
485    })
486}
487
488/// Resolve the real central-directory location from the Zip64 EOCD record. The
489/// Zip64 EOCD locator sits immediately before the 32-bit EOCD; it points at the
490/// Zip64 EOCD record holding the true 64-bit offset/size/count.
491fn resolve_zip64_eocd<R: Read + Seek>(
492    reader: &mut R,
493    tail: &[u8],
494    eocd_rel: usize,
495) -> Result<(u64, u64, usize, u32, u32), ZipCoreError> {
496    if eocd_rel < ZIP64_LOCATOR_LEN {
497        return Err(FormatError::Zip64Unsupported.into());
498    }
499    let mut loc = Reader::new(&tail[eocd_rel - ZIP64_LOCATOR_LEN..eocd_rel]);
500    if loc.u32()? != ZIP64_LOCATOR_SIG {
501        return Err(FormatError::Zip64Unsupported.into());
502    }
503    let _disk = loc.u32()?;
504    let z64_eocd_offset = loc.u64()?;
505
506    reader.seek(SeekFrom::Start(z64_eocd_offset))?;
507    let mut rec = [0u8; 56];
508    reader.read_exact(&mut rec)?;
509    let mut r = Reader::new(&rec);
510    if r.u32()? != ZIP64_EOCD_SIG {
511        return Err(FormatError::BadSignature {
512            what: "Zip64 EOCD record",
513            offset: z64_eocd_offset,
514        }
515        .into());
516    }
517    let _record_size = r.u64()?;
518    let _version_made_by = r.u16()?;
519    let _version_needed = r.u16()?;
520    let disk_number = r.u32()?;
521    let cd_start_disk = r.u32()?;
522    let _entries_this_disk = r.u64()?;
523    let total_entries = r.u64()?;
524    let cd_size = r.u64()?;
525    let cd_offset = r.u64()?;
526    let total =
527        usize::try_from(total_entries).map_err(|_| FormatError::TooManyEntries(usize::MAX))?;
528    Ok((cd_offset, cd_size, total, disk_number, cd_start_disk))
529}
530
531/// Parse `total_entries` central-directory file headers from `cd`.
532fn parse_cd_entries(cd: &[u8], total_entries: usize) -> Result<Vec<CentralEntry>, ZipCoreError> {
533    let mut r = Reader::new(cd);
534    let mut entries = Vec::new();
535    for _ in 0..total_entries {
536        if r.remaining() < 46 {
537            return Err(FormatError::Truncated.into());
538        }
539        if r.u32()? != CD_HEADER_SIG {
540            return Err(FormatError::BadSignature {
541                what: "central directory header",
542                offset: (cd.len() - r.remaining()) as u64,
543            }
544            .into());
545        }
546        let _version_made_by = r.u16()?;
547        let _version_needed = r.u16()?;
548        let flags = r.u16()?;
549        let method_raw = r.u16()?;
550        let method = CompressionMethod::from_u16(method_raw);
551        let last_mod_time = r.u16()?;
552        let _mod_date = r.u16()?;
553        let crc32 = r.u32()?;
554        let compressed_size32 = r.u32()?;
555        let uncompressed_size32 = r.u32()?;
556        let name_len = usize::from(r.u16()?);
557        let extra_len = usize::from(r.u16()?);
558        let comment_len = usize::from(r.u16()?);
559        let _disk_start = r.u16()?;
560        let _internal_attrs = r.u16()?;
561        let _external_attrs = r.u32()?;
562        let lfh_offset32 = r.u32()?;
563
564        let name_bytes = r.take(name_len)?;
565        let extra = r.take(extra_len)?;
566        let _comment = r.take(comment_len)?;
567
568        // Resolve any 0xFFFFFFFF sentinels from the Zip64 extended-information
569        // extra field (header id 0x0001). Fields appear in a FIXED order and only
570        // when their base field is a sentinel.
571        let mut uncompressed_size = u64::from(uncompressed_size32);
572        let mut compressed_size = u64::from(compressed_size32);
573        let mut lfh_offset = u64::from(lfh_offset32);
574        if uncompressed_size32 == U32_SENTINEL
575            || compressed_size32 == U32_SENTINEL
576            || lfh_offset32 == U32_SENTINEL
577        {
578            apply_zip64_extra(
579                extra,
580                uncompressed_size32 == U32_SENTINEL,
581                compressed_size32 == U32_SENTINEL,
582                lfh_offset32 == U32_SENTINEL,
583                &mut uncompressed_size,
584                &mut compressed_size,
585                &mut lfh_offset,
586            )?;
587        }
588
589        // Filename: UTF-8 when GP flag bit 11 is set, else CP437. We accept either
590        // as best-effort UTF-8 here; a full CP437 table is a follow-up (it only
591        // affects display of non-ASCII names, not entry location).
592        let name = decode_name(name_bytes, flags);
593        // Method 99 = WinZip AES; the AE-x extra field (0x9901) carries the real
594        // method + key strength.
595        let aes = if method_raw == 99 {
596            parse_aes_extra(extra)
597        } else {
598            None
599        };
600
601        entries.push(CentralEntry {
602            name,
603            method,
604            flags,
605            crc32,
606            compressed_size,
607            uncompressed_size,
608            lfh_offset,
609            last_mod_time,
610            aes,
611        });
612    }
613    Ok(entries)
614}
615
616/// Override sentinel CD fields from the Zip64 extended-information extra field
617/// (header id 0x0001). The 64-bit fields appear in a fixed order — original size,
618/// compressed size, relative header offset — and ONLY when their base field is a
619/// sentinel. A sentinel with no matching extra field is a malformed Zip64 archive.
620fn apply_zip64_extra(
621    extra: &[u8],
622    need_uncompressed: bool,
623    need_compressed: bool,
624    need_offset: bool,
625    uncompressed_size: &mut u64,
626    compressed_size: &mut u64,
627    lfh_offset: &mut u64,
628) -> Result<(), ZipCoreError> {
629    let mut r = Reader::new(extra);
630    while r.remaining() >= 4 {
631        let id = r.u16()?;
632        let size = usize::from(r.u16()?);
633        if id == ZIP64_EXTRA_ID {
634            let mut z = Reader::new(r.take(size)?);
635            if need_uncompressed {
636                *uncompressed_size = z.u64()?;
637            }
638            if need_compressed {
639                *compressed_size = z.u64()?;
640            }
641            if need_offset {
642                *lfh_offset = z.u64()?;
643            }
644            return Ok(());
645        }
646        r.skip(size)?;
647    }
648    Err(FormatError::Zip64Inconsistent.into())
649}
650
651/// Parse the WinZip AE-x extra field (header id 0x9901) from an entry's extra
652/// data: version (AE-1/AE-2), vendor "AE", AES strength, and the real method.
653fn parse_aes_extra(extra: &[u8]) -> Option<AesInfo> {
654    let mut r = Reader::new(extra);
655    while r.remaining() >= 4 {
656        let id = r.u16().ok()?;
657        let size = usize::from(r.u16().ok()?);
658        if id == 0x9901 {
659            let data = r.take(size).ok()?;
660            let mut d = Reader::new(data);
661            let version = d.u16().ok()?; // 1 = AE-1, 2 = AE-2
662            let _vendor = d.u16().ok()?; // "AE"
663            let strength = d.take(1).ok()?[0];
664            let actual_method = d.u16().ok()?;
665            return Some(AesInfo {
666                strength,
667                actual_method,
668                is_ae2: version == 2,
669            });
670        }
671        r.skip(size).ok()?;
672    }
673    None
674}
675
/// Pick the ZipCrypto password-verification byte (PKWARE APPNOTE 6.1.6): the
/// most significant byte of the mod-time when the entry uses a data descriptor
/// (GP flag bit 3), otherwise the most significant byte of the CRC-32 —
/// matching what the encrypter used.
fn zipcrypto_check_byte(flags: u16, crc32: u32, last_mod_time: u16) -> u8 {
    let uses_data_descriptor = flags & 0x0008 != 0;
    if uses_data_descriptor {
        let [high, _] = last_mod_time.to_be_bytes();
        high
    } else {
        let [high, ..] = crc32.to_be_bytes();
        high
    }
}
686
687/// Decode an entry filename. UTF-8 (flag bit 11) is taken verbatim; otherwise we
688/// map the CP437 high range so non-ASCII names are still legible.
689fn decode_name(bytes: &[u8], flags: u16) -> String {
690    // UTF-8 flag (bit 11) set, or pure ASCII: take the bytes as UTF-8 (lossy).
691    if flags & 0x0800 != 0 || bytes.is_ascii() {
692        return String::from_utf8_lossy(bytes).into_owned();
693    }
694    bytes.iter().map(|&b| crate::cp437::decode(b)).collect()
695}
696
/// A decoding reader over one ZIP entry. Implements `Read`, yielding decompressed
/// bytes and verifying CRC-32 at EOF (fail loud on mismatch).
pub struct ZipFile<'a> {
    /// Central-directory metadata of the entry being read.
    meta: CentralEntry,
    /// Absolute offset of the entry's first data byte.
    data_start: u64,
    /// Decoder over the entry's (possibly decrypted) bytes.
    decoder: Decoder<Box<dyn Read + 'a>>,
    /// Running CRC-32 over the bytes handed out so far.
    hasher: crc32fast::Hasher,
    /// Count of decoded bytes handed out so far.
    bytes_out: u64,
    /// Set once the end-of-stream CRC check has run, so it runs only once.
    verified: bool,
    /// Whether to verify CRC-32 at EOF. False for `WinZip` AE-2, whose integrity
    /// is the HMAC (checked by the AES reader) and whose CD CRC field is zero.
    verify_crc: bool,
}
710
711impl ZipFile<'_> {
712    /// Entry name (path within the archive).
713    pub fn name(&self) -> &str {
714        &self.meta.name
715    }
716
717    /// Compression method.
718    pub fn compression(&self) -> CompressionMethod {
719        self.meta.method
720    }
721
722    /// Uncompressed size in bytes (from the central directory).
723    pub fn size(&self) -> u64 {
724        self.meta.uncompressed_size
725    }
726
727    /// Compressed size in bytes (from the central directory).
728    pub fn compressed_size(&self) -> u64 {
729        self.meta.compressed_size
730    }
731
732    /// Stored CRC-32 (from the central directory).
733    pub fn crc32(&self) -> u32 {
734        self.meta.crc32
735    }
736
737    /// Absolute offset of the entry's first data byte in the archive. For a
738    /// `Stored` entry this is the start of the in-place, zero-copy window.
739    pub fn data_start(&self) -> u64 {
740        self.data_start
741    }
742
743    /// General-purpose flag bits (bit 0 encryption, bit 3 data descriptor, ...).
744    pub fn flags(&self) -> u16 {
745        self.meta.flags
746    }
747
748    /// Whether the entry names a directory.
749    pub fn is_dir(&self) -> bool {
750        self.meta.is_dir()
751    }
752
753    /// A safe relative path for extraction, or `None` if the entry name escapes
754    /// the destination (parent-dir traversal, absolute, or drive-letter path).
755    /// The raw [`name`](Self::name) is always preserved as evidence; this is the
756    /// secure-by-default view a caller should join onto an output directory.
757    pub fn enclosed_name(&self) -> Option<PathBuf> {
758        enclosed_name(&self.meta.name)
759    }
760}
761
/// Compute a traversal-safe relative path from a ZIP entry name, treating both
/// `/` and `\` as separators (ZIP names may use either) so the check holds on
/// every platform regardless of `std::path` separator conventions.
///
/// Returns `None` when the name is empty, contains a NUL, is absolute
/// (leading `/` or `\`), has a `..` component, has ANY component that starts
/// with a drive-letter prefix (`C:`), or collapses to nothing (e.g. `./`).
/// Empty and `.` components are dropped.
fn enclosed_name(name: &str) -> Option<PathBuf> {
    /// True for text beginning with an ASCII letter followed by `:`.
    fn has_drive_prefix(component: &str) -> bool {
        matches!(
            component.as_bytes(),
            [letter, b':', ..] if letter.is_ascii_alphabetic()
        )
    }

    if name.is_empty() || name.contains('\0') {
        return None;
    }
    if name.starts_with(['/', '\\']) {
        return None; // absolute / UNC-style
    }
    let mut out = PathBuf::new();
    for comp in name.split(['/', '\\']) {
        match comp {
            "" | "." => {}
            ".." => return None,
            // Checked per component, not only at the start of the name: on
            // Windows `PathBuf::push` of a prefixed component REPLACES the path
            // built so far, so `a/C:evil` would otherwise escape.
            other if has_drive_prefix(other) => return None,
            other => out.push(other),
        }
    }
    if out.as_os_str().is_empty() {
        return None;
    }
    Some(out)
}
789
790impl Read for ZipFile<'_> {
791    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
792        let n = self.decoder.read(buf)?;
793        if n == 0 {
794            if !self.verified {
795                self.verified = true;
796                let actual = self.hasher.clone().finalize();
797                if self.verify_crc && actual != self.meta.crc32 {
798                    return Err(io::Error::other(ZipCoreError::CrcMismatch {
799                        entry: self.meta.name.clone(),
800                        expected: self.meta.crc32,
801                        actual,
802                    }));
803                }
804            }
805            return Ok(0);
806        }
807        self.hasher.update(&buf[..n]);
808        self.bytes_out += n as u64;
809        Ok(n)
810    }
811}
812
#[cfg(test)]
mod tests {
    use super::zipcrypto_check_byte;

    /// The check byte comes from the CRC unless a data descriptor is flagged.
    #[test]
    fn check_byte_selects_crc_or_modtime() {
        const CRC: u32 = 0xAB12_3456;

        // Bit 3 clear (no data descriptor): high byte of the CRC-32.
        let without_descriptor = zipcrypto_check_byte(0x0000, CRC, 0x7890);
        assert_eq!(without_descriptor, 0xAB);

        // Bit 3 set (data descriptor): high byte of the mod-time.
        let with_descriptor = zipcrypto_check_byte(0x0008, CRC, 0xCD90);
        assert_eq!(with_descriptor, 0xCD);
    }
}