Skip to main content

epub_stream/
zip.rs

1//! Streaming ZIP reader for EPUB files
2//!
3//! Memory-efficient ZIP reader that streams files without loading entire archive.
4//! Uses fixed-size central directory cache (max 256 entries, ~4KB).
5//! Supports DEFLATE decompression using miniz_oxide.
6
7extern crate alloc;
8
9use alloc::boxed::Box;
10use alloc::string::{String, ToString};
11use heapless::Vec as HeaplessVec;
12use log;
13use miniz_oxide::{DataFormat, MZFlush, MZStatus};
14use std::io::{Read, Seek, SeekFrom, Write};
15
/// Size of the heap scratch buffers allocated by the convenience read
/// methods (`read_file`, `read_file_to_writer`). Smaller on ESP-IDF targets.
#[cfg(target_os = "espidf")]
const DEFAULT_ZIP_SCRATCH_BYTES: usize = 2 * 1024;
/// Size of the heap scratch buffers allocated by the convenience read
/// methods (`read_file`, `read_file_to_writer`) on non-ESP-IDF targets.
#[cfg(not(target_os = "espidf"))]
const DEFAULT_ZIP_SCRATCH_BYTES: usize = 8 * 1024;

/// Maximum number of central directory entries to cache.
///
/// Archives declaring more entries are rejected in strict mode and loaded
/// partially (with a warning) otherwise.
const MAX_CD_ENTRIES: usize = 256;

/// Maximum filename length (in bytes) that is stored for a ZIP entry.
///
/// Longer names are skipped while parsing and the entry keeps an empty filename.
const MAX_FILENAME_LEN: usize = 256;
26
/// Runtime-configurable ZIP safety limits.
///
/// Passed as `Option<ZipLimits>` to `StreamingZip::new_with_limits`; `None`
/// means no size limits, non-strict parsing and the default EOCD scan window.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ZipLimits {
    /// Maximum compressed or uncompressed file size allowed for reads.
    /// Both the compressed and the uncompressed size of an entry are checked
    /// against this value before any data is read.
    pub max_file_read_size: usize,
    /// Maximum allowed size for the required `mimetype` entry.
    pub max_mimetype_size: usize,
    /// Whether ZIP parsing should fail on strict structural issues.
    /// When set, opening fails if the archive declares more entries than the
    /// cache can hold or if the central directory ends before the declared
    /// number of entries was parsed; otherwise parsing stops early instead.
    pub strict: bool,
    /// Maximum bytes scanned from file tail while searching for EOCD.
    /// Capped at `MAX_EOCD_SCAN` when the archive is opened.
    pub max_eocd_scan: usize,
}
39
40impl ZipLimits {
41    /// Create explicit ZIP limits.
42    pub fn new(max_file_read_size: usize, max_mimetype_size: usize) -> Self {
43        Self {
44            max_file_read_size,
45            max_mimetype_size,
46            strict: false,
47            max_eocd_scan: MAX_EOCD_SCAN,
48        }
49    }
50
51    /// Enable or disable strict ZIP parsing behavior.
52    pub fn with_strict(mut self, strict: bool) -> Self {
53        self.strict = strict;
54        self
55    }
56
57    /// Set a cap for EOCD tail scan bytes.
58    pub fn with_max_eocd_scan(mut self, max_eocd_scan: usize) -> Self {
59        self.max_eocd_scan = max_eocd_scan.max(EOCD_MIN_SIZE);
60        self
61    }
62}
63
/// Local file header signature (little-endian); the bytes `PK\x03\x04` on disk.
const SIG_LOCAL_FILE_HEADER: u32 = 0x04034b50;

/// Central directory entry signature (little-endian); the bytes `PK\x01\x02` on disk.
const SIG_CD_ENTRY: u32 = 0x02014b50;

/// End of central directory signature (little-endian); the bytes `PK\x05\x06` on disk.
const SIG_EOCD: u32 = 0x06054b50;
/// ZIP64 end of central directory record signature (little-endian)
const SIG_ZIP64_EOCD: u32 = 0x06064b50;
/// ZIP64 end of central directory locator signature (little-endian)
const SIG_ZIP64_EOCD_LOCATOR: u32 = 0x07064b50;
/// Minimum EOCD record size in bytes (an EOCD with an empty comment)
const EOCD_MIN_SIZE: usize = 22;
/// Maximum EOCD search window (EOCD + max comment length; the comment
/// length field is 16 bits wide)
const MAX_EOCD_SCAN: usize = EOCD_MIN_SIZE + u16::MAX as usize;
/// Fixed chunk size for EOCD tail scanning.
const EOCD_SCAN_CHUNK_BYTES: usize = 2048;

/// Compression methods
/// Method 0: entry data is stored without compression.
const METHOD_STORED: u16 = 0;
/// Method 8: entry data is DEFLATE-compressed.
const METHOD_DEFLATED: u16 = 8;
86
87// Re-export the crate's public ZIP error alias for module consumers.
88pub use crate::error::ZipError;
89
/// Central directory location as resolved by `find_eocd`.
///
/// Values come from the classic EOCD record, or from the ZIP64 EOCD record
/// when the archive uses ZIP64.
#[derive(Clone, Copy, Debug)]
struct EocdInfo {
    /// Absolute file offset of the first central directory record.
    cd_offset: u64,
    /// Total size of the central directory in bytes.
    cd_size: u64,
    /// Number of central directory entries declared by the archive.
    num_entries: u64,
}
96
/// Fields extracted from the fixed 56-byte part of a ZIP64 EOCD record.
#[derive(Clone, Copy, Debug)]
struct Zip64EocdInfo {
    /// Number of this disk (record offset 16).
    disk_number: u32,
    /// Disk on which the central directory starts (record offset 20).
    disk_with_cd_start: u32,
    /// Entry count read from record offset 32.
    num_entries: u64,
    /// Size of the central directory in bytes (record offset 40).
    cd_size: u64,
    /// Absolute offset of the central directory (record offset 48).
    cd_offset: u64,
}
105
/// Central directory entry metadata
///
/// Sizes and the header offset are widened to 64 bits; when the 32-bit field
/// in the record is `0xFFFFFFFF` the value is taken from the ZIP64 extra field.
#[derive(Debug, Clone)]
pub struct CdEntry {
    /// Compression method (0=stored, 8=deflated)
    pub method: u16,
    /// Compressed size in bytes
    pub compressed_size: u64,
    /// Uncompressed size in bytes
    pub uncompressed_size: u64,
    /// Offset to local file header
    pub local_header_offset: u64,
    /// CRC32 checksum (a value of 0 disables CRC verification on read)
    pub crc32: u32,
    /// Filename, decoded lossily as UTF-8. Left empty when the stored name is
    /// empty or longer than `MAX_FILENAME_LEN` bytes.
    pub filename: String,
}
122
123impl CdEntry {
124    /// Create new empty entry
125    fn new() -> Self {
126        Self {
127            method: 0,
128            compressed_size: 0,
129            uncompressed_size: 0,
130            local_header_offset: 0,
131            crc32: 0,
132            filename: String::with_capacity(0),
133        }
134    }
135}
136
/// Streaming ZIP file reader
///
/// Owns the underlying reader and a bounded cache of central directory
/// entries; file contents are read on demand.
pub struct StreamingZip<F: Read + Seek> {
    /// File handle
    file: F,
    /// Central directory entries (fixed size, at most `MAX_CD_ENTRIES`)
    entries: HeaplessVec<CdEntry, MAX_CD_ENTRIES>,
    /// Number of entries in central directory as declared by the archive;
    /// may exceed `entries.len()` when the cache was too small.
    num_entries: usize,
    /// Optional configurable resource/safety limits.
    limits: Option<ZipLimits>,
    /// Reusable DEFLATE state to avoid large hot-path allocations on embedded.
    /// Reset before every DEFLATE read.
    inflate_state: Box<miniz_oxide::inflate::stream::InflateState>,
}
150
151impl<F: Read + Seek> StreamingZip<F> {
152    /// Open a ZIP file and parse the central directory
153    pub fn new(file: F) -> Result<Self, ZipError> {
154        Self::new_with_limits(file, None)
155    }
156
157    /// Open a ZIP file with explicit runtime limits.
158    pub fn new_with_limits(mut file: F, limits: Option<ZipLimits>) -> Result<Self, ZipError> {
159        // Find and parse EOCD
160        let max_eocd_scan = limits
161            .map(|l| l.max_eocd_scan.min(MAX_EOCD_SCAN))
162            .unwrap_or(MAX_EOCD_SCAN);
163        let eocd = Self::find_eocd(&mut file, max_eocd_scan)?;
164        let strict = limits.is_some_and(|l| l.strict);
165        if strict && eocd.num_entries > MAX_CD_ENTRIES as u64 {
166            return Err(ZipError::CentralDirFull);
167        }
168
169        let mut entries: HeaplessVec<CdEntry, MAX_CD_ENTRIES> = HeaplessVec::new();
170
171        // Parse central directory entries
172        file.seek(SeekFrom::Start(eocd.cd_offset))
173            .map_err(|_| ZipError::IoError)?;
174        let cd_end = eocd
175            .cd_offset
176            .checked_add(eocd.cd_size)
177            .ok_or(ZipError::InvalidFormat)?;
178
179        let entries_to_scan = core::cmp::min(eocd.num_entries, MAX_CD_ENTRIES as u64);
180        for _ in 0..entries_to_scan {
181            let pos = file.stream_position().map_err(|_| ZipError::IoError)?;
182            if pos >= cd_end {
183                if strict {
184                    return Err(ZipError::InvalidFormat);
185                }
186                break;
187            }
188            if let Some(entry) = Self::read_cd_entry(&mut file)? {
189                entries.push(entry).map_err(|_| ZipError::CentralDirFull)?;
190            } else if strict {
191                return Err(ZipError::InvalidFormat);
192            } else {
193                break;
194            }
195        }
196
197        if eocd.num_entries > MAX_CD_ENTRIES as u64 {
198            log::warn!(
199                "[ZIP] Archive has {} entries but only {} were loaded (max: {})",
200                eocd.num_entries,
201                entries.len(),
202                MAX_CD_ENTRIES
203            );
204        }
205
206        log::debug!(
207            "[ZIP] Parsed {} central directory entries (offset {})",
208            entries.len(),
209            eocd.cd_offset
210        );
211
212        Ok(Self {
213            file,
214            entries,
215            num_entries: core::cmp::min(eocd.num_entries, usize::MAX as u64) as usize,
216            limits,
217            inflate_state: Box::new(miniz_oxide::inflate::stream::InflateState::new(
218                DataFormat::Raw,
219            )),
220        })
221    }
222
    /// Find EOCD and extract central directory info
    ///
    /// Scans at most `max_eocd_scan` bytes from the end of `file`, walking
    /// backwards in chunks of `EOCD_SCAN_CHUNK_BYTES`. A candidate is accepted
    /// only if its comment length makes the record end exactly at end of file.
    /// When the record uses ZIP64 sentinel values, or a ZIP64 locator sits
    /// directly in front of it, the central directory info is taken from the
    /// ZIP64 EOCD record instead.
    ///
    /// # Errors
    /// - `ZipError::IoError` if seeking or reading fails.
    /// - `ZipError::InvalidFormat` if the file is shorter than an EOCD record,
    ///   no acceptable record is found, a required ZIP64 locator is missing, or
    ///   the central directory would extend past the EOCD record.
    /// - `ZipError::UnsupportedZip64` for multi-disk ZIP64 archives.
    fn find_eocd(file: &mut F, max_eocd_scan: usize) -> Result<EocdInfo, ZipError> {
        // Get file size
        let file_size = file.seek(SeekFrom::End(0)).map_err(|_| ZipError::IoError)?;

        if file_size < EOCD_MIN_SIZE as u64 {
            return Err(ZipError::InvalidFormat);
        }

        // Scan tail in fixed-size chunks to avoid a large contiguous allocation.
        let scan_range = file_size.min(max_eocd_scan as u64);
        // Lowest file offset that is still inside the scan window.
        let scan_base = file_size - scan_range;
        let mut remaining = scan_range;
        let mut chunk_end = file_size;
        // First bytes of the previously scanned (later) chunk, kept so that a
        // record straddling a chunk boundary can still be matched.
        let mut suffix = [0u8; EOCD_MIN_SIZE - 1];
        let mut suffix_len = 0usize;
        let mut window = [0u8; EOCD_SCAN_CHUNK_BYTES + (EOCD_MIN_SIZE - 1)];

        while remaining > 0 {
            let read_len_u64 = remaining.min(EOCD_SCAN_CHUNK_BYTES as u64);
            let read_len = read_len_u64 as usize;
            let chunk_start = chunk_end - read_len_u64;

            file.seek(SeekFrom::Start(chunk_start))
                .map_err(|_| ZipError::IoError)?;
            file.read_exact(&mut window[..read_len])
                .map_err(|_| ZipError::IoError)?;

            // Append the saved head of the later chunk after the fresh data.
            if suffix_len > 0 {
                window[read_len..read_len + suffix_len].copy_from_slice(&suffix[..suffix_len]);
            }
            let search_len = read_len + suffix_len;

            // Search from the highest offset down so the record closest to the
            // end of the file is considered first.
            // NOTE(review): read_u32_le / read_u16_le are defined outside this
            // view; presumably they decode little-endian values at the given
            // offset of the slice.
            for i in (0..=search_len.saturating_sub(EOCD_MIN_SIZE)).rev() {
                if Self::read_u32_le(&window, i) != SIG_EOCD {
                    continue;
                }

                let eocd_pos = chunk_start + i as u64;
                if eocd_pos < scan_base {
                    continue;
                }

                let num_entries = Self::read_u16_le(&window, i + 8);
                let cd_size_32 = Self::read_u32_le(&window, i + 12);
                let cd_offset_32 = Self::read_u32_le(&window, i + 16) as u64;
                let comment_len = Self::read_u16_le(&window, i + 20) as u64;
                // A genuine EOCD record plus its comment ends exactly at EOF;
                // anything else is a coincidental signature match.
                let eocd_end = eocd_pos + EOCD_MIN_SIZE as u64 + comment_len;
                if eocd_end != file_size {
                    continue;
                }

                // Saturated 16/32-bit fields mean the real values live in the
                // ZIP64 EOCD record.
                let uses_zip64_sentinel = num_entries == u16::MAX
                    || cd_size_32 == u32::MAX
                    || cd_offset_32 == u32::MAX as u64;

                // Probe the 20 bytes directly before the EOCD for a ZIP64
                // locator: (disk with ZIP64 EOCD, ZIP64 EOCD offset, total disks).
                let mut zip64_locator: Option<(u32, u64, u32)> = None;
                if eocd_pos >= 20 {
                    file.seek(SeekFrom::Start(eocd_pos - 20))
                        .map_err(|_| ZipError::IoError)?;
                    let mut locator = [0u8; 20];
                    file.read_exact(&mut locator)
                        .map_err(|_| ZipError::IoError)?;
                    if u32::from_le_bytes([locator[0], locator[1], locator[2], locator[3]])
                        == SIG_ZIP64_EOCD_LOCATOR
                    {
                        let zip64_disk =
                            u32::from_le_bytes([locator[4], locator[5], locator[6], locator[7]]);
                        let zip64_eocd_offset = u64::from_le_bytes([
                            locator[8],
                            locator[9],
                            locator[10],
                            locator[11],
                            locator[12],
                            locator[13],
                            locator[14],
                            locator[15],
                        ]);
                        let total_disks = u32::from_le_bytes([
                            locator[16],
                            locator[17],
                            locator[18],
                            locator[19],
                        ]);
                        zip64_locator = Some((zip64_disk, zip64_eocd_offset, total_disks));
                    }
                }

                if uses_zip64_sentinel || zip64_locator.is_some() {
                    // Sentinel values without a locator cannot be resolved.
                    let (zip64_disk, zip64_eocd_offset, total_disks) =
                        zip64_locator.ok_or(ZipError::InvalidFormat)?;
                    // Only single-disk archives are supported.
                    if zip64_disk != 0 || total_disks != 1 {
                        return Err(ZipError::UnsupportedZip64);
                    }
                    let zip64 = Self::read_zip64_eocd(file, zip64_eocd_offset)?;
                    if zip64.disk_number != 0 || zip64.disk_with_cd_start != 0 {
                        return Err(ZipError::UnsupportedZip64);
                    }
                    // The central directory must lie entirely before the EOCD.
                    let cd_end = zip64
                        .cd_offset
                        .checked_add(zip64.cd_size)
                        .ok_or(ZipError::InvalidFormat)?;
                    if cd_end > eocd_pos || cd_end > file_size {
                        return Err(ZipError::InvalidFormat);
                    }
                    return Ok(EocdInfo {
                        cd_offset: zip64.cd_offset,
                        cd_size: zip64.cd_size,
                        num_entries: zip64.num_entries,
                    });
                }

                // Classic (non-ZIP64) archive: same containment check on the
                // 32-bit values.
                let cd_end = cd_offset_32
                    .checked_add(cd_size_32 as u64)
                    .ok_or(ZipError::InvalidFormat)?;
                if cd_end > eocd_pos || cd_end > file_size {
                    return Err(ZipError::InvalidFormat);
                }

                return Ok(EocdInfo {
                    cd_offset: cd_offset_32,
                    cd_size: cd_size_32 as u64,
                    num_entries: num_entries as u64,
                });
            }

            // No match in this chunk: remember its head for the next (earlier)
            // chunk and move the window towards the start of the file.
            let new_suffix_len = core::cmp::min(EOCD_MIN_SIZE - 1, read_len);
            suffix[..new_suffix_len].copy_from_slice(&window[..new_suffix_len]);
            suffix_len = new_suffix_len;
            chunk_end = chunk_start;
            remaining -= read_len_u64;
        }

        Err(ZipError::InvalidFormat)
    }
358
359    fn read_zip64_eocd(file: &mut F, offset: u64) -> Result<Zip64EocdInfo, ZipError> {
360        file.seek(SeekFrom::Start(offset))
361            .map_err(|_| ZipError::IoError)?;
362        let mut fixed = [0u8; 56];
363        file.read_exact(&mut fixed).map_err(|_| ZipError::IoError)?;
364
365        let sig = u32::from_le_bytes([fixed[0], fixed[1], fixed[2], fixed[3]]);
366        if sig != SIG_ZIP64_EOCD {
367            return Err(ZipError::InvalidFormat);
368        }
369
370        let record_size = u64::from_le_bytes([
371            fixed[4], fixed[5], fixed[6], fixed[7], fixed[8], fixed[9], fixed[10], fixed[11],
372        ]);
373        if record_size < 44 {
374            return Err(ZipError::InvalidFormat);
375        }
376
377        let disk_number = u32::from_le_bytes([fixed[16], fixed[17], fixed[18], fixed[19]]);
378        let disk_with_cd_start = u32::from_le_bytes([fixed[20], fixed[21], fixed[22], fixed[23]]);
379        let num_entries = u64::from_le_bytes([
380            fixed[32], fixed[33], fixed[34], fixed[35], fixed[36], fixed[37], fixed[38], fixed[39],
381        ]);
382        let cd_size = u64::from_le_bytes([
383            fixed[40], fixed[41], fixed[42], fixed[43], fixed[44], fixed[45], fixed[46], fixed[47],
384        ]);
385        let cd_offset = u64::from_le_bytes([
386            fixed[48], fixed[49], fixed[50], fixed[51], fixed[52], fixed[53], fixed[54], fixed[55],
387        ]);
388
389        Ok(Zip64EocdInfo {
390            disk_number,
391            disk_with_cd_start,
392            num_entries,
393            cd_size,
394            cd_offset,
395        })
396    }
397
398    /// Read a central directory entry from file
399    fn read_cd_entry(file: &mut F) -> Result<Option<CdEntry>, ZipError> {
400        let mut sig_buf = [0u8; 4];
401        if file.read_exact(&mut sig_buf).is_err() {
402            return Ok(None);
403        }
404        let sig = u32::from_le_bytes(sig_buf);
405
406        if sig != SIG_CD_ENTRY {
407            return Ok(None); // End of central directory
408        }
409
410        // Read fixed portion of central directory entry (42 bytes = offsets 4-45)
411        // This includes everything up to and including the local header offset
412        let mut buf = [0u8; 42];
413        file.read_exact(&mut buf).map_err(|_| ZipError::IoError)?;
414
415        let mut entry = CdEntry::new();
416
417        // Parse central directory entry fields
418        // buf contains bytes 4-49 of the CD entry (after the 4-byte signature)
419        // buf[N] corresponds to CD entry offset (N + 4)
420        entry.method = u16::from_le_bytes([buf[6], buf[7]]); // CD offset 10
421        entry.crc32 = u32::from_le_bytes([buf[12], buf[13], buf[14], buf[15]]); // CD offset 16
422        let compressed_size_32 = u32::from_le_bytes([buf[16], buf[17], buf[18], buf[19]]); // CD offset 20
423        let uncompressed_size_32 = u32::from_le_bytes([buf[20], buf[21], buf[22], buf[23]]); // CD offset 24
424        let name_len = u16::from_le_bytes([buf[24], buf[25]]) as usize; // CD offset 28
425        let extra_len = u16::from_le_bytes([buf[26], buf[27]]) as usize; // CD offset 30
426        let comment_len = u16::from_le_bytes([buf[28], buf[29]]) as usize; // CD offset 32
427        let local_header_offset_32 = u32::from_le_bytes([buf[38], buf[39], buf[40], buf[41]]); // CD offset 42
428        entry.compressed_size = compressed_size_32 as u64;
429        entry.uncompressed_size = uncompressed_size_32 as u64;
430        entry.local_header_offset = local_header_offset_32 as u64;
431
432        // Read filename
433        if name_len > 0 && name_len <= MAX_FILENAME_LEN {
434            let mut name_buf = alloc::vec![0u8; name_len];
435            file.read_exact(&mut name_buf)
436                .map_err(|_| ZipError::IoError)?;
437            entry.filename = String::from_utf8_lossy(&name_buf).to_string();
438        } else if name_len > MAX_FILENAME_LEN {
439            // Skip over filename bytes we can't store
440            file.seek(SeekFrom::Current(name_len as i64))
441                .map_err(|_| ZipError::IoError)?;
442        }
443
444        let needs_zip64_uncompressed = uncompressed_size_32 == u32::MAX;
445        let needs_zip64_compressed = compressed_size_32 == u32::MAX;
446        let needs_zip64_offset = local_header_offset_32 == u32::MAX;
447        let mut got_zip64_uncompressed = false;
448        let mut got_zip64_compressed = false;
449        let mut got_zip64_offset = false;
450
451        // Parse ZIP extra fields, specifically ZIP64 extended information (0x0001).
452        let mut extra_remaining = extra_len;
453        while extra_remaining >= 4 {
454            let mut hdr = [0u8; 4];
455            file.read_exact(&mut hdr).map_err(|_| ZipError::IoError)?;
456            let header_id = u16::from_le_bytes([hdr[0], hdr[1]]);
457            let field_size = u16::from_le_bytes([hdr[2], hdr[3]]) as usize;
458            extra_remaining -= 4;
459
460            if field_size > extra_remaining {
461                return Err(ZipError::InvalidFormat);
462            }
463
464            if header_id == 0x0001 {
465                let mut field_remaining = field_size;
466                if needs_zip64_uncompressed {
467                    if field_remaining < 8 {
468                        return Err(ZipError::InvalidFormat);
469                    }
470                    let mut val = [0u8; 8];
471                    file.read_exact(&mut val).map_err(|_| ZipError::IoError)?;
472                    entry.uncompressed_size = u64::from_le_bytes(val);
473                    got_zip64_uncompressed = true;
474                    field_remaining -= 8;
475                }
476                if needs_zip64_compressed {
477                    if field_remaining < 8 {
478                        return Err(ZipError::InvalidFormat);
479                    }
480                    let mut val = [0u8; 8];
481                    file.read_exact(&mut val).map_err(|_| ZipError::IoError)?;
482                    entry.compressed_size = u64::from_le_bytes(val);
483                    got_zip64_compressed = true;
484                    field_remaining -= 8;
485                }
486                if needs_zip64_offset {
487                    if field_remaining < 8 {
488                        return Err(ZipError::InvalidFormat);
489                    }
490                    let mut val = [0u8; 8];
491                    file.read_exact(&mut val).map_err(|_| ZipError::IoError)?;
492                    entry.local_header_offset = u64::from_le_bytes(val);
493                    got_zip64_offset = true;
494                    field_remaining -= 8;
495                }
496                if field_remaining > 0 {
497                    file.seek(SeekFrom::Current(field_remaining as i64))
498                        .map_err(|_| ZipError::IoError)?;
499                }
500            } else if field_size > 0 {
501                file.seek(SeekFrom::Current(field_size as i64))
502                    .map_err(|_| ZipError::IoError)?;
503            }
504            extra_remaining -= field_size;
505        }
506        if extra_remaining > 0 {
507            file.seek(SeekFrom::Current(extra_remaining as i64))
508                .map_err(|_| ZipError::IoError)?;
509        }
510
511        if (needs_zip64_uncompressed && !got_zip64_uncompressed)
512            || (needs_zip64_compressed && !got_zip64_compressed)
513            || (needs_zip64_offset && !got_zip64_offset)
514        {
515            return Err(ZipError::InvalidFormat);
516        }
517
518        if comment_len > 0 {
519            file.seek(SeekFrom::Current(comment_len as i64))
520                .map_err(|_| ZipError::IoError)?;
521        }
522
523        Ok(Some(entry))
524    }
525
526    /// Get entry by filename (case-insensitive)
527    pub fn get_entry(&self, name: &str) -> Option<&CdEntry> {
528        self.entries.iter().find(|e| {
529            e.filename == name
530                || e.filename.eq_ignore_ascii_case(name)
531                || (name.starts_with('/') && e.filename.eq_ignore_ascii_case(&name[1..]))
532                || (e.filename.starts_with('/') && e.filename[1..].eq_ignore_ascii_case(name))
533        })
534    }
535
536    /// Debug: Log all entries in the ZIP (for troubleshooting)
537    #[allow(dead_code)]
538    fn debug_list_entries(&self) {
539        log::info!(
540            "[ZIP] Central directory contains {} entries:",
541            self.entries.len()
542        );
543        for (i, entry) in self.entries.iter().enumerate() {
544            log::info!(
545                "[ZIP]  [{}] '{}' (method={}, compressed={}, uncompressed={})",
546                i,
547                entry.filename,
548                entry.method,
549                entry.compressed_size,
550                entry.uncompressed_size
551            );
552        }
553    }
554
555    /// Read and decompress a file into the provided buffer
556    /// Returns number of bytes written to buffer
557    pub fn read_file(&mut self, entry: &CdEntry, buf: &mut [u8]) -> Result<usize, ZipError> {
558        let mut input_buf = alloc::vec![0u8; DEFAULT_ZIP_SCRATCH_BYTES];
559        self.read_file_with_scratch(entry, buf, &mut input_buf)
560    }
561
562    /// Read and decompress a file into the provided buffer using caller-provided scratch input.
563    ///
564    /// This is intended for embedded callers that want deterministic allocation behavior.
565    /// `input_buf` must be non-empty.
566    pub fn read_file_with_scratch(
567        &mut self,
568        entry: &CdEntry,
569        buf: &mut [u8],
570        input_buf: &mut [u8],
571    ) -> Result<usize, ZipError> {
572        if input_buf.is_empty() {
573            return Err(ZipError::BufferTooSmall);
574        }
575        if let Some(limits) = self.limits {
576            if entry.uncompressed_size > limits.max_file_read_size as u64 {
577                return Err(ZipError::FileTooLarge);
578            }
579            if entry.compressed_size > limits.max_file_read_size as u64 {
580                return Err(ZipError::FileTooLarge);
581            }
582        }
583        let uncompressed_size =
584            usize::try_from(entry.uncompressed_size).map_err(|_| ZipError::FileTooLarge)?;
585        if uncompressed_size > buf.len() {
586            return Err(ZipError::BufferTooSmall);
587        }
588
589        // Calculate data offset by reading local file header
590        let data_offset = self.calc_data_offset(entry)?;
591
592        // Seek to data
593        self.file
594            .seek(SeekFrom::Start(data_offset))
595            .map_err(|_| ZipError::IoError)?;
596
597        match entry.method {
598            METHOD_STORED => {
599                // Read stored data directly
600                let size =
601                    usize::try_from(entry.compressed_size).map_err(|_| ZipError::FileTooLarge)?;
602                if size > buf.len() {
603                    return Err(ZipError::BufferTooSmall);
604                }
605                self.file
606                    .read_exact(&mut buf[..size])
607                    .map_err(|_| ZipError::IoError)?;
608                // Verify CRC32
609                if entry.crc32 != 0 {
610                    let calc_crc = crc32fast::hash(&buf[..size]);
611                    if calc_crc != entry.crc32 {
612                        return Err(ZipError::CrcMismatch);
613                    }
614                }
615                Ok(size)
616            }
617            METHOD_DEFLATED => {
618                self.inflate_state.reset(DataFormat::Raw);
619                let mut compressed_remaining =
620                    usize::try_from(entry.compressed_size).map_err(|_| ZipError::FileTooLarge)?;
621                let mut pending = &[][..];
622                let mut written = 0usize;
623
624                loop {
625                    if pending.is_empty() && compressed_remaining > 0 {
626                        let take = core::cmp::min(compressed_remaining, input_buf.len());
627                        self.file
628                            .read_exact(&mut input_buf[..take])
629                            .map_err(|_| ZipError::IoError)?;
630                        pending = &input_buf[..take];
631                        compressed_remaining -= take;
632                    }
633
634                    if written >= buf.len() && (compressed_remaining > 0 || !pending.is_empty()) {
635                        return Err(ZipError::BufferTooSmall);
636                    }
637
638                    let result = miniz_oxide::inflate::stream::inflate(
639                        &mut self.inflate_state,
640                        pending,
641                        &mut buf[written..],
642                        MZFlush::None,
643                    );
644                    let consumed = result.bytes_consumed;
645                    let produced = result.bytes_written;
646                    pending = &pending[consumed..];
647                    written += produced;
648
649                    match result.status {
650                        Ok(MZStatus::StreamEnd) => {
651                            if compressed_remaining != 0 || !pending.is_empty() {
652                                return Err(ZipError::DecompressError);
653                            }
654                            break;
655                        }
656                        Ok(MZStatus::Ok) => {
657                            if consumed == 0 && produced == 0 {
658                                return Err(ZipError::DecompressError);
659                            }
660                        }
661                        Ok(MZStatus::NeedDict) => return Err(ZipError::DecompressError),
662                        Err(_) => return Err(ZipError::DecompressError),
663                    }
664                }
665
666                // Verify CRC32 if available
667                if entry.crc32 != 0 {
668                    let calc_crc = crc32fast::hash(&buf[..written]);
669                    if calc_crc != entry.crc32 {
670                        return Err(ZipError::CrcMismatch);
671                    }
672                }
673                Ok(written)
674            }
675            _ => Err(ZipError::UnsupportedCompression),
676        }
677    }
678
679    /// Stream a file's decompressed bytes into an arbitrary writer.
680    ///
681    /// For stored and DEFLATE entries this path is chunked and avoids full-entry output buffers.
682    pub fn read_file_to_writer<W: Write>(
683        &mut self,
684        entry: &CdEntry,
685        writer: &mut W,
686    ) -> Result<usize, ZipError> {
687        let mut input_buf = alloc::vec![0u8; DEFAULT_ZIP_SCRATCH_BYTES];
688        let mut output_buf = alloc::vec![0u8; DEFAULT_ZIP_SCRATCH_BYTES];
689        self.read_file_to_writer_with_scratch(entry, writer, &mut input_buf, &mut output_buf)
690    }
691
692    /// Stream a file's decompressed bytes into an arbitrary writer using caller-provided scratch buffers.
693    ///
694    /// This API is intended for embedded use cases where callers want strict control over
695    /// allocation and stack usage. `input_buf` and `output_buf` must both be non-empty.
696    ///
697    /// For `METHOD_STORED`, only `input_buf` is used for chunked copying.
698    /// For `METHOD_DEFLATED`, both buffers are used.
699    pub fn read_file_to_writer_with_scratch<W: Write>(
700        &mut self,
701        entry: &CdEntry,
702        writer: &mut W,
703        input_buf: &mut [u8],
704        output_buf: &mut [u8],
705    ) -> Result<usize, ZipError> {
706        if input_buf.is_empty() || output_buf.is_empty() {
707            return Err(ZipError::BufferTooSmall);
708        }
709        if let Some(limits) = self.limits {
710            if entry.uncompressed_size > limits.max_file_read_size as u64 {
711                return Err(ZipError::FileTooLarge);
712            }
713            if entry.compressed_size > limits.max_file_read_size as u64 {
714                return Err(ZipError::FileTooLarge);
715            }
716        }
717
718        let data_offset = self.calc_data_offset(entry)?;
719        self.file
720            .seek(SeekFrom::Start(data_offset))
721            .map_err(|_| ZipError::IoError)?;
722
723        match entry.method {
724            METHOD_STORED => {
725                let mut remaining =
726                    usize::try_from(entry.compressed_size).map_err(|_| ZipError::FileTooLarge)?;
727                let mut hasher = crc32fast::Hasher::new();
728                let mut written = 0usize;
729
730                while remaining > 0 {
731                    let take = core::cmp::min(remaining, input_buf.len());
732                    self.file
733                        .read_exact(&mut input_buf[..take])
734                        .map_err(|_| ZipError::IoError)?;
735                    writer
736                        .write_all(&input_buf[..take])
737                        .map_err(|_| ZipError::IoError)?;
738                    hasher.update(&input_buf[..take]);
739                    written += take;
740                    remaining -= take;
741                }
742
743                if entry.crc32 != 0 && hasher.finalize() != entry.crc32 {
744                    return Err(ZipError::CrcMismatch);
745                }
746                Ok(written)
747            }
748            METHOD_DEFLATED => {
749                self.inflate_state.reset(DataFormat::Raw);
750                let mut compressed_remaining =
751                    usize::try_from(entry.compressed_size).map_err(|_| ZipError::FileTooLarge)?;
752                let mut pending = &[][..];
753                let mut written = 0usize;
754                let mut hasher = crc32fast::Hasher::new();
755
756                loop {
757                    if pending.is_empty() && compressed_remaining > 0 {
758                        let take = core::cmp::min(compressed_remaining, input_buf.len());
759                        self.file
760                            .read_exact(&mut input_buf[..take])
761                            .map_err(|_| ZipError::IoError)?;
762                        pending = &input_buf[..take];
763                        compressed_remaining -= take;
764                    }
765
766                    let result = miniz_oxide::inflate::stream::inflate(
767                        &mut self.inflate_state,
768                        pending,
769                        output_buf,
770                        MZFlush::None,
771                    );
772                    let consumed = result.bytes_consumed;
773                    let produced = result.bytes_written;
774                    pending = &pending[consumed..];
775
776                    if produced > 0 {
777                        writer
778                            .write_all(&output_buf[..produced])
779                            .map_err(|_| ZipError::IoError)?;
780                        hasher.update(&output_buf[..produced]);
781                        written += produced;
782                    }
783
784                    match result.status {
785                        Ok(MZStatus::StreamEnd) => {
786                            if compressed_remaining != 0 || !pending.is_empty() {
787                                return Err(ZipError::DecompressError);
788                            }
789                            break;
790                        }
791                        Ok(MZStatus::Ok) => {
792                            if consumed == 0 && produced == 0 {
793                                return Err(ZipError::DecompressError);
794                            }
795                        }
796                        Ok(MZStatus::NeedDict) => return Err(ZipError::DecompressError),
797                        Err(_) => return Err(ZipError::DecompressError),
798                    }
799                }
800
801                if entry.crc32 != 0 && hasher.finalize() != entry.crc32 {
802                    return Err(ZipError::CrcMismatch);
803                }
804                Ok(written)
805            }
806            _ => Err(ZipError::UnsupportedCompression),
807        }
808    }
809
810    /// Read a file by its local header offset (avoids borrow issues)
811    /// This is useful when you need to read a file after getting its metadata
812    pub fn read_file_at_offset(
813        &mut self,
814        local_header_offset: u64,
815        buf: &mut [u8],
816    ) -> Result<usize, ZipError> {
817        // Find entry by offset
818        let entry = self
819            .entries
820            .iter()
821            .find(|e| e.local_header_offset == local_header_offset)
822            .ok_or(ZipError::FileNotFound)?;
823
824        // Create a temporary entry clone to avoid borrow issues
825        let entry_clone = CdEntry {
826            method: entry.method,
827            compressed_size: entry.compressed_size,
828            uncompressed_size: entry.uncompressed_size,
829            local_header_offset: entry.local_header_offset,
830            crc32: entry.crc32,
831            filename: entry.filename.clone(),
832        };
833
834        self.read_file(&entry_clone, buf)
835    }
836
837    /// Calculate the offset to the actual file data (past local header)
838    fn calc_data_offset(&mut self, entry: &CdEntry) -> Result<u64, ZipError> {
839        let offset = entry.local_header_offset;
840        self.file
841            .seek(SeekFrom::Start(offset))
842            .map_err(|_| ZipError::IoError)?;
843
844        // Read local file header (30 bytes fixed + variable filename/extra)
845        let mut header = [0u8; 30];
846        self.file
847            .read_exact(&mut header)
848            .map_err(|_| ZipError::IoError)?;
849
850        // Verify signature
851        let sig = u32::from_le_bytes([header[0], header[1], header[2], header[3]]);
852        if sig != SIG_LOCAL_FILE_HEADER {
853            return Err(ZipError::InvalidFormat);
854        }
855
856        // Get filename and extra field lengths
857        let name_len = u16::from_le_bytes([header[26], header[27]]) as u64;
858        let extra_len = u16::from_le_bytes([header[28], header[29]]) as u64;
859
860        // Data starts after local header + filename + extra field
861        let data_offset = offset + 30 + name_len + extra_len;
862
863        Ok(data_offset)
864    }
865
866    /// Read u16 from buffer at offset (little-endian)
867    fn read_u16_le(buf: &[u8], offset: usize) -> u16 {
868        u16::from_le_bytes([buf[offset], buf[offset + 1]])
869    }
870
871    /// Read u32 from buffer at offset (little-endian)
872    fn read_u32_le(buf: &[u8], offset: usize) -> u32 {
873        u32::from_le_bytes([
874            buf[offset],
875            buf[offset + 1],
876            buf[offset + 2],
877            buf[offset + 3],
878        ])
879    }
880
881    /// Validate that the archive contains a valid EPUB mimetype file
882    ///
883    /// Checks that a file named "mimetype" exists and its content is exactly
884    /// `application/epub+zip`, as required by the EPUB specification.
885    pub fn validate_mimetype(&mut self) -> Result<(), ZipError> {
886        let entry = self
887            .get_entry("mimetype")
888            .ok_or_else(|| {
889                ZipError::InvalidMimetype("mimetype file not found in archive".to_string())
890            })?
891            .clone();
892
893        if let Some(limits) = self.limits {
894            if entry.uncompressed_size > limits.max_mimetype_size as u64 {
895                return Err(ZipError::InvalidMimetype(
896                    "mimetype file too large".to_string(),
897                ));
898            }
899        }
900
901        let size = usize::try_from(entry.uncompressed_size)
902            .map_err(|_| ZipError::InvalidMimetype("mimetype file too large".to_string()))?;
903        let mut buf = alloc::vec![0u8; size];
904        let bytes_read = self.read_file(&entry, &mut buf)?;
905
906        let content = core::str::from_utf8(&buf[..bytes_read]).map_err(|_| {
907            ZipError::InvalidMimetype("mimetype file is not valid UTF-8".to_string())
908        })?;
909
910        if content != "application/epub+zip" {
911            return Err(ZipError::InvalidMimetype(format!(
912                "expected 'application/epub+zip', got '{}'",
913                content
914            )));
915        }
916
917        Ok(())
918    }
919
920    /// Check if this archive is a valid EPUB file
921    ///
922    /// Convenience wrapper around `validate_mimetype()` that returns a boolean.
923    pub fn is_valid_epub(&mut self) -> bool {
924        self.validate_mimetype().is_ok()
925    }
926
927    /// Get number of entries in central directory
928    pub fn num_entries(&self) -> usize {
929        self.num_entries.min(self.entries.len())
930    }
931
932    /// Iterate over all entries
933    pub fn entries(&self) -> impl Iterator<Item = &CdEntry> {
934        self.entries.iter()
935    }
936
937    /// Get entry by index
938    pub fn get_entry_by_index(&self, index: usize) -> Option<&CdEntry> {
939        self.entries.get(index)
940    }
941
942    /// Get the active limits used by this ZIP reader.
943    pub fn limits(&self) -> Option<ZipLimits> {
944        self.limits
945    }
946}
947
948#[cfg(test)]
949mod tests {
950    use super::*;
951
952    // Simple test to verify the module compiles
953    #[test]
954    fn test_zip_error_debug() {
955        let err = ZipError::FileNotFound;
956        assert_eq!(format!("{:?}", err), "FileNotFound");
957    }
958
959    #[test]
960    fn test_zip_error_invalid_mimetype_debug() {
961        let err = ZipError::InvalidMimetype("wrong content".to_string());
962        let debug = format!("{:?}", err);
963        assert!(debug.contains("InvalidMimetype"));
964        assert!(debug.contains("wrong content"));
965    }
966
967    #[test]
968    fn test_zip_error_invalid_mimetype_equality() {
969        let err1 = ZipError::InvalidMimetype("missing".to_string());
970        let err2 = ZipError::InvalidMimetype("missing".to_string());
971        let err3 = ZipError::InvalidMimetype("different".to_string());
972        assert_eq!(err1, err2);
973        assert_ne!(err1, err3);
974    }
975
976    #[test]
977    fn test_zip_error_variants_are_distinct() {
978        let errors: Vec<ZipError> = vec![
979            ZipError::FileNotFound,
980            ZipError::InvalidFormat,
981            ZipError::UnsupportedCompression,
982            ZipError::DecompressError,
983            ZipError::CrcMismatch,
984            ZipError::IoError,
985            ZipError::CentralDirFull,
986            ZipError::BufferTooSmall,
987            ZipError::FileTooLarge,
988            ZipError::InvalidMimetype("test".to_string()),
989            ZipError::UnsupportedZip64,
990        ];
991
992        // Each variant should be different from every other
993        for (i, a) in errors.iter().enumerate() {
994            for (j, b) in errors.iter().enumerate() {
995                if i != j {
996                    assert_ne!(a, b, "variants at index {} and {} should differ", i, j);
997                }
998            }
999        }
1000    }
1001
1002    #[test]
1003    fn test_zip_error_clone() {
1004        let err = ZipError::InvalidMimetype("test message".to_string());
1005        let cloned = err.clone();
1006        assert_eq!(err, cloned);
1007    }
1008
1009    #[test]
1010    fn test_cd_entry_new() {
1011        let entry = CdEntry::new();
1012        assert_eq!(entry.method, 0);
1013        assert_eq!(entry.compressed_size, 0);
1014        assert_eq!(entry.uncompressed_size, 0);
1015        assert_eq!(entry.local_header_offset, 0);
1016        assert_eq!(entry.crc32, 0);
1017        assert!(entry.filename.is_empty());
1018    }
1019
1020    /// Helper to build a minimal valid ZIP archive with a single stored file.
1021    ///
1022    /// The archive contains one file with the given name and content,
1023    /// stored without compression (method 0).
1024    fn build_single_file_zip(filename: &str, content: &[u8]) -> Vec<u8> {
1025        let name_bytes = filename.as_bytes();
1026        let name_len = name_bytes.len() as u16;
1027        let content_len = content.len() as u32;
1028        let crc = crc32fast::hash(content);
1029
1030        let mut zip = Vec::with_capacity(0);
1031
1032        // -- Local file header --
1033        let local_offset = zip.len() as u32;
1034        zip.extend_from_slice(&SIG_LOCAL_FILE_HEADER.to_le_bytes()); // signature
1035        zip.extend_from_slice(&20u16.to_le_bytes()); // version needed
1036        zip.extend_from_slice(&0u16.to_le_bytes()); // flags
1037        zip.extend_from_slice(&METHOD_STORED.to_le_bytes()); // compression
1038        zip.extend_from_slice(&0u16.to_le_bytes()); // mod time
1039        zip.extend_from_slice(&0u16.to_le_bytes()); // mod date
1040        zip.extend_from_slice(&crc.to_le_bytes()); // CRC32
1041        zip.extend_from_slice(&content_len.to_le_bytes()); // compressed size
1042        zip.extend_from_slice(&content_len.to_le_bytes()); // uncompressed size
1043        zip.extend_from_slice(&name_len.to_le_bytes()); // filename length
1044        zip.extend_from_slice(&0u16.to_le_bytes()); // extra field length
1045        zip.extend_from_slice(name_bytes); // filename
1046        zip.extend_from_slice(content); // file data
1047
1048        // -- Central directory entry --
1049        let cd_offset = zip.len() as u32;
1050        zip.extend_from_slice(&SIG_CD_ENTRY.to_le_bytes()); // signature
1051        zip.extend_from_slice(&20u16.to_le_bytes()); // version made by
1052        zip.extend_from_slice(&20u16.to_le_bytes()); // version needed
1053        zip.extend_from_slice(&0u16.to_le_bytes()); // flags
1054        zip.extend_from_slice(&METHOD_STORED.to_le_bytes()); // compression
1055        zip.extend_from_slice(&0u16.to_le_bytes()); // mod time
1056        zip.extend_from_slice(&0u16.to_le_bytes()); // mod date
1057        zip.extend_from_slice(&crc.to_le_bytes()); // CRC32
1058        zip.extend_from_slice(&content_len.to_le_bytes()); // compressed size
1059        zip.extend_from_slice(&content_len.to_le_bytes()); // uncompressed size
1060        zip.extend_from_slice(&name_len.to_le_bytes()); // filename length
1061        zip.extend_from_slice(&0u16.to_le_bytes()); // extra field length
1062        zip.extend_from_slice(&0u16.to_le_bytes()); // comment length
1063        zip.extend_from_slice(&0u16.to_le_bytes()); // disk number start
1064        zip.extend_from_slice(&0u16.to_le_bytes()); // internal attrs
1065        zip.extend_from_slice(&0u32.to_le_bytes()); // external attrs
1066        zip.extend_from_slice(&local_offset.to_le_bytes()); // local header offset
1067        zip.extend_from_slice(name_bytes); // filename
1068
1069        let cd_size = (zip.len() as u32) - cd_offset;
1070
1071        // -- End of central directory --
1072        zip.extend_from_slice(&SIG_EOCD.to_le_bytes()); // signature
1073        zip.extend_from_slice(&0u16.to_le_bytes()); // disk number
1074        zip.extend_from_slice(&0u16.to_le_bytes()); // disk with CD
1075        zip.extend_from_slice(&1u16.to_le_bytes()); // entries on this disk
1076        zip.extend_from_slice(&1u16.to_le_bytes()); // total entries
1077        zip.extend_from_slice(&cd_size.to_le_bytes()); // CD size
1078        zip.extend_from_slice(&cd_offset.to_le_bytes()); // CD offset
1079        zip.extend_from_slice(&0u16.to_le_bytes()); // comment length
1080
1081        zip
1082    }
1083
1084    fn build_single_file_zip64(filename: &str, content: &[u8]) -> Vec<u8> {
1085        let name_bytes = filename.as_bytes();
1086        let name_len = name_bytes.len() as u16;
1087        let content_len = content.len() as u64;
1088        let crc = crc32fast::hash(content);
1089
1090        let mut zip = Vec::with_capacity(0);
1091
1092        // -- Local file header --
1093        let local_offset = zip.len() as u64;
1094        zip.extend_from_slice(&SIG_LOCAL_FILE_HEADER.to_le_bytes()); // signature
1095        zip.extend_from_slice(&45u16.to_le_bytes()); // version needed
1096        zip.extend_from_slice(&0u16.to_le_bytes()); // flags
1097        zip.extend_from_slice(&METHOD_STORED.to_le_bytes()); // compression
1098        zip.extend_from_slice(&0u16.to_le_bytes()); // mod time
1099        zip.extend_from_slice(&0u16.to_le_bytes()); // mod date
1100        zip.extend_from_slice(&crc.to_le_bytes()); // CRC32
1101        zip.extend_from_slice(&(content_len as u32).to_le_bytes()); // compressed size
1102        zip.extend_from_slice(&(content_len as u32).to_le_bytes()); // uncompressed size
1103        zip.extend_from_slice(&name_len.to_le_bytes()); // filename length
1104        zip.extend_from_slice(&0u16.to_le_bytes()); // extra field length
1105        zip.extend_from_slice(name_bytes); // filename
1106        zip.extend_from_slice(content); // file data
1107
1108        // -- Central directory entry with ZIP64 extra field --
1109        let cd_offset = zip.len() as u64;
1110        let zip64_extra_len = 24u16; // uncompressed + compressed + local header offset
1111        zip.extend_from_slice(&SIG_CD_ENTRY.to_le_bytes()); // signature
1112        zip.extend_from_slice(&45u16.to_le_bytes()); // version made by
1113        zip.extend_from_slice(&45u16.to_le_bytes()); // version needed
1114        zip.extend_from_slice(&0u16.to_le_bytes()); // flags
1115        zip.extend_from_slice(&METHOD_STORED.to_le_bytes()); // compression
1116        zip.extend_from_slice(&0u16.to_le_bytes()); // mod time
1117        zip.extend_from_slice(&0u16.to_le_bytes()); // mod date
1118        zip.extend_from_slice(&crc.to_le_bytes()); // CRC32
1119        zip.extend_from_slice(&u32::MAX.to_le_bytes()); // compressed size sentinel
1120        zip.extend_from_slice(&u32::MAX.to_le_bytes()); // uncompressed size sentinel
1121        zip.extend_from_slice(&name_len.to_le_bytes()); // filename length
1122        zip.extend_from_slice(&(zip64_extra_len + 4).to_le_bytes()); // extra field length
1123        zip.extend_from_slice(&0u16.to_le_bytes()); // comment length
1124        zip.extend_from_slice(&0u16.to_le_bytes()); // disk number start
1125        zip.extend_from_slice(&0u16.to_le_bytes()); // internal attrs
1126        zip.extend_from_slice(&0u32.to_le_bytes()); // external attrs
1127        zip.extend_from_slice(&u32::MAX.to_le_bytes()); // local header offset sentinel
1128        zip.extend_from_slice(name_bytes); // filename
1129        zip.extend_from_slice(&0x0001u16.to_le_bytes()); // ZIP64 extra header id
1130        zip.extend_from_slice(&zip64_extra_len.to_le_bytes()); // ZIP64 extra length
1131        zip.extend_from_slice(&content_len.to_le_bytes()); // uncompressed size
1132        zip.extend_from_slice(&content_len.to_le_bytes()); // compressed size
1133        zip.extend_from_slice(&local_offset.to_le_bytes()); // local header offset
1134
1135        let cd_size = (zip.len() as u64) - cd_offset;
1136
1137        // -- ZIP64 EOCD record --
1138        let zip64_eocd_offset = zip.len() as u64;
1139        zip.extend_from_slice(&SIG_ZIP64_EOCD.to_le_bytes()); // signature
1140        zip.extend_from_slice(&44u64.to_le_bytes()); // size of ZIP64 EOCD record
1141        zip.extend_from_slice(&45u16.to_le_bytes()); // version made by
1142        zip.extend_from_slice(&45u16.to_le_bytes()); // version needed
1143        zip.extend_from_slice(&0u32.to_le_bytes()); // disk number
1144        zip.extend_from_slice(&0u32.to_le_bytes()); // disk where CD starts
1145        zip.extend_from_slice(&1u64.to_le_bytes()); // entries on this disk
1146        zip.extend_from_slice(&1u64.to_le_bytes()); // total entries
1147        zip.extend_from_slice(&cd_size.to_le_bytes()); // central directory size
1148        zip.extend_from_slice(&cd_offset.to_le_bytes()); // central directory offset
1149
1150        // -- ZIP64 EOCD locator --
1151        zip.extend_from_slice(&SIG_ZIP64_EOCD_LOCATOR.to_le_bytes()); // signature
1152        zip.extend_from_slice(&0u32.to_le_bytes()); // disk with ZIP64 EOCD
1153        zip.extend_from_slice(&zip64_eocd_offset.to_le_bytes()); // ZIP64 EOCD offset
1154        zip.extend_from_slice(&1u32.to_le_bytes()); // total disks
1155
1156        // -- Legacy EOCD with ZIP64 sentinels --
1157        zip.extend_from_slice(&SIG_EOCD.to_le_bytes()); // signature
1158        zip.extend_from_slice(&0u16.to_le_bytes()); // disk number
1159        zip.extend_from_slice(&0u16.to_le_bytes()); // disk with CD
1160        zip.extend_from_slice(&u16::MAX.to_le_bytes()); // entries on this disk sentinel
1161        zip.extend_from_slice(&u16::MAX.to_le_bytes()); // total entries sentinel
1162        zip.extend_from_slice(&u32::MAX.to_le_bytes()); // CD size sentinel
1163        zip.extend_from_slice(&u32::MAX.to_le_bytes()); // CD offset sentinel
1164        zip.extend_from_slice(&0u16.to_le_bytes()); // comment length
1165
1166        zip
1167    }
1168
1169    fn add_zip_comment(mut zip: Vec<u8>, comment_len: usize) -> Vec<u8> {
1170        let eocd_pos = zip.len() - EOCD_MIN_SIZE;
1171        let comment_len = comment_len as u16;
1172        zip[eocd_pos + 20..eocd_pos + 22].copy_from_slice(&comment_len.to_le_bytes());
1173        zip.extend_from_slice(&vec![b'A'; comment_len as usize]);
1174        zip
1175    }
1176
1177    #[test]
1178    fn test_validate_mimetype_success() {
1179        let zip_data = build_single_file_zip("mimetype", b"application/epub+zip");
1180        let cursor = std::io::Cursor::new(zip_data);
1181        let mut zip = StreamingZip::new(cursor).unwrap();
1182        assert!(zip.validate_mimetype().is_ok());
1183    }
1184
1185    #[test]
1186    fn test_eocd_found_with_long_comment() {
1187        let zip_data = add_zip_comment(
1188            build_single_file_zip("mimetype", b"application/epub+zip"),
1189            2_000,
1190        );
1191        let cursor = std::io::Cursor::new(zip_data);
1192        let mut zip = StreamingZip::new(cursor).expect("EOCD should be discoverable");
1193        assert!(zip.validate_mimetype().is_ok());
1194    }
1195
1196    #[test]
1197    fn test_eocd_scan_limit_rejects_long_tail() {
1198        let zip_data = add_zip_comment(
1199            build_single_file_zip("mimetype", b"application/epub+zip"),
1200            2_000,
1201        );
1202        let cursor = std::io::Cursor::new(zip_data);
1203        let limits = ZipLimits::new(1024 * 1024, 1024).with_max_eocd_scan(128);
1204        let result = StreamingZip::new_with_limits(cursor, Some(limits));
1205        assert!(matches!(result, Err(ZipError::InvalidFormat)));
1206    }
1207
1208    #[test]
1209    fn test_zip64_sentinel_without_locator_is_invalid() {
1210        let mut zip_data = build_single_file_zip("mimetype", b"application/epub+zip");
1211        let eocd_pos = zip_data.len() - EOCD_MIN_SIZE;
1212        zip_data[eocd_pos + 8..eocd_pos + 10].copy_from_slice(&u16::MAX.to_le_bytes());
1213        let cursor = std::io::Cursor::new(zip_data);
1214        let result = StreamingZip::new(cursor);
1215        assert!(matches!(result, Err(ZipError::InvalidFormat)));
1216    }
1217
1218    #[test]
1219    fn test_zip64_single_file_archive_is_readable() {
1220        let content = b"application/epub+zip";
1221        let zip_data = build_single_file_zip64("mimetype", content);
1222        let cursor = std::io::Cursor::new(zip_data);
1223        let mut zip = StreamingZip::new(cursor).expect("ZIP64 archive should parse");
1224        let entry = zip.get_entry("mimetype").expect("mimetype entry").clone();
1225        assert_eq!(entry.uncompressed_size, content.len() as u64);
1226        assert_eq!(entry.compressed_size, content.len() as u64);
1227
1228        let mut buf = [0u8; 64];
1229        let n = zip
1230            .read_file(&entry, &mut buf)
1231            .expect("ZIP64 entry should read");
1232        assert_eq!(&buf[..n], content);
1233    }
1234
1235    #[test]
1236    fn test_strict_rejects_too_many_cd_entries() {
1237        let mut zip_data = build_single_file_zip("mimetype", b"application/epub+zip");
1238        let eocd_pos = zip_data.len() - EOCD_MIN_SIZE;
1239        let count = (MAX_CD_ENTRIES as u16) + 1;
1240        zip_data[eocd_pos + 8..eocd_pos + 10].copy_from_slice(&count.to_le_bytes());
1241        zip_data[eocd_pos + 10..eocd_pos + 12].copy_from_slice(&count.to_le_bytes());
1242        let cursor = std::io::Cursor::new(zip_data);
1243        let limits = ZipLimits::new(1024 * 1024, 1024).with_strict(true);
1244        let result = StreamingZip::new_with_limits(cursor, Some(limits));
1245        assert!(matches!(result, Err(ZipError::CentralDirFull)));
1246    }
1247
1248    #[test]
1249    fn test_validate_mimetype_wrong_content() {
1250        let zip_data = build_single_file_zip("mimetype", b"text/plain");
1251        let cursor = std::io::Cursor::new(zip_data);
1252        let mut zip = StreamingZip::new(cursor).unwrap();
1253        let result = zip.validate_mimetype();
1254        assert!(result.is_err());
1255        match result.unwrap_err() {
1256            ZipError::InvalidMimetype(msg) => {
1257                assert!(msg.contains("text/plain"));
1258            }
1259            other => panic!("Expected InvalidMimetype, got {:?}", other),
1260        }
1261    }
1262
1263    #[test]
1264    fn test_validate_mimetype_missing_file() {
1265        let zip_data = build_single_file_zip("not_mimetype.txt", b"hello");
1266        let cursor = std::io::Cursor::new(zip_data);
1267        let mut zip = StreamingZip::new(cursor).unwrap();
1268        let result = zip.validate_mimetype();
1269        assert!(result.is_err());
1270        match result.unwrap_err() {
1271            ZipError::InvalidMimetype(msg) => {
1272                assert!(msg.contains("not found"));
1273            }
1274            other => panic!("Expected InvalidMimetype, got {:?}", other),
1275        }
1276    }
1277
1278    #[test]
1279    fn test_is_valid_epub_true() {
1280        let zip_data = build_single_file_zip("mimetype", b"application/epub+zip");
1281        let cursor = std::io::Cursor::new(zip_data);
1282        let mut zip = StreamingZip::new(cursor).unwrap();
1283        assert!(zip.is_valid_epub());
1284    }
1285
1286    #[test]
1287    fn test_is_valid_epub_false_wrong_content() {
1288        let zip_data = build_single_file_zip("mimetype", b"application/zip");
1289        let cursor = std::io::Cursor::new(zip_data);
1290        let mut zip = StreamingZip::new(cursor).unwrap();
1291        assert!(!zip.is_valid_epub());
1292    }
1293
1294    #[test]
1295    fn test_is_valid_epub_false_missing() {
1296        let zip_data = build_single_file_zip("other.txt", b"some content");
1297        let cursor = std::io::Cursor::new(zip_data);
1298        let mut zip = StreamingZip::new(cursor).unwrap();
1299        assert!(!zip.is_valid_epub());
1300    }
1301
1302    #[test]
1303    fn test_streaming_zip_read_file() {
1304        let content = b"application/epub+zip";
1305        let zip_data = build_single_file_zip("mimetype", content);
1306        let cursor = std::io::Cursor::new(zip_data);
1307        let mut zip = StreamingZip::new(cursor).unwrap();
1308
1309        assert_eq!(zip.num_entries(), 1);
1310
1311        let entry = zip.get_entry("mimetype").unwrap().clone();
1312        assert_eq!(entry.filename, "mimetype");
1313        assert_eq!(entry.uncompressed_size, content.len() as u64);
1314        assert_eq!(entry.method, METHOD_STORED);
1315
1316        let mut buf = [0u8; 64];
1317        let n = zip.read_file(&entry, &mut buf).unwrap();
1318        assert_eq!(&buf[..n], content);
1319    }
1320
1321    #[test]
1322    fn test_read_file_to_writer_with_scratch_streams_stored_entry() {
1323        let content = b"application/epub+zip";
1324        let zip_data = build_single_file_zip("mimetype", content);
1325        let cursor = std::io::Cursor::new(zip_data);
1326        let mut zip = StreamingZip::new(cursor).unwrap();
1327        let entry = zip.get_entry("mimetype").unwrap().clone();
1328
1329        let mut out = Vec::with_capacity(0);
1330        let mut input = [0u8; 16];
1331        let mut output = [0u8; 16];
1332        let n = zip
1333            .read_file_to_writer_with_scratch(&entry, &mut out, &mut input, &mut output)
1334            .expect("streaming with scratch should succeed");
1335        assert_eq!(n, content.len());
1336        assert_eq!(out, content);
1337    }
1338
1339    #[test]
1340    fn test_read_file_to_writer_with_scratch_rejects_empty_buffers() {
1341        let content = b"application/epub+zip";
1342        let zip_data = build_single_file_zip("mimetype", content);
1343        let cursor = std::io::Cursor::new(zip_data);
1344        let mut zip = StreamingZip::new(cursor).unwrap();
1345        let entry = zip.get_entry("mimetype").unwrap().clone();
1346
1347        let mut out = Vec::with_capacity(0);
1348        let mut input = [];
1349        let mut output = [0u8; 16];
1350        let err = zip
1351            .read_file_to_writer_with_scratch(&entry, &mut out, &mut input, &mut output)
1352            .expect_err("empty input buffer must fail");
1353        assert!(matches!(err, ZipError::BufferTooSmall));
1354    }
1355
1356    #[test]
1357    fn test_read_file_with_scratch_streams_into_output_buffer() {
1358        let content = b"application/epub+zip";
1359        let zip_data = build_single_file_zip("mimetype", content);
1360        let cursor = std::io::Cursor::new(zip_data);
1361        let mut zip = StreamingZip::new(cursor).unwrap();
1362        let entry = zip.get_entry("mimetype").unwrap().clone();
1363
1364        let mut out = [0u8; 64];
1365        let mut input = [0u8; 8];
1366        let n = zip
1367            .read_file_with_scratch(&entry, &mut out, &mut input)
1368            .expect("read_file_with_scratch should succeed");
1369        assert_eq!(&out[..n], content);
1370    }
1371
1372    #[test]
1373    fn test_read_file_with_scratch_rejects_empty_input_buffer() {
1374        let content = b"application/epub+zip";
1375        let zip_data = build_single_file_zip("mimetype", content);
1376        let cursor = std::io::Cursor::new(zip_data);
1377        let mut zip = StreamingZip::new(cursor).unwrap();
1378        let entry = zip.get_entry("mimetype").unwrap().clone();
1379
1380        let mut out = [0u8; 64];
1381        let mut input = [];
1382        let err = zip
1383            .read_file_with_scratch(&entry, &mut out, &mut input)
1384            .expect_err("empty input buffer must fail");
1385        assert!(matches!(err, ZipError::BufferTooSmall));
1386    }
1387
1388    #[test]
1389    fn test_zip_limits_enforced_when_configured() {
1390        let content = b"1234567890";
1391        let zip_data = build_single_file_zip("data.txt", content);
1392        let cursor = std::io::Cursor::new(zip_data);
1393        let limits = ZipLimits::new(8, 8);
1394        let mut zip = StreamingZip::new_with_limits(cursor, Some(limits)).unwrap();
1395        let entry = zip.get_entry("data.txt").unwrap().clone();
1396        let mut buf = [0u8; 32];
1397        let result = zip.read_file(&entry, &mut buf);
1398        assert!(matches!(result, Err(ZipError::FileTooLarge)));
1399    }
1400
1401    #[test]
1402    fn test_zip_limits_not_enforced_by_default() {
1403        let content = b"1234567890";
1404        let zip_data = build_single_file_zip("data.txt", content);
1405        let cursor = std::io::Cursor::new(zip_data);
1406        let mut zip = StreamingZip::new(cursor).unwrap();
1407        let entry = zip.get_entry("data.txt").unwrap().clone();
1408        let mut buf = [0u8; 32];
1409        let n = zip.read_file(&entry, &mut buf).unwrap();
1410        assert_eq!(&buf[..n], content);
1411    }
1412}