Skip to main content

zip_core/
lib.rs

1//! Forensic-grade ZIP reader.
2//!
3//! The headline capability is **deflate-block-indexed random access**: a forensic
4//! image stored in a ZIP (an E01 `Defl:N` entry at ~0% compression) is, at the
5//! deflate level, a run of *stored* blocks (`BTYPE=00`). Those blocks are
6//! byte-aligned, so the uncompressed entry can be addressed at any offset by
7//! seeking directly to the right block — **without inflating from the start**.
8//! This lets a downstream reader (e.g. the EWF parser) random-access a multi-GB
9//! image inside a ZIP with no temp extraction and no repeated decompression.
10//!
11//! Genuinely-compressed entries fall back to a correctness-preserving full
12//! decompress (no worse than extracting the entry), so the type is universal.
13#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used))]
14
15mod archive;
16mod bytes;
17mod codec;
18mod cp437;
19mod crypto;
20
21pub use archive::{
22    ArchiveSummary, CompressionMethod, EntryLayout, ExtraFields, HeaderFields, ZipArchive, ZipFile,
23};
24
25use std::io::Read;
26use std::path::{Path, PathBuf};
27
28/// Errors from opening or reading a ZIP entry.
29#[derive(Debug, thiserror::Error)]
30pub enum ZipCoreError {
31    /// An I/O error occurred.
32    #[error("I/O error: {0}")]
33    Io(#[from] std::io::Error),
34
35    /// The container structure was malformed.
36    #[error("malformed ZIP container: {0}")]
37    Format(#[from] FormatError),
38
39    /// An entry uses a compression method this reader does not (yet) decode.
40    #[error("unsupported compression method: {0:?}")]
41    UnsupportedMethod(CompressionMethod),
42
43    /// The decoded entry's CRC-32 did not match the central-directory value.
44    #[error(
45        "CRC-32 mismatch in entry {entry}: expected {expected:#010x}, computed {actual:#010x}"
46    )]
47    CrcMismatch {
48        /// The entry whose CRC failed.
49        entry: String,
50        /// The CRC recorded in the central directory.
51        expected: u32,
52        /// The CRC computed over the decoded bytes.
53        actual: u32,
54    },
55
56    /// The entry is encrypted but no password was supplied (use `by_*_decrypt`).
57    #[error("entry is encrypted (password required): {0}")]
58    EncryptedNoPassword(String),
59
60    /// The supplied password failed the entry's verification check.
61    #[error("incorrect password for entry: {0}")]
62    WrongPassword(String),
63
64    /// An encrypted entry uses a scheme/parameters this reader cannot handle.
65    #[error("unsupported encryption for entry {entry}: {reason}")]
66    UnsupportedEncryption {
67        /// The entry.
68        entry: String,
69        /// What was unsupported.
70        reason: String,
71    },
72
73    /// No entry with the requested name exists.
74    #[error("entry not found: {0}")]
75    EntryNotFound(String),
76
77    /// The requested entry index is out of range.
78    #[error("entry index out of bounds: {0}")]
79    IndexOutOfBounds(usize),
80
81    /// The entry's data lives on another disk of a spanned/split archive, which
82    /// this reader does not reassemble.
83    #[error("entry {entry} is on disk {disk} of a spanned archive (not supported)")]
84    SpannedArchive {
85        /// The entry.
86        entry: String,
87        /// The disk number holding the entry.
88        disk: u32,
89    },
90
91    /// The entry's deflate stream was malformed (e.g. `LEN`/`NLEN` mismatch).
92    #[error("malformed deflate stream in entry {entry}: {reason}")]
93    Malformed {
94        /// The entry whose stream is malformed.
95        entry: String,
96        /// What was wrong.
97        reason: String,
98    },
99}
100
101/// Structural defects in a ZIP container. Each variant preserves the offending
102/// value/location (CLAUDE.md "Show the unrecognized value").
103#[derive(Debug, thiserror::Error)]
104pub enum FormatError {
105    /// A header read ran past the available bytes.
106    #[error("unexpected end of data")]
107    Truncated,
108
109    /// No End Of Central Directory record was found.
110    #[error("End Of Central Directory record not found")]
111    NoEocd,
112
113    /// A record did not start with its expected signature.
114    #[error("bad signature for {what} at offset {offset}")]
115    BadSignature {
116        /// Which record was expected.
117        what: &'static str,
118        /// Where it was looked for.
119        offset: u64,
120    },
121
122    /// The archive uses Zip64 features not yet implemented.
123    #[error("Zip64 archive not yet supported")]
124    Zip64Unsupported,
125
126    /// A 0xFFFFFFFF sentinel was present but the Zip64 record/extra field that
127    /// should carry the real value is missing or malformed.
128    #[error("Zip64 sentinel without a matching Zip64 record/extra field")]
129    Zip64Inconsistent,
130
131    /// The central directory offset/size fall outside the file.
132    #[error("central directory out of range: offset {cd_offset}, size {cd_size}")]
133    CentralDirOutOfRange {
134        /// Declared central-directory offset.
135        cd_offset: u64,
136        /// Declared central-directory size.
137        cd_size: u64,
138    },
139
140    /// The EOCD declared an entry count beyond the safety ceiling.
141    #[error("declared entry count {0} exceeds the safety ceiling")]
142    TooManyEntries(usize),
143}
144
/// A contiguous run of entry bytes that sits verbatim in the backing file.
///
/// Usually this is one stored (`BTYPE=00`) deflate block, but a method-0
/// (stored) ZIP entry is represented as a single run covering the whole entry.
#[derive(Debug, Clone, Copy)]
struct StoredBlock {
    /// Position of the run's first byte within the *uncompressed* entry.
    uncomp_start: u64,
    /// Length of the run in bytes. For a deflate stored block this is the
    /// 16-bit `LEN` field (at most 65535); a whole-entry run can be larger,
    /// which is why the field is 64 bits wide.
    len: u64,
    /// Position of the run's first raw byte within the *backing file*.
    file_offset: u64,
}
155
156/// How an entry is addressed for random access.
157enum Layout {
158    /// The deflate stream is entirely stored blocks — direct seek, no inflation.
159    StoredBlocks(Vec<StoredBlock>),
160    /// A genuinely-compressed entry: correctness-preserving full-decompress path.
161    Fallback { path: PathBuf, name: String },
162}
163
164/// A random-access view over one uncompressed ZIP entry.
165pub struct StoredZipEntry {
166    file: std::fs::File,
167    uncompressed_size: u64,
168    layout: Layout,
169}
170
171impl StoredZipEntry {
172    /// The uncompressed length of the entry, in bytes.
173    pub fn len(&self) -> u64 {
174        self.uncompressed_size
175    }
176
177    /// Whether the entry is empty.
178    pub fn is_empty(&self) -> bool {
179        self.uncompressed_size == 0
180    }
181
182    /// `true` when the entry is stored-block addressable (the fast, no-inflation
183    /// path). `false` means reads go through the full-decompress fallback.
184    pub fn is_stored_block_indexed(&self) -> bool {
185        matches!(self.layout, Layout::StoredBlocks(_))
186    }
187
188    /// Number of indexed stored blocks (0 for the fallback path).
189    pub fn block_count(&self) -> usize {
190        match &self.layout {
191            Layout::StoredBlocks(b) => b.len(),
192            Layout::Fallback { .. } => 0,
193        }
194    }
195
196    /// Read up to `buf.len()` bytes of the **uncompressed** entry starting at
197    /// `offset`. Stored-block entries seek directly to the right block(s) with no
198    /// inflation; this method takes `&self`, so independent reads run lock-free in
199    /// parallel (positioned reads). Returns the number of bytes read (short at EOF).
200    pub fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result<usize> {
201        if offset >= self.uncompressed_size || buf.is_empty() {
202            return Ok(0);
203        }
204        let want_end = (offset + buf.len() as u64).min(self.uncompressed_size);
205        let total = (want_end - offset) as usize;
206        match &self.layout {
207            Layout::StoredBlocks(blocks) => {
208                let mut filled = 0usize;
209                let mut cur = offset;
210                while cur < want_end {
211                    // First block whose uncompressed span extends past `cur`.
212                    let bi = blocks.partition_point(|b| b.uncomp_start + b.len <= cur);
213                    let Some(b) = blocks.get(bi) else {
214                        break; // cov:unreachable: blocks cover [0, uncompressed_size)
215                    };
216                    let within = cur - b.uncomp_start;
217                    let avail = b.len - within;
218                    let n = avail.min(want_end - cur) as usize;
219                    pread_exact(
220                        &self.file,
221                        &mut buf[filled..filled + n],
222                        b.file_offset + within,
223                    )?;
224                    filled += n;
225                    cur += n as u64;
226                }
227                Ok(filled)
228            }
229            Layout::Fallback { path, name } => {
230                // Rare path (genuinely-compressed entry): never hit by 0%-deflate
231                // forensic images. Correct, if O(n) per read. Decoded by the native
232                // pure-Rust parser (no zip-rs), CRC-verified on EOF.
233                let mut archive =
234                    ZipArchive::new(std::fs::File::open(path)?).map_err(std::io::Error::other)?;
235                let mut entry = archive.by_name(name).map_err(std::io::Error::other)?;
236                let mut all = Vec::with_capacity(self.uncompressed_size as usize);
237                entry.read_to_end(&mut all)?;
238                let start = offset as usize;
239                let end = (start + total).min(all.len());
240                let slice = &all[start..end];
241                buf[..slice.len()].copy_from_slice(slice);
242                Ok(slice.len())
243            }
244        }
245    }
246}
247
248/// Open a single entry of a ZIP archive for random access.
249pub fn open_entry(path: &Path, name: &str) -> Result<StoredZipEntry, ZipCoreError> {
250    let file = std::fs::File::open(path)?;
251    let mut archive = ZipArchive::new(std::fs::File::open(path)?)?;
252    let entry = archive.by_name(name)?;
253    let uncompressed_size = entry.size();
254    let compressed_size = entry.compressed_size();
255    let data_start = entry.data_start();
256    let is_deflate = entry.compression() == CompressionMethod::Deflated;
257    let is_stored = entry.compression() == CompressionMethod::Stored;
258    drop(entry);
259    drop(archive);
260
261    let layout = if is_stored {
262        // A method-0 entry is one contiguous run of raw bytes.
263        Layout::StoredBlocks(vec![StoredBlock {
264            uncomp_start: 0,
265            len: uncompressed_size,
266            file_offset: data_start,
267        }])
268    } else if is_deflate {
269        match index_stored_blocks(&file, name, data_start, compressed_size, uncompressed_size)? {
270            Some(blocks) => Layout::StoredBlocks(blocks),
271            None => Layout::Fallback {
272                path: path.to_path_buf(),
273                name: name.to_string(),
274            },
275        }
276    } else {
277        Layout::Fallback {
278            path: path.to_path_buf(),
279            name: name.to_string(),
280        }
281    };
282
283    Ok(StoredZipEntry {
284        file,
285        uncompressed_size,
286        layout,
287    })
288}
289
290/// Walk a deflate stream's block headers. Returns `Some(index)` when every block
291/// is stored (`BTYPE=00`) — the byte-addressable fast path — or `None` the moment
292/// a Huffman block appears (alignment is lost; caller uses the fallback).
293fn index_stored_blocks(
294    file: &std::fs::File,
295    name: &str,
296    data_start: u64,
297    compressed_size: u64,
298    uncompressed_size: u64,
299) -> Result<Option<Vec<StoredBlock>>, ZipCoreError> {
300    let end = data_start + compressed_size;
301    let mut blocks = Vec::new();
302    let mut foff = data_start;
303    let mut uoff = 0u64;
304    loop {
305        if foff + 5 > end {
306            // Ran out of stream before a final block — not a clean stored run.
307            return Ok(None);
308        }
309        let mut hdr = [0u8; 5];
310        pread_exact(file, &mut hdr, foff)?;
311        let bfinal = hdr[0] & 1;
312        let btype = (hdr[0] >> 1) & 0b11;
313        if btype != 0 {
314            return Ok(None); // a compressed block → not byte-addressable
315        }
316        let len = u16::from_le_bytes([hdr[1], hdr[2]]);
317        let nlen = u16::from_le_bytes([hdr[3], hdr[4]]);
318        if nlen != !len {
319            return Err(ZipCoreError::Malformed {
320                entry: name.to_string(),
321                reason: format!("stored block LEN/NLEN mismatch at file offset {foff}"),
322            });
323        }
324        let len = u64::from(len);
325        let data_off = foff + 5;
326        if data_off + len > end {
327            return Err(ZipCoreError::Malformed {
328                entry: name.to_string(),
329                reason: format!("stored block overruns compressed data at offset {data_off}"),
330            });
331        }
332        blocks.push(StoredBlock {
333            uncomp_start: uoff,
334            len,
335            file_offset: data_off,
336        });
337        uoff += len;
338        foff = data_off + len;
339        if bfinal == 1 {
340            break;
341        }
342    }
343    if uoff != uncompressed_size {
344        return Err(ZipCoreError::Malformed {
345            entry: name.to_string(),
346            reason: format!(
347                "stored-block total {uoff} != entry uncompressed size {uncompressed_size}"
348            ),
349        });
350    }
351    Ok(Some(blocks))
352}
353
/// Fill `buf` completely with the bytes of `file` starting at `offset`.
///
/// Unix implementation: delegates to the standard library's
/// `FileExt::read_exact_at`. Any error from that call is returned unchanged.
#[cfg(unix)]
fn pread_exact(file: &std::fs::File, buf: &mut [u8], offset: u64) -> std::io::Result<()> {
    std::os::unix::fs::FileExt::read_exact_at(file, buf, offset)
}
359
/// Fill `buf` completely with the bytes of `file` starting at `offset`.
///
/// Windows implementation: `seek_read` may return fewer bytes than asked for,
/// so it is called repeatedly on the still-unfilled tail of `buf`. A read of
/// zero bytes before the buffer is full is reported as `UnexpectedEof`; any
/// error from `seek_read` is returned unchanged.
#[cfg(windows)]
fn pread_exact(file: &std::fs::File, buf: &mut [u8], offset: u64) -> std::io::Result<()> {
    use std::os::windows::fs::FileExt;
    let mut remaining = buf;
    let mut position = offset;
    while !remaining.is_empty() {
        let got = file.seek_read(remaining, position)?;
        if got == 0 {
            return Err(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                "short positioned read",
            ));
        }
        // Advance past what was just filled and keep reading the tail.
        remaining = &mut remaining[got..];
        position += got as u64;
    }
    Ok(())
}