Skip to main content

zip_core/
lib.rs

1//! Forensic-grade ZIP reader.
2//!
3//! The headline capability is **deflate-block-indexed random access**: a forensic
4//! image stored in a ZIP (an E01 `Defl:N` entry at ~0% compression) is, at the
5//! deflate level, a run of *stored* blocks (`BTYPE=00`). Those blocks are
6//! byte-aligned, so the uncompressed entry can be addressed at any offset by
7//! seeking directly to the right block — **without inflating from the start**.
8//! This lets a downstream reader (e.g. the EWF parser) random-access a multi-GB
9//! image inside a ZIP with no temp extraction and no repeated decompression.
10//!
11//! Genuinely-compressed entries fall back to a correctness-preserving full
12//! decompress (no worse than extracting the entry), so the type is universal.
13#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used))]
14
15mod archive;
16mod bytes;
17mod codec;
18mod cp437;
19mod crypto;
20
21pub use archive::{
22    ArchiveSummary, CompressionMethod, EntryLayout, HeaderFields, ZipArchive, ZipFile,
23};
24
25use std::io::Read;
26use std::path::{Path, PathBuf};
27
28/// Errors from opening or reading a ZIP entry.
29#[derive(Debug, thiserror::Error)]
30pub enum ZipCoreError {
31    /// An I/O error occurred.
32    #[error("I/O error: {0}")]
33    Io(#[from] std::io::Error),
34
35    /// The container structure was malformed.
36    #[error("malformed ZIP container: {0}")]
37    Format(#[from] FormatError),
38
39    /// An entry uses a compression method this reader does not (yet) decode.
40    #[error("unsupported compression method: {0:?}")]
41    UnsupportedMethod(CompressionMethod),
42
43    /// The decoded entry's CRC-32 did not match the central-directory value.
44    #[error(
45        "CRC-32 mismatch in entry {entry}: expected {expected:#010x}, computed {actual:#010x}"
46    )]
47    CrcMismatch {
48        /// The entry whose CRC failed.
49        entry: String,
50        /// The CRC recorded in the central directory.
51        expected: u32,
52        /// The CRC computed over the decoded bytes.
53        actual: u32,
54    },
55
56    /// The entry is encrypted but no password was supplied (use `by_*_decrypt`).
57    #[error("entry is encrypted (password required): {0}")]
58    EncryptedNoPassword(String),
59
60    /// The supplied password failed the entry's verification check.
61    #[error("incorrect password for entry: {0}")]
62    WrongPassword(String),
63
64    /// An encrypted entry uses a scheme/parameters this reader cannot handle.
65    #[error("unsupported encryption for entry {entry}: {reason}")]
66    UnsupportedEncryption {
67        /// The entry.
68        entry: String,
69        /// What was unsupported.
70        reason: String,
71    },
72
73    /// No entry with the requested name exists.
74    #[error("entry not found: {0}")]
75    EntryNotFound(String),
76
77    /// The requested entry index is out of range.
78    #[error("entry index out of bounds: {0}")]
79    IndexOutOfBounds(usize),
80
81    /// The entry's deflate stream was malformed (e.g. `LEN`/`NLEN` mismatch).
82    #[error("malformed deflate stream in entry {entry}: {reason}")]
83    Malformed {
84        /// The entry whose stream is malformed.
85        entry: String,
86        /// What was wrong.
87        reason: String,
88    },
89}
90
91/// Structural defects in a ZIP container. Each variant preserves the offending
92/// value/location (CLAUDE.md "Show the unrecognized value").
93#[derive(Debug, thiserror::Error)]
94pub enum FormatError {
95    /// A header read ran past the available bytes.
96    #[error("unexpected end of data")]
97    Truncated,
98
99    /// No End Of Central Directory record was found.
100    #[error("End Of Central Directory record not found")]
101    NoEocd,
102
103    /// A record did not start with its expected signature.
104    #[error("bad signature for {what} at offset {offset}")]
105    BadSignature {
106        /// Which record was expected.
107        what: &'static str,
108        /// Where it was looked for.
109        offset: u64,
110    },
111
112    /// The archive uses Zip64 features not yet implemented.
113    #[error("Zip64 archive not yet supported")]
114    Zip64Unsupported,
115
116    /// A 0xFFFFFFFF sentinel was present but the Zip64 record/extra field that
117    /// should carry the real value is missing or malformed.
118    #[error("Zip64 sentinel without a matching Zip64 record/extra field")]
119    Zip64Inconsistent,
120
121    /// The central directory offset/size fall outside the file.
122    #[error("central directory out of range: offset {cd_offset}, size {cd_size}")]
123    CentralDirOutOfRange {
124        /// Declared central-directory offset.
125        cd_offset: u64,
126        /// Declared central-directory size.
127        cd_size: u64,
128    },
129
130    /// The EOCD declared an entry count beyond the safety ceiling.
131    #[error("declared entry count {0} exceeds the safety ceiling")]
132    TooManyEntries(usize),
133}
134
/// One contiguous run of raw entry bytes that can be read straight from the
/// backing file: either a single stored (`BTYPE=00`) deflate block, or the
/// whole payload of a method-0 (stored) entry.
#[derive(Debug, Clone, Copy)]
struct StoredBlock {
    /// Position of the run's first byte within the *uncompressed* entry.
    uncomp_start: u64,
    /// Length of the run in bytes. For a deflate stored block this is the
    /// block's `LEN` (at most 65535); for a method-0 entry it is the full
    /// uncompressed size.
    len: u64,
    /// Position in the *backing file* at which the run's raw bytes start.
    file_offset: u64,
}
145
146/// How an entry is addressed for random access.
147enum Layout {
148    /// The deflate stream is entirely stored blocks — direct seek, no inflation.
149    StoredBlocks(Vec<StoredBlock>),
150    /// A genuinely-compressed entry: correctness-preserving full-decompress path.
151    Fallback { path: PathBuf, name: String },
152}
153
154/// A random-access view over one uncompressed ZIP entry.
155pub struct StoredZipEntry {
156    file: std::fs::File,
157    uncompressed_size: u64,
158    layout: Layout,
159}
160
161impl StoredZipEntry {
162    /// The uncompressed length of the entry, in bytes.
163    pub fn len(&self) -> u64 {
164        self.uncompressed_size
165    }
166
167    /// Whether the entry is empty.
168    pub fn is_empty(&self) -> bool {
169        self.uncompressed_size == 0
170    }
171
172    /// `true` when the entry is stored-block addressable (the fast, no-inflation
173    /// path). `false` means reads go through the full-decompress fallback.
174    pub fn is_stored_block_indexed(&self) -> bool {
175        matches!(self.layout, Layout::StoredBlocks(_))
176    }
177
178    /// Number of indexed stored blocks (0 for the fallback path).
179    pub fn block_count(&self) -> usize {
180        match &self.layout {
181            Layout::StoredBlocks(b) => b.len(),
182            Layout::Fallback { .. } => 0,
183        }
184    }
185
186    /// Read up to `buf.len()` bytes of the **uncompressed** entry starting at
187    /// `offset`. Stored-block entries seek directly to the right block(s) with no
188    /// inflation; this method takes `&self`, so independent reads run lock-free in
189    /// parallel (positioned reads). Returns the number of bytes read (short at EOF).
190    pub fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result<usize> {
191        if offset >= self.uncompressed_size || buf.is_empty() {
192            return Ok(0);
193        }
194        let want_end = (offset + buf.len() as u64).min(self.uncompressed_size);
195        let total = (want_end - offset) as usize;
196        match &self.layout {
197            Layout::StoredBlocks(blocks) => {
198                let mut filled = 0usize;
199                let mut cur = offset;
200                while cur < want_end {
201                    // First block whose uncompressed span extends past `cur`.
202                    let bi = blocks.partition_point(|b| b.uncomp_start + b.len <= cur);
203                    let Some(b) = blocks.get(bi) else {
204                        break; // cov:unreachable: blocks cover [0, uncompressed_size)
205                    };
206                    let within = cur - b.uncomp_start;
207                    let avail = b.len - within;
208                    let n = avail.min(want_end - cur) as usize;
209                    pread_exact(
210                        &self.file,
211                        &mut buf[filled..filled + n],
212                        b.file_offset + within,
213                    )?;
214                    filled += n;
215                    cur += n as u64;
216                }
217                Ok(filled)
218            }
219            Layout::Fallback { path, name } => {
220                // Rare path (genuinely-compressed entry): never hit by 0%-deflate
221                // forensic images. Correct, if O(n) per read. Decoded by the native
222                // pure-Rust parser (no zip-rs), CRC-verified on EOF.
223                let mut archive =
224                    ZipArchive::new(std::fs::File::open(path)?).map_err(std::io::Error::other)?;
225                let mut entry = archive.by_name(name).map_err(std::io::Error::other)?;
226                let mut all = Vec::with_capacity(self.uncompressed_size as usize);
227                entry.read_to_end(&mut all)?;
228                let start = offset as usize;
229                let end = (start + total).min(all.len());
230                let slice = &all[start..end];
231                buf[..slice.len()].copy_from_slice(slice);
232                Ok(slice.len())
233            }
234        }
235    }
236}
237
238/// Open a single entry of a ZIP archive for random access.
239pub fn open_entry(path: &Path, name: &str) -> Result<StoredZipEntry, ZipCoreError> {
240    let file = std::fs::File::open(path)?;
241    let mut archive = ZipArchive::new(std::fs::File::open(path)?)?;
242    let entry = archive.by_name(name)?;
243    let uncompressed_size = entry.size();
244    let compressed_size = entry.compressed_size();
245    let data_start = entry.data_start();
246    let is_deflate = entry.compression() == CompressionMethod::Deflated;
247    let is_stored = entry.compression() == CompressionMethod::Stored;
248    drop(entry);
249    drop(archive);
250
251    let layout = if is_stored {
252        // A method-0 entry is one contiguous run of raw bytes.
253        Layout::StoredBlocks(vec![StoredBlock {
254            uncomp_start: 0,
255            len: uncompressed_size,
256            file_offset: data_start,
257        }])
258    } else if is_deflate {
259        match index_stored_blocks(&file, name, data_start, compressed_size, uncompressed_size)? {
260            Some(blocks) => Layout::StoredBlocks(blocks),
261            None => Layout::Fallback {
262                path: path.to_path_buf(),
263                name: name.to_string(),
264            },
265        }
266    } else {
267        Layout::Fallback {
268            path: path.to_path_buf(),
269            name: name.to_string(),
270        }
271    };
272
273    Ok(StoredZipEntry {
274        file,
275        uncompressed_size,
276        layout,
277    })
278}
279
280/// Walk a deflate stream's block headers. Returns `Some(index)` when every block
281/// is stored (`BTYPE=00`) — the byte-addressable fast path — or `None` the moment
282/// a Huffman block appears (alignment is lost; caller uses the fallback).
283fn index_stored_blocks(
284    file: &std::fs::File,
285    name: &str,
286    data_start: u64,
287    compressed_size: u64,
288    uncompressed_size: u64,
289) -> Result<Option<Vec<StoredBlock>>, ZipCoreError> {
290    let end = data_start + compressed_size;
291    let mut blocks = Vec::new();
292    let mut foff = data_start;
293    let mut uoff = 0u64;
294    loop {
295        if foff + 5 > end {
296            // Ran out of stream before a final block — not a clean stored run.
297            return Ok(None);
298        }
299        let mut hdr = [0u8; 5];
300        pread_exact(file, &mut hdr, foff)?;
301        let bfinal = hdr[0] & 1;
302        let btype = (hdr[0] >> 1) & 0b11;
303        if btype != 0 {
304            return Ok(None); // a compressed block → not byte-addressable
305        }
306        let len = u16::from_le_bytes([hdr[1], hdr[2]]);
307        let nlen = u16::from_le_bytes([hdr[3], hdr[4]]);
308        if nlen != !len {
309            return Err(ZipCoreError::Malformed {
310                entry: name.to_string(),
311                reason: format!("stored block LEN/NLEN mismatch at file offset {foff}"),
312            });
313        }
314        let len = u64::from(len);
315        let data_off = foff + 5;
316        if data_off + len > end {
317            return Err(ZipCoreError::Malformed {
318                entry: name.to_string(),
319                reason: format!("stored block overruns compressed data at offset {data_off}"),
320            });
321        }
322        blocks.push(StoredBlock {
323            uncomp_start: uoff,
324            len,
325            file_offset: data_off,
326        });
327        uoff += len;
328        foff = data_off + len;
329        if bfinal == 1 {
330            break;
331        }
332    }
333    if uoff != uncompressed_size {
334        return Err(ZipCoreError::Malformed {
335            entry: name.to_string(),
336            reason: format!(
337                "stored-block total {uoff} != entry uncompressed size {uncompressed_size}"
338            ),
339        });
340    }
341    Ok(Some(blocks))
342}
343
/// Fill `buf` completely with the bytes found at `offset` in `file`, using a
/// positioned read. Fails with the underlying I/O error (including
/// `UnexpectedEof` when the file ends before `buf` is full).
#[cfg(unix)]
fn pread_exact(file: &std::fs::File, buf: &mut [u8], offset: u64) -> std::io::Result<()> {
    std::os::unix::fs::FileExt::read_exact_at(file, buf, offset)
}
349
/// Fill `buf` completely with the bytes found at `offset` in `file`.
///
/// `seek_read` may return fewer bytes than requested, so it is called
/// repeatedly until `buf` is full. A zero-byte read before that point is
/// reported as `UnexpectedEof`.
#[cfg(windows)]
fn pread_exact(file: &std::fs::File, buf: &mut [u8], offset: u64) -> std::io::Result<()> {
    use std::os::windows::fs::FileExt;

    let mut done = 0usize;
    while done < buf.len() {
        let got = match file.seek_read(&mut buf[done..], offset + done as u64)? {
            0 => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::UnexpectedEof,
                    "short positioned read",
                ));
            }
            got => got,
        };
        done += got;
    }
    Ok(())
}