Skip to main content

zipatch_rs/chunk/
mod.rs

//! Wire-format chunk types and the [`ZiPatchReader`] streaming parser.
//!
//! This module is the parsing layer: it decodes the raw `ZiPatch` byte
//! stream into a stream of typed [`Chunk`] values. Each top-level
//! variant corresponds to one 4-byte ASCII wire tag (`FHDR`, `APLY`,
//! `SQPK`, …); the per-variant submodules below own the binary layout for
//! their body. Nothing in this module touches the filesystem — apply-time
//! effects live in [`crate::apply`].
//!
//! The [`ZiPatchReader`] parser validates the 12-byte file magic on
//! construction, then yields one [`ChunkRecord`](crate::chunk::ChunkRecord) per
//! [`ZiPatchReader::next_chunk`] call until the internal `EOF_` terminator
//! is consumed or a parse error surfaces.
14
15pub(crate) mod adir;
16pub(crate) mod afsp;
17pub(crate) mod aply;
18pub(crate) mod ddir;
19pub(crate) mod fhdr;
20pub(crate) mod sqpk;
21pub(crate) mod util;
22
23pub(crate) use adir::AddDirectory;
24pub(crate) use afsp::ApplyFreeSpace;
25pub(crate) use aply::ApplyOption;
26pub(crate) use ddir::DeleteDirectory;
27pub(crate) use fhdr::FileHeader;
28pub use sqpk::{SqpackFileId, SqpkCommand, SqpkCompressedBlock};
29pub(crate) use sqpk::{
30    SqpkFile, SqpkFileOperation, SqpkHeader, SqpkHeaderTarget, TargetHeaderKind,
31};
32
33use crate::newtypes::ChunkTag;
34use crate::reader::ReadExt;
35use crate::{ParseError, ParseResult as Result};
36use tracing::trace;
37
// The 12-byte signature that opens every `ZiPatch` stream, spelled as a byte
// string so the embedded ASCII (`ZIPATCH`) and line-ending bytes are readable.
const MAGIC: [u8; 12] = *b"\x91ZIPATCH\r\n\x1a\n";
41
/// Default ceiling (512 MiB) on the body length a single chunk may declare.
///
/// [`ZiPatchReader::new`] installs this limit so that a pathological stream
/// cannot drive the parser into a huge allocation. Choose a different limit
/// for an individual reader with [`ZiPatchReader::with_max_chunk_size`].
pub const DEFAULT_MAX_CHUNK_SIZE: u32 = 512 << 20;
48
/// One top-level chunk parsed from a `ZiPatch` stream.
///
/// Each variant corresponds to a 4-byte ASCII wire tag. The mapping from tag
/// to variant is performed by `parse_chunk`, which hands the chunk body to
/// the matching per-tag submodule parser.
///
/// # Observed frequency
///
/// SE's XIVARR+ patch files almost exclusively contain `FHDR`, `APLY`, and
/// `SQPK` chunks. `ADIR`/`DELD` can theoretically appear and are implemented,
/// but are rarely emitted in practice. `APFS` has never been observed in modern
/// patches and is treated as a no-op. `EOF_` is consumed by [`ZiPatchReader`]
/// and is never yielded to the caller.
#[derive(Debug)]
pub enum Chunk {
    /// `FHDR` — the first chunk in every patch file; carries version and
    /// per-version patch metadata. See [`FileHeader`] for the versioned body.
    FileHeader(FileHeader),
    /// `APLY` — sets or clears a boolean apply-time flag on the
    /// [`crate::ApplyConfig`] (e.g. "ignore missing files"). See [`ApplyOption`].
    ApplyOption(ApplyOption),
    /// `APFS` — free-space book-keeping emitted by old patcher tooling; treated
    /// as a no-op at apply time. See [`ApplyFreeSpace`].
    ApplyFreeSpace(ApplyFreeSpace),
    /// `ADIR` — instructs the patcher to create a directory under the game
    /// install root. See [`AddDirectory`].
    AddDirectory(AddDirectory),
    /// `DELD` — instructs the patcher to remove a directory under the game
    /// install root. See [`DeleteDirectory`].
    DeleteDirectory(DeleteDirectory),
    /// `SQPK` — the workhorse chunk; wraps one of eight sub-commands that
    /// add, delete, expand, or replace `SqPack` data. See [`SqpkCommand`].
    Sqpk(SqpkCommand),
    /// `EOF_` — marks the clean end of the patch stream. [`ZiPatchReader`]
    /// consumes this chunk internally; it is never yielded to the caller.
    EndOfFile,
}
84
/// One parsed chunk plus its 4-byte ASCII tag and the byte count consumed
/// from the input stream by its frame.
///
/// Returned by [`parse_chunk`]. The `consumed` count is exactly the size of
/// the chunk's on-wire frame: `4 (body_len) + 4 (tag) + body_len + 4 (crc32)`
/// = `body_len + 12`. This is what
/// [`ZiPatchReader`](crate::ZiPatchReader) accumulates into its running
/// byte counter for progress reporting.
pub(crate) struct ParsedChunk {
    // The decoded chunk body, wrapped in the variant selected by the wire tag.
    pub(crate) chunk: Chunk,
    // The 4-byte wire tag exactly as it was read from the frame header.
    pub(crate) tag: ChunkTag,
    // Size of the whole frame in bytes (`body_len + 12`).
    pub(crate) consumed: u64,
}
98
99/// Parse one chunk frame from `r`.
100///
101/// # Wire framing
102///
103/// Each chunk is laid out as:
104///
105/// ```text
106/// [body_len: u32 BE] [tag: 4 bytes] [body: body_len bytes] [crc32: u32 BE]
107/// ```
108///
109/// The CRC32 is computed over `tag ++ body` (not over `body_len`). When
110/// `verify_checksums` is `true` and the stored CRC does not match the computed
111/// one, [`ParseError::ChecksumMismatch`] is returned.
112///
113/// # Errors
114///
115/// - [`ParseError::TruncatedPatch`] — the reader returns EOF while reading
116///   the `body_len` field (i.e. no more chunks are present but `EOF_` was
117///   never seen).
118/// - [`ParseError::OversizedChunk`] — `body_len` exceeds `max_chunk_size`.
119/// - [`ParseError::ChecksumMismatch`] — CRC32 mismatch (only when
120///   `verify_checksums` is `true`).
121/// - [`ParseError::UnknownChunkTag`] — tag is not recognised.
122/// - [`ParseError::Io`] — any other I/O failure reading from `r`.
123pub(crate) fn parse_chunk<R: std::io::Read>(
124    r: &mut R,
125    verify_checksums: bool,
126    max_chunk_size: u32,
127) -> Result<ParsedChunk> {
128    let size = match r.read_u32_be() {
129        Ok(s) => s as usize,
130        Err(ParseError::Io { source: e }) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
131            return Err(ParseError::TruncatedPatch);
132        }
133        Err(e) => return Err(e),
134    };
135    if size > max_chunk_size as usize {
136        return Err(ParseError::OversizedChunk(size));
137    }
138
139    // Tag (4 B) and CRC (4 B) are always present regardless of body shape.
140    let mut tag = [0u8; 4];
141    r.read_exact(&mut tag)?;
142
143    // Peek at the first 5 bytes of the body without committing to either the
144    // generic single-allocation path or the SQPK `A` zero-copy-into-data path.
145    // For SQPK chunks, those 5 bytes are `[inner_size: i32 BE][sub_cmd: u8]`.
146    // For chunks with bodies shorter than 5 bytes (e.g. `EOF_`), we still read
147    // exactly `size` bytes into the prefix array and leave the rest zero.
148    let mut prefix = [0u8; 5];
149    let prefix_len = size.min(5);
150    if prefix_len > 0 {
151        r.read_exact(&mut prefix[..prefix_len])?;
152    }
153
154    // ---- Fast path: SQPK `A` (SqpkAddData) — see `parse_sqpk_add_data_fast`. ----
155    if &tag == b"SQPK" && size >= 5 + SQPK_ADDDATA_HEADER_SIZE && prefix[4] == b'A' {
156        return parse_sqpk_add_data_fast(r, tag, prefix, size, verify_checksums);
157    }
158
159    // ---- Generic path: one allocation for the whole body. ----
160    let mut body_vec = vec![0u8; size];
161    body_vec[..prefix_len].copy_from_slice(&prefix[..prefix_len]);
162    if size > prefix_len {
163        r.read_exact(&mut body_vec[prefix_len..])?;
164    }
165
166    let mut crc_buf = [0u8; 4];
167    r.read_exact(&mut crc_buf)?;
168    let expected_crc = u32::from_be_bytes(crc_buf);
169
170    if verify_checksums {
171        let mut hasher = crc32fast::Hasher::new();
172        hasher.update(&tag);
173        hasher.update(&body_vec);
174        let actual_crc = hasher.finalize();
175        if actual_crc != expected_crc {
176            return Err(ParseError::ChecksumMismatch {
177                tag: ChunkTag::new(tag),
178                expected: expected_crc,
179                actual: actual_crc,
180            });
181        }
182    }
183
184    trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
185
186    // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
187    let consumed = (size as u64) + 12;
188
189    let body = &body_vec[..];
190
191    let chunk = match &tag {
192        b"EOF_" => Chunk::EndOfFile,
193        b"FHDR" => Chunk::FileHeader(fhdr::parse(body)?),
194        b"APLY" => Chunk::ApplyOption(aply::parse(body)?),
195        b"APFS" => Chunk::ApplyFreeSpace(afsp::parse(body)?),
196        b"ADIR" => Chunk::AddDirectory(adir::parse(body)?),
197        b"DELD" => Chunk::DeleteDirectory(ddir::parse(body)?),
198        b"SQPK" => Chunk::Sqpk(sqpk::parse_sqpk(body)?),
199        _ => return Err(ParseError::UnknownChunkTag(ChunkTag::new(tag))),
200    };
201
202    Ok(ParsedChunk {
203        chunk,
204        tag: ChunkTag::new(tag),
205        consumed,
206    })
207}
208
// Byte length of the fixed SqpkAddData header that precedes the inline data
// payload, written as the sum of its fields: 3 pad bytes, `main_id` and
// `sub_id` (u16 each), then `file_id`, `block_offset`, `data_bytes` and
// `block_delete` (u32 each). Mirrors
// `add_data::SqpkAddData::DATA_SOURCE_OFFSET` (23) without taking a `u64`
// round-trip; kept private to the framing path.
const SQPK_ADDDATA_HEADER_SIZE: usize = 3 + 2 + 2 + 4 + 4 + 4 + 4;
213
214/// Fast path for SQPK `A` (`SqpkAddData`) chunks.
215///
216/// `AddData` is the largest chunk type by byte volume — payloads of hundreds of
217/// KB to MB are typical. The generic framing path allocates one `Vec<u8>` of
218/// `size` for the whole body, then `binrw`'s derived parser allocates a second
219/// `Vec<u8>` of exactly `data_bytes` and memcpys the inline payload into it.
220/// That second allocation + memcpy dominates parse time for `AddData`.
221///
222/// This function reads the `AddData` fixed header into a stack array, parses
223/// the seven fields directly, allocates the `data` payload at its exact size,
224/// and `read_exact`s the source bytes straight into it — one allocation, no
225/// intermediate copy of the payload.
226///
227/// On entry: `tag` and the 5-byte `prefix` (SQPK `inner_size` + sub-command
228/// byte) have already been consumed from `r`. The remaining bytes are
229/// `[fixed_header: 23 B][data: data_bytes][crc32: 4 B]`.
230fn parse_sqpk_add_data_fast<R: std::io::Read>(
231    r: &mut R,
232    tag: [u8; 4],
233    prefix: [u8; 5],
234    size: usize,
235    verify_checksums: bool,
236) -> Result<ParsedChunk> {
237    // Validate the SQPK inner_size against the outer chunk size, matching the
238    // check in `sqpk::parse_sqpk` so callers see byte-identical error behaviour.
239    let inner_size = i32::from_be_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
240    if inner_size != size {
241        return Err(ParseError::InvalidField {
242            context: "SQPK inner size mismatch",
243        });
244    }
245
246    let mut header = [0u8; SQPK_ADDDATA_HEADER_SIZE];
247    r.read_exact(&mut header)?;
248
249    // SqpkAddData fixed-header layout (all big-endian):
250    //   [0..3]   pad
251    //   [3..5]   main_id   u16
252    //   [5..7]   sub_id    u16
253    //   [7..11]  file_id   u32
254    //   [11..15] block_offset_raw  u32 (<< 7 = bytes)
255    //   [15..19] data_bytes_raw    u32 (<< 7 = bytes)
256    //   [19..23] block_delete_raw  u32 (<< 7 = bytes)
257    let main_id = u16::from_be_bytes([header[3], header[4]]);
258    let sub_id = u16::from_be_bytes([header[5], header[6]]);
259    let file_id = u32::from_be_bytes([header[7], header[8], header[9], header[10]]);
260    let block_offset_raw = u32::from_be_bytes([header[11], header[12], header[13], header[14]]);
261    let data_bytes_raw = u32::from_be_bytes([header[15], header[16], header[17], header[18]]);
262    let block_delete_raw = u32::from_be_bytes([header[19], header[20], header[21], header[22]]);
263
264    let block_offset = (block_offset_raw as u64) << 7;
265    let data_bytes = (data_bytes_raw as u64) << 7;
266    let block_delete_number = (block_delete_raw as u64) << 7;
267
268    // The declared payload length must fit exactly within the chunk body:
269    //   size = 5 (inner_size + sub_cmd) + 23 (fixed header) + data_bytes
270    let expected_data = size - 5 - SQPK_ADDDATA_HEADER_SIZE;
271    if data_bytes as usize != expected_data {
272        return Err(ParseError::InvalidField {
273            context: "SqpkAddData data_bytes does not match SQPK body length",
274        });
275    }
276
277    let mut data = vec![0u8; data_bytes as usize];
278    r.read_exact(&mut data)?;
279
280    let mut crc_buf = [0u8; 4];
281    r.read_exact(&mut crc_buf)?;
282    let expected_crc = u32::from_be_bytes(crc_buf);
283
284    if verify_checksums {
285        // CRC is over `tag ++ body`. The body is split across three disjoint
286        // buffers — feed each segment to the incremental hasher.
287        let mut hasher = crc32fast::Hasher::new();
288        hasher.update(&tag);
289        hasher.update(&prefix);
290        hasher.update(&header);
291        hasher.update(&data);
292        let actual_crc = hasher.finalize();
293        if actual_crc != expected_crc {
294            return Err(ParseError::ChecksumMismatch {
295                tag: ChunkTag::new(tag),
296                expected: expected_crc,
297                actual: actual_crc,
298            });
299        }
300    }
301
302    trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
303
304    let chunk = Chunk::Sqpk(sqpk::SqpkCommand::AddData(Box::new(sqpk::SqpkAddData {
305        target_file: sqpk::SqpackFileId {
306            main_id,
307            sub_id,
308            file_id,
309        },
310        block_offset,
311        data_bytes,
312        block_delete_number,
313        data,
314    })));
315
316    // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
317    let consumed = (size as u64) + 12;
318
319    Ok(ParsedChunk {
320        chunk,
321        tag: ChunkTag::new(tag),
322        consumed,
323    })
324}
325
/// One chunk yielded by [`ZiPatchReader::next_chunk`] together with the
/// stream-position metadata the parser observed while reading it.
///
/// Bundling the chunk with its byte-position metadata in one record lets
/// downstream consumers (the apply driver, the [`crate::index::PlanBuilder`],
/// the `zipatch dump` CLI) avoid a second round of accessor calls against
/// the reader to learn where the chunk sat in the stream. Each field
/// describes one fact the parser knew at the moment the chunk was yielded;
/// see the per-field docs.
///
/// `#[non_exhaustive]`: stream-position metadata may grow (e.g. compressed
/// payload size, header-only byte count) as new index-builder needs surface.
#[non_exhaustive]
#[derive(Debug)]
pub struct ChunkRecord {
    /// The parsed chunk itself.
    pub chunk: Chunk,
    /// The 4-byte ASCII wire tag of the chunk (`FHDR`, `APLY`, `SQPK`, …).
    ///
    /// Exposed alongside [`Self::chunk`] so consumers can attach the tag
    /// to a progress event without re-matching on the [`Chunk`] enum. The
    /// `EOF_` tag never appears here: the reader consumes the terminator
    /// without producing a record for it.
    pub tag: ChunkTag,
    /// Absolute patch-file offset of the chunk's body — the byte right
    /// after the 8-byte `[body_len: u32 BE, tag: [u8; 4]]` frame header.
    ///
    /// Index builders use this to compute absolute patch-file offsets for
    /// `SqpkAddData::data`, `SqpkFile` block payloads, and
    /// `SqpkHeader::header_data` without re-walking the stream.
    pub body_offset: u64,
    /// Running total of bytes consumed from the patch stream, including
    /// the 12-byte magic header, the chunk this record describes, and
    /// every preceding chunk frame.
    ///
    /// Equivalent to the [`crate::ChunkEvent::bytes_read`] field at the
    /// same emission point; used for the `bytes_applied / total_patch_size`
    /// progress-bar ratio.
    pub bytes_read: u64,
}
364
/// Streaming parser over the [`Chunk`]s in a `ZiPatch` stream.
///
/// `ZiPatchReader` wraps any [`std::io::Read`] source and yields one
/// [`ChunkRecord`] per call to [`Self::next_chunk`]. It validates the
/// 12-byte file magic on construction, then reads chunks sequentially
/// until the `EOF_` terminator is encountered or an error occurs.
///
/// # Stream contract
///
/// - **Magic** — the first 12 bytes must be `\x91ZIPATCH\r\n\x1a\n`. Any
///   mismatch returns [`ParseError::InvalidMagic`] from [`ZiPatchReader::new`].
/// - **Framing** — every chunk is a length-prefixed frame:
///   `[body_len: u32 BE] [tag: 4 B] [body: body_len B] [crc32: u32 BE]`.
/// - **CRC32** — computed over `tag ++ body`. Verification is enabled by
///   default; pass `false` to [`ZiPatchReader::with_checksum_verification`]
///   to disable it.
/// - **Termination** — the `EOF_` chunk is consumed internally and causes
///   [`Self::next_chunk`] to return `Ok(None)`. Call
///   [`ZiPatchReader::is_complete`] after iteration to distinguish a clean
///   end from a truncated stream.
/// - **Fused** — once `Ok(None)` (clean EOF) or an `Err(_)` is returned,
///   subsequent calls to `next_chunk` also return `Ok(None)`.
///
/// # Errors
///
/// Each call to [`Self::next_chunk`] returns `Err(e)` on parse failure,
/// then `Ok(None)` on all future calls. Possible errors include:
/// - [`ParseError::TruncatedPatch`] — stream ended before `EOF_`.
/// - [`ParseError::OversizedChunk`] — a declared chunk body exceeds the
///   configured max chunk size (default [`DEFAULT_MAX_CHUNK_SIZE`], 512 MiB).
/// - [`ParseError::ChecksumMismatch`] — CRC32 verification failed.
/// - [`ParseError::UnknownChunkTag`] — unrecognised 4-byte tag.
/// - [`ParseError::InvalidField`] — a length field inside a chunk body is
///   inconsistent with the frame (e.g. an SQPK inner-size mismatch).
/// - [`ParseError::Io`] — underlying I/O failure.
///
/// # Async usage
///
/// `ZiPatchReader` is a synchronous parser over a [`std::io::Read`]
/// source — see the crate-level "Async usage" section for the rationale.
/// Async consumers wrap iteration (and any apply call that drives it)
/// in `tokio::task::spawn_blocking`. To stream a patch that is itself
/// arriving over an async transport (e.g. `reqwest::Response::bytes_stream`),
/// either buffer it through a `tempfile::NamedTempFile` and feed the
/// reopened [`std::fs::File`] to [`ZiPatchReader::new`], or bridge with a
/// blocking-reader adapter that pulls from a
/// [`tokio::sync::mpsc`-equivalent](std::sync::mpsc) channel populated
/// by the async download task.
///
/// # Example
///
/// Build a minimal in-memory patch (magic + `ADIR` + `EOF_`) and walk it:
///
/// ```rust
/// use std::io::Cursor;
/// use zipatch_rs::{Chunk, ZiPatchReader};
///
/// // Helper: wrap tag + body into a correctly framed chunk with CRC32.
/// fn make_chunk(tag: &[u8; 4], body: &[u8]) -> Vec<u8> {
///     let mut crc_input = Vec::new();
///     crc_input.extend_from_slice(tag);
///     crc_input.extend_from_slice(body);
///     let crc = crc32fast::hash(&crc_input);
///
///     let mut out = Vec::new();
///     out.extend_from_slice(&(body.len() as u32).to_be_bytes());
///     out.extend_from_slice(tag);
///     out.extend_from_slice(body);
///     out.extend_from_slice(&crc.to_be_bytes());
///     out
/// }
///
/// // 12-byte ZiPatch magic.
/// let magic: [u8; 12] = [0x91, 0x5A, 0x49, 0x50, 0x41, 0x54, 0x43, 0x48, 0x0D, 0x0A, 0x1A, 0x0A];
///
/// // ADIR body: u32 BE name_len (7) + b"created".
/// let mut adir_body = Vec::new();
/// adir_body.extend_from_slice(&7u32.to_be_bytes());
/// adir_body.extend_from_slice(b"created");
///
/// let mut patch = Vec::new();
/// patch.extend_from_slice(&magic);
/// patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
/// patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
///
/// let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
/// let mut chunks = Vec::new();
/// while let Some(rec) = reader.next_chunk().unwrap() {
///     chunks.push(rec.chunk);
/// }
///
/// assert_eq!(chunks.len(), 1);
/// assert!(matches!(chunks[0], Chunk::AddDirectory(_)));
/// ```
#[derive(Debug)]
pub struct ZiPatchReader<R> {
    // Buffered wrapper around the caller's source; all chunk reads go
    // through it.
    inner: std::io::BufReader<R>,
    // Set once the reader is fused: after `EOF_` was consumed or after any
    // parse error. `next_chunk` returns `Ok(None)` from then on.
    done: bool,
    // Whether `parse_chunk` is asked to verify each frame's CRC32.
    verify_checksums: bool,
    // Set only when the `EOF_` terminator was actually parsed; distinguishes
    // a clean end from an error-induced stop.
    eof_seen: bool,
    // Running total of bytes consumed from `inner`, including the 12-byte
    // magic header. Updated after each successful `parse_chunk` call.
    pub(crate) bytes_read: u64,
    // Caller-supplied identifier for the patch source. Stamped onto every
    // `SequentialCheckpoint` the apply driver emits so a later
    // `resume_apply_patch` call can refuse a checkpoint that was persisted for
    // a different patch. `None` when the caller has not set one via
    // `with_patch_name`.
    patch_name: Option<String>,
    // Maximum declared body length the parser will accept; chunks declaring a
    // larger `body_len` are rejected with `ParseError::OversizedChunk` before
    // any allocation. Defaults to `DEFAULT_MAX_CHUNK_SIZE`.
    max_chunk_size: u32,
}
477
478impl<R: std::io::Read> ZiPatchReader<R> {
479    /// Wrap `reader` and validate the leading 12-byte `ZiPatch` magic.
480    ///
481    /// Consumes exactly 12 bytes from `reader`. The magic is the byte sequence
482    /// `0x91 0x5A 0x49 0x50 0x41 0x54 0x43 0x48 0x0D 0x0A 0x1A 0x0A`
483    /// (i.e. `\x91ZIPATCH\r\n\x1a\n`).
484    ///
485    /// The reader is wrapped in a [`std::io::BufReader`] internally, so the
486    /// many small typed reads the chunk parser issues (4-byte size, 4-byte
487    /// tag, 5-byte SQPK prefix, …) coalesce into a small number of syscalls.
488    /// Callers do not need to pre-wrap a raw [`std::fs::File`] or other
489    /// unbuffered source.
490    ///
491    /// CRC32 verification is **enabled** by default. Call
492    /// [`ZiPatchReader::with_checksum_verification`] with `false` before
493    /// iterating to disable it.
494    ///
495    /// # Errors
496    ///
497    /// - [`ParseError::InvalidMagic`] — the first 12 bytes do not match the
498    ///   expected magic.
499    /// - [`ParseError::Io`] — an I/O error occurred while reading the magic.
500    pub fn new(reader: R) -> Result<Self> {
501        let mut reader = std::io::BufReader::new(reader);
502        let magic = reader.read_exact_vec(12)?;
503        if magic.as_slice() != MAGIC {
504            return Err(ParseError::InvalidMagic);
505        }
506        Ok(Self {
507            inner: reader,
508            done: false,
509            verify_checksums: true,
510            eof_seen: false,
511            // The 12-byte magic header has already been consumed.
512            bytes_read: 12,
513            patch_name: None,
514            max_chunk_size: DEFAULT_MAX_CHUNK_SIZE,
515        })
516    }
517
518    /// Set the upper bound on a single chunk's declared body length, in
519    /// bytes.
520    ///
521    /// The parser rejects any chunk whose `body_len` exceeds `bytes` with
522    /// [`ParseError::OversizedChunk`] before allocating space for its body.
523    /// Defaults to [`DEFAULT_MAX_CHUNK_SIZE`] (512 MiB). Raise it for
524    /// patches with unusually large chunks; lower it when applying untrusted
525    /// streams to bound the parser's worst-case allocation.
526    ///
527    /// # Panics
528    ///
529    /// Panics if `bytes` is zero — a zero ceiling rejects every chunk and
530    /// is a programming error.
531    #[must_use]
532    pub fn with_max_chunk_size(mut self, bytes: u32) -> Self {
533        assert!(bytes > 0, "with_max_chunk_size(0) is invalid");
534        self.max_chunk_size = bytes;
535        self
536    }
537
538    /// Returns the configured maximum chunk-body length, in bytes.
539    #[must_use]
540    pub fn max_chunk_size(&self) -> u32 {
541        self.max_chunk_size
542    }
543
544    /// Attach a human-readable identifier to this patch stream.
545    ///
546    /// The identifier is stamped onto every
547    /// [`SequentialCheckpoint`](crate::apply::SequentialCheckpoint) the apply
548    /// driver emits so a future
549    /// [`resume_apply_patch`](crate::ApplyConfig::resume_apply_patch) call can
550    /// detect a checkpoint that was persisted for a different patch and
551    /// refuse to resume from it.
552    ///
553    /// Typical value is the patch filename (e.g. `"H2017.07.11.0000.0000a.patch"`).
554    /// No interpretation is performed — the string is compared verbatim.
555    #[must_use]
556    pub fn with_patch_name(mut self, name: impl Into<String>) -> Self {
557        self.patch_name = Some(name.into());
558        self
559    }
560
561    /// Returns the caller-supplied patch identifier, if any.
562    ///
563    /// Set by [`Self::with_patch_name`]; `None` otherwise.
564    #[must_use]
565    pub fn patch_name(&self) -> Option<&str> {
566        self.patch_name.as_deref()
567    }
568
569    /// Mutable access to the wrapped [`std::io::BufReader`].
570    ///
571    /// Used by [`crate::ApplyConfig::resume_apply_patch`] to seek the
572    /// underlying source for the patch-size measurement at entry. Not
573    /// part of the stable API — seeking the inner reader while a chunk
574    /// parse is in flight would desync `bytes_read` and break later
575    /// iteration.
576    pub(crate) fn inner_mut(&mut self) -> &mut std::io::BufReader<R> {
577        &mut self.inner
578    }
579
580    /// Toggle per-chunk CRC32 verification.
581    ///
582    /// Verification is **enabled** by default after [`ZiPatchReader::new`].
583    /// Pass `false` to skip CRC checks — useful when the source has already
584    /// been verified out-of-band (e.g. a download hash was checked before the
585    /// file was opened), or when processing known-good test data where the
586    /// overhead is unnecessary.
587    #[must_use]
588    pub fn with_checksum_verification(mut self, on: bool) -> Self {
589        self.verify_checksums = on;
590        self
591    }
592
593    /// Returns `true` if iteration reached the `EOF_` terminator cleanly.
594    ///
595    /// A `false` return after `next()` yields `None` indicates the stream was
596    /// truncated — the download or file copy was incomplete. In that case the
597    /// iterator stopped because of a [`ParseError::TruncatedPatch`] error,
598    /// not because the patch finished normally.
599    pub fn is_complete(&self) -> bool {
600        self.eof_seen
601    }
602
603    /// Returns the running total of bytes consumed from the patch stream.
604    ///
605    /// Starts at `12` after [`ZiPatchReader::new`] (the magic header has been
606    /// read) and increases monotonically by the size of each chunk's wire
607    /// frame after each successful [`Self::next_chunk`] call. Includes the
608    /// `EOF_` terminator's frame.
609    ///
610    /// On parse error, the counter is **not** advanced past the failing
611    /// chunk — it reflects the byte offset at the start of that chunk's
612    /// length prefix, not the broken position somewhere inside its frame.
613    ///
614    /// Per-chunk consumers should read the equivalent counter off the
615    /// [`ChunkRecord::bytes_read`] field. This getter is for end-of-stream
616    /// reporting — after [`Self::next_chunk`] returned `Ok(None)`, no
617    /// [`ChunkRecord`] is produced for the consumed `EOF_` frame, so the
618    /// final stream position is only available through this method.
619    #[must_use]
620    pub fn bytes_read(&self) -> u64 {
621        self.bytes_read
622    }
623
624    /// Read the next chunk frame from the underlying stream.
625    ///
626    /// Returns `Ok(Some(record))` for each successfully parsed chunk in
627    /// stream order, `Ok(None)` after the `EOF_` terminator has been
628    /// consumed (the terminator itself is never surfaced as a record), and
629    /// `Err(_)` on a parse failure. After `Ok(None)` or any `Err(_)`,
630    /// subsequent calls return `Ok(None)` — the reader is fused.
631    ///
632    /// # Errors
633    ///
634    /// See [`Self`]'s "Errors" section.
635    pub fn next_chunk(&mut self) -> Result<Option<ChunkRecord>> {
636        if self.done {
637            return Ok(None);
638        }
639        // Snapshot the body offset before parsing so a successful parse can
640        // commit it without re-walking the stream. The chunk body begins after
641        // the 8-byte `[body_len: u32 BE, tag: [u8; 4]]` frame header.
642        let body_offset = self.bytes_read + 8;
643        match parse_chunk(&mut self.inner, self.verify_checksums, self.max_chunk_size) {
644            Ok(ParsedChunk {
645                chunk: Chunk::EndOfFile,
646                consumed,
647                ..
648            }) => {
649                self.bytes_read += consumed;
650                self.done = true;
651                self.eof_seen = true;
652                Ok(None)
653            }
654            Ok(ParsedChunk {
655                chunk,
656                tag,
657                consumed,
658            }) => {
659                self.bytes_read += consumed;
660                Ok(Some(ChunkRecord {
661                    chunk,
662                    tag,
663                    body_offset,
664                    bytes_read: self.bytes_read,
665                }))
666            }
667            Err(e) => {
668                self.done = true;
669                Err(e)
670            }
671        }
672    }
673}
674
675/// Open the file at `path` and validate the `ZiPatch` magic, returning a
676/// ready-to-iterate [`ZiPatchReader`].
677///
678/// The concrete inner reader type is intentionally hidden behind `impl
679/// Read` so the choice of source and any buffering strategy remain
680/// implementation details. Callers that need to name the type should
681/// construct a reader of their choice and pass it to
682/// [`ZiPatchReader::new`].
683///
684/// # Errors
685///
686/// - [`ParseError::Io`] — the file could not be opened.
687/// - [`ParseError::InvalidMagic`] — the file does not start with the
688///   `ZiPatch` magic bytes.
689pub fn open_patch(
690    path: impl AsRef<std::path::Path>,
691) -> crate::ParseResult<ZiPatchReader<impl std::io::Read + 'static>> {
692    let file = std::fs::File::open(path)?;
693    ZiPatchReader::new(file)
694}
695
696#[cfg(test)]
697mod tests {
698    use super::*;
699    use crate::test_utils::make_chunk;
700    use std::io::Cursor;
701
702    // --- parse_chunk error paths ---
703
704    #[test]
705    fn truncated_at_chunk_boundary_yields_truncated_patch() {
706        // Magic + no chunks: parse_chunk must see EOF on the body_len read and
707        // convert it to TruncatedPatch.
708        let mut patch = Vec::new();
709        patch.extend_from_slice(&MAGIC);
710        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
711        match reader.next_chunk() {
712            Err(ParseError::TruncatedPatch) => {}
713            other => panic!("expected TruncatedPatch, got {other:?}"),
714        }
715        assert!(!reader.is_complete(), "stream is not clean-ended");
716    }
717
    #[test]
    fn non_eof_io_error_on_body_len_read_propagates_as_io() {
        // Exercises the catch-all `Err(e) => return Err(e)` arm of the
        // `body_len` read in `parse_chunk`: an I/O error that is NOT
        // UnexpectedEof must propagate verbatim.
        // We trigger this by passing a reader that errors immediately.
        struct BrokenReader;
        impl std::io::Read for BrokenReader {
            fn read(&mut self, _: &mut [u8]) -> std::io::Result<usize> {
                Err(std::io::Error::new(
                    std::io::ErrorKind::BrokenPipe,
                    "simulated broken pipe",
                ))
            }
        }
        let result = parse_chunk(&mut BrokenReader, false, DEFAULT_MAX_CHUNK_SIZE);
        match result {
            Err(ParseError::Io { source: e }) => {
                assert_eq!(
                    e.kind(),
                    std::io::ErrorKind::BrokenPipe,
                    "non-EOF I/O error must propagate unchanged, got kind {:?}",
                    e.kind()
                );
            }
            Err(other) => panic!("expected ParseError::Io(BrokenPipe), got {other:?}"),
            Ok(_) => panic!("expected an error, got Ok"),
        }
    }
746
747    #[test]
748    fn truncated_after_one_chunk_yields_truncated_patch() {
749        // Magic + one well-formed ADIR + no more bytes: the second call to
750        // next() must surface TruncatedPatch, not None.
751        let mut adir_body = Vec::new();
752        adir_body.extend_from_slice(&4u32.to_be_bytes());
753        adir_body.extend_from_slice(b"test");
754        let chunk = make_chunk(b"ADIR", &adir_body);
755
756        let mut patch = Vec::new();
757        patch.extend_from_slice(&MAGIC);
758        patch.extend_from_slice(&chunk);
759
760        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
761        let first = reader.next_chunk();
762        assert!(
763            matches!(first, Ok(Some(_))),
764            "first ADIR chunk should parse cleanly: {first:?}"
765        );
766        match reader.next_chunk() {
767            Err(ParseError::TruncatedPatch) => {}
768            other => panic!("expected TruncatedPatch on truncated stream, got {other:?}"),
769        }
770        assert!(
771            !reader.is_complete(),
772            "is_complete must be false after truncation"
773        );
774    }
775
776    #[test]
777    fn checksum_mismatch_returns_checksum_mismatch_error() {
778        // Corrupt the CRC32 field of an otherwise valid ADIR chunk and verify
779        // that parse_chunk returns ChecksumMismatch (not a panic or a wrong error).
780        let mut adir_body = Vec::new();
781        adir_body.extend_from_slice(&4u32.to_be_bytes());
782        adir_body.extend_from_slice(b"test");
783        let mut chunk = make_chunk(b"ADIR", &adir_body);
784        // Flip the last byte of the CRC32 field.
785        let last = chunk.len() - 1;
786        chunk[last] ^= 0xFF;
787
788        let mut cur = Cursor::new(chunk);
789        let result = parse_chunk(&mut cur, true, DEFAULT_MAX_CHUNK_SIZE);
790        assert!(
791            matches!(result, Err(ParseError::ChecksumMismatch { .. })),
792            "corrupted CRC must yield ChecksumMismatch"
793        );
794    }
795
796    #[test]
797    fn unknown_chunk_tag_returns_unknown_chunk_tag_error() {
798        // A tag of all-Z bytes is not recognised; parse_chunk must return
799        // UnknownChunkTag carrying the raw 4-byte tag.
800        let chunk = make_chunk(b"ZZZZ", &[]);
801        let mut cur = Cursor::new(chunk);
802        match parse_chunk(&mut cur, false, DEFAULT_MAX_CHUNK_SIZE) {
803            Err(ParseError::UnknownChunkTag(tag)) => {
804                assert_eq!(
805                    tag,
806                    ChunkTag::new(*b"ZZZZ"),
807                    "tag bytes must be preserved in error"
808                );
809            }
810            Err(other) => panic!("expected UnknownChunkTag, got {other:?}"),
811            Ok(_) => panic!("expected UnknownChunkTag, got Ok"),
812        }
813    }
814
815    #[test]
816    fn default_max_chunk_size_matches_constant() {
817        let mut patch = Vec::new();
818        patch.extend_from_slice(&MAGIC);
819        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
820        let reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
821        assert_eq!(reader.max_chunk_size(), DEFAULT_MAX_CHUNK_SIZE);
822    }
823
824    #[test]
825    fn with_max_chunk_size_overrides_default() {
826        let mut patch = Vec::new();
827        patch.extend_from_slice(&MAGIC);
828        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
829        let reader = ZiPatchReader::new(Cursor::new(patch))
830            .unwrap()
831            .with_max_chunk_size(4096);
832        assert_eq!(reader.max_chunk_size(), 4096);
833    }
834
835    #[test]
836    #[should_panic(expected = "with_max_chunk_size(0) is invalid")]
837    fn with_max_chunk_size_zero_panics() {
838        let mut patch = Vec::new();
839        patch.extend_from_slice(&MAGIC);
840        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
841        let _ = ZiPatchReader::new(Cursor::new(patch))
842            .unwrap()
843            .with_max_chunk_size(0);
844    }
845
846    #[test]
847    fn custom_max_chunk_size_rejects_chunks_above_threshold() {
848        // ADIR body of 9 bytes (4 len + 5 ascii) → frame body_len = 9. With
849        // max_chunk_size = 4, the parser must reject it as Oversized.
850        let mut adir_body = Vec::new();
851        adir_body.extend_from_slice(&5u32.to_be_bytes());
852        adir_body.extend_from_slice(b"hello");
853        let chunk = make_chunk(b"ADIR", &adir_body);
854
855        let mut patch = Vec::new();
856        patch.extend_from_slice(&MAGIC);
857        patch.extend_from_slice(&chunk);
858
859        let mut reader = ZiPatchReader::new(Cursor::new(patch))
860            .unwrap()
861            .with_max_chunk_size(4);
862        match reader.next_chunk() {
863            Err(ParseError::OversizedChunk(size)) => assert_eq!(size, 9),
864            other => panic!("expected OversizedChunk(9), got {other:?}"),
865        }
866    }
867
868    #[test]
869    fn oversized_chunk_body_len_returns_oversized_chunk_error() {
870        // body_len == u32::MAX (> 512 MiB) must be rejected before any allocation.
871        let bytes = [0xFFu8, 0xFF, 0xFF, 0xFF];
872        let mut cur = Cursor::new(&bytes[..]);
873        let Err(ParseError::OversizedChunk(size)) =
874            parse_chunk(&mut cur, false, DEFAULT_MAX_CHUNK_SIZE)
875        else {
876            panic!("expected OversizedChunk for u32::MAX body_len")
877        };
878        assert!(
879            size > DEFAULT_MAX_CHUNK_SIZE as usize,
880            "reported size {size} must exceed DEFAULT_MAX_CHUNK_SIZE {DEFAULT_MAX_CHUNK_SIZE}"
881        );
882    }
883
884    // --- ZiPatchReader byte-counter and per-record metadata ---
885
886    #[test]
887    fn bytes_read_starts_at_12_before_first_chunk() {
888        // The magic header is 12 bytes; bytes_read must reflect that immediately
889        // after construction, before any chunk is read.
890        let mut patch = Vec::new();
891        patch.extend_from_slice(&MAGIC);
892        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
893        let reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
894        assert_eq!(
895            reader.bytes_read(),
896            12,
897            "bytes_read must be 12 (magic only) before iteration starts"
898        );
899    }
900
901    #[test]
902    fn record_carries_tag_body_offset_and_bytes_read() {
903        // MAGIC + ADIR("a") + EOF_ — verify the per-record metadata matches
904        // the expected frame sizes and offsets.
905        let mut adir_body = Vec::new();
906        adir_body.extend_from_slice(&1u32.to_be_bytes());
907        adir_body.extend_from_slice(b"a");
908        // ADIR frame: 4(size) + 4(tag) + 5(body) + 4(crc) = 17 bytes
909        // EOF_  frame: 4 + 4 + 0 + 4 = 12 bytes
910
911        let mut patch = Vec::new();
912        patch.extend_from_slice(&MAGIC);
913        patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
914        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
915
916        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
917        assert_eq!(reader.bytes_read(), 12, "pre-read: magic only");
918
919        let rec = reader.next_chunk().unwrap().expect("first ADIR record");
920        assert!(
921            matches!(rec.chunk, Chunk::AddDirectory(_)),
922            "first chunk must be ADIR"
923        );
924        assert_eq!(rec.tag, ChunkTag::ADIR);
925        // ADIR body sits after magic(12) + body_len(4) + tag(4) = 20.
926        assert_eq!(rec.body_offset, 20);
927        assert_eq!(rec.bytes_read, 12 + 17, "magic + ADIR frame");
928
929        assert!(
930            reader.next_chunk().unwrap().is_none(),
931            "EOF_ must terminate iteration"
932        );
933        assert_eq!(
934            reader.bytes_read(),
935            12 + 17 + 12,
936            "after EOF_: magic + ADIR + EOF_ frames"
937        );
938        assert!(reader.is_complete(), "is_complete must be true after EOF_");
939    }
940
941    #[test]
942    fn bytes_read_is_monotonically_non_decreasing() {
943        // Stream with two ADIR chunks + EOF_ — verify bytes_read only ever
944        // increases between calls to next_chunk() and that consuming the EOF_
945        // chunk (whose body is empty but whose frame is 12 bytes) still
946        // advances the counter past the last non-EOF position.
947        let make_adir = |name: &[u8]| -> Vec<u8> {
948            let mut body = Vec::new();
949            body.extend_from_slice(&(name.len() as u32).to_be_bytes());
950            body.extend_from_slice(name);
951            make_chunk(b"ADIR", &body)
952        };
953
954        let mut patch = Vec::new();
955        patch.extend_from_slice(&MAGIC);
956        patch.extend_from_slice(&make_adir(b"a"));
957        patch.extend_from_slice(&make_adir(b"bb"));
958        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
959
960        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
961        let mut prev = reader.bytes_read();
962        while let Some(rec) = reader.next_chunk().unwrap() {
963            let current = rec.bytes_read;
964            assert_eq!(
965                current,
966                reader.bytes_read(),
967                "record's bytes_read must equal reader's running counter"
968            );
969            assert!(
970                current > prev,
971                "non-empty ADIR frame must strictly advance bytes_read: \
972                 {prev} -> {current}"
973            );
974            prev = current;
975        }
976        // EOF_ has been consumed: its 12-byte empty-body frame must have
977        // pushed the counter past the previous position.
978        assert!(
979            reader.bytes_read() > prev,
980            "consuming EOF_ must advance bytes_read by its 12-byte frame: \
981             {prev} -> {}",
982            reader.bytes_read()
983        );
984    }
985
986    // --- open_patch constructor ---
987
988    #[test]
989    fn open_patch_opens_minimal_patch_and_reaches_eof() {
990        let mut bytes = Vec::new();
991        bytes.extend_from_slice(&MAGIC);
992        bytes.extend_from_slice(&make_chunk(b"EOF_", &[]));
993
994        let tmp = tempfile::tempdir().unwrap();
995        let file_path = tmp.path().join("test.patch");
996        std::fs::write(&file_path, &bytes).unwrap();
997
998        let mut reader = open_patch(&file_path).expect("open_patch must open valid patch");
999        assert!(
1000            reader.next_chunk().unwrap().is_none(),
1001            "EOF_ must terminate iteration immediately"
1002        );
1003        assert!(reader.is_complete(), "is_complete must be true after EOF_");
1004    }
1005
1006    #[test]
1007    fn open_patch_returns_io_error_when_file_is_missing() {
1008        let tmp = tempfile::tempdir().unwrap();
1009        let file_path = tmp.path().join("nonexistent.patch");
1010        assert!(
1011            matches!(open_patch(&file_path), Err(ParseError::Io { .. })),
1012            "open_patch on a missing file must return ParseError::Io"
1013        );
1014    }
1015
1016    // --- Fused-ness and is_complete ---
1017
1018    #[test]
1019    fn reader_is_fused_after_error() {
1020        // Once next_chunk yields Err(_), all subsequent calls must yield Ok(None).
1021        let mut patch = Vec::new();
1022        patch.extend_from_slice(&MAGIC);
1023        patch.extend_from_slice(&make_chunk(b"ZZZZ", &[])); // unknown tag → error
1024
1025        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
1026        let first = reader.next_chunk();
1027        assert!(
1028            matches!(first, Err(ParseError::UnknownChunkTag(_))),
1029            "first call must yield the error: {first:?}"
1030        );
1031        // All subsequent calls must return Ok(None).
1032        assert!(
1033            matches!(reader.next_chunk(), Ok(None)),
1034            "fused: must return Ok(None) after error"
1035        );
1036        assert!(
1037            matches!(reader.next_chunk(), Ok(None)),
1038            "fused: still Ok(None) on third call"
1039        );
1040    }
1041
1042    #[test]
1043    fn is_complete_false_until_eof_seen() {
1044        let mut adir_body = Vec::new();
1045        adir_body.extend_from_slice(&1u32.to_be_bytes());
1046        adir_body.extend_from_slice(b"x");
1047
1048        let mut patch = Vec::new();
1049        patch.extend_from_slice(&MAGIC);
1050        patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
1051        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
1052
1053        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
1054        assert!(
1055            !reader.is_complete(),
1056            "not complete before reading anything"
1057        );
1058        reader.next_chunk().unwrap().unwrap(); // consume ADIR
1059        assert!(
1060            !reader.is_complete(),
1061            "not complete after ADIR, before EOF_"
1062        );
1063        assert!(reader.next_chunk().unwrap().is_none(), "EOF_ consumed");
1064        assert!(reader.is_complete(), "complete after EOF_ consumed");
1065    }
1066}