Skip to main content

zipatch_rs/chunk/
mod.rs

1//! Wire-format chunk types and the [`ZiPatchReader`] streaming parser.
2//!
3//! This module is the parsing layer: it decodes the raw `ZiPatch` byte
4//! stream into a stream of typed [`Chunk`] values. Each top-level
5//! variant corresponds to one 4-byte ASCII wire tag (`FHDR`, `APLY`,
6//! `SQPK`, …); the per-variant submodules below own the binary layout for
7//! their body. Nothing in this module touches the filesystem — apply-time
8//! effects live in [`crate::apply`].
9//!
10//! The [`ZiPatchReader`] parser validates the 12-byte file magic on
11//! construction, then yields one [`ChunkRecord`](crate::chunk::ChunkRecord) per
12//! [`ZiPatchReader::next_chunk`] call until the internal `EOF_` terminator
13//! is consumed or a parse error surfaces.
14
15pub(crate) mod adir;
16pub(crate) mod afsp;
17pub(crate) mod aply;
18pub(crate) mod ddir;
19pub(crate) mod fhdr;
20pub(crate) mod sqpk;
21pub(crate) mod util;
22
23pub use adir::AddDirectory;
24pub use afsp::ApplyFreeSpace;
25pub use aply::{ApplyOption, ApplyOptionKind};
26pub use ddir::DeleteDirectory;
27pub use fhdr::{FileHeader, FileHeaderV2, FileHeaderV3};
28pub use sqpk::{
29    IndexCommand, SqpackFileId, SqpkAddData, SqpkCommand, SqpkCompressedBlock, SqpkDeleteData,
30    SqpkExpandData, SqpkFile, SqpkFileOperation, SqpkHeader, SqpkHeaderTarget, SqpkIndex,
31    SqpkPatchInfo, SqpkTargetInfo, TargetFileKind, TargetHeaderKind,
32};
33
34use crate::newtypes::ChunkTag;
35use crate::{ParseError, ParseResult as Result};
36use std::io::Read;
37use tracing::trace;
38
/// The 12-byte signature that must open every `ZiPatch` stream:
/// `\x91ZIPATCH\r\n\x1a\n`.
const MAGIC: [u8; 12] = *b"\x91ZIPATCH\r\n\x1a\n";
42
/// Default ceiling on a single chunk's declared body length: 512 MiB.
///
/// [`ZiPatchReader::new`] installs this limit so that a pathological stream
/// cannot push the parser into an enormous allocation. A different limit can
/// be chosen per reader with [`ZiPatchReader::with_max_chunk_size`].
pub const DEFAULT_MAX_CHUNK_SIZE: u32 = 512 << 20;
49
/// One top-level chunk parsed from a `ZiPatch` stream.
///
/// Each variant corresponds to a 4-byte ASCII wire tag. The tag-to-variant
/// dispatch happens in `parse_chunk`; a tag outside the set listed here is
/// rejected with [`ParseError::UnknownChunkTag`] rather than mapped to a
/// variant.
///
/// # Observed frequency
///
/// SE's XIVARR+ patch files almost exclusively contain `FHDR`, `APLY`, and
/// `SQPK` chunks. `ADIR`/`DELD` can theoretically appear and are implemented,
/// but are rarely emitted in practice. `APFS` has never been observed in modern
/// patches and is treated as a no-op. `EOF_` is consumed by [`ZiPatchReader`]
/// and is never yielded to the caller.
#[derive(Debug)]
pub enum Chunk {
    /// `FHDR` — the first chunk in every patch file; carries version and
    /// per-version patch metadata. See [`FileHeader`] for the versioned body.
    FileHeader(FileHeader),
    /// `APLY` — sets or clears a boolean apply-time flag on the
    /// [`crate::ApplyConfig`] (e.g. "ignore missing files"). See [`ApplyOption`].
    ApplyOption(ApplyOption),
    /// `APFS` — free-space book-keeping emitted by old patcher tooling; treated
    /// as a no-op at apply time. See [`ApplyFreeSpace`].
    ApplyFreeSpace(ApplyFreeSpace),
    /// `ADIR` — instructs the patcher to create a directory under the game
    /// install root. See [`AddDirectory`].
    AddDirectory(AddDirectory),
    /// `DELD` — instructs the patcher to remove a directory under the game
    /// install root. See [`DeleteDirectory`].
    DeleteDirectory(DeleteDirectory),
    /// `SQPK` — the workhorse chunk; wraps one of eight sub-commands that
    /// add, delete, expand, or replace `SqPack` data. See [`SqpkCommand`].
    Sqpk(SqpkCommand),
    /// `EOF_` — marks the clean end of the patch stream. [`ZiPatchReader`]
    /// consumes this chunk internally; it is never yielded to the caller.
    /// The variant still exists because `parse_chunk` returns it to the
    /// reader, which uses it to recognise the terminator.
    EndOfFile,
}
85
/// One parsed chunk plus its 4-byte ASCII tag and the byte count consumed
/// from the input stream by its frame.
///
/// Returned by [`parse_chunk`]. The `consumed` count is exactly the size of
/// the chunk's on-wire frame: `4 (body_len) + 4 (tag) + body_len + 4 (crc32)`
/// = `body_len + 12`. This is what
/// [`ZiPatchReader`](crate::ZiPatchReader) accumulates into its running
/// byte counter for progress reporting.
pub(crate) struct ParsedChunk {
    /// The decoded chunk body (may be [`Chunk::EndOfFile`]; filtering the
    /// terminator is the reader's job, not the frame parser's).
    pub(crate) chunk: Chunk,
    /// The raw 4-byte wire tag read from the frame header.
    pub(crate) tag: ChunkTag,
    /// Total frame size in bytes: `body_len + 12`.
    pub(crate) consumed: u64,
}
99
100/// Parse one chunk frame from `r`.
101///
102/// # Wire framing
103///
104/// Each chunk is laid out as:
105///
106/// ```text
107/// [body_len: u32 BE] [tag: 4 bytes] [body: body_len bytes] [crc32: u32 BE]
108/// ```
109///
110/// The CRC32 is computed over `tag ++ body` (not over `body_len`). When
111/// `verify_checksums` is `true` and the stored CRC does not match the computed
112/// one, [`ParseError::ChecksumMismatch`] is returned.
113///
114/// # Errors
115///
116/// - [`ParseError::TruncatedPatch`] — the reader returns EOF while reading
117///   the `body_len` field (i.e. no more chunks are present but `EOF_` was
118///   never seen).
119/// - [`ParseError::OversizedChunk`] — `body_len` exceeds `max_chunk_size`.
120/// - [`ParseError::ChecksumMismatch`] — CRC32 mismatch (only when
121///   `verify_checksums` is `true`).
122/// - [`ParseError::UnknownChunkTag`] — tag is not recognised.
123/// - [`ParseError::Io`] — any other I/O failure reading from `r`.
124pub(crate) fn parse_chunk<R: std::io::Read>(
125    r: &mut R,
126    verify_checksums: bool,
127    max_chunk_size: u32,
128) -> Result<ParsedChunk> {
129    let mut size_buf = [0u8; 4];
130    let size = match r.read_exact(&mut size_buf) {
131        Ok(()) => u32::from_be_bytes(size_buf) as usize,
132        Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
133            return Err(ParseError::TruncatedPatch);
134        }
135        Err(e) => return Err(e.into()),
136    };
137    if size > max_chunk_size as usize {
138        return Err(ParseError::OversizedChunk(size));
139    }
140
141    // Tag (4 B) and CRC (4 B) are always present regardless of body shape.
142    let mut tag = [0u8; 4];
143    r.read_exact(&mut tag)?;
144
145    // Peek at the first 5 bytes of the body without committing to either the
146    // generic single-allocation path or the SQPK `A` zero-copy-into-data path.
147    // For SQPK chunks, those 5 bytes are `[inner_size: i32 BE][sub_cmd: u8]`.
148    // For chunks with bodies shorter than 5 bytes (e.g. `EOF_`), we still read
149    // exactly `size` bytes into the prefix array and leave the rest zero.
150    let mut prefix = [0u8; 5];
151    let prefix_len = size.min(5);
152    if prefix_len > 0 {
153        r.read_exact(&mut prefix[..prefix_len])?;
154    }
155
156    // ---- Fast path: SQPK `A` (SqpkAddData) — see `parse_sqpk_add_data_fast`. ----
157    if &tag == b"SQPK" && size >= 5 + SQPK_ADDDATA_HEADER_SIZE && prefix[4] == b'A' {
158        return parse_sqpk_add_data_fast(r, tag, prefix, size, verify_checksums);
159    }
160
161    // ---- Generic path: one allocation for the whole body. ----
162    let mut body_vec = vec![0u8; size];
163    body_vec[..prefix_len].copy_from_slice(&prefix[..prefix_len]);
164    if size > prefix_len {
165        r.read_exact(&mut body_vec[prefix_len..])?;
166    }
167
168    let mut crc_buf = [0u8; 4];
169    r.read_exact(&mut crc_buf)?;
170    let expected_crc = u32::from_be_bytes(crc_buf);
171
172    if verify_checksums {
173        let mut hasher = crc32fast::Hasher::new();
174        hasher.update(&tag);
175        hasher.update(&body_vec);
176        let actual_crc = hasher.finalize();
177        if actual_crc != expected_crc {
178            return Err(ParseError::ChecksumMismatch {
179                tag: ChunkTag::new(tag),
180                expected: expected_crc,
181                actual: actual_crc,
182            });
183        }
184    }
185
186    trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
187
188    // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
189    let consumed = (size as u64) + 12;
190
191    let body = &body_vec[..];
192
193    let chunk = match &tag {
194        b"EOF_" => Chunk::EndOfFile,
195        b"FHDR" => Chunk::FileHeader(fhdr::parse(body)?),
196        b"APLY" => Chunk::ApplyOption(aply::parse(body)?),
197        b"APFS" => Chunk::ApplyFreeSpace(afsp::parse(body)?),
198        b"ADIR" => Chunk::AddDirectory(adir::parse(body)?),
199        b"DELD" => Chunk::DeleteDirectory(ddir::parse(body)?),
200        b"SQPK" => Chunk::Sqpk(sqpk::parse_sqpk(body)?),
201        _ => return Err(ParseError::UnknownChunkTag(ChunkTag::new(tag))),
202    };
203
204    Ok(ParsedChunk {
205        chunk,
206        tag: ChunkTag::new(tag),
207        consumed,
208    })
209}
210
// Byte length of the SqpkAddData fixed header that sits in front of the
// inline data payload, spelled out field by field:
//   3 (pad) + 2 (main_id) + 2 (sub_id) + 4 (file_id)
//   + 3 * 4 (block_offset, data_bytes, block_delete)
// Mirrors `add_data::SqpkAddData::DATA_SOURCE_OFFSET` (23) without taking a
// `u64` round-trip; kept private to the framing path.
const SQPK_ADDDATA_HEADER_SIZE: usize = 3 + 2 + 2 + 4 + 3 * 4;
215
216/// Fast path for SQPK `A` (`SqpkAddData`) chunks.
217///
218/// `AddData` is the largest chunk type by byte volume — payloads of hundreds of
219/// KB to MB are typical. The generic framing path allocates one `Vec<u8>` of
220/// `size` for the whole body, then `binrw`'s derived parser allocates a second
221/// `Vec<u8>` of exactly `data_bytes` and memcpys the inline payload into it.
222/// That second allocation + memcpy dominates parse time for `AddData`.
223///
224/// This function reads the `AddData` fixed header into a stack array, parses
225/// the seven fields directly, allocates the `data` payload at its exact size,
226/// and `read_exact`s the source bytes straight into it — one allocation, no
227/// intermediate copy of the payload.
228///
229/// On entry: `tag` and the 5-byte `prefix` (SQPK `inner_size` + sub-command
230/// byte) have already been consumed from `r`. The remaining bytes are
231/// `[fixed_header: 23 B][data: data_bytes][crc32: 4 B]`.
232fn parse_sqpk_add_data_fast<R: std::io::Read>(
233    r: &mut R,
234    tag: [u8; 4],
235    prefix: [u8; 5],
236    size: usize,
237    verify_checksums: bool,
238) -> Result<ParsedChunk> {
239    // Validate the SQPK inner_size against the outer chunk size, matching the
240    // check in `sqpk::parse_sqpk` so callers see byte-identical error behaviour.
241    let inner_size = i32::from_be_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
242    if inner_size != size {
243        return Err(ParseError::InvalidField {
244            context: "SQPK inner size mismatch",
245        });
246    }
247
248    let mut header = [0u8; SQPK_ADDDATA_HEADER_SIZE];
249    r.read_exact(&mut header)?;
250
251    // SqpkAddData fixed-header layout (all big-endian):
252    //   [0..3]   pad
253    //   [3..5]   main_id   u16
254    //   [5..7]   sub_id    u16
255    //   [7..11]  file_id   u32
256    //   [11..15] block_offset_raw  u32 (<< 7 = bytes)
257    //   [15..19] data_bytes_raw    u32 (<< 7 = bytes)
258    //   [19..23] block_delete_raw  u32 (<< 7 = bytes)
259    let main_id = u16::from_be_bytes([header[3], header[4]]);
260    let sub_id = u16::from_be_bytes([header[5], header[6]]);
261    let file_id = u32::from_be_bytes([header[7], header[8], header[9], header[10]]);
262    let block_offset_raw = u32::from_be_bytes([header[11], header[12], header[13], header[14]]);
263    let data_bytes_raw = u32::from_be_bytes([header[15], header[16], header[17], header[18]]);
264    let block_delete_raw = u32::from_be_bytes([header[19], header[20], header[21], header[22]]);
265
266    let block_offset = (block_offset_raw as u64) << 7;
267    let data_bytes = (data_bytes_raw as u64) << 7;
268    let block_delete_number = (block_delete_raw as u64) << 7;
269
270    // The declared payload length must fit exactly within the chunk body:
271    //   size = 5 (inner_size + sub_cmd) + 23 (fixed header) + data_bytes
272    let expected_data = size - 5 - SQPK_ADDDATA_HEADER_SIZE;
273    if data_bytes as usize != expected_data {
274        return Err(ParseError::InvalidField {
275            context: "SqpkAddData data_bytes does not match SQPK body length",
276        });
277    }
278
279    let mut data = vec![0u8; data_bytes as usize];
280    r.read_exact(&mut data)?;
281
282    let mut crc_buf = [0u8; 4];
283    r.read_exact(&mut crc_buf)?;
284    let expected_crc = u32::from_be_bytes(crc_buf);
285
286    if verify_checksums {
287        // CRC is over `tag ++ body`. The body is split across three disjoint
288        // buffers — feed each segment to the incremental hasher.
289        let mut hasher = crc32fast::Hasher::new();
290        hasher.update(&tag);
291        hasher.update(&prefix);
292        hasher.update(&header);
293        hasher.update(&data);
294        let actual_crc = hasher.finalize();
295        if actual_crc != expected_crc {
296            return Err(ParseError::ChecksumMismatch {
297                tag: ChunkTag::new(tag),
298                expected: expected_crc,
299                actual: actual_crc,
300            });
301        }
302    }
303
304    trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
305
306    let chunk = Chunk::Sqpk(sqpk::SqpkCommand::AddData(Box::new(sqpk::SqpkAddData {
307        target_file: sqpk::SqpackFileId {
308            main_id,
309            sub_id,
310            file_id,
311        },
312        block_offset,
313        data_bytes,
314        block_delete_number,
315        data,
316    })));
317
318    // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
319    let consumed = (size as u64) + 12;
320
321    Ok(ParsedChunk {
322        chunk,
323        tag: ChunkTag::new(tag),
324        consumed,
325    })
326}
327
/// One chunk yielded by [`ZiPatchReader::next_chunk`] together with the
/// stream-position metadata the parser observed while reading it.
///
/// Bundling the chunk with its byte-position metadata in one record lets
/// downstream consumers (the apply driver, the [`crate::index::PlanBuilder`],
/// the `zipatch dump` CLI) avoid a second round of accessor calls against
/// the reader to learn where the chunk sat in the stream. Each field
/// describes one fact the parser knew at the moment the chunk was yielded;
/// see the per-field docs.
///
/// `#[non_exhaustive]`: stream-position metadata may grow (e.g. compressed
/// payload size, header-only byte count) as new index-builder needs surface.
#[non_exhaustive]
#[derive(Debug)]
pub struct ChunkRecord {
    /// The parsed chunk itself.
    pub chunk: Chunk,
    /// The 4-byte ASCII wire tag of the chunk (`FHDR`, `APLY`, `SQPK`, …).
    ///
    /// Exposed alongside [`Self::chunk`] so consumers can attach the tag
    /// to a progress event without re-matching on the [`Chunk`] enum.
    /// `EOF_` never shows up here: the reader consumes the terminator
    /// without producing a record for it.
    pub tag: ChunkTag,
    /// Absolute patch-file offset of the chunk's body — the byte right
    /// after the 8-byte `[body_len: u32 BE, tag: [u8; 4]]` frame header.
    ///
    /// Index builders use this to compute absolute patch-file offsets for
    /// `SqpkAddData::data`, `SqpkFile` block payloads, and
    /// `SqpkHeader::header_data` without re-walking the stream.
    pub body_offset: u64,
    /// Running total of bytes consumed from the patch stream, including
    /// the 12-byte magic header, the chunk this record describes, and
    /// every preceding chunk frame.
    ///
    /// Equivalent to the [`crate::ChunkEvent::bytes_read`] field at the
    /// same emission point; used for the `bytes_applied / total_patch_size`
    /// progress-bar ratio.
    pub bytes_read: u64,
}
366
367/// Streaming parser over the [`Chunk`]s in a `ZiPatch` stream.
368///
369/// `ZiPatchReader` wraps any [`std::io::Read`] source and yields one
370/// [`ChunkRecord`] per call to [`Self::next_chunk`]. It validates the
371/// 12-byte file magic on construction, then reads chunks sequentially
372/// until the `EOF_` terminator is encountered or an error occurs.
373///
374/// # Stream contract
375///
376/// - **Magic** — the first 12 bytes must be `\x91ZIPATCH\r\n\x1a\n`. Any
377///   mismatch returns [`ParseError::InvalidMagic`] from [`ZiPatchReader::new`].
378/// - **Framing** — every chunk is a length-prefixed frame:
379///   `[body_len: u32 BE] [tag: 4 B] [body: body_len B] [crc32: u32 BE]`.
380/// - **CRC32** — computed over `tag ++ body`. Verification is enabled by
381///   default; pass `false` to [`ZiPatchReader::with_checksum_verification`]
382///   to disable it.
383/// - **Termination** — the `EOF_` chunk is consumed internally and causes
384///   [`Self::next_chunk`] to return `Ok(None)`. Call
385///   [`ZiPatchReader::is_complete`] after iteration to distinguish a clean
386///   end from a truncated stream.
387/// - **Fused** — once `Ok(None)` (clean EOF) or an `Err(_)` is returned,
388///   subsequent calls to `next_chunk` also return `Ok(None)`.
389///
390/// # Errors
391///
392/// Each call to [`Self::next_chunk`] returns `Err(e)` on parse failure,
393/// then `Ok(None)` on all future calls. Possible errors include:
394/// - [`ParseError::TruncatedPatch`] — stream ended before `EOF_`.
395/// - [`ParseError::OversizedChunk`] — a declared chunk body exceeds the
396///   configured max chunk size (default [`DEFAULT_MAX_CHUNK_SIZE`], 512 MiB).
397/// - [`ParseError::ChecksumMismatch`] — CRC32 verification failed.
398/// - [`ParseError::UnknownChunkTag`] — unrecognised 4-byte tag.
399/// - [`ParseError::Io`] — underlying I/O failure.
400///
401/// # Async usage
402///
403/// `ZiPatchReader` is a synchronous parser over a [`std::io::Read`]
404/// source — see the crate-level "Async usage" section for the rationale.
405/// Async consumers wrap iteration (and any apply call that drives it)
406/// in `tokio::task::spawn_blocking`. To stream a patch that is itself
407/// arriving over an async transport (e.g. `reqwest::Response::bytes_stream`),
408/// either buffer it through a `tempfile::NamedTempFile` and feed the
409/// reopened [`std::fs::File`] to [`ZiPatchReader::new`], or bridge with a
410/// blocking-reader adapter that pulls from a
411/// [`tokio::sync::mpsc`-equivalent](std::sync::mpsc) channel populated
412/// by the async download task.
413///
414/// # Example
415///
416/// Build a minimal in-memory patch (magic + `ADIR` + `EOF_`) and walk it:
417///
418/// ```rust
419/// use std::io::Cursor;
420/// use zipatch_rs::{Chunk, ZiPatchReader};
421///
422/// // Helper: wrap tag + body into a correctly framed chunk with CRC32.
423/// fn make_chunk(tag: &[u8; 4], body: &[u8]) -> Vec<u8> {
424///     let mut crc_input = Vec::new();
425///     crc_input.extend_from_slice(tag);
426///     crc_input.extend_from_slice(body);
427///     let crc = crc32fast::hash(&crc_input);
428///
429///     let mut out = Vec::new();
430///     out.extend_from_slice(&(body.len() as u32).to_be_bytes());
431///     out.extend_from_slice(tag);
432///     out.extend_from_slice(body);
433///     out.extend_from_slice(&crc.to_be_bytes());
434///     out
435/// }
436///
437/// // 12-byte ZiPatch magic.
438/// let magic: [u8; 12] = [0x91, 0x5A, 0x49, 0x50, 0x41, 0x54, 0x43, 0x48, 0x0D, 0x0A, 0x1A, 0x0A];
439///
440/// // ADIR body: u32 BE name_len (7) + b"created".
441/// let mut adir_body = Vec::new();
442/// adir_body.extend_from_slice(&7u32.to_be_bytes());
443/// adir_body.extend_from_slice(b"created");
444///
445/// let mut patch = Vec::new();
446/// patch.extend_from_slice(&magic);
447/// patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
448/// patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
449///
450/// let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
451/// let mut chunks = Vec::new();
452/// while let Some(rec) = reader.next_chunk().unwrap() {
453///     chunks.push(rec.chunk);
454/// }
455///
456/// assert_eq!(chunks.len(), 1);
457/// assert!(matches!(chunks[0], Chunk::AddDirectory(_)));
458/// ```
#[derive(Debug)]
pub struct ZiPatchReader<R> {
    // Buffered wrapper around the caller's reader. After `new` it is
    // positioned just past the 12-byte magic.
    inner: std::io::BufReader<R>,
    // Fuse flag: set once `EOF_` has been consumed or a parse error has been
    // returned. While set, `next_chunk` returns `Ok(None)` without reading.
    done: bool,
    // Whether each chunk's stored CRC32 is checked against the computed one.
    // `true` after `new`; changed via `with_checksum_verification`.
    verify_checksums: bool,
    // Set only when the `EOF_` terminator was actually parsed; this is what
    // `is_complete` reports.
    eof_seen: bool,
    // Running total of bytes consumed from `inner`, including the 12-byte
    // magic header. Updated after each successful `parse_chunk` call.
    pub(crate) bytes_read: u64,
    // Caller-supplied identifier for the patch source. Stamped onto every
    // `SequentialCheckpoint` the apply driver emits so a later
    // `resume_apply_patch` call can refuse a checkpoint that was persisted for
    // a different patch. `None` when the caller has not set one via
    // `with_patch_name`.
    patch_name: Option<String>,
    // Maximum declared body length the parser will accept; chunks declaring a
    // larger `body_len` are rejected with `ParseError::OversizedChunk` before
    // any allocation. Defaults to `DEFAULT_MAX_CHUNK_SIZE`.
    max_chunk_size: u32,
}
479
480impl<R: std::io::Read> ZiPatchReader<R> {
481    /// Wrap `reader` and validate the leading 12-byte `ZiPatch` magic.
482    ///
483    /// Consumes exactly 12 bytes from `reader`. The magic is the byte sequence
484    /// `0x91 0x5A 0x49 0x50 0x41 0x54 0x43 0x48 0x0D 0x0A 0x1A 0x0A`
485    /// (i.e. `\x91ZIPATCH\r\n\x1a\n`).
486    ///
487    /// The reader is wrapped in a [`std::io::BufReader`] internally, so the
488    /// many small typed reads the chunk parser issues (4-byte size, 4-byte
489    /// tag, 5-byte SQPK prefix, …) coalesce into a small number of syscalls.
490    /// Callers do not need to pre-wrap a raw [`std::fs::File`] or other
491    /// unbuffered source.
492    ///
493    /// CRC32 verification is **enabled** by default. Call
494    /// [`ZiPatchReader::with_checksum_verification`] with `false` before
495    /// iterating to disable it.
496    ///
497    /// # Errors
498    ///
499    /// - [`ParseError::InvalidMagic`] — the first 12 bytes do not match the
500    ///   expected magic.
501    /// - [`ParseError::Io`] — an I/O error occurred while reading the magic.
502    pub fn new(reader: R) -> Result<Self> {
503        let mut reader = std::io::BufReader::new(reader);
504        let mut magic = [0u8; 12];
505        reader.read_exact(&mut magic)?;
506        if magic != MAGIC {
507            return Err(ParseError::InvalidMagic);
508        }
509        Ok(Self {
510            inner: reader,
511            done: false,
512            verify_checksums: true,
513            eof_seen: false,
514            // The 12-byte magic header has already been consumed.
515            bytes_read: 12,
516            patch_name: None,
517            max_chunk_size: DEFAULT_MAX_CHUNK_SIZE,
518        })
519    }
520
521    /// Set the upper bound on a single chunk's declared body length, in
522    /// bytes.
523    ///
524    /// The parser rejects any chunk whose `body_len` exceeds `bytes` with
525    /// [`ParseError::OversizedChunk`] before allocating space for its body.
526    /// Defaults to [`DEFAULT_MAX_CHUNK_SIZE`] (512 MiB). Raise it for
527    /// patches with unusually large chunks; lower it when applying untrusted
528    /// streams to bound the parser's worst-case allocation.
529    ///
530    /// # Panics
531    ///
532    /// Panics if `bytes` is zero — a zero ceiling rejects every chunk and
533    /// is a programming error.
534    #[must_use]
535    pub fn with_max_chunk_size(mut self, bytes: u32) -> Self {
536        assert!(bytes > 0, "with_max_chunk_size(0) is invalid");
537        self.max_chunk_size = bytes;
538        self
539    }
540
541    /// Returns the configured maximum chunk-body length, in bytes.
542    #[must_use]
543    pub fn max_chunk_size(&self) -> u32 {
544        self.max_chunk_size
545    }
546
547    /// Attach a human-readable identifier to this patch stream.
548    ///
549    /// The identifier is stamped onto every
550    /// [`SequentialCheckpoint`](crate::apply::SequentialCheckpoint) the apply
551    /// driver emits so a future
552    /// [`resume_apply_patch`](crate::ApplyConfig::resume_apply_patch) call can
553    /// detect a checkpoint that was persisted for a different patch and
554    /// refuse to resume from it.
555    ///
556    /// Typical value is the patch filename (e.g. `"H2017.07.11.0000.0000a.patch"`).
557    /// No interpretation is performed — the string is compared verbatim.
558    #[must_use]
559    pub fn with_patch_name(mut self, name: impl Into<String>) -> Self {
560        self.patch_name = Some(name.into());
561        self
562    }
563
564    /// Returns the caller-supplied patch identifier, if any.
565    ///
566    /// Set by [`Self::with_patch_name`]; `None` otherwise.
567    #[must_use]
568    pub fn patch_name(&self) -> Option<&str> {
569        self.patch_name.as_deref()
570    }
571
572    /// Mutable access to the wrapped [`std::io::BufReader`].
573    ///
574    /// Used by [`crate::ApplyConfig::resume_apply_patch`] to seek the
575    /// underlying source for the patch-size measurement at entry. Not
576    /// part of the stable API — seeking the inner reader while a chunk
577    /// parse is in flight would desync `bytes_read` and break later
578    /// iteration.
579    pub(crate) fn inner_mut(&mut self) -> &mut std::io::BufReader<R> {
580        &mut self.inner
581    }
582
583    /// Toggle per-chunk CRC32 verification.
584    ///
585    /// Verification is **enabled** by default after [`ZiPatchReader::new`].
586    /// Pass `false` to skip CRC checks — useful when the source has already
587    /// been verified out-of-band (e.g. a download hash was checked before the
588    /// file was opened), or when processing known-good test data where the
589    /// overhead is unnecessary.
590    #[must_use]
591    pub fn with_checksum_verification(mut self, on: bool) -> Self {
592        self.verify_checksums = on;
593        self
594    }
595
596    /// Returns `true` if iteration reached the `EOF_` terminator cleanly.
597    ///
598    /// A `false` return after `next()` yields `None` indicates the stream was
599    /// truncated — the download or file copy was incomplete. In that case the
600    /// iterator stopped because of a [`ParseError::TruncatedPatch`] error,
601    /// not because the patch finished normally.
602    pub fn is_complete(&self) -> bool {
603        self.eof_seen
604    }
605
606    /// Returns the running total of bytes consumed from the patch stream.
607    ///
608    /// Starts at `12` after [`ZiPatchReader::new`] (the magic header has been
609    /// read) and increases monotonically by the size of each chunk's wire
610    /// frame after each successful [`Self::next_chunk`] call. Includes the
611    /// `EOF_` terminator's frame.
612    ///
613    /// On parse error, the counter is **not** advanced past the failing
614    /// chunk — it reflects the byte offset at the start of that chunk's
615    /// length prefix, not the broken position somewhere inside its frame.
616    ///
617    /// Per-chunk consumers should read the equivalent counter off the
618    /// [`ChunkRecord::bytes_read`] field. This getter is for end-of-stream
619    /// reporting — after [`Self::next_chunk`] returned `Ok(None)`, no
620    /// [`ChunkRecord`] is produced for the consumed `EOF_` frame, so the
621    /// final stream position is only available through this method.
622    #[must_use]
623    pub fn bytes_read(&self) -> u64 {
624        self.bytes_read
625    }
626
627    /// Read the next chunk frame from the underlying stream.
628    ///
629    /// Returns `Ok(Some(record))` for each successfully parsed chunk in
630    /// stream order, `Ok(None)` after the `EOF_` terminator has been
631    /// consumed (the terminator itself is never surfaced as a record), and
632    /// `Err(_)` on a parse failure. After `Ok(None)` or any `Err(_)`,
633    /// subsequent calls return `Ok(None)` — the reader is fused.
634    ///
635    /// # Errors
636    ///
637    /// See [`Self`]'s "Errors" section.
638    pub fn next_chunk(&mut self) -> Result<Option<ChunkRecord>> {
639        if self.done {
640            return Ok(None);
641        }
642        // Snapshot the body offset before parsing so a successful parse can
643        // commit it without re-walking the stream. The chunk body begins after
644        // the 8-byte `[body_len: u32 BE, tag: [u8; 4]]` frame header.
645        let body_offset = self.bytes_read + 8;
646        match parse_chunk(&mut self.inner, self.verify_checksums, self.max_chunk_size) {
647            Ok(ParsedChunk {
648                chunk: Chunk::EndOfFile,
649                consumed,
650                ..
651            }) => {
652                self.bytes_read += consumed;
653                self.done = true;
654                self.eof_seen = true;
655                Ok(None)
656            }
657            Ok(ParsedChunk {
658                chunk,
659                tag,
660                consumed,
661            }) => {
662                self.bytes_read += consumed;
663                Ok(Some(ChunkRecord {
664                    chunk,
665                    tag,
666                    body_offset,
667                    bytes_read: self.bytes_read,
668                }))
669            }
670            Err(e) => {
671                self.done = true;
672                Err(e)
673            }
674        }
675    }
676}
677
678/// Open the file at `path` and validate the `ZiPatch` magic, returning a
679/// ready-to-iterate [`ZiPatchReader`].
680///
681/// The concrete inner reader type is intentionally hidden behind `impl
682/// Read` so the choice of source and any buffering strategy remain
683/// implementation details. Callers that need to name the type should
684/// construct a reader of their choice and pass it to
685/// [`ZiPatchReader::new`].
686///
687/// # Errors
688///
689/// - [`ParseError::Io`] — the file could not be opened.
690/// - [`ParseError::InvalidMagic`] — the file does not start with the
691///   `ZiPatch` magic bytes.
692pub fn open_patch(
693    path: impl AsRef<std::path::Path>,
694) -> crate::ParseResult<ZiPatchReader<impl std::io::Read + 'static>> {
695    let file = std::fs::File::open(path)?;
696    ZiPatchReader::new(file)
697}
698
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::make_chunk;
    use std::io::Cursor;

    // --- shared fixtures ---

    /// Frames an `ADIR` chunk whose body is a big-endian `u32` name length
    /// followed by the raw name bytes.
    fn adir_frame(name: &[u8]) -> Vec<u8> {
        let mut body = (name.len() as u32).to_be_bytes().to_vec();
        body.extend_from_slice(name);
        make_chunk(b"ADIR", &body)
    }

    /// Frames an `EOF_` chunk with an empty body.
    fn eof_frame() -> Vec<u8> {
        make_chunk(b"EOF_", &[])
    }

    /// Builds a patch byte stream: the file magic followed by `frames`, in
    /// order, with nothing in between.
    fn patch_stream(frames: &[Vec<u8>]) -> Vec<u8> {
        MAGIC
            .iter()
            .chain(frames.iter().flatten())
            .copied()
            .collect()
    }

    // --- parse_chunk error paths ---

    #[test]
    fn truncated_at_chunk_boundary_yields_truncated_patch() {
        // The stream ends right after the magic, so the very first chunk
        // read hits end-of-input and must be reported as TruncatedPatch.
        let stream = patch_stream(&[]);
        let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
        match reader.next_chunk() {
            Err(ParseError::TruncatedPatch) => {}
            other => panic!("expected TruncatedPatch, got {other:?}"),
        }
        assert!(!reader.is_complete(), "stream is not clean-ended");
    }

    #[test]
    fn non_eof_io_error_on_body_len_read_propagates_as_io() {
        // A reader that fails every read with BrokenPipe: the error is not
        // an end-of-input condition, so parse_chunk must hand it back as
        // ParseError::Io with the original kind intact.
        struct BrokenReader;
        impl std::io::Read for BrokenReader {
            fn read(&mut self, _: &mut [u8]) -> std::io::Result<usize> {
                let kind = std::io::ErrorKind::BrokenPipe;
                Err(std::io::Error::new(kind, "simulated broken pipe"))
            }
        }

        match parse_chunk(&mut BrokenReader, false, DEFAULT_MAX_CHUNK_SIZE) {
            Ok(_) => panic!("expected an error, got Ok"),
            Err(ParseError::Io { source: e }) => {
                assert_eq!(
                    e.kind(),
                    std::io::ErrorKind::BrokenPipe,
                    "non-EOF I/O error must propagate unchanged, got kind {:?}",
                    e.kind()
                );
            }
            Err(other) => panic!("expected ParseError::Io(BrokenPipe), got {other:?}"),
        }
    }

    #[test]
    fn truncated_after_one_chunk_yields_truncated_patch() {
        // Magic + one well-formed ADIR and then nothing: the second
        // next_chunk() call must surface TruncatedPatch rather than a
        // clean end of stream.
        let stream = patch_stream(&[adir_frame(b"test")]);
        let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();

        let first = reader.next_chunk();
        assert!(
            matches!(first, Ok(Some(_))),
            "first ADIR chunk should parse cleanly: {first:?}"
        );

        match reader.next_chunk() {
            Err(ParseError::TruncatedPatch) => {}
            other => panic!("expected TruncatedPatch on truncated stream, got {other:?}"),
        }
        assert!(
            !reader.is_complete(),
            "is_complete must be false after truncation"
        );
    }

    #[test]
    fn checksum_mismatch_returns_checksum_mismatch_error() {
        // Take a valid ADIR frame and invert its final byte (the last byte
        // of the trailing CRC32 field); with checksum verification enabled
        // parse_chunk must report ChecksumMismatch.
        let mut frame = adir_frame(b"test");
        let crc_tail = frame
            .last_mut()
            .expect("a framed chunk is never empty");
        *crc_tail ^= 0xFF;

        let mut cur = Cursor::new(frame);
        let result = parse_chunk(&mut cur, true, DEFAULT_MAX_CHUNK_SIZE);
        assert!(
            matches!(result, Err(ParseError::ChecksumMismatch { .. })),
            "corrupted CRC must yield ChecksumMismatch"
        );
    }

    #[test]
    fn unknown_chunk_tag_returns_unknown_chunk_tag_error() {
        // "ZZZZ" is not a recognised tag; the error must carry the raw
        // four tag bytes back to the caller.
        let mut cur = Cursor::new(make_chunk(b"ZZZZ", &[]));
        let tag = match parse_chunk(&mut cur, false, DEFAULT_MAX_CHUNK_SIZE) {
            Err(ParseError::UnknownChunkTag(tag)) => tag,
            Err(other) => panic!("expected UnknownChunkTag, got {other:?}"),
            Ok(_) => panic!("expected UnknownChunkTag, got Ok"),
        };
        assert_eq!(
            tag,
            ChunkTag::new(*b"ZZZZ"),
            "tag bytes must be preserved in error"
        );
    }

    #[test]
    fn default_max_chunk_size_matches_constant() {
        // A freshly constructed reader reports the default limit.
        let stream = patch_stream(&[eof_frame()]);
        let reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
        assert_eq!(reader.max_chunk_size(), DEFAULT_MAX_CHUNK_SIZE);
    }

    #[test]
    fn with_max_chunk_size_overrides_default() {
        // The builder-style setter replaces the limit the getter reports.
        let stream = patch_stream(&[eof_frame()]);
        let reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
        let reader = reader.with_max_chunk_size(4096);
        assert_eq!(reader.max_chunk_size(), 4096);
    }

    #[test]
    #[should_panic(expected = "with_max_chunk_size(0) is invalid")]
    fn with_max_chunk_size_zero_panics() {
        // Construction succeeds; only the zero limit itself must panic.
        let stream = patch_stream(&[eof_frame()]);
        let reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
        let _ = reader.with_max_chunk_size(0);
    }

    #[test]
    fn custom_max_chunk_size_rejects_chunks_above_threshold() {
        // ADIR("hello") has a 9-byte body (4-byte length + 5 name bytes).
        // With the limit lowered to 4 the reader must reject it and report
        // the declared size.
        let stream = patch_stream(&[adir_frame(b"hello")]);
        let mut reader = ZiPatchReader::new(Cursor::new(stream))
            .unwrap()
            .with_max_chunk_size(4);

        let size = match reader.next_chunk() {
            Err(ParseError::OversizedChunk(size)) => size,
            other => panic!("expected OversizedChunk(9), got {other:?}"),
        };
        assert_eq!(size, 9);
    }

    #[test]
    fn oversized_chunk_body_len_returns_oversized_chunk_error() {
        // Only the four length bytes are supplied, all 0xFF (u32::MAX).
        // The parser has nothing else to read, so the rejection must come
        // from the length check alone.
        let bytes = [0xFFu8; 4];
        let mut cur = Cursor::new(&bytes[..]);
        let size = match parse_chunk(&mut cur, false, DEFAULT_MAX_CHUNK_SIZE) {
            Err(ParseError::OversizedChunk(size)) => size,
            _ => panic!("expected OversizedChunk for u32::MAX body_len"),
        };
        assert!(
            size > DEFAULT_MAX_CHUNK_SIZE as usize,
            "reported size {size} must exceed DEFAULT_MAX_CHUNK_SIZE {DEFAULT_MAX_CHUNK_SIZE}"
        );
    }

    // --- ZiPatchReader byte-counter and per-record metadata ---

    #[test]
    fn bytes_read_starts_at_12_before_first_chunk() {
        // Construction consumes the 12-byte magic and nothing else, so the
        // counter must already read 12 before any chunk is requested.
        let stream = patch_stream(&[eof_frame()]);
        let reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
        assert_eq!(
            reader.bytes_read(),
            12,
            "bytes_read must be 12 (magic only) before iteration starts"
        );
    }

    #[test]
    fn record_carries_tag_body_offset_and_bytes_read() {
        // Stream layout: MAGIC + ADIR("a") + EOF_.
        // ADIR frame: 4 (size) + 4 (tag) + 5 (body) + 4 (crc) = 17 bytes.
        // EOF_ frame: 4 + 4 + 0 + 4 = 12 bytes.
        let adir_len = 17;
        let eof_len = 12;

        let stream = patch_stream(&[adir_frame(b"a"), eof_frame()]);
        let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
        assert_eq!(reader.bytes_read(), 12, "pre-read: magic only");

        let rec = reader.next_chunk().unwrap().expect("first ADIR record");
        assert!(
            matches!(rec.chunk, Chunk::AddDirectory(_)),
            "first chunk must be ADIR"
        );
        assert_eq!(rec.tag, ChunkTag::ADIR);
        // The body starts after magic (12) + body_len (4) + tag (4) = 20.
        assert_eq!(rec.body_offset, 20);
        assert_eq!(rec.bytes_read, 12 + adir_len, "magic + ADIR frame");

        let after_adir = reader.next_chunk().unwrap();
        assert!(after_adir.is_none(), "EOF_ must terminate iteration");
        assert_eq!(
            reader.bytes_read(),
            12 + adir_len + eof_len,
            "after EOF_: magic + ADIR + EOF_ frames"
        );
        assert!(reader.is_complete(), "is_complete must be true after EOF_");
    }

    #[test]
    fn bytes_read_is_monotonically_non_decreasing() {
        // Two ADIR chunks followed by EOF_. Every yielded record must move
        // the counter strictly forward and agree with the reader's own
        // counter; consuming EOF_ (empty body, 12-byte frame) must move it
        // forward once more.
        let stream = patch_stream(&[adir_frame(b"a"), adir_frame(b"bb"), eof_frame()]);
        let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();

        let mut prev = reader.bytes_read();
        loop {
            let Some(rec) = reader.next_chunk().unwrap() else {
                break;
            };
            let current = rec.bytes_read;
            assert_eq!(
                current,
                reader.bytes_read(),
                "record's bytes_read must equal reader's running counter"
            );
            assert!(
                current > prev,
                "non-empty ADIR frame must strictly advance bytes_read: \
                 {prev} -> {current}"
            );
            prev = current;
        }

        // The loop ended because EOF_ was consumed; its frame must have
        // pushed the counter beyond the last record's position.
        assert!(
            reader.bytes_read() > prev,
            "consuming EOF_ must advance bytes_read by its 12-byte frame: \
             {prev} -> {}",
            reader.bytes_read()
        );
    }

    // --- open_patch constructor ---

    #[test]
    fn open_patch_opens_minimal_patch_and_reaches_eof() {
        // Write MAGIC + EOF_ to a file inside a temporary directory and
        // read it back through the path-based constructor.
        let tmp = tempfile::tempdir().unwrap();
        let file_path = tmp.path().join("test.patch");
        std::fs::write(&file_path, patch_stream(&[eof_frame()])).unwrap();

        let mut reader = open_patch(&file_path).expect("open_patch must open valid patch");
        let terminator = reader.next_chunk().unwrap();
        assert!(
            terminator.is_none(),
            "EOF_ must terminate iteration immediately"
        );
        assert!(reader.is_complete(), "is_complete must be true after EOF_");
    }

    #[test]
    fn open_patch_returns_io_error_when_file_is_missing() {
        // The temporary directory exists but the file inside it does not.
        let tmp = tempfile::tempdir().unwrap();
        let file_path = tmp.path().join("nonexistent.patch");
        let outcome = open_patch(&file_path);
        assert!(
            matches!(outcome, Err(ParseError::Io { .. })),
            "open_patch on a missing file must return ParseError::Io"
        );
    }

    // --- Fused-ness and is_complete ---

    #[test]
    fn reader_is_fused_after_error() {
        // After next_chunk has yielded an error once, every later call
        // must yield Ok(None). The unknown "ZZZZ" tag provides the error.
        let stream = patch_stream(&[make_chunk(b"ZZZZ", &[])]);
        let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();

        let first = reader.next_chunk();
        assert!(
            matches!(first, Err(ParseError::UnknownChunkTag(_))),
            "first call must yield the error: {first:?}"
        );

        // Second and third calls, each with its own failure message.
        for why in [
            "fused: must return Ok(None) after error",
            "fused: still Ok(None) on third call",
        ] {
            assert!(matches!(reader.next_chunk(), Ok(None)), "{why}");
        }
    }

    #[test]
    fn is_complete_false_until_eof_seen() {
        // is_complete must flip to true only once EOF_ has been consumed:
        // not at construction, and not after an ordinary chunk.
        let stream = patch_stream(&[adir_frame(b"x"), eof_frame()]);
        let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
        assert!(
            !reader.is_complete(),
            "not complete before reading anything"
        );

        let _adir = reader.next_chunk().unwrap().unwrap(); // consume ADIR
        assert!(
            !reader.is_complete(),
            "not complete after ADIR, before EOF_"
        );

        let after_adir = reader.next_chunk().unwrap();
        assert!(after_adir.is_none(), "EOF_ consumed");
        assert!(reader.is_complete(), "complete after EOF_ consumed");
    }
}