Skip to main content

zipatch_rs/chunk/
mod.rs

1//! Wire-format chunk types and the [`ZiPatchReader`] iterator.
2//!
3//! This module is the parsing layer: it decodes the raw `ZiPatch` byte
4//! stream into a stream of typed [`Chunk`] values. Each top-level
5//! variant corresponds to one 4-byte ASCII wire tag (`FHDR`, `APLY`,
6//! `SQPK`, …); the per-variant submodules below own the binary layout for
7//! their body. Nothing in this module touches the filesystem — apply-time
8//! effects live in [`crate::apply`].
9//!
10//! The [`ZiPatchReader`] iterator validates the 12-byte file magic on
11//! construction, then yields one [`Chunk`] per [`Iterator::next`] call
12//! until the internal `EOF_` terminator is consumed or a parse error
13//! surfaces.
14
15pub(crate) mod adir;
16pub(crate) mod afsp;
17pub(crate) mod aply;
18pub(crate) mod ddir;
19pub(crate) mod fhdr;
20pub(crate) mod sqpk;
21pub(crate) mod util;
22
23pub use adir::AddDirectory;
24pub use afsp::ApplyFreeSpace;
25pub use aply::{ApplyOption, ApplyOptionKind};
26pub use ddir::DeleteDirectory;
27pub use fhdr::{FileHeader, FileHeaderV2, FileHeaderV3};
28pub use sqpk::{SqpackFile, SqpkCommand};
29// Re-export SqpkCommand sub-types so callers can match on them
30pub use sqpk::{
31    IndexCommand, SqpkAddData, SqpkCompressedBlock, SqpkDeleteData, SqpkExpandData, SqpkFile,
32    SqpkFileOperation, SqpkHeader, SqpkHeaderTarget, SqpkIndex, SqpkPatchInfo, SqpkTargetInfo,
33    TargetFileKind, TargetHeaderKind,
34};
35
36use crate::reader::ReadExt;
37use crate::{Result, ZiPatchError};
38use tracing::trace;
39
/// The 12-byte signature that opens every `ZiPatch` file:
/// `\x91ZIPATCH\r\n\x1a\n`.
const MAGIC: [u8; 12] = *b"\x91ZIPATCH\r\n\x1a\n";

/// Upper bound on a chunk's declared body length (512 MiB). `parse_chunk`
/// rejects anything larger before it allocates a body buffer.
const MAX_CHUNK_SIZE: usize = 512 << 20;
45
46/// One top-level chunk parsed from a `ZiPatch` stream.
47///
48/// Each variant corresponds to a 4-byte ASCII wire tag. The tag dispatch table
49/// mirrors the C# reference in
50/// `lib/FFXIVQuickLauncher/.../Patching/ZiPatch/Chunk/ZiPatchChunk.cs`.
51///
52/// # Observed frequency
53///
54/// SE's XIVARR+ patch files almost exclusively contain `FHDR`, `APLY`, and
55/// `SQPK` chunks. `ADIR`/`DELD` can theoretically appear and are implemented,
56/// but are rarely emitted in practice. `APFS` has never been observed in modern
57/// patches (the reference implementation treats it as a no-op). `EOF_` is
58/// consumed by [`ZiPatchReader`] and is never yielded to the caller.
59///
60/// # Exhaustiveness
61///
62/// The enum is `#[non_exhaustive]`. Match arms should include a wildcard to
63/// remain forward-compatible as new chunk types are added.
64#[non_exhaustive]
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub enum Chunk {
67    /// `FHDR` — the first chunk in every patch file; carries version and
68    /// per-version patch metadata. See [`FileHeader`] for the versioned body.
69    FileHeader(FileHeader),
70    /// `APLY` — sets or clears a boolean apply-time flag on the
71    /// [`crate::ApplyContext`] (e.g. "ignore missing files"). See [`ApplyOption`].
72    ApplyOption(ApplyOption),
73    /// `APFS` — free-space book-keeping emitted by old patcher tooling; treated
74    /// as a no-op at apply time. See [`ApplyFreeSpace`].
75    ApplyFreeSpace(ApplyFreeSpace),
76    /// `ADIR` — instructs the patcher to create a directory under the game
77    /// install root. See [`AddDirectory`].
78    AddDirectory(AddDirectory),
79    /// `DELD` — instructs the patcher to remove a directory under the game
80    /// install root. See [`DeleteDirectory`].
81    DeleteDirectory(DeleteDirectory),
82    /// `SQPK` — the workhorse chunk; wraps one of eight sub-commands that
83    /// add, delete, expand, or replace `SqPack` data. See [`SqpkCommand`].
84    Sqpk(SqpkCommand),
85    /// `EOF_` — marks the clean end of the patch stream. [`ZiPatchReader`]
86    /// consumes this chunk internally; it is never yielded to the caller.
87    EndOfFile,
88}
89
90/// One parsed chunk plus its 4-byte ASCII tag and the byte count consumed
91/// from the input stream by its frame.
92///
93/// Returned by [`parse_chunk`]. The `consumed` count is exactly the size of
94/// the chunk's on-wire frame: `4 (body_len) + 4 (tag) + body_len + 4 (crc32)`
95/// = `body_len + 12`. This is what
96/// [`ZiPatchReader`](crate::ZiPatchReader) accumulates into its running
97/// byte counter for progress reporting.
98pub(crate) struct ParsedChunk {
99    pub(crate) chunk: Chunk,
100    pub(crate) tag: [u8; 4],
101    pub(crate) consumed: u64,
102}
103
104/// Parse one chunk frame from `r`.
105///
106/// # Wire framing
107///
108/// Each chunk is laid out as:
109///
110/// ```text
111/// [body_len: u32 BE] [tag: 4 bytes] [body: body_len bytes] [crc32: u32 BE]
112/// ```
113///
114/// The CRC32 is computed over `tag ++ body` (not over `body_len`), matching
115/// the C# `ChecksumBinaryReader` in the `XIVLauncher` reference. When
116/// `verify_checksums` is `true` and the stored CRC does not match the computed
117/// one, [`ZiPatchError::ChecksumMismatch`] is returned.
118///
119/// # Errors
120///
121/// - [`ZiPatchError::TruncatedPatch`] — the reader returns EOF while reading
122///   the `body_len` field (i.e. no more chunks are present but `EOF_` was
123///   never seen).
124/// - [`ZiPatchError::OversizedChunk`] — `body_len` exceeds 512 MiB.
125/// - [`ZiPatchError::ChecksumMismatch`] — CRC32 mismatch (only when
126///   `verify_checksums` is `true`).
127/// - [`ZiPatchError::UnknownChunkTag`] — tag is not recognised.
128/// - [`ZiPatchError::Io`] — any other I/O failure reading from `r`.
129pub(crate) fn parse_chunk<R: std::io::Read>(
130    r: &mut R,
131    verify_checksums: bool,
132) -> Result<ParsedChunk> {
133    let size = match r.read_u32_be() {
134        Ok(s) => s as usize,
135        Err(ZiPatchError::Io(e)) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
136            return Err(ZiPatchError::TruncatedPatch);
137        }
138        Err(e) => return Err(e),
139    };
140    if size > MAX_CHUNK_SIZE {
141        return Err(ZiPatchError::OversizedChunk(size));
142    }
143
144    // Tag (4 B) and CRC (4 B) are always present regardless of body shape.
145    let mut tag = [0u8; 4];
146    r.read_exact(&mut tag)?;
147
148    // Peek at the first 5 bytes of the body without committing to either the
149    // generic single-allocation path or the SQPK `A` zero-copy-into-data path.
150    // For SQPK chunks, those 5 bytes are `[inner_size: i32 BE][sub_cmd: u8]`.
151    // For chunks with bodies shorter than 5 bytes (e.g. `EOF_`), we still read
152    // exactly `size` bytes into the prefix array and leave the rest zero.
153    let mut prefix = [0u8; 5];
154    let prefix_len = size.min(5);
155    if prefix_len > 0 {
156        r.read_exact(&mut prefix[..prefix_len])?;
157    }
158
159    // ---- Fast path: SQPK `A` (SqpkAddData) — see `parse_sqpk_add_data_fast`. ----
160    if &tag == b"SQPK" && size >= 5 + SQPK_ADDDATA_HEADER_SIZE && prefix[4] == b'A' {
161        return parse_sqpk_add_data_fast(r, tag, prefix, size, verify_checksums);
162    }
163
164    // ---- Generic path: one allocation for the whole body. ----
165    let mut body_vec = vec![0u8; size];
166    body_vec[..prefix_len].copy_from_slice(&prefix[..prefix_len]);
167    if size > prefix_len {
168        r.read_exact(&mut body_vec[prefix_len..])?;
169    }
170
171    let mut crc_buf = [0u8; 4];
172    r.read_exact(&mut crc_buf)?;
173    let expected_crc = u32::from_be_bytes(crc_buf);
174
175    if verify_checksums {
176        let mut hasher = crc32fast::Hasher::new();
177        hasher.update(&tag);
178        hasher.update(&body_vec);
179        let actual_crc = hasher.finalize();
180        if actual_crc != expected_crc {
181            return Err(ZiPatchError::ChecksumMismatch {
182                tag,
183                expected: expected_crc,
184                actual: actual_crc,
185            });
186        }
187    }
188
189    trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
190
191    // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
192    let consumed = (size as u64) + 12;
193
194    let body = &body_vec[..];
195
196    let chunk = match &tag {
197        b"EOF_" => Chunk::EndOfFile,
198        b"FHDR" => Chunk::FileHeader(fhdr::parse(body)?),
199        b"APLY" => Chunk::ApplyOption(aply::parse(body)?),
200        b"APFS" => Chunk::ApplyFreeSpace(afsp::parse(body)?),
201        b"ADIR" => Chunk::AddDirectory(adir::parse(body)?),
202        b"DELD" => Chunk::DeleteDirectory(ddir::parse(body)?),
203        b"SQPK" => Chunk::Sqpk(sqpk::parse_sqpk(body)?),
204        _ => return Err(ZiPatchError::UnknownChunkTag(tag)),
205    };
206
207    Ok(ParsedChunk {
208        chunk,
209        tag,
210        consumed,
211    })
212}
213
214// Size of the SqpkAddData fixed header that precedes the inline data payload.
215// Mirrors `add_data::SqpkAddData::DATA_SOURCE_OFFSET` (23) without taking a
216// `u64` round-trip; kept private to the framing path.
217const SQPK_ADDDATA_HEADER_SIZE: usize = 23;
218
219/// Fast path for SQPK `A` (`SqpkAddData`) chunks.
220///
221/// `AddData` is the largest chunk type by byte volume — payloads of hundreds of
222/// KB to MB are typical. The generic framing path allocates one `Vec<u8>` of
223/// `size` for the whole body, then `binrw`'s derived parser allocates a second
224/// `Vec<u8>` of exactly `data_bytes` and memcpys the inline payload into it.
225/// That second allocation + memcpy dominates parse time for `AddData`.
226///
227/// This function reads the `AddData` fixed header into a stack array, parses
228/// the seven fields directly, allocates the `data` payload at its exact size,
229/// and `read_exact`s the source bytes straight into it — one allocation, no
230/// intermediate copy of the payload.
231///
232/// On entry: `tag` and the 5-byte `prefix` (SQPK `inner_size` + sub-command
233/// byte) have already been consumed from `r`. The remaining bytes are
234/// `[fixed_header: 23 B][data: data_bytes][crc32: 4 B]`.
235fn parse_sqpk_add_data_fast<R: std::io::Read>(
236    r: &mut R,
237    tag: [u8; 4],
238    prefix: [u8; 5],
239    size: usize,
240    verify_checksums: bool,
241) -> Result<ParsedChunk> {
242    // Validate the SQPK inner_size against the outer chunk size, matching the
243    // check in `sqpk::parse_sqpk` so callers see byte-identical error behaviour.
244    let inner_size = i32::from_be_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
245    if inner_size != size {
246        return Err(ZiPatchError::InvalidField {
247            context: "SQPK inner size mismatch",
248        });
249    }
250
251    let mut header = [0u8; SQPK_ADDDATA_HEADER_SIZE];
252    r.read_exact(&mut header)?;
253
254    // SqpkAddData fixed-header layout (all big-endian):
255    //   [0..3]   pad
256    //   [3..5]   main_id   u16
257    //   [5..7]   sub_id    u16
258    //   [7..11]  file_id   u32
259    //   [11..15] block_offset_raw  u32 (<< 7 = bytes)
260    //   [15..19] data_bytes_raw    u32 (<< 7 = bytes)
261    //   [19..23] block_delete_raw  u32 (<< 7 = bytes)
262    let main_id = u16::from_be_bytes([header[3], header[4]]);
263    let sub_id = u16::from_be_bytes([header[5], header[6]]);
264    let file_id = u32::from_be_bytes([header[7], header[8], header[9], header[10]]);
265    let block_offset_raw = u32::from_be_bytes([header[11], header[12], header[13], header[14]]);
266    let data_bytes_raw = u32::from_be_bytes([header[15], header[16], header[17], header[18]]);
267    let block_delete_raw = u32::from_be_bytes([header[19], header[20], header[21], header[22]]);
268
269    let block_offset = (block_offset_raw as u64) << 7;
270    let data_bytes = (data_bytes_raw as u64) << 7;
271    let block_delete_number = (block_delete_raw as u64) << 7;
272
273    // The declared payload length must fit exactly within the chunk body:
274    //   size = 5 (inner_size + sub_cmd) + 23 (fixed header) + data_bytes
275    let expected_data = size - 5 - SQPK_ADDDATA_HEADER_SIZE;
276    if data_bytes as usize != expected_data {
277        return Err(ZiPatchError::InvalidField {
278            context: "SqpkAddData data_bytes does not match SQPK body length",
279        });
280    }
281
282    let mut data = vec![0u8; data_bytes as usize];
283    r.read_exact(&mut data)?;
284
285    let mut crc_buf = [0u8; 4];
286    r.read_exact(&mut crc_buf)?;
287    let expected_crc = u32::from_be_bytes(crc_buf);
288
289    if verify_checksums {
290        // CRC is over `tag ++ body`. The body is split across three disjoint
291        // buffers — feed each segment to the incremental hasher.
292        let mut hasher = crc32fast::Hasher::new();
293        hasher.update(&tag);
294        hasher.update(&prefix);
295        hasher.update(&header);
296        hasher.update(&data);
297        let actual_crc = hasher.finalize();
298        if actual_crc != expected_crc {
299            return Err(ZiPatchError::ChecksumMismatch {
300                tag,
301                expected: expected_crc,
302                actual: actual_crc,
303            });
304        }
305    }
306
307    trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
308
309    let chunk = Chunk::Sqpk(sqpk::SqpkCommand::AddData(Box::new(sqpk::SqpkAddData {
310        target_file: sqpk::SqpackFile {
311            main_id,
312            sub_id,
313            file_id,
314        },
315        block_offset,
316        data_bytes,
317        block_delete_number,
318        data,
319    })));
320
321    // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
322    let consumed = (size as u64) + 12;
323
324    Ok(ParsedChunk {
325        chunk,
326        tag,
327        consumed,
328    })
329}
330
/// Iterator over the [`Chunk`]s in a `ZiPatch` stream.
///
/// `ZiPatchReader` wraps any [`std::io::Read`] source and produces one
/// [`Chunk`] per [`Iterator::next`] call. The 12-byte file magic is checked
/// at construction time; chunks are then read in order until the `EOF_`
/// terminator is reached or an error occurs.
///
/// # Stream contract
///
/// - **Magic** — the first 12 bytes must be `\x91ZIPATCH\r\n\x1a\n`. Any
///   mismatch returns [`ZiPatchError::InvalidMagic`] from [`ZiPatchReader::new`].
/// - **Framing** — every chunk is a length-prefixed frame:
///   `[body_len: u32 BE] [tag: 4 B] [body: body_len B] [crc32: u32 BE]`.
/// - **CRC32** — computed over `tag ++ body`. Verification is on by default;
///   use [`ZiPatchReader::skip_checksum_verification`] to turn it off.
/// - **Termination** — the `EOF_` chunk is consumed internally and makes the
///   iterator return `None`. Call [`ZiPatchReader::is_complete`] afterwards to
///   tell a clean end apart from a stream that stopped on an error.
/// - **Fused** — once `None` has been returned (after `EOF_`, or after an
///   error was surfaced), every later `next` call returns `None` as well. The
///   iterator implements [`std::iter::FusedIterator`].
///
/// # Errors
///
/// A parse failure is reported as `Some(Err(e))` from [`Iterator::next`];
/// all subsequent calls return `None`. Possible errors include:
/// - [`ZiPatchError::TruncatedPatch`] — stream ended before `EOF_`.
/// - [`ZiPatchError::OversizedChunk`] — a declared chunk body exceeds 512 MiB.
/// - [`ZiPatchError::ChecksumMismatch`] — CRC32 verification failed.
/// - [`ZiPatchError::UnknownChunkTag`] — unrecognised 4-byte tag.
/// - [`ZiPatchError::Io`] — underlying I/O failure.
///
/// # Example
///
/// Build a minimal in-memory patch (magic + `ADIR` + `EOF_`) and iterate it:
///
/// ```rust
/// use std::io::Cursor;
/// use zipatch_rs::{Chunk, ZiPatchReader};
///
/// // Helper: wrap tag + body into a correctly framed chunk with CRC32.
/// fn make_chunk(tag: &[u8; 4], body: &[u8]) -> Vec<u8> {
///     let mut crc_input = Vec::new();
///     crc_input.extend_from_slice(tag);
///     crc_input.extend_from_slice(body);
///     let crc = crc32fast::hash(&crc_input);
///
///     let mut out = Vec::new();
///     out.extend_from_slice(&(body.len() as u32).to_be_bytes());
///     out.extend_from_slice(tag);
///     out.extend_from_slice(body);
///     out.extend_from_slice(&crc.to_be_bytes());
///     out
/// }
///
/// // 12-byte ZiPatch magic.
/// let magic: [u8; 12] = [0x91, 0x5A, 0x49, 0x50, 0x41, 0x54, 0x43, 0x48, 0x0D, 0x0A, 0x1A, 0x0A];
///
/// // ADIR body: u32 BE name_len (7) + b"created".
/// let mut adir_body = Vec::new();
/// adir_body.extend_from_slice(&7u32.to_be_bytes());
/// adir_body.extend_from_slice(b"created");
///
/// let mut patch = Vec::new();
/// patch.extend_from_slice(&magic);
/// patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
/// patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
///
/// let chunks: Vec<_> = ZiPatchReader::new(Cursor::new(patch))
///     .unwrap()
///     .collect::<Result<_, _>>()
///     .unwrap();
///
/// assert_eq!(chunks.len(), 1);
/// assert!(matches!(chunks[0], Chunk::AddDirectory(_)));
/// ```
#[derive(Debug)]
pub struct ZiPatchReader<R> {
    // Buffered view of the caller's source; all chunk parsing reads from it.
    inner: std::io::BufReader<R>,
    // Set once iteration has ended (clean `EOF_` or a surfaced error); makes
    // every later `next()` return `None`.
    done: bool,
    // Whether `parse_chunk` is asked to check each frame's CRC32.
    verify_checksums: bool,
    // Set only when the `EOF_` terminator was parsed; backs `is_complete()`.
    eof_seen: bool,
    // Running total of bytes consumed from `inner`, including the 12-byte
    // magic header. Advanced after each successful `parse_chunk` call.
    // Exposed via `bytes_read()` so the apply driver can fire monotonic
    // progress events without instrumenting the underlying `Read` source.
    bytes_read: u64,
    // 4-byte ASCII tag of the most recently parsed chunk frame. `None` until
    // the first frame parses successfully. `next()` also stores the `EOF_`
    // tag here and leaves the value untouched on a parse error. Used by
    // `apply_to` to attach the tag to per-chunk progress events without
    // re-matching on the `Chunk` enum.
    last_tag: Option<[u8; 4]>,
    // Absolute patch-file offset of the body of the most recently parsed
    // chunk (i.e. the byte right after the 8-byte `[len: u32 BE, tag: [u8;4]]`
    // frame header). `None` until the first frame parses successfully. It is
    // written only on the success arms of `next()`, so the stored offset
    // always belongs to a frame that parsed cleanly.
    current_body_offset: Option<u64>,
    // Caller-supplied identifier for the patch source. Stamped onto every
    // `SequentialCheckpoint` the apply driver emits so a later
    // `resume_apply_to` call can refuse a checkpoint that was persisted for
    // a different patch. `None` when the caller has not set one via
    // `with_patch_name`.
    patch_name: Option<String>,
}
436
437impl<R: std::io::Read> ZiPatchReader<R> {
438    /// Wrap `reader` and validate the leading 12-byte `ZiPatch` magic.
439    ///
440    /// Consumes exactly 12 bytes from `reader`. The magic is the byte sequence
441    /// `0x91 0x5A 0x49 0x50 0x41 0x54 0x43 0x48 0x0D 0x0A 0x1A 0x0A`
442    /// (i.e. `\x91ZIPATCH\r\n\x1a\n`).
443    ///
444    /// The reader is wrapped in a [`std::io::BufReader`] internally, so the
445    /// many small typed reads the chunk parser issues (4-byte size, 4-byte
446    /// tag, 5-byte SQPK prefix, …) coalesce into a small number of syscalls.
447    /// Callers do not need to pre-wrap a raw [`std::fs::File`] or other
448    /// unbuffered source.
449    ///
450    /// CRC32 verification is **enabled** by default. Call
451    /// [`ZiPatchReader::skip_checksum_verification`] before iterating to
452    /// disable it.
453    ///
454    /// # Errors
455    ///
456    /// - [`ZiPatchError::InvalidMagic`] — the first 12 bytes do not match the
457    ///   expected magic.
458    /// - [`ZiPatchError::Io`] — an I/O error occurred while reading the magic.
459    pub fn new(reader: R) -> Result<Self> {
460        let mut reader = std::io::BufReader::new(reader);
461        let magic = reader.read_exact_vec(12)?;
462        if magic.as_slice() != MAGIC {
463            return Err(ZiPatchError::InvalidMagic);
464        }
465        Ok(Self {
466            inner: reader,
467            done: false,
468            verify_checksums: true,
469            eof_seen: false,
470            // The 12-byte magic header has already been consumed.
471            bytes_read: 12,
472            last_tag: None,
473            current_body_offset: None,
474            patch_name: None,
475        })
476    }
477
478    /// Attach a human-readable identifier to this patch stream.
479    ///
480    /// The identifier is stamped onto every
481    /// [`SequentialCheckpoint`](crate::SequentialCheckpoint) the apply
482    /// driver emits so a future
483    /// [`resume_apply_to`](crate::ZiPatchReader::resume_apply_to) call can
484    /// detect a checkpoint that was persisted for a different patch and
485    /// refuse to resume from it.
486    ///
487    /// Typical value is the patch filename (e.g. `"H2017.07.11.0000.0000a.patch"`).
488    /// No interpretation is performed — the string is compared verbatim.
489    #[must_use]
490    pub fn with_patch_name(mut self, name: impl Into<String>) -> Self {
491        self.patch_name = Some(name.into());
492        self
493    }
494
495    /// Returns the caller-supplied patch identifier, if any.
496    ///
497    /// Set by [`Self::with_patch_name`]; `None` otherwise.
498    #[must_use]
499    pub fn patch_name(&self) -> Option<&str> {
500        self.patch_name.as_deref()
501    }
502
503    /// Mutable access to the wrapped [`std::io::BufReader`].
504    ///
505    /// Used by [`crate::ZiPatchReader::resume_apply_to`] to seek the
506    /// underlying source for the patch-size measurement at entry. Not
507    /// part of the stable API — seeking the inner reader while a chunk
508    /// parse is in flight would desync `bytes_read` and break later
509    /// iteration.
510    pub(crate) fn inner_mut(&mut self) -> &mut std::io::BufReader<R> {
511        &mut self.inner
512    }
513
514    /// Enable per-chunk CRC32 verification (the default).
515    ///
516    /// This is the default state after [`ZiPatchReader::new`]. Calling this
517    /// method after construction is only necessary if
518    /// [`ZiPatchReader::skip_checksum_verification`] was previously called.
519    #[must_use]
520    pub fn verify_checksums(mut self) -> Self {
521        self.verify_checksums = true;
522        self
523    }
524
525    /// Disable per-chunk CRC32 verification.
526    ///
527    /// Useful when the source has already been verified out-of-band (e.g. a
528    /// download hash was checked before the file was opened), or when
529    /// processing known-good test data where the overhead is unnecessary.
530    #[must_use]
531    pub fn skip_checksum_verification(mut self) -> Self {
532        self.verify_checksums = false;
533        self
534    }
535
536    /// Returns `true` if iteration reached the `EOF_` terminator cleanly.
537    ///
538    /// A `false` return after `next()` yields `None` indicates the stream was
539    /// truncated — the download or file copy was incomplete. In that case the
540    /// iterator stopped because of a [`ZiPatchError::TruncatedPatch`] error,
541    /// not because the patch finished normally.
542    pub fn is_complete(&self) -> bool {
543        self.eof_seen
544    }
545
546    /// Returns the running total of bytes consumed from the patch stream.
547    ///
548    /// Starts at `12` after [`ZiPatchReader::new`] (the magic header has been
549    /// read) and increases monotonically by the size of each chunk's wire
550    /// frame after each successful [`Iterator::next`] call. Includes the
551    /// `EOF_` terminator's frame.
552    ///
553    /// On parse error, the counter is **not** advanced past the failing
554    /// chunk — it reflects the byte offset at the start of that chunk's
555    /// length prefix, not the broken position somewhere inside its frame.
556    /// Use this offset together with the surfaced error to point a user at
557    /// where the patch became unreadable.
558    ///
559    /// This is the same counter that the
560    /// [`apply_to`](crate::ZiPatchReader::apply_to) driver attaches to
561    /// [`ChunkEvent::bytes_read`](crate::ChunkEvent::bytes_read) when firing
562    /// progress events. Useful for the `bytes_applied / total_patch_size`
563    /// ratio in a progress bar.
564    #[must_use]
565    pub fn bytes_read(&self) -> u64 {
566        self.bytes_read
567    }
568
569    /// Returns the 4-byte ASCII tag of the most recently yielded chunk.
570    ///
571    /// `None` before the first successful [`Iterator::next`] call and after
572    /// the `EOF_` terminator has been consumed (or an error has been
573    /// surfaced). Used by [`apply_to`](crate::ZiPatchReader::apply_to) to
574    /// populate [`ChunkEvent::kind`](crate::ChunkEvent::kind).
575    #[must_use]
576    pub fn last_tag(&self) -> Option<[u8; 4]> {
577        self.last_tag
578    }
579
580    /// Returns the absolute patch-file offset of the body of the most recently
581    /// yielded chunk.
582    ///
583    /// The chunk body begins immediately after the 8-byte
584    /// `[body_len: u32 BE, tag: [u8; 4]]` frame header, so the value points at
585    /// the first byte of the body — for `SQPK` chunks that is the start of
586    /// `[inner_size: i32 BE, sub_cmd: u8, …]`; for the other chunk types it
587    /// is the start of the variant-specific body.
588    ///
589    /// Index builders use this to compute absolute patch-file offsets for
590    /// `SqpkAddData::data`, `SqpkFile` block payloads, and `SqpkHeader::header_data`
591    /// without re-walking the stream.
592    ///
593    /// `None` before the first chunk is successfully yielded. A parse failure
594    /// leaves the previously-set value untouched (the offset returned by this
595    /// method always points at a chunk that was successfully parsed).
596    #[must_use]
597    pub fn current_chunk_body_offset(&self) -> Option<u64> {
598        self.current_body_offset
599    }
600}
601
602impl ZiPatchReader<std::io::BufReader<std::fs::File>> {
603    /// Open the file at `path`, wrap it in a [`std::io::BufReader`], and
604    /// validate the `ZiPatch` magic.
605    ///
606    /// This is a convenience constructor equivalent to:
607    ///
608    /// ```rust,no_run
609    /// # use std::io::BufReader;
610    /// # use std::fs::File;
611    /// # use zipatch_rs::ZiPatchReader;
612    /// let reader = ZiPatchReader::new(BufReader::new(File::open("patch.patch").unwrap())).unwrap();
613    /// ```
614    ///
615    /// # Errors
616    ///
617    /// - [`ZiPatchError::Io`] — the file could not be opened.
618    /// - [`ZiPatchError::InvalidMagic`] — the file does not start with the
619    ///   `ZiPatch` magic bytes.
620    pub fn from_path(path: impl AsRef<std::path::Path>) -> crate::Result<Self> {
621        let file = std::fs::File::open(path)?;
622        Self::new(std::io::BufReader::new(file))
623    }
624}
625
626impl<R: std::io::Read> Iterator for ZiPatchReader<R> {
627    type Item = Result<Chunk>;
628
629    fn next(&mut self) -> Option<Self::Item> {
630        if self.done {
631            return None;
632        }
633        // Snapshot the body offset before parsing so a successful parse can
634        // commit it without re-walking the stream. The chunk body begins after
635        // the 8-byte `[body_len: u32 BE, tag: [u8; 4]]` frame header.
636        let body_offset = self.bytes_read + 8;
637        match parse_chunk(&mut self.inner, self.verify_checksums) {
638            Ok(ParsedChunk {
639                chunk: Chunk::EndOfFile,
640                tag,
641                consumed,
642            }) => {
643                self.bytes_read += consumed;
644                self.last_tag = Some(tag);
645                self.current_body_offset = Some(body_offset);
646                self.done = true;
647                self.eof_seen = true;
648                None
649            }
650            Ok(ParsedChunk {
651                chunk,
652                tag,
653                consumed,
654            }) => {
655                self.bytes_read += consumed;
656                self.last_tag = Some(tag);
657                self.current_body_offset = Some(body_offset);
658                Some(Ok(chunk))
659            }
660            Err(e) => {
661                self.done = true;
662                Some(Err(e))
663            }
664        }
665    }
666}
667
668impl<R: std::io::Read> std::iter::FusedIterator for ZiPatchReader<R> {}
669
670#[cfg(test)]
671mod tests {
672    use super::*;
673    use crate::test_utils::make_chunk;
674    use std::io::Cursor;
675
676    // --- parse_chunk error paths ---
677
678    #[test]
679    fn truncated_at_chunk_boundary_yields_truncated_patch() {
        // Magic + no chunks: parse_chunk must see EOF on the body_len read and
        // convert it to TruncatedPatch. This exercises the
        // UnexpectedEof-to-TruncatedPatch conversion applied to the `body_len`
        // read in `parse_chunk`.
684        let mut patch = Vec::new();
685        patch.extend_from_slice(&MAGIC);
686        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
687        match reader
688            .next()
689            .expect("iterator must yield an error, not None")
690        {
691            Err(ZiPatchError::TruncatedPatch) => {}
692            other => panic!("expected TruncatedPatch, got {other:?}"),
693        }
694        assert!(!reader.is_complete(), "stream is not clean-ended");
695    }
696
697    #[test]
698    fn non_eof_io_error_on_body_len_read_propagates_as_io() {
        // Exercises the pass-through path of the `body_len` read in
        // `parse_chunk`: an I/O error that is NOT UnexpectedEof must propagate
        // verbatim. We trigger this by passing a reader that errors immediately.
702        struct BrokenReader;
703        impl std::io::Read for BrokenReader {
704            fn read(&mut self, _: &mut [u8]) -> std::io::Result<usize> {
705                Err(std::io::Error::new(
706                    std::io::ErrorKind::BrokenPipe,
707                    "simulated broken pipe",
708                ))
709            }
710        }
711        let result = parse_chunk(&mut BrokenReader, false);
712        match result {
713            Err(ZiPatchError::Io(e)) => {
714                assert_eq!(
715                    e.kind(),
716                    std::io::ErrorKind::BrokenPipe,
717                    "non-EOF I/O error must propagate unchanged, got kind {:?}",
718                    e.kind()
719                );
720            }
721            Err(other) => panic!("expected ZiPatchError::Io(BrokenPipe), got {other:?}"),
722            Ok(_) => panic!("expected an error, got Ok"),
723        }
724    }
725
726    #[test]
727    fn truncated_after_one_chunk_yields_truncated_patch() {
728        // Magic + one well-formed ADIR + no more bytes: the second call to
729        // next() must surface TruncatedPatch, not None.
730        let mut adir_body = Vec::new();
731        adir_body.extend_from_slice(&4u32.to_be_bytes());
732        adir_body.extend_from_slice(b"test");
733        let chunk = make_chunk(b"ADIR", &adir_body);
734
735        let mut patch = Vec::new();
736        patch.extend_from_slice(&MAGIC);
737        patch.extend_from_slice(&chunk);
738
739        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
740        let first = reader.next().expect("first chunk must be present");
741        assert!(
742            first.is_ok(),
743            "first ADIR chunk should parse cleanly: {first:?}"
744        );
745        match reader.next().expect("second call must yield an error") {
746            Err(ZiPatchError::TruncatedPatch) => {}
747            other => panic!("expected TruncatedPatch on truncated stream, got {other:?}"),
748        }
749        assert!(
750            !reader.is_complete(),
751            "is_complete must be false after truncation"
752        );
753    }
754
755    #[test]
756    fn checksum_mismatch_returns_checksum_mismatch_error() {
757        // Corrupt the CRC32 field of an otherwise valid ADIR chunk and verify
758        // that parse_chunk returns ChecksumMismatch (not a panic or a wrong error).
759        let mut adir_body = Vec::new();
760        adir_body.extend_from_slice(&4u32.to_be_bytes());
761        adir_body.extend_from_slice(b"test");
762        let mut chunk = make_chunk(b"ADIR", &adir_body);
763        // Flip the last byte of the CRC32 field.
764        let last = chunk.len() - 1;
765        chunk[last] ^= 0xFF;
766
767        let mut cur = Cursor::new(chunk);
768        let result = parse_chunk(&mut cur, true);
769        assert!(
770            matches!(result, Err(ZiPatchError::ChecksumMismatch { .. })),
771            "corrupted CRC must yield ChecksumMismatch"
772        );
773    }
774
775    #[test]
776    fn unknown_chunk_tag_returns_unknown_chunk_tag_error() {
777        // A tag of all-Z bytes is not recognised; parse_chunk must return
778        // UnknownChunkTag carrying the raw 4-byte tag.
779        let chunk = make_chunk(b"ZZZZ", &[]);
780        let mut cur = Cursor::new(chunk);
781        match parse_chunk(&mut cur, false) {
782            Err(ZiPatchError::UnknownChunkTag(tag)) => {
783                assert_eq!(tag, *b"ZZZZ", "tag bytes must be preserved in error");
784            }
785            Err(other) => panic!("expected UnknownChunkTag, got {other:?}"),
786            Ok(_) => panic!("expected UnknownChunkTag, got Ok"),
787        }
788    }
789
790    #[test]
791    fn oversized_chunk_body_len_returns_oversized_chunk_error() {
792        // body_len == u32::MAX (> 512 MiB) must be rejected before any allocation.
793        let bytes = [0xFFu8, 0xFF, 0xFF, 0xFF];
794        let mut cur = Cursor::new(&bytes[..]);
795        let Err(ZiPatchError::OversizedChunk(size)) = parse_chunk(&mut cur, false) else {
796            panic!("expected OversizedChunk for u32::MAX body_len")
797        };
798        assert!(
799            size > MAX_CHUNK_SIZE,
800            "reported size {size} must exceed MAX_CHUNK_SIZE {MAX_CHUNK_SIZE}"
801        );
802    }
803
804    // --- ZiPatchReader byte-counter and tag accessors ---
805
806    #[test]
807    fn bytes_read_starts_at_12_before_first_chunk() {
808        // The magic header is 12 bytes; bytes_read must reflect that immediately
809        // after construction, before any chunk is read.
810        let mut patch = Vec::new();
811        patch.extend_from_slice(&MAGIC);
812        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
813        let reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
814        assert_eq!(
815            reader.bytes_read(),
816            12,
817            "bytes_read must be 12 (magic only) before iteration starts"
818        );
819    }
820
821    #[test]
822    fn last_tag_is_none_before_first_chunk() {
823        // Before calling next(), last_tag must be None.
824        let mut patch = Vec::new();
825        patch.extend_from_slice(&MAGIC);
826        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
827        let reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
828        assert_eq!(
829            reader.last_tag(),
830            None,
831            "last_tag must be None before any chunk is read"
832        );
833    }
834
835    #[test]
836    fn bytes_read_and_last_tag_track_each_chunk_frame() {
837        // MAGIC + ADIR("a") + EOF_ — verify bytes_read grows by the exact frame
838        // size after each chunk and that last_tag follows the stream.
839        let mut adir_body = Vec::new();
840        adir_body.extend_from_slice(&1u32.to_be_bytes());
841        adir_body.extend_from_slice(b"a");
842        // ADIR frame: 4(size) + 4(tag) + 5(body) + 4(crc) = 17 bytes
843        // EOF_  frame: 4 + 4 + 0 + 4 = 12 bytes
844
845        let mut patch = Vec::new();
846        patch.extend_from_slice(&MAGIC);
847        patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
848        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
849
850        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
851        assert_eq!(reader.bytes_read(), 12, "pre-read: magic only");
852        assert_eq!(reader.last_tag(), None, "pre-read: no tag yet");
853
854        let chunk = reader.next().unwrap().unwrap();
855        assert!(
856            matches!(chunk, Chunk::AddDirectory(_)),
857            "first chunk must be ADIR"
858        );
859        assert_eq!(
860            reader.bytes_read(),
861            12 + 17,
862            "after ADIR: magic + ADIR frame"
863        );
864        assert_eq!(
865            reader.last_tag(),
866            Some(*b"ADIR"),
867            "last_tag must be ADIR after first next()"
868        );
869
870        assert!(reader.next().is_none(), "EOF_ must terminate iteration");
871        assert_eq!(
872            reader.bytes_read(),
873            12 + 17 + 12,
874            "after EOF_: magic + ADIR + EOF_ frames"
875        );
876        assert_eq!(
877            reader.last_tag(),
878            Some(*b"EOF_"),
879            "last_tag must be EOF_ after stream ends"
880        );
881        assert!(reader.is_complete(), "is_complete must be true after EOF_");
882    }
883
884    #[test]
885    fn bytes_read_is_monotonically_non_decreasing() {
886        // Stream with two ADIR chunks + EOF_ — verify bytes_read only ever
887        // increases between calls to next() and that consuming the EOF_
888        // chunk (whose body is empty but whose frame is 12 bytes) still
889        // advances the counter past the last non-EOF position.
890        let make_adir = |name: &[u8]| -> Vec<u8> {
891            let mut body = Vec::new();
892            body.extend_from_slice(&(name.len() as u32).to_be_bytes());
893            body.extend_from_slice(name);
894            make_chunk(b"ADIR", &body)
895        };
896
897        let mut patch = Vec::new();
898        patch.extend_from_slice(&MAGIC);
899        patch.extend_from_slice(&make_adir(b"a"));
900        patch.extend_from_slice(&make_adir(b"bb"));
901        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
902
903        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
904        let mut prev = reader.bytes_read();
905        while let Some(result) = reader.next() {
906            result.unwrap();
907            let current = reader.bytes_read();
908            assert!(
909                current >= prev,
910                "bytes_read must be monotonically non-decreasing: {prev} -> {current}"
911            );
912            // For ADIR chunks with non-empty bodies, the increment must be
913            // strictly positive — a body of N bytes adds N + 12 frame bytes.
914            assert!(
915                current > prev,
916                "non-empty ADIR frame must strictly advance bytes_read: \
917                 {prev} -> {current}"
918            );
919            prev = current;
920        }
921        // EOF_ has been consumed: its 12-byte empty-body frame must have
922        // pushed the counter past the previous position.
923        assert!(
924            reader.bytes_read() > prev,
925            "consuming EOF_ must advance bytes_read by its 12-byte frame: \
926             {prev} -> {}",
927            reader.bytes_read()
928        );
929    }
930
931    // --- from_path constructor ---
932
933    #[test]
934    fn from_path_opens_minimal_patch_and_reaches_eof() {
935        let mut bytes = Vec::new();
936        bytes.extend_from_slice(&MAGIC);
937        bytes.extend_from_slice(&make_chunk(b"EOF_", &[]));
938
939        let tmp = tempfile::tempdir().unwrap();
940        let file_path = tmp.path().join("test.patch");
941        std::fs::write(&file_path, &bytes).unwrap();
942
943        let mut reader =
944            ZiPatchReader::from_path(&file_path).expect("from_path must open valid patch");
945        assert!(
946            reader.next().is_none(),
947            "EOF_ must terminate iteration immediately"
948        );
949        assert!(reader.is_complete(), "is_complete must be true after EOF_");
950    }
951
952    #[test]
953    fn from_path_returns_io_error_when_file_is_missing() {
954        let tmp = tempfile::tempdir().unwrap();
955        let file_path = tmp.path().join("nonexistent.patch");
956        assert!(
957            matches!(
958                ZiPatchReader::from_path(&file_path),
959                Err(ZiPatchError::Io(_))
960            ),
961            "from_path on a missing file must return ZiPatchError::Io"
962        );
963    }
964
965    // --- Iterator fused-ness and is_complete ---
966
967    #[test]
968    fn iterator_is_fused_after_error() {
969        // Once next() yields Some(Err(_)), all subsequent calls must yield None.
970        let mut patch = Vec::new();
971        patch.extend_from_slice(&MAGIC);
972        patch.extend_from_slice(&make_chunk(b"ZZZZ", &[])); // unknown tag → error
973
974        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
975        let first = reader.next();
976        assert!(
977            matches!(first, Some(Err(ZiPatchError::UnknownChunkTag(_)))),
978            "first call must yield the error: {first:?}"
979        );
980        // All subsequent calls must return None.
981        assert!(
982            reader.next().is_none(),
983            "fused: must return None after error"
984        );
985        assert!(reader.next().is_none(), "fused: still None on third call");
986    }
987
988    #[test]
989    fn is_complete_false_until_eof_seen() {
990        let mut adir_body = Vec::new();
991        adir_body.extend_from_slice(&1u32.to_be_bytes());
992        adir_body.extend_from_slice(b"x");
993
994        let mut patch = Vec::new();
995        patch.extend_from_slice(&MAGIC);
996        patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
997        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
998
999        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
1000        assert!(
1001            !reader.is_complete(),
1002            "not complete before reading anything"
1003        );
1004        reader.next().unwrap().unwrap(); // consume ADIR
1005        assert!(
1006            !reader.is_complete(),
1007            "not complete after ADIR, before EOF_"
1008        );
1009        assert!(reader.next().is_none(), "EOF_ consumed");
1010        assert!(reader.is_complete(), "complete after EOF_ consumed");
1011    }
1012}