Skip to main content

zipatch_rs/chunk/
mod.rs

1//! Wire-format chunk types and the [`ZiPatchReader`] iterator.
2//!
3//! This module is the parsing layer: it decodes the raw `ZiPatch` byte
4//! stream into a stream of typed [`Chunk`] values. Each top-level
5//! variant corresponds to one 4-byte ASCII wire tag (`FHDR`, `APLY`,
6//! `SQPK`, …); the per-variant submodules below own the binary layout for
7//! their body. Nothing in this module touches the filesystem — apply-time
8//! effects live in [`crate::apply`].
9//!
10//! The [`ZiPatchReader`] iterator validates the 12-byte file magic on
11//! construction, then yields one [`Chunk`] per [`Iterator::next`] call
12//! until the internal `EOF_` terminator is consumed or a parse error
13//! surfaces.
14
15pub(crate) mod adir;
16pub(crate) mod afsp;
17pub(crate) mod aply;
18pub(crate) mod ddir;
19pub(crate) mod fhdr;
20pub(crate) mod sqpk;
21pub(crate) mod util;
22
23pub use adir::AddDirectory;
24pub use afsp::ApplyFreeSpace;
25pub use aply::{ApplyOption, ApplyOptionKind};
26pub use ddir::DeleteDirectory;
27pub use fhdr::{FileHeader, FileHeaderV2, FileHeaderV3};
28pub use sqpk::{SqpackFile, SqpkCommand};
29// Re-export SqpkCommand sub-types so callers can match on them
30pub use sqpk::{
31    IndexCommand, SqpkAddData, SqpkCompressedBlock, SqpkDeleteData, SqpkExpandData, SqpkFile,
32    SqpkFileOperation, SqpkHeader, SqpkHeaderTarget, SqpkIndex, SqpkPatchInfo, SqpkTargetInfo,
33    TargetFileKind, TargetHeaderKind,
34};
35
36use crate::reader::ReadExt;
37use crate::{Result, ZiPatchError};
38use tracing::trace;
39
/// The 12-byte signature that opens every `ZiPatch` stream:
/// `\x91ZIPATCH\r\n\x1a\n`.
const MAGIC: [u8; 12] = *b"\x91ZIPATCH\r\n\x1a\n";

/// Largest chunk body length (512 MiB) that `parse_chunk` accepts; anything
/// larger is rejected as [`ZiPatchError::OversizedChunk`].
const MAX_CHUNK_SIZE: usize = 512 << 20;
45
46/// One top-level chunk parsed from a `ZiPatch` stream.
47///
48/// Each variant corresponds to a 4-byte ASCII wire tag. The tag dispatch table
49/// mirrors the C# reference in
50/// `lib/FFXIVQuickLauncher/.../Patching/ZiPatch/Chunk/ZiPatchChunk.cs`.
51///
52/// # Observed frequency
53///
54/// SE's XIVARR+ patch files almost exclusively contain `FHDR`, `APLY`, and
55/// `SQPK` chunks. `ADIR`/`DELD` can theoretically appear and are implemented,
56/// but are rarely emitted in practice. `APFS` has never been observed in modern
57/// patches (the reference implementation treats it as a no-op). `EOF_` is
58/// consumed by [`ZiPatchReader`] and is never yielded to the caller.
59///
60/// # Exhaustiveness
61///
62/// The enum is `#[non_exhaustive]`. Match arms should include a wildcard to
63/// remain forward-compatible as new chunk types are added.
64#[non_exhaustive]
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub enum Chunk {
67    /// `FHDR` — the first chunk in every patch file; carries version and
68    /// per-version patch metadata. See [`FileHeader`] for the versioned body.
69    FileHeader(FileHeader),
70    /// `APLY` — sets or clears a boolean apply-time flag on the
71    /// [`crate::ApplyContext`] (e.g. "ignore missing files"). See [`ApplyOption`].
72    ApplyOption(ApplyOption),
73    /// `APFS` — free-space book-keeping emitted by old patcher tooling; treated
74    /// as a no-op at apply time. See [`ApplyFreeSpace`].
75    ApplyFreeSpace(ApplyFreeSpace),
76    /// `ADIR` — instructs the patcher to create a directory under the game
77    /// install root. See [`AddDirectory`].
78    AddDirectory(AddDirectory),
79    /// `DELD` — instructs the patcher to remove a directory under the game
80    /// install root. See [`DeleteDirectory`].
81    DeleteDirectory(DeleteDirectory),
82    /// `SQPK` — the workhorse chunk; wraps one of eight sub-commands that
83    /// add, delete, expand, or replace `SqPack` data. See [`SqpkCommand`].
84    Sqpk(SqpkCommand),
85    /// `EOF_` — marks the clean end of the patch stream. [`ZiPatchReader`]
86    /// consumes this chunk internally; it is never yielded to the caller.
87    EndOfFile,
88}
89
90/// One parsed chunk plus its 4-byte ASCII tag and the byte count consumed
91/// from the input stream by its frame.
92///
93/// Returned by [`parse_chunk`]. The `consumed` count is exactly the size of
94/// the chunk's on-wire frame: `4 (body_len) + 4 (tag) + body_len + 4 (crc32)`
95/// = `body_len + 12`. This is what
96/// [`ZiPatchReader`](crate::ZiPatchReader) accumulates into its running
97/// byte counter for progress reporting.
98pub(crate) struct ParsedChunk {
99    pub(crate) chunk: Chunk,
100    pub(crate) tag: [u8; 4],
101    pub(crate) consumed: u64,
102}
103
104/// Parse one chunk frame from `r`.
105///
106/// # Wire framing
107///
108/// Each chunk is laid out as:
109///
110/// ```text
111/// [body_len: u32 BE] [tag: 4 bytes] [body: body_len bytes] [crc32: u32 BE]
112/// ```
113///
114/// The CRC32 is computed over `tag ++ body` (not over `body_len`), matching
115/// the C# `ChecksumBinaryReader` in the `XIVLauncher` reference. When
116/// `verify_checksums` is `true` and the stored CRC does not match the computed
117/// one, [`ZiPatchError::ChecksumMismatch`] is returned.
118///
119/// # Errors
120///
121/// - [`ZiPatchError::TruncatedPatch`] — the reader returns EOF while reading
122///   the `body_len` field (i.e. no more chunks are present but `EOF_` was
123///   never seen).
124/// - [`ZiPatchError::OversizedChunk`] — `body_len` exceeds 512 MiB.
125/// - [`ZiPatchError::ChecksumMismatch`] — CRC32 mismatch (only when
126///   `verify_checksums` is `true`).
127/// - [`ZiPatchError::UnknownChunkTag`] — tag is not recognised.
128/// - [`ZiPatchError::Io`] — any other I/O failure reading from `r`.
129pub(crate) fn parse_chunk<R: std::io::Read>(
130    r: &mut R,
131    verify_checksums: bool,
132) -> Result<ParsedChunk> {
133    let size = match r.read_u32_be() {
134        Ok(s) => s as usize,
135        Err(ZiPatchError::Io(e)) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
136            return Err(ZiPatchError::TruncatedPatch);
137        }
138        Err(e) => return Err(e),
139    };
140    if size > MAX_CHUNK_SIZE {
141        return Err(ZiPatchError::OversizedChunk(size));
142    }
143
144    // Tag (4 B) and CRC (4 B) are always present regardless of body shape.
145    let mut tag = [0u8; 4];
146    r.read_exact(&mut tag)?;
147
148    // Peek at the first 5 bytes of the body without committing to either the
149    // generic single-allocation path or the SQPK `A` zero-copy-into-data path.
150    // For SQPK chunks, those 5 bytes are `[inner_size: i32 BE][sub_cmd: u8]`.
151    // For chunks with bodies shorter than 5 bytes (e.g. `EOF_`), we still read
152    // exactly `size` bytes into the prefix array and leave the rest zero.
153    let mut prefix = [0u8; 5];
154    let prefix_len = size.min(5);
155    if prefix_len > 0 {
156        r.read_exact(&mut prefix[..prefix_len])?;
157    }
158
159    // ---- Fast path: SQPK `A` (SqpkAddData) — see `parse_sqpk_add_data_fast`. ----
160    if &tag == b"SQPK" && size >= 5 + SQPK_ADDDATA_HEADER_SIZE && prefix[4] == b'A' {
161        return parse_sqpk_add_data_fast(r, tag, prefix, size, verify_checksums);
162    }
163
164    // ---- Generic path: one allocation for the whole body. ----
165    let mut body_vec = vec![0u8; size];
166    body_vec[..prefix_len].copy_from_slice(&prefix[..prefix_len]);
167    if size > prefix_len {
168        r.read_exact(&mut body_vec[prefix_len..])?;
169    }
170
171    let mut crc_buf = [0u8; 4];
172    r.read_exact(&mut crc_buf)?;
173    let expected_crc = u32::from_be_bytes(crc_buf);
174
175    if verify_checksums {
176        let mut hasher = crc32fast::Hasher::new();
177        hasher.update(&tag);
178        hasher.update(&body_vec);
179        let actual_crc = hasher.finalize();
180        if actual_crc != expected_crc {
181            return Err(ZiPatchError::ChecksumMismatch {
182                tag,
183                expected: expected_crc,
184                actual: actual_crc,
185            });
186        }
187    }
188
189    trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
190
191    // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
192    let consumed = (size as u64) + 12;
193
194    let body = &body_vec[..];
195
196    let chunk = match &tag {
197        b"EOF_" => Chunk::EndOfFile,
198        b"FHDR" => Chunk::FileHeader(fhdr::parse(body)?),
199        b"APLY" => Chunk::ApplyOption(aply::parse(body)?),
200        b"APFS" => Chunk::ApplyFreeSpace(afsp::parse(body)?),
201        b"ADIR" => Chunk::AddDirectory(adir::parse(body)?),
202        b"DELD" => Chunk::DeleteDirectory(ddir::parse(body)?),
203        b"SQPK" => Chunk::Sqpk(sqpk::parse_sqpk(body)?),
204        _ => return Err(ZiPatchError::UnknownChunkTag(tag)),
205    };
206
207    Ok(ParsedChunk {
208        chunk,
209        tag,
210        consumed,
211    })
212}
213
214// Size of the SqpkAddData fixed header that precedes the inline data payload.
215// Mirrors `add_data::SqpkAddData::DATA_SOURCE_OFFSET` (23) without taking a
216// `u64` round-trip; kept private to the framing path.
217const SQPK_ADDDATA_HEADER_SIZE: usize = 23;
218
219/// Fast path for SQPK `A` (`SqpkAddData`) chunks.
220///
221/// `AddData` is the largest chunk type by byte volume — payloads of hundreds of
222/// KB to MB are typical. The generic framing path allocates one `Vec<u8>` of
223/// `size` for the whole body, then `binrw`'s derived parser allocates a second
224/// `Vec<u8>` of exactly `data_bytes` and memcpys the inline payload into it.
225/// That second allocation + memcpy dominates parse time for `AddData`.
226///
227/// This function reads the `AddData` fixed header into a stack array, parses
228/// the seven fields directly, allocates the `data` payload at its exact size,
229/// and `read_exact`s the source bytes straight into it — one allocation, no
230/// intermediate copy of the payload.
231///
232/// On entry: `tag` and the 5-byte `prefix` (SQPK `inner_size` + sub-command
233/// byte) have already been consumed from `r`. The remaining bytes are
234/// `[fixed_header: 23 B][data: data_bytes][crc32: 4 B]`.
235fn parse_sqpk_add_data_fast<R: std::io::Read>(
236    r: &mut R,
237    tag: [u8; 4],
238    prefix: [u8; 5],
239    size: usize,
240    verify_checksums: bool,
241) -> Result<ParsedChunk> {
242    // Validate the SQPK inner_size against the outer chunk size, matching the
243    // check in `sqpk::parse_sqpk` so callers see byte-identical error behaviour.
244    let inner_size = i32::from_be_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
245    if inner_size != size {
246        return Err(ZiPatchError::InvalidField {
247            context: "SQPK inner size mismatch",
248        });
249    }
250
251    let mut header = [0u8; SQPK_ADDDATA_HEADER_SIZE];
252    r.read_exact(&mut header)?;
253
254    // SqpkAddData fixed-header layout (all big-endian):
255    //   [0..3]   pad
256    //   [3..5]   main_id   u16
257    //   [5..7]   sub_id    u16
258    //   [7..11]  file_id   u32
259    //   [11..15] block_offset_raw  u32 (<< 7 = bytes)
260    //   [15..19] data_bytes_raw    u32 (<< 7 = bytes)
261    //   [19..23] block_delete_raw  u32 (<< 7 = bytes)
262    let main_id = u16::from_be_bytes([header[3], header[4]]);
263    let sub_id = u16::from_be_bytes([header[5], header[6]]);
264    let file_id = u32::from_be_bytes([header[7], header[8], header[9], header[10]]);
265    let block_offset_raw = u32::from_be_bytes([header[11], header[12], header[13], header[14]]);
266    let data_bytes_raw = u32::from_be_bytes([header[15], header[16], header[17], header[18]]);
267    let block_delete_raw = u32::from_be_bytes([header[19], header[20], header[21], header[22]]);
268
269    let block_offset = (block_offset_raw as u64) << 7;
270    let data_bytes = (data_bytes_raw as u64) << 7;
271    let block_delete_number = (block_delete_raw as u64) << 7;
272
273    // The declared payload length must fit exactly within the chunk body:
274    //   size = 5 (inner_size + sub_cmd) + 23 (fixed header) + data_bytes
275    let expected_data = size - 5 - SQPK_ADDDATA_HEADER_SIZE;
276    if data_bytes as usize != expected_data {
277        return Err(ZiPatchError::InvalidField {
278            context: "SqpkAddData data_bytes does not match SQPK body length",
279        });
280    }
281
282    let mut data = vec![0u8; data_bytes as usize];
283    r.read_exact(&mut data)?;
284
285    let mut crc_buf = [0u8; 4];
286    r.read_exact(&mut crc_buf)?;
287    let expected_crc = u32::from_be_bytes(crc_buf);
288
289    if verify_checksums {
290        // CRC is over `tag ++ body`. The body is split across three disjoint
291        // buffers — feed each segment to the incremental hasher.
292        let mut hasher = crc32fast::Hasher::new();
293        hasher.update(&tag);
294        hasher.update(&prefix);
295        hasher.update(&header);
296        hasher.update(&data);
297        let actual_crc = hasher.finalize();
298        if actual_crc != expected_crc {
299            return Err(ZiPatchError::ChecksumMismatch {
300                tag,
301                expected: expected_crc,
302                actual: actual_crc,
303            });
304        }
305    }
306
307    trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
308
309    let chunk = Chunk::Sqpk(sqpk::SqpkCommand::AddData(Box::new(sqpk::SqpkAddData {
310        target_file: sqpk::SqpackFile {
311            main_id,
312            sub_id,
313            file_id,
314        },
315        block_offset,
316        data_bytes,
317        block_delete_number,
318        data,
319    })));
320
321    // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
322    let consumed = (size as u64) + 12;
323
324    Ok(ParsedChunk {
325        chunk,
326        tag,
327        consumed,
328    })
329}
330
/// Iterator over the [`Chunk`]s of a `ZiPatch` stream.
///
/// `ZiPatchReader` wraps any [`std::io::Read`] source and produces one
/// [`Chunk`] per [`Iterator::next`] call. The 12-byte file magic is checked
/// when the reader is constructed; chunks are then read in order until the
/// `EOF_` terminator is reached or an error occurs.
///
/// # Stream contract
///
/// - **Magic** — the first 12 bytes must be `\x91ZIPATCH\r\n\x1a\n`. Any
///   mismatch returns [`ZiPatchError::InvalidMagic`] from [`ZiPatchReader::new`].
/// - **Framing** — every chunk is a length-prefixed frame:
///   `[body_len: u32 BE] [tag: 4 B] [body: body_len B] [crc32: u32 BE]`.
/// - **CRC32** — computed over `tag ++ body`. Verification is on by default;
///   use [`ZiPatchReader::skip_checksum_verification`] to turn it off.
/// - **Termination** — the `EOF_` chunk is consumed internally and makes the
///   iterator return `None`. Call [`ZiPatchReader::is_complete`] afterwards to
///   tell a clean end from a stream that stopped early.
/// - **Fused** — after `EOF_` has been consumed, or after an error has been
///   yielded, every later call to `next` returns `None`. The iterator
///   implements [`std::iter::FusedIterator`].
///
/// # Errors
///
/// A failing [`Iterator::next`] call returns `Some(Err(e))`; all calls after
/// that return `None`. Possible errors include:
/// - [`ZiPatchError::TruncatedPatch`] — stream ended before `EOF_`.
/// - [`ZiPatchError::OversizedChunk`] — a declared chunk body exceeds 512 MiB.
/// - [`ZiPatchError::ChecksumMismatch`] — CRC32 verification failed.
/// - [`ZiPatchError::UnknownChunkTag`] — unrecognised 4-byte tag.
/// - [`ZiPatchError::Io`] — underlying I/O failure.
///
/// # Example
///
/// Build a minimal in-memory patch (magic + `ADIR` + `EOF_`) and iterate it:
///
/// ```rust
/// use std::io::Cursor;
/// use zipatch_rs::{Chunk, ZiPatchReader};
///
/// // Helper: wrap tag + body into a correctly framed chunk with CRC32.
/// fn make_chunk(tag: &[u8; 4], body: &[u8]) -> Vec<u8> {
///     let mut crc_input = Vec::new();
///     crc_input.extend_from_slice(tag);
///     crc_input.extend_from_slice(body);
///     let crc = crc32fast::hash(&crc_input);
///
///     let mut out = Vec::new();
///     out.extend_from_slice(&(body.len() as u32).to_be_bytes());
///     out.extend_from_slice(tag);
///     out.extend_from_slice(body);
///     out.extend_from_slice(&crc.to_be_bytes());
///     out
/// }
///
/// // 12-byte ZiPatch magic.
/// let magic: [u8; 12] = [0x91, 0x5A, 0x49, 0x50, 0x41, 0x54, 0x43, 0x48, 0x0D, 0x0A, 0x1A, 0x0A];
///
/// // ADIR body: u32 BE name_len (7) + b"created".
/// let mut adir_body = Vec::new();
/// adir_body.extend_from_slice(&7u32.to_be_bytes());
/// adir_body.extend_from_slice(b"created");
///
/// let mut patch = Vec::new();
/// patch.extend_from_slice(&magic);
/// patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
/// patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
///
/// let chunks: Vec<_> = ZiPatchReader::new(Cursor::new(patch))
///     .unwrap()
///     .collect::<Result<_, _>>()
///     .unwrap();
///
/// assert_eq!(chunks.len(), 1);
/// assert!(matches!(chunks[0], Chunk::AddDirectory(_)));
/// ```
#[derive(Debug)]
pub struct ZiPatchReader<R> {
    // Buffered view of the caller's reader; all chunk parsing reads from here.
    inner: std::io::BufReader<R>,
    // Set once `EOF_` has been consumed or an error has been yielded; every
    // later `next()` short-circuits to `None`.
    done: bool,
    // Whether `parse_chunk` compares the stored CRC32 with the computed one.
    verify_checksums: bool,
    // Set only when the `EOF_` chunk was parsed; backs `is_complete()`.
    eof_seen: bool,
    // Running total of bytes consumed from `inner`, including the 12-byte
    // magic header. Advanced after each successful `parse_chunk` call and
    // exposed via `bytes_read()` so the apply driver can fire monotonic
    // progress events without instrumenting the underlying `Read` source.
    bytes_read: u64,
    // 4-byte ASCII tag of the most recently parsed chunk frame. `None` until
    // the first frame parses successfully. `next()` stores the tag on every
    // successful parse — including the `EOF_` frame — and leaves it untouched
    // on error. Used by `apply_to` to attach the tag to per-chunk progress
    // events without re-matching on the `Chunk` enum.
    last_tag: Option<[u8; 4]>,
    // Absolute patch-file offset of the body of the most recently parsed
    // chunk (i.e. the byte right after the 8-byte `[len: u32 BE, tag: [u8;4]]`
    // frame header). `None` until the first frame parses successfully; only
    // the success paths of `next()` write it, so a parse failure leaves the
    // previous value in place.
    current_body_offset: Option<u64>,
}
430
431impl<R: std::io::Read> ZiPatchReader<R> {
432    /// Wrap `reader` and validate the leading 12-byte `ZiPatch` magic.
433    ///
434    /// Consumes exactly 12 bytes from `reader`. The magic is the byte sequence
435    /// `0x91 0x5A 0x49 0x50 0x41 0x54 0x43 0x48 0x0D 0x0A 0x1A 0x0A`
436    /// (i.e. `\x91ZIPATCH\r\n\x1a\n`).
437    ///
438    /// The reader is wrapped in a [`std::io::BufReader`] internally, so the
439    /// many small typed reads the chunk parser issues (4-byte size, 4-byte
440    /// tag, 5-byte SQPK prefix, …) coalesce into a small number of syscalls.
441    /// Callers do not need to pre-wrap a raw [`std::fs::File`] or other
442    /// unbuffered source.
443    ///
444    /// CRC32 verification is **enabled** by default. Call
445    /// [`ZiPatchReader::skip_checksum_verification`] before iterating to
446    /// disable it.
447    ///
448    /// # Errors
449    ///
450    /// - [`ZiPatchError::InvalidMagic`] — the first 12 bytes do not match the
451    ///   expected magic.
452    /// - [`ZiPatchError::Io`] — an I/O error occurred while reading the magic.
453    pub fn new(reader: R) -> Result<Self> {
454        let mut reader = std::io::BufReader::new(reader);
455        let magic = reader.read_exact_vec(12)?;
456        if magic.as_slice() != MAGIC {
457            return Err(ZiPatchError::InvalidMagic);
458        }
459        Ok(Self {
460            inner: reader,
461            done: false,
462            verify_checksums: true,
463            eof_seen: false,
464            // The 12-byte magic header has already been consumed.
465            bytes_read: 12,
466            last_tag: None,
467            current_body_offset: None,
468        })
469    }
470
471    /// Enable per-chunk CRC32 verification (the default).
472    ///
473    /// This is the default state after [`ZiPatchReader::new`]. Calling this
474    /// method after construction is only necessary if
475    /// [`ZiPatchReader::skip_checksum_verification`] was previously called.
476    #[must_use]
477    pub fn verify_checksums(mut self) -> Self {
478        self.verify_checksums = true;
479        self
480    }
481
482    /// Disable per-chunk CRC32 verification.
483    ///
484    /// Useful when the source has already been verified out-of-band (e.g. a
485    /// download hash was checked before the file was opened), or when
486    /// processing known-good test data where the overhead is unnecessary.
487    #[must_use]
488    pub fn skip_checksum_verification(mut self) -> Self {
489        self.verify_checksums = false;
490        self
491    }
492
493    /// Returns `true` if iteration reached the `EOF_` terminator cleanly.
494    ///
495    /// A `false` return after `next()` yields `None` indicates the stream was
496    /// truncated — the download or file copy was incomplete. In that case the
497    /// iterator stopped because of a [`ZiPatchError::TruncatedPatch`] error,
498    /// not because the patch finished normally.
499    pub fn is_complete(&self) -> bool {
500        self.eof_seen
501    }
502
503    /// Returns the running total of bytes consumed from the patch stream.
504    ///
505    /// Starts at `12` after [`ZiPatchReader::new`] (the magic header has been
506    /// read) and increases monotonically by the size of each chunk's wire
507    /// frame after each successful [`Iterator::next`] call. Includes the
508    /// `EOF_` terminator's frame.
509    ///
510    /// On parse error, the counter is **not** advanced past the failing
511    /// chunk — it reflects the byte offset at the start of that chunk's
512    /// length prefix, not the broken position somewhere inside its frame.
513    /// Use this offset together with the surfaced error to point a user at
514    /// where the patch became unreadable.
515    ///
516    /// This is the same counter that the
517    /// [`apply_to`](crate::ZiPatchReader::apply_to) driver attaches to
518    /// [`ChunkEvent::bytes_read`](crate::ChunkEvent::bytes_read) when firing
519    /// progress events. Useful for the `bytes_applied / total_patch_size`
520    /// ratio in a progress bar.
521    #[must_use]
522    pub fn bytes_read(&self) -> u64 {
523        self.bytes_read
524    }
525
526    /// Returns the 4-byte ASCII tag of the most recently yielded chunk.
527    ///
528    /// `None` before the first successful [`Iterator::next`] call and after
529    /// the `EOF_` terminator has been consumed (or an error has been
530    /// surfaced). Used by [`apply_to`](crate::ZiPatchReader::apply_to) to
531    /// populate [`ChunkEvent::kind`](crate::ChunkEvent::kind).
532    #[must_use]
533    pub fn last_tag(&self) -> Option<[u8; 4]> {
534        self.last_tag
535    }
536
537    /// Returns the absolute patch-file offset of the body of the most recently
538    /// yielded chunk.
539    ///
540    /// The chunk body begins immediately after the 8-byte
541    /// `[body_len: u32 BE, tag: [u8; 4]]` frame header, so the value points at
542    /// the first byte of the body — for `SQPK` chunks that is the start of
543    /// `[inner_size: i32 BE, sub_cmd: u8, …]`; for the other chunk types it
544    /// is the start of the variant-specific body.
545    ///
546    /// Index builders use this to compute absolute patch-file offsets for
547    /// `SqpkAddData::data`, `SqpkFile` block payloads, and `SqpkHeader::header_data`
548    /// without re-walking the stream.
549    ///
550    /// `None` before the first chunk is successfully yielded. A parse failure
551    /// leaves the previously-set value untouched (the offset returned by this
552    /// method always points at a chunk that was successfully parsed).
553    #[must_use]
554    pub fn current_chunk_body_offset(&self) -> Option<u64> {
555        self.current_body_offset
556    }
557}
558
559impl ZiPatchReader<std::io::BufReader<std::fs::File>> {
560    /// Open the file at `path`, wrap it in a [`std::io::BufReader`], and
561    /// validate the `ZiPatch` magic.
562    ///
563    /// This is a convenience constructor equivalent to:
564    ///
565    /// ```rust,no_run
566    /// # use std::io::BufReader;
567    /// # use std::fs::File;
568    /// # use zipatch_rs::ZiPatchReader;
569    /// let reader = ZiPatchReader::new(BufReader::new(File::open("patch.patch").unwrap())).unwrap();
570    /// ```
571    ///
572    /// # Errors
573    ///
574    /// - [`ZiPatchError::Io`] — the file could not be opened.
575    /// - [`ZiPatchError::InvalidMagic`] — the file does not start with the
576    ///   `ZiPatch` magic bytes.
577    pub fn from_path(path: impl AsRef<std::path::Path>) -> crate::Result<Self> {
578        let file = std::fs::File::open(path)?;
579        Self::new(std::io::BufReader::new(file))
580    }
581}
582
583impl<R: std::io::Read> Iterator for ZiPatchReader<R> {
584    type Item = Result<Chunk>;
585
586    fn next(&mut self) -> Option<Self::Item> {
587        if self.done {
588            return None;
589        }
590        // Snapshot the body offset before parsing so a successful parse can
591        // commit it without re-walking the stream. The chunk body begins after
592        // the 8-byte `[body_len: u32 BE, tag: [u8; 4]]` frame header.
593        let body_offset = self.bytes_read + 8;
594        match parse_chunk(&mut self.inner, self.verify_checksums) {
595            Ok(ParsedChunk {
596                chunk: Chunk::EndOfFile,
597                tag,
598                consumed,
599            }) => {
600                self.bytes_read += consumed;
601                self.last_tag = Some(tag);
602                self.current_body_offset = Some(body_offset);
603                self.done = true;
604                self.eof_seen = true;
605                None
606            }
607            Ok(ParsedChunk {
608                chunk,
609                tag,
610                consumed,
611            }) => {
612                self.bytes_read += consumed;
613                self.last_tag = Some(tag);
614                self.current_body_offset = Some(body_offset);
615                Some(Ok(chunk))
616            }
617            Err(e) => {
618                self.done = true;
619                Some(Err(e))
620            }
621        }
622    }
623}
624
625impl<R: std::io::Read> std::iter::FusedIterator for ZiPatchReader<R> {}
626
627#[cfg(test)]
628mod tests {
629    use super::*;
630    use crate::test_utils::make_chunk;
631    use std::io::Cursor;
632
633    // --- parse_chunk error paths ---
634
635    #[test]
636    fn truncated_at_chunk_boundary_yields_truncated_patch() {
637        // Magic + no chunks: parse_chunk must see EOF on the body_len read and
638        // convert it to TruncatedPatch.  This exercises the
639        // `Err(ZiPatchError::Io(e)) if e.kind() == UnexpectedEof` arm at
640        // chunk/mod.rs line 121.
641        let mut patch = Vec::new();
642        patch.extend_from_slice(&MAGIC);
643        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
644        match reader
645            .next()
646            .expect("iterator must yield an error, not None")
647        {
648            Err(ZiPatchError::TruncatedPatch) => {}
649            other => panic!("expected TruncatedPatch, got {other:?}"),
650        }
651        assert!(!reader.is_complete(), "stream is not clean-ended");
652    }
653
654    #[test]
655    fn non_eof_io_error_on_body_len_read_propagates_as_io() {
656        // Exercises the `Err(e) => return Err(e)` arm at line 124: an I/O
657        // error that is NOT UnexpectedEof must propagate verbatim.
658        // We trigger this by passing a reader that errors immediately.
659        struct BrokenReader;
660        impl std::io::Read for BrokenReader {
661            fn read(&mut self, _: &mut [u8]) -> std::io::Result<usize> {
662                Err(std::io::Error::new(
663                    std::io::ErrorKind::BrokenPipe,
664                    "simulated broken pipe",
665                ))
666            }
667        }
668        let result = parse_chunk(&mut BrokenReader, false);
669        match result {
670            Err(ZiPatchError::Io(e)) => {
671                assert_eq!(
672                    e.kind(),
673                    std::io::ErrorKind::BrokenPipe,
674                    "non-EOF I/O error must propagate unchanged, got kind {:?}",
675                    e.kind()
676                );
677            }
678            Err(other) => panic!("expected ZiPatchError::Io(BrokenPipe), got {other:?}"),
679            Ok(_) => panic!("expected an error, got Ok"),
680        }
681    }
682
683    #[test]
684    fn truncated_after_one_chunk_yields_truncated_patch() {
685        // Magic + one well-formed ADIR + no more bytes: the second call to
686        // next() must surface TruncatedPatch, not None.
687        let mut adir_body = Vec::new();
688        adir_body.extend_from_slice(&4u32.to_be_bytes());
689        adir_body.extend_from_slice(b"test");
690        let chunk = make_chunk(b"ADIR", &adir_body);
691
692        let mut patch = Vec::new();
693        patch.extend_from_slice(&MAGIC);
694        patch.extend_from_slice(&chunk);
695
696        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
697        let first = reader.next().expect("first chunk must be present");
698        assert!(
699            first.is_ok(),
700            "first ADIR chunk should parse cleanly: {first:?}"
701        );
702        match reader.next().expect("second call must yield an error") {
703            Err(ZiPatchError::TruncatedPatch) => {}
704            other => panic!("expected TruncatedPatch on truncated stream, got {other:?}"),
705        }
706        assert!(
707            !reader.is_complete(),
708            "is_complete must be false after truncation"
709        );
710    }
711
712    #[test]
713    fn checksum_mismatch_returns_checksum_mismatch_error() {
714        // Corrupt the CRC32 field of an otherwise valid ADIR chunk and verify
715        // that parse_chunk returns ChecksumMismatch (not a panic or a wrong error).
716        let mut adir_body = Vec::new();
717        adir_body.extend_from_slice(&4u32.to_be_bytes());
718        adir_body.extend_from_slice(b"test");
719        let mut chunk = make_chunk(b"ADIR", &adir_body);
720        // Flip the last byte of the CRC32 field.
721        let last = chunk.len() - 1;
722        chunk[last] ^= 0xFF;
723
724        let mut cur = Cursor::new(chunk);
725        let result = parse_chunk(&mut cur, true);
726        assert!(
727            matches!(result, Err(ZiPatchError::ChecksumMismatch { .. })),
728            "corrupted CRC must yield ChecksumMismatch"
729        );
730    }
731
732    #[test]
733    fn unknown_chunk_tag_returns_unknown_chunk_tag_error() {
734        // A tag of all-Z bytes is not recognised; parse_chunk must return
735        // UnknownChunkTag carrying the raw 4-byte tag.
736        let chunk = make_chunk(b"ZZZZ", &[]);
737        let mut cur = Cursor::new(chunk);
738        match parse_chunk(&mut cur, false) {
739            Err(ZiPatchError::UnknownChunkTag(tag)) => {
740                assert_eq!(tag, *b"ZZZZ", "tag bytes must be preserved in error");
741            }
742            Err(other) => panic!("expected UnknownChunkTag, got {other:?}"),
743            Ok(_) => panic!("expected UnknownChunkTag, got Ok"),
744        }
745    }
746
747    #[test]
748    fn oversized_chunk_body_len_returns_oversized_chunk_error() {
749        // body_len == u32::MAX (> 512 MiB) must be rejected before any allocation.
750        let bytes = [0xFFu8, 0xFF, 0xFF, 0xFF];
751        let mut cur = Cursor::new(&bytes[..]);
752        let Err(ZiPatchError::OversizedChunk(size)) = parse_chunk(&mut cur, false) else {
753            panic!("expected OversizedChunk for u32::MAX body_len")
754        };
755        assert!(
756            size > MAX_CHUNK_SIZE,
757            "reported size {size} must exceed MAX_CHUNK_SIZE {MAX_CHUNK_SIZE}"
758        );
759    }
760
761    // --- ZiPatchReader byte-counter and tag accessors ---
762
763    #[test]
764    fn bytes_read_starts_at_12_before_first_chunk() {
765        // The magic header is 12 bytes; bytes_read must reflect that immediately
766        // after construction, before any chunk is read.
767        let mut patch = Vec::new();
768        patch.extend_from_slice(&MAGIC);
769        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
770        let reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
771        assert_eq!(
772            reader.bytes_read(),
773            12,
774            "bytes_read must be 12 (magic only) before iteration starts"
775        );
776    }
777
778    #[test]
779    fn last_tag_is_none_before_first_chunk() {
780        // Before calling next(), last_tag must be None.
781        let mut patch = Vec::new();
782        patch.extend_from_slice(&MAGIC);
783        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
784        let reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
785        assert_eq!(
786            reader.last_tag(),
787            None,
788            "last_tag must be None before any chunk is read"
789        );
790    }
791
792    #[test]
793    fn bytes_read_and_last_tag_track_each_chunk_frame() {
794        // MAGIC + ADIR("a") + EOF_ — verify bytes_read grows by the exact frame
795        // size after each chunk and that last_tag follows the stream.
796        let mut adir_body = Vec::new();
797        adir_body.extend_from_slice(&1u32.to_be_bytes());
798        adir_body.extend_from_slice(b"a");
799        // ADIR frame: 4(size) + 4(tag) + 5(body) + 4(crc) = 17 bytes
800        // EOF_  frame: 4 + 4 + 0 + 4 = 12 bytes
801
802        let mut patch = Vec::new();
803        patch.extend_from_slice(&MAGIC);
804        patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
805        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
806
807        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
808        assert_eq!(reader.bytes_read(), 12, "pre-read: magic only");
809        assert_eq!(reader.last_tag(), None, "pre-read: no tag yet");
810
811        let chunk = reader.next().unwrap().unwrap();
812        assert!(
813            matches!(chunk, Chunk::AddDirectory(_)),
814            "first chunk must be ADIR"
815        );
816        assert_eq!(
817            reader.bytes_read(),
818            12 + 17,
819            "after ADIR: magic + ADIR frame"
820        );
821        assert_eq!(
822            reader.last_tag(),
823            Some(*b"ADIR"),
824            "last_tag must be ADIR after first next()"
825        );
826
827        assert!(reader.next().is_none(), "EOF_ must terminate iteration");
828        assert_eq!(
829            reader.bytes_read(),
830            12 + 17 + 12,
831            "after EOF_: magic + ADIR + EOF_ frames"
832        );
833        assert_eq!(
834            reader.last_tag(),
835            Some(*b"EOF_"),
836            "last_tag must be EOF_ after stream ends"
837        );
838        assert!(reader.is_complete(), "is_complete must be true after EOF_");
839    }
840
841    #[test]
842    fn bytes_read_is_monotonically_non_decreasing() {
843        // Stream with two ADIR chunks + EOF_ — verify bytes_read only ever
844        // increases between calls to next() and that consuming the EOF_
845        // chunk (whose body is empty but whose frame is 12 bytes) still
846        // advances the counter past the last non-EOF position.
847        let make_adir = |name: &[u8]| -> Vec<u8> {
848            let mut body = Vec::new();
849            body.extend_from_slice(&(name.len() as u32).to_be_bytes());
850            body.extend_from_slice(name);
851            make_chunk(b"ADIR", &body)
852        };
853
854        let mut patch = Vec::new();
855        patch.extend_from_slice(&MAGIC);
856        patch.extend_from_slice(&make_adir(b"a"));
857        patch.extend_from_slice(&make_adir(b"bb"));
858        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
859
860        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
861        let mut prev = reader.bytes_read();
862        while let Some(result) = reader.next() {
863            result.unwrap();
864            let current = reader.bytes_read();
865            assert!(
866                current >= prev,
867                "bytes_read must be monotonically non-decreasing: {prev} -> {current}"
868            );
869            // For ADIR chunks with non-empty bodies, the increment must be
870            // strictly positive — a body of N bytes adds N + 12 frame bytes.
871            assert!(
872                current > prev,
873                "non-empty ADIR frame must strictly advance bytes_read: \
874                 {prev} -> {current}"
875            );
876            prev = current;
877        }
878        // EOF_ has been consumed: its 12-byte empty-body frame must have
879        // pushed the counter past the previous position.
880        assert!(
881            reader.bytes_read() > prev,
882            "consuming EOF_ must advance bytes_read by its 12-byte frame: \
883             {prev} -> {}",
884            reader.bytes_read()
885        );
886    }
887
888    // --- from_path constructor ---
889
890    #[test]
891    fn from_path_opens_minimal_patch_and_reaches_eof() {
892        let mut bytes = Vec::new();
893        bytes.extend_from_slice(&MAGIC);
894        bytes.extend_from_slice(&make_chunk(b"EOF_", &[]));
895
896        let tmp = tempfile::tempdir().unwrap();
897        let file_path = tmp.path().join("test.patch");
898        std::fs::write(&file_path, &bytes).unwrap();
899
900        let mut reader =
901            ZiPatchReader::from_path(&file_path).expect("from_path must open valid patch");
902        assert!(
903            reader.next().is_none(),
904            "EOF_ must terminate iteration immediately"
905        );
906        assert!(reader.is_complete(), "is_complete must be true after EOF_");
907    }
908
909    #[test]
910    fn from_path_returns_io_error_when_file_is_missing() {
911        let tmp = tempfile::tempdir().unwrap();
912        let file_path = tmp.path().join("nonexistent.patch");
913        assert!(
914            matches!(
915                ZiPatchReader::from_path(&file_path),
916                Err(ZiPatchError::Io(_))
917            ),
918            "from_path on a missing file must return ZiPatchError::Io"
919        );
920    }
921
922    // --- Iterator fused-ness and is_complete ---
923
924    #[test]
925    fn iterator_is_fused_after_error() {
926        // Once next() yields Some(Err(_)), all subsequent calls must yield None.
927        let mut patch = Vec::new();
928        patch.extend_from_slice(&MAGIC);
929        patch.extend_from_slice(&make_chunk(b"ZZZZ", &[])); // unknown tag → error
930
931        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
932        let first = reader.next();
933        assert!(
934            matches!(first, Some(Err(ZiPatchError::UnknownChunkTag(_)))),
935            "first call must yield the error: {first:?}"
936        );
937        // All subsequent calls must return None.
938        assert!(
939            reader.next().is_none(),
940            "fused: must return None after error"
941        );
942        assert!(reader.next().is_none(), "fused: still None on third call");
943    }
944
945    #[test]
946    fn is_complete_false_until_eof_seen() {
947        let mut adir_body = Vec::new();
948        adir_body.extend_from_slice(&1u32.to_be_bytes());
949        adir_body.extend_from_slice(b"x");
950
951        let mut patch = Vec::new();
952        patch.extend_from_slice(&MAGIC);
953        patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
954        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
955
956        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
957        assert!(
958            !reader.is_complete(),
959            "not complete before reading anything"
960        );
961        reader.next().unwrap().unwrap(); // consume ADIR
962        assert!(
963            !reader.is_complete(),
964            "not complete after ADIR, before EOF_"
965        );
966        assert!(reader.next().is_none(), "EOF_ consumed");
967        assert!(reader.is_complete(), "complete after EOF_ consumed");
968    }
969}