Skip to main content

zipatch_rs/chunk/
mod.rs

1//! Wire-format chunk types and the [`ZiPatchReader`] iterator.
2//!
3//! This module is the parsing layer: it decodes the raw `ZiPatch` byte
4//! stream into a stream of typed [`Chunk`] values. Each top-level
5//! variant corresponds to one 4-byte ASCII wire tag (`FHDR`, `APLY`,
6//! `SQPK`, …); the per-variant submodules below own the binary layout for
7//! their body. Nothing in this module touches the filesystem — apply-time
8//! effects live in [`crate::apply`].
9//!
10//! The [`ZiPatchReader`] iterator validates the 12-byte file magic on
11//! construction, then yields one [`Chunk`] per [`Iterator::next`] call
12//! until the internal `EOF_` terminator is consumed or a parse error
13//! surfaces.
14
15pub(crate) mod adir;
16pub(crate) mod afsp;
17pub(crate) mod aply;
18pub(crate) mod ddir;
19pub(crate) mod fhdr;
20pub(crate) mod sqpk;
21pub(crate) mod util;
22
23pub use adir::AddDirectory;
24pub use afsp::ApplyFreeSpace;
25pub use aply::{ApplyOption, ApplyOptionKind};
26pub use ddir::DeleteDirectory;
27pub use fhdr::{FileHeader, FileHeaderV2, FileHeaderV3};
28pub use sqpk::{SqpackFile, SqpkCommand};
29// Re-export SqpkCommand sub-types so callers can match on them
30pub use sqpk::{
31    IndexCommand, SqpkAddData, SqpkCompressedBlock, SqpkDeleteData, SqpkExpandData, SqpkFile,
32    SqpkFileOperation, SqpkHeader, SqpkHeaderTarget, SqpkIndex, SqpkPatchInfo, SqpkTargetInfo,
33    TargetFileKind, TargetHeaderKind,
34};
35
36use crate::reader::ReadExt;
37use crate::{Result, ZiPatchError};
38use tracing::trace;
39
/// The 12-byte signature every `ZiPatch` stream starts with:
/// `\x91ZIPATCH\r\n\x1a\n`.
const MAGIC: [u8; 12] = *b"\x91ZIPATCH\r\n\x1a\n";

/// Largest chunk body length (512 MiB) accepted by the frame parser; larger
/// declared lengths are rejected before any body buffer is allocated.
const MAX_CHUNK_SIZE: usize = 512 << 20;
45
46/// One top-level chunk parsed from a `ZiPatch` stream.
47///
48/// Each variant corresponds to a 4-byte ASCII wire tag. The tag dispatch table
49/// mirrors the C# reference in
50/// `lib/FFXIVQuickLauncher/.../Patching/ZiPatch/Chunk/ZiPatchChunk.cs`.
51///
52/// # Observed frequency
53///
54/// SE's XIVARR+ patch files almost exclusively contain `FHDR`, `APLY`, and
55/// `SQPK` chunks. `ADIR`/`DELD` can theoretically appear and are implemented,
56/// but are rarely emitted in practice. `APFS` has never been observed in modern
57/// patches (the reference implementation treats it as a no-op). `EOF_` is
58/// consumed by [`ZiPatchReader`] and is never yielded to the caller.
59///
60/// # Exhaustiveness
61///
62/// The enum is `#[non_exhaustive]`. Match arms should include a wildcard to
63/// remain forward-compatible as new chunk types are added.
64#[non_exhaustive]
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub enum Chunk {
67    /// `FHDR` — the first chunk in every patch file; carries version and
68    /// per-version patch metadata. See [`FileHeader`] for the versioned body.
69    FileHeader(FileHeader),
70    /// `APLY` — sets or clears a boolean apply-time flag on the
71    /// [`crate::ApplyContext`] (e.g. "ignore missing files"). See [`ApplyOption`].
72    ApplyOption(ApplyOption),
73    /// `APFS` — free-space book-keeping emitted by old patcher tooling; treated
74    /// as a no-op at apply time. See [`ApplyFreeSpace`].
75    ApplyFreeSpace(ApplyFreeSpace),
76    /// `ADIR` — instructs the patcher to create a directory under the game
77    /// install root. See [`AddDirectory`].
78    AddDirectory(AddDirectory),
79    /// `DELD` — instructs the patcher to remove a directory under the game
80    /// install root. See [`DeleteDirectory`].
81    DeleteDirectory(DeleteDirectory),
82    /// `SQPK` — the workhorse chunk; wraps one of eight sub-commands that
83    /// add, delete, expand, or replace `SqPack` data. See [`SqpkCommand`].
84    Sqpk(SqpkCommand),
85    /// `EOF_` — marks the clean end of the patch stream. [`ZiPatchReader`]
86    /// consumes this chunk internally; it is never yielded to the caller.
87    EndOfFile,
88}
89
90/// One parsed chunk plus its 4-byte ASCII tag and the byte count consumed
91/// from the input stream by its frame.
92///
93/// Returned by [`parse_chunk`]. The `consumed` count is exactly the size of
94/// the chunk's on-wire frame: `4 (body_len) + 4 (tag) + body_len + 4 (crc32)`
95/// = `body_len + 12`. This is what
96/// [`ZiPatchReader`](crate::ZiPatchReader) accumulates into its running
97/// byte counter for progress reporting.
98pub(crate) struct ParsedChunk {
99    pub(crate) chunk: Chunk,
100    pub(crate) tag: [u8; 4],
101    pub(crate) consumed: u64,
102}
103
104/// Parse one chunk frame from `r`.
105///
106/// # Wire framing
107///
108/// Each chunk is laid out as:
109///
110/// ```text
111/// [body_len: u32 BE] [tag: 4 bytes] [body: body_len bytes] [crc32: u32 BE]
112/// ```
113///
114/// The CRC32 is computed over `tag ++ body` (not over `body_len`), matching
115/// the C# `ChecksumBinaryReader` in the `XIVLauncher` reference. When
116/// `verify_checksums` is `true` and the stored CRC does not match the computed
117/// one, [`ZiPatchError::ChecksumMismatch`] is returned.
118///
119/// # Errors
120///
121/// - [`ZiPatchError::TruncatedPatch`] — the reader returns EOF while reading
122///   the `body_len` field (i.e. no more chunks are present but `EOF_` was
123///   never seen).
124/// - [`ZiPatchError::OversizedChunk`] — `body_len` exceeds 512 MiB.
125/// - [`ZiPatchError::ChecksumMismatch`] — CRC32 mismatch (only when
126///   `verify_checksums` is `true`).
127/// - [`ZiPatchError::UnknownChunkTag`] — tag is not recognised.
128/// - [`ZiPatchError::Io`] — any other I/O failure reading from `r`.
129pub(crate) fn parse_chunk<R: std::io::Read>(
130    r: &mut R,
131    verify_checksums: bool,
132) -> Result<ParsedChunk> {
133    let size = match r.read_u32_be() {
134        Ok(s) => s as usize,
135        Err(ZiPatchError::Io(e)) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
136            return Err(ZiPatchError::TruncatedPatch);
137        }
138        Err(e) => return Err(e),
139    };
140    if size > MAX_CHUNK_SIZE {
141        return Err(ZiPatchError::OversizedChunk(size));
142    }
143
144    // Tag (4 B) and CRC (4 B) are always present regardless of body shape.
145    let mut tag = [0u8; 4];
146    r.read_exact(&mut tag)?;
147
148    // Peek at the first 5 bytes of the body without committing to either the
149    // generic single-allocation path or the SQPK `A` zero-copy-into-data path.
150    // For SQPK chunks, those 5 bytes are `[inner_size: i32 BE][sub_cmd: u8]`.
151    // For chunks with bodies shorter than 5 bytes (e.g. `EOF_`), we still read
152    // exactly `size` bytes into the prefix array and leave the rest zero.
153    let mut prefix = [0u8; 5];
154    let prefix_len = size.min(5);
155    if prefix_len > 0 {
156        r.read_exact(&mut prefix[..prefix_len])?;
157    }
158
159    // ---- Fast path: SQPK `A` (SqpkAddData) — see `parse_sqpk_add_data_fast`. ----
160    if &tag == b"SQPK" && size >= 5 + SQPK_ADDDATA_HEADER_SIZE && prefix[4] == b'A' {
161        return parse_sqpk_add_data_fast(r, tag, prefix, size, verify_checksums);
162    }
163
164    // ---- Generic path: one allocation for the whole body. ----
165    let mut body_vec = vec![0u8; size];
166    body_vec[..prefix_len].copy_from_slice(&prefix[..prefix_len]);
167    if size > prefix_len {
168        r.read_exact(&mut body_vec[prefix_len..])?;
169    }
170
171    let mut crc_buf = [0u8; 4];
172    r.read_exact(&mut crc_buf)?;
173    let expected_crc = u32::from_be_bytes(crc_buf);
174
175    if verify_checksums {
176        let mut hasher = crc32fast::Hasher::new();
177        hasher.update(&tag);
178        hasher.update(&body_vec);
179        let actual_crc = hasher.finalize();
180        if actual_crc != expected_crc {
181            return Err(ZiPatchError::ChecksumMismatch {
182                tag,
183                expected: expected_crc,
184                actual: actual_crc,
185            });
186        }
187    }
188
189    trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
190
191    // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
192    let consumed = (size as u64) + 12;
193
194    let body = &body_vec[..];
195
196    let chunk = match &tag {
197        b"EOF_" => Chunk::EndOfFile,
198        b"FHDR" => Chunk::FileHeader(fhdr::parse(body)?),
199        b"APLY" => Chunk::ApplyOption(aply::parse(body)?),
200        b"APFS" => Chunk::ApplyFreeSpace(afsp::parse(body)?),
201        b"ADIR" => Chunk::AddDirectory(adir::parse(body)?),
202        b"DELD" => Chunk::DeleteDirectory(ddir::parse(body)?),
203        b"SQPK" => Chunk::Sqpk(sqpk::parse_sqpk(body)?),
204        _ => return Err(ZiPatchError::UnknownChunkTag(tag)),
205    };
206
207    Ok(ParsedChunk {
208        chunk,
209        tag,
210        consumed,
211    })
212}
213
214// Size of the SqpkAddData fixed header that precedes the inline data payload.
215// Mirrors `add_data::SqpkAddData::DATA_SOURCE_OFFSET` (23) without taking a
216// `u64` round-trip; kept private to the framing path.
217const SQPK_ADDDATA_HEADER_SIZE: usize = 23;
218
219/// Fast path for SQPK `A` (`SqpkAddData`) chunks.
220///
221/// `AddData` is the largest chunk type by byte volume — payloads of hundreds of
222/// KB to MB are typical. The generic framing path allocates one `Vec<u8>` of
223/// `size` for the whole body, then `binrw`'s derived parser allocates a second
224/// `Vec<u8>` of exactly `data_bytes` and memcpys the inline payload into it.
225/// That second allocation + memcpy dominates parse time for `AddData`.
226///
227/// This function reads the `AddData` fixed header into a stack array, parses
228/// the seven fields directly, allocates the `data` payload at its exact size,
229/// and `read_exact`s the source bytes straight into it — one allocation, no
230/// intermediate copy of the payload.
231///
232/// On entry: `tag` and the 5-byte `prefix` (SQPK `inner_size` + sub-command
233/// byte) have already been consumed from `r`. The remaining bytes are
234/// `[fixed_header: 23 B][data: data_bytes][crc32: 4 B]`.
235fn parse_sqpk_add_data_fast<R: std::io::Read>(
236    r: &mut R,
237    tag: [u8; 4],
238    prefix: [u8; 5],
239    size: usize,
240    verify_checksums: bool,
241) -> Result<ParsedChunk> {
242    // Validate the SQPK inner_size against the outer chunk size, matching the
243    // check in `sqpk::parse_sqpk` so callers see byte-identical error behaviour.
244    let inner_size = i32::from_be_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
245    if inner_size != size {
246        return Err(ZiPatchError::InvalidField {
247            context: "SQPK inner size mismatch",
248        });
249    }
250
251    let mut header = [0u8; SQPK_ADDDATA_HEADER_SIZE];
252    r.read_exact(&mut header)?;
253
254    // SqpkAddData fixed-header layout (all big-endian):
255    //   [0..3]   pad
256    //   [3..5]   main_id   u16
257    //   [5..7]   sub_id    u16
258    //   [7..11]  file_id   u32
259    //   [11..15] block_offset_raw  u32 (<< 7 = bytes)
260    //   [15..19] data_bytes_raw    u32 (<< 7 = bytes)
261    //   [19..23] block_delete_raw  u32 (<< 7 = bytes)
262    let main_id = u16::from_be_bytes([header[3], header[4]]);
263    let sub_id = u16::from_be_bytes([header[5], header[6]]);
264    let file_id = u32::from_be_bytes([header[7], header[8], header[9], header[10]]);
265    let block_offset_raw = u32::from_be_bytes([header[11], header[12], header[13], header[14]]);
266    let data_bytes_raw = u32::from_be_bytes([header[15], header[16], header[17], header[18]]);
267    let block_delete_raw = u32::from_be_bytes([header[19], header[20], header[21], header[22]]);
268
269    let block_offset = (block_offset_raw as u64) << 7;
270    let data_bytes = (data_bytes_raw as u64) << 7;
271    let block_delete_number = (block_delete_raw as u64) << 7;
272
273    // The declared payload length must fit exactly within the chunk body:
274    //   size = 5 (inner_size + sub_cmd) + 23 (fixed header) + data_bytes
275    let expected_data = size - 5 - SQPK_ADDDATA_HEADER_SIZE;
276    if data_bytes as usize != expected_data {
277        return Err(ZiPatchError::InvalidField {
278            context: "SqpkAddData data_bytes does not match SQPK body length",
279        });
280    }
281
282    let mut data = vec![0u8; data_bytes as usize];
283    r.read_exact(&mut data)?;
284
285    let mut crc_buf = [0u8; 4];
286    r.read_exact(&mut crc_buf)?;
287    let expected_crc = u32::from_be_bytes(crc_buf);
288
289    if verify_checksums {
290        // CRC is over `tag ++ body`. The body is split across three disjoint
291        // buffers — feed each segment to the incremental hasher.
292        let mut hasher = crc32fast::Hasher::new();
293        hasher.update(&tag);
294        hasher.update(&prefix);
295        hasher.update(&header);
296        hasher.update(&data);
297        let actual_crc = hasher.finalize();
298        if actual_crc != expected_crc {
299            return Err(ZiPatchError::ChecksumMismatch {
300                tag,
301                expected: expected_crc,
302                actual: actual_crc,
303            });
304        }
305    }
306
307    trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
308
309    let chunk = Chunk::Sqpk(sqpk::SqpkCommand::AddData(Box::new(sqpk::SqpkAddData {
310        target_file: sqpk::SqpackFile {
311            main_id,
312            sub_id,
313            file_id,
314        },
315        block_offset,
316        data_bytes,
317        block_delete_number,
318        data,
319    })));
320
321    // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
322    let consumed = (size as u64) + 12;
323
324    Ok(ParsedChunk {
325        chunk,
326        tag,
327        consumed,
328    })
329}
330
/// Iterator over the [`Chunk`]s in a `ZiPatch` stream.
///
/// `ZiPatchReader` wraps any [`std::io::Read`] source and yields one
/// [`Chunk`] per call to [`Iterator::next`]. It validates the 12-byte file
/// magic on construction, then reads chunks sequentially until the `EOF_`
/// terminator is encountered or an error occurs.
///
/// # Stream contract
///
/// - **Magic** — the first 12 bytes must be `\x91ZIPATCH\r\n\x1a\n`. Any
///   mismatch returns [`ZiPatchError::InvalidMagic`] from [`ZiPatchReader::new`].
/// - **Framing** — every chunk is a length-prefixed frame:
///   `[body_len: u32 BE] [tag: 4 B] [body: body_len B] [crc32: u32 BE]`.
/// - **CRC32** — computed over `tag ++ body`. Verification is enabled by
///   default; use [`ZiPatchReader::skip_checksum_verification`] to disable it.
/// - **Termination** — the `EOF_` chunk is consumed internally and causes
///   the iterator to return `None`. Call [`ZiPatchReader::is_complete`] after
///   iteration to distinguish a clean end from a truncated stream.
/// - **Fused** — once `None` is returned (either from `EOF_` or on the call
///   following an error), subsequent calls to `next` also return `None`. The
///   iterator implements [`std::iter::FusedIterator`].
///
/// # Errors
///
/// Each call to [`Iterator::next`] returns `Some(Err(e))` on parse failure,
/// then `None` on all future calls. Possible errors include:
/// - [`ZiPatchError::TruncatedPatch`] — stream ended before `EOF_`.
/// - [`ZiPatchError::OversizedChunk`] — a declared chunk body exceeds 512 MiB.
/// - [`ZiPatchError::ChecksumMismatch`] — CRC32 verification failed.
/// - [`ZiPatchError::UnknownChunkTag`] — unrecognised 4-byte tag.
/// - [`ZiPatchError::InvalidField`] — a chunk body carried an inconsistent
///   field (e.g. an SQPK length that disagrees with the frame length).
/// - [`ZiPatchError::Io`] — underlying I/O failure.
///
/// # Example
///
/// Build a minimal in-memory patch (magic + `ADIR` + `EOF_`) and iterate it:
///
/// ```rust
/// use std::io::Cursor;
/// use zipatch_rs::{Chunk, ZiPatchReader};
///
/// // Helper: wrap tag + body into a correctly framed chunk with CRC32.
/// fn make_chunk(tag: &[u8; 4], body: &[u8]) -> Vec<u8> {
///     let mut crc_input = Vec::new();
///     crc_input.extend_from_slice(tag);
///     crc_input.extend_from_slice(body);
///     let crc = crc32fast::hash(&crc_input);
///
///     let mut out = Vec::new();
///     out.extend_from_slice(&(body.len() as u32).to_be_bytes());
///     out.extend_from_slice(tag);
///     out.extend_from_slice(body);
///     out.extend_from_slice(&crc.to_be_bytes());
///     out
/// }
///
/// // 12-byte ZiPatch magic.
/// let magic: [u8; 12] = [0x91, 0x5A, 0x49, 0x50, 0x41, 0x54, 0x43, 0x48, 0x0D, 0x0A, 0x1A, 0x0A];
///
/// // ADIR body: u32 BE name_len (7) + b"created".
/// let mut adir_body = Vec::new();
/// adir_body.extend_from_slice(&7u32.to_be_bytes());
/// adir_body.extend_from_slice(b"created");
///
/// let mut patch = Vec::new();
/// patch.extend_from_slice(&magic);
/// patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
/// patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
///
/// let chunks: Vec<_> = ZiPatchReader::new(Cursor::new(patch))
///     .unwrap()
///     .collect::<Result<_, _>>()
///     .unwrap();
///
/// assert_eq!(chunks.len(), 1);
/// assert!(matches!(chunks[0], Chunk::AddDirectory(_)));
/// ```
#[derive(Debug)]
pub struct ZiPatchReader<R> {
    // Buffered view of the caller's reader; all chunk parsing reads from here.
    inner: std::io::BufReader<R>,
    // Set once iteration has finished (clean `EOF_` or a surfaced error);
    // every later `next()` returns `None`.
    done: bool,
    // Whether each chunk's CRC32 is checked while parsing.
    verify_checksums: bool,
    // Set only when the `EOF_` terminator was actually consumed.
    eof_seen: bool,
    // Running total of bytes consumed from `inner`, including the 12-byte
    // magic header. Updated after each successful `parse_chunk` call.
    // Exposed via `bytes_read()` so the apply driver can fire monotonic
    // progress events without instrumenting the underlying `Read` source.
    bytes_read: u64,
    // 4-byte ASCII tag of the most recently parsed chunk. `None` until the
    // first chunk parses successfully; afterwards it always holds the last
    // successfully parsed tag — `EOF_` once the terminator is consumed, and
    // the previous chunk's tag if a later parse fails. Used by `apply_to` to
    // attach the tag to per-chunk progress events without re-matching on the
    // `Chunk` enum.
    last_tag: Option<[u8; 4]>,
}
424
425impl<R: std::io::Read> ZiPatchReader<R> {
426    /// Wrap `reader` and validate the leading 12-byte `ZiPatch` magic.
427    ///
428    /// Consumes exactly 12 bytes from `reader`. The magic is the byte sequence
429    /// `0x91 0x5A 0x49 0x50 0x41 0x54 0x43 0x48 0x0D 0x0A 0x1A 0x0A`
430    /// (i.e. `\x91ZIPATCH\r\n\x1a\n`).
431    ///
432    /// The reader is wrapped in a [`std::io::BufReader`] internally, so the
433    /// many small typed reads the chunk parser issues (4-byte size, 4-byte
434    /// tag, 5-byte SQPK prefix, …) coalesce into a small number of syscalls.
435    /// Callers do not need to pre-wrap a raw [`std::fs::File`] or other
436    /// unbuffered source.
437    ///
438    /// CRC32 verification is **enabled** by default. Call
439    /// [`ZiPatchReader::skip_checksum_verification`] before iterating to
440    /// disable it.
441    ///
442    /// # Errors
443    ///
444    /// - [`ZiPatchError::InvalidMagic`] — the first 12 bytes do not match the
445    ///   expected magic.
446    /// - [`ZiPatchError::Io`] — an I/O error occurred while reading the magic.
447    pub fn new(reader: R) -> Result<Self> {
448        let mut reader = std::io::BufReader::new(reader);
449        let magic = reader.read_exact_vec(12)?;
450        if magic.as_slice() != MAGIC {
451            return Err(ZiPatchError::InvalidMagic);
452        }
453        Ok(Self {
454            inner: reader,
455            done: false,
456            verify_checksums: true,
457            eof_seen: false,
458            // The 12-byte magic header has already been consumed.
459            bytes_read: 12,
460            last_tag: None,
461        })
462    }
463
464    /// Enable per-chunk CRC32 verification (the default).
465    ///
466    /// This is the default state after [`ZiPatchReader::new`]. Calling this
467    /// method after construction is only necessary if
468    /// [`ZiPatchReader::skip_checksum_verification`] was previously called.
469    #[must_use]
470    pub fn verify_checksums(mut self) -> Self {
471        self.verify_checksums = true;
472        self
473    }
474
475    /// Disable per-chunk CRC32 verification.
476    ///
477    /// Useful when the source has already been verified out-of-band (e.g. a
478    /// download hash was checked before the file was opened), or when
479    /// processing known-good test data where the overhead is unnecessary.
480    #[must_use]
481    pub fn skip_checksum_verification(mut self) -> Self {
482        self.verify_checksums = false;
483        self
484    }
485
486    /// Returns `true` if iteration reached the `EOF_` terminator cleanly.
487    ///
488    /// A `false` return after `next()` yields `None` indicates the stream was
489    /// truncated — the download or file copy was incomplete. In that case the
490    /// iterator stopped because of a [`ZiPatchError::TruncatedPatch`] error,
491    /// not because the patch finished normally.
492    pub fn is_complete(&self) -> bool {
493        self.eof_seen
494    }
495
496    /// Returns the running total of bytes consumed from the patch stream.
497    ///
498    /// Starts at `12` after [`ZiPatchReader::new`] (the magic header has been
499    /// read) and increases monotonically by the size of each chunk's wire
500    /// frame after each successful [`Iterator::next`] call. Includes the
501    /// `EOF_` terminator's frame.
502    ///
503    /// On parse error, the counter is **not** advanced past the failing
504    /// chunk — it reflects the byte offset at the start of that chunk's
505    /// length prefix, not the broken position somewhere inside its frame.
506    /// Use this offset together with the surfaced error to point a user at
507    /// where the patch became unreadable.
508    ///
509    /// This is the same counter that the
510    /// [`apply_to`](crate::ZiPatchReader::apply_to) driver attaches to
511    /// [`ChunkEvent::bytes_read`](crate::ChunkEvent::bytes_read) when firing
512    /// progress events. Useful for the `bytes_applied / total_patch_size`
513    /// ratio in a progress bar.
514    #[must_use]
515    pub fn bytes_read(&self) -> u64 {
516        self.bytes_read
517    }
518
519    /// Returns the 4-byte ASCII tag of the most recently yielded chunk.
520    ///
521    /// `None` before the first successful [`Iterator::next`] call and after
522    /// the `EOF_` terminator has been consumed (or an error has been
523    /// surfaced). Used by [`apply_to`](crate::ZiPatchReader::apply_to) to
524    /// populate [`ChunkEvent::kind`](crate::ChunkEvent::kind).
525    #[must_use]
526    pub fn last_tag(&self) -> Option<[u8; 4]> {
527        self.last_tag
528    }
529}
530
531impl ZiPatchReader<std::io::BufReader<std::fs::File>> {
532    /// Open the file at `path`, wrap it in a [`std::io::BufReader`], and
533    /// validate the `ZiPatch` magic.
534    ///
535    /// This is a convenience constructor equivalent to:
536    ///
537    /// ```rust,no_run
538    /// # use std::io::BufReader;
539    /// # use std::fs::File;
540    /// # use zipatch_rs::ZiPatchReader;
541    /// let reader = ZiPatchReader::new(BufReader::new(File::open("patch.patch").unwrap())).unwrap();
542    /// ```
543    ///
544    /// # Errors
545    ///
546    /// - [`ZiPatchError::Io`] — the file could not be opened.
547    /// - [`ZiPatchError::InvalidMagic`] — the file does not start with the
548    ///   `ZiPatch` magic bytes.
549    pub fn from_path(path: impl AsRef<std::path::Path>) -> crate::Result<Self> {
550        let file = std::fs::File::open(path)?;
551        Self::new(std::io::BufReader::new(file))
552    }
553}
554
555impl<R: std::io::Read> Iterator for ZiPatchReader<R> {
556    type Item = Result<Chunk>;
557
558    fn next(&mut self) -> Option<Self::Item> {
559        if self.done {
560            return None;
561        }
562        match parse_chunk(&mut self.inner, self.verify_checksums) {
563            Ok(ParsedChunk {
564                chunk: Chunk::EndOfFile,
565                tag,
566                consumed,
567            }) => {
568                self.bytes_read += consumed;
569                self.last_tag = Some(tag);
570                self.done = true;
571                self.eof_seen = true;
572                None
573            }
574            Ok(ParsedChunk {
575                chunk,
576                tag,
577                consumed,
578            }) => {
579                self.bytes_read += consumed;
580                self.last_tag = Some(tag);
581                Some(Ok(chunk))
582            }
583            Err(e) => {
584                self.done = true;
585                Some(Err(e))
586            }
587        }
588    }
589}
590
591impl<R: std::io::Read> std::iter::FusedIterator for ZiPatchReader<R> {}
592
593#[cfg(test)]
594mod tests {
595    use super::*;
596    use crate::test_utils::make_chunk;
597    use std::io::Cursor;
598
599    // --- parse_chunk error paths ---
600
601    #[test]
602    fn truncated_at_chunk_boundary_yields_truncated_patch() {
603        // Magic + no chunks: parse_chunk must see EOF on the body_len read and
604        // convert it to TruncatedPatch.  This exercises the
605        // `Err(ZiPatchError::Io(e)) if e.kind() == UnexpectedEof` arm at
606        // chunk/mod.rs line 121.
607        let mut patch = Vec::new();
608        patch.extend_from_slice(&MAGIC);
609        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
610        match reader
611            .next()
612            .expect("iterator must yield an error, not None")
613        {
614            Err(ZiPatchError::TruncatedPatch) => {}
615            other => panic!("expected TruncatedPatch, got {other:?}"),
616        }
617        assert!(!reader.is_complete(), "stream is not clean-ended");
618    }
619
620    #[test]
621    fn non_eof_io_error_on_body_len_read_propagates_as_io() {
622        // Exercises the `Err(e) => return Err(e)` arm at line 124: an I/O
623        // error that is NOT UnexpectedEof must propagate verbatim.
624        // We trigger this by passing a reader that errors immediately.
625        struct BrokenReader;
626        impl std::io::Read for BrokenReader {
627            fn read(&mut self, _: &mut [u8]) -> std::io::Result<usize> {
628                Err(std::io::Error::new(
629                    std::io::ErrorKind::BrokenPipe,
630                    "simulated broken pipe",
631                ))
632            }
633        }
634        let result = parse_chunk(&mut BrokenReader, false);
635        match result {
636            Err(ZiPatchError::Io(e)) => {
637                assert_eq!(
638                    e.kind(),
639                    std::io::ErrorKind::BrokenPipe,
640                    "non-EOF I/O error must propagate unchanged, got kind {:?}",
641                    e.kind()
642                );
643            }
644            Err(other) => panic!("expected ZiPatchError::Io(BrokenPipe), got {other:?}"),
645            Ok(_) => panic!("expected an error, got Ok"),
646        }
647    }
648
649    #[test]
650    fn truncated_after_one_chunk_yields_truncated_patch() {
651        // Magic + one well-formed ADIR + no more bytes: the second call to
652        // next() must surface TruncatedPatch, not None.
653        let mut adir_body = Vec::new();
654        adir_body.extend_from_slice(&4u32.to_be_bytes());
655        adir_body.extend_from_slice(b"test");
656        let chunk = make_chunk(b"ADIR", &adir_body);
657
658        let mut patch = Vec::new();
659        patch.extend_from_slice(&MAGIC);
660        patch.extend_from_slice(&chunk);
661
662        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
663        let first = reader.next().expect("first chunk must be present");
664        assert!(
665            first.is_ok(),
666            "first ADIR chunk should parse cleanly: {first:?}"
667        );
668        match reader.next().expect("second call must yield an error") {
669            Err(ZiPatchError::TruncatedPatch) => {}
670            other => panic!("expected TruncatedPatch on truncated stream, got {other:?}"),
671        }
672        assert!(
673            !reader.is_complete(),
674            "is_complete must be false after truncation"
675        );
676    }
677
678    #[test]
679    fn checksum_mismatch_returns_checksum_mismatch_error() {
680        // Corrupt the CRC32 field of an otherwise valid ADIR chunk and verify
681        // that parse_chunk returns ChecksumMismatch (not a panic or a wrong error).
682        let mut adir_body = Vec::new();
683        adir_body.extend_from_slice(&4u32.to_be_bytes());
684        adir_body.extend_from_slice(b"test");
685        let mut chunk = make_chunk(b"ADIR", &adir_body);
686        // Flip the last byte of the CRC32 field.
687        let last = chunk.len() - 1;
688        chunk[last] ^= 0xFF;
689
690        let mut cur = Cursor::new(chunk);
691        let result = parse_chunk(&mut cur, true);
692        assert!(
693            matches!(result, Err(ZiPatchError::ChecksumMismatch { .. })),
694            "corrupted CRC must yield ChecksumMismatch"
695        );
696    }
697
698    #[test]
699    fn unknown_chunk_tag_returns_unknown_chunk_tag_error() {
700        // A tag of all-Z bytes is not recognised; parse_chunk must return
701        // UnknownChunkTag carrying the raw 4-byte tag.
702        let chunk = make_chunk(b"ZZZZ", &[]);
703        let mut cur = Cursor::new(chunk);
704        match parse_chunk(&mut cur, false) {
705            Err(ZiPatchError::UnknownChunkTag(tag)) => {
706                assert_eq!(tag, *b"ZZZZ", "tag bytes must be preserved in error");
707            }
708            Err(other) => panic!("expected UnknownChunkTag, got {other:?}"),
709            Ok(_) => panic!("expected UnknownChunkTag, got Ok"),
710        }
711    }
712
713    #[test]
714    fn oversized_chunk_body_len_returns_oversized_chunk_error() {
715        // body_len == u32::MAX (> 512 MiB) must be rejected before any allocation.
716        let bytes = [0xFFu8, 0xFF, 0xFF, 0xFF];
717        let mut cur = Cursor::new(&bytes[..]);
718        let Err(ZiPatchError::OversizedChunk(size)) = parse_chunk(&mut cur, false) else {
719            panic!("expected OversizedChunk for u32::MAX body_len")
720        };
721        assert!(
722            size > MAX_CHUNK_SIZE,
723            "reported size {size} must exceed MAX_CHUNK_SIZE {MAX_CHUNK_SIZE}"
724        );
725    }
726
727    // --- ZiPatchReader byte-counter and tag accessors ---
728
729    #[test]
730    fn bytes_read_starts_at_12_before_first_chunk() {
731        // The magic header is 12 bytes; bytes_read must reflect that immediately
732        // after construction, before any chunk is read.
733        let mut patch = Vec::new();
734        patch.extend_from_slice(&MAGIC);
735        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
736        let reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
737        assert_eq!(
738            reader.bytes_read(),
739            12,
740            "bytes_read must be 12 (magic only) before iteration starts"
741        );
742    }
743
744    #[test]
745    fn last_tag_is_none_before_first_chunk() {
746        // Before calling next(), last_tag must be None.
747        let mut patch = Vec::new();
748        patch.extend_from_slice(&MAGIC);
749        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
750        let reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
751        assert_eq!(
752            reader.last_tag(),
753            None,
754            "last_tag must be None before any chunk is read"
755        );
756    }
757
758    #[test]
759    fn bytes_read_and_last_tag_track_each_chunk_frame() {
760        // MAGIC + ADIR("a") + EOF_ — verify bytes_read grows by the exact frame
761        // size after each chunk and that last_tag follows the stream.
762        let mut adir_body = Vec::new();
763        adir_body.extend_from_slice(&1u32.to_be_bytes());
764        adir_body.extend_from_slice(b"a");
765        // ADIR frame: 4(size) + 4(tag) + 5(body) + 4(crc) = 17 bytes
766        // EOF_  frame: 4 + 4 + 0 + 4 = 12 bytes
767
768        let mut patch = Vec::new();
769        patch.extend_from_slice(&MAGIC);
770        patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
771        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
772
773        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
774        assert_eq!(reader.bytes_read(), 12, "pre-read: magic only");
775        assert_eq!(reader.last_tag(), None, "pre-read: no tag yet");
776
777        let chunk = reader.next().unwrap().unwrap();
778        assert!(
779            matches!(chunk, Chunk::AddDirectory(_)),
780            "first chunk must be ADIR"
781        );
782        assert_eq!(
783            reader.bytes_read(),
784            12 + 17,
785            "after ADIR: magic + ADIR frame"
786        );
787        assert_eq!(
788            reader.last_tag(),
789            Some(*b"ADIR"),
790            "last_tag must be ADIR after first next()"
791        );
792
793        assert!(reader.next().is_none(), "EOF_ must terminate iteration");
794        assert_eq!(
795            reader.bytes_read(),
796            12 + 17 + 12,
797            "after EOF_: magic + ADIR + EOF_ frames"
798        );
799        assert_eq!(
800            reader.last_tag(),
801            Some(*b"EOF_"),
802            "last_tag must be EOF_ after stream ends"
803        );
804        assert!(reader.is_complete(), "is_complete must be true after EOF_");
805    }
806
807    #[test]
808    fn bytes_read_is_monotonically_non_decreasing() {
809        // Stream with two ADIR chunks + EOF_ — verify bytes_read only ever
810        // increases between calls to next() and that consuming the EOF_
811        // chunk (whose body is empty but whose frame is 12 bytes) still
812        // advances the counter past the last non-EOF position.
813        let make_adir = |name: &[u8]| -> Vec<u8> {
814            let mut body = Vec::new();
815            body.extend_from_slice(&(name.len() as u32).to_be_bytes());
816            body.extend_from_slice(name);
817            make_chunk(b"ADIR", &body)
818        };
819
820        let mut patch = Vec::new();
821        patch.extend_from_slice(&MAGIC);
822        patch.extend_from_slice(&make_adir(b"a"));
823        patch.extend_from_slice(&make_adir(b"bb"));
824        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
825
826        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
827        let mut prev = reader.bytes_read();
828        while let Some(result) = reader.next() {
829            result.unwrap();
830            let current = reader.bytes_read();
831            assert!(
832                current >= prev,
833                "bytes_read must be monotonically non-decreasing: {prev} -> {current}"
834            );
835            // For ADIR chunks with non-empty bodies, the increment must be
836            // strictly positive — a body of N bytes adds N + 12 frame bytes.
837            assert!(
838                current > prev,
839                "non-empty ADIR frame must strictly advance bytes_read: \
840                 {prev} -> {current}"
841            );
842            prev = current;
843        }
844        // EOF_ has been consumed: its 12-byte empty-body frame must have
845        // pushed the counter past the previous position.
846        assert!(
847            reader.bytes_read() > prev,
848            "consuming EOF_ must advance bytes_read by its 12-byte frame: \
849             {prev} -> {}",
850            reader.bytes_read()
851        );
852    }
853
854    // --- from_path constructor ---
855
856    #[test]
857    fn from_path_opens_minimal_patch_and_reaches_eof() {
858        let mut bytes = Vec::new();
859        bytes.extend_from_slice(&MAGIC);
860        bytes.extend_from_slice(&make_chunk(b"EOF_", &[]));
861
862        let tmp = tempfile::tempdir().unwrap();
863        let file_path = tmp.path().join("test.patch");
864        std::fs::write(&file_path, &bytes).unwrap();
865
866        let mut reader =
867            ZiPatchReader::from_path(&file_path).expect("from_path must open valid patch");
868        assert!(
869            reader.next().is_none(),
870            "EOF_ must terminate iteration immediately"
871        );
872        assert!(reader.is_complete(), "is_complete must be true after EOF_");
873    }
874
875    #[test]
876    fn from_path_returns_io_error_when_file_is_missing() {
877        let tmp = tempfile::tempdir().unwrap();
878        let file_path = tmp.path().join("nonexistent.patch");
879        assert!(
880            matches!(
881                ZiPatchReader::from_path(&file_path),
882                Err(ZiPatchError::Io(_))
883            ),
884            "from_path on a missing file must return ZiPatchError::Io"
885        );
886    }
887
888    // --- Iterator fused-ness and is_complete ---
889
890    #[test]
891    fn iterator_is_fused_after_error() {
892        // Once next() yields Some(Err(_)), all subsequent calls must yield None.
893        let mut patch = Vec::new();
894        patch.extend_from_slice(&MAGIC);
895        patch.extend_from_slice(&make_chunk(b"ZZZZ", &[])); // unknown tag → error
896
897        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
898        let first = reader.next();
899        assert!(
900            matches!(first, Some(Err(ZiPatchError::UnknownChunkTag(_)))),
901            "first call must yield the error: {first:?}"
902        );
903        // All subsequent calls must return None.
904        assert!(
905            reader.next().is_none(),
906            "fused: must return None after error"
907        );
908        assert!(reader.next().is_none(), "fused: still None on third call");
909    }
910
911    #[test]
912    fn is_complete_false_until_eof_seen() {
913        let mut adir_body = Vec::new();
914        adir_body.extend_from_slice(&1u32.to_be_bytes());
915        adir_body.extend_from_slice(b"x");
916
917        let mut patch = Vec::new();
918        patch.extend_from_slice(&MAGIC);
919        patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
920        patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
921
922        let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
923        assert!(
924            !reader.is_complete(),
925            "not complete before reading anything"
926        );
927        reader.next().unwrap().unwrap(); // consume ADIR
928        assert!(
929            !reader.is_complete(),
930            "not complete after ADIR, before EOF_"
931        );
932        assert!(reader.next().is_none(), "EOF_ consumed");
933        assert!(reader.is_complete(), "complete after EOF_ consumed");
934    }
935}