zipatch_rs/chunk/mod.rs
1//! Wire-format chunk types and the [`ZiPatchReader`] iterator.
2//!
3//! This module is the parsing layer: it decodes the raw `ZiPatch` byte
4//! stream into a stream of typed [`Chunk`] values. Each top-level
5//! variant corresponds to one 4-byte ASCII wire tag (`FHDR`, `APLY`,
6//! `SQPK`, …); the per-variant submodules below own the binary layout for
7//! their body. Nothing in this module touches the filesystem — apply-time
8//! effects live in [`crate::apply`].
9//!
10//! The [`ZiPatchReader`] iterator validates the 12-byte file magic on
11//! construction, then yields one [`Chunk`] per [`Iterator::next`] call
12//! until the internal `EOF_` terminator is consumed or a parse error
13//! surfaces.
14
15pub(crate) mod adir;
16pub(crate) mod afsp;
17pub(crate) mod aply;
18pub(crate) mod ddir;
19pub(crate) mod fhdr;
20pub(crate) mod sqpk;
21pub(crate) mod util;
22
23pub use adir::AddDirectory;
24pub use afsp::ApplyFreeSpace;
25pub use aply::{ApplyOption, ApplyOptionKind};
26pub use ddir::DeleteDirectory;
27pub use fhdr::{FileHeader, FileHeaderV2, FileHeaderV3};
28pub use sqpk::{SqpackFile, SqpkCommand};
29// Re-export SqpkCommand sub-types so callers can match on them
30pub use sqpk::{
31 IndexCommand, SqpkAddData, SqpkCompressedBlock, SqpkDeleteData, SqpkExpandData, SqpkFile,
32 SqpkFileOperation, SqpkHeader, SqpkHeaderTarget, SqpkIndex, SqpkPatchInfo, SqpkTargetInfo,
33 TargetFileKind, TargetHeaderKind,
34};
35
36use crate::reader::ReadExt;
37use crate::{Result, ZiPatchError};
38use tracing::trace;
39
// The 12-byte file magic that every `ZiPatch` stream must start with:
// `\x91ZIPATCH\r\n\x1a\n`. `ZiPatchReader::new` compares the first 12 bytes
// of its input against this array and rejects anything else with
// `ZiPatchError::InvalidMagic`.
const MAGIC: [u8; 12] = [
    0x91, 0x5A, 0x49, 0x50, 0x41, 0x54, 0x43, 0x48, 0x0D, 0x0A, 0x1A, 0x0A,
];

// Upper bound (512 MiB) on the `body_len` a chunk frame may declare.
// `parse_chunk` checks the declared length against this limit before it
// allocates a body buffer and returns `ZiPatchError::OversizedChunk` when the
// limit is exceeded.
const MAX_CHUNK_SIZE: usize = 512 * 1024 * 1024;
45
/// One top-level chunk parsed from a `ZiPatch` stream.
///
/// Each variant corresponds to a 4-byte ASCII wire tag. The tag dispatch table
/// mirrors the C# reference in
/// `lib/FFXIVQuickLauncher/.../Patching/ZiPatch/Chunk/ZiPatchChunk.cs`.
///
/// # Observed frequency
///
/// SE's XIVARR+ patch files almost exclusively contain `FHDR`, `APLY`, and
/// `SQPK` chunks. `ADIR`/`DELD` can theoretically appear and are implemented,
/// but are rarely emitted in practice. `APFS` has never been observed in modern
/// patches (the reference implementation treats it as a no-op). `EOF_` is
/// consumed by [`ZiPatchReader`] and is never yielded to the caller.
///
/// # Exhaustiveness
///
/// The enum is `#[non_exhaustive]`. Match arms should include a wildcard to
/// remain forward-compatible as new chunk types are added.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Chunk {
    /// `FHDR` — the first chunk in every patch file; carries version and
    /// per-version patch metadata. See [`FileHeader`] for the versioned body.
    FileHeader(FileHeader),
    /// `APLY` — sets or clears a boolean apply-time flag on the
    /// [`crate::ApplyContext`] (e.g. "ignore missing files"). See [`ApplyOption`].
    ApplyOption(ApplyOption),
    /// `APFS` — free-space book-keeping emitted by old patcher tooling; treated
    /// as a no-op at apply time. See [`ApplyFreeSpace`].
    ApplyFreeSpace(ApplyFreeSpace),
    /// `ADIR` — instructs the patcher to create a directory under the game
    /// install root. See [`AddDirectory`].
    AddDirectory(AddDirectory),
    /// `DELD` — instructs the patcher to remove a directory under the game
    /// install root. See [`DeleteDirectory`].
    DeleteDirectory(DeleteDirectory),
    /// `SQPK` — the workhorse chunk; wraps one of eight sub-commands that
    /// add, delete, expand, or replace `SqPack` data. See [`SqpkCommand`].
    Sqpk(SqpkCommand),
    /// `EOF_` — marks the clean end of the patch stream. The frame parser
    /// produces this variant so that [`ZiPatchReader`] can recognise the
    /// terminator; the reader consumes it internally and never yields it to
    /// the caller.
    EndOfFile,
}
89
/// One parsed chunk plus its 4-byte ASCII tag and the byte count consumed
/// from the input stream by its frame.
///
/// Returned by [`parse_chunk`]. The `consumed` count is exactly the size of
/// the chunk's on-wire frame: `4 (body_len) + 4 (tag) + body_len + 4 (crc32)`
/// = `body_len + 12`. This is what
/// [`ZiPatchReader`](crate::ZiPatchReader) accumulates into its running
/// byte counter for progress reporting.
pub(crate) struct ParsedChunk {
    // The decoded chunk value (including `Chunk::EndOfFile` for the `EOF_`
    // terminator, which the reader filters out before yielding).
    pub(crate) chunk: Chunk,
    // The raw 4-byte wire tag exactly as it was read from the frame.
    pub(crate) tag: [u8; 4],
    // Total number of frame bytes read for this chunk: `body_len + 12`.
    pub(crate) consumed: u64,
}
103
104/// Parse one chunk frame from `r`.
105///
106/// # Wire framing
107///
108/// Each chunk is laid out as:
109///
110/// ```text
111/// [body_len: u32 BE] [tag: 4 bytes] [body: body_len bytes] [crc32: u32 BE]
112/// ```
113///
114/// The CRC32 is computed over `tag ++ body` (not over `body_len`), matching
115/// the C# `ChecksumBinaryReader` in the `XIVLauncher` reference. When
116/// `verify_checksums` is `true` and the stored CRC does not match the computed
117/// one, [`ZiPatchError::ChecksumMismatch`] is returned.
118///
119/// # Errors
120///
121/// - [`ZiPatchError::TruncatedPatch`] — the reader returns EOF while reading
122/// the `body_len` field (i.e. no more chunks are present but `EOF_` was
123/// never seen).
124/// - [`ZiPatchError::OversizedChunk`] — `body_len` exceeds 512 MiB.
125/// - [`ZiPatchError::ChecksumMismatch`] — CRC32 mismatch (only when
126/// `verify_checksums` is `true`).
127/// - [`ZiPatchError::UnknownChunkTag`] — tag is not recognised.
128/// - [`ZiPatchError::Io`] — any other I/O failure reading from `r`.
129pub(crate) fn parse_chunk<R: std::io::Read>(
130 r: &mut R,
131 verify_checksums: bool,
132) -> Result<ParsedChunk> {
133 let size = match r.read_u32_be() {
134 Ok(s) => s as usize,
135 Err(ZiPatchError::Io(e)) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
136 return Err(ZiPatchError::TruncatedPatch);
137 }
138 Err(e) => return Err(e),
139 };
140 if size > MAX_CHUNK_SIZE {
141 return Err(ZiPatchError::OversizedChunk(size));
142 }
143
144 // Tag (4 B) and CRC (4 B) are always present regardless of body shape.
145 let mut tag = [0u8; 4];
146 r.read_exact(&mut tag)?;
147
148 // Peek at the first 5 bytes of the body without committing to either the
149 // generic single-allocation path or the SQPK `A` zero-copy-into-data path.
150 // For SQPK chunks, those 5 bytes are `[inner_size: i32 BE][sub_cmd: u8]`.
151 // For chunks with bodies shorter than 5 bytes (e.g. `EOF_`), we still read
152 // exactly `size` bytes into the prefix array and leave the rest zero.
153 let mut prefix = [0u8; 5];
154 let prefix_len = size.min(5);
155 if prefix_len > 0 {
156 r.read_exact(&mut prefix[..prefix_len])?;
157 }
158
159 // ---- Fast path: SQPK `A` (SqpkAddData) — see `parse_sqpk_add_data_fast`. ----
160 if &tag == b"SQPK" && size >= 5 + SQPK_ADDDATA_HEADER_SIZE && prefix[4] == b'A' {
161 return parse_sqpk_add_data_fast(r, tag, prefix, size, verify_checksums);
162 }
163
164 // ---- Generic path: one allocation for the whole body. ----
165 let mut body_vec = vec![0u8; size];
166 body_vec[..prefix_len].copy_from_slice(&prefix[..prefix_len]);
167 if size > prefix_len {
168 r.read_exact(&mut body_vec[prefix_len..])?;
169 }
170
171 let mut crc_buf = [0u8; 4];
172 r.read_exact(&mut crc_buf)?;
173 let expected_crc = u32::from_be_bytes(crc_buf);
174
175 if verify_checksums {
176 let mut hasher = crc32fast::Hasher::new();
177 hasher.update(&tag);
178 hasher.update(&body_vec);
179 let actual_crc = hasher.finalize();
180 if actual_crc != expected_crc {
181 return Err(ZiPatchError::ChecksumMismatch {
182 tag,
183 expected: expected_crc,
184 actual: actual_crc,
185 });
186 }
187 }
188
189 trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
190
191 // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
192 let consumed = (size as u64) + 12;
193
194 let body = &body_vec[..];
195
196 let chunk = match &tag {
197 b"EOF_" => Chunk::EndOfFile,
198 b"FHDR" => Chunk::FileHeader(fhdr::parse(body)?),
199 b"APLY" => Chunk::ApplyOption(aply::parse(body)?),
200 b"APFS" => Chunk::ApplyFreeSpace(afsp::parse(body)?),
201 b"ADIR" => Chunk::AddDirectory(adir::parse(body)?),
202 b"DELD" => Chunk::DeleteDirectory(ddir::parse(body)?),
203 b"SQPK" => Chunk::Sqpk(sqpk::parse_sqpk(body)?),
204 _ => return Err(ZiPatchError::UnknownChunkTag(tag)),
205 };
206
207 Ok(ParsedChunk {
208 chunk,
209 tag,
210 consumed,
211 })
212}
213
214// Size of the SqpkAddData fixed header that precedes the inline data payload.
215// Mirrors `add_data::SqpkAddData::DATA_SOURCE_OFFSET` (23) without taking a
216// `u64` round-trip; kept private to the framing path.
217const SQPK_ADDDATA_HEADER_SIZE: usize = 23;
218
219/// Fast path for SQPK `A` (`SqpkAddData`) chunks.
220///
221/// `AddData` is the largest chunk type by byte volume — payloads of hundreds of
222/// KB to MB are typical. The generic framing path allocates one `Vec<u8>` of
223/// `size` for the whole body, then `binrw`'s derived parser allocates a second
224/// `Vec<u8>` of exactly `data_bytes` and memcpys the inline payload into it.
225/// That second allocation + memcpy dominates parse time for `AddData`.
226///
227/// This function reads the `AddData` fixed header into a stack array, parses
228/// the seven fields directly, allocates the `data` payload at its exact size,
229/// and `read_exact`s the source bytes straight into it — one allocation, no
230/// intermediate copy of the payload.
231///
232/// On entry: `tag` and the 5-byte `prefix` (SQPK `inner_size` + sub-command
233/// byte) have already been consumed from `r`. The remaining bytes are
234/// `[fixed_header: 23 B][data: data_bytes][crc32: 4 B]`.
235fn parse_sqpk_add_data_fast<R: std::io::Read>(
236 r: &mut R,
237 tag: [u8; 4],
238 prefix: [u8; 5],
239 size: usize,
240 verify_checksums: bool,
241) -> Result<ParsedChunk> {
242 // Validate the SQPK inner_size against the outer chunk size, matching the
243 // check in `sqpk::parse_sqpk` so callers see byte-identical error behaviour.
244 let inner_size = i32::from_be_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
245 if inner_size != size {
246 return Err(ZiPatchError::InvalidField {
247 context: "SQPK inner size mismatch",
248 });
249 }
250
251 let mut header = [0u8; SQPK_ADDDATA_HEADER_SIZE];
252 r.read_exact(&mut header)?;
253
254 // SqpkAddData fixed-header layout (all big-endian):
255 // [0..3] pad
256 // [3..5] main_id u16
257 // [5..7] sub_id u16
258 // [7..11] file_id u32
259 // [11..15] block_offset_raw u32 (<< 7 = bytes)
260 // [15..19] data_bytes_raw u32 (<< 7 = bytes)
261 // [19..23] block_delete_raw u32 (<< 7 = bytes)
262 let main_id = u16::from_be_bytes([header[3], header[4]]);
263 let sub_id = u16::from_be_bytes([header[5], header[6]]);
264 let file_id = u32::from_be_bytes([header[7], header[8], header[9], header[10]]);
265 let block_offset_raw = u32::from_be_bytes([header[11], header[12], header[13], header[14]]);
266 let data_bytes_raw = u32::from_be_bytes([header[15], header[16], header[17], header[18]]);
267 let block_delete_raw = u32::from_be_bytes([header[19], header[20], header[21], header[22]]);
268
269 let block_offset = (block_offset_raw as u64) << 7;
270 let data_bytes = (data_bytes_raw as u64) << 7;
271 let block_delete_number = (block_delete_raw as u64) << 7;
272
273 // The declared payload length must fit exactly within the chunk body:
274 // size = 5 (inner_size + sub_cmd) + 23 (fixed header) + data_bytes
275 let expected_data = size - 5 - SQPK_ADDDATA_HEADER_SIZE;
276 if data_bytes as usize != expected_data {
277 return Err(ZiPatchError::InvalidField {
278 context: "SqpkAddData data_bytes does not match SQPK body length",
279 });
280 }
281
282 let mut data = vec![0u8; data_bytes as usize];
283 r.read_exact(&mut data)?;
284
285 let mut crc_buf = [0u8; 4];
286 r.read_exact(&mut crc_buf)?;
287 let expected_crc = u32::from_be_bytes(crc_buf);
288
289 if verify_checksums {
290 // CRC is over `tag ++ body`. The body is split across three disjoint
291 // buffers — feed each segment to the incremental hasher.
292 let mut hasher = crc32fast::Hasher::new();
293 hasher.update(&tag);
294 hasher.update(&prefix);
295 hasher.update(&header);
296 hasher.update(&data);
297 let actual_crc = hasher.finalize();
298 if actual_crc != expected_crc {
299 return Err(ZiPatchError::ChecksumMismatch {
300 tag,
301 expected: expected_crc,
302 actual: actual_crc,
303 });
304 }
305 }
306
307 trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
308
309 let chunk = Chunk::Sqpk(sqpk::SqpkCommand::AddData(Box::new(sqpk::SqpkAddData {
310 target_file: sqpk::SqpackFile {
311 main_id,
312 sub_id,
313 file_id,
314 },
315 block_offset,
316 data_bytes,
317 block_delete_number,
318 data,
319 })));
320
321 // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
322 let consumed = (size as u64) + 12;
323
324 Ok(ParsedChunk {
325 chunk,
326 tag,
327 consumed,
328 })
329}
330
/// Iterator over the [`Chunk`]s in a `ZiPatch` stream.
///
/// `ZiPatchReader` wraps any [`std::io::Read`] source and yields one
/// [`Chunk`] per call to [`Iterator::next`]. It validates the 12-byte file
/// magic on construction, then reads chunks sequentially until the `EOF_`
/// terminator is encountered or an error occurs.
///
/// # Stream contract
///
/// - **Magic** — the first 12 bytes must be `\x91ZIPATCH\r\n\x1a\n`. Any
///   mismatch returns [`ZiPatchError::InvalidMagic`] from [`ZiPatchReader::new`].
/// - **Framing** — every chunk is a length-prefixed frame:
///   `[body_len: u32 BE] [tag: 4 B] [body: body_len B] [crc32: u32 BE]`.
/// - **CRC32** — computed over `tag ++ body`. Verification is enabled by
///   default; use [`ZiPatchReader::skip_checksum_verification`] to disable it.
/// - **Termination** — the `EOF_` chunk is consumed internally and causes
///   the iterator to return `None`. Call [`ZiPatchReader::is_complete`] after
///   iteration to distinguish a clean end from a stream that stopped early
///   because of an error (for example a truncated download).
/// - **Fused** — after `EOF_` has been consumed, or after an error has been
///   yielded once as `Some(Err(_))`, every subsequent call to `next` returns
///   `None`. The iterator implements [`std::iter::FusedIterator`].
///
/// # Errors
///
/// Each call to [`Iterator::next`] returns `Some(Err(e))` on parse failure,
/// then `None` on all future calls. Possible errors include:
/// - [`ZiPatchError::TruncatedPatch`] — stream ended before `EOF_`.
/// - [`ZiPatchError::OversizedChunk`] — a declared chunk body exceeds 512 MiB.
/// - [`ZiPatchError::ChecksumMismatch`] — CRC32 verification failed.
/// - [`ZiPatchError::UnknownChunkTag`] — unrecognised 4-byte tag.
/// - [`ZiPatchError::InvalidField`] — a chunk body carries an inconsistent
///   field (e.g. the SQPK inner size does not match the frame length).
/// - [`ZiPatchError::Io`] — underlying I/O failure.
///
/// # Example
///
/// Build a minimal in-memory patch (magic + `ADIR` + `EOF_`) and iterate it:
///
/// ```rust
/// use std::io::Cursor;
/// use zipatch_rs::{Chunk, ZiPatchReader};
///
/// // Helper: wrap tag + body into a correctly framed chunk with CRC32.
/// fn make_chunk(tag: &[u8; 4], body: &[u8]) -> Vec<u8> {
///     let mut crc_input = Vec::new();
///     crc_input.extend_from_slice(tag);
///     crc_input.extend_from_slice(body);
///     let crc = crc32fast::hash(&crc_input);
///
///     let mut out = Vec::new();
///     out.extend_from_slice(&(body.len() as u32).to_be_bytes());
///     out.extend_from_slice(tag);
///     out.extend_from_slice(body);
///     out.extend_from_slice(&crc.to_be_bytes());
///     out
/// }
///
/// // 12-byte ZiPatch magic.
/// let magic: [u8; 12] = [0x91, 0x5A, 0x49, 0x50, 0x41, 0x54, 0x43, 0x48, 0x0D, 0x0A, 0x1A, 0x0A];
///
/// // ADIR body: u32 BE name_len (7) + b"created".
/// let mut adir_body = Vec::new();
/// adir_body.extend_from_slice(&7u32.to_be_bytes());
/// adir_body.extend_from_slice(b"created");
///
/// let mut patch = Vec::new();
/// patch.extend_from_slice(&magic);
/// patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
/// patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
///
/// let chunks: Vec<_> = ZiPatchReader::new(Cursor::new(patch))
///     .unwrap()
///     .collect::<Result<_, _>>()
///     .unwrap();
///
/// assert_eq!(chunks.len(), 1);
/// assert!(matches!(chunks[0], Chunk::AddDirectory(_)));
/// ```
#[derive(Debug)]
pub struct ZiPatchReader<R> {
    // Buffered wrapper around the caller's reader; all chunk parsing reads
    // go through it.
    inner: std::io::BufReader<R>,
    // Set once `EOF_` has been consumed or an error has been yielded; makes
    // every later `next()` return `None`.
    done: bool,
    // Whether `parse_chunk` compares the stored CRC32 against the computed one.
    verify_checksums: bool,
    // Set only when the `EOF_` terminator was parsed; backs `is_complete()`.
    eof_seen: bool,
    // Running total of bytes consumed from `inner`, including the 12-byte
    // magic header. Updated after each successful `parse_chunk` call.
    // Exposed via `bytes_read()` so the apply driver can fire monotonic
    // progress events without instrumenting the underlying `Read` source.
    bytes_read: u64,
    // 4-byte ASCII tag of the most recently parsed chunk frame. `None` until
    // the first frame parses successfully; afterwards it is overwritten on
    // every successful parse, including the `EOF_` terminator, and is left
    // untouched when a parse fails. Used by `apply_to` to attach the tag to
    // per-chunk progress events without re-matching on the `Chunk` enum.
    last_tag: Option<[u8; 4]>,
    // Absolute patch-file offset of the body of the most recently parsed
    // chunk (i.e. the byte right after the 8-byte `[len: u32 BE, tag: [u8;4]]`
    // frame header). `None` until the first chunk is successfully parsed; the
    // value is only set on the success arms of `next()` so a parse failure
    // never exposes a stale offset.
    current_body_offset: Option<u64>,
}
430
431impl<R: std::io::Read> ZiPatchReader<R> {
432 /// Wrap `reader` and validate the leading 12-byte `ZiPatch` magic.
433 ///
434 /// Consumes exactly 12 bytes from `reader`. The magic is the byte sequence
435 /// `0x91 0x5A 0x49 0x50 0x41 0x54 0x43 0x48 0x0D 0x0A 0x1A 0x0A`
436 /// (i.e. `\x91ZIPATCH\r\n\x1a\n`).
437 ///
438 /// The reader is wrapped in a [`std::io::BufReader`] internally, so the
439 /// many small typed reads the chunk parser issues (4-byte size, 4-byte
440 /// tag, 5-byte SQPK prefix, …) coalesce into a small number of syscalls.
441 /// Callers do not need to pre-wrap a raw [`std::fs::File`] or other
442 /// unbuffered source.
443 ///
444 /// CRC32 verification is **enabled** by default. Call
445 /// [`ZiPatchReader::skip_checksum_verification`] before iterating to
446 /// disable it.
447 ///
448 /// # Errors
449 ///
450 /// - [`ZiPatchError::InvalidMagic`] — the first 12 bytes do not match the
451 /// expected magic.
452 /// - [`ZiPatchError::Io`] — an I/O error occurred while reading the magic.
453 pub fn new(reader: R) -> Result<Self> {
454 let mut reader = std::io::BufReader::new(reader);
455 let magic = reader.read_exact_vec(12)?;
456 if magic.as_slice() != MAGIC {
457 return Err(ZiPatchError::InvalidMagic);
458 }
459 Ok(Self {
460 inner: reader,
461 done: false,
462 verify_checksums: true,
463 eof_seen: false,
464 // The 12-byte magic header has already been consumed.
465 bytes_read: 12,
466 last_tag: None,
467 current_body_offset: None,
468 })
469 }
470
471 /// Enable per-chunk CRC32 verification (the default).
472 ///
473 /// This is the default state after [`ZiPatchReader::new`]. Calling this
474 /// method after construction is only necessary if
475 /// [`ZiPatchReader::skip_checksum_verification`] was previously called.
476 #[must_use]
477 pub fn verify_checksums(mut self) -> Self {
478 self.verify_checksums = true;
479 self
480 }
481
482 /// Disable per-chunk CRC32 verification.
483 ///
484 /// Useful when the source has already been verified out-of-band (e.g. a
485 /// download hash was checked before the file was opened), or when
486 /// processing known-good test data where the overhead is unnecessary.
487 #[must_use]
488 pub fn skip_checksum_verification(mut self) -> Self {
489 self.verify_checksums = false;
490 self
491 }
492
493 /// Returns `true` if iteration reached the `EOF_` terminator cleanly.
494 ///
495 /// A `false` return after `next()` yields `None` indicates the stream was
496 /// truncated — the download or file copy was incomplete. In that case the
497 /// iterator stopped because of a [`ZiPatchError::TruncatedPatch`] error,
498 /// not because the patch finished normally.
499 pub fn is_complete(&self) -> bool {
500 self.eof_seen
501 }
502
503 /// Returns the running total of bytes consumed from the patch stream.
504 ///
505 /// Starts at `12` after [`ZiPatchReader::new`] (the magic header has been
506 /// read) and increases monotonically by the size of each chunk's wire
507 /// frame after each successful [`Iterator::next`] call. Includes the
508 /// `EOF_` terminator's frame.
509 ///
510 /// On parse error, the counter is **not** advanced past the failing
511 /// chunk — it reflects the byte offset at the start of that chunk's
512 /// length prefix, not the broken position somewhere inside its frame.
513 /// Use this offset together with the surfaced error to point a user at
514 /// where the patch became unreadable.
515 ///
516 /// This is the same counter that the
517 /// [`apply_to`](crate::ZiPatchReader::apply_to) driver attaches to
518 /// [`ChunkEvent::bytes_read`](crate::ChunkEvent::bytes_read) when firing
519 /// progress events. Useful for the `bytes_applied / total_patch_size`
520 /// ratio in a progress bar.
521 #[must_use]
522 pub fn bytes_read(&self) -> u64 {
523 self.bytes_read
524 }
525
526 /// Returns the 4-byte ASCII tag of the most recently yielded chunk.
527 ///
528 /// `None` before the first successful [`Iterator::next`] call and after
529 /// the `EOF_` terminator has been consumed (or an error has been
530 /// surfaced). Used by [`apply_to`](crate::ZiPatchReader::apply_to) to
531 /// populate [`ChunkEvent::kind`](crate::ChunkEvent::kind).
532 #[must_use]
533 pub fn last_tag(&self) -> Option<[u8; 4]> {
534 self.last_tag
535 }
536
537 /// Returns the absolute patch-file offset of the body of the most recently
538 /// yielded chunk.
539 ///
540 /// The chunk body begins immediately after the 8-byte
541 /// `[body_len: u32 BE, tag: [u8; 4]]` frame header, so the value points at
542 /// the first byte of the body — for `SQPK` chunks that is the start of
543 /// `[inner_size: i32 BE, sub_cmd: u8, …]`; for the other chunk types it
544 /// is the start of the variant-specific body.
545 ///
546 /// Index builders use this to compute absolute patch-file offsets for
547 /// `SqpkAddData::data`, `SqpkFile` block payloads, and `SqpkHeader::header_data`
548 /// without re-walking the stream.
549 ///
550 /// `None` before the first chunk is successfully yielded. A parse failure
551 /// leaves the previously-set value untouched (the offset returned by this
552 /// method always points at a chunk that was successfully parsed).
553 #[must_use]
554 pub fn current_chunk_body_offset(&self) -> Option<u64> {
555 self.current_body_offset
556 }
557}
558
559impl ZiPatchReader<std::io::BufReader<std::fs::File>> {
560 /// Open the file at `path`, wrap it in a [`std::io::BufReader`], and
561 /// validate the `ZiPatch` magic.
562 ///
563 /// This is a convenience constructor equivalent to:
564 ///
565 /// ```rust,no_run
566 /// # use std::io::BufReader;
567 /// # use std::fs::File;
568 /// # use zipatch_rs::ZiPatchReader;
569 /// let reader = ZiPatchReader::new(BufReader::new(File::open("patch.patch").unwrap())).unwrap();
570 /// ```
571 ///
572 /// # Errors
573 ///
574 /// - [`ZiPatchError::Io`] — the file could not be opened.
575 /// - [`ZiPatchError::InvalidMagic`] — the file does not start with the
576 /// `ZiPatch` magic bytes.
577 pub fn from_path(path: impl AsRef<std::path::Path>) -> crate::Result<Self> {
578 let file = std::fs::File::open(path)?;
579 Self::new(std::io::BufReader::new(file))
580 }
581}
582
583impl<R: std::io::Read> Iterator for ZiPatchReader<R> {
584 type Item = Result<Chunk>;
585
586 fn next(&mut self) -> Option<Self::Item> {
587 if self.done {
588 return None;
589 }
590 // Snapshot the body offset before parsing so a successful parse can
591 // commit it without re-walking the stream. The chunk body begins after
592 // the 8-byte `[body_len: u32 BE, tag: [u8; 4]]` frame header.
593 let body_offset = self.bytes_read + 8;
594 match parse_chunk(&mut self.inner, self.verify_checksums) {
595 Ok(ParsedChunk {
596 chunk: Chunk::EndOfFile,
597 tag,
598 consumed,
599 }) => {
600 self.bytes_read += consumed;
601 self.last_tag = Some(tag);
602 self.current_body_offset = Some(body_offset);
603 self.done = true;
604 self.eof_seen = true;
605 None
606 }
607 Ok(ParsedChunk {
608 chunk,
609 tag,
610 consumed,
611 }) => {
612 self.bytes_read += consumed;
613 self.last_tag = Some(tag);
614 self.current_body_offset = Some(body_offset);
615 Some(Ok(chunk))
616 }
617 Err(e) => {
618 self.done = true;
619 Some(Err(e))
620 }
621 }
622 }
623}
624
625impl<R: std::io::Read> std::iter::FusedIterator for ZiPatchReader<R> {}
626
627#[cfg(test)]
628mod tests {
629 use super::*;
630 use crate::test_utils::make_chunk;
631 use std::io::Cursor;
632
633 // --- parse_chunk error paths ---
634
#[test]
fn truncated_at_chunk_boundary_yields_truncated_patch() {
    // A stream that holds only the magic has no bytes left for a `body_len`
    // field. `parse_chunk` therefore hits EOF on its very first read and
    // must translate that `UnexpectedEof` I/O error into `TruncatedPatch`.
    let mut reader = ZiPatchReader::new(Cursor::new(MAGIC.to_vec())).unwrap();

    let outcome = reader
        .next()
        .expect("iterator must yield an error, not None");
    assert!(
        matches!(outcome, Err(ZiPatchError::TruncatedPatch)),
        "expected TruncatedPatch, got {outcome:?}"
    );
    assert!(!reader.is_complete(), "stream is not clean-ended");
}
653
#[test]
fn non_eof_io_error_on_body_len_read_propagates_as_io() {
    // Only `UnexpectedEof` on the `body_len` read is rewritten to
    // `TruncatedPatch`; any other I/O error must fall through the catch-all
    // error arm of `parse_chunk` unchanged. A reader that fails every call
    // with `BrokenPipe` exercises exactly that arm.
    struct AlwaysBroken;
    impl std::io::Read for AlwaysBroken {
        fn read(&mut self, _buf: &mut [u8]) -> std::io::Result<usize> {
            let err = std::io::Error::new(
                std::io::ErrorKind::BrokenPipe,
                "simulated broken pipe",
            );
            Err(err)
        }
    }

    let io_err = match parse_chunk(&mut AlwaysBroken, false) {
        Ok(_) => panic!("expected an error, got Ok"),
        Err(ZiPatchError::Io(io_err)) => io_err,
        Err(other) => panic!("expected ZiPatchError::Io(BrokenPipe), got {other:?}"),
    };
    assert_eq!(
        io_err.kind(),
        std::io::ErrorKind::BrokenPipe,
        "non-EOF I/O error must propagate unchanged, got kind {:?}",
        io_err.kind()
    );
}
682
#[test]
fn truncated_after_one_chunk_yields_truncated_patch() {
    // Stream: magic, one well-formed ADIR frame, then nothing. The first
    // `next()` yields the ADIR chunk; the second must report
    // `TruncatedPatch` rather than ending silently with `None`.
    let mut adir_body = 4u32.to_be_bytes().to_vec();
    adir_body.extend_from_slice(b"test");

    let mut patch = MAGIC.to_vec();
    patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));

    let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();

    let first = reader.next().expect("first chunk must be present");
    assert!(
        first.is_ok(),
        "first ADIR chunk should parse cleanly: {first:?}"
    );

    let second = reader.next().expect("second call must yield an error");
    assert!(
        matches!(second, Err(ZiPatchError::TruncatedPatch)),
        "expected TruncatedPatch on truncated stream, got {second:?}"
    );
    assert!(
        !reader.is_complete(),
        "is_complete must be false after truncation"
    );
}
711
#[test]
fn checksum_mismatch_returns_checksum_mismatch_error() {
    // Take a valid ADIR frame, damage the trailing CRC32 field, and check
    // that `parse_chunk` (with verification on) reports `ChecksumMismatch`
    // instead of panicking or returning some other error.
    let mut adir_body = 4u32.to_be_bytes().to_vec();
    adir_body.extend_from_slice(b"test");
    let mut frame = make_chunk(b"ADIR", &adir_body);

    // The CRC32 occupies the final four bytes; invert the very last one.
    let crc_tail = frame.len() - 1;
    frame[crc_tail] ^= 0xFF;

    let outcome = parse_chunk(&mut Cursor::new(frame), true);
    assert!(
        matches!(outcome, Err(ZiPatchError::ChecksumMismatch { .. })),
        "corrupted CRC must yield ChecksumMismatch"
    );
}
731
#[test]
fn unknown_chunk_tag_returns_unknown_chunk_tag_error() {
    // `ZZZZ` is not a known tag, so `parse_chunk` must reject the frame with
    // `UnknownChunkTag` and hand back the raw four tag bytes.
    let mut cur = Cursor::new(make_chunk(b"ZZZZ", &[]));

    let tag = match parse_chunk(&mut cur, false) {
        Ok(_) => panic!("expected UnknownChunkTag, got Ok"),
        Err(ZiPatchError::UnknownChunkTag(tag)) => tag,
        Err(other) => panic!("expected UnknownChunkTag, got {other:?}"),
    };
    assert_eq!(tag, *b"ZZZZ", "tag bytes must be preserved in error");
}
746
#[test]
fn oversized_chunk_body_len_returns_oversized_chunk_error() {
    // A length prefix of `u32::MAX` is far above the 512 MiB limit; the
    // frame must be rejected from the length field alone (the input holds
    // nothing but those four bytes).
    let length_prefix = [0xFFu8; 4];
    let mut cur = Cursor::new(&length_prefix[..]);

    let size = match parse_chunk(&mut cur, false) {
        Err(ZiPatchError::OversizedChunk(size)) => size,
        _ => panic!("expected OversizedChunk for u32::MAX body_len"),
    };
    assert!(
        size > MAX_CHUNK_SIZE,
        "reported size {size} must exceed MAX_CHUNK_SIZE {MAX_CHUNK_SIZE}"
    );
}
760
761 // --- ZiPatchReader byte-counter and tag accessors ---
762
#[test]
fn bytes_read_starts_at_12_before_first_chunk() {
    // Construction consumes the 12-byte magic and nothing else, so the
    // counter must read 12 before `next()` has ever been called.
    let patch = [&MAGIC[..], &make_chunk(b"EOF_", &[])[..]].concat();
    let reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();

    assert_eq!(
        reader.bytes_read(),
        12,
        "bytes_read must be 12 (magic only) before iteration starts"
    );
}
777
#[test]
fn last_tag_is_none_before_first_chunk() {
    // No frame has been parsed right after construction, so there is no tag
    // to report yet.
    let patch = [&MAGIC[..], &make_chunk(b"EOF_", &[])[..]].concat();
    let reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();

    assert_eq!(
        reader.last_tag(),
        None,
        "last_tag must be None before any chunk is read"
    );
}
791
#[test]
fn bytes_read_and_last_tag_track_each_chunk_frame() {
    // Stream: MAGIC, ADIR("a"), EOF_. After each frame the counter must have
    // grown by exactly that frame's size and `last_tag` must name the frame.
    //   ADIR frame: 4 (size) + 4 (tag) + 5 (body) + 4 (crc) = 17 bytes
    //   EOF_ frame: 4 + 4 + 0 + 4 = 12 bytes
    let mut adir_body = 1u32.to_be_bytes().to_vec();
    adir_body.extend_from_slice(b"a");

    let mut patch = MAGIC.to_vec();
    patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
    patch.extend_from_slice(&make_chunk(b"EOF_", &[]));

    let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
    assert_eq!(reader.bytes_read(), 12, "pre-read: magic only");
    assert_eq!(reader.last_tag(), None, "pre-read: no tag yet");

    // First frame: ADIR.
    let first = reader.next().unwrap().unwrap();
    assert!(
        matches!(first, Chunk::AddDirectory(_)),
        "first chunk must be ADIR"
    );
    assert_eq!(
        reader.bytes_read(),
        12 + 17,
        "after ADIR: magic + ADIR frame"
    );
    assert_eq!(
        reader.last_tag(),
        Some(*b"ADIR"),
        "last_tag must be ADIR after first next()"
    );

    // Second frame: the terminator, which ends iteration but still counts.
    let second = reader.next();
    assert!(second.is_none(), "EOF_ must terminate iteration");
    assert_eq!(
        reader.bytes_read(),
        12 + 17 + 12,
        "after EOF_: magic + ADIR + EOF_ frames"
    );
    assert_eq!(
        reader.last_tag(),
        Some(*b"EOF_"),
        "last_tag must be EOF_ after stream ends"
    );
    assert!(reader.is_complete(), "is_complete must be true after EOF_");
}
840
#[test]
fn bytes_read_is_monotonically_non_decreasing() {
    // Stream: MAGIC, ADIR("a"), ADIR("bb"), EOF_. The counter may never move
    // backwards between `next()` calls, every yielded ADIR frame must move
    // it forwards, and consuming the empty-bodied `EOF_` frame (12 bytes on
    // the wire) must move it forwards once more.
    let adir_frame = |name: &[u8]| -> Vec<u8> {
        let name_len = name.len() as u32;
        let body = [&name_len.to_be_bytes()[..], name].concat();
        make_chunk(b"ADIR", &body)
    };

    let mut patch = MAGIC.to_vec();
    patch.extend_from_slice(&adir_frame(b"a"));
    patch.extend_from_slice(&adir_frame(b"bb"));
    patch.extend_from_slice(&make_chunk(b"EOF_", &[]));

    let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
    let mut prev = reader.bytes_read();

    // `while let` (not `for`) because the reader is queried inside the loop.
    while let Some(item) = reader.next() {
        item.unwrap();
        let current = reader.bytes_read();
        assert!(
            current >= prev,
            "bytes_read must be monotonically non-decreasing: {prev} -> {current}"
        );
        // A yielded ADIR frame with an N-byte body adds N + 12 frame bytes,
        // so the step must be strictly positive.
        assert!(
            current > prev,
            "non-empty ADIR frame must strictly advance bytes_read: {prev} -> {current}"
        );
        prev = current;
    }

    // The loop ended because `EOF_` was consumed; its 12-byte frame must have
    // pushed the counter beyond the position recorded after the last ADIR.
    let after_eof = reader.bytes_read();
    assert!(
        after_eof > prev,
        "consuming EOF_ must advance bytes_read by its 12-byte frame: {prev} -> {}",
        reader.bytes_read()
    );
}
887
888 // --- from_path constructor ---
889
#[test]
fn from_path_opens_minimal_patch_and_reaches_eof() {
    // Write the smallest valid patch (MAGIC + EOF_) into a scratch
    // directory, then open it through the path-based constructor.
    let tmp = tempfile::tempdir().unwrap();
    let file_path = tmp.path().join("test.patch");
    let bytes: Vec<u8> = [MAGIC.to_vec(), make_chunk(b"EOF_", &[])].concat();
    std::fs::write(&file_path, &bytes).unwrap();

    let mut reader =
        ZiPatchReader::from_path(&file_path).expect("from_path must open valid patch");

    // The only chunk is EOF_, so the very first next() ends the stream.
    let first_item = reader.next();
    assert!(
        first_item.is_none(),
        "EOF_ must terminate iteration immediately"
    );
    assert!(reader.is_complete(), "is_complete must be true after EOF_");
}
908
#[test]
fn from_path_returns_io_error_when_file_is_missing() {
    // The scratch directory exists, but no file is ever created at this
    // path inside it.
    let tmp = tempfile::tempdir().unwrap();
    let file_path = tmp.path().join("nonexistent.patch");

    let outcome = ZiPatchReader::from_path(&file_path);
    let is_io_error = matches!(outcome, Err(ZiPatchError::Io(_)));
    assert!(
        is_io_error,
        "from_path on a missing file must return ZiPatchError::Io"
    );
}
921
922 // --- Iterator fused-ness and is_complete ---
923
#[test]
fn iterator_is_fused_after_error() {
    // Contract under test: after next() has produced Some(Err(_)), every
    // later call returns None.
    //
    // "ZZZZ" is not a recognised tag, so the first chunk fails to parse.
    let patch: Vec<u8> = [MAGIC.to_vec(), make_chunk(b"ZZZZ", &[])].concat();
    let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();

    let first = reader.next();
    assert!(
        matches!(first, Some(Err(ZiPatchError::UnknownChunkTag(_)))),
        "first call must yield the error: {first:?}"
    );

    // Second and third calls: the iterator stays exhausted.
    let second_is_none = reader.next().is_none();
    assert!(second_is_none, "fused: must return None after error");
    let third_is_none = reader.next().is_none();
    assert!(third_is_none, "fused: still None on third call");
}
944
#[test]
fn is_complete_false_until_eof_seen() {
    // Stream: MAGIC + ADIR("x") + EOF_. is_complete must stay false until
    // the EOF_ chunk itself has been consumed.
    let mut adir_body = 1u32.to_be_bytes().to_vec();
    adir_body.push(b'x');

    let patch: Vec<u8> = MAGIC
        .iter()
        .copied()
        .chain(make_chunk(b"ADIR", &adir_body))
        .chain(make_chunk(b"EOF_", &[]))
        .collect();
    let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();

    let complete_at_start = reader.is_complete();
    assert!(!complete_at_start, "not complete before reading anything");

    // Consume the ADIR chunk; EOF_ is still pending.
    reader.next().unwrap().unwrap();
    let complete_after_adir = reader.is_complete();
    assert!(!complete_after_adir, "not complete after ADIR, before EOF_");

    // Consuming EOF_ ends iteration and flips the flag.
    let after_eof = reader.next();
    assert!(after_eof.is_none(), "EOF_ consumed");
    assert!(reader.is_complete(), "complete after EOF_ consumed");
}
969}