zipatch_rs/chunk/mod.rs
1//! Wire-format chunk types and the [`ZiPatchReader`] iterator.
2//!
3//! This module is the parsing layer: it decodes the raw `ZiPatch` byte
4//! stream into a stream of typed [`Chunk`] values. Each top-level
5//! variant corresponds to one 4-byte ASCII wire tag (`FHDR`, `APLY`,
6//! `SQPK`, …); the per-variant submodules below own the binary layout for
7//! their body. Nothing in this module touches the filesystem — apply-time
8//! effects live in [`crate::apply`].
9//!
10//! The [`ZiPatchReader`] iterator validates the 12-byte file magic on
11//! construction, then yields one [`Chunk`] per [`Iterator::next`] call
12//! until the internal `EOF_` terminator is consumed or a parse error
13//! surfaces.
14
15pub(crate) mod adir;
16pub(crate) mod afsp;
17pub(crate) mod aply;
18pub(crate) mod ddir;
19pub(crate) mod fhdr;
20pub(crate) mod sqpk;
21pub(crate) mod util;
22
23pub use adir::AddDirectory;
24pub use afsp::ApplyFreeSpace;
25pub use aply::{ApplyOption, ApplyOptionKind};
26pub use ddir::DeleteDirectory;
27pub use fhdr::{FileHeader, FileHeaderV2, FileHeaderV3};
28pub use sqpk::{SqpackFile, SqpkCommand};
29// Re-export SqpkCommand sub-types so callers can match on them
30pub use sqpk::{
31 IndexCommand, SqpkAddData, SqpkCompressedBlock, SqpkDeleteData, SqpkExpandData, SqpkFile,
32 SqpkFileOperation, SqpkHeader, SqpkHeaderTarget, SqpkIndex, SqpkPatchInfo, SqpkTargetInfo,
33 TargetFileKind, TargetHeaderKind,
34};
35
36use crate::reader::ReadExt;
37use crate::{Result, ZiPatchError};
38use tracing::trace;
39
/// The 12-byte signature that opens every `ZiPatch` stream:
/// `\x91ZIPATCH\r\n\x1a\n`.
const MAGIC: [u8; 12] = *b"\x91ZIPATCH\r\n\x1a\n";

/// Largest chunk body length accepted by `parse_chunk` (512 MiB). A larger
/// declared length is rejected before any body buffer is allocated.
const MAX_CHUNK_SIZE: usize = 512 << 20;
45
46/// One top-level chunk parsed from a `ZiPatch` stream.
47///
48/// Each variant corresponds to a 4-byte ASCII wire tag. The tag dispatch table
49/// mirrors the C# reference in
50/// `lib/FFXIVQuickLauncher/.../Patching/ZiPatch/Chunk/ZiPatchChunk.cs`.
51///
52/// # Observed frequency
53///
54/// SE's XIVARR+ patch files almost exclusively contain `FHDR`, `APLY`, and
55/// `SQPK` chunks. `ADIR`/`DELD` can theoretically appear and are implemented,
56/// but are rarely emitted in practice. `APFS` has never been observed in modern
57/// patches (the reference implementation treats it as a no-op). `EOF_` is
58/// consumed by [`ZiPatchReader`] and is never yielded to the caller.
59///
60/// # Exhaustiveness
61///
62/// The enum is `#[non_exhaustive]`. Match arms should include a wildcard to
63/// remain forward-compatible as new chunk types are added.
64#[non_exhaustive]
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub enum Chunk {
67 /// `FHDR` — the first chunk in every patch file; carries version and
68 /// per-version patch metadata. See [`FileHeader`] for the versioned body.
69 FileHeader(FileHeader),
70 /// `APLY` — sets or clears a boolean apply-time flag on the
71 /// [`crate::ApplyContext`] (e.g. "ignore missing files"). See [`ApplyOption`].
72 ApplyOption(ApplyOption),
73 /// `APFS` — free-space book-keeping emitted by old patcher tooling; treated
74 /// as a no-op at apply time. See [`ApplyFreeSpace`].
75 ApplyFreeSpace(ApplyFreeSpace),
76 /// `ADIR` — instructs the patcher to create a directory under the game
77 /// install root. See [`AddDirectory`].
78 AddDirectory(AddDirectory),
79 /// `DELD` — instructs the patcher to remove a directory under the game
80 /// install root. See [`DeleteDirectory`].
81 DeleteDirectory(DeleteDirectory),
82 /// `SQPK` — the workhorse chunk; wraps one of eight sub-commands that
83 /// add, delete, expand, or replace `SqPack` data. See [`SqpkCommand`].
84 Sqpk(SqpkCommand),
85 /// `EOF_` — marks the clean end of the patch stream. [`ZiPatchReader`]
86 /// consumes this chunk internally; it is never yielded to the caller.
87 EndOfFile,
88}
89
90/// One parsed chunk plus its 4-byte ASCII tag and the byte count consumed
91/// from the input stream by its frame.
92///
93/// Returned by [`parse_chunk`]. The `consumed` count is exactly the size of
94/// the chunk's on-wire frame: `4 (body_len) + 4 (tag) + body_len + 4 (crc32)`
95/// = `body_len + 12`. This is what
96/// [`ZiPatchReader`](crate::ZiPatchReader) accumulates into its running
97/// byte counter for progress reporting.
98pub(crate) struct ParsedChunk {
99 pub(crate) chunk: Chunk,
100 pub(crate) tag: [u8; 4],
101 pub(crate) consumed: u64,
102}
103
104/// Parse one chunk frame from `r`.
105///
106/// # Wire framing
107///
108/// Each chunk is laid out as:
109///
110/// ```text
111/// [body_len: u32 BE] [tag: 4 bytes] [body: body_len bytes] [crc32: u32 BE]
112/// ```
113///
114/// The CRC32 is computed over `tag ++ body` (not over `body_len`), matching
115/// the C# `ChecksumBinaryReader` in the `XIVLauncher` reference. When
116/// `verify_checksums` is `true` and the stored CRC does not match the computed
117/// one, [`ZiPatchError::ChecksumMismatch`] is returned.
118///
119/// # Errors
120///
121/// - [`ZiPatchError::TruncatedPatch`] — the reader returns EOF while reading
122/// the `body_len` field (i.e. no more chunks are present but `EOF_` was
123/// never seen).
124/// - [`ZiPatchError::OversizedChunk`] — `body_len` exceeds 512 MiB.
125/// - [`ZiPatchError::ChecksumMismatch`] — CRC32 mismatch (only when
126/// `verify_checksums` is `true`).
127/// - [`ZiPatchError::UnknownChunkTag`] — tag is not recognised.
128/// - [`ZiPatchError::Io`] — any other I/O failure reading from `r`.
129pub(crate) fn parse_chunk<R: std::io::Read>(
130 r: &mut R,
131 verify_checksums: bool,
132) -> Result<ParsedChunk> {
133 let size = match r.read_u32_be() {
134 Ok(s) => s as usize,
135 Err(ZiPatchError::Io(e)) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
136 return Err(ZiPatchError::TruncatedPatch);
137 }
138 Err(e) => return Err(e),
139 };
140 if size > MAX_CHUNK_SIZE {
141 return Err(ZiPatchError::OversizedChunk(size));
142 }
143
144 // Tag (4 B) and CRC (4 B) are always present regardless of body shape.
145 let mut tag = [0u8; 4];
146 r.read_exact(&mut tag)?;
147
148 // Peek at the first 5 bytes of the body without committing to either the
149 // generic single-allocation path or the SQPK `A` zero-copy-into-data path.
150 // For SQPK chunks, those 5 bytes are `[inner_size: i32 BE][sub_cmd: u8]`.
151 // For chunks with bodies shorter than 5 bytes (e.g. `EOF_`), we still read
152 // exactly `size` bytes into the prefix array and leave the rest zero.
153 let mut prefix = [0u8; 5];
154 let prefix_len = size.min(5);
155 if prefix_len > 0 {
156 r.read_exact(&mut prefix[..prefix_len])?;
157 }
158
159 // ---- Fast path: SQPK `A` (SqpkAddData) — see `parse_sqpk_add_data_fast`. ----
160 if &tag == b"SQPK" && size >= 5 + SQPK_ADDDATA_HEADER_SIZE && prefix[4] == b'A' {
161 return parse_sqpk_add_data_fast(r, tag, prefix, size, verify_checksums);
162 }
163
164 // ---- Generic path: one allocation for the whole body. ----
165 let mut body_vec = vec![0u8; size];
166 body_vec[..prefix_len].copy_from_slice(&prefix[..prefix_len]);
167 if size > prefix_len {
168 r.read_exact(&mut body_vec[prefix_len..])?;
169 }
170
171 let mut crc_buf = [0u8; 4];
172 r.read_exact(&mut crc_buf)?;
173 let expected_crc = u32::from_be_bytes(crc_buf);
174
175 if verify_checksums {
176 let mut hasher = crc32fast::Hasher::new();
177 hasher.update(&tag);
178 hasher.update(&body_vec);
179 let actual_crc = hasher.finalize();
180 if actual_crc != expected_crc {
181 return Err(ZiPatchError::ChecksumMismatch {
182 tag,
183 expected: expected_crc,
184 actual: actual_crc,
185 });
186 }
187 }
188
189 trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
190
191 // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
192 let consumed = (size as u64) + 12;
193
194 let body = &body_vec[..];
195
196 let chunk = match &tag {
197 b"EOF_" => Chunk::EndOfFile,
198 b"FHDR" => Chunk::FileHeader(fhdr::parse(body)?),
199 b"APLY" => Chunk::ApplyOption(aply::parse(body)?),
200 b"APFS" => Chunk::ApplyFreeSpace(afsp::parse(body)?),
201 b"ADIR" => Chunk::AddDirectory(adir::parse(body)?),
202 b"DELD" => Chunk::DeleteDirectory(ddir::parse(body)?),
203 b"SQPK" => Chunk::Sqpk(sqpk::parse_sqpk(body)?),
204 _ => return Err(ZiPatchError::UnknownChunkTag(tag)),
205 };
206
207 Ok(ParsedChunk {
208 chunk,
209 tag,
210 consumed,
211 })
212}
213
214// Size of the SqpkAddData fixed header that precedes the inline data payload.
215// Mirrors `add_data::SqpkAddData::DATA_SOURCE_OFFSET` (23) without taking a
216// `u64` round-trip; kept private to the framing path.
217const SQPK_ADDDATA_HEADER_SIZE: usize = 23;
218
219/// Fast path for SQPK `A` (`SqpkAddData`) chunks.
220///
221/// `AddData` is the largest chunk type by byte volume — payloads of hundreds of
222/// KB to MB are typical. The generic framing path allocates one `Vec<u8>` of
223/// `size` for the whole body, then `binrw`'s derived parser allocates a second
224/// `Vec<u8>` of exactly `data_bytes` and memcpys the inline payload into it.
225/// That second allocation + memcpy dominates parse time for `AddData`.
226///
227/// This function reads the `AddData` fixed header into a stack array, parses
228/// the seven fields directly, allocates the `data` payload at its exact size,
229/// and `read_exact`s the source bytes straight into it — one allocation, no
230/// intermediate copy of the payload.
231///
232/// On entry: `tag` and the 5-byte `prefix` (SQPK `inner_size` + sub-command
233/// byte) have already been consumed from `r`. The remaining bytes are
234/// `[fixed_header: 23 B][data: data_bytes][crc32: 4 B]`.
235fn parse_sqpk_add_data_fast<R: std::io::Read>(
236 r: &mut R,
237 tag: [u8; 4],
238 prefix: [u8; 5],
239 size: usize,
240 verify_checksums: bool,
241) -> Result<ParsedChunk> {
242 // Validate the SQPK inner_size against the outer chunk size, matching the
243 // check in `sqpk::parse_sqpk` so callers see byte-identical error behaviour.
244 let inner_size = i32::from_be_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
245 if inner_size != size {
246 return Err(ZiPatchError::InvalidField {
247 context: "SQPK inner size mismatch",
248 });
249 }
250
251 let mut header = [0u8; SQPK_ADDDATA_HEADER_SIZE];
252 r.read_exact(&mut header)?;
253
254 // SqpkAddData fixed-header layout (all big-endian):
255 // [0..3] pad
256 // [3..5] main_id u16
257 // [5..7] sub_id u16
258 // [7..11] file_id u32
259 // [11..15] block_offset_raw u32 (<< 7 = bytes)
260 // [15..19] data_bytes_raw u32 (<< 7 = bytes)
261 // [19..23] block_delete_raw u32 (<< 7 = bytes)
262 let main_id = u16::from_be_bytes([header[3], header[4]]);
263 let sub_id = u16::from_be_bytes([header[5], header[6]]);
264 let file_id = u32::from_be_bytes([header[7], header[8], header[9], header[10]]);
265 let block_offset_raw = u32::from_be_bytes([header[11], header[12], header[13], header[14]]);
266 let data_bytes_raw = u32::from_be_bytes([header[15], header[16], header[17], header[18]]);
267 let block_delete_raw = u32::from_be_bytes([header[19], header[20], header[21], header[22]]);
268
269 let block_offset = (block_offset_raw as u64) << 7;
270 let data_bytes = (data_bytes_raw as u64) << 7;
271 let block_delete_number = (block_delete_raw as u64) << 7;
272
273 // The declared payload length must fit exactly within the chunk body:
274 // size = 5 (inner_size + sub_cmd) + 23 (fixed header) + data_bytes
275 let expected_data = size - 5 - SQPK_ADDDATA_HEADER_SIZE;
276 if data_bytes as usize != expected_data {
277 return Err(ZiPatchError::InvalidField {
278 context: "SqpkAddData data_bytes does not match SQPK body length",
279 });
280 }
281
282 let mut data = vec![0u8; data_bytes as usize];
283 r.read_exact(&mut data)?;
284
285 let mut crc_buf = [0u8; 4];
286 r.read_exact(&mut crc_buf)?;
287 let expected_crc = u32::from_be_bytes(crc_buf);
288
289 if verify_checksums {
290 // CRC is over `tag ++ body`. The body is split across three disjoint
291 // buffers — feed each segment to the incremental hasher.
292 let mut hasher = crc32fast::Hasher::new();
293 hasher.update(&tag);
294 hasher.update(&prefix);
295 hasher.update(&header);
296 hasher.update(&data);
297 let actual_crc = hasher.finalize();
298 if actual_crc != expected_crc {
299 return Err(ZiPatchError::ChecksumMismatch {
300 tag,
301 expected: expected_crc,
302 actual: actual_crc,
303 });
304 }
305 }
306
307 trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
308
309 let chunk = Chunk::Sqpk(sqpk::SqpkCommand::AddData(Box::new(sqpk::SqpkAddData {
310 target_file: sqpk::SqpackFile {
311 main_id,
312 sub_id,
313 file_id,
314 },
315 block_offset,
316 data_bytes,
317 block_delete_number,
318 data,
319 })));
320
321 // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
322 let consumed = (size as u64) + 12;
323
324 Ok(ParsedChunk {
325 chunk,
326 tag,
327 consumed,
328 })
329}
330
/// Streaming iterator over the [`Chunk`]s of a `ZiPatch` file.
///
/// `ZiPatchReader` wraps any [`std::io::Read`] source and produces one
/// [`Chunk`] per [`Iterator::next`] call. The 12-byte file magic is checked
/// when the reader is constructed; after that, chunks are decoded in order
/// until the `EOF_` terminator is reached or an error occurs.
///
/// # Stream contract
///
/// - **Magic** — the stream must begin with the 12 bytes
///   `\x91ZIPATCH\r\n\x1a\n`; anything else makes [`ZiPatchReader::new`]
///   return [`ZiPatchError::InvalidMagic`].
/// - **Framing** — each chunk is a length-prefixed frame:
///   `[body_len: u32 BE] [tag: 4 B] [body: body_len B] [crc32: u32 BE]`.
/// - **CRC32** — covers `tag ++ body`. Verification is on by default and can
///   be turned off with [`ZiPatchReader::skip_checksum_verification`].
/// - **Termination** — the `EOF_` chunk is consumed internally and makes the
///   iterator return `None`. Use [`ZiPatchReader::is_complete`] afterwards to
///   tell a clean end apart from a stream that stopped early.
/// - **Fused** — after `EOF_`, or after the single `Some(Err(_))` that
///   reports a failure, every further `next` call returns `None`. The type
///   implements [`std::iter::FusedIterator`].
///
/// # Errors
///
/// A parse failure is reported once as `Some(Err(e))`; all later calls
/// return `None`. Possible errors include:
/// - [`ZiPatchError::TruncatedPatch`] — the stream ended before `EOF_`.
/// - [`ZiPatchError::OversizedChunk`] — a declared chunk body exceeds 512 MiB.
/// - [`ZiPatchError::ChecksumMismatch`] — CRC32 verification failed.
/// - [`ZiPatchError::UnknownChunkTag`] — unrecognised 4-byte tag.
/// - [`ZiPatchError::Io`] — underlying I/O failure.
///
/// # Example
///
/// Assemble a minimal in-memory patch (magic + `ADIR` + `EOF_`) and iterate
/// over it:
///
/// ```rust
/// use std::io::Cursor;
/// use zipatch_rs::{Chunk, ZiPatchReader};
///
/// // Helper: frame `tag` + `body` as a chunk with a valid CRC32.
/// fn make_chunk(tag: &[u8; 4], body: &[u8]) -> Vec<u8> {
///     let mut crc_input = Vec::new();
///     crc_input.extend_from_slice(tag);
///     crc_input.extend_from_slice(body);
///     let crc = crc32fast::hash(&crc_input);
///
///     let mut out = Vec::new();
///     out.extend_from_slice(&(body.len() as u32).to_be_bytes());
///     out.extend_from_slice(tag);
///     out.extend_from_slice(body);
///     out.extend_from_slice(&crc.to_be_bytes());
///     out
/// }
///
/// // 12-byte ZiPatch magic.
/// let magic: [u8; 12] = [0x91, 0x5A, 0x49, 0x50, 0x41, 0x54, 0x43, 0x48, 0x0D, 0x0A, 0x1A, 0x0A];
///
/// // ADIR body: u32 BE name_len (7) + b"created".
/// let mut adir_body = Vec::new();
/// adir_body.extend_from_slice(&7u32.to_be_bytes());
/// adir_body.extend_from_slice(b"created");
///
/// let mut patch = Vec::new();
/// patch.extend_from_slice(&magic);
/// patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
/// patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
///
/// let chunks: Vec<_> = ZiPatchReader::new(Cursor::new(patch))
///     .unwrap()
///     .collect::<Result<_, _>>()
///     .unwrap();
///
/// assert_eq!(chunks.len(), 1);
/// assert!(matches!(chunks[0], Chunk::AddDirectory(_)));
/// ```
#[derive(Debug)]
pub struct ZiPatchReader<R> {
    // Buffered view of the caller's reader; every chunk is parsed from here.
    inner: std::io::BufReader<R>,
    // Set once iteration has finished (after `EOF_` or after an error has
    // been reported); makes every later `next()` return `None`.
    done: bool,
    // Whether `parse_chunk` is asked to verify each chunk's CRC32.
    verify_checksums: bool,
    // Set only when the `EOF_` terminator itself has been parsed.
    eof_seen: bool,
    // Running total of patch bytes accounted for, starting at 12 for the
    // magic header and growing by each successfully parsed chunk's frame
    // size. Exposed through `bytes_read()` so the apply driver can emit
    // monotonic progress events without instrumenting the underlying `Read`
    // source.
    bytes_read: u64,
    // 4-byte ASCII tag of the most recently parsed chunk, `EOF_` included.
    // `None` until the first chunk has been parsed successfully; a parse
    // error leaves the previous value in place. Lets `apply_to` label
    // per-chunk progress events without re-matching on the `Chunk` enum.
    last_tag: Option<[u8; 4]>,
}
424
425impl<R: std::io::Read> ZiPatchReader<R> {
426 /// Wrap `reader` and validate the leading 12-byte `ZiPatch` magic.
427 ///
428 /// Consumes exactly 12 bytes from `reader`. The magic is the byte sequence
429 /// `0x91 0x5A 0x49 0x50 0x41 0x54 0x43 0x48 0x0D 0x0A 0x1A 0x0A`
430 /// (i.e. `\x91ZIPATCH\r\n\x1a\n`).
431 ///
432 /// The reader is wrapped in a [`std::io::BufReader`] internally, so the
433 /// many small typed reads the chunk parser issues (4-byte size, 4-byte
434 /// tag, 5-byte SQPK prefix, …) coalesce into a small number of syscalls.
435 /// Callers do not need to pre-wrap a raw [`std::fs::File`] or other
436 /// unbuffered source.
437 ///
438 /// CRC32 verification is **enabled** by default. Call
439 /// [`ZiPatchReader::skip_checksum_verification`] before iterating to
440 /// disable it.
441 ///
442 /// # Errors
443 ///
444 /// - [`ZiPatchError::InvalidMagic`] — the first 12 bytes do not match the
445 /// expected magic.
446 /// - [`ZiPatchError::Io`] — an I/O error occurred while reading the magic.
447 pub fn new(reader: R) -> Result<Self> {
448 let mut reader = std::io::BufReader::new(reader);
449 let magic = reader.read_exact_vec(12)?;
450 if magic.as_slice() != MAGIC {
451 return Err(ZiPatchError::InvalidMagic);
452 }
453 Ok(Self {
454 inner: reader,
455 done: false,
456 verify_checksums: true,
457 eof_seen: false,
458 // The 12-byte magic header has already been consumed.
459 bytes_read: 12,
460 last_tag: None,
461 })
462 }
463
464 /// Enable per-chunk CRC32 verification (the default).
465 ///
466 /// This is the default state after [`ZiPatchReader::new`]. Calling this
467 /// method after construction is only necessary if
468 /// [`ZiPatchReader::skip_checksum_verification`] was previously called.
469 #[must_use]
470 pub fn verify_checksums(mut self) -> Self {
471 self.verify_checksums = true;
472 self
473 }
474
475 /// Disable per-chunk CRC32 verification.
476 ///
477 /// Useful when the source has already been verified out-of-band (e.g. a
478 /// download hash was checked before the file was opened), or when
479 /// processing known-good test data where the overhead is unnecessary.
480 #[must_use]
481 pub fn skip_checksum_verification(mut self) -> Self {
482 self.verify_checksums = false;
483 self
484 }
485
486 /// Returns `true` if iteration reached the `EOF_` terminator cleanly.
487 ///
488 /// A `false` return after `next()` yields `None` indicates the stream was
489 /// truncated — the download or file copy was incomplete. In that case the
490 /// iterator stopped because of a [`ZiPatchError::TruncatedPatch`] error,
491 /// not because the patch finished normally.
492 pub fn is_complete(&self) -> bool {
493 self.eof_seen
494 }
495
496 /// Returns the running total of bytes consumed from the patch stream.
497 ///
498 /// Starts at `12` after [`ZiPatchReader::new`] (the magic header has been
499 /// read) and increases monotonically by the size of each chunk's wire
500 /// frame after each successful [`Iterator::next`] call. Includes the
501 /// `EOF_` terminator's frame.
502 ///
503 /// On parse error, the counter is **not** advanced past the failing
504 /// chunk — it reflects the byte offset at the start of that chunk's
505 /// length prefix, not the broken position somewhere inside its frame.
506 /// Use this offset together with the surfaced error to point a user at
507 /// where the patch became unreadable.
508 ///
509 /// This is the same counter that the
510 /// [`apply_to`](crate::ZiPatchReader::apply_to) driver attaches to
511 /// [`ChunkEvent::bytes_read`](crate::ChunkEvent::bytes_read) when firing
512 /// progress events. Useful for the `bytes_applied / total_patch_size`
513 /// ratio in a progress bar.
514 #[must_use]
515 pub fn bytes_read(&self) -> u64 {
516 self.bytes_read
517 }
518
519 /// Returns the 4-byte ASCII tag of the most recently yielded chunk.
520 ///
521 /// `None` before the first successful [`Iterator::next`] call and after
522 /// the `EOF_` terminator has been consumed (or an error has been
523 /// surfaced). Used by [`apply_to`](crate::ZiPatchReader::apply_to) to
524 /// populate [`ChunkEvent::kind`](crate::ChunkEvent::kind).
525 #[must_use]
526 pub fn last_tag(&self) -> Option<[u8; 4]> {
527 self.last_tag
528 }
529}
530
531impl ZiPatchReader<std::io::BufReader<std::fs::File>> {
532 /// Open the file at `path`, wrap it in a [`std::io::BufReader`], and
533 /// validate the `ZiPatch` magic.
534 ///
535 /// This is a convenience constructor equivalent to:
536 ///
537 /// ```rust,no_run
538 /// # use std::io::BufReader;
539 /// # use std::fs::File;
540 /// # use zipatch_rs::ZiPatchReader;
541 /// let reader = ZiPatchReader::new(BufReader::new(File::open("patch.patch").unwrap())).unwrap();
542 /// ```
543 ///
544 /// # Errors
545 ///
546 /// - [`ZiPatchError::Io`] — the file could not be opened.
547 /// - [`ZiPatchError::InvalidMagic`] — the file does not start with the
548 /// `ZiPatch` magic bytes.
549 pub fn from_path(path: impl AsRef<std::path::Path>) -> crate::Result<Self> {
550 let file = std::fs::File::open(path)?;
551 Self::new(std::io::BufReader::new(file))
552 }
553}
554
555impl<R: std::io::Read> Iterator for ZiPatchReader<R> {
556 type Item = Result<Chunk>;
557
558 fn next(&mut self) -> Option<Self::Item> {
559 if self.done {
560 return None;
561 }
562 match parse_chunk(&mut self.inner, self.verify_checksums) {
563 Ok(ParsedChunk {
564 chunk: Chunk::EndOfFile,
565 tag,
566 consumed,
567 }) => {
568 self.bytes_read += consumed;
569 self.last_tag = Some(tag);
570 self.done = true;
571 self.eof_seen = true;
572 None
573 }
574 Ok(ParsedChunk {
575 chunk,
576 tag,
577 consumed,
578 }) => {
579 self.bytes_read += consumed;
580 self.last_tag = Some(tag);
581 Some(Ok(chunk))
582 }
583 Err(e) => {
584 self.done = true;
585 Some(Err(e))
586 }
587 }
588 }
589}
590
591impl<R: std::io::Read> std::iter::FusedIterator for ZiPatchReader<R> {}
592
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::make_chunk;
    use std::io::Cursor;

    // --- fixture helpers ---

    /// Frame an `ADIR` chunk whose body is `[name_len: u32 BE][name]`.
    fn adir_chunk(name: &[u8]) -> Vec<u8> {
        let mut body = (name.len() as u32).to_be_bytes().to_vec();
        body.extend_from_slice(name);
        make_chunk(b"ADIR", &body)
    }

    /// Frame an empty-bodied `EOF_` terminator chunk.
    fn eof_chunk() -> Vec<u8> {
        make_chunk(b"EOF_", &[])
    }

    /// Magic header followed by the given pre-framed chunks, in order.
    fn patch_of(chunks: &[Vec<u8>]) -> Vec<u8> {
        let mut patch = MAGIC.to_vec();
        for chunk in chunks {
            patch.extend_from_slice(chunk);
        }
        patch
    }

    /// Reader over an in-memory patch; panics if the magic is rejected.
    fn reader_over(patch: Vec<u8>) -> ZiPatchReader<Cursor<Vec<u8>>> {
        ZiPatchReader::new(Cursor::new(patch)).unwrap()
    }

    // --- parse_chunk error paths ---

    #[test]
    fn truncated_at_chunk_boundary_yields_truncated_patch() {
        // Magic only, no chunks: the very first `body_len` read hits EOF,
        // which `parse_chunk` must translate into TruncatedPatch.
        let mut reader = reader_over(patch_of(&[]));
        let outcome = reader
            .next()
            .expect("iterator must yield an error, not None");
        assert!(
            matches!(outcome, Err(ZiPatchError::TruncatedPatch)),
            "expected TruncatedPatch, got {outcome:?}"
        );
        assert!(!reader.is_complete(), "stream is not clean-ended");
    }

    #[test]
    fn non_eof_io_error_on_body_len_read_propagates_as_io() {
        // An I/O error other than UnexpectedEof on the `body_len` read must
        // come back unchanged, not be rewritten to TruncatedPatch. The
        // reader below fails every read with BrokenPipe.
        struct BrokenReader;
        impl std::io::Read for BrokenReader {
            fn read(&mut self, _: &mut [u8]) -> std::io::Result<usize> {
                Err(std::io::Error::new(
                    std::io::ErrorKind::BrokenPipe,
                    "simulated broken pipe",
                ))
            }
        }

        match parse_chunk(&mut BrokenReader, false) {
            Err(ZiPatchError::Io(e)) => assert_eq!(
                e.kind(),
                std::io::ErrorKind::BrokenPipe,
                "non-EOF I/O error must propagate unchanged, got kind {:?}",
                e.kind()
            ),
            Err(other) => panic!("expected ZiPatchError::Io(BrokenPipe), got {other:?}"),
            Ok(_) => panic!("expected an error, got Ok"),
        }
    }

    #[test]
    fn truncated_after_one_chunk_yields_truncated_patch() {
        // Magic + one well-formed ADIR and nothing after it: the second
        // `next()` must report TruncatedPatch rather than ending quietly.
        let mut reader = reader_over(patch_of(&[adir_chunk(b"test")]));

        let first = reader.next().expect("first chunk must be present");
        assert!(
            first.is_ok(),
            "first ADIR chunk should parse cleanly: {first:?}"
        );

        let second = reader.next().expect("second call must yield an error");
        assert!(
            matches!(second, Err(ZiPatchError::TruncatedPatch)),
            "expected TruncatedPatch on truncated stream, got {second:?}"
        );
        assert!(
            !reader.is_complete(),
            "is_complete must be false after truncation"
        );
    }

    #[test]
    fn checksum_mismatch_returns_checksum_mismatch_error() {
        // Flip the final byte of the frame (the last CRC32 byte) of an
        // otherwise valid ADIR chunk; with verification on this must be
        // reported as ChecksumMismatch.
        let mut frame = adir_chunk(b"test");
        *frame.last_mut().expect("frame is never empty") ^= 0xFF;

        let result = parse_chunk(&mut Cursor::new(frame), true);
        assert!(
            matches!(result, Err(ZiPatchError::ChecksumMismatch { .. })),
            "corrupted CRC must yield ChecksumMismatch"
        );
    }

    #[test]
    fn unknown_chunk_tag_returns_unknown_chunk_tag_error() {
        // `ZZZZ` is not a known tag; the error must carry the raw tag bytes.
        let mut cur = Cursor::new(make_chunk(b"ZZZZ", &[]));
        match parse_chunk(&mut cur, false) {
            Err(ZiPatchError::UnknownChunkTag(tag)) => {
                assert_eq!(tag, *b"ZZZZ", "tag bytes must be preserved in error");
            }
            Err(other) => panic!("expected UnknownChunkTag, got {other:?}"),
            Ok(_) => panic!("expected UnknownChunkTag, got Ok"),
        }
    }

    #[test]
    fn oversized_chunk_body_len_returns_oversized_chunk_error() {
        // A declared body_len of u32::MAX is far above the 512 MiB limit and
        // must be refused straight after the length prefix is read.
        let mut cur = Cursor::new(u32::MAX.to_be_bytes());
        let Err(ZiPatchError::OversizedChunk(size)) = parse_chunk(&mut cur, false) else {
            panic!("expected OversizedChunk for u32::MAX body_len")
        };
        assert!(
            size > MAX_CHUNK_SIZE,
            "reported size {size} must exceed MAX_CHUNK_SIZE {MAX_CHUNK_SIZE}"
        );
    }

    // --- ZiPatchReader byte-counter and tag accessors ---

    #[test]
    fn bytes_read_starts_at_12_before_first_chunk() {
        // Right after construction only the 12-byte magic has been counted.
        let reader = reader_over(patch_of(&[eof_chunk()]));
        assert_eq!(
            reader.bytes_read(),
            12,
            "bytes_read must be 12 (magic only) before iteration starts"
        );
    }

    #[test]
    fn last_tag_is_none_before_first_chunk() {
        // No chunk has been parsed yet, so there is no tag to report.
        let reader = reader_over(patch_of(&[eof_chunk()]));
        assert_eq!(
            reader.last_tag(),
            None,
            "last_tag must be None before any chunk is read"
        );
    }

    #[test]
    fn bytes_read_and_last_tag_track_each_chunk_frame() {
        // MAGIC + ADIR("a") + EOF_.
        //   ADIR frame: 4 (size) + 4 (tag) + 5 (body) + 4 (crc) = 17 bytes
        //   EOF_ frame: 4 + 4 + 0 + 4                           = 12 bytes
        let mut reader = reader_over(patch_of(&[adir_chunk(b"a"), eof_chunk()]));
        assert_eq!(reader.bytes_read(), 12, "pre-read: magic only");
        assert_eq!(reader.last_tag(), None, "pre-read: no tag yet");

        let chunk = reader.next().unwrap().unwrap();
        assert!(
            matches!(chunk, Chunk::AddDirectory(_)),
            "first chunk must be ADIR"
        );
        assert_eq!(
            reader.bytes_read(),
            12 + 17,
            "after ADIR: magic + ADIR frame"
        );
        assert_eq!(
            reader.last_tag(),
            Some(*b"ADIR"),
            "last_tag must be ADIR after first next()"
        );

        assert!(reader.next().is_none(), "EOF_ must terminate iteration");
        assert_eq!(
            reader.bytes_read(),
            12 + 17 + 12,
            "after EOF_: magic + ADIR + EOF_ frames"
        );
        assert_eq!(
            reader.last_tag(),
            Some(*b"EOF_"),
            "last_tag must be EOF_ after stream ends"
        );
        assert!(reader.is_complete(), "is_complete must be true after EOF_");
    }

    #[test]
    fn bytes_read_is_monotonically_non_decreasing() {
        // Two ADIR chunks followed by EOF_. The counter may never go
        // backwards, every yielded ADIR frame must move it forward, and the
        // final `next()` that swallows EOF_ must move it forward once more.
        let mut reader = reader_over(patch_of(&[
            adir_chunk(b"a"),
            adir_chunk(b"bb"),
            eof_chunk(),
        ]));

        let mut prev = reader.bytes_read();
        loop {
            let Some(result) = reader.next() else { break };
            result.unwrap();
            let current = reader.bytes_read();
            assert!(
                current >= prev,
                "bytes_read must be monotonically non-decreasing: {prev} -> {current}"
            );
            // A body of N bytes contributes N + 12 frame bytes, so a yielded
            // ADIR chunk always advances the counter strictly.
            assert!(
                current > prev,
                "non-empty ADIR frame must strictly advance bytes_read: \
                 {prev} -> {current}"
            );
            prev = current;
        }

        // `prev` still holds the position after the last ADIR; the EOF_
        // frame (12 bytes, empty body) was consumed by the call that ended
        // the loop.
        assert!(
            reader.bytes_read() > prev,
            "consuming EOF_ must advance bytes_read by its 12-byte frame: \
             {prev} -> {}",
            reader.bytes_read()
        );
    }

    // --- from_path constructor ---

    #[test]
    fn from_path_opens_minimal_patch_and_reaches_eof() {
        let tmp = tempfile::tempdir().unwrap();
        let file_path = tmp.path().join("test.patch");
        std::fs::write(&file_path, patch_of(&[eof_chunk()])).unwrap();

        let mut reader =
            ZiPatchReader::from_path(&file_path).expect("from_path must open valid patch");
        assert!(
            reader.next().is_none(),
            "EOF_ must terminate iteration immediately"
        );
        assert!(reader.is_complete(), "is_complete must be true after EOF_");
    }

    #[test]
    fn from_path_returns_io_error_when_file_is_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let file_path = tmp.path().join("nonexistent.patch");
        assert!(
            matches!(
                ZiPatchReader::from_path(&file_path),
                Err(ZiPatchError::Io(_))
            ),
            "from_path on a missing file must return ZiPatchError::Io"
        );
    }

    // --- Iterator fused-ness and is_complete ---

    #[test]
    fn iterator_is_fused_after_error() {
        // After the one `Some(Err(_))`, every later call must return None.
        // An unknown tag is used to provoke the error.
        let mut reader = reader_over(patch_of(&[make_chunk(b"ZZZZ", &[])]));

        let first = reader.next();
        assert!(
            matches!(first, Some(Err(ZiPatchError::UnknownChunkTag(_)))),
            "first call must yield the error: {first:?}"
        );
        assert!(
            reader.next().is_none(),
            "fused: must return None after error"
        );
        assert!(reader.next().is_none(), "fused: still None on third call");
    }

    #[test]
    fn is_complete_false_until_eof_seen() {
        let mut reader = reader_over(patch_of(&[adir_chunk(b"x"), eof_chunk()]));
        assert!(
            !reader.is_complete(),
            "not complete before reading anything"
        );

        reader.next().unwrap().unwrap(); // consume ADIR
        assert!(
            !reader.is_complete(),
            "not complete after ADIR, before EOF_"
        );

        assert!(reader.next().is_none(), "EOF_ consumed");
        assert!(reader.is_complete(), "complete after EOF_ consumed");
    }
}
935}