zipatch_rs/chunk/mod.rs
1//! Wire-format chunk types and the [`ZiPatchReader`] streaming parser.
2//!
3//! This module is the parsing layer: it decodes the raw `ZiPatch` byte
4//! stream into a stream of typed [`Chunk`] values. Each top-level
5//! variant corresponds to one 4-byte ASCII wire tag (`FHDR`, `APLY`,
6//! `SQPK`, …); the per-variant submodules below own the binary layout for
7//! their body. Nothing in this module touches the filesystem — apply-time
8//! effects live in [`crate::apply`].
9//!
10//! The [`ZiPatchReader`] parser validates the 12-byte file magic on
11//! construction, then yields one [`ChunkRecord`](crate::chunk::ChunkRecord) per
12//! [`ZiPatchReader::next_chunk`] call until the internal `EOF_` terminator
13//! is consumed or a parse error surfaces.
14
15pub(crate) mod adir;
16pub(crate) mod afsp;
17pub(crate) mod aply;
18pub(crate) mod ddir;
19pub(crate) mod fhdr;
20pub(crate) mod sqpk;
21pub(crate) mod util;
22
23pub(crate) use adir::AddDirectory;
24pub(crate) use afsp::ApplyFreeSpace;
25pub(crate) use aply::ApplyOption;
26pub(crate) use ddir::DeleteDirectory;
27pub(crate) use fhdr::FileHeader;
28pub use sqpk::{SqpackFileId, SqpkCommand, SqpkCompressedBlock};
29pub(crate) use sqpk::{
30 SqpkFile, SqpkFileOperation, SqpkHeader, SqpkHeaderTarget, TargetHeaderKind,
31};
32
33use crate::newtypes::ChunkTag;
34use crate::reader::ReadExt;
35use crate::{ParseError, ParseResult as Result};
36use tracing::trace;
37
// Signature that opens every `ZiPatch` file: the 12 bytes `\x91ZIPATCH\r\n\x1a\n`.
const MAGIC: [u8; 12] = *b"\x91ZIPATCH\r\n\x1a\n";
41
/// Ceiling applied by default to a single chunk's declared body length:
/// 512 MiB.
///
/// [`ZiPatchReader::new`] installs this limit so that a pathological stream
/// cannot push the parser into an enormous allocation. A different limit can
/// be chosen per reader with [`ZiPatchReader::with_max_chunk_size`].
pub const DEFAULT_MAX_CHUNK_SIZE: u32 = 512 << 20;
48
49/// One top-level chunk parsed from a `ZiPatch` stream.
50///
51/// Each variant corresponds to a 4-byte ASCII wire tag.
52///
53/// # Observed frequency
54///
55/// SE's XIVARR+ patch files almost exclusively contain `FHDR`, `APLY`, and
56/// `SQPK` chunks. `ADIR`/`DELD` can theoretically appear and are implemented,
57/// but are rarely emitted in practice. `APFS` has never been observed in modern
58/// patches and is treated as a no-op. `EOF_` is consumed by [`ZiPatchReader`]
59/// and is never yielded to the caller.
60#[derive(Debug)]
61pub enum Chunk {
62 /// `FHDR` — the first chunk in every patch file; carries version and
63 /// per-version patch metadata. See [`FileHeader`] for the versioned body.
64 FileHeader(FileHeader),
65 /// `APLY` — sets or clears a boolean apply-time flag on the
66 /// [`crate::ApplyConfig`] (e.g. "ignore missing files"). See [`ApplyOption`].
67 ApplyOption(ApplyOption),
68 /// `APFS` — free-space book-keeping emitted by old patcher tooling; treated
69 /// as a no-op at apply time. See [`ApplyFreeSpace`].
70 ApplyFreeSpace(ApplyFreeSpace),
71 /// `ADIR` — instructs the patcher to create a directory under the game
72 /// install root. See [`AddDirectory`].
73 AddDirectory(AddDirectory),
74 /// `DELD` — instructs the patcher to remove a directory under the game
75 /// install root. See [`DeleteDirectory`].
76 DeleteDirectory(DeleteDirectory),
77 /// `SQPK` — the workhorse chunk; wraps one of eight sub-commands that
78 /// add, delete, expand, or replace `SqPack` data. See [`SqpkCommand`].
79 Sqpk(SqpkCommand),
80 /// `EOF_` — marks the clean end of the patch stream. [`ZiPatchReader`]
81 /// consumes this chunk internally; it is never yielded to the caller.
82 EndOfFile,
83}
84
85/// One parsed chunk plus its 4-byte ASCII tag and the byte count consumed
86/// from the input stream by its frame.
87///
88/// Returned by [`parse_chunk`]. The `consumed` count is exactly the size of
89/// the chunk's on-wire frame: `4 (body_len) + 4 (tag) + body_len + 4 (crc32)`
90/// = `body_len + 12`. This is what
91/// [`ZiPatchReader`](crate::ZiPatchReader) accumulates into its running
92/// byte counter for progress reporting.
93pub(crate) struct ParsedChunk {
94 pub(crate) chunk: Chunk,
95 pub(crate) tag: ChunkTag,
96 pub(crate) consumed: u64,
97}
98
99/// Parse one chunk frame from `r`.
100///
101/// # Wire framing
102///
103/// Each chunk is laid out as:
104///
105/// ```text
106/// [body_len: u32 BE] [tag: 4 bytes] [body: body_len bytes] [crc32: u32 BE]
107/// ```
108///
109/// The CRC32 is computed over `tag ++ body` (not over `body_len`). When
110/// `verify_checksums` is `true` and the stored CRC does not match the computed
111/// one, [`ParseError::ChecksumMismatch`] is returned.
112///
113/// # Errors
114///
115/// - [`ParseError::TruncatedPatch`] — the reader returns EOF while reading
116/// the `body_len` field (i.e. no more chunks are present but `EOF_` was
117/// never seen).
118/// - [`ParseError::OversizedChunk`] — `body_len` exceeds `max_chunk_size`.
119/// - [`ParseError::ChecksumMismatch`] — CRC32 mismatch (only when
120/// `verify_checksums` is `true`).
121/// - [`ParseError::UnknownChunkTag`] — tag is not recognised.
122/// - [`ParseError::Io`] — any other I/O failure reading from `r`.
123pub(crate) fn parse_chunk<R: std::io::Read>(
124 r: &mut R,
125 verify_checksums: bool,
126 max_chunk_size: u32,
127) -> Result<ParsedChunk> {
128 let size = match r.read_u32_be() {
129 Ok(s) => s as usize,
130 Err(ParseError::Io { source: e }) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
131 return Err(ParseError::TruncatedPatch);
132 }
133 Err(e) => return Err(e),
134 };
135 if size > max_chunk_size as usize {
136 return Err(ParseError::OversizedChunk(size));
137 }
138
139 // Tag (4 B) and CRC (4 B) are always present regardless of body shape.
140 let mut tag = [0u8; 4];
141 r.read_exact(&mut tag)?;
142
143 // Peek at the first 5 bytes of the body without committing to either the
144 // generic single-allocation path or the SQPK `A` zero-copy-into-data path.
145 // For SQPK chunks, those 5 bytes are `[inner_size: i32 BE][sub_cmd: u8]`.
146 // For chunks with bodies shorter than 5 bytes (e.g. `EOF_`), we still read
147 // exactly `size` bytes into the prefix array and leave the rest zero.
148 let mut prefix = [0u8; 5];
149 let prefix_len = size.min(5);
150 if prefix_len > 0 {
151 r.read_exact(&mut prefix[..prefix_len])?;
152 }
153
154 // ---- Fast path: SQPK `A` (SqpkAddData) — see `parse_sqpk_add_data_fast`. ----
155 if &tag == b"SQPK" && size >= 5 + SQPK_ADDDATA_HEADER_SIZE && prefix[4] == b'A' {
156 return parse_sqpk_add_data_fast(r, tag, prefix, size, verify_checksums);
157 }
158
159 // ---- Generic path: one allocation for the whole body. ----
160 let mut body_vec = vec![0u8; size];
161 body_vec[..prefix_len].copy_from_slice(&prefix[..prefix_len]);
162 if size > prefix_len {
163 r.read_exact(&mut body_vec[prefix_len..])?;
164 }
165
166 let mut crc_buf = [0u8; 4];
167 r.read_exact(&mut crc_buf)?;
168 let expected_crc = u32::from_be_bytes(crc_buf);
169
170 if verify_checksums {
171 let mut hasher = crc32fast::Hasher::new();
172 hasher.update(&tag);
173 hasher.update(&body_vec);
174 let actual_crc = hasher.finalize();
175 if actual_crc != expected_crc {
176 return Err(ParseError::ChecksumMismatch {
177 tag: ChunkTag::new(tag),
178 expected: expected_crc,
179 actual: actual_crc,
180 });
181 }
182 }
183
184 trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
185
186 // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
187 let consumed = (size as u64) + 12;
188
189 let body = &body_vec[..];
190
191 let chunk = match &tag {
192 b"EOF_" => Chunk::EndOfFile,
193 b"FHDR" => Chunk::FileHeader(fhdr::parse(body)?),
194 b"APLY" => Chunk::ApplyOption(aply::parse(body)?),
195 b"APFS" => Chunk::ApplyFreeSpace(afsp::parse(body)?),
196 b"ADIR" => Chunk::AddDirectory(adir::parse(body)?),
197 b"DELD" => Chunk::DeleteDirectory(ddir::parse(body)?),
198 b"SQPK" => Chunk::Sqpk(sqpk::parse_sqpk(body)?),
199 _ => return Err(ParseError::UnknownChunkTag(ChunkTag::new(tag))),
200 };
201
202 Ok(ParsedChunk {
203 chunk,
204 tag: ChunkTag::new(tag),
205 consumed,
206 })
207}
208
// Byte length of the fixed SqpkAddData header that sits in front of the inline
// data payload, spelled out as the sum of its field widths:
// 3 (pad) + 2 (main_id) + 2 (sub_id) + 4 (file_id) + 4 (block_offset)
// + 4 (data_bytes) + 4 (block_delete) = 23.
// Mirrors `add_data::SqpkAddData::DATA_SOURCE_OFFSET` (23) without taking a
// `u64` round-trip; kept private to the framing path.
const SQPK_ADDDATA_HEADER_SIZE: usize = 3 + 2 + 2 + 4 + 4 + 4 + 4;
213
214/// Fast path for SQPK `A` (`SqpkAddData`) chunks.
215///
216/// `AddData` is the largest chunk type by byte volume — payloads of hundreds of
217/// KB to MB are typical. The generic framing path allocates one `Vec<u8>` of
218/// `size` for the whole body, then `binrw`'s derived parser allocates a second
219/// `Vec<u8>` of exactly `data_bytes` and memcpys the inline payload into it.
220/// That second allocation + memcpy dominates parse time for `AddData`.
221///
222/// This function reads the `AddData` fixed header into a stack array, parses
223/// the seven fields directly, allocates the `data` payload at its exact size,
224/// and `read_exact`s the source bytes straight into it — one allocation, no
225/// intermediate copy of the payload.
226///
227/// On entry: `tag` and the 5-byte `prefix` (SQPK `inner_size` + sub-command
228/// byte) have already been consumed from `r`. The remaining bytes are
229/// `[fixed_header: 23 B][data: data_bytes][crc32: 4 B]`.
230fn parse_sqpk_add_data_fast<R: std::io::Read>(
231 r: &mut R,
232 tag: [u8; 4],
233 prefix: [u8; 5],
234 size: usize,
235 verify_checksums: bool,
236) -> Result<ParsedChunk> {
237 // Validate the SQPK inner_size against the outer chunk size, matching the
238 // check in `sqpk::parse_sqpk` so callers see byte-identical error behaviour.
239 let inner_size = i32::from_be_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
240 if inner_size != size {
241 return Err(ParseError::InvalidField {
242 context: "SQPK inner size mismatch",
243 });
244 }
245
246 let mut header = [0u8; SQPK_ADDDATA_HEADER_SIZE];
247 r.read_exact(&mut header)?;
248
249 // SqpkAddData fixed-header layout (all big-endian):
250 // [0..3] pad
251 // [3..5] main_id u16
252 // [5..7] sub_id u16
253 // [7..11] file_id u32
254 // [11..15] block_offset_raw u32 (<< 7 = bytes)
255 // [15..19] data_bytes_raw u32 (<< 7 = bytes)
256 // [19..23] block_delete_raw u32 (<< 7 = bytes)
257 let main_id = u16::from_be_bytes([header[3], header[4]]);
258 let sub_id = u16::from_be_bytes([header[5], header[6]]);
259 let file_id = u32::from_be_bytes([header[7], header[8], header[9], header[10]]);
260 let block_offset_raw = u32::from_be_bytes([header[11], header[12], header[13], header[14]]);
261 let data_bytes_raw = u32::from_be_bytes([header[15], header[16], header[17], header[18]]);
262 let block_delete_raw = u32::from_be_bytes([header[19], header[20], header[21], header[22]]);
263
264 let block_offset = (block_offset_raw as u64) << 7;
265 let data_bytes = (data_bytes_raw as u64) << 7;
266 let block_delete_number = (block_delete_raw as u64) << 7;
267
268 // The declared payload length must fit exactly within the chunk body:
269 // size = 5 (inner_size + sub_cmd) + 23 (fixed header) + data_bytes
270 let expected_data = size - 5 - SQPK_ADDDATA_HEADER_SIZE;
271 if data_bytes as usize != expected_data {
272 return Err(ParseError::InvalidField {
273 context: "SqpkAddData data_bytes does not match SQPK body length",
274 });
275 }
276
277 let mut data = vec![0u8; data_bytes as usize];
278 r.read_exact(&mut data)?;
279
280 let mut crc_buf = [0u8; 4];
281 r.read_exact(&mut crc_buf)?;
282 let expected_crc = u32::from_be_bytes(crc_buf);
283
284 if verify_checksums {
285 // CRC is over `tag ++ body`. The body is split across three disjoint
286 // buffers — feed each segment to the incremental hasher.
287 let mut hasher = crc32fast::Hasher::new();
288 hasher.update(&tag);
289 hasher.update(&prefix);
290 hasher.update(&header);
291 hasher.update(&data);
292 let actual_crc = hasher.finalize();
293 if actual_crc != expected_crc {
294 return Err(ParseError::ChecksumMismatch {
295 tag: ChunkTag::new(tag),
296 expected: expected_crc,
297 actual: actual_crc,
298 });
299 }
300 }
301
302 trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
303
304 let chunk = Chunk::Sqpk(sqpk::SqpkCommand::AddData(Box::new(sqpk::SqpkAddData {
305 target_file: sqpk::SqpackFileId {
306 main_id,
307 sub_id,
308 file_id,
309 },
310 block_offset,
311 data_bytes,
312 block_delete_number,
313 data,
314 })));
315
316 // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
317 let consumed = (size as u64) + 12;
318
319 Ok(ParsedChunk {
320 chunk,
321 tag: ChunkTag::new(tag),
322 consumed,
323 })
324}
325
326/// One chunk yielded by [`ZiPatchReader::next_chunk`] together with the
327/// stream-position metadata the parser observed while reading it.
328///
329/// Bundling the chunk with its byte-position metadata in one record lets
330/// downstream consumers (the apply driver, the [`crate::index::PlanBuilder`],
331/// the `zipatch dump` CLI) avoid a second round of accessor calls against
332/// the reader to learn where the chunk sat in the stream. Each field
333/// describes one fact the parser knew at the moment the chunk was yielded;
334/// see the per-field docs.
335///
336/// `#[non_exhaustive]`: stream-position metadata may grow (e.g. compressed
337/// payload size, header-only byte count) as new index-builder needs surface.
338#[non_exhaustive]
339#[derive(Debug)]
340pub struct ChunkRecord {
341 /// The parsed chunk itself.
342 pub chunk: Chunk,
343 /// The 4-byte ASCII wire tag of the chunk (`FHDR`, `SQPK`, `EOF_`, …).
344 ///
345 /// Exposed alongside [`Self::chunk`] so consumers can attach the tag
346 /// to a progress event without re-matching on the [`Chunk`] enum.
347 pub tag: ChunkTag,
348 /// Absolute patch-file offset of the chunk's body — the byte right
349 /// after the 8-byte `[body_len: u32 BE, tag: [u8; 4]]` frame header.
350 ///
351 /// Index builders use this to compute absolute patch-file offsets for
352 /// `SqpkAddData::data`, `SqpkFile` block payloads, and
353 /// `SqpkHeader::header_data` without re-walking the stream.
354 pub body_offset: u64,
355 /// Running total of bytes consumed from the patch stream, including
356 /// the 12-byte magic header, the chunk this record describes, and
357 /// every preceding chunk frame.
358 ///
359 /// Equivalent to the [`crate::ChunkEvent::bytes_read`] field at the
360 /// same emission point; used for the `bytes_applied / total_patch_size`
361 /// progress-bar ratio.
362 pub bytes_read: u64,
363}
364
/// Streaming parser that turns a `ZiPatch` byte stream into [`Chunk`]s.
///
/// A `ZiPatchReader` sits on top of any [`std::io::Read`] source. Construction
/// checks the 12-byte file magic; afterwards every call to
/// [`Self::next_chunk`] yields one [`ChunkRecord`], in stream order, until the
/// `EOF_` terminator is reached or an error occurs.
///
/// # Stream contract
///
/// - **Magic** — the stream must begin with the 12 bytes
///   `\x91ZIPATCH\r\n\x1a\n`. Anything else makes [`ZiPatchReader::new`]
///   return [`ParseError::InvalidMagic`].
/// - **Framing** — each chunk is a length-prefixed frame:
///   `[body_len: u32 BE] [tag: 4 B] [body: body_len B] [crc32: u32 BE]`.
/// - **CRC32** — covers `tag ++ body`. Verification is on by default; pass
///   `false` to [`ZiPatchReader::with_checksum_verification`] to turn it off.
/// - **Termination** — the `EOF_` chunk is consumed internally and makes
///   [`Self::next_chunk`] return `Ok(None)`. Call
///   [`ZiPatchReader::is_complete`] afterwards to tell a clean end from a
///   truncated stream.
/// - **Fused** — after `Ok(None)` (clean EOF) or an `Err(_)` has been
///   returned, every later call to `next_chunk` returns `Ok(None)`.
///
/// # Errors
///
/// A parse failure makes [`Self::next_chunk`] return `Err(e)` once and
/// `Ok(None)` on all subsequent calls. Possible errors include:
/// - [`ParseError::TruncatedPatch`] — the stream ended before `EOF_`.
/// - [`ParseError::OversizedChunk`] — a declared chunk body exceeds the
///   configured max chunk size (default [`DEFAULT_MAX_CHUNK_SIZE`], 512 MiB).
/// - [`ParseError::ChecksumMismatch`] — CRC32 verification failed.
/// - [`ParseError::UnknownChunkTag`] — unrecognised 4-byte tag.
/// - [`ParseError::Io`] — underlying I/O failure.
///
/// # Async usage
///
/// `ZiPatchReader` is a synchronous parser over a [`std::io::Read`]
/// source — the crate-level "Async usage" section explains why. Async
/// consumers run the iteration (and any apply call driving it) inside
/// `tokio::task::spawn_blocking`. When the patch itself arrives over an async
/// transport (e.g. `reqwest::Response::bytes_stream`), either spool it into a
/// `tempfile::NamedTempFile` and hand the reopened [`std::fs::File`] to
/// [`ZiPatchReader::new`], or bridge with a blocking-reader adapter that pulls
/// from a [`tokio::sync::mpsc`-equivalent](std::sync::mpsc) channel fed by the
/// async download task.
///
/// # Example
///
/// Build a minimal in-memory patch (magic + `ADIR` + `EOF_`) and walk it:
///
/// ```rust
/// use std::io::Cursor;
/// use zipatch_rs::{Chunk, ZiPatchReader};
///
/// // Helper: wrap tag + body into a correctly framed chunk with CRC32.
/// fn make_chunk(tag: &[u8; 4], body: &[u8]) -> Vec<u8> {
///     let mut crc_input = Vec::new();
///     crc_input.extend_from_slice(tag);
///     crc_input.extend_from_slice(body);
///     let crc = crc32fast::hash(&crc_input);
///
///     let mut out = Vec::new();
///     out.extend_from_slice(&(body.len() as u32).to_be_bytes());
///     out.extend_from_slice(tag);
///     out.extend_from_slice(body);
///     out.extend_from_slice(&crc.to_be_bytes());
///     out
/// }
///
/// // 12-byte ZiPatch magic.
/// let magic: [u8; 12] = [0x91, 0x5A, 0x49, 0x50, 0x41, 0x54, 0x43, 0x48, 0x0D, 0x0A, 0x1A, 0x0A];
///
/// // ADIR body: u32 BE name_len (7) + b"created".
/// let mut adir_body = Vec::new();
/// adir_body.extend_from_slice(&7u32.to_be_bytes());
/// adir_body.extend_from_slice(b"created");
///
/// let mut patch = Vec::new();
/// patch.extend_from_slice(&magic);
/// patch.extend_from_slice(&make_chunk(b"ADIR", &adir_body));
/// patch.extend_from_slice(&make_chunk(b"EOF_", &[]));
///
/// let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
/// let mut chunks = Vec::new();
/// while let Some(rec) = reader.next_chunk().unwrap() {
///     chunks.push(rec.chunk);
/// }
///
/// assert_eq!(chunks.len(), 1);
/// assert!(matches!(chunks[0], Chunk::AddDirectory(_)));
/// ```
#[derive(Debug)]
pub struct ZiPatchReader<R> {
    // Buffered wrapper around the caller's source; all parsing reads go
    // through it.
    inner: std::io::BufReader<R>,
    // Set once iteration has ended, either by `EOF_` or by an error; makes
    // `next_chunk` return `Ok(None)` from then on.
    done: bool,
    // Whether each chunk's stored CRC32 is compared with the computed one.
    verify_checksums: bool,
    // Set only when the `EOF_` terminator itself was consumed.
    eof_seen: bool,
    // Cumulative count of bytes consumed from `inner`, the 12-byte magic
    // header included. Advanced after every successful `parse_chunk` call.
    pub(crate) bytes_read: u64,
    // Identifier for the patch source, supplied by the caller. It is stamped
    // onto every `SequentialCheckpoint` the apply driver emits so that a later
    // `resume_apply_patch` call can refuse a checkpoint persisted for a
    // different patch. Stays `None` unless `with_patch_name` was called.
    patch_name: Option<String>,
    // Largest declared body length the parser accepts. A chunk declaring a
    // bigger `body_len` is rejected with `ParseError::OversizedChunk` before
    // anything is allocated. Starts out as `DEFAULT_MAX_CHUNK_SIZE`.
    max_chunk_size: u32,
}
477
478impl<R: std::io::Read> ZiPatchReader<R> {
479 /// Wrap `reader` and validate the leading 12-byte `ZiPatch` magic.
480 ///
481 /// Consumes exactly 12 bytes from `reader`. The magic is the byte sequence
482 /// `0x91 0x5A 0x49 0x50 0x41 0x54 0x43 0x48 0x0D 0x0A 0x1A 0x0A`
483 /// (i.e. `\x91ZIPATCH\r\n\x1a\n`).
484 ///
485 /// The reader is wrapped in a [`std::io::BufReader`] internally, so the
486 /// many small typed reads the chunk parser issues (4-byte size, 4-byte
487 /// tag, 5-byte SQPK prefix, …) coalesce into a small number of syscalls.
488 /// Callers do not need to pre-wrap a raw [`std::fs::File`] or other
489 /// unbuffered source.
490 ///
491 /// CRC32 verification is **enabled** by default. Call
492 /// [`ZiPatchReader::with_checksum_verification`] with `false` before
493 /// iterating to disable it.
494 ///
495 /// # Errors
496 ///
497 /// - [`ParseError::InvalidMagic`] — the first 12 bytes do not match the
498 /// expected magic.
499 /// - [`ParseError::Io`] — an I/O error occurred while reading the magic.
500 pub fn new(reader: R) -> Result<Self> {
501 let mut reader = std::io::BufReader::new(reader);
502 let magic = reader.read_exact_vec(12)?;
503 if magic.as_slice() != MAGIC {
504 return Err(ParseError::InvalidMagic);
505 }
506 Ok(Self {
507 inner: reader,
508 done: false,
509 verify_checksums: true,
510 eof_seen: false,
511 // The 12-byte magic header has already been consumed.
512 bytes_read: 12,
513 patch_name: None,
514 max_chunk_size: DEFAULT_MAX_CHUNK_SIZE,
515 })
516 }
517
518 /// Set the upper bound on a single chunk's declared body length, in
519 /// bytes.
520 ///
521 /// The parser rejects any chunk whose `body_len` exceeds `bytes` with
522 /// [`ParseError::OversizedChunk`] before allocating space for its body.
523 /// Defaults to [`DEFAULT_MAX_CHUNK_SIZE`] (512 MiB). Raise it for
524 /// patches with unusually large chunks; lower it when applying untrusted
525 /// streams to bound the parser's worst-case allocation.
526 ///
527 /// # Panics
528 ///
529 /// Panics if `bytes` is zero — a zero ceiling rejects every chunk and
530 /// is a programming error.
531 #[must_use]
532 pub fn with_max_chunk_size(mut self, bytes: u32) -> Self {
533 assert!(bytes > 0, "with_max_chunk_size(0) is invalid");
534 self.max_chunk_size = bytes;
535 self
536 }
537
538 /// Returns the configured maximum chunk-body length, in bytes.
539 #[must_use]
540 pub fn max_chunk_size(&self) -> u32 {
541 self.max_chunk_size
542 }
543
544 /// Attach a human-readable identifier to this patch stream.
545 ///
546 /// The identifier is stamped onto every
547 /// [`SequentialCheckpoint`](crate::apply::SequentialCheckpoint) the apply
548 /// driver emits so a future
549 /// [`resume_apply_patch`](crate::ApplyConfig::resume_apply_patch) call can
550 /// detect a checkpoint that was persisted for a different patch and
551 /// refuse to resume from it.
552 ///
553 /// Typical value is the patch filename (e.g. `"H2017.07.11.0000.0000a.patch"`).
554 /// No interpretation is performed — the string is compared verbatim.
555 #[must_use]
556 pub fn with_patch_name(mut self, name: impl Into<String>) -> Self {
557 self.patch_name = Some(name.into());
558 self
559 }
560
561 /// Returns the caller-supplied patch identifier, if any.
562 ///
563 /// Set by [`Self::with_patch_name`]; `None` otherwise.
564 #[must_use]
565 pub fn patch_name(&self) -> Option<&str> {
566 self.patch_name.as_deref()
567 }
568
569 /// Mutable access to the wrapped [`std::io::BufReader`].
570 ///
571 /// Used by [`crate::ApplyConfig::resume_apply_patch`] to seek the
572 /// underlying source for the patch-size measurement at entry. Not
573 /// part of the stable API — seeking the inner reader while a chunk
574 /// parse is in flight would desync `bytes_read` and break later
575 /// iteration.
576 pub(crate) fn inner_mut(&mut self) -> &mut std::io::BufReader<R> {
577 &mut self.inner
578 }
579
580 /// Toggle per-chunk CRC32 verification.
581 ///
582 /// Verification is **enabled** by default after [`ZiPatchReader::new`].
583 /// Pass `false` to skip CRC checks — useful when the source has already
584 /// been verified out-of-band (e.g. a download hash was checked before the
585 /// file was opened), or when processing known-good test data where the
586 /// overhead is unnecessary.
587 #[must_use]
588 pub fn with_checksum_verification(mut self, on: bool) -> Self {
589 self.verify_checksums = on;
590 self
591 }
592
593 /// Returns `true` if iteration reached the `EOF_` terminator cleanly.
594 ///
595 /// A `false` return after `next()` yields `None` indicates the stream was
596 /// truncated — the download or file copy was incomplete. In that case the
597 /// iterator stopped because of a [`ParseError::TruncatedPatch`] error,
598 /// not because the patch finished normally.
599 pub fn is_complete(&self) -> bool {
600 self.eof_seen
601 }
602
603 /// Returns the running total of bytes consumed from the patch stream.
604 ///
605 /// Starts at `12` after [`ZiPatchReader::new`] (the magic header has been
606 /// read) and increases monotonically by the size of each chunk's wire
607 /// frame after each successful [`Self::next_chunk`] call. Includes the
608 /// `EOF_` terminator's frame.
609 ///
610 /// On parse error, the counter is **not** advanced past the failing
611 /// chunk — it reflects the byte offset at the start of that chunk's
612 /// length prefix, not the broken position somewhere inside its frame.
613 ///
614 /// Per-chunk consumers should read the equivalent counter off the
615 /// [`ChunkRecord::bytes_read`] field. This getter is for end-of-stream
616 /// reporting — after [`Self::next_chunk`] returned `Ok(None)`, no
617 /// [`ChunkRecord`] is produced for the consumed `EOF_` frame, so the
618 /// final stream position is only available through this method.
619 #[must_use]
620 pub fn bytes_read(&self) -> u64 {
621 self.bytes_read
622 }
623
624 /// Read the next chunk frame from the underlying stream.
625 ///
626 /// Returns `Ok(Some(record))` for each successfully parsed chunk in
627 /// stream order, `Ok(None)` after the `EOF_` terminator has been
628 /// consumed (the terminator itself is never surfaced as a record), and
629 /// `Err(_)` on a parse failure. After `Ok(None)` or any `Err(_)`,
630 /// subsequent calls return `Ok(None)` — the reader is fused.
631 ///
632 /// # Errors
633 ///
634 /// See [`Self`]'s "Errors" section.
635 pub fn next_chunk(&mut self) -> Result<Option<ChunkRecord>> {
636 if self.done {
637 return Ok(None);
638 }
639 // Snapshot the body offset before parsing so a successful parse can
640 // commit it without re-walking the stream. The chunk body begins after
641 // the 8-byte `[body_len: u32 BE, tag: [u8; 4]]` frame header.
642 let body_offset = self.bytes_read + 8;
643 match parse_chunk(&mut self.inner, self.verify_checksums, self.max_chunk_size) {
644 Ok(ParsedChunk {
645 chunk: Chunk::EndOfFile,
646 consumed,
647 ..
648 }) => {
649 self.bytes_read += consumed;
650 self.done = true;
651 self.eof_seen = true;
652 Ok(None)
653 }
654 Ok(ParsedChunk {
655 chunk,
656 tag,
657 consumed,
658 }) => {
659 self.bytes_read += consumed;
660 Ok(Some(ChunkRecord {
661 chunk,
662 tag,
663 body_offset,
664 bytes_read: self.bytes_read,
665 }))
666 }
667 Err(e) => {
668 self.done = true;
669 Err(e)
670 }
671 }
672 }
673}
674
675/// Open the file at `path` and validate the `ZiPatch` magic, returning a
676/// ready-to-iterate [`ZiPatchReader`].
677///
678/// The concrete inner reader type is intentionally hidden behind `impl
679/// Read` so the choice of source and any buffering strategy remain
680/// implementation details. Callers that need to name the type should
681/// construct a reader of their choice and pass it to
682/// [`ZiPatchReader::new`].
683///
684/// # Errors
685///
686/// - [`ParseError::Io`] — the file could not be opened.
687/// - [`ParseError::InvalidMagic`] — the file does not start with the
688/// `ZiPatch` magic bytes.
689pub fn open_patch(
690 path: impl AsRef<std::path::Path>,
691) -> crate::ParseResult<ZiPatchReader<impl std::io::Read + 'static>> {
692 let file = std::fs::File::open(path)?;
693 ZiPatchReader::new(file)
694}
695
696#[cfg(test)]
697mod tests {
698 use super::*;
699 use crate::test_utils::make_chunk;
700 use std::io::Cursor;
701
702 // --- parse_chunk error paths ---
703
704 #[test]
705 fn truncated_at_chunk_boundary_yields_truncated_patch() {
706 // Magic + no chunks: parse_chunk must see EOF on the body_len read and
707 // convert it to TruncatedPatch.
708 let mut patch = Vec::new();
709 patch.extend_from_slice(&MAGIC);
710 let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
711 match reader.next_chunk() {
712 Err(ParseError::TruncatedPatch) => {}
713 other => panic!("expected TruncatedPatch, got {other:?}"),
714 }
715 assert!(!reader.is_complete(), "stream is not clean-ended");
716 }
717
718 #[test]
719 fn non_eof_io_error_on_body_len_read_propagates_as_io() {
        // Exercises the catch-all `Err(e) => return Err(e)` arm of the
        // `body_len` read in `parse_chunk`: an I/O error that is NOT
        // UnexpectedEof must propagate verbatim.
        // We trigger this by passing a reader that errors immediately.
723 struct BrokenReader;
724 impl std::io::Read for BrokenReader {
725 fn read(&mut self, _: &mut [u8]) -> std::io::Result<usize> {
726 Err(std::io::Error::new(
727 std::io::ErrorKind::BrokenPipe,
728 "simulated broken pipe",
729 ))
730 }
731 }
732 let result = parse_chunk(&mut BrokenReader, false, DEFAULT_MAX_CHUNK_SIZE);
733 match result {
734 Err(ParseError::Io { source: e }) => {
735 assert_eq!(
736 e.kind(),
737 std::io::ErrorKind::BrokenPipe,
738 "non-EOF I/O error must propagate unchanged, got kind {:?}",
739 e.kind()
740 );
741 }
742 Err(other) => panic!("expected ParseError::Io(BrokenPipe), got {other:?}"),
743 Ok(_) => panic!("expected an error, got Ok"),
744 }
745 }
746
747 #[test]
748 fn truncated_after_one_chunk_yields_truncated_patch() {
749 // Magic + one well-formed ADIR + no more bytes: the second call to
750 // next() must surface TruncatedPatch, not None.
751 let mut adir_body = Vec::new();
752 adir_body.extend_from_slice(&4u32.to_be_bytes());
753 adir_body.extend_from_slice(b"test");
754 let chunk = make_chunk(b"ADIR", &adir_body);
755
756 let mut patch = Vec::new();
757 patch.extend_from_slice(&MAGIC);
758 patch.extend_from_slice(&chunk);
759
760 let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
761 let first = reader.next_chunk();
762 assert!(
763 matches!(first, Ok(Some(_))),
764 "first ADIR chunk should parse cleanly: {first:?}"
765 );
766 match reader.next_chunk() {
767 Err(ParseError::TruncatedPatch) => {}
768 other => panic!("expected TruncatedPatch on truncated stream, got {other:?}"),
769 }
770 assert!(
771 !reader.is_complete(),
772 "is_complete must be false after truncation"
773 );
774 }
775
776 #[test]
777 fn checksum_mismatch_returns_checksum_mismatch_error() {
778 // Corrupt the CRC32 field of an otherwise valid ADIR chunk and verify
779 // that parse_chunk returns ChecksumMismatch (not a panic or a wrong error).
780 let mut adir_body = Vec::new();
781 adir_body.extend_from_slice(&4u32.to_be_bytes());
782 adir_body.extend_from_slice(b"test");
783 let mut chunk = make_chunk(b"ADIR", &adir_body);
784 // Flip the last byte of the CRC32 field.
785 let last = chunk.len() - 1;
786 chunk[last] ^= 0xFF;
787
788 let mut cur = Cursor::new(chunk);
789 let result = parse_chunk(&mut cur, true, DEFAULT_MAX_CHUNK_SIZE);
790 assert!(
791 matches!(result, Err(ParseError::ChecksumMismatch { .. })),
792 "corrupted CRC must yield ChecksumMismatch"
793 );
794 }
795
796 #[test]
797 fn unknown_chunk_tag_returns_unknown_chunk_tag_error() {
798 // A tag of all-Z bytes is not recognised; parse_chunk must return
799 // UnknownChunkTag carrying the raw 4-byte tag.
800 let chunk = make_chunk(b"ZZZZ", &[]);
801 let mut cur = Cursor::new(chunk);
802 match parse_chunk(&mut cur, false, DEFAULT_MAX_CHUNK_SIZE) {
803 Err(ParseError::UnknownChunkTag(tag)) => {
804 assert_eq!(
805 tag,
806 ChunkTag::new(*b"ZZZZ"),
807 "tag bytes must be preserved in error"
808 );
809 }
810 Err(other) => panic!("expected UnknownChunkTag, got {other:?}"),
811 Ok(_) => panic!("expected UnknownChunkTag, got Ok"),
812 }
813 }
814
#[test]
fn default_max_chunk_size_matches_constant() {
    // A reader built with `new` and never reconfigured must report the
    // DEFAULT_MAX_CHUNK_SIZE limit.
    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"EOF_", &[]));

    let limit = ZiPatchReader::new(Cursor::new(stream))
        .unwrap()
        .max_chunk_size();
    assert_eq!(limit, DEFAULT_MAX_CHUNK_SIZE);
}
823
#[test]
fn with_max_chunk_size_overrides_default() {
    // The builder-style setter must replace the limit the getter reports.
    let stream = [MAGIC.as_slice(), make_chunk(b"EOF_", &[]).as_slice()].concat();

    let reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let reader = reader.with_max_chunk_size(4096);
    assert_eq!(reader.max_chunk_size(), 4096);
}
834
#[test]
#[should_panic(expected = "with_max_chunk_size(0) is invalid")]
fn with_max_chunk_size_zero_panics() {
    // Requesting a zero limit must panic with the message checked by the
    // `should_panic` attribute above.
    let mut stream = Vec::from(MAGIC);
    stream.extend_from_slice(&make_chunk(b"EOF_", &[]));

    let reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let _ = reader.with_max_chunk_size(0);
}
845
#[test]
fn custom_max_chunk_size_rejects_chunks_above_threshold() {
    // The ADIR body is 9 bytes (4-byte length prefix + 5 ASCII bytes), so the
    // frame declares body_len = 9. A reader capped at 4 must refuse it with
    // OversizedChunk carrying that declared size.
    let mut adir_body = 5u32.to_be_bytes().to_vec();
    adir_body.extend_from_slice(b"hello");

    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"ADIR", &adir_body));

    let mut reader = ZiPatchReader::new(Cursor::new(stream))
        .unwrap()
        .with_max_chunk_size(4);
    let outcome = reader.next_chunk();
    match outcome {
        Err(ParseError::OversizedChunk(declared)) => assert_eq!(declared, 9),
        unexpected => panic!("expected OversizedChunk(9), got {unexpected:?}"),
    }
}
867
#[test]
fn oversized_chunk_body_len_returns_oversized_chunk_error() {
    // Only the 4-byte length field is supplied, and it declares u32::MAX
    // (> 512 MiB). The parser must reject it before any allocation.
    let len_field = [0xFFu8; 4];
    let mut cursor = Cursor::new(len_field.as_slice());

    match parse_chunk(&mut cursor, false, DEFAULT_MAX_CHUNK_SIZE) {
        Err(ParseError::OversizedChunk(size)) => assert!(
            size > DEFAULT_MAX_CHUNK_SIZE as usize,
            "reported size {size} must exceed DEFAULT_MAX_CHUNK_SIZE {DEFAULT_MAX_CHUNK_SIZE}"
        ),
        _ => panic!("expected OversizedChunk for u32::MAX body_len"),
    }
}
883
884 // --- ZiPatchReader byte-counter and per-record metadata ---
885
#[test]
fn bytes_read_starts_at_12_before_first_chunk() {
    // The magic header is 12 bytes long, so the counter must already read 12
    // straight after construction, before any chunk has been requested.
    let stream = [MAGIC.as_slice(), make_chunk(b"EOF_", &[]).as_slice()].concat();

    let reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let consumed = reader.bytes_read();
    assert_eq!(
        consumed, 12,
        "bytes_read must be 12 (magic only) before iteration starts"
    );
}
900
#[test]
fn record_carries_tag_body_offset_and_bytes_read() {
    // Stream layout: MAGIC + ADIR("a") + EOF_.
    //   ADIR frame = 4 (size) + 4 (tag) + 5 (body) + 4 (crc) = 17 bytes
    //   EOF_ frame = 4 + 4 + 0 + 4 = 12 bytes
    let mut adir_body = 1u32.to_be_bytes().to_vec();
    adir_body.push(b'a');

    let stream = [
        MAGIC.as_slice(),
        make_chunk(b"ADIR", &adir_body).as_slice(),
        make_chunk(b"EOF_", &[]).as_slice(),
    ]
    .concat();

    let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    assert_eq!(reader.bytes_read(), 12, "pre-read: magic only");

    let record = reader.next_chunk().unwrap().expect("first ADIR record");
    assert!(
        matches!(record.chunk, Chunk::AddDirectory(_)),
        "first chunk must be ADIR"
    );
    assert_eq!(record.tag, ChunkTag::ADIR);
    // The body begins once magic (12), body_len (4) and tag (4) are behind us.
    assert_eq!(record.body_offset, 20);
    assert_eq!(record.bytes_read, 12 + 17, "magic + ADIR frame");

    let after_adir = reader.next_chunk().unwrap();
    assert!(after_adir.is_none(), "EOF_ must terminate iteration");
    assert_eq!(
        reader.bytes_read(),
        12 + 17 + 12,
        "after EOF_: magic + ADIR + EOF_ frames"
    );
    assert!(reader.is_complete(), "is_complete must be true after EOF_");
}
940
#[test]
fn bytes_read_is_monotonically_non_decreasing() {
    // Builds one ADIR frame whose body is a big-endian u32 length followed
    // by the name bytes.
    fn adir_frame(name: &[u8]) -> Vec<u8> {
        let mut body = (name.len() as u32).to_be_bytes().to_vec();
        body.extend_from_slice(name);
        make_chunk(b"ADIR", &body)
    }

    // Two ADIR chunks followed by EOF_: the counter may only grow from one
    // next_chunk() call to the next, and swallowing the EOF_ chunk (empty
    // body, 12-byte frame) must still move it beyond the last record.
    let stream = [
        MAGIC.to_vec(),
        adir_frame(b"a"),
        adir_frame(b"bb"),
        make_chunk(b"EOF_", &[]),
    ]
    .concat();

    let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let mut previous = reader.bytes_read();
    while let Some(record) = reader.next_chunk().unwrap() {
        let now = record.bytes_read;
        assert_eq!(
            now,
            reader.bytes_read(),
            "record's bytes_read must equal reader's running counter"
        );
        assert!(
            now > previous,
            "non-empty ADIR frame must strictly advance bytes_read: {previous} -> {now}"
        );
        previous = now;
    }

    // The loop ended because EOF_ was consumed; its 12-byte frame has to be
    // reflected in the reader's counter.
    let after_eof = reader.bytes_read();
    assert!(
        after_eof > previous,
        "consuming EOF_ must advance bytes_read by its 12-byte frame: {previous} -> {after_eof}"
    );
}
985
986 // --- open_patch constructor ---
987
#[test]
fn open_patch_opens_minimal_patch_and_reaches_eof() {
    // Persist the smallest valid stream (MAGIC + EOF_) to a temporary file
    // and read it back through open_patch.
    let mut contents = MAGIC.to_vec();
    contents.extend(make_chunk(b"EOF_", &[]));

    // `dir` stays bound until the end of the test so the file outlives the reader.
    let dir = tempfile::tempdir().unwrap();
    let patch_path = dir.path().join("test.patch");
    std::fs::write(&patch_path, &contents).unwrap();

    let mut reader = open_patch(&patch_path).expect("open_patch must open valid patch");
    let first = reader.next_chunk().unwrap();
    assert!(first.is_none(), "EOF_ must terminate iteration immediately");
    assert!(reader.is_complete(), "is_complete must be true after EOF_");
}
1005
#[test]
fn open_patch_returns_io_error_when_file_is_missing() {
    // A path inside a fresh temporary directory that was never created.
    let dir = tempfile::tempdir().unwrap();
    let missing = dir.path().join("nonexistent.patch");

    let outcome = open_patch(&missing);
    assert!(
        matches!(outcome, Err(ParseError::Io { .. })),
        "open_patch on a missing file must return ParseError::Io"
    );
}
1015
1016 // --- Fused-ness and is_complete ---
1017
#[test]
fn reader_is_fused_after_error() {
    // After next_chunk has returned Err(_) once, every later call has to
    // return Ok(None).
    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"ZZZZ", &[])); // unknown tag → error

    let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let first = reader.next_chunk();
    assert!(
        matches!(first, Err(ParseError::UnknownChunkTag(_))),
        "first call must yield the error: {first:?}"
    );

    // Second and third calls: both must come back as Ok(None).
    for fused_msg in [
        "fused: must return Ok(None) after error",
        "fused: still Ok(None) on third call",
    ] {
        assert!(matches!(reader.next_chunk(), Ok(None)), "{fused_msg}");
    }
}
1041
#[test]
fn is_complete_false_until_eof_seen() {
    // Stream: MAGIC + ADIR("x") + EOF_. is_complete must stay false until
    // the EOF_ chunk itself has been consumed.
    let mut adir_body = 1u32.to_be_bytes().to_vec();
    adir_body.push(b'x');

    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"ADIR", &adir_body));
    stream.extend(make_chunk(b"EOF_", &[]));

    let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let done_at_start = reader.is_complete();
    assert!(!done_at_start, "not complete before reading anything");

    reader.next_chunk().unwrap().unwrap(); // consume ADIR
    let done_after_adir = reader.is_complete();
    assert!(!done_after_adir, "not complete after ADIR, before EOF_");

    let terminator = reader.next_chunk().unwrap();
    assert!(terminator.is_none(), "EOF_ consumed");
    assert!(reader.is_complete(), "complete after EOF_ consumed");
}
1066}