zipatch_rs/chunk/mod.rs
1//! Wire-format chunk types and the [`ZiPatchReader`] streaming parser.
2//!
3//! This module is the parsing layer: it decodes the raw `ZiPatch` byte
4//! stream into a stream of typed [`Chunk`] values. Each top-level
5//! variant corresponds to one 4-byte ASCII wire tag (`FHDR`, `APLY`,
6//! `SQPK`, …); the per-variant submodules below own the binary layout for
7//! their body. Nothing in this module touches the filesystem — apply-time
8//! effects live in [`crate::apply`].
9//!
10//! The [`ZiPatchReader`] parser validates the 12-byte file magic on
11//! construction, then yields one [`ChunkRecord`](crate::chunk::ChunkRecord) per
12//! [`ZiPatchReader::next_chunk`] call until the internal `EOF_` terminator
13//! is consumed or a parse error surfaces.
14
15pub(crate) mod adir;
16pub(crate) mod afsp;
17pub(crate) mod aply;
18pub(crate) mod ddir;
19pub(crate) mod fhdr;
20pub(crate) mod sqpk;
21pub(crate) mod util;
22
23pub use adir::AddDirectory;
24pub use afsp::ApplyFreeSpace;
25pub use aply::{ApplyOption, ApplyOptionKind};
26pub use ddir::DeleteDirectory;
27pub use fhdr::{FileHeader, FileHeaderV2, FileHeaderV3};
28pub use sqpk::{
29 IndexCommand, SqpackFileId, SqpkAddData, SqpkCommand, SqpkCompressedBlock, SqpkDeleteData,
30 SqpkExpandData, SqpkFile, SqpkFileOperation, SqpkHeader, SqpkHeaderTarget, SqpkIndex,
31 SqpkPatchInfo, SqpkTargetInfo, TargetFileKind, TargetHeaderKind,
32};
33
34use crate::newtypes::ChunkTag;
35use crate::{ParseError, ParseResult as Result};
36use std::io::Read;
37use tracing::trace;
38
/// The 12-byte signature that opens every `ZiPatch` stream: `0x91`, the ASCII
/// text `ZIPATCH`, then the line-ending guard bytes `\r\n\x1a\n`.
const MAGIC: [u8; 12] = *b"\x91ZIPATCH\r\n\x1a\n";
42
/// Largest chunk body length (512 MiB) a reader accepts unless told otherwise.
///
/// [`ZiPatchReader::new`] installs this ceiling so that a malformed or hostile
/// stream declaring an enormous `body_len` is rejected instead of triggering a
/// correspondingly enormous allocation. Use
/// [`ZiPatchReader::with_max_chunk_size`] to pick a different limit for a
/// particular reader.
pub const DEFAULT_MAX_CHUNK_SIZE: u32 = 512 << 20;
49
50/// One top-level chunk parsed from a `ZiPatch` stream.
51///
52/// Each variant corresponds to a 4-byte ASCII wire tag.
53///
54/// # Observed frequency
55///
56/// SE's XIVARR+ patch files almost exclusively contain `FHDR`, `APLY`, and
57/// `SQPK` chunks. `ADIR`/`DELD` can theoretically appear and are implemented,
58/// but are rarely emitted in practice. `APFS` has never been observed in modern
59/// patches and is treated as a no-op. `EOF_` is consumed by [`ZiPatchReader`]
60/// and is never yielded to the caller.
61#[derive(Debug)]
62pub enum Chunk {
63 /// `FHDR` — the first chunk in every patch file; carries version and
64 /// per-version patch metadata. See [`FileHeader`] for the versioned body.
65 FileHeader(FileHeader),
66 /// `APLY` — sets or clears a boolean apply-time flag on the
67 /// [`crate::ApplyConfig`] (e.g. "ignore missing files"). See [`ApplyOption`].
68 ApplyOption(ApplyOption),
69 /// `APFS` — free-space book-keeping emitted by old patcher tooling; treated
70 /// as a no-op at apply time. See [`ApplyFreeSpace`].
71 ApplyFreeSpace(ApplyFreeSpace),
72 /// `ADIR` — instructs the patcher to create a directory under the game
73 /// install root. See [`AddDirectory`].
74 AddDirectory(AddDirectory),
75 /// `DELD` — instructs the patcher to remove a directory under the game
76 /// install root. See [`DeleteDirectory`].
77 DeleteDirectory(DeleteDirectory),
78 /// `SQPK` — the workhorse chunk; wraps one of eight sub-commands that
79 /// add, delete, expand, or replace `SqPack` data. See [`SqpkCommand`].
80 Sqpk(SqpkCommand),
81 /// `EOF_` — marks the clean end of the patch stream. [`ZiPatchReader`]
82 /// consumes this chunk internally; it is never yielded to the caller.
83 EndOfFile,
84}
85
86/// One parsed chunk plus its 4-byte ASCII tag and the byte count consumed
87/// from the input stream by its frame.
88///
89/// Returned by [`parse_chunk`]. The `consumed` count is exactly the size of
90/// the chunk's on-wire frame: `4 (body_len) + 4 (tag) + body_len + 4 (crc32)`
91/// = `body_len + 12`. This is what
92/// [`ZiPatchReader`](crate::ZiPatchReader) accumulates into its running
93/// byte counter for progress reporting.
94pub(crate) struct ParsedChunk {
95 pub(crate) chunk: Chunk,
96 pub(crate) tag: ChunkTag,
97 pub(crate) consumed: u64,
98}
99
100/// Parse one chunk frame from `r`.
101///
102/// # Wire framing
103///
104/// Each chunk is laid out as:
105///
106/// ```text
107/// [body_len: u32 BE] [tag: 4 bytes] [body: body_len bytes] [crc32: u32 BE]
108/// ```
109///
110/// The CRC32 is computed over `tag ++ body` (not over `body_len`). When
111/// `verify_checksums` is `true` and the stored CRC does not match the computed
112/// one, [`ParseError::ChecksumMismatch`] is returned.
113///
114/// # Errors
115///
116/// - [`ParseError::TruncatedPatch`] — the reader returns EOF while reading
117/// the `body_len` field (i.e. no more chunks are present but `EOF_` was
118/// never seen).
119/// - [`ParseError::OversizedChunk`] — `body_len` exceeds `max_chunk_size`.
120/// - [`ParseError::ChecksumMismatch`] — CRC32 mismatch (only when
121/// `verify_checksums` is `true`).
122/// - [`ParseError::UnknownChunkTag`] — tag is not recognised.
123/// - [`ParseError::Io`] — any other I/O failure reading from `r`.
124pub(crate) fn parse_chunk<R: std::io::Read>(
125 r: &mut R,
126 verify_checksums: bool,
127 max_chunk_size: u32,
128) -> Result<ParsedChunk> {
129 let mut size_buf = [0u8; 4];
130 let size = match r.read_exact(&mut size_buf) {
131 Ok(()) => u32::from_be_bytes(size_buf) as usize,
132 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
133 return Err(ParseError::TruncatedPatch);
134 }
135 Err(e) => return Err(e.into()),
136 };
137 if size > max_chunk_size as usize {
138 return Err(ParseError::OversizedChunk(size));
139 }
140
141 // Tag (4 B) and CRC (4 B) are always present regardless of body shape.
142 let mut tag = [0u8; 4];
143 r.read_exact(&mut tag)?;
144
145 // Peek at the first 5 bytes of the body without committing to either the
146 // generic single-allocation path or the SQPK `A` zero-copy-into-data path.
147 // For SQPK chunks, those 5 bytes are `[inner_size: i32 BE][sub_cmd: u8]`.
148 // For chunks with bodies shorter than 5 bytes (e.g. `EOF_`), we still read
149 // exactly `size` bytes into the prefix array and leave the rest zero.
150 let mut prefix = [0u8; 5];
151 let prefix_len = size.min(5);
152 if prefix_len > 0 {
153 r.read_exact(&mut prefix[..prefix_len])?;
154 }
155
156 // ---- Fast path: SQPK `A` (SqpkAddData) — see `parse_sqpk_add_data_fast`. ----
157 if &tag == b"SQPK" && size >= 5 + SQPK_ADDDATA_HEADER_SIZE && prefix[4] == b'A' {
158 return parse_sqpk_add_data_fast(r, tag, prefix, size, verify_checksums);
159 }
160
161 // ---- Generic path: one allocation for the whole body. ----
162 let mut body_vec = vec![0u8; size];
163 body_vec[..prefix_len].copy_from_slice(&prefix[..prefix_len]);
164 if size > prefix_len {
165 r.read_exact(&mut body_vec[prefix_len..])?;
166 }
167
168 let mut crc_buf = [0u8; 4];
169 r.read_exact(&mut crc_buf)?;
170 let expected_crc = u32::from_be_bytes(crc_buf);
171
172 if verify_checksums {
173 let mut hasher = crc32fast::Hasher::new();
174 hasher.update(&tag);
175 hasher.update(&body_vec);
176 let actual_crc = hasher.finalize();
177 if actual_crc != expected_crc {
178 return Err(ParseError::ChecksumMismatch {
179 tag: ChunkTag::new(tag),
180 expected: expected_crc,
181 actual: actual_crc,
182 });
183 }
184 }
185
186 trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
187
188 // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
189 let consumed = (size as u64) + 12;
190
191 let body = &body_vec[..];
192
193 let chunk = match &tag {
194 b"EOF_" => Chunk::EndOfFile,
195 b"FHDR" => Chunk::FileHeader(fhdr::parse(body)?),
196 b"APLY" => Chunk::ApplyOption(aply::parse(body)?),
197 b"APFS" => Chunk::ApplyFreeSpace(afsp::parse(body)?),
198 b"ADIR" => Chunk::AddDirectory(adir::parse(body)?),
199 b"DELD" => Chunk::DeleteDirectory(ddir::parse(body)?),
200 b"SQPK" => Chunk::Sqpk(sqpk::parse_sqpk(body)?),
201 _ => return Err(ParseError::UnknownChunkTag(ChunkTag::new(tag))),
202 };
203
204 Ok(ParsedChunk {
205 chunk,
206 tag: ChunkTag::new(tag),
207 consumed,
208 })
209}
210
// Byte length of the fixed SqpkAddData header that sits in front of the inline
// data payload, spelled out as the sum of its field widths:
// pad (3) + main_id (2) + sub_id (2) + file_id (4) + block_offset (4)
// + data_bytes (4) + block_delete (4) = 23.
// Mirrors `add_data::SqpkAddData::DATA_SOURCE_OFFSET` (23) without going
// through a `u64`; private to the framing path.
const SQPK_ADDDATA_HEADER_SIZE: usize = 3 + 2 + 2 + 4 + 4 + 4 + 4;
215
216/// Fast path for SQPK `A` (`SqpkAddData`) chunks.
217///
218/// `AddData` is the largest chunk type by byte volume — payloads of hundreds of
219/// KB to MB are typical. The generic framing path allocates one `Vec<u8>` of
220/// `size` for the whole body, then `binrw`'s derived parser allocates a second
221/// `Vec<u8>` of exactly `data_bytes` and memcpys the inline payload into it.
222/// That second allocation + memcpy dominates parse time for `AddData`.
223///
224/// This function reads the `AddData` fixed header into a stack array, parses
225/// the seven fields directly, allocates the `data` payload at its exact size,
226/// and `read_exact`s the source bytes straight into it — one allocation, no
227/// intermediate copy of the payload.
228///
229/// On entry: `tag` and the 5-byte `prefix` (SQPK `inner_size` + sub-command
230/// byte) have already been consumed from `r`. The remaining bytes are
231/// `[fixed_header: 23 B][data: data_bytes][crc32: 4 B]`.
232fn parse_sqpk_add_data_fast<R: std::io::Read>(
233 r: &mut R,
234 tag: [u8; 4],
235 prefix: [u8; 5],
236 size: usize,
237 verify_checksums: bool,
238) -> Result<ParsedChunk> {
239 // Validate the SQPK inner_size against the outer chunk size, matching the
240 // check in `sqpk::parse_sqpk` so callers see byte-identical error behaviour.
241 let inner_size = i32::from_be_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
242 if inner_size != size {
243 return Err(ParseError::InvalidField {
244 context: "SQPK inner size mismatch",
245 });
246 }
247
248 let mut header = [0u8; SQPK_ADDDATA_HEADER_SIZE];
249 r.read_exact(&mut header)?;
250
251 // SqpkAddData fixed-header layout (all big-endian):
252 // [0..3] pad
253 // [3..5] main_id u16
254 // [5..7] sub_id u16
255 // [7..11] file_id u32
256 // [11..15] block_offset_raw u32 (<< 7 = bytes)
257 // [15..19] data_bytes_raw u32 (<< 7 = bytes)
258 // [19..23] block_delete_raw u32 (<< 7 = bytes)
259 let main_id = u16::from_be_bytes([header[3], header[4]]);
260 let sub_id = u16::from_be_bytes([header[5], header[6]]);
261 let file_id = u32::from_be_bytes([header[7], header[8], header[9], header[10]]);
262 let block_offset_raw = u32::from_be_bytes([header[11], header[12], header[13], header[14]]);
263 let data_bytes_raw = u32::from_be_bytes([header[15], header[16], header[17], header[18]]);
264 let block_delete_raw = u32::from_be_bytes([header[19], header[20], header[21], header[22]]);
265
266 let block_offset = (block_offset_raw as u64) << 7;
267 let data_bytes = (data_bytes_raw as u64) << 7;
268 let block_delete_number = (block_delete_raw as u64) << 7;
269
270 // The declared payload length must fit exactly within the chunk body:
271 // size = 5 (inner_size + sub_cmd) + 23 (fixed header) + data_bytes
272 let expected_data = size - 5 - SQPK_ADDDATA_HEADER_SIZE;
273 if data_bytes as usize != expected_data {
274 return Err(ParseError::InvalidField {
275 context: "SqpkAddData data_bytes does not match SQPK body length",
276 });
277 }
278
279 let mut data = vec![0u8; data_bytes as usize];
280 r.read_exact(&mut data)?;
281
282 let mut crc_buf = [0u8; 4];
283 r.read_exact(&mut crc_buf)?;
284 let expected_crc = u32::from_be_bytes(crc_buf);
285
286 if verify_checksums {
287 // CRC is over `tag ++ body`. The body is split across three disjoint
288 // buffers — feed each segment to the incremental hasher.
289 let mut hasher = crc32fast::Hasher::new();
290 hasher.update(&tag);
291 hasher.update(&prefix);
292 hasher.update(&header);
293 hasher.update(&data);
294 let actual_crc = hasher.finalize();
295 if actual_crc != expected_crc {
296 return Err(ParseError::ChecksumMismatch {
297 tag: ChunkTag::new(tag),
298 expected: expected_crc,
299 actual: actual_crc,
300 });
301 }
302 }
303
304 trace!(tag = %String::from_utf8_lossy(&tag), "chunk");
305
306 let chunk = Chunk::Sqpk(sqpk::SqpkCommand::AddData(Box::new(sqpk::SqpkAddData {
307 target_file: sqpk::SqpackFileId {
308 main_id,
309 sub_id,
310 file_id,
311 },
312 block_offset,
313 data_bytes,
314 block_delete_number,
315 data,
316 })));
317
318 // 4 (body_len) + 4 (tag) + size (body) + 4 (crc32)
319 let consumed = (size as u64) + 12;
320
321 Ok(ParsedChunk {
322 chunk,
323 tag: ChunkTag::new(tag),
324 consumed,
325 })
326}
327
328/// One chunk yielded by [`ZiPatchReader::next_chunk`] together with the
329/// stream-position metadata the parser observed while reading it.
330///
331/// Bundling the chunk with its byte-position metadata in one record lets
332/// downstream consumers (the apply driver, the [`crate::index::PlanBuilder`],
333/// the `zipatch dump` CLI) avoid a second round of accessor calls against
334/// the reader to learn where the chunk sat in the stream. Each field
335/// describes one fact the parser knew at the moment the chunk was yielded;
336/// see the per-field docs.
337///
338/// `#[non_exhaustive]`: stream-position metadata may grow (e.g. compressed
339/// payload size, header-only byte count) as new index-builder needs surface.
340#[non_exhaustive]
341#[derive(Debug)]
342pub struct ChunkRecord {
343 /// The parsed chunk itself.
344 pub chunk: Chunk,
345 /// The 4-byte ASCII wire tag of the chunk (`FHDR`, `SQPK`, `EOF_`, …).
346 ///
347 /// Exposed alongside [`Self::chunk`] so consumers can attach the tag
348 /// to a progress event without re-matching on the [`Chunk`] enum.
349 pub tag: ChunkTag,
350 /// Absolute patch-file offset of the chunk's body — the byte right
351 /// after the 8-byte `[body_len: u32 BE, tag: [u8; 4]]` frame header.
352 ///
353 /// Index builders use this to compute absolute patch-file offsets for
354 /// `SqpkAddData::data`, `SqpkFile` block payloads, and
355 /// `SqpkHeader::header_data` without re-walking the stream.
356 pub body_offset: u64,
357 /// Running total of bytes consumed from the patch stream, including
358 /// the 12-byte magic header, the chunk this record describes, and
359 /// every preceding chunk frame.
360 ///
361 /// Equivalent to the [`crate::ChunkEvent::bytes_read`] field at the
362 /// same emission point; used for the `bytes_applied / total_patch_size`
363 /// progress-bar ratio.
364 pub bytes_read: u64,
365}
366
/// Streaming parser that turns a `ZiPatch` byte stream into [`Chunk`]s.
///
/// A `ZiPatchReader` sits on top of any [`std::io::Read`] source. Constructing
/// it checks the 12-byte file magic; after that, each call to
/// [`Self::next_chunk`] reads the next frame and returns it as a
/// [`ChunkRecord`], until the `EOF_` terminator is reached or an error occurs.
///
/// # Stream contract
///
/// - **Magic** — the stream must begin with the 12 bytes
///   `\x91ZIPATCH\r\n\x1a\n`. Anything else makes [`ZiPatchReader::new`] fail
///   with [`ParseError::InvalidMagic`].
/// - **Framing** — chunks are length-prefixed frames:
///   `[body_len: u32 BE] [tag: 4 B] [body: body_len B] [crc32: u32 BE]`.
/// - **CRC32** — calculated over `tag ++ body`. It is verified by default;
///   call [`ZiPatchReader::with_checksum_verification`] with `false` to turn
///   verification off.
/// - **Termination** — `EOF_` is consumed internally, after which
///   [`Self::next_chunk`] returns `Ok(None)`. Use
///   [`ZiPatchReader::is_complete`] once iteration has stopped to tell a clean
///   end apart from a truncated stream.
/// - **Fused** — after the first `Ok(None)` (clean EOF) or `Err(_)`, every
///   further `next_chunk` call returns `Ok(None)`.
///
/// # Errors
///
/// A failing [`Self::next_chunk`] call returns `Err(e)` once; all later calls
/// return `Ok(None)`. The errors that can surface include:
/// - [`ParseError::TruncatedPatch`] — the stream ended before `EOF_`.
/// - [`ParseError::OversizedChunk`] — a chunk declared a body larger than the
///   configured maximum (default [`DEFAULT_MAX_CHUNK_SIZE`], 512 MiB).
/// - [`ParseError::ChecksumMismatch`] — CRC32 verification failed.
/// - [`ParseError::UnknownChunkTag`] — the 4-byte tag was not recognised.
/// - [`ParseError::Io`] — the underlying reader failed.
///
/// # Async usage
///
/// `ZiPatchReader` parses synchronously from a [`std::io::Read`] source; the
/// crate-level "Async usage" section explains why. Async callers should run
/// the iteration (and any apply call driving it) inside
/// `tokio::task::spawn_blocking`. When the patch itself arrives over an async
/// transport (e.g. `reqwest::Response::bytes_stream`), either spool it into a
/// `tempfile::NamedTempFile` and hand the reopened [`std::fs::File`] to
/// [`ZiPatchReader::new`], or bridge with a blocking-reader adapter fed from a
/// [`tokio::sync::mpsc`-equivalent](std::sync::mpsc) channel that the async
/// download task fills.
///
/// # Example
///
/// Assemble a tiny in-memory patch (magic + `ADIR` + `EOF_`) and read it back:
///
/// ```rust
/// use std::io::Cursor;
/// use zipatch_rs::{Chunk, ZiPatchReader};
///
/// // Frame `tag` + `body` as `[body_len][tag][body][crc32]`, with the CRC32
/// // computed over `tag ++ body`.
/// fn frame(tag: &[u8; 4], body: &[u8]) -> Vec<u8> {
///     let mut checked = tag.to_vec();
///     checked.extend_from_slice(body);
///     let crc = crc32fast::hash(&checked);
///
///     let mut out = (body.len() as u32).to_be_bytes().to_vec();
///     out.extend_from_slice(&checked);
///     out.extend_from_slice(&crc.to_be_bytes());
///     out
/// }
///
/// // Start with the 12-byte ZiPatch magic.
/// let mut patch = b"\x91ZIPATCH\r\n\x1a\n".to_vec();
///
/// // ADIR body: u32 BE name_len (7) followed by b"created".
/// let mut adir_body = 7u32.to_be_bytes().to_vec();
/// adir_body.extend_from_slice(b"created");
///
/// patch.extend(frame(b"ADIR", &adir_body));
/// patch.extend(frame(b"EOF_", &[]));
///
/// let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
/// let mut chunks = Vec::new();
/// while let Some(record) = reader.next_chunk().unwrap() {
///     chunks.push(record.chunk);
/// }
///
/// assert_eq!(chunks.len(), 1);
/// assert!(matches!(chunks[0], Chunk::AddDirectory(_)));
/// assert!(reader.is_complete());
/// ```
#[derive(Debug)]
pub struct ZiPatchReader<R> {
    // Buffered wrapper around the caller's source; all chunk reads go through it.
    inner: std::io::BufReader<R>,
    // Set once `EOF_` was consumed or an error was returned; further
    // `next_chunk` calls then short-circuit to `Ok(None)`.
    done: bool,
    // Whether each chunk's CRC32 is checked while parsing.
    verify_checksums: bool,
    // Set only when the `EOF_` terminator was actually consumed.
    eof_seen: bool,
    // Bytes consumed from `inner` so far, counting the 12-byte magic header.
    // Advanced after every successful `parse_chunk` call.
    pub(crate) bytes_read: u64,
    // Identifier for the patch source, supplied by the caller. It is stamped
    // onto each `SequentialCheckpoint` the apply driver emits so that a later
    // `resume_apply_patch` call can reject a checkpoint persisted for a
    // different patch. Stays `None` unless `with_patch_name` was called.
    patch_name: Option<String>,
    // Largest declared body length the parser accepts. A chunk declaring a
    // bigger `body_len` is rejected with `ParseError::OversizedChunk` before
    // anything is allocated. Starts out as `DEFAULT_MAX_CHUNK_SIZE`.
    max_chunk_size: u32,
}
479
480impl<R: std::io::Read> ZiPatchReader<R> {
481 /// Wrap `reader` and validate the leading 12-byte `ZiPatch` magic.
482 ///
483 /// Consumes exactly 12 bytes from `reader`. The magic is the byte sequence
484 /// `0x91 0x5A 0x49 0x50 0x41 0x54 0x43 0x48 0x0D 0x0A 0x1A 0x0A`
485 /// (i.e. `\x91ZIPATCH\r\n\x1a\n`).
486 ///
487 /// The reader is wrapped in a [`std::io::BufReader`] internally, so the
488 /// many small typed reads the chunk parser issues (4-byte size, 4-byte
489 /// tag, 5-byte SQPK prefix, …) coalesce into a small number of syscalls.
490 /// Callers do not need to pre-wrap a raw [`std::fs::File`] or other
491 /// unbuffered source.
492 ///
493 /// CRC32 verification is **enabled** by default. Call
494 /// [`ZiPatchReader::with_checksum_verification`] with `false` before
495 /// iterating to disable it.
496 ///
497 /// # Errors
498 ///
499 /// - [`ParseError::InvalidMagic`] — the first 12 bytes do not match the
500 /// expected magic.
501 /// - [`ParseError::Io`] — an I/O error occurred while reading the magic.
502 pub fn new(reader: R) -> Result<Self> {
503 let mut reader = std::io::BufReader::new(reader);
504 let mut magic = [0u8; 12];
505 reader.read_exact(&mut magic)?;
506 if magic != MAGIC {
507 return Err(ParseError::InvalidMagic);
508 }
509 Ok(Self {
510 inner: reader,
511 done: false,
512 verify_checksums: true,
513 eof_seen: false,
514 // The 12-byte magic header has already been consumed.
515 bytes_read: 12,
516 patch_name: None,
517 max_chunk_size: DEFAULT_MAX_CHUNK_SIZE,
518 })
519 }
520
521 /// Set the upper bound on a single chunk's declared body length, in
522 /// bytes.
523 ///
524 /// The parser rejects any chunk whose `body_len` exceeds `bytes` with
525 /// [`ParseError::OversizedChunk`] before allocating space for its body.
526 /// Defaults to [`DEFAULT_MAX_CHUNK_SIZE`] (512 MiB). Raise it for
527 /// patches with unusually large chunks; lower it when applying untrusted
528 /// streams to bound the parser's worst-case allocation.
529 ///
530 /// # Panics
531 ///
532 /// Panics if `bytes` is zero — a zero ceiling rejects every chunk and
533 /// is a programming error.
534 #[must_use]
535 pub fn with_max_chunk_size(mut self, bytes: u32) -> Self {
536 assert!(bytes > 0, "with_max_chunk_size(0) is invalid");
537 self.max_chunk_size = bytes;
538 self
539 }
540
541 /// Returns the configured maximum chunk-body length, in bytes.
542 #[must_use]
543 pub fn max_chunk_size(&self) -> u32 {
544 self.max_chunk_size
545 }
546
547 /// Attach a human-readable identifier to this patch stream.
548 ///
549 /// The identifier is stamped onto every
550 /// [`SequentialCheckpoint`](crate::apply::SequentialCheckpoint) the apply
551 /// driver emits so a future
552 /// [`resume_apply_patch`](crate::ApplyConfig::resume_apply_patch) call can
553 /// detect a checkpoint that was persisted for a different patch and
554 /// refuse to resume from it.
555 ///
556 /// Typical value is the patch filename (e.g. `"H2017.07.11.0000.0000a.patch"`).
557 /// No interpretation is performed — the string is compared verbatim.
558 #[must_use]
559 pub fn with_patch_name(mut self, name: impl Into<String>) -> Self {
560 self.patch_name = Some(name.into());
561 self
562 }
563
564 /// Returns the caller-supplied patch identifier, if any.
565 ///
566 /// Set by [`Self::with_patch_name`]; `None` otherwise.
567 #[must_use]
568 pub fn patch_name(&self) -> Option<&str> {
569 self.patch_name.as_deref()
570 }
571
572 /// Mutable access to the wrapped [`std::io::BufReader`].
573 ///
574 /// Used by [`crate::ApplyConfig::resume_apply_patch`] to seek the
575 /// underlying source for the patch-size measurement at entry. Not
576 /// part of the stable API — seeking the inner reader while a chunk
577 /// parse is in flight would desync `bytes_read` and break later
578 /// iteration.
579 pub(crate) fn inner_mut(&mut self) -> &mut std::io::BufReader<R> {
580 &mut self.inner
581 }
582
583 /// Toggle per-chunk CRC32 verification.
584 ///
585 /// Verification is **enabled** by default after [`ZiPatchReader::new`].
586 /// Pass `false` to skip CRC checks — useful when the source has already
587 /// been verified out-of-band (e.g. a download hash was checked before the
588 /// file was opened), or when processing known-good test data where the
589 /// overhead is unnecessary.
590 #[must_use]
591 pub fn with_checksum_verification(mut self, on: bool) -> Self {
592 self.verify_checksums = on;
593 self
594 }
595
596 /// Returns `true` if iteration reached the `EOF_` terminator cleanly.
597 ///
598 /// A `false` return after `next()` yields `None` indicates the stream was
599 /// truncated — the download or file copy was incomplete. In that case the
600 /// iterator stopped because of a [`ParseError::TruncatedPatch`] error,
601 /// not because the patch finished normally.
602 pub fn is_complete(&self) -> bool {
603 self.eof_seen
604 }
605
606 /// Returns the running total of bytes consumed from the patch stream.
607 ///
608 /// Starts at `12` after [`ZiPatchReader::new`] (the magic header has been
609 /// read) and increases monotonically by the size of each chunk's wire
610 /// frame after each successful [`Self::next_chunk`] call. Includes the
611 /// `EOF_` terminator's frame.
612 ///
613 /// On parse error, the counter is **not** advanced past the failing
614 /// chunk — it reflects the byte offset at the start of that chunk's
615 /// length prefix, not the broken position somewhere inside its frame.
616 ///
617 /// Per-chunk consumers should read the equivalent counter off the
618 /// [`ChunkRecord::bytes_read`] field. This getter is for end-of-stream
619 /// reporting — after [`Self::next_chunk`] returned `Ok(None)`, no
620 /// [`ChunkRecord`] is produced for the consumed `EOF_` frame, so the
621 /// final stream position is only available through this method.
622 #[must_use]
623 pub fn bytes_read(&self) -> u64 {
624 self.bytes_read
625 }
626
627 /// Read the next chunk frame from the underlying stream.
628 ///
629 /// Returns `Ok(Some(record))` for each successfully parsed chunk in
630 /// stream order, `Ok(None)` after the `EOF_` terminator has been
631 /// consumed (the terminator itself is never surfaced as a record), and
632 /// `Err(_)` on a parse failure. After `Ok(None)` or any `Err(_)`,
633 /// subsequent calls return `Ok(None)` — the reader is fused.
634 ///
635 /// # Errors
636 ///
637 /// See [`Self`]'s "Errors" section.
638 pub fn next_chunk(&mut self) -> Result<Option<ChunkRecord>> {
639 if self.done {
640 return Ok(None);
641 }
642 // Snapshot the body offset before parsing so a successful parse can
643 // commit it without re-walking the stream. The chunk body begins after
644 // the 8-byte `[body_len: u32 BE, tag: [u8; 4]]` frame header.
645 let body_offset = self.bytes_read + 8;
646 match parse_chunk(&mut self.inner, self.verify_checksums, self.max_chunk_size) {
647 Ok(ParsedChunk {
648 chunk: Chunk::EndOfFile,
649 consumed,
650 ..
651 }) => {
652 self.bytes_read += consumed;
653 self.done = true;
654 self.eof_seen = true;
655 Ok(None)
656 }
657 Ok(ParsedChunk {
658 chunk,
659 tag,
660 consumed,
661 }) => {
662 self.bytes_read += consumed;
663 Ok(Some(ChunkRecord {
664 chunk,
665 tag,
666 body_offset,
667 bytes_read: self.bytes_read,
668 }))
669 }
670 Err(e) => {
671 self.done = true;
672 Err(e)
673 }
674 }
675 }
676}
677
678/// Open the file at `path` and validate the `ZiPatch` magic, returning a
679/// ready-to-iterate [`ZiPatchReader`].
680///
681/// The concrete inner reader type is intentionally hidden behind `impl
682/// Read` so the choice of source and any buffering strategy remain
683/// implementation details. Callers that need to name the type should
684/// construct a reader of their choice and pass it to
685/// [`ZiPatchReader::new`].
686///
687/// # Errors
688///
689/// - [`ParseError::Io`] — the file could not be opened.
690/// - [`ParseError::InvalidMagic`] — the file does not start with the
691/// `ZiPatch` magic bytes.
692pub fn open_patch(
693 path: impl AsRef<std::path::Path>,
694) -> crate::ParseResult<ZiPatchReader<impl std::io::Read + 'static>> {
695 let file = std::fs::File::open(path)?;
696 ZiPatchReader::new(file)
697}
698
699#[cfg(test)]
700mod tests {
701 use super::*;
702 use crate::test_utils::make_chunk;
703 use std::io::Cursor;
704
705 // --- parse_chunk error paths ---
706
707 #[test]
708 fn truncated_at_chunk_boundary_yields_truncated_patch() {
709 // Magic + no chunks: parse_chunk must see EOF on the body_len read and
710 // convert it to TruncatedPatch.
711 let mut patch = Vec::new();
712 patch.extend_from_slice(&MAGIC);
713 let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
714 match reader.next_chunk() {
715 Err(ParseError::TruncatedPatch) => {}
716 other => panic!("expected TruncatedPatch, got {other:?}"),
717 }
718 assert!(!reader.is_complete(), "stream is not clean-ended");
719 }
720
721 #[test]
722 fn non_eof_io_error_on_body_len_read_propagates_as_io() {
723 // Exercises the `Err(e) => return Err(e)` arm at line 124: an I/O
724 // error that is NOT UnexpectedEof must propagate verbatim.
725 // We trigger this by passing a reader that errors immediately.
726 struct BrokenReader;
727 impl std::io::Read for BrokenReader {
728 fn read(&mut self, _: &mut [u8]) -> std::io::Result<usize> {
729 Err(std::io::Error::new(
730 std::io::ErrorKind::BrokenPipe,
731 "simulated broken pipe",
732 ))
733 }
734 }
735 let result = parse_chunk(&mut BrokenReader, false, DEFAULT_MAX_CHUNK_SIZE);
736 match result {
737 Err(ParseError::Io { source: e }) => {
738 assert_eq!(
739 e.kind(),
740 std::io::ErrorKind::BrokenPipe,
741 "non-EOF I/O error must propagate unchanged, got kind {:?}",
742 e.kind()
743 );
744 }
745 Err(other) => panic!("expected ParseError::Io(BrokenPipe), got {other:?}"),
746 Ok(_) => panic!("expected an error, got Ok"),
747 }
748 }
749
750 #[test]
751 fn truncated_after_one_chunk_yields_truncated_patch() {
752 // Magic + one well-formed ADIR + no more bytes: the second call to
753 // next() must surface TruncatedPatch, not None.
754 let mut adir_body = Vec::new();
755 adir_body.extend_from_slice(&4u32.to_be_bytes());
756 adir_body.extend_from_slice(b"test");
757 let chunk = make_chunk(b"ADIR", &adir_body);
758
759 let mut patch = Vec::new();
760 patch.extend_from_slice(&MAGIC);
761 patch.extend_from_slice(&chunk);
762
763 let mut reader = ZiPatchReader::new(Cursor::new(patch)).unwrap();
764 let first = reader.next_chunk();
765 assert!(
766 matches!(first, Ok(Some(_))),
767 "first ADIR chunk should parse cleanly: {first:?}"
768 );
769 match reader.next_chunk() {
770 Err(ParseError::TruncatedPatch) => {}
771 other => panic!("expected TruncatedPatch on truncated stream, got {other:?}"),
772 }
773 assert!(
774 !reader.is_complete(),
775 "is_complete must be false after truncation"
776 );
777 }
778
779 #[test]
780 fn checksum_mismatch_returns_checksum_mismatch_error() {
781 // Corrupt the CRC32 field of an otherwise valid ADIR chunk and verify
782 // that parse_chunk returns ChecksumMismatch (not a panic or a wrong error).
783 let mut adir_body = Vec::new();
784 adir_body.extend_from_slice(&4u32.to_be_bytes());
785 adir_body.extend_from_slice(b"test");
786 let mut chunk = make_chunk(b"ADIR", &adir_body);
787 // Flip the last byte of the CRC32 field.
788 let last = chunk.len() - 1;
789 chunk[last] ^= 0xFF;
790
791 let mut cur = Cursor::new(chunk);
792 let result = parse_chunk(&mut cur, true, DEFAULT_MAX_CHUNK_SIZE);
793 assert!(
794 matches!(result, Err(ParseError::ChecksumMismatch { .. })),
795 "corrupted CRC must yield ChecksumMismatch"
796 );
797 }
798
#[test]
fn unknown_chunk_tag_returns_unknown_chunk_tag_error() {
    // "ZZZZ" is not a recognised tag: parse_chunk must fail with
    // UnknownChunkTag, and the error must carry the raw 4-byte tag intact.
    let mut cursor = Cursor::new(make_chunk(b"ZZZZ", &[]));

    let err = parse_chunk(&mut cursor, false, DEFAULT_MAX_CHUNK_SIZE)
        .err()
        .expect("expected UnknownChunkTag, got Ok");

    match err {
        ParseError::UnknownChunkTag(tag) => assert_eq!(
            tag,
            ChunkTag::new(*b"ZZZZ"),
            "tag bytes must be preserved in error"
        ),
        other => panic!("expected UnknownChunkTag, got {other:?}"),
    }
}
817
#[test]
fn default_max_chunk_size_matches_constant() {
    // A reader built with `new` and no override must report the crate-wide
    // default limit.
    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"EOF_", &[]));

    let reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let limit = reader.max_chunk_size();
    assert_eq!(limit, DEFAULT_MAX_CHUNK_SIZE);
}
826
#[test]
fn with_max_chunk_size_overrides_default() {
    // The builder-style setter must replace the limit that the getter reports.
    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"EOF_", &[]));

    let base = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let tuned = base.with_max_chunk_size(4096);
    assert_eq!(tuned.max_chunk_size(), 4096);
}
837
#[test]
#[should_panic(expected = "with_max_chunk_size(0) is invalid")]
fn with_max_chunk_size_zero_panics() {
    // A zero limit is rejected with a panic; `should_panic` above pins the
    // message.
    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"EOF_", &[]));

    let reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let _ = reader.with_max_chunk_size(0);
}
848
#[test]
fn custom_max_chunk_size_rejects_chunks_above_threshold() {
    // The ADIR body is 9 bytes (4-byte length prefix + "hello"), so the
    // frame declares body_len = 9. A reader capped at 4 bytes must refuse it
    // with OversizedChunk carrying that declared size.
    let mut body = 5u32.to_be_bytes().to_vec();
    body.extend_from_slice(b"hello");

    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"ADIR", &body));

    let mut reader = ZiPatchReader::new(Cursor::new(stream))
        .unwrap()
        .with_max_chunk_size(4);

    let outcome = reader.next_chunk();
    if let Err(ParseError::OversizedChunk(size)) = &outcome {
        assert_eq!(*size, 9);
    } else {
        panic!("expected OversizedChunk(9), got {outcome:?}");
    }
}
870
#[test]
fn oversized_chunk_body_len_returns_oversized_chunk_error() {
    // Four 0xFF bytes decode to body_len == u32::MAX, far above the 512 MiB
    // default; the parser must reject it before allocating anything.
    let raw = [0xFFu8; 4];
    let mut cursor = Cursor::new(&raw[..]);

    let size = match parse_chunk(&mut cursor, false, DEFAULT_MAX_CHUNK_SIZE) {
        Err(ParseError::OversizedChunk(size)) => size,
        _ => panic!("expected OversizedChunk for u32::MAX body_len"),
    };

    let limit = DEFAULT_MAX_CHUNK_SIZE as usize;
    assert!(
        size > limit,
        "reported size {size} must exceed DEFAULT_MAX_CHUNK_SIZE {DEFAULT_MAX_CHUNK_SIZE}"
    );
}
886
887 // --- ZiPatchReader byte-counter and per-record metadata ---
888
#[test]
fn bytes_read_starts_at_12_before_first_chunk() {
    // Construction consumes the 12-byte magic header, so the counter must
    // already read 12 before any chunk has been pulled.
    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"EOF_", &[]));

    let reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let consumed = reader.bytes_read();
    assert_eq!(
        consumed, 12,
        "bytes_read must be 12 (magic only) before iteration starts"
    );
}
903
#[test]
fn record_carries_tag_body_offset_and_bytes_read() {
    // Stream layout: MAGIC + ADIR("a") + EOF_. Frame sizes used below:
    //   ADIR: 4 (size) + 4 (tag) + 5 (body) + 4 (crc) = 17 bytes
    //   EOF_: 4 (size) + 4 (tag) + 0 (body) + 4 (crc) = 12 bytes
    let mut adir_body = 1u32.to_be_bytes().to_vec();
    adir_body.extend_from_slice(b"a");

    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"ADIR", &adir_body));
    stream.extend(make_chunk(b"EOF_", &[]));

    let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    assert_eq!(reader.bytes_read(), 12, "pre-read: magic only");

    let record = reader.next_chunk().unwrap().expect("first ADIR record");
    let is_adir = matches!(record.chunk, Chunk::AddDirectory(_));
    assert!(is_adir, "first chunk must be ADIR");
    assert_eq!(record.tag, ChunkTag::ADIR);
    // The body starts after magic (12) + body_len (4) + tag (4) = 20.
    assert_eq!(record.body_offset, 20);
    assert_eq!(record.bytes_read, 12 + 17, "magic + ADIR frame");

    let after_eof = reader.next_chunk().unwrap();
    assert!(after_eof.is_none(), "EOF_ must terminate iteration");
    assert_eq!(
        reader.bytes_read(),
        12 + 17 + 12,
        "after EOF_: magic + ADIR + EOF_ frames"
    );
    assert!(reader.is_complete(), "is_complete must be true after EOF_");
}
943
#[test]
fn bytes_read_is_monotonically_non_decreasing() {
    /// Builds one ADIR frame whose body is a big-endian u32 length prefix
    /// followed by `name`.
    fn adir_frame(name: &[u8]) -> Vec<u8> {
        let mut body = (name.len() as u32).to_be_bytes().to_vec();
        body.extend_from_slice(name);
        make_chunk(b"ADIR", &body)
    }

    // Two ADIR chunks followed by EOF_. The counter must grow on every
    // yielded record, and consuming the EOF_ frame (empty body, 12-byte
    // frame) must still push it past the last record's position.
    let mut stream = MAGIC.to_vec();
    stream.extend(adir_frame(b"a"));
    stream.extend(adir_frame(b"bb"));
    stream.extend(make_chunk(b"EOF_", &[]));

    let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let mut last_seen = reader.bytes_read();
    loop {
        let Some(record) = reader.next_chunk().unwrap() else {
            break;
        };
        let now = record.bytes_read;
        assert_eq!(
            now,
            reader.bytes_read(),
            "record's bytes_read must equal reader's running counter"
        );
        assert!(
            now > last_seen,
            "non-empty ADIR frame must strictly advance bytes_read: {last_seen} -> {now}"
        );
        last_seen = now;
    }

    // The loop ended because EOF_ was consumed; its frame bytes count too.
    let total = reader.bytes_read();
    assert!(
        total > last_seen,
        "consuming EOF_ must advance bytes_read by its 12-byte frame: {last_seen} -> {total}"
    );
}
988
989 // --- open_patch constructor ---
990
#[test]
fn open_patch_opens_minimal_patch_and_reaches_eof() {
    // Write the smallest valid patch (magic + EOF_) to a temp file and read
    // it back through the path-based constructor.
    let dir = tempfile::tempdir().unwrap();
    let patch_path = dir.path().join("test.patch");

    let mut contents = MAGIC.to_vec();
    contents.extend(make_chunk(b"EOF_", &[]));
    std::fs::write(&patch_path, &contents).unwrap();

    let mut reader = open_patch(&patch_path).expect("open_patch must open valid patch");
    let first = reader.next_chunk().unwrap();
    assert!(first.is_none(), "EOF_ must terminate iteration immediately");
    assert!(reader.is_complete(), "is_complete must be true after EOF_");
}
1008
#[test]
fn open_patch_returns_io_error_when_file_is_missing() {
    // Nothing is created at this path inside the fresh temp directory, so
    // opening it must fail with the Io variant.
    let dir = tempfile::tempdir().unwrap();
    let missing = dir.path().join("nonexistent.patch");

    let is_io_error = matches!(open_patch(&missing), Err(ParseError::Io { .. }));
    assert!(
        is_io_error,
        "open_patch on a missing file must return ParseError::Io"
    );
}
1018
1019 // --- Fused-ness and is_complete ---
1020
#[test]
fn reader_is_fused_after_error() {
    // After next_chunk has yielded Err(_) once, every later call must
    // report Ok(None).
    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"ZZZZ", &[])); // unrecognised tag → parse error

    let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();

    let first = reader.next_chunk();
    let first_is_error = matches!(first, Err(ParseError::UnknownChunkTag(_)));
    assert!(
        first_is_error,
        "first call must yield the error: {first:?}"
    );

    // Second and third calls: the reader must stay fused.
    for why in [
        "fused: must return Ok(None) after error",
        "fused: still Ok(None) on third call",
    ] {
        assert!(matches!(reader.next_chunk(), Ok(None)), "{why}");
    }
}
1044
#[test]
fn is_complete_false_until_eof_seen() {
    // is_complete must stay false through construction and ordinary chunks,
    // flipping to true only once the EOF_ chunk has been consumed.
    let mut adir_body = 1u32.to_be_bytes().to_vec();
    adir_body.extend_from_slice(b"x");

    let mut stream = MAGIC.to_vec();
    stream.extend(make_chunk(b"ADIR", &adir_body));
    stream.extend(make_chunk(b"EOF_", &[]));

    let mut reader = ZiPatchReader::new(Cursor::new(stream)).unwrap();
    let complete_at_start = reader.is_complete();
    assert!(!complete_at_start, "not complete before reading anything");

    reader.next_chunk().unwrap().unwrap(); // pull the ADIR record
    let complete_after_adir = reader.is_complete();
    assert!(!complete_after_adir, "not complete after ADIR, before EOF_");

    let terminator = reader.next_chunk().unwrap();
    assert!(terminator.is_none(), "EOF_ consumed");
    assert!(reader.is_complete(), "complete after EOF_ consumed");
}
1069}