zipatch_rs/chunk/sqpk/file.rs
1use crate::{ParseError, ParseResult as Result};
2use binrw::BinRead;
3use flate2::read::DeflateDecoder;
4use flate2::{Decompress, FlushDecompress, Status};
5use std::borrow::Cow;
6use std::io::{self, Cursor, Read, Write};
7
/// Upper bound (64 KiB) on the bytes pre-allocated for a size-hinted `Vec`
/// whose size comes from an attacker-controlled length field.
///
/// Only the *initial* capacity is capped, not the amount of data that can be
/// read: genuine large reads grow the `Vec` incrementally via `read_to_end`,
/// while absurd hints paired with a short input fall through to the
/// truncation check without an intermediate multi-gigabyte allocation.
/// See issue #30 for the fuzz finding that motivated this cap.
const PREALLOC_CAP: usize = 64 * 1024;
15
// 16-byte little-endian header preceding each `SqpkCompressedBlock` payload
// (three 4-byte fields plus one 4-byte pad word). Field meanings are
// documented on `SqpkCompressedBlock`. The 4-byte pad word after
// `header_size` is consumed via `pad_after` rather than a named field so the
// struct shape stays minimal.
#[derive(BinRead)]
#[br(little)]
// The field names mirror the wire format, so the shared `_size` suffix stays.
#[allow(clippy::struct_field_names)]
struct BlockHeader {
    // Declared header length in bytes; the pad word that follows it on the
    // wire is skipped by `pad_after`.
    #[br(pad_after = 4)]
    header_size: i32,
    // Byte count of the DEFLATE payload, or the 0x7d00 sentinel when the
    // block is stored uncompressed.
    compressed_size: i32,
    // Byte count of the payload after decompression.
    decompressed_size: i32,
}
29
// 27-byte big-endian header at the start of every `SqpkFile` command body
// (1 + 2 pad + 8 + 8 + 4 + 2 + 2 pad). The variable-length `path` and (for
// `AddFile`) trailing block list follow.
#[derive(BinRead)]
#[br(big)]
struct FileCommandHeader {
    // Raw operation byte; `parse` maps it onto `SqpkFileOperation`.
    operation: u8,
    // Two pad bytes sit between `operation` and `file_offset`.
    #[br(pad_before = 2)]
    file_offset: u64,
    file_size: u64,
    // Byte length of the path field that follows this header.
    path_len: u32,
    // Two pad bytes follow `expansion_id` and close the fixed header.
    #[br(pad_after = 2)]
    expansion_id: u16,
}
43
44// Read exactly `n` bytes into a fresh `Vec<u8>`, capping the initial
45// allocation at `PREALLOC_CAP` so an attacker-controlled length field cannot
46// trigger a multi-gigabyte allocation against a short input. Genuine large
47// reads grow the `Vec` incrementally via `read_to_end`.
48fn read_exact_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>> {
49 let mut buf = Vec::with_capacity(n.min(PREALLOC_CAP));
50 r.by_ref().take(n as u64).read_to_end(&mut buf)?;
51 if buf.len() < n {
52 return Err(io::Error::new(
53 io::ErrorKind::UnexpectedEof,
54 "read_exact_vec: unexpected EOF",
55 )
56 .into());
57 }
58 Ok(buf)
59}
60
61// Discard exactly `n` bytes from `r`. Returns `UnexpectedEof` if the source
62// runs short.
63fn skip_exact<R: Read>(r: &mut R, n: u64) -> Result<()> {
64 let consumed = io::copy(&mut r.by_ref().take(n), &mut io::sink())?;
65 if consumed < n {
66 return Err(
67 io::Error::new(io::ErrorKind::UnexpectedEof, "skip_exact: unexpected EOF").into(),
68 );
69 }
70 Ok(())
71}
72
/// Operation byte of a SQPK `F` command; selects what the command does to
/// the game install tree.
///
/// Encoded as a single ASCII byte in the wire format:
/// `b'A'` → `AddFile`, `b'R'` → `RemoveAll`, `b'D'` → `DeleteFile`,
/// `b'M'` → `MakeDirTree`. The mapping is applied while parsing the command
/// header; any other byte is rejected with
/// [`ParseError::UnknownFileOperation`], which carries the offending byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SqpkFileOperation {
    /// `A` — write the inline compressed-block payload into a file under the
    /// game install root, creating it (or overwriting it) as needed.
    ///
    /// Parent directories are created automatically. If `file_offset` is zero,
    /// the target file is truncated to zero before writing (full replacement);
    /// if `file_offset` is non-zero, only the covered range is overwritten.
    ///
    /// This is the only operation for which the parser collects blocks.
    AddFile,
    /// `R` — delete all files in the expansion folder (`sqpack/<expansion>/`
    /// and `movie/<expansion>/`) that are not on the keep-list.
    ///
    /// Kept unconditionally: `.var` files and `00000.bk2`–`00003.bk2`.
    /// Files `00004.bk2` and beyond are deleted. `expansion_id` selects
    /// the target expansion folder.
    RemoveAll,
    /// `D` — delete a single file at the path given by `SqpkFile::path`.
    DeleteFile,
    /// `M` — create the directory tree at `SqpkFile::path` (equivalent to
    /// `std::fs::create_dir_all`). Idempotent.
    MakeDirTree,
}
102
/// One block of a [`SqpkFile`] `AddFile` payload, which may be DEFLATE-compressed
/// or stored raw.
///
/// `SqpkFile` payloads are split into a sequence of these blocks. Each block
/// begins with a 16-byte little-endian header that describes the compressed
/// and decompressed sizes, followed by the data bytes padded to a 128-byte
/// boundary.
///
/// ## Compression sentinel
///
/// The `compressed_size` field in the wire header uses the value `0x7d00`
/// (decimal **32000**) as a sentinel meaning "this block is not compressed".
/// Any other value means the data bytes are a raw DEFLATE stream
/// (no zlib wrapper, no gzip header — just RFC 1951 raw deflate).
///
/// ## Wire format of one block (all little-endian)
///
/// ```text
/// ┌─────────────────────────────────────────────────────────────────────┐
/// │ header_size       : i32 LE  always 16 in practice                  │  bytes 0–3
/// │ <pad>             : u32 LE  always zero                            │  bytes 4–7
/// │ compressed_size   : i32 LE  byte count of DEFLATE data             │  bytes 8–11
/// │                             OR 0x7d00 (32000) if uncompressed      │
/// │ decompressed_size : i32 LE  byte count of decompressed output      │  bytes 12–15
/// │ data              : [u8]    compressed or raw bytes                │  bytes 16–…
/// │ <alignment>       : [u8]    zero-padding to 128-byte boundary      │
/// └─────────────────────────────────────────────────────────────────────┘
/// ```
///
/// ## 128-byte alignment formula
///
/// The total byte count of a block (header + data + alignment) is:
///
/// ```text
/// block_len = (data_len + 143) & !127
/// ```
///
/// where `data_len` is `compressed_size` if compressed, or `decompressed_size`
/// if uncompressed. The constant 143 is `128 - 1 + 16`: add the 16-byte
/// header that is not included in `data_len`, then round up to the next
/// 128-byte boundary. The number of bytes consumed after the header is
/// `block_len - header_size`. For uncompressed blocks the alignment padding
/// is consumed and discarded; for compressed blocks it is kept at the tail of
/// the stored data, past the end of the DEFLATE stream.
///
/// ## Visibility and construction
///
/// `SqpkCompressedBlock` is `pub` so that it appears in rustdoc and can be
/// named in `SqpkFile::blocks`, but its fields are private: it can only be
/// constructed via [`new`](SqpkCompressedBlock::new) (for tests) or by
/// parsing a [`SqpkFile`].
#[derive(Debug)]
pub struct SqpkCompressedBlock {
    // true  → data holds raw DEFLATE bytes (compressed_size != 0x7d00)
    // false → data holds the exact decompressed bytes (compressed_size == 0x7d00)
    is_compressed: bool,
    // Expected output size in bytes; used as a (capped) pre-allocation hint
    // for the decompression buffer.
    decompressed_size: usize,
    // Compressed blocks: the raw DEFLATE stream as read from the patch,
    //   including any trailing 128-byte alignment padding (the parser stores
    //   the whole `block_len - header_size` region; the padding sits after
    //   the end of the DEFLATE stream).
    // Uncompressed blocks: the exact payload bytes, already stripped of padding.
    data: Vec<u8>,
}
163
164impl SqpkCompressedBlock {
165 /// Construct a block directly from its component parts.
166 ///
167 /// This constructor exists primarily for unit tests. Production code
168 /// creates blocks by parsing a [`SqpkFile`] from a patch byte stream.
169 ///
170 /// - `is_compressed`: `true` if `data` is a raw DEFLATE stream.
171 /// - `decompressed_size`: the expected number of bytes after decompression;
172 /// used to pre-allocate the output buffer in
173 /// [`decompress`](SqpkCompressedBlock::decompress).
174 /// - `data`: raw compressed bytes or exact uncompressed bytes, depending
175 /// on `is_compressed`.
176 #[must_use]
177 pub fn new(is_compressed: bool, decompressed_size: usize, data: Vec<u8>) -> Self {
178 Self {
179 is_compressed,
180 decompressed_size,
181 data,
182 }
183 }
184
185 // Parse one block from the reader, consuming header + data + alignment padding.
186 //
187 // Reads the 16-byte little-endian block header, determines whether the block
188 // is compressed (compressed_size != 0x7d00), computes the 128-byte-aligned
189 // total length via (data_len + 143) & !127, then reads exactly that many
190 // bytes minus the header size — leaving the reader positioned at the start
191 // of the next block.
192 fn read<R: Read>(r: &mut R) -> Result<Self> {
193 // 16-byte block header, all fields little-endian. Read into a stack
194 // buffer and parse via `binrw` over a `Cursor`; that keeps the derive
195 // wiring while avoiding any seek requirement on the upstream reader.
196 let mut header_buf = [0u8; 16];
197 r.read_exact(&mut header_buf)?;
198 let header = BlockHeader::read_le(&mut Cursor::new(&header_buf[..]))?;
199
200 if header.header_size < 0 {
201 return Err(ParseError::InvalidField {
202 context: "negative header_size in block",
203 });
204 }
205 if header.decompressed_size < 0 {
206 return Err(ParseError::InvalidField {
207 context: "negative decompressed_size in block",
208 });
209 }
210 // 0x7d00 (32000) is the sentinel for "store raw, not compressed".
211 // Any other value is the byte count of the DEFLATE stream.
212 let is_compressed = header.compressed_size != 0x7d00;
213 if is_compressed && header.compressed_size < 0 {
214 return Err(ParseError::InvalidField {
215 context: "negative compressed_size in block",
216 });
217 }
218
219 let header_size = header.header_size as usize;
220 let decompressed_size = header.decompressed_size as usize;
221 // data_len is the logical size used for alignment: for compressed blocks
222 // it is the compressed byte count; for uncompressed it is the raw byte count.
223 let data_len = if is_compressed {
224 header.compressed_size
225 } else {
226 header.decompressed_size
227 };
228 // Round data_len up to the next 128-byte boundary, accounting for the
229 // 16-byte header that precedes the data in the stream.
230 // Formula: (data_len + 128 - 1 + (header_size=16)) & !127
231 // = (data_len + 143) & !127
232 let block_len = ((data_len as u32 + 143) & !127u32) as usize;
233 // Underflow guard: a malformed header where `header_size` exceeds the
234 // aligned `block_len` would wrap to a huge size in release builds.
235 let data_region = block_len
236 .checked_sub(header_size)
237 .ok_or(ParseError::InvalidField {
238 context: "block_len smaller than header_size",
239 })?;
240 let data = if is_compressed {
241 // Read the DEFLATE payload plus any alignment padding. For compressed
242 // blocks we store everything (padding included) because DeflateDecoder
243 // stops at the end of the DEFLATE stream before reading into padding.
244 read_exact_vec(r, data_region)?
245 } else {
246 // Uncompressed: read exactly decompressed_size bytes of payload,
247 // then skip any alignment padding so the reader is positioned at
248 // the start of the next block.
249 let padding =
250 data_region
251 .checked_sub(decompressed_size)
252 .ok_or(ParseError::InvalidField {
253 context: "block data region smaller than decompressed_size",
254 })?;
255 let d = read_exact_vec(r, decompressed_size)?;
256 skip_exact(r, padding as u64)?;
257 d
258 };
259 Ok(SqpkCompressedBlock {
260 is_compressed,
261 decompressed_size,
262 data,
263 })
264 }
265
266 /// Stream the block's decompressed bytes into `w`.
267 ///
268 /// For uncompressed blocks, `w.write_all(&self.data)` is called directly.
269 /// For compressed blocks, the data is piped through [`DeflateDecoder`] (raw
270 /// DEFLATE, RFC 1951 — no zlib or gzip wrapper) before being written.
271 ///
272 /// This is the primary write path used by the apply layer: each block in a
273 /// [`SqpkFile`] `AddFile` operation is streamed into the target file handle
274 /// in sequence.
275 ///
276 /// # Errors
277 ///
278 /// - [`ParseError::Decompress`] — the DEFLATE stream is malformed or
279 /// truncated.
280 /// - [`ParseError::Io`] — `w.write_all` failed.
281 pub fn decompress_into(&self, w: &mut impl Write) -> Result<()> {
282 if self.is_compressed {
283 std::io::copy(&mut DeflateDecoder::new(self.data.as_slice()), w)
284 .map_err(|e| ParseError::Decompress { source: e })?;
285 } else {
286 w.write_all(&self.data)?;
287 }
288 Ok(())
289 }
290
291 /// Stream the block's decompressed bytes into `w`, reusing a caller-owned
292 /// [`Decompress`] state across blocks.
293 ///
294 /// Equivalent to [`decompress_into`](SqpkCompressedBlock::decompress_into)
295 /// in behaviour and error semantics, but avoids the per-call ~100 KiB
296 /// zlib-state allocation that [`DeflateDecoder::new`] would otherwise
297 /// pay. The apply layer threads a single `Decompress` through every
298 /// block in a multi-block `SqpkFile::AddFile` chunk; uncompressed blocks
299 /// short-circuit to `write_all` and leave the decompressor untouched.
300 ///
301 /// `decompressor` is reset via [`Decompress::reset(false)`](Decompress::reset)
302 /// at the start of every compressed block, so callers may pass an
303 /// already-used state without manually resetting it.
304 ///
305 /// # Errors
306 ///
307 /// - [`ParseError::Decompress`] — the DEFLATE stream is malformed or
308 /// the manual feed loop made no forward progress (corrupt or truncated
309 /// payload).
310 /// - [`ParseError::Io`] — `w.write_all` failed.
311 pub fn decompress_into_with(
312 &self,
313 decompressor: &mut Decompress,
314 w: &mut impl Write,
315 ) -> Result<()> {
316 if !self.is_compressed {
317 w.write_all(&self.data)?;
318 return Ok(());
319 }
320
321 // Raw DEFLATE — match the legacy `DeflateDecoder::new(_)` zlib_header=false.
322 decompressor.reset(false);
323 // 8 KiB output buffer matches `std::io::copy`'s default and is plenty
324 // for the per-iteration output the underlying miniz_oxide / zlib-ng
325 // backends emit. Stays on the stack — no allocation per block.
326 let mut out = [0u8; 8 * 1024];
327 let mut input: &[u8] = &self.data;
328 loop {
329 let before_in = decompressor.total_in();
330 let before_out = decompressor.total_out();
331 let status = decompressor
332 .decompress(input, &mut out, FlushDecompress::None)
333 .map_err(|e| ParseError::Decompress {
334 source: std::io::Error::new(std::io::ErrorKind::InvalidData, e),
335 })?;
336 let consumed = (decompressor.total_in() - before_in) as usize;
337 let produced = (decompressor.total_out() - before_out) as usize;
338 if produced > 0 {
339 w.write_all(&out[..produced])?;
340 }
341 input = &input[consumed..];
342 match status {
343 Status::StreamEnd => return Ok(()),
344 Status::Ok | Status::BufError => {
345 // Forward progress is required. SqPack DEFLATE blocks are
346 // self-contained — the trailing alignment padding the parser
347 // intentionally leaves in `self.data` is past the
348 // end-of-stream marker, so the decoder must signal
349 // StreamEnd before exhausting the input. A no-progress loop
350 // means the payload is corrupt or truncated.
351 if consumed == 0 && produced == 0 {
352 return Err(ParseError::Decompress {
353 source: std::io::Error::new(
354 std::io::ErrorKind::InvalidData,
355 "DEFLATE stream made no forward progress",
356 ),
357 });
358 }
359 }
360 }
361 }
362 }
363
364 /// Returns `true` if the block stores a raw DEFLATE stream.
365 ///
366 /// `false` means the block carries already-decompressed bytes (the
367 /// `compressed_size == 0x7d00` sentinel).
368 #[must_use]
369 pub fn is_compressed(&self) -> bool {
370 self.is_compressed
371 }
372
373 /// Returns the block's expected decompressed length in bytes.
374 #[must_use]
375 pub fn decompressed_size(&self) -> usize {
376 self.decompressed_size
377 }
378
379 /// Returns the byte length of the block's stored `data` slab.
380 ///
381 /// For compressed blocks this is the length of the DEFLATE payload as the
382 /// parser stored it (which may include trailing 128-byte alignment padding
383 /// that the decoder ignores past the end-of-stream marker). For
384 /// uncompressed blocks it equals [`decompressed_size`](Self::decompressed_size).
385 #[must_use]
386 pub fn data_len(&self) -> usize {
387 self.data.len()
388 }
389
390 /// Return the block's decompressed bytes as a [`Cow`].
391 ///
392 /// Uncompressed blocks return `Cow::Borrowed(&self.data)` — a zero-copy
393 /// borrow into the block's existing buffer. Compressed blocks decompress
394 /// into a newly allocated `Vec` and return `Cow::Owned`.
395 ///
396 /// Use [`decompress_into`](SqpkCompressedBlock::decompress_into) instead
397 /// when writing to a file handle, to avoid the intermediate allocation.
398 ///
399 /// # Errors
400 ///
401 /// - [`ParseError::Decompress`] — the DEFLATE stream is malformed or
402 /// truncated (compressed blocks only).
403 pub fn decompress(&self) -> crate::ParseResult<Cow<'_, [u8]>> {
404 if self.is_compressed {
405 // Cap pre-alloc: `decompressed_size` originates from the parsed
406 // block header. See `PREALLOC_CAP` (above) for rationale.
407 let mut out = Vec::with_capacity(self.decompressed_size.min(PREALLOC_CAP));
408 self.decompress_into(&mut out)?;
409 Ok(Cow::Owned(out))
410 } else {
411 Ok(Cow::Borrowed(&self.data))
412 }
413 }
414}
415
/// SQPK `F` command body: a file-level operation on the game install tree.
///
/// Unlike the block-oriented commands (`A`, `D`, `E`) that target `SqPack`
/// archive internals, `F` operates on whole files in the install directory.
/// The operation to perform is selected by [`operation`](SqpkFile::operation).
///
/// ## Wire format
///
/// ```text
/// ┌──────────────────────────────────────────────────────────────────────────┐
/// │ operation    : u8      b'A', b'R', b'D', or b'M'                        │  byte 0
/// │ <padding>    : [u8; 2] (always zero)                                    │  bytes 1–2
/// │ file_offset  : u64 BE  destination byte offset within the target file  │  bytes 3–10
/// │ file_size    : u64 BE  declared size of the target file after operation │  bytes 11–18
/// │ path_len     : u32 BE  byte length of the path field (including NUL)   │  bytes 19–22
/// │ expansion_id : u16 BE  expansion folder selector for `RemoveAll`       │  bytes 23–24
/// │ <padding>    : [u8; 2] (always zero)                                    │  bytes 25–26
/// │ path         : [u8; path_len]  NUL-terminated UTF-8 path               │  bytes 27–…
/// │ [blocks]     : SqpkCompressedBlock…  (only for `AddFile`)              │
/// └──────────────────────────────────────────────────────────────────────────┘
/// ```
///
/// `file_offset` and `file_size` are stored as big-endian `u64` in the wire
/// format. `file_offset` is range-checked against `i64::MAX` at parse time —
/// values with the high bit set (which would round-trip as a negative `i64`
/// in the legacy wire interpretation) are rejected with
/// [`ParseError::NegativeFileOffset`] before the chunk is constructed.
///
/// Trailing NUL bytes in `path` are stripped during parsing;
/// [`path`](SqpkFile::path) always contains a valid UTF-8 string without the
/// terminator.
///
/// For `AddFile` operations the remaining bytes in the command body after the
/// path form a sequence of [`SqpkCompressedBlock`]s (see that type's
/// documentation for the block wire format). For all other operations the block
/// list is empty.
///
/// # Errors
///
/// Parsing returns a [`crate::ParseError`] if:
/// - The operation byte is not `b'A'`, `b'R'`, `b'D'`, or `b'M'`
///   → [`ParseError::UnknownFileOperation`].
/// - `file_offset` has its high bit set
///   → [`ParseError::NegativeFileOffset`].
/// - `path_len` exceeds the bytes remaining in the command body
///   → [`ParseError::InvalidField`].
/// - The path bytes are not valid UTF-8 → [`ParseError::Utf8Error`].
/// - A block header contains a negative `header_size` or `decompressed_size`,
///   a negative non-sentinel `compressed_size`, or a `header_size` /
///   `decompressed_size` that does not fit inside the aligned block length
///   → [`ParseError::InvalidField`].
/// - The body is too short → [`ParseError::Io`].
#[derive(Debug)]
pub struct SqpkFile {
    /// The file operation to perform.
    pub operation: SqpkFileOperation,
    /// Destination byte offset within the target file.
    ///
    /// For `AddFile`: if zero, the target file is truncated to zero before
    /// writing (complete replacement); if positive, writing begins at this
    /// byte offset in the existing file. Values with the high bit set in the
    /// wire `u64` are rejected at parse time with
    /// [`ParseError::NegativeFileOffset`], so every value reaching here fits
    /// in an `i64`.
    ///
    /// Unused by `RemoveAll`, `DeleteFile`, and `MakeDirTree`.
    pub file_offset: u64,
    /// Declared total size of the target file after the operation, in bytes.
    ///
    /// Informational; the apply layer does not use this to pre-allocate or
    /// truncate the file (truncation is controlled by `file_offset == 0`).
    pub file_size: u64,
    /// Expansion folder selector used by `RemoveAll`.
    ///
    /// `0` → `ffxiv` (base game), `n > 0` → `ex<n>`. Corresponds to the
    /// high byte of `sub_id` in block-oriented commands.
    pub expansion_id: u16,
    /// Relative path to the target file or directory under the game install root.
    ///
    /// Trailing NUL bytes are stripped during parsing. For `AddFile` /
    /// `DeleteFile` this is joined with the install root via `generic_path`.
    /// For `MakeDirTree` it is the directory tree to create.
    pub path: String,
    /// Byte offset of each block's data payload — measured from the start of
    /// the SQPK command body slice — after skipping the block's 16-byte header.
    ///
    /// `block_source_offsets[i]` corresponds to `blocks[i]`. Adding the chunk's
    /// absolute position in the patch file to this offset gives the patch-file
    /// byte offset where the block's data begins, enabling `IndexedZiPatch`
    /// random-access reads that do not need to decompress the full stream.
    ///
    /// Empty for all operations other than `AddFile`.
    pub block_source_offsets: Vec<u64>,
    /// Inline compressed-or-raw block payloads that make up the file content.
    ///
    /// Only populated for `AddFile`; empty for `RemoveAll`, `DeleteFile`, and
    /// `MakeDirTree`. Each block is decompressed in sequence into the target
    /// file by the apply layer. See [`SqpkCompressedBlock`] for the block wire
    /// format and DEFLATE discrimination logic.
    pub blocks: Vec<SqpkCompressedBlock>,
}
513
514// Parse a SQPK 'F' command body into a SqpkFile.
515//
516// Reads the fixed-size header fields (operation, offsets, sizes, path),
517// then — for AddFile only — iterates over the remaining bytes in `body`,
518// parsing SqpkCompressedBlock entries until the cursor reaches the end.
519// The block source offsets are recorded as the cursor position + 16 (to
520// skip the block's own 16-byte header) before each SqpkCompressedBlock::read
521// call.
522pub(crate) fn parse(body: &[u8]) -> Result<SqpkFile> {
523 let mut c = Cursor::new(body);
524
525 let header = FileCommandHeader::read(&mut c)?;
526 let operation = match header.operation {
527 b'A' => SqpkFileOperation::AddFile,
528 b'R' => SqpkFileOperation::RemoveAll,
529 b'D' => SqpkFileOperation::DeleteFile,
530 b'M' => SqpkFileOperation::MakeDirTree,
531 b => {
532 return Err(ParseError::UnknownFileOperation(b));
533 }
534 };
535
536 // The wire field is u64 BE, but the legacy interpretation treated it as
537 // a signed i64 — values with the high bit set surface as ParseError so
538 // the public `file_offset: u64` only ever carries non-negative offsets
539 // (i.e. fits in i64 as well). The error variant keeps the raw value
540 // re-encoded as the i64 the legacy reader would have produced.
541 if header.file_offset > i64::MAX as u64 {
542 return Err(ParseError::NegativeFileOffset(header.file_offset as i64));
543 }
544 let file_offset = header.file_offset;
545 let file_size = header.file_size;
546 let path_len = header.path_len as usize;
547 let expansion_id = header.expansion_id;
548
549 // Cap path_len against remaining body bytes — without this an attacker
550 // can declare a 4 GiB path and OOM the patcher (issue #30).
551 let remaining = body.len().saturating_sub(c.position() as usize);
552 if path_len > remaining {
553 return Err(ParseError::InvalidField {
554 context: "SqpkFile path_len exceeds remaining body bytes",
555 });
556 }
557 let path_bytes = read_exact_vec(&mut c, path_len)?;
558 let path = String::from_utf8(path_bytes)
559 .map(|s| s.trim_end_matches('\0').to_owned())
560 .map_err(ParseError::Utf8Error)?;
561
562 let (blocks, block_source_offsets) = if matches!(operation, SqpkFileOperation::AddFile) {
563 let mut blocks = Vec::new();
564 let mut offsets = Vec::new();
565 while (c.position() as usize) < body.len() {
566 // Record offset of the data payload (after the fixed 16-byte block header).
567 offsets.push(c.position() + 16);
568 blocks.push(SqpkCompressedBlock::read(&mut c)?);
569 }
570 (blocks, offsets)
571 } else {
572 (Vec::new(), Vec::new())
573 };
574
575 Ok(SqpkFile {
576 operation,
577 file_offset,
578 file_size,
579 expansion_id,
580 path,
581 block_source_offsets,
582 blocks,
583 })
584}
585
586#[cfg(test)]
587mod tests {
588 use super::*;
589
590 fn make_header(
591 op: u8,
592 file_offset: u64,
593 file_size: u64,
594 path: &[u8],
595 expansion_id: u16,
596 ) -> Vec<u8> {
597 let mut body = Vec::new();
598 body.push(op);
599 body.extend_from_slice(&[0u8; 2]); // alignment
600 body.extend_from_slice(&file_offset.to_be_bytes());
601 body.extend_from_slice(&file_size.to_be_bytes());
602 body.extend_from_slice(&(path.len() as u32).to_be_bytes());
603 body.extend_from_slice(&expansion_id.to_be_bytes());
604 body.extend_from_slice(&[0u8; 2]); // padding
605 body.extend_from_slice(path);
606 body
607 }
608
609 #[test]
610 fn parses_add_file_no_blocks() {
611 let body = make_header(b'A', 0, 512, b"test\0", 1);
612 let cmd = parse(&body).unwrap();
613 assert!(matches!(cmd.operation, SqpkFileOperation::AddFile));
614 assert_eq!(cmd.file_offset, 0);
615 assert_eq!(cmd.file_size, 512);
616 assert_eq!(cmd.expansion_id, 1);
617 assert_eq!(cmd.path, "test");
618 assert!(cmd.blocks.is_empty());
619 assert!(cmd.block_source_offsets.is_empty());
620 }
621
622 #[test]
623 fn parses_add_file_uncompressed_block() {
624 // block_len = ((8 + 143) & !127) = 128; read 8 data bytes + skip 104 padding
625 let mut body = make_header(b'A', 0, 0, b"\0", 0);
626 // header bytes: 1+2+8+8+4+2+2+1 = 28 — block starts at offset 28
627 body.extend_from_slice(&16i32.to_le_bytes()); // header_size
628 body.extend_from_slice(&0u32.to_le_bytes()); // pad
629 body.extend_from_slice(&0x7d00i32.to_le_bytes()); // compressed_size = uncompressed sentinel
630 body.extend_from_slice(&8i32.to_le_bytes()); // decompressed_size
631 body.extend_from_slice(&[0xABu8; 8]); // data
632 body.extend_from_slice(&[0u8; 104]); // alignment padding
633
634 let cmd = parse(&body).unwrap();
635 assert_eq!(cmd.blocks.len(), 1);
636 let block = &cmd.blocks[0];
637 assert!(!block.is_compressed);
638 assert_eq!(block.decompressed_size, 8);
639 assert_eq!(block.data.len(), 8);
640 assert!(block.data.iter().all(|&b| b == 0xAB));
641 assert_eq!(block.decompress().unwrap(), vec![0xABu8; 8]);
642 assert_eq!(cmd.block_source_offsets, vec![44u64]); // 28 (header) + 16 (block header)
643 }
644
645 #[test]
646 fn rejects_negative_file_offset_at_parse() {
647 // A `u64` wire value with the high bit set must surface as
648 // `ParseError::NegativeFileOffset(i64)` — the error preserves the raw
649 // value as the legacy signed reading for diagnostics.
650 let body = make_header(b'A', u64::MAX, 0, b"\0", 0);
651 match parse(&body) {
652 Err(ParseError::NegativeFileOffset(v)) => assert_eq!(v, -1),
653 other => panic!("expected NegativeFileOffset(-1), got {other:?}"),
654 }
655 }
656
657 #[test]
658 fn parses_remove_all_operation() {
659 let body = make_header(b'R', 0, 0, b"\0", 0);
660 let cmd = parse(&body).unwrap();
661 assert!(matches!(cmd.operation, SqpkFileOperation::RemoveAll));
662 assert!(cmd.blocks.is_empty());
663 assert!(cmd.block_source_offsets.is_empty());
664 }
665
666 #[test]
667 fn parses_delete_file_operation() {
668 let body = make_header(b'D', 0, 0, b"sqpack/foo.dat\0", 0);
669 let cmd = parse(&body).unwrap();
670 assert!(matches!(cmd.operation, SqpkFileOperation::DeleteFile));
671 assert_eq!(cmd.path, "sqpack/foo.dat");
672 }
673
674 #[test]
675 fn parses_make_dir_tree_operation() {
676 let body = make_header(b'M', 0, 0, b"sqpack/ex1\0", 0);
677 let cmd = parse(&body).unwrap();
678 assert!(matches!(cmd.operation, SqpkFileOperation::MakeDirTree));
679 assert_eq!(cmd.path, "sqpack/ex1");
680 }
681
682 #[test]
683 fn rejects_unknown_operation() {
684 let body = make_header(b'Z', 0, 0, b"\0", 0);
685 assert!(parse(&body).is_err());
686 }
687
688 fn block_with_sizes(header_size: i32, compressed_size: i32, decompressed_size: i32) -> Vec<u8> {
689 let mut body = make_header(b'A', 0, 0, b"\0", 0);
690 body.extend_from_slice(&header_size.to_le_bytes());
691 body.extend_from_slice(&0u32.to_le_bytes()); // pad
692 body.extend_from_slice(&compressed_size.to_le_bytes());
693 body.extend_from_slice(&decompressed_size.to_le_bytes());
694 body
695 }
696
697 #[test]
698 fn rejects_negative_header_size() {
699 let body = block_with_sizes(-1, 0x7d00, 0);
700 let Err(ParseError::InvalidField { context }) = parse(&body) else {
701 panic!("expected InvalidField for negative header_size");
702 };
703 assert!(
704 context.contains("header_size"),
705 "unexpected context: {context}"
706 );
707 }
708
709 #[test]
710 fn rejects_negative_decompressed_size() {
711 let body = block_with_sizes(16, 0x7d00, -1);
712 let Err(ParseError::InvalidField { context }) = parse(&body) else {
713 panic!("expected InvalidField for negative decompressed_size");
714 };
715 assert!(
716 context.contains("decompressed_size"),
717 "unexpected context: {context}"
718 );
719 }
720
721 #[test]
722 fn rejects_negative_compressed_size() {
723 // is_compressed = (compressed_size != 0x7d00) — pass -1 (not 0x7d00).
724 let body = block_with_sizes(16, -1, 8);
725 let Err(ParseError::InvalidField { context }) = parse(&body) else {
726 panic!("expected InvalidField for negative compressed_size");
727 };
728 assert!(
729 context.contains("compressed_size"),
730 "unexpected context: {context}"
731 );
732 }
733
734 #[test]
735 fn rejects_invalid_utf8_in_path() {
736 // 0xFF is not valid UTF-8 — Utf8Error path on `String::from_utf8`.
737 let body = make_header(b'D', 0, 0, &[0xFFu8], 0);
738 assert!(matches!(parse(&body), Err(ParseError::Utf8Error(_))));
739 }
740
741 #[test]
742 fn decompress_into_uncompressed_writes_data_verbatim() {
743 // Uncompressed branch: w.write_all(&self.data).
744 let block = SqpkCompressedBlock::new(false, 5, b"hello".to_vec());
745 let mut out = Vec::new();
746 block.decompress_into(&mut out).unwrap();
747 assert_eq!(out, b"hello");
748 }
749
750 #[test]
751 fn decompress_into_with_reuses_decompressor_across_blocks() {
752 // Verifies the contract of `decompress_into_with`: the same
753 // `Decompress` instance can be threaded through multiple consecutive
754 // compressed blocks, with `reset` between calls, and produce identical
755 // output to `decompress_into`. This is the apply-layer hot path.
756 use flate2::Compression;
757 use flate2::write::DeflateEncoder;
758 use std::io::Write;
759
760 let payload_a: &[u8] = b"alpha alpha alpha beta beta gamma";
761 let payload_b: &[u8] = b"the quick brown fox jumps over the lazy dog";
762
763 let compress = |raw: &[u8]| -> SqpkCompressedBlock {
764 let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
765 enc.write_all(raw).unwrap();
766 SqpkCompressedBlock::new(true, raw.len(), enc.finish().unwrap())
767 };
768 let a = compress(payload_a);
769 let b = compress(payload_b);
770
771 let mut state = Decompress::new(false);
772 let mut out_a = Vec::new();
773 a.decompress_into_with(&mut state, &mut out_a).unwrap();
774 assert_eq!(out_a, payload_a, "first block must round-trip");
775
776 let mut out_b = Vec::new();
777 b.decompress_into_with(&mut state, &mut out_b).unwrap();
778 assert_eq!(out_b, payload_b, "reused state must reset and round-trip");
779 }
780
#[test]
fn decompress_into_with_uncompressed_skips_decompressor() {
    // For an uncompressed block the caller-supplied decompressor must stay
    // untouched: its input/output byte counters are sampled before the call
    // and compared afterwards.
    let raw_block = SqpkCompressedBlock::new(false, 5, b"hello".to_vec());
    let mut inflater = Decompress::new(false);
    let (in_before, out_before) = (inflater.total_in(), inflater.total_out());

    let mut sink = Vec::new();
    raw_block
        .decompress_into_with(&mut inflater, &mut sink)
        .unwrap();

    assert_eq!(sink, b"hello");
    assert_eq!(inflater.total_in(), in_before);
    assert_eq!(inflater.total_out(), out_before);
}
796
#[test]
fn decompress_into_with_propagates_corrupt_stream_error() {
    // Sixteen 0xFF bytes flagged as compressed: a garbage DEFLATE payload.
    // The call has to come back with `ParseError::Decompress` instead of
    // panicking or spinning forever.
    let corrupt = SqpkCompressedBlock::new(true, 16, vec![0xFFu8; 16]);
    let mut inflater = Decompress::new(false);
    let mut sink = Vec::new();

    let outcome = corrupt.decompress_into_with(&mut inflater, &mut sink);

    assert!(matches!(outcome, Err(ParseError::Decompress { .. })));
}
809
#[test]
fn decompress_returns_borrowed_for_uncompressed() {
    // `decompress()` on an uncompressed block is expected to yield the
    // `Cow::Borrowed` variant carrying the block's own bytes.
    let raw_block = SqpkCompressedBlock::new(false, 4, b"data".to_vec());

    let view = raw_block.decompress().unwrap();

    assert!(matches!(view, Cow::Borrowed(_)));
    assert_eq!(&*view, b"data");
}
818
#[test]
fn decompress_into_compressed_propagates_decompress_error() {
    // Garbage DEFLATE payload: both public entry points must report it as
    // `ParseError::Decompress`.
    let corrupt = SqpkCompressedBlock::new(true, 16, vec![0xFFu8; 16]);

    // Streaming entry point, writing into a caller-provided sink.
    let mut sink = Vec::new();
    let streamed = corrupt.decompress_into(&mut sink);
    assert!(matches!(streamed, Err(ParseError::Decompress { .. })));

    // Convenience wrapper: the same failure has to propagate through
    // `decompress()` as well.
    let buffered = corrupt.decompress();
    assert!(matches!(buffered, Err(ParseError::Decompress { .. })));
}
834
#[test]
fn parses_compressed_block() {
    use flate2::Compression;
    use flate2::write::DeflateEncoder;
    use std::io::Write;

    // Deflate a small payload to use as the block's compressed data.
    let raw: &[u8] = b"hello compressed world";
    let compressed = {
        let mut encoder = DeflateEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(raw).unwrap();
        encoder.finish().unwrap()
    };

    // Block header fields, plus the on-disk block length as this test models
    // it: (compressed_size + 143) with the low seven bits cleared.
    let header_size: i32 = 16;
    let compressed_size = compressed.len() as i32;
    let decompressed_size = raw.len() as i32;
    let block_len = ((compressed_size as u32 + 143) & !127) as usize;
    let trailing_pad = block_len - header_size as usize - compressed.len();

    // Command header: the 27-byte fixed part plus the 1-byte "\0" path gives
    // 28 bytes, so the block header begins at offset 28.
    let mut body = make_header(b'A', 0, 0, b"\0", 0);

    // 16-byte little-endian block header; the second word is the pad word.
    for word in [header_size, 0, compressed_size, decompressed_size] {
        body.extend_from_slice(&word.to_le_bytes());
    }
    body.extend_from_slice(&compressed);
    // Zero-fill up to the computed block length.
    body.resize(body.len() + trailing_pad, 0);

    let cmd = parse(&body).unwrap();

    assert_eq!(cmd.blocks.len(), 1);
    let block = &cmd.blocks[0];
    assert!(block.is_compressed);
    assert_eq!(block.decompressed_size, raw.len());
    assert_eq!(block.decompress().unwrap(), raw);
    // Payload offset: 28 (command header + path) + 16 (block header).
    assert_eq!(cmd.block_source_offsets, vec![44u64]);
}
869
#[test]
fn parse_rejects_oversized_path_len_issue_30() {
    // Regression test for issue #30. The u32 `path_len` read from untrusted
    // patch bytes used to go straight into `Vec::with_capacity`, so a hostile
    // patch could request a ~4 GiB allocation and OOM-abort the process. The
    // parser is now expected to fail with `InvalidField` before allocating.
    //
    // Fuzz input that found the bug (32 bytes from the `parser_sqpk`
    // harness; byte 0 is the harness's sub-command selector and is not part
    // of the body below):
    //   2c 41 e5 11 00 36 36 36 36 00 00 00 00 00 00 ff
    //   ff ff ff ff ff ff 00 00 21 00 ac 00 00 00 00 00
    //
    // NOTE(review): the array below does not repeat that dump byte-for-byte
    // (here `path_len` is exactly ff ff ff ff) — presumably it was re-aligned
    // by hand; confirm against the fuzz artifact.
    let body: &[u8] = &[
        // operation byte (b'A', AddFile) followed by two alignment bytes
        0x41, 0xe5, 0x11,
        // file_offset (8 bytes)
        0x00, 0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00,
        // file_size (8 bytes)
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff,
        // path_len = u32::MAX
        0xff, 0xff, 0xff, 0xff,
        // expansion_id
        0xff, 0xff,
        // padding after expansion_id
        0x00, 0x00,
        // whatever is left of the body
        0x21, 0x00, 0xac, 0x00,
    ];
    assert_eq!(body.len(), 31, "test input is the post-selector body");

    let err = parse(body).expect_err("oversized path_len must error");
    match &err {
        ParseError::InvalidField { context } if context.contains("path_len") => {}
        other => panic!("expected InvalidField on oversized path_len, got: {other:?}"),
    }
}
902}