Skip to main content

zipatch_rs/chunk/sqpk/
file.rs

1use crate::{ParseError, ParseResult as Result};
2use binrw::BinRead;
3use flate2::read::DeflateDecoder;
4use flate2::{Decompress, FlushDecompress, Status};
5use std::borrow::Cow;
6use std::io::{self, Cursor, Read, Write};
7
/// Ceiling (64 KiB) on the capacity reserved up front for any `Vec` whose
/// requested size comes from an attacker-controlled length field.
///
/// Legitimately large reads still succeed: the `Vec` simply grows as
/// `read_to_end` delivers bytes. A bogus, enormous length paired with a short
/// input reaches the truncation check without ever triggering a
/// multi-gigabyte allocation. See issue #30 for the fuzz finding that
/// motivated this cap.
const PREALLOC_CAP: usize = 1 << 16;
15
// Fixed 16-byte little-endian header that precedes each `SqpkCompressedBlock`
// payload. `SqpkCompressedBlock::read` pulls these 16 bytes into a stack
// buffer and decodes them with `BlockHeader::read_le`.
//
// Wire layout: header_size (4) + pad (4) + compressed_size (4) +
// decompressed_size (4). The pad word is consumed via `pad_after` rather
// than a named field so the struct shape stays minimal.
#[derive(BinRead)]
#[br(little)]
// Presumably silenced because all three fields share the `_size` suffix; the
// names mirror the wire format and are kept as-is.
#[allow(clippy::struct_field_names)]
struct BlockHeader {
    // Declared header length; the 4-byte pad word that follows is skipped.
    #[br(pad_after = 4)]
    header_size: i32,
    // DEFLATE byte count, or 0x7d00 to mark a stored (uncompressed) block.
    compressed_size: i32,
    // Byte count of the block's payload after decompression.
    decompressed_size: i32,
}
29
// Fixed 27-byte big-endian header at the start of every `SqpkFile` command
// body: 1 (operation) + 2 (pad) + 8 + 8 + 4 + 2 + 2 (pad) bytes.
// The variable-length `path` and (for `AddFile`) trailing block list follow
// and are read separately by `parse`.
#[derive(BinRead)]
#[br(big)]
struct FileCommandHeader {
    // Raw operation byte; `parse` maps it to a `SqpkFileOperation`.
    operation: u8,
    // Two pad bytes sit between `operation` and `file_offset`.
    #[br(pad_before = 2)]
    file_offset: u64,
    file_size: u64,
    // Byte length of the path field that follows this header.
    path_len: u32,
    // Two pad bytes close the fixed header after `expansion_id`.
    #[br(pad_after = 2)]
    expansion_id: u16,
}
43
44// Read exactly `n` bytes into a fresh `Vec<u8>`, capping the initial
45// allocation at `PREALLOC_CAP` so an attacker-controlled length field cannot
46// trigger a multi-gigabyte allocation against a short input. Genuine large
47// reads grow the `Vec` incrementally via `read_to_end`.
48fn read_exact_vec<R: Read>(r: &mut R, n: usize) -> Result<Vec<u8>> {
49    let mut buf = Vec::with_capacity(n.min(PREALLOC_CAP));
50    r.by_ref().take(n as u64).read_to_end(&mut buf)?;
51    if buf.len() < n {
52        return Err(io::Error::new(
53            io::ErrorKind::UnexpectedEof,
54            "read_exact_vec: unexpected EOF",
55        )
56        .into());
57    }
58    Ok(buf)
59}
60
61// Discard exactly `n` bytes from `r`. Returns `UnexpectedEof` if the source
62// runs short.
63fn skip_exact<R: Read>(r: &mut R, n: u64) -> Result<()> {
64    let consumed = io::copy(&mut r.by_ref().take(n), &mut io::sink())?;
65    if consumed < n {
66        return Err(
67            io::Error::new(io::ErrorKind::UnexpectedEof, "skip_exact: unexpected EOF").into(),
68        );
69    }
70    Ok(())
71}
72
/// Operation byte of a SQPK `F` command; selects what the command does to
/// the game install tree.
///
/// Encoded as a single ASCII byte in the wire format:
/// `b'A'` → `AddFile`, `b'R'` → `RemoveAll`, `b'D'` → `DeleteFile`,
/// `b'M'` → `MakeDirTree`. Any other byte is rejected with
/// [`ParseError::UnknownFileOperation`], which carries the offending byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SqpkFileOperation {
    /// `A` — write the inline compressed-block payload into a file under the
    /// game install root, creating it (or overwriting it) as needed.
    ///
    /// Parent directories are created automatically. If `file_offset` is zero,
    /// the target file is truncated to zero before writing (full replacement);
    /// if `file_offset` is non-zero, only the covered range is overwritten.
    ///
    /// This is the only operation for which the parser reads a block list.
    AddFile,
    /// `R` — delete all files in the expansion folder (`sqpack/<expansion>/`
    /// and `movie/<expansion>/`) that are not on the keep-list.
    ///
    /// Kept unconditionally: `.var` files and `00000.bk2`–`00003.bk2`.
    /// Files `00004.bk2` and beyond are deleted. `expansion_id` selects
    /// the target expansion folder.
    RemoveAll,
    /// `D` — delete a single file at the path given by `SqpkFile::path`.
    DeleteFile,
    /// `M` — create the directory tree at `SqpkFile::path` (equivalent to
    /// `std::fs::create_dir_all`). Idempotent.
    MakeDirTree,
}
102
/// One block of a [`SqpkFile`] `AddFile` payload, which may be DEFLATE-compressed
/// or stored raw.
///
/// `SqpkFile` payloads are split into a sequence of these blocks. Each block
/// begins with a 16-byte little-endian header that describes the compressed
/// and decompressed sizes, followed by the data bytes padded to a 128-byte
/// boundary.
///
/// ## Compression sentinel
///
/// The `compressed_size` field in the wire header uses the value `0x7d00`
/// (decimal **32000**) as a sentinel meaning "this block is not compressed".
/// Any other value means the data bytes are a raw DEFLATE stream
/// (no zlib wrapper, no gzip header — just RFC 1951 raw deflate).
///
/// ## Wire format of one block (all little-endian)
///
/// ```text
/// ┌─────────────────────────────────────────────────────────────────────┐
/// │ header_size     : i32 LE   always 16 in practice                   │  bytes 0–3
/// │ <pad>           : u32 LE   always zero                              │  bytes 4–7
/// │ compressed_size : i32 LE   byte count of DEFLATE data               │  bytes 8–11
/// │                             OR 0x7d00 (32000) if uncompressed       │
/// │ decompressed_size : i32 LE  byte count of decompressed output       │  bytes 12–15
/// │ data            : [u8]     compressed or raw bytes                  │  bytes 16–…
/// │ <alignment>     : [u8]     zero-padding to 128-byte boundary        │
/// └─────────────────────────────────────────────────────────────────────┘
/// ```
///
/// ## 128-byte alignment formula
///
/// The total size of a block — header, data, and alignment padding — is:
///
/// ```text
/// block_len = (data_len + 143) & !127
/// ```
///
/// where `data_len` is `compressed_size` if compressed, or `decompressed_size`
/// if uncompressed. The constant 143 is `16 + 127`: add the 16-byte header
/// (which is not included in `data_len`), then round up to the next multiple
/// of 128. The number of bytes consumed after the header is
/// `block_len - header_size`, covering the data plus its alignment padding.
///
/// ## Visibility and construction
///
/// `SqpkCompressedBlock` is `pub` so that it appears in rustdoc and can be
/// named in `SqpkFile::blocks`, but its fields are private: it can only be
/// constructed via [`new`](SqpkCompressedBlock::new) (for tests) or by
/// parsing a [`SqpkFile`].
#[derive(Debug)]
pub struct SqpkCompressedBlock {
    // true  → data holds raw DEFLATE bytes (compressed_size != 0x7d00)
    // false → data holds the exact decompressed bytes (compressed_size == 0x7d00)
    is_compressed: bool,
    // Expected output size in bytes; used (capped at PREALLOC_CAP) to
    // pre-allocate the output buffer in `decompress`.
    decompressed_size: usize,
    // Compressed blocks: everything the parser read after the header, i.e. the
    //   raw DEFLATE stream followed by any alignment padding. The padding is
    //   kept because the decoder stops at the end-of-stream marker.
    // Uncompressed blocks: exactly `decompressed_size` payload bytes; the
    //   alignment padding is skipped by the parser and not stored.
    data: Vec<u8>,
}
163
164impl SqpkCompressedBlock {
165    /// Construct a block directly from its component parts.
166    ///
167    /// This constructor exists primarily for unit tests. Production code
168    /// creates blocks by parsing a [`SqpkFile`] from a patch byte stream.
169    ///
170    /// - `is_compressed`: `true` if `data` is a raw DEFLATE stream.
171    /// - `decompressed_size`: the expected number of bytes after decompression;
172    ///   used to pre-allocate the output buffer in
173    ///   [`decompress`](SqpkCompressedBlock::decompress).
174    /// - `data`: raw compressed bytes or exact uncompressed bytes, depending
175    ///   on `is_compressed`.
176    #[must_use]
177    pub fn new(is_compressed: bool, decompressed_size: usize, data: Vec<u8>) -> Self {
178        Self {
179            is_compressed,
180            decompressed_size,
181            data,
182        }
183    }
184
185    // Parse one block from the reader, consuming header + data + alignment padding.
186    //
187    // Reads the 16-byte little-endian block header, determines whether the block
188    // is compressed (compressed_size != 0x7d00), computes the 128-byte-aligned
189    // total length via (data_len + 143) & !127, then reads exactly that many
190    // bytes minus the header size — leaving the reader positioned at the start
191    // of the next block.
192    fn read<R: Read>(r: &mut R) -> Result<Self> {
193        // 16-byte block header, all fields little-endian. Read into a stack
194        // buffer and parse via `binrw` over a `Cursor`; that keeps the derive
195        // wiring while avoiding any seek requirement on the upstream reader.
196        let mut header_buf = [0u8; 16];
197        r.read_exact(&mut header_buf)?;
198        let header = BlockHeader::read_le(&mut Cursor::new(&header_buf[..]))?;
199
200        if header.header_size < 0 {
201            return Err(ParseError::InvalidField {
202                context: "negative header_size in block",
203            });
204        }
205        if header.decompressed_size < 0 {
206            return Err(ParseError::InvalidField {
207                context: "negative decompressed_size in block",
208            });
209        }
210        // 0x7d00 (32000) is the sentinel for "store raw, not compressed".
211        // Any other value is the byte count of the DEFLATE stream.
212        let is_compressed = header.compressed_size != 0x7d00;
213        if is_compressed && header.compressed_size < 0 {
214            return Err(ParseError::InvalidField {
215                context: "negative compressed_size in block",
216            });
217        }
218
219        let header_size = header.header_size as usize;
220        let decompressed_size = header.decompressed_size as usize;
221        // data_len is the logical size used for alignment: for compressed blocks
222        // it is the compressed byte count; for uncompressed it is the raw byte count.
223        let data_len = if is_compressed {
224            header.compressed_size
225        } else {
226            header.decompressed_size
227        };
228        // Round data_len up to the next 128-byte boundary, accounting for the
229        // 16-byte header that precedes the data in the stream.
230        // Formula: (data_len + 128 - 1 + (header_size=16)) & !127
231        //        = (data_len + 143) & !127
232        let block_len = ((data_len as u32 + 143) & !127u32) as usize;
233        // Underflow guard: a malformed header where `header_size` exceeds the
234        // aligned `block_len` would wrap to a huge size in release builds.
235        let data_region = block_len
236            .checked_sub(header_size)
237            .ok_or(ParseError::InvalidField {
238                context: "block_len smaller than header_size",
239            })?;
240        let data = if is_compressed {
241            // Read the DEFLATE payload plus any alignment padding. For compressed
242            // blocks we store everything (padding included) because DeflateDecoder
243            // stops at the end of the DEFLATE stream before reading into padding.
244            read_exact_vec(r, data_region)?
245        } else {
246            // Uncompressed: read exactly decompressed_size bytes of payload,
247            // then skip any alignment padding so the reader is positioned at
248            // the start of the next block.
249            let padding =
250                data_region
251                    .checked_sub(decompressed_size)
252                    .ok_or(ParseError::InvalidField {
253                        context: "block data region smaller than decompressed_size",
254                    })?;
255            let d = read_exact_vec(r, decompressed_size)?;
256            skip_exact(r, padding as u64)?;
257            d
258        };
259        Ok(SqpkCompressedBlock {
260            is_compressed,
261            decompressed_size,
262            data,
263        })
264    }
265
266    /// Stream the block's decompressed bytes into `w`.
267    ///
268    /// For uncompressed blocks, `w.write_all(&self.data)` is called directly.
269    /// For compressed blocks, the data is piped through [`DeflateDecoder`] (raw
270    /// DEFLATE, RFC 1951 — no zlib or gzip wrapper) before being written.
271    ///
272    /// This is the primary write path used by the apply layer: each block in a
273    /// [`SqpkFile`] `AddFile` operation is streamed into the target file handle
274    /// in sequence.
275    ///
276    /// # Errors
277    ///
278    /// - [`ParseError::Decompress`] — the DEFLATE stream is malformed or
279    ///   truncated.
280    /// - [`ParseError::Io`] — `w.write_all` failed.
281    pub fn decompress_into(&self, w: &mut impl Write) -> Result<()> {
282        if self.is_compressed {
283            std::io::copy(&mut DeflateDecoder::new(self.data.as_slice()), w)
284                .map_err(|e| ParseError::Decompress { source: e })?;
285        } else {
286            w.write_all(&self.data)?;
287        }
288        Ok(())
289    }
290
291    /// Stream the block's decompressed bytes into `w`, reusing a caller-owned
292    /// [`Decompress`] state across blocks.
293    ///
294    /// Equivalent to [`decompress_into`](SqpkCompressedBlock::decompress_into)
295    /// in behaviour and error semantics, but avoids the per-call ~100 KiB
296    /// zlib-state allocation that [`DeflateDecoder::new`] would otherwise
297    /// pay. The apply layer threads a single `Decompress` through every
298    /// block in a multi-block `SqpkFile::AddFile` chunk; uncompressed blocks
299    /// short-circuit to `write_all` and leave the decompressor untouched.
300    ///
301    /// `decompressor` is reset via [`Decompress::reset(false)`](Decompress::reset)
302    /// at the start of every compressed block, so callers may pass an
303    /// already-used state without manually resetting it.
304    ///
305    /// # Errors
306    ///
307    /// - [`ParseError::Decompress`] — the DEFLATE stream is malformed or
308    ///   the manual feed loop made no forward progress (corrupt or truncated
309    ///   payload).
310    /// - [`ParseError::Io`] — `w.write_all` failed.
311    pub fn decompress_into_with(
312        &self,
313        decompressor: &mut Decompress,
314        w: &mut impl Write,
315    ) -> Result<()> {
316        if !self.is_compressed {
317            w.write_all(&self.data)?;
318            return Ok(());
319        }
320
321        // Raw DEFLATE — match the legacy `DeflateDecoder::new(_)` zlib_header=false.
322        decompressor.reset(false);
323        // 8 KiB output buffer matches `std::io::copy`'s default and is plenty
324        // for the per-iteration output the underlying miniz_oxide / zlib-ng
325        // backends emit. Stays on the stack — no allocation per block.
326        let mut out = [0u8; 8 * 1024];
327        let mut input: &[u8] = &self.data;
328        loop {
329            let before_in = decompressor.total_in();
330            let before_out = decompressor.total_out();
331            let status = decompressor
332                .decompress(input, &mut out, FlushDecompress::None)
333                .map_err(|e| ParseError::Decompress {
334                    source: std::io::Error::new(std::io::ErrorKind::InvalidData, e),
335                })?;
336            let consumed = (decompressor.total_in() - before_in) as usize;
337            let produced = (decompressor.total_out() - before_out) as usize;
338            if produced > 0 {
339                w.write_all(&out[..produced])?;
340            }
341            input = &input[consumed..];
342            match status {
343                Status::StreamEnd => return Ok(()),
344                Status::Ok | Status::BufError => {
345                    // Forward progress is required. SqPack DEFLATE blocks are
346                    // self-contained — the trailing alignment padding the parser
347                    // intentionally leaves in `self.data` is past the
348                    // end-of-stream marker, so the decoder must signal
349                    // StreamEnd before exhausting the input. A no-progress loop
350                    // means the payload is corrupt or truncated.
351                    if consumed == 0 && produced == 0 {
352                        return Err(ParseError::Decompress {
353                            source: std::io::Error::new(
354                                std::io::ErrorKind::InvalidData,
355                                "DEFLATE stream made no forward progress",
356                            ),
357                        });
358                    }
359                }
360            }
361        }
362    }
363
    /// Returns `true` if the block stores a raw DEFLATE stream.
    ///
    /// `false` means the block carries already-decompressed bytes; the parser
    /// sets this when the wire header's `compressed_size` equals the `0x7d00`
    /// sentinel.
    #[must_use]
    pub fn is_compressed(&self) -> bool {
        self.is_compressed
    }
372
    /// Returns the block's expected decompressed length in bytes, as declared
    /// by the block header (or passed to [`new`](Self::new)).
    #[must_use]
    pub fn decompressed_size(&self) -> usize {
        self.decompressed_size
    }
378
    /// Returns the byte length of the block's stored `data` slab.
    ///
    /// For compressed blocks this is the length of the DEFLATE payload as the
    /// parser stored it (which may include trailing 128-byte alignment padding
    /// that the decoder ignores past the end-of-stream marker). For
    /// uncompressed blocks produced by the parser it equals
    /// [`decompressed_size`](Self::decompressed_size).
    #[must_use]
    pub fn data_len(&self) -> usize {
        self.data.len()
    }
389
390    /// Return the block's decompressed bytes as a [`Cow`].
391    ///
392    /// Uncompressed blocks return `Cow::Borrowed(&self.data)` — a zero-copy
393    /// borrow into the block's existing buffer. Compressed blocks decompress
394    /// into a newly allocated `Vec` and return `Cow::Owned`.
395    ///
396    /// Use [`decompress_into`](SqpkCompressedBlock::decompress_into) instead
397    /// when writing to a file handle, to avoid the intermediate allocation.
398    ///
399    /// # Errors
400    ///
401    /// - [`ParseError::Decompress`] — the DEFLATE stream is malformed or
402    ///   truncated (compressed blocks only).
403    pub fn decompress(&self) -> crate::ParseResult<Cow<'_, [u8]>> {
404        if self.is_compressed {
405            // Cap pre-alloc: `decompressed_size` originates from the parsed
406            // block header. See `PREALLOC_CAP` (above) for rationale.
407            let mut out = Vec::with_capacity(self.decompressed_size.min(PREALLOC_CAP));
408            self.decompress_into(&mut out)?;
409            Ok(Cow::Owned(out))
410        } else {
411            Ok(Cow::Borrowed(&self.data))
412        }
413    }
414}
415
/// SQPK `F` command body: a file-level operation on the game install tree.
///
/// Unlike the block-oriented commands (`A`, `D`, `E`) that target `SqPack`
/// archive internals, `F` operates on whole files in the install directory.
/// The operation to perform is selected by [`operation`](SqpkFile::operation).
///
/// ## Wire format
///
/// ```text
/// ┌──────────────────────────────────────────────────────────────────────────┐
/// │ operation    : u8      b'A', b'R', b'D', or b'M'                        │  byte 0
/// │ <padding>    : [u8; 2] (always zero)                                     │  bytes 1–2
/// │ file_offset  : u64 BE  destination byte offset within the target file    │  bytes 3–10
/// │ file_size    : u64 BE  declared size of the target file after operation  │  bytes 11–18
/// │ path_len     : u32 BE  byte length of the path field (including NUL)     │  bytes 19–22
/// │ expansion_id : u16 BE  expansion folder selector for `RemoveAll`         │  bytes 23–24
/// │ <padding>    : [u8; 2] (always zero)                                     │  bytes 25–26
/// │ path         : [u8; path_len]  NUL-terminated UTF-8 path                │  bytes 27–…
/// │ [blocks]     : SqpkCompressedBlock…  (only for `AddFile`)                │
/// └──────────────────────────────────────────────────────────────────────────┘
/// ```
///
/// `file_offset` and `file_size` are stored as big-endian `u64` in the wire
/// format. `file_offset` is range-checked against `i64::MAX` at parse time —
/// values with the high bit set (which would round-trip as a negative `i64`
/// in the legacy wire interpretation) are rejected with
/// [`ParseError::NegativeFileOffset`] before the chunk is constructed.
///
/// Trailing NUL bytes in `path` are stripped during parsing;
/// [`path`](SqpkFile::path) always contains valid UTF-8 without the terminator.
///
/// For `AddFile` operations the remaining bytes in the command body after the
/// path form a sequence of [`SqpkCompressedBlock`]s (see that type's
/// documentation for the block wire format). For all other operations the block
/// list is empty.
///
/// ## Errors
///
/// Parsing returns a [`crate::ParseError`] if:
/// - The operation byte is not `b'A'`, `b'R'`, `b'D'`, or `b'M'`
///   → [`ParseError::UnknownFileOperation`].
/// - `file_offset` has its high bit set
///   → [`ParseError::NegativeFileOffset`].
/// - `path_len` exceeds the bytes remaining in the body
///   → [`ParseError::InvalidField`].
/// - The path bytes are not valid UTF-8 → [`ParseError::Utf8Error`].
/// - A block header contains a negative `header_size` or `decompressed_size`,
///   a negative non-sentinel `compressed_size`, or sizes that do not fit
///   inside the aligned block length
///   → [`ParseError::InvalidField`].
/// - The body is too short → [`ParseError::Io`].
#[derive(Debug)]
pub struct SqpkFile {
    /// The file operation to perform.
    pub operation: SqpkFileOperation,
    /// Destination byte offset within the target file.
    ///
    /// For `AddFile`: if zero, the target file is truncated to zero before
    /// writing (complete replacement); if positive, writing begins at this
    /// byte offset in the existing file. Values with the high bit set in the
    /// wire `u64` are rejected at parse time with
    /// [`ParseError::NegativeFileOffset`], so every value reaching here fits
    /// in an `i64`.
    ///
    /// Unused by `RemoveAll`, `DeleteFile`, and `MakeDirTree`.
    pub file_offset: u64,
    /// Declared total size of the target file after the operation, in bytes.
    ///
    /// Informational; the apply layer does not use this to pre-allocate or
    /// truncate the file (truncation is controlled by `file_offset == 0`).
    pub file_size: u64,
    /// Expansion folder selector used by `RemoveAll`.
    ///
    /// `0` → `ffxiv` (base game), `n > 0` → `ex<n>`. Corresponds to the
    /// high byte of `sub_id` in block-oriented commands.
    pub expansion_id: u16,
    /// Relative path to the target file or directory under the game install root.
    ///
    /// NUL terminator is stripped during parsing. For `AddFile` / `DeleteFile`
    /// this is joined with the install root via `generic_path`. For `MakeDirTree`
    /// it is the directory tree to create.
    pub path: String,
    /// Byte offset of each block's data payload — measured from the start of
    /// the SQPK command body slice — after skipping the block's 16-byte header.
    ///
    /// `block_source_offsets[i]` corresponds to `blocks[i]`. Adding the chunk's
    /// absolute position in the patch file to this offset gives the patch-file
    /// byte offset where the block's data begins, enabling `IndexedZiPatch`
    /// random-access reads that do not need to decompress the full stream.
    ///
    /// The parser always adds a fixed 16 to the block's start position,
    /// independent of the block header's own `header_size` field.
    ///
    /// Empty for all operations other than `AddFile`.
    pub block_source_offsets: Vec<u64>,
    /// Inline compressed-or-raw block payloads that make up the file content.
    ///
    /// Only populated for `AddFile`; empty for `RemoveAll`, `DeleteFile`, and
    /// `MakeDirTree`. Each block is decompressed in sequence into the target
    /// file by the apply layer. See [`SqpkCompressedBlock`] for the block wire
    /// format and DEFLATE discrimination logic.
    pub blocks: Vec<SqpkCompressedBlock>,
}
513
514// Parse a SQPK 'F' command body into a SqpkFile.
515//
516// Reads the fixed-size header fields (operation, offsets, sizes, path),
517// then — for AddFile only — iterates over the remaining bytes in `body`,
518// parsing SqpkCompressedBlock entries until the cursor reaches the end.
519// The block source offsets are recorded as the cursor position + 16 (to
520// skip the block's own 16-byte header) before each SqpkCompressedBlock::read
521// call.
522pub(crate) fn parse(body: &[u8]) -> Result<SqpkFile> {
523    let mut c = Cursor::new(body);
524
525    let header = FileCommandHeader::read(&mut c)?;
526    let operation = match header.operation {
527        b'A' => SqpkFileOperation::AddFile,
528        b'R' => SqpkFileOperation::RemoveAll,
529        b'D' => SqpkFileOperation::DeleteFile,
530        b'M' => SqpkFileOperation::MakeDirTree,
531        b => {
532            return Err(ParseError::UnknownFileOperation(b));
533        }
534    };
535
536    // The wire field is u64 BE, but the legacy interpretation treated it as
537    // a signed i64 — values with the high bit set surface as ParseError so
538    // the public `file_offset: u64` only ever carries non-negative offsets
539    // (i.e. fits in i64 as well). The error variant keeps the raw value
540    // re-encoded as the i64 the legacy reader would have produced.
541    if header.file_offset > i64::MAX as u64 {
542        return Err(ParseError::NegativeFileOffset(header.file_offset as i64));
543    }
544    let file_offset = header.file_offset;
545    let file_size = header.file_size;
546    let path_len = header.path_len as usize;
547    let expansion_id = header.expansion_id;
548
549    // Cap path_len against remaining body bytes — without this an attacker
550    // can declare a 4 GiB path and OOM the patcher (issue #30).
551    let remaining = body.len().saturating_sub(c.position() as usize);
552    if path_len > remaining {
553        return Err(ParseError::InvalidField {
554            context: "SqpkFile path_len exceeds remaining body bytes",
555        });
556    }
557    let path_bytes = read_exact_vec(&mut c, path_len)?;
558    let path = String::from_utf8(path_bytes)
559        .map(|s| s.trim_end_matches('\0').to_owned())
560        .map_err(ParseError::Utf8Error)?;
561
562    let (blocks, block_source_offsets) = if matches!(operation, SqpkFileOperation::AddFile) {
563        let mut blocks = Vec::new();
564        let mut offsets = Vec::new();
565        while (c.position() as usize) < body.len() {
566            // Record offset of the data payload (after the fixed 16-byte block header).
567            offsets.push(c.position() + 16);
568            blocks.push(SqpkCompressedBlock::read(&mut c)?);
569        }
570        (blocks, offsets)
571    } else {
572        (Vec::new(), Vec::new())
573    };
574
575    Ok(SqpkFile {
576        operation,
577        file_offset,
578        file_size,
579        expansion_id,
580        path,
581        block_source_offsets,
582        blocks,
583    })
584}
585
586#[cfg(test)]
587mod tests {
588    use super::*;
589
590    fn make_header(
591        op: u8,
592        file_offset: u64,
593        file_size: u64,
594        path: &[u8],
595        expansion_id: u16,
596    ) -> Vec<u8> {
597        let mut body = Vec::new();
598        body.push(op);
599        body.extend_from_slice(&[0u8; 2]); // alignment
600        body.extend_from_slice(&file_offset.to_be_bytes());
601        body.extend_from_slice(&file_size.to_be_bytes());
602        body.extend_from_slice(&(path.len() as u32).to_be_bytes());
603        body.extend_from_slice(&expansion_id.to_be_bytes());
604        body.extend_from_slice(&[0u8; 2]); // padding
605        body.extend_from_slice(path);
606        body
607    }
608
609    #[test]
610    fn parses_add_file_no_blocks() {
611        let body = make_header(b'A', 0, 512, b"test\0", 1);
612        let cmd = parse(&body).unwrap();
613        assert!(matches!(cmd.operation, SqpkFileOperation::AddFile));
614        assert_eq!(cmd.file_offset, 0);
615        assert_eq!(cmd.file_size, 512);
616        assert_eq!(cmd.expansion_id, 1);
617        assert_eq!(cmd.path, "test");
618        assert!(cmd.blocks.is_empty());
619        assert!(cmd.block_source_offsets.is_empty());
620    }
621
622    #[test]
623    fn parses_add_file_uncompressed_block() {
624        // block_len = ((8 + 143) & !127) = 128; read 8 data bytes + skip 104 padding
625        let mut body = make_header(b'A', 0, 0, b"\0", 0);
626        // header bytes: 1+2+8+8+4+2+2+1 = 28 — block starts at offset 28
627        body.extend_from_slice(&16i32.to_le_bytes()); // header_size
628        body.extend_from_slice(&0u32.to_le_bytes()); // pad
629        body.extend_from_slice(&0x7d00i32.to_le_bytes()); // compressed_size = uncompressed sentinel
630        body.extend_from_slice(&8i32.to_le_bytes()); // decompressed_size
631        body.extend_from_slice(&[0xABu8; 8]); // data
632        body.extend_from_slice(&[0u8; 104]); // alignment padding
633
634        let cmd = parse(&body).unwrap();
635        assert_eq!(cmd.blocks.len(), 1);
636        let block = &cmd.blocks[0];
637        assert!(!block.is_compressed);
638        assert_eq!(block.decompressed_size, 8);
639        assert_eq!(block.data.len(), 8);
640        assert!(block.data.iter().all(|&b| b == 0xAB));
641        assert_eq!(block.decompress().unwrap(), vec![0xABu8; 8]);
642        assert_eq!(cmd.block_source_offsets, vec![44u64]); // 28 (header) + 16 (block header)
643    }
644
645    #[test]
646    fn rejects_negative_file_offset_at_parse() {
647        // A `u64` wire value with the high bit set must surface as
648        // `ParseError::NegativeFileOffset(i64)` — the error preserves the raw
649        // value as the legacy signed reading for diagnostics.
650        let body = make_header(b'A', u64::MAX, 0, b"\0", 0);
651        match parse(&body) {
652            Err(ParseError::NegativeFileOffset(v)) => assert_eq!(v, -1),
653            other => panic!("expected NegativeFileOffset(-1), got {other:?}"),
654        }
655    }
656
657    #[test]
658    fn parses_remove_all_operation() {
659        let body = make_header(b'R', 0, 0, b"\0", 0);
660        let cmd = parse(&body).unwrap();
661        assert!(matches!(cmd.operation, SqpkFileOperation::RemoveAll));
662        assert!(cmd.blocks.is_empty());
663        assert!(cmd.block_source_offsets.is_empty());
664    }
665
666    #[test]
667    fn parses_delete_file_operation() {
668        let body = make_header(b'D', 0, 0, b"sqpack/foo.dat\0", 0);
669        let cmd = parse(&body).unwrap();
670        assert!(matches!(cmd.operation, SqpkFileOperation::DeleteFile));
671        assert_eq!(cmd.path, "sqpack/foo.dat");
672    }
673
674    #[test]
675    fn parses_make_dir_tree_operation() {
676        let body = make_header(b'M', 0, 0, b"sqpack/ex1\0", 0);
677        let cmd = parse(&body).unwrap();
678        assert!(matches!(cmd.operation, SqpkFileOperation::MakeDirTree));
679        assert_eq!(cmd.path, "sqpack/ex1");
680    }
681
682    #[test]
683    fn rejects_unknown_operation() {
684        let body = make_header(b'Z', 0, 0, b"\0", 0);
685        assert!(parse(&body).is_err());
686    }
687
688    fn block_with_sizes(header_size: i32, compressed_size: i32, decompressed_size: i32) -> Vec<u8> {
689        let mut body = make_header(b'A', 0, 0, b"\0", 0);
690        body.extend_from_slice(&header_size.to_le_bytes());
691        body.extend_from_slice(&0u32.to_le_bytes()); // pad
692        body.extend_from_slice(&compressed_size.to_le_bytes());
693        body.extend_from_slice(&decompressed_size.to_le_bytes());
694        body
695    }
696
697    #[test]
698    fn rejects_negative_header_size() {
699        let body = block_with_sizes(-1, 0x7d00, 0);
700        let Err(ParseError::InvalidField { context }) = parse(&body) else {
701            panic!("expected InvalidField for negative header_size");
702        };
703        assert!(
704            context.contains("header_size"),
705            "unexpected context: {context}"
706        );
707    }
708
709    #[test]
710    fn rejects_negative_decompressed_size() {
711        let body = block_with_sizes(16, 0x7d00, -1);
712        let Err(ParseError::InvalidField { context }) = parse(&body) else {
713            panic!("expected InvalidField for negative decompressed_size");
714        };
715        assert!(
716            context.contains("decompressed_size"),
717            "unexpected context: {context}"
718        );
719    }
720
721    #[test]
722    fn rejects_negative_compressed_size() {
723        // is_compressed = (compressed_size != 0x7d00) — pass -1 (not 0x7d00).
724        let body = block_with_sizes(16, -1, 8);
725        let Err(ParseError::InvalidField { context }) = parse(&body) else {
726            panic!("expected InvalidField for negative compressed_size");
727        };
728        assert!(
729            context.contains("compressed_size"),
730            "unexpected context: {context}"
731        );
732    }
733
734    #[test]
735    fn rejects_invalid_utf8_in_path() {
736        // 0xFF is not valid UTF-8 — Utf8Error path on `String::from_utf8`.
737        let body = make_header(b'D', 0, 0, &[0xFFu8], 0);
738        assert!(matches!(parse(&body), Err(ParseError::Utf8Error(_))));
739    }
740
741    #[test]
742    fn decompress_into_uncompressed_writes_data_verbatim() {
743        // Uncompressed branch: w.write_all(&self.data).
744        let block = SqpkCompressedBlock::new(false, 5, b"hello".to_vec());
745        let mut out = Vec::new();
746        block.decompress_into(&mut out).unwrap();
747        assert_eq!(out, b"hello");
748    }
749
750    #[test]
751    fn decompress_into_with_reuses_decompressor_across_blocks() {
752        // Verifies the contract of `decompress_into_with`: the same
753        // `Decompress` instance can be threaded through multiple consecutive
754        // compressed blocks, with `reset` between calls, and produce identical
755        // output to `decompress_into`. This is the apply-layer hot path.
756        use flate2::Compression;
757        use flate2::write::DeflateEncoder;
758        use std::io::Write;
759
760        let payload_a: &[u8] = b"alpha alpha alpha beta beta gamma";
761        let payload_b: &[u8] = b"the quick brown fox jumps over the lazy dog";
762
763        let compress = |raw: &[u8]| -> SqpkCompressedBlock {
764            let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
765            enc.write_all(raw).unwrap();
766            SqpkCompressedBlock::new(true, raw.len(), enc.finish().unwrap())
767        };
768        let a = compress(payload_a);
769        let b = compress(payload_b);
770
771        let mut state = Decompress::new(false);
772        let mut out_a = Vec::new();
773        a.decompress_into_with(&mut state, &mut out_a).unwrap();
774        assert_eq!(out_a, payload_a, "first block must round-trip");
775
776        let mut out_b = Vec::new();
777        b.decompress_into_with(&mut state, &mut out_b).unwrap();
778        assert_eq!(out_b, payload_b, "reused state must reset and round-trip");
779    }
780
781    #[test]
782    fn decompress_into_with_uncompressed_skips_decompressor() {
783        // The uncompressed branch must never touch the supplied state — it
784        // delegates to `write_all`. Verify the state's `total_in`/`total_out`
785        // are unchanged after the call.
786        let block = SqpkCompressedBlock::new(false, 5, b"hello".to_vec());
787        let mut state = Decompress::new(false);
788        let before_in = state.total_in();
789        let before_out = state.total_out();
790        let mut out = Vec::new();
791        block.decompress_into_with(&mut state, &mut out).unwrap();
792        assert_eq!(out, b"hello");
793        assert_eq!(state.total_in(), before_in);
794        assert_eq!(state.total_out(), before_out);
795    }
796
797    #[test]
798    fn decompress_into_with_propagates_corrupt_stream_error() {
799        // Garbage DEFLATE payload must surface as ParseError::Decompress
800        // rather than panic or loop forever.
801        let block = SqpkCompressedBlock::new(true, 16, vec![0xFFu8; 16]);
802        let mut state = Decompress::new(false);
803        let mut out = Vec::new();
804        assert!(matches!(
805            block.decompress_into_with(&mut state, &mut out),
806            Err(ParseError::Decompress { .. })
807        ));
808    }
809
810    #[test]
811    fn decompress_returns_borrowed_for_uncompressed() {
812        // Cow::Borrowed branch — no allocation, points at the block's data.
813        let block = SqpkCompressedBlock::new(false, 4, b"data".to_vec());
814        let cow = block.decompress().unwrap();
815        assert!(matches!(cow, Cow::Borrowed(_)));
816        assert_eq!(&*cow, b"data");
817    }
818
819    #[test]
820    fn decompress_into_compressed_propagates_decompress_error() {
821        // Garbage DEFLATE payload — the `.map_err(|e| ParseError::Decompress { source: e })?` arm.
822        let block = SqpkCompressedBlock::new(true, 16, vec![0xFFu8; 16]);
823        let mut out = Vec::new();
824        assert!(matches!(
825            block.decompress_into(&mut out),
826            Err(ParseError::Decompress { .. })
827        ));
828        // And via the `decompress()` wrapper — the `?` error arm at line 106.
829        assert!(matches!(
830            block.decompress(),
831            Err(ParseError::Decompress { .. })
832        ));
833    }
834
835    #[test]
836    fn parses_compressed_block() {
837        use flate2::Compression;
838        use flate2::write::DeflateEncoder;
839        use std::io::Write;
840
841        let raw: &[u8] = b"hello compressed world";
842        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
843        enc.write_all(raw).unwrap();
844        let compressed = enc.finish().unwrap();
845
846        let header_size: i32 = 16;
847        let compressed_size = compressed.len() as i32;
848        let decompressed_size = raw.len() as i32;
849        let block_len = ((compressed_size as u32 + 143) & !127) as usize;
850        let trailing_pad = block_len - header_size as usize - compressed.len();
851
852        // header bytes: 1+2+8+8+4+2+2+1 = 28 — block starts at offset 28
853        let mut body = make_header(b'A', 0, 0, b"\0", 0);
854        body.extend_from_slice(&header_size.to_le_bytes());
855        body.extend_from_slice(&0u32.to_le_bytes()); // pad
856        body.extend_from_slice(&compressed_size.to_le_bytes());
857        body.extend_from_slice(&decompressed_size.to_le_bytes());
858        body.extend_from_slice(&compressed);
859        body.extend_from_slice(&vec![0u8; trailing_pad]);
860
861        let cmd = parse(&body).unwrap();
862        assert_eq!(cmd.blocks.len(), 1);
863        let block = &cmd.blocks[0];
864        assert!(block.is_compressed);
865        assert_eq!(block.decompressed_size, raw.len());
866        assert_eq!(block.decompress().unwrap(), raw);
867        assert_eq!(cmd.block_source_offsets, vec![44u64]); // 28 (header) + 16 (block header)
868    }
869
870    #[test]
871    fn parse_rejects_oversized_path_len_issue_30() {
872        // Regression for issue #30: a u32 `path_len` from untrusted patch
873        // bytes was fed straight into `Vec::with_capacity`, allowing a
874        // malicious patch to trigger a ~4 GiB allocation and OOM-abort the
875        // process. The parser must now reject such a header with
876        // `InvalidField` before any allocation occurs.
877        //
878        // Original 32-byte fuzz input (from the `parser_sqpk` harness; byte 0
879        // is the harness's sub-command selector, dropped here):
880        //   2c 41 e5 11 00 36 36 36 36 00 00 00 00 00 00 ff
881        //   ff ff ff ff ff ff 00 00 21 00 ac 00 00 00 00 00
882        let body: &[u8] = &[
883            0x41, 0xe5, 0x11, // op=AddFile, alignment
884            0x00, 0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, // file_offset
885            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, // file_size
886            0xff, 0xff, 0xff, 0xff, // path_len = u32::MAX
887            0xff, 0xff, // expansion_id
888            0x00, 0x00, // padding
889            0x21, 0x00, 0xac, 0x00, // remaining body bytes
890        ];
891        assert_eq!(body.len(), 31, "test input is the post-selector body");
892        let err = parse(body).expect_err("oversized path_len must error");
893        assert!(
894            matches!(
895                err,
896                ParseError::InvalidField { context }
897                    if context.contains("path_len")
898            ),
899            "expected InvalidField on oversized path_len, got: {err:?}"
900        );
901    }
902}