Skip to main content

zipatch_rs/chunk/sqpk/
file.rs

1use crate::reader::{PREALLOC_CAP, ReadExt};
2use crate::{ParseError, ParseResult as Result};
3use flate2::read::DeflateDecoder;
4use flate2::{Decompress, FlushDecompress, Status};
5use std::borrow::Cow;
6use std::io::{Cursor, Read, Write};
7
/// Operation byte of a SQPK `F` command; selects what the command does to
/// the game install tree.
///
/// Encoded as a single ASCII byte in the wire format (the first byte of the
/// command body):
/// `b'A'` → `AddFile`, `b'R'` → `RemoveAll`, `b'D'` → `DeleteFile`,
/// `b'M'` → `MakeDirTree`. Any other byte is rejected at parse time with
/// [`ParseError::UnknownFileOperation`], which carries the offending byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SqpkFileOperation {
    /// `A` — write the inline compressed-block payload into a file under the
    /// game install root, creating it (or overwriting it) as needed.
    ///
    /// Parent directories are created automatically. If `file_offset` is zero,
    /// the target file is truncated to zero before writing (full replacement);
    /// if `file_offset` is non-zero, only the covered range is overwritten.
    ///
    /// This is the only operation for which the parser reads blocks; every
    /// other operation yields an empty block list.
    AddFile,
    /// `R` — delete all files in the expansion folder (`sqpack/<expansion>/`
    /// and `movie/<expansion>/`) that are not on the keep-list.
    ///
    /// Kept unconditionally: `.var` files and `00000.bk2`–`00003.bk2`.
    /// Files `00004.bk2` and beyond are deleted. `expansion_id` selects
    /// the target expansion folder.
    RemoveAll,
    /// `D` — delete a single file at the path given by [`SqpkFile::path`].
    DeleteFile,
    /// `M` — create the directory tree at [`SqpkFile::path`] (equivalent to
    /// `std::fs::create_dir_all`). Idempotent.
    MakeDirTree,
}
37
/// One block of a [`SqpkFile`] `AddFile` payload, which may be DEFLATE-compressed
/// or stored raw.
///
/// `SqpkFile` payloads are split into a sequence of these blocks. Each block
/// begins with a 16-byte little-endian header that describes the compressed
/// and decompressed sizes, followed by the data bytes padded to a 128-byte
/// boundary.
///
/// ## Compression sentinel
///
/// The `compressed_size` field in the wire header uses the value `0x7d00`
/// (decimal **32000**) as a sentinel meaning "this block is not compressed".
/// Any other value means the data bytes are a raw DEFLATE stream
/// (no zlib wrapper, no gzip header — just RFC 1951 raw deflate).
///
/// ## Wire format of one block (all little-endian)
///
/// ```text
/// ┌─────────────────────────────────────────────────────────────────────┐
/// │ header_size     : i32 LE   always 16 in practice                   │  bytes 0–3
/// │ <pad>           : u32 LE   always zero                              │  bytes 4–7
/// │ compressed_size : i32 LE   byte count of DEFLATE data               │  bytes 8–11
/// │                             OR 0x7d00 (32000) if uncompressed       │
/// │ decompressed_size : i32 LE  byte count of decompressed output       │  bytes 12–15
/// │ data            : [u8]     compressed or raw bytes                  │  bytes 16–…
/// │ <alignment>     : [u8]     zero-padding to 128-byte boundary        │
/// └─────────────────────────────────────────────────────────────────────┘
/// ```
///
/// ## 128-byte alignment formula
///
/// The total byte count occupied by a block (header + data + alignment) is:
///
/// ```text
/// block_len = (data_len + 143) & !127
/// ```
///
/// where `data_len` is `compressed_size` if compressed, or `decompressed_size`
/// if uncompressed. The constant 143 is `128 - 1 + 16`: the 16-byte header,
/// which is not included in `data_len`, is added, and the sum is then rounded
/// up to the next 128-byte boundary. After the header the parser consumes
/// `block_len - header_size` bytes. For uncompressed blocks only the first
/// `decompressed_size` of those bytes are stored and the alignment padding is
/// skipped; for compressed blocks the whole region — DEFLATE stream plus
/// trailing padding — is stored, and the decoder ignores everything past the
/// end-of-stream marker.
///
/// ## Visibility and construction
///
/// `SqpkCompressedBlock` is `pub` so that it appears in rustdoc and can be
/// named in `SqpkFile::blocks`. Its fields are private: a value is obtained
/// either via [`new`](SqpkCompressedBlock::new) (intended for tests) or by
/// parsing a [`SqpkFile`].
#[derive(Debug)]
pub struct SqpkCompressedBlock {
    // true  → data holds raw DEFLATE bytes (compressed_size != 0x7d00)
    // false → data holds the exact decompressed bytes (compressed_size == 0x7d00)
    is_compressed: bool,
    // Expected output size in bytes, taken from the block header; used as a
    // (capped) pre-allocation hint for the decompression buffer.
    decompressed_size: usize,
    // Compressed blocks: the raw DEFLATE stream as read from the wire. When
    //   produced by read() this also includes the trailing 128-byte alignment
    //   padding, which sits after the DEFLATE end-of-stream marker.
    // Uncompressed blocks: the exact payload bytes, already stripped of padding.
    data: Vec<u8>,
}
98
99impl SqpkCompressedBlock {
100    /// Construct a block directly from its component parts.
101    ///
102    /// This constructor exists primarily for unit tests. Production code
103    /// creates blocks by parsing a [`SqpkFile`] from a patch byte stream.
104    ///
105    /// - `is_compressed`: `true` if `data` is a raw DEFLATE stream.
106    /// - `decompressed_size`: the expected number of bytes after decompression;
107    ///   used to pre-allocate the output buffer in
108    ///   [`decompress`](SqpkCompressedBlock::decompress).
109    /// - `data`: raw compressed bytes or exact uncompressed bytes, depending
110    ///   on `is_compressed`.
111    #[must_use]
112    pub fn new(is_compressed: bool, decompressed_size: usize, data: Vec<u8>) -> Self {
113        Self {
114            is_compressed,
115            decompressed_size,
116            data,
117        }
118    }
119
120    // Parse one block from the reader, consuming header + data + alignment padding.
121    //
122    // Reads the 16-byte little-endian block header, determines whether the block
123    // is compressed (compressed_size != 0x7d00), computes the 128-byte-aligned
124    // total length via (data_len + 143) & !127, then reads exactly that many
125    // bytes minus the header size — leaving the reader positioned at the start
126    // of the next block.
127    fn read<R: Read>(r: &mut R) -> Result<Self> {
128        // 16-byte block header, all fields little-endian:
129        //   i32 header_size  (always 16)
130        //   u32 pad          (always 0)
131        //   i32 compressed_size   (0x7d00 = uncompressed sentinel)
132        //   i32 decompressed_size
133        let header_size_raw = r.read_i32_le()?;
134        r.skip(4)?; // pad — always zero, no semantic content
135        let compressed_size = r.read_i32_le()?;
136        let decompressed_size_raw = r.read_i32_le()?;
137
138        if header_size_raw < 0 {
139            return Err(ParseError::InvalidField {
140                context: "negative header_size in block",
141            });
142        }
143        if decompressed_size_raw < 0 {
144            return Err(ParseError::InvalidField {
145                context: "negative decompressed_size in block",
146            });
147        }
148        // 0x7d00 (32000) is the sentinel for "store raw, not compressed".
149        // Any other value is the byte count of the DEFLATE stream.
150        let is_compressed = compressed_size != 0x7d00;
151        if is_compressed && compressed_size < 0 {
152            return Err(ParseError::InvalidField {
153                context: "negative compressed_size in block",
154            });
155        }
156
157        let header_size = header_size_raw as usize;
158        let decompressed_size = decompressed_size_raw as usize;
159        // data_len is the logical size used for alignment: for compressed blocks
160        // it is the compressed byte count; for uncompressed it is the raw byte count.
161        let data_len = if is_compressed {
162            compressed_size
163        } else {
164            decompressed_size_raw
165        };
166        // Round data_len up to the next 128-byte boundary, accounting for the
167        // 16-byte header that precedes the data in the stream.
168        // Formula: (data_len + 128 - 1 + (header_size=16)) & !127
169        //        = (data_len + 143) & !127
170        let block_len = ((data_len as u32 + 143) & !127u32) as usize;
171        // Underflow guard: a malformed header where `header_size` exceeds the
172        // aligned `block_len` would wrap to a huge size in release builds.
173        let data_region = block_len
174            .checked_sub(header_size)
175            .ok_or(ParseError::InvalidField {
176                context: "block_len smaller than header_size",
177            })?;
178        let data = if is_compressed {
179            // Read the DEFLATE payload plus any alignment padding. For compressed
180            // blocks we store everything (padding included) because DeflateDecoder
181            // stops at the end of the DEFLATE stream before reading into padding.
182            r.read_exact_vec(data_region)?
183        } else {
184            // Uncompressed: read exactly decompressed_size bytes of payload,
185            // then skip any alignment padding so the reader is positioned at
186            // the start of the next block.
187            let padding =
188                data_region
189                    .checked_sub(decompressed_size)
190                    .ok_or(ParseError::InvalidField {
191                        context: "block data region smaller than decompressed_size",
192                    })?;
193            let d = r.read_exact_vec(decompressed_size)?;
194            r.skip(padding as u64)?;
195            d
196        };
197        Ok(SqpkCompressedBlock {
198            is_compressed,
199            decompressed_size,
200            data,
201        })
202    }
203
204    /// Stream the block's decompressed bytes into `w`.
205    ///
206    /// For uncompressed blocks, `w.write_all(&self.data)` is called directly.
207    /// For compressed blocks, the data is piped through [`DeflateDecoder`] (raw
208    /// DEFLATE, RFC 1951 — no zlib or gzip wrapper) before being written.
209    ///
210    /// This is the primary write path used by the apply layer: each block in a
211    /// [`SqpkFile`] `AddFile` operation is streamed into the target file handle
212    /// in sequence.
213    ///
214    /// # Errors
215    ///
216    /// - [`ParseError::Decompress`] — the DEFLATE stream is malformed or
217    ///   truncated.
218    /// - [`ParseError::Io`] — `w.write_all` failed.
219    pub fn decompress_into(&self, w: &mut impl Write) -> Result<()> {
220        if self.is_compressed {
221            std::io::copy(&mut DeflateDecoder::new(self.data.as_slice()), w)
222                .map_err(|e| ParseError::Decompress { source: e })?;
223        } else {
224            w.write_all(&self.data)?;
225        }
226        Ok(())
227    }
228
229    /// Stream the block's decompressed bytes into `w`, reusing a caller-owned
230    /// [`Decompress`] state across blocks.
231    ///
232    /// Equivalent to [`decompress_into`](SqpkCompressedBlock::decompress_into)
233    /// in behaviour and error semantics, but avoids the per-call ~100 KiB
234    /// zlib-state allocation that [`DeflateDecoder::new`] would otherwise
235    /// pay. The apply layer threads a single `Decompress` through every
236    /// block in a multi-block `SqpkFile::AddFile` chunk; uncompressed blocks
237    /// short-circuit to `write_all` and leave the decompressor untouched.
238    ///
239    /// `decompressor` is reset via [`Decompress::reset(false)`](Decompress::reset)
240    /// at the start of every compressed block, so callers may pass an
241    /// already-used state without manually resetting it.
242    ///
243    /// # Errors
244    ///
245    /// - [`ParseError::Decompress`] — the DEFLATE stream is malformed or
246    ///   the manual feed loop made no forward progress (corrupt or truncated
247    ///   payload).
248    /// - [`ParseError::Io`] — `w.write_all` failed.
249    pub fn decompress_into_with(
250        &self,
251        decompressor: &mut Decompress,
252        w: &mut impl Write,
253    ) -> Result<()> {
254        if !self.is_compressed {
255            w.write_all(&self.data)?;
256            return Ok(());
257        }
258
259        // Raw DEFLATE — match the legacy `DeflateDecoder::new(_)` zlib_header=false.
260        decompressor.reset(false);
261        // 8 KiB output buffer matches `std::io::copy`'s default and is plenty
262        // for the per-iteration output the underlying miniz_oxide / zlib-ng
263        // backends emit. Stays on the stack — no allocation per block.
264        let mut out = [0u8; 8 * 1024];
265        let mut input: &[u8] = &self.data;
266        loop {
267            let before_in = decompressor.total_in();
268            let before_out = decompressor.total_out();
269            let status = decompressor
270                .decompress(input, &mut out, FlushDecompress::None)
271                .map_err(|e| ParseError::Decompress {
272                    source: std::io::Error::new(std::io::ErrorKind::InvalidData, e),
273                })?;
274            let consumed = (decompressor.total_in() - before_in) as usize;
275            let produced = (decompressor.total_out() - before_out) as usize;
276            if produced > 0 {
277                w.write_all(&out[..produced])?;
278            }
279            input = &input[consumed..];
280            match status {
281                Status::StreamEnd => return Ok(()),
282                Status::Ok | Status::BufError => {
283                    // Forward progress is required. SqPack DEFLATE blocks are
284                    // self-contained — the trailing alignment padding the parser
285                    // intentionally leaves in `self.data` is past the
286                    // end-of-stream marker, so the decoder must signal
287                    // StreamEnd before exhausting the input. A no-progress loop
288                    // means the payload is corrupt or truncated.
289                    if consumed == 0 && produced == 0 {
290                        return Err(ParseError::Decompress {
291                            source: std::io::Error::new(
292                                std::io::ErrorKind::InvalidData,
293                                "DEFLATE stream made no forward progress",
294                            ),
295                        });
296                    }
297                }
298            }
299        }
300    }
301
302    /// Returns `true` if the block stores a raw DEFLATE stream.
303    ///
304    /// `false` means the block carries already-decompressed bytes (the
305    /// `compressed_size == 0x7d00` sentinel).
306    #[must_use]
307    pub fn is_compressed(&self) -> bool {
308        self.is_compressed
309    }
310
311    /// Returns the block's expected decompressed length in bytes.
312    #[must_use]
313    pub fn decompressed_size(&self) -> usize {
314        self.decompressed_size
315    }
316
317    /// Returns the byte length of the block's stored `data` slab.
318    ///
319    /// For compressed blocks this is the length of the DEFLATE payload as the
320    /// parser stored it (which may include trailing 128-byte alignment padding
321    /// that the decoder ignores past the end-of-stream marker). For
322    /// uncompressed blocks it equals [`decompressed_size`](Self::decompressed_size).
323    #[must_use]
324    pub fn data_len(&self) -> usize {
325        self.data.len()
326    }
327
328    /// Return the block's decompressed bytes as a [`Cow`].
329    ///
330    /// Uncompressed blocks return `Cow::Borrowed(&self.data)` — a zero-copy
331    /// borrow into the block's existing buffer. Compressed blocks decompress
332    /// into a newly allocated `Vec` and return `Cow::Owned`.
333    ///
334    /// Use [`decompress_into`](SqpkCompressedBlock::decompress_into) instead
335    /// when writing to a file handle, to avoid the intermediate allocation.
336    ///
337    /// # Errors
338    ///
339    /// - [`ParseError::Decompress`] — the DEFLATE stream is malformed or
340    ///   truncated (compressed blocks only).
341    pub fn decompress(&self) -> crate::ParseResult<Cow<'_, [u8]>> {
342        if self.is_compressed {
343            // Cap pre-alloc: `decompressed_size` originates from the parsed
344            // block header. See [`crate::reader::PREALLOC_CAP`] for rationale.
345            let mut out = Vec::with_capacity(self.decompressed_size.min(PREALLOC_CAP));
346            self.decompress_into(&mut out)?;
347            Ok(Cow::Owned(out))
348        } else {
349            Ok(Cow::Borrowed(&self.data))
350        }
351    }
352}
353
/// SQPK `F` command body: a file-level operation on the game install tree.
///
/// Unlike the block-oriented commands (`A`, `D`, `E`) that target `SqPack`
/// archive internals, `F` operates on whole files in the install directory.
/// The operation to perform is selected by [`operation`](SqpkFile::operation).
///
/// ## Wire format
///
/// ```text
/// ┌──────────────────────────────────────────────────────────────────────────┐
/// │ operation    : u8      b'A', b'R', b'D', or b'M'                        │  byte 0
/// │ <padding>    : [u8; 2] (always zero)                                     │  bytes 1–2
/// │ file_offset  : u64 BE  destination byte offset within the target file    │  bytes 3–10
/// │ file_size    : u64 BE  declared size of the target file after operation  │  bytes 11–18
/// │ path_len     : u32 BE  byte length of the path field (including NUL)     │  bytes 19–22
/// │ expansion_id : u16 BE  expansion folder selector for `RemoveAll`         │  bytes 23–24
/// │ <padding>    : [u8; 2] (always zero)                                     │  bytes 25–26
/// │ path         : [u8; path_len]  NUL-terminated UTF-8 path                │  bytes 27–…
/// │ [blocks]     : SqpkCompressedBlock…  (only for `AddFile`)                │
/// └──────────────────────────────────────────────────────────────────────────┘
/// ```
///
/// `file_offset` and `file_size` are stored as big-endian `u64` in the wire
/// format. `file_offset` is range-checked against `i64::MAX` at parse time —
/// values with the high bit set (which would round-trip as a negative `i64`
/// in the legacy wire interpretation) are rejected with
/// [`ParseError::NegativeFileOffset`] before the chunk is constructed.
///
/// Trailing NUL bytes in `path` are stripped during parsing;
/// [`path`](SqpkFile::path) always contains a clean UTF-8 string.
///
/// For `AddFile` operations the remaining bytes in the command body after the
/// path form a sequence of [`SqpkCompressedBlock`]s (see that type's
/// documentation for the block wire format). For all other operations the block
/// list is empty.
///
/// ## Errors
///
/// Parsing returns a [`crate::ParseError`] if:
/// - The operation byte is not `b'A'`, `b'R'`, `b'D'`, or `b'M'`
///   → [`ParseError::UnknownFileOperation`].
/// - `file_offset` has its high bit set
///   → [`ParseError::NegativeFileOffset`].
/// - `path_len` exceeds the number of bytes remaining in the body
///   → [`ParseError::InvalidField`].
/// - The path bytes are not valid UTF-8 → [`ParseError::Utf8Error`].
/// - A block header contains a negative `header_size` or `decompressed_size`,
///   or a negative non-sentinel `compressed_size`, or sizes that do not fit
///   inside the 128-byte-aligned block length
///   → [`ParseError::InvalidField`].
/// - The body is too short → [`ParseError::Io`].
#[derive(Debug)]
pub struct SqpkFile {
    /// The file operation to perform.
    pub operation: SqpkFileOperation,
    /// Destination byte offset within the target file.
    ///
    /// For `AddFile`: if zero, the target file is truncated to zero before
    /// writing (complete replacement); if positive, writing begins at this
    /// byte offset in the existing file. Values with the high bit set in the
    /// wire `u64` are rejected at parse time with
    /// [`ParseError::NegativeFileOffset`], so every value reaching here fits
    /// in an `i64`.
    ///
    /// Unused by `RemoveAll`, `DeleteFile`, and `MakeDirTree`.
    pub file_offset: u64,
    /// Declared total size of the target file after the operation, in bytes.
    ///
    /// Informational; the apply layer does not use this to pre-allocate or
    /// truncate the file (truncation is controlled by `file_offset == 0`).
    pub file_size: u64,
    /// Expansion folder selector used by `RemoveAll`.
    ///
    /// `0` → `ffxiv` (base game), `n > 0` → `ex<n>`. Corresponds to the
    /// high byte of `sub_id` in block-oriented commands.
    pub expansion_id: u16,
    /// Relative path to the target file or directory under the game install root.
    ///
    /// Trailing NUL bytes are stripped during parsing. For `AddFile` /
    /// `DeleteFile` this is joined with the install root via `generic_path`.
    /// For `MakeDirTree` it is the directory tree to create.
    pub path: String,
    /// Byte offset of each block's data payload — measured from the start of
    /// the SQPK command body slice — after skipping the block's 16-byte header.
    ///
    /// `block_source_offsets[i]` corresponds to `blocks[i]`. Adding the chunk's
    /// absolute position in the patch file to this offset gives the patch-file
    /// byte offset where the block's data begins, enabling `IndexedZiPatch`
    /// random-access reads that do not need to decompress the full stream.
    ///
    /// Empty for all operations other than `AddFile`.
    pub block_source_offsets: Vec<u64>,
    /// Inline compressed-or-raw block payloads that make up the file content.
    ///
    /// Only populated for `AddFile`; empty for `RemoveAll`, `DeleteFile`, and
    /// `MakeDirTree`. Each block is decompressed in sequence into the target
    /// file by the apply layer. See [`SqpkCompressedBlock`] for the block wire
    /// format and DEFLATE discrimination logic.
    pub blocks: Vec<SqpkCompressedBlock>,
}
451
452// Parse a SQPK 'F' command body into a SqpkFile.
453//
454// Reads the fixed-size header fields (operation, offsets, sizes, path),
455// then — for AddFile only — iterates over the remaining bytes in `body`,
456// parsing SqpkCompressedBlock entries until the cursor reaches the end.
457// The block source offsets are recorded as the cursor position + 16 (to
458// skip the block's own 16-byte header) before each SqpkCompressedBlock::read
459// call.
460pub(crate) fn parse(body: &[u8]) -> Result<SqpkFile> {
461    let mut c = Cursor::new(body);
462
463    let operation = match c.read_u8()? {
464        b'A' => SqpkFileOperation::AddFile,
465        b'R' => SqpkFileOperation::RemoveAll,
466        b'D' => SqpkFileOperation::DeleteFile,
467        b'M' => SqpkFileOperation::MakeDirTree,
468        b => {
469            return Err(ParseError::UnknownFileOperation(b));
470        }
471    };
472    c.skip(2)?; // alignment
473
474    let file_offset_raw = c.read_u64_be()?;
475    // The wire field is u64 BE, but the legacy interpretation treated it as
476    // a signed i64 — values with the high bit set surface as ParseError so
477    // the public `file_offset: u64` only ever carries non-negative offsets
478    // (i.e. fits in i64 as well). The error variant keeps the raw value
479    // re-encoded as the i64 the legacy reader would have produced.
480    if file_offset_raw > i64::MAX as u64 {
481        return Err(ParseError::NegativeFileOffset(file_offset_raw as i64));
482    }
483    let file_offset = file_offset_raw;
484    let file_size = c.read_u64_be()?;
485    let path_len = c.read_u32_be()? as usize;
486    let expansion_id = c.read_u16_be()?;
487    c.skip(2)?; // padding
488
489    // Cap path_len against remaining body bytes — without this an attacker
490    // can declare a 4 GiB path and OOM the patcher (issue #30).
491    let remaining = body.len().saturating_sub(c.position() as usize);
492    if path_len > remaining {
493        return Err(ParseError::InvalidField {
494            context: "SqpkFile path_len exceeds remaining body bytes",
495        });
496    }
497    let path_bytes = c.read_exact_vec(path_len)?;
498    let path = String::from_utf8(path_bytes)
499        .map(|s| s.trim_end_matches('\0').to_owned())
500        .map_err(ParseError::Utf8Error)?;
501
502    let (blocks, block_source_offsets) = if matches!(operation, SqpkFileOperation::AddFile) {
503        let mut blocks = Vec::new();
504        let mut offsets = Vec::new();
505        while (c.position() as usize) < body.len() {
506            // Record offset of the data payload (after the fixed 16-byte block header).
507            offsets.push(c.position() + 16);
508            blocks.push(SqpkCompressedBlock::read(&mut c)?);
509        }
510        (blocks, offsets)
511    } else {
512        (Vec::new(), Vec::new())
513    };
514
515    Ok(SqpkFile {
516        operation,
517        file_offset,
518        file_size,
519        expansion_id,
520        path,
521        block_source_offsets,
522        blocks,
523    })
524}
525
526#[cfg(test)]
527mod tests {
528    use super::*;
529
530    fn make_header(
531        op: u8,
532        file_offset: u64,
533        file_size: u64,
534        path: &[u8],
535        expansion_id: u16,
536    ) -> Vec<u8> {
537        let mut body = Vec::new();
538        body.push(op);
539        body.extend_from_slice(&[0u8; 2]); // alignment
540        body.extend_from_slice(&file_offset.to_be_bytes());
541        body.extend_from_slice(&file_size.to_be_bytes());
542        body.extend_from_slice(&(path.len() as u32).to_be_bytes());
543        body.extend_from_slice(&expansion_id.to_be_bytes());
544        body.extend_from_slice(&[0u8; 2]); // padding
545        body.extend_from_slice(path);
546        body
547    }
548
549    #[test]
550    fn parses_add_file_no_blocks() {
551        let body = make_header(b'A', 0, 512, b"test\0", 1);
552        let cmd = parse(&body).unwrap();
553        assert!(matches!(cmd.operation, SqpkFileOperation::AddFile));
554        assert_eq!(cmd.file_offset, 0);
555        assert_eq!(cmd.file_size, 512);
556        assert_eq!(cmd.expansion_id, 1);
557        assert_eq!(cmd.path, "test");
558        assert!(cmd.blocks.is_empty());
559        assert!(cmd.block_source_offsets.is_empty());
560    }
561
562    #[test]
563    fn parses_add_file_uncompressed_block() {
564        // block_len = ((8 + 143) & !127) = 128; read 8 data bytes + skip 104 padding
565        let mut body = make_header(b'A', 0, 0, b"\0", 0);
566        // header bytes: 1+2+8+8+4+2+2+1 = 28 — block starts at offset 28
567        body.extend_from_slice(&16i32.to_le_bytes()); // header_size
568        body.extend_from_slice(&0u32.to_le_bytes()); // pad
569        body.extend_from_slice(&0x7d00i32.to_le_bytes()); // compressed_size = uncompressed sentinel
570        body.extend_from_slice(&8i32.to_le_bytes()); // decompressed_size
571        body.extend_from_slice(&[0xABu8; 8]); // data
572        body.extend_from_slice(&[0u8; 104]); // alignment padding
573
574        let cmd = parse(&body).unwrap();
575        assert_eq!(cmd.blocks.len(), 1);
576        let block = &cmd.blocks[0];
577        assert!(!block.is_compressed);
578        assert_eq!(block.decompressed_size, 8);
579        assert_eq!(block.data.len(), 8);
580        assert!(block.data.iter().all(|&b| b == 0xAB));
581        assert_eq!(block.decompress().unwrap(), vec![0xABu8; 8]);
582        assert_eq!(cmd.block_source_offsets, vec![44u64]); // 28 (header) + 16 (block header)
583    }
584
585    #[test]
586    fn rejects_negative_file_offset_at_parse() {
587        // A `u64` wire value with the high bit set must surface as
588        // `ParseError::NegativeFileOffset(i64)` — the error preserves the raw
589        // value as the legacy signed reading for diagnostics.
590        let body = make_header(b'A', u64::MAX, 0, b"\0", 0);
591        match parse(&body) {
592            Err(ParseError::NegativeFileOffset(v)) => assert_eq!(v, -1),
593            other => panic!("expected NegativeFileOffset(-1), got {other:?}"),
594        }
595    }
596
597    #[test]
598    fn parses_remove_all_operation() {
599        let body = make_header(b'R', 0, 0, b"\0", 0);
600        let cmd = parse(&body).unwrap();
601        assert!(matches!(cmd.operation, SqpkFileOperation::RemoveAll));
602        assert!(cmd.blocks.is_empty());
603        assert!(cmd.block_source_offsets.is_empty());
604    }
605
606    #[test]
607    fn parses_delete_file_operation() {
608        let body = make_header(b'D', 0, 0, b"sqpack/foo.dat\0", 0);
609        let cmd = parse(&body).unwrap();
610        assert!(matches!(cmd.operation, SqpkFileOperation::DeleteFile));
611        assert_eq!(cmd.path, "sqpack/foo.dat");
612    }
613
614    #[test]
615    fn parses_make_dir_tree_operation() {
616        let body = make_header(b'M', 0, 0, b"sqpack/ex1\0", 0);
617        let cmd = parse(&body).unwrap();
618        assert!(matches!(cmd.operation, SqpkFileOperation::MakeDirTree));
619        assert_eq!(cmd.path, "sqpack/ex1");
620    }
621
622    #[test]
623    fn rejects_unknown_operation() {
624        let body = make_header(b'Z', 0, 0, b"\0", 0);
625        assert!(parse(&body).is_err());
626    }
627
628    fn block_with_sizes(header_size: i32, compressed_size: i32, decompressed_size: i32) -> Vec<u8> {
629        let mut body = make_header(b'A', 0, 0, b"\0", 0);
630        body.extend_from_slice(&header_size.to_le_bytes());
631        body.extend_from_slice(&0u32.to_le_bytes()); // pad
632        body.extend_from_slice(&compressed_size.to_le_bytes());
633        body.extend_from_slice(&decompressed_size.to_le_bytes());
634        body
635    }
636
637    #[test]
638    fn rejects_negative_header_size() {
639        let body = block_with_sizes(-1, 0x7d00, 0);
640        let Err(ParseError::InvalidField { context }) = parse(&body) else {
641            panic!("expected InvalidField for negative header_size");
642        };
643        assert!(
644            context.contains("header_size"),
645            "unexpected context: {context}"
646        );
647    }
648
649    #[test]
650    fn rejects_negative_decompressed_size() {
651        let body = block_with_sizes(16, 0x7d00, -1);
652        let Err(ParseError::InvalidField { context }) = parse(&body) else {
653            panic!("expected InvalidField for negative decompressed_size");
654        };
655        assert!(
656            context.contains("decompressed_size"),
657            "unexpected context: {context}"
658        );
659    }
660
661    #[test]
662    fn rejects_negative_compressed_size() {
663        // is_compressed = (compressed_size != 0x7d00) — pass -1 (not 0x7d00).
664        let body = block_with_sizes(16, -1, 8);
665        let Err(ParseError::InvalidField { context }) = parse(&body) else {
666            panic!("expected InvalidField for negative compressed_size");
667        };
668        assert!(
669            context.contains("compressed_size"),
670            "unexpected context: {context}"
671        );
672    }
673
674    #[test]
675    fn rejects_invalid_utf8_in_path() {
676        // 0xFF is not valid UTF-8 — Utf8Error path on `String::from_utf8`.
677        let body = make_header(b'D', 0, 0, &[0xFFu8], 0);
678        assert!(matches!(parse(&body), Err(ParseError::Utf8Error(_))));
679    }
680
681    #[test]
682    fn decompress_into_uncompressed_writes_data_verbatim() {
683        // Uncompressed branch: w.write_all(&self.data).
684        let block = SqpkCompressedBlock::new(false, 5, b"hello".to_vec());
685        let mut out = Vec::new();
686        block.decompress_into(&mut out).unwrap();
687        assert_eq!(out, b"hello");
688    }
689
690    #[test]
691    fn decompress_into_with_reuses_decompressor_across_blocks() {
692        // Verifies the contract of `decompress_into_with`: the same
693        // `Decompress` instance can be threaded through multiple consecutive
694        // compressed blocks, with `reset` between calls, and produce identical
695        // output to `decompress_into`. This is the apply-layer hot path.
696        use flate2::Compression;
697        use flate2::write::DeflateEncoder;
698        use std::io::Write;
699
700        let payload_a: &[u8] = b"alpha alpha alpha beta beta gamma";
701        let payload_b: &[u8] = b"the quick brown fox jumps over the lazy dog";
702
703        let compress = |raw: &[u8]| -> SqpkCompressedBlock {
704            let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
705            enc.write_all(raw).unwrap();
706            SqpkCompressedBlock::new(true, raw.len(), enc.finish().unwrap())
707        };
708        let a = compress(payload_a);
709        let b = compress(payload_b);
710
711        let mut state = Decompress::new(false);
712        let mut out_a = Vec::new();
713        a.decompress_into_with(&mut state, &mut out_a).unwrap();
714        assert_eq!(out_a, payload_a, "first block must round-trip");
715
716        let mut out_b = Vec::new();
717        b.decompress_into_with(&mut state, &mut out_b).unwrap();
718        assert_eq!(out_b, payload_b, "reused state must reset and round-trip");
719    }
720
721    #[test]
722    fn decompress_into_with_uncompressed_skips_decompressor() {
723        // The uncompressed branch must never touch the supplied state — it
724        // delegates to `write_all`. Verify the state's `total_in`/`total_out`
725        // are unchanged after the call.
726        let block = SqpkCompressedBlock::new(false, 5, b"hello".to_vec());
727        let mut state = Decompress::new(false);
728        let before_in = state.total_in();
729        let before_out = state.total_out();
730        let mut out = Vec::new();
731        block.decompress_into_with(&mut state, &mut out).unwrap();
732        assert_eq!(out, b"hello");
733        assert_eq!(state.total_in(), before_in);
734        assert_eq!(state.total_out(), before_out);
735    }
736
737    #[test]
738    fn decompress_into_with_propagates_corrupt_stream_error() {
739        // Garbage DEFLATE payload must surface as ParseError::Decompress
740        // rather than panic or loop forever.
741        let block = SqpkCompressedBlock::new(true, 16, vec![0xFFu8; 16]);
742        let mut state = Decompress::new(false);
743        let mut out = Vec::new();
744        assert!(matches!(
745            block.decompress_into_with(&mut state, &mut out),
746            Err(ParseError::Decompress { .. })
747        ));
748    }
749
750    #[test]
751    fn decompress_returns_borrowed_for_uncompressed() {
752        // Cow::Borrowed branch — no allocation, points at the block's data.
753        let block = SqpkCompressedBlock::new(false, 4, b"data".to_vec());
754        let cow = block.decompress().unwrap();
755        assert!(matches!(cow, Cow::Borrowed(_)));
756        assert_eq!(&*cow, b"data");
757    }
758
759    #[test]
760    fn decompress_into_compressed_propagates_decompress_error() {
761        // Garbage DEFLATE payload — the `.map_err(|e| ParseError::Decompress { source: e })?` arm.
762        let block = SqpkCompressedBlock::new(true, 16, vec![0xFFu8; 16]);
763        let mut out = Vec::new();
764        assert!(matches!(
765            block.decompress_into(&mut out),
766            Err(ParseError::Decompress { .. })
767        ));
768        // And via the `decompress()` wrapper — the `?` error arm at line 106.
769        assert!(matches!(
770            block.decompress(),
771            Err(ParseError::Decompress { .. })
772        ));
773    }
774
775    #[test]
776    fn parses_compressed_block() {
777        use flate2::Compression;
778        use flate2::write::DeflateEncoder;
779        use std::io::Write;
780
781        let raw: &[u8] = b"hello compressed world";
782        let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
783        enc.write_all(raw).unwrap();
784        let compressed = enc.finish().unwrap();
785
786        let header_size: i32 = 16;
787        let compressed_size = compressed.len() as i32;
788        let decompressed_size = raw.len() as i32;
789        let block_len = ((compressed_size as u32 + 143) & !127) as usize;
790        let trailing_pad = block_len - header_size as usize - compressed.len();
791
792        // header bytes: 1+2+8+8+4+2+2+1 = 28 — block starts at offset 28
793        let mut body = make_header(b'A', 0, 0, b"\0", 0);
794        body.extend_from_slice(&header_size.to_le_bytes());
795        body.extend_from_slice(&0u32.to_le_bytes()); // pad
796        body.extend_from_slice(&compressed_size.to_le_bytes());
797        body.extend_from_slice(&decompressed_size.to_le_bytes());
798        body.extend_from_slice(&compressed);
799        body.extend_from_slice(&vec![0u8; trailing_pad]);
800
801        let cmd = parse(&body).unwrap();
802        assert_eq!(cmd.blocks.len(), 1);
803        let block = &cmd.blocks[0];
804        assert!(block.is_compressed);
805        assert_eq!(block.decompressed_size, raw.len());
806        assert_eq!(block.decompress().unwrap(), raw);
807        assert_eq!(cmd.block_source_offsets, vec![44u64]); // 28 (header) + 16 (block header)
808    }
809
810    #[test]
811    fn parse_rejects_oversized_path_len_issue_30() {
812        // Regression for issue #30: a u32 `path_len` from untrusted patch
813        // bytes was fed straight into `Vec::with_capacity`, allowing a
814        // malicious patch to trigger a ~4 GiB allocation and OOM-abort the
815        // process. The parser must now reject such a header with
816        // `InvalidField` before any allocation occurs.
817        //
818        // Original 32-byte fuzz input (from the `parser_sqpk` harness; byte 0
819        // is the harness's sub-command selector, dropped here):
820        //   2c 41 e5 11 00 36 36 36 36 00 00 00 00 00 00 ff
821        //   ff ff ff ff ff ff 00 00 21 00 ac 00 00 00 00 00
822        let body: &[u8] = &[
823            0x41, 0xe5, 0x11, // op=AddFile, alignment
824            0x00, 0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, // file_offset
825            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, // file_size
826            0xff, 0xff, 0xff, 0xff, // path_len = u32::MAX
827            0xff, 0xff, // expansion_id
828            0x00, 0x00, // padding
829            0x21, 0x00, 0xac, 0x00, // remaining body bytes
830        ];
831        assert_eq!(body.len(), 31, "test input is the post-selector body");
832        let err = parse(body).expect_err("oversized path_len must error");
833        assert!(
834            matches!(
835                err,
836                ParseError::InvalidField { context }
837                    if context.contains("path_len")
838            ),
839            "expected InvalidField on oversized path_len, got: {err:?}"
840        );
841    }
842}