// zipatch-rs 1.2.0
//
// Parser for FFXIV ZiPatch patch files
// Documentation
use std::io::{self, Read};

use crate::Result;

/// Ceiling, in bytes, on how much a `Vec` may reserve up front when its size
/// hint is taken from an attacker-controlled length field (64 KiB).
///
/// Legitimately large reads still succeed: the `Vec` grows incrementally
/// through `read_to_end`. A bogus, oversized hint paired with a short input
/// therefore reaches the truncation check without first triggering a
/// multi-gigabyte allocation. Issue #30 records the fuzz finding that
/// motivated this limit.
pub(crate) const PREALLOC_CAP: usize = 1 << 16;

/// Extension trait that adds typed binary reads on top of [`std::io::Read`].
///
/// This trait is crate-internal — it is implemented for all `R: Read` types
/// via a blanket `impl` and is only called from within the parsing layer
/// ([`crate::chunk`]). Consumers of the public API never need to import or
/// call these methods directly.
///
/// All methods that read numeric types perform a single `read_exact` call and
/// interpret the resulting bytes according to the endianness named in the
/// method. On truncation (fewer bytes available than required), they return
/// [`ZiPatchError::Io`](crate::ZiPatchError::Io) wrapping an
/// [`std::io::ErrorKind::UnexpectedEof`] error.
pub trait ReadExt: Read {
    /// Read a single unsigned byte.
    ///
    /// # Errors
    ///
    /// Fails if the underlying reader errors or is already at EOF.
    fn read_u8(&mut self) -> Result<u8>;

    /// Read a big-endian `u16` (2 bytes, most-significant byte first).
    ///
    /// # Errors
    ///
    /// Fails if the underlying reader errors or ends before 2 bytes are read.
    fn read_u16_be(&mut self) -> Result<u16>;

    /// Read a big-endian `u32` (4 bytes, most-significant byte first).
    ///
    /// Used for chunk `body_len` fields and most unsigned 32-bit quantities in
    /// the `ZiPatch` wire format.
    ///
    /// # Errors
    ///
    /// Fails if the underlying reader errors or ends before 4 bytes are read.
    fn read_u32_be(&mut self) -> Result<u32>;

    /// Read a big-endian `i32` (4 bytes, most-significant byte first, two's
    /// complement signed).
    ///
    /// # Errors
    ///
    /// Fails if the underlying reader errors or ends before 4 bytes are read.
    fn read_i32_be(&mut self) -> Result<i32>;

    /// Read a little-endian `i32` (4 bytes, least-significant byte first, two's
    /// complement signed).
    ///
    /// Used for the few fields in the format that are specified as little-endian
    /// signed integers (e.g. some `SqpkFile` offset fields in older chunk
    /// parsers).
    ///
    /// # Errors
    ///
    /// Fails if the underlying reader errors or ends before 4 bytes are read.
    fn read_i32_le(&mut self) -> Result<i32>;

    /// Read a big-endian `u64` (8 bytes, most-significant byte first).
    ///
    /// Used for large size and offset fields such as `deleted_data_size` and
    /// `seek_count` in `SqpkTargetInfo`.
    ///
    /// # Errors
    ///
    /// Fails if the underlying reader errors or ends before 8 bytes are read.
    fn read_u64_be(&mut self) -> Result<u64>;

    /// Read exactly `n` bytes into a freshly allocated `Vec<u8>`.
    ///
    /// The blanket implementation reserves capacity for `n` bytes, capped at
    /// `PREALLOC_CAP`, without zero-initialising them, then fills the buffer
    /// via `read_to_end` over a [`Read::take`] adapter limited to `n` bytes.
    /// Requests larger than the cap grow the buffer incrementally as data
    /// actually arrives. A final length check returns
    /// [`std::io::ErrorKind::UnexpectedEof`] if the source ran short, preserving
    /// the exact-`n` semantics callers expect.
    ///
    /// Used for reading chunk bodies (after the framing header) and for the
    /// initial 12-byte magic check in [`ZiPatchReader::new`](crate::ZiPatchReader::new).
    ///
    /// # Errors
    ///
    /// Fails if the underlying reader errors, or with an `UnexpectedEof` error
    /// if fewer than `n` bytes were available.
    fn read_exact_vec(&mut self, n: usize) -> Result<Vec<u8>>;

    /// Discard exactly `n` bytes from the stream.
    ///
    /// Implemented with [`io::copy`] and [`Read::take`] into [`io::sink`] so
    /// that it works on any `Read` source, including non-seekable streams.
    /// If the source reaches EOF before `n` bytes have been consumed, the
    /// method returns [`ZiPatchError::Io`](crate::ZiPatchError::Io) with kind
    /// [`std::io::ErrorKind::UnexpectedEof`].
    ///
    /// Used to skip over padding and reserved fields in chunk bodies without
    /// allocating a discard buffer.
    ///
    /// # Errors
    ///
    /// Fails if the underlying reader errors, or with an `UnexpectedEof` error
    /// if fewer than `n` bytes could be discarded.
    fn skip(&mut self, n: u64) -> Result<()>;
}

/// Fill a fixed-size byte array from `src` with a single `read_exact` call.
///
/// Shared by every fixed-width reader below so that each one only has to
/// state its width and how the bytes are interpreted.
fn read_array<R, const N: usize>(src: &mut R) -> Result<[u8; N]>
where
    R: Read + ?Sized,
{
    let mut bytes = [0u8; N];
    src.read_exact(&mut bytes)?;
    Ok(bytes)
}

/// Blanket implementation: every `Read` type gets the typed readers.
impl<R: Read> ReadExt for R {
    fn read_u8(&mut self) -> Result<u8> {
        read_array::<R, 1>(self).map(|[byte]| byte)
    }

    fn read_u16_be(&mut self) -> Result<u16> {
        read_array::<R, 2>(self).map(u16::from_be_bytes)
    }

    fn read_u32_be(&mut self) -> Result<u32> {
        read_array::<R, 4>(self).map(u32::from_be_bytes)
    }

    fn read_i32_be(&mut self) -> Result<i32> {
        read_array::<R, 4>(self).map(i32::from_be_bytes)
    }

    fn read_i32_le(&mut self) -> Result<i32> {
        read_array::<R, 4>(self).map(i32::from_le_bytes)
    }

    fn read_u64_be(&mut self) -> Result<u64> {
        read_array::<R, 8>(self).map(u64::from_be_bytes)
    }

    fn read_exact_vec(&mut self, n: usize) -> Result<Vec<u8>> {
        // Never trust `n` for the up-front reservation: cap it and let
        // `read_to_end` grow the buffer only as real data arrives.
        let reserve = n.min(PREALLOC_CAP);
        let mut bytes = Vec::with_capacity(reserve);

        // `take` guarantees we never pull more than `n` bytes from the source.
        let mut limited = self.by_ref().take(n as u64);
        limited.read_to_end(&mut bytes)?;

        if bytes.len() >= n {
            return Ok(bytes);
        }

        // The source ended early; report it the way `read_exact` would.
        let eof = io::Error::new(
            io::ErrorKind::UnexpectedEof,
            "read_exact_vec: unexpected EOF",
        );
        Err(eof.into())
    }

    fn skip(&mut self, n: u64) -> Result<()> {
        // Drain up to `n` bytes into a sink; works for non-seekable sources.
        let mut window = self.by_ref().take(n);
        let discarded = io::copy(&mut window, &mut io::sink())?;

        if discarded >= n {
            return Ok(());
        }

        let eof = io::Error::new(io::ErrorKind::UnexpectedEof, "skip: unexpected EOF");
        Err(eof.into())
    }
}

#[cfg(test)]
mod tests {
    use super::ReadExt;
    use std::io::Cursor;

    /// Wrap a byte slice in a `Cursor` so each test starts from a fresh
    /// in-memory reader.
    fn reader(bytes: &[u8]) -> Cursor<&[u8]> {
        Cursor::new(bytes)
    }

    // --- Short input must be reported as an error ---

    #[test]
    fn read_u8_eof() {
        let mut src = reader(&[]);
        assert!(src.read_u8().is_err());
    }

    #[test]
    fn read_u16_be_truncated() {
        let mut src = reader(&[0x12]);
        assert!(src.read_u16_be().is_err());
    }

    #[test]
    fn read_u32_be_truncated() {
        let mut src = reader(&[0x01, 0x02, 0x03]);
        assert!(src.read_u32_be().is_err());
    }

    #[test]
    fn read_u64_be_truncated() {
        let mut src = reader(&[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07]);
        assert!(src.read_u64_be().is_err());
    }

    #[test]
    fn read_exact_vec_truncated() {
        // Four bytes available, five requested.
        let mut src = reader(b"hi!!");
        assert!(src.read_exact_vec(5).is_err());
    }

    // --- Byte order: inputs chosen so the wrong endianness gives a different value ---

    #[test]
    fn read_u32_be_endian() {
        // A little-endian read of these bytes would yield 0x04030201.
        let mut src = reader(&[0x01, 0x02, 0x03, 0x04]);
        let value = src.read_u32_be().unwrap();
        assert_eq!(value, 0x0102_0304);
    }

    #[test]
    fn read_i32_le_endian() {
        // A big-endian read of these bytes would yield 0x04030201.
        let mut src = reader(&[0x04, 0x03, 0x02, 0x01]);
        let value = src.read_i32_le().unwrap();
        assert_eq!(value, 0x0102_0304);
    }

    #[test]
    fn read_u64_be_endian() {
        // A little-endian read of these bytes would yield 0x0807060504030201.
        let mut src = reader(&[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]);
        let value = src.read_u64_be().unwrap();
        assert_eq!(value, 0x0102_0304_0506_0708);
    }

    // --- Signedness: the top bit must be treated as the sign bit ---

    #[test]
    fn read_i32_be_min() {
        // An unsigned read would give (u32::MAX >> 1) + 1 (0x8000_0000), not i32::MIN.
        let mut src = reader(&[0x80, 0x00, 0x00, 0x00]);
        let value = src.read_i32_be().unwrap();
        assert_eq!(value, i32::MIN);
    }

    #[test]
    fn read_i32_le_min() {
        let mut src = reader(&[0x00, 0x00, 0x00, 0x80]);
        let value = src.read_i32_le().unwrap();
        assert_eq!(value, i32::MIN);
    }

    // --- skip() boundaries ---

    #[test]
    fn skip_zero() {
        // Skipping nothing must leave the position untouched.
        let mut src = reader(&[1, 2]);
        src.skip(0).unwrap();
        let next = src.read_u8().unwrap();
        assert_eq!(next, 1);
    }

    #[test]
    fn skip_advances_position() {
        let mut src = reader(&[1, 2, 3, 4, 5]);
        src.skip(3).unwrap();
        let next = src.read_u8().unwrap();
        assert_eq!(next, 4);
    }

    #[test]
    fn skip_past_eof() {
        let mut src = reader(&[1, 2, 3, 4, 5]);
        let outcome = src.skip(100);
        assert!(outcome.is_err());
    }

    // --- read_exact_vec with a zero-length request ---

    #[test]
    fn read_exact_vec_empty() {
        let mut src = reader(b"hello");
        let bytes = src.read_exact_vec(0).unwrap();
        assert_eq!(bytes, Vec::<u8>::new());
    }

    // --- Truncation coverage for the remaining signed readers ---

    #[test]
    fn read_i32_be_truncated() {
        let mut src = reader(&[0x01, 0x02]);
        assert!(src.read_i32_be().is_err());
    }

    #[test]
    fn read_i32_le_truncated() {
        let mut src = reader(&[0x01, 0x02]);
        assert!(src.read_i32_le().is_err());
    }
}