// libgm 0.5.0
//
// A tool for modding, unpacking and decompiling GameMaker games.
// Documentation
use std::collections::HashMap;

use crate::gml::instruction::InstanceType;
use crate::prelude::*;
use crate::util::assert;
use crate::wad::chunk::ChunkName;
use crate::wad::data::Endianness;
use crate::wad::deserialize::ParsingOptions;
use crate::wad::deserialize::chunk::ChunkBounds;
use crate::wad::deserialize::chunk::ChunkMap;
use crate::wad::elements::GMElement;
use crate::wad::elements::function::GMFunction;
use crate::wad::elements::general_info::GMGeneralInfo;
use crate::wad::elements::texture_page_item::GMTexturePageItem;
use crate::wad::elements::variable::GMVariable;
use crate::wad::reference::GMRef;
use crate::wad::version::GMVersion;
use crate::wad::version::GMVersionReq;

/// Cursor-style reader over the raw bytes of a GameMaker data file.
///
/// Holds the borrowed data buffer, the current read position, the bounds of
/// the chunk currently being parsed, and lookup tables that are filled in
/// while individual chunks are deserialized.
#[derive(Debug)]
pub struct DataReader<'a> {
    /// The raw data buffer belonging to the GameMaker data file which is
    /// currently being parsed.
    data: &'a [u8],

    /// The current read position within the data buffer.
    /// Data is read starting at this position, and every read advances it
    /// by the number of bytes read.
    pub cur_pos: u32,

    /// The GameMaker version specified by GEN8.
    /// The "actual" version will be detected later and stored in
    /// `general_info.version`.
    pub specified_version: GMVersion,

    /// How many null bytes of padding should be at the end of every chunk
    /// (except the last one). Only relevant in certain GameMaker versions.
    /// Defaults to 16, but will be set to 4 or 1 if detected.
    pub chunk_padding: u32,

    /// Indicates the data file's byte endianness.
    /// In most cases (and assumed by default), this is set to little-endian.
    /// Big endian is an edge case for certain target platforms (e.g. PS3 or
    /// Xbox 360).
    pub endianness: Endianness,

    /// Map of all chunks specified by `FORM`; indexed by chunk name.
    /// Read chunks will be removed from this `HashMap` when calling
    /// [`DataReader::read_chunk`]. May contain unknown chunks (if there is
    /// a GameMaker update, for example).
    pub chunks: ChunkMap,

    /// Metadata about the currently parsed chunk of data.
    /// This covers the chunk's start position and end position within the
    /// data buffer. When reading data, these bounds are checked to ensure
    /// the read operation stays within the chunk.
    ///
    /// **Safety Warning**: These bounds must never extend past the end of
    /// the data buffer; [`DataReader::read_bytes_dyn`] relies on them when
    /// validating reads.
    pub chunk: ChunkBounds,

    /// The name of the last chunk in the data file.
    /// Is properly initialized after parsing `FORM`.
    pub last_chunk: ChunkName,

    /// General info about this data file. Includes game name, GameMaker Version
    /// and WAD Version. Contains garbage placeholders until the `GEN8`
    /// chunk is deserialized. Use [`DataReader::read_gen8_version`] to get
    /// the GameMaker version before `GEN8` is parsed.
    pub general_info: GMGeneralInfo,

    /// Will be set after chunk `STRG` is parsed (first chunk to parse).
    /// Contains all GameMaker strings by ID (aka index).
    /// Needed for string references in push instructions.
    pub strings: Vec<String>,

    /// Bounds of chunk `STRG`.
    /// Is properly initialized after parsing `FORM`.
    pub string_chunk: ChunkBounds,

    /// The parsing options in effect for this reader.
    /// Properly initialized after parsing `FORM`.
    pub options: ParsingOptions,

    /// Should only be set by [`crate::wad::elements::texture_page_item`].
    /// This means that `TPAG` has to be parsed before any chunk with texture
    /// page item pointers.
    pub texture_page_item_occurrences: HashMap<u32, GMRef<GMTexturePageItem>>,

    /// Should only be set by [`crate::wad::elements::variable`].
    /// This means that `VARI` has to be parsed before `CODE`.
    pub variable_occurrences: HashMap<u32, (GMRef<GMVariable>, InstanceType)>,

    /// Should only be set by [`crate::wad::elements::function`].
    /// This means that `FUNC` has to be parsed before `CODE`.
    pub function_occurrences: HashMap<u32, GMRef<GMFunction>>,
}

impl<'a> DataReader<'a> {
    /// Builds a [`DataReader`] over the given raw GameMaker data.
    ///
    /// The reader starts at position 0 with the "current chunk" spanning the
    /// whole buffer. Most other fields hold stub values until `FORM` has
    /// been parsed.
    ///
    /// # Panics
    /// Panics if the length of `data` does not fit into a `u32`.
    pub fn new(data: &'a [u8]) -> Self {
        // Memory safety assertion: every position in the buffer must be
        // addressable with a u32. Callers should have verified this already.
        let data_len = u32::try_from(data.len()).expect("Data length out of u32 bounds");

        // Until FORM is parsed, treat the entire file as one chunk.
        let whole_file = ChunkBounds { start_pos: 0, end_pos: data_len };

        Self {
            data,
            cur_pos: 0,
            // stub
            specified_version: GMVersion::default(),
            // 16 is the default padding, if padding is used at all.
            chunk_padding: 16,
            // Little endian is assumed; big endian is an edge case.
            endianness: Endianness::Little,
            chunks: ChunkMap::new(),
            chunk: whole_file,
            // stub
            last_chunk: ChunkName::new("XXXX"),
            // stub
            general_info: GMGeneralInfo::default(),
            strings: Vec::new(),
            // stub
            string_chunk: ChunkBounds::default(),
            // stub
            options: ParsingOptions::default(),
            texture_page_item_occurrences: HashMap::new(),
            variable_occurrences: HashMap::new(),
            function_occurrences: HashMap::new(),
        }
    }

    /// Returns the total byte length of the data file.
    pub const fn size(&self) -> u32 {
        // `new` already verified that the buffer length fits into a u32,
        // so this cast does not truncate.
        let byte_len: usize = self.data.len();
        byte_len as u32
    }

    /// Reads the specified number of bytes from the data file while advancing
    /// the data position.
    ///
    /// This is the core data reading abstraction. All other methods build up on
    /// this.
    ///
    /// # Errors
    /// Returns an error (and leaves the read position untouched) if:
    /// - `cur_pos + count` overflows `u32`,
    /// - the read would start before the current chunk's start position,
    /// - the read would end after the current chunk's end position, or
    /// - the requested range lies outside the underlying data buffer.
    pub fn read_bytes_dyn(&mut self, count: u32) -> Result<&'a [u8]> {
        let start: u32 = self.cur_pos;
        let end: u32 = start
            .checked_add(count)
            .ok_or("Trying to read out of u32 bounds")?;

        // Lower chunk bounds check
        if start < self.chunk.start_pos {
            bail!(
                "Trying to read {} bytes out of lower chunk bounds at position {} with start \
                 position {}",
                count,
                self.cur_pos,
                self.chunk.start_pos,
            );
        }

        // Upper chunk bounds check
        if end > self.chunk.end_pos {
            bail!(
                "Trying to read {} bytes out of upper chunk bounds at position {} with end \
                 position {}",
                count,
                self.cur_pos,
                self.chunk.end_pos,
            );
        }

        // `chunk` and `cur_pos` are public fields, so the chunk bounds alone
        // cannot be trusted to stay within the buffer. Use a checked slice
        // access so that incorrectly set bounds yield an error instead of
        // undefined behavior.
        let data: &'a [u8] = self.data;
        let slice: &'a [u8] = data.get(start as usize..end as usize).ok_or_else(|| {
            format!(
                "Trying to read {} bytes at position {} beyond the data buffer of length {}",
                count,
                start,
                data.len(),
            )
        })?;

        // `end` was computed with overflow checking above.
        self.cur_pos = end;
        Ok(slice)
    }

    /// Reads a constant number of bytes from the data file while advancing the
    /// data position. Useful for reading slices with specified sizes like
    /// `[u8; 16]`.
    ///
    /// **Safety Note:** `N` must be less than `u32::MAX`.
    /// The const assertion should guarantee this, though.
    pub fn read_bytes_const<const N: usize>(&mut self) -> Result<&[u8; N]> {
        const {
            assert!(N < u32::MAX as usize);
        }
        let slice: &[u8] = self.read_bytes_dyn(N as u32)?;
        // SAFETY: read_bytes_dyn is guaranteed to read exactly N bytes.
        Ok(unsafe { &*slice.as_ptr().cast::<[u8; N]>() })
    }

    /// Reads a 32-bit integer and interprets it as a boolean
    /// (`0` is `false`, `1` is `true`).
    /// ___
    /// Returns an error when the read number is neither 0 nor 1.
    pub fn read_bool32(&mut self) -> Result<bool> {
        let n = self.read_u32()?;

        // Anything other than 0 or 1 is not a valid boolean.
        if n > 1 {
            // The integer was already consumed, so it started 4 bytes back.
            bail!(
                "Read invalid boolean value {n} (0x{n:08X}) at position {}",
                self.cur_pos - 4,
            );
        }

        Ok(n == 1)
    }

    /// Reads `length` bytes and decodes them as a UTF-8 string.
    /// ___
    /// Returns an error if the bytes cannot be read or are not valid UTF-8.
    ///
    /// For reading standard GameMaker string references, see
    /// [`DataReader::read_gm_string`].
    pub fn read_literal_string(&mut self, length: u32) -> Result<String> {
        let raw: &[u8] = self
            .read_bytes_dyn(length)
            .with_context(|| format!("reading literal string with length {length}"))?;

        // The cursor has already moved past the string, so its start is
        // `length` bytes behind the current position.
        let decoded: String = String::from_utf8(raw.to_vec()).with_context_src(|| {
            format!(
                "parsing literal UTF-8 string with length {} at position {}",
                length,
                self.cur_pos - length,
            )
        })?;

        Ok(decoded)
    }

    /// Consumes padding bytes until the reader position is a multiple of
    /// `alignment`, verifying that each consumed byte is zero.
    ///
    /// Returns an error if a padding byte cannot be read or is not zero.
    pub fn align(&mut self, alignment: u32) -> Result<()> {
        loop {
            // Stop as soon as the position is aligned.
            if self.cur_pos.is_multiple_of(alignment) {
                return Ok(());
            }

            let padding = self.read_u8()?;
            assert::int(padding, 0, "Padding Byte")
                .with_context(|| format!("aligning reader to {alignment}"))?;
        }
    }

    /// Sets the reader position to the current chunk's start position plus the
    /// specified relative position.
    ///
    /// # Errors
    /// Returns an error (and leaves the read position untouched) if the
    /// resulting position overflows `u32` or lies beyond the current chunk's
    /// end position. A position exactly at the chunk end is allowed.
    pub fn set_rel_cur_pos(&mut self, relative_pos: u32) -> Result<()> {
        let start = self.chunk.start_pos;
        let end = self.chunk.end_pos;
        let pos = start.checked_add(relative_pos).ok_or_else(|| {
            format!("Relative position {relative_pos} would overflow from start position {start}")
        })?;

        if pos > end {
            // The trailing backslash joins the literal's lines so the
            // message contains no embedded newline or source indentation.
            bail!(
                "Position {pos} (start {start} + relative {relative_pos}) exceeds chunk end \
                 position {end}"
            );
        }

        self.cur_pos = pos;
        Ok(())
    }

    /// Conditionally deserializes an element, depending on whether the
    /// GameMaker version is at least `ver_req`.
    ///
    /// Handy for format changes across GameMaker versions, where chunks or
    /// fields were added, removed, or modified.
    ///
    /// # Returns
    /// - `Ok(Some(T))` if the version requirement is met and deserialization
    ///   succeeds
    /// - `Ok(None)` if the version requirement is not met
    /// - `Err(_)` if the version requirement is met but deserialization fails
    pub fn deserialize_if_gm_version<T: GMElement, V: Into<GMVersionReq>>(
        &mut self,
        ver_req: V,
    ) -> Result<Option<T>> {
        // Guard: nothing to read when the version is too old.
        if !self.general_info.is_version_at_least(ver_req) {
            return Ok(None);
        }

        let element = T::deserialize(self)?;
        Ok(Some(element))
    }

    /// Conditionally deserializes an element, depending on whether the WAD
    /// version is at least `ver_req`.
    ///
    /// # Returns
    /// - `Ok(Some(T))` if the WAD version requirement is met and
    ///   deserialization succeeds
    /// - `Ok(None)` if the WAD version requirement is not met
    /// - `Err(_)` if the WAD version requirement is met but deserialization
    ///   fails
    pub fn deserialize_if_wad_version<T: GMElement>(&mut self, ver_req: u8) -> Result<Option<T>> {
        // Guard: nothing to read when the WAD version is too old.
        if self.general_info.wad_version < ver_req {
            return Ok(None);
        }

        let element = T::deserialize(self)?;
        Ok(Some(element))
    }
}