1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
use std::collections::HashMap;
use crate::gml::instruction::InstanceType;
use crate::prelude::*;
use crate::util::assert;
use crate::wad::chunk::ChunkName;
use crate::wad::data::Endianness;
use crate::wad::deserialize::ParsingOptions;
use crate::wad::deserialize::chunk::ChunkBounds;
use crate::wad::deserialize::chunk::ChunkMap;
use crate::wad::elements::GMElement;
use crate::wad::elements::function::GMFunction;
use crate::wad::elements::general_info::GMGeneralInfo;
use crate::wad::elements::texture_page_item::GMTexturePageItem;
use crate::wad::elements::variable::GMVariable;
use crate::wad::reference::GMRef;
use crate::wad::version::GMVersion;
use crate::wad::version::GMVersionReq;
/// Stateful reader over the raw bytes of a GameMaker data file.
///
/// Bundles the borrowed data buffer with the current read position, the
/// bounds of the chunk currently being parsed, and lookup state that is
/// filled in while parsing.
#[derive(Debug)]
pub struct DataReader<'a> {
/// The raw data buffer belonging to the GameMaker data file which is
/// currently being parsed.
data: &'a [u8],
/// The current read position within the data buffer.
/// Data is read starting at this position; every successful read advances
/// it by the number of bytes read.
pub cur_pos: u32,
/// The GameMaker version specified by `GEN8`.
/// The "actual" version will be detected later and stored in
/// `general_info.version`.
pub specified_version: GMVersion,
/// How many null bytes of padding should be at the end of every chunk
/// (except the last one). Only relevant in certain GameMaker versions.
/// Defaults to 16, but will be set to 4 or 1 if detected.
pub chunk_padding: u32,
/// Indicates the data file's byte endianness.
/// In most cases (and assumed by default), this is set to little-endian.
/// Big-endian is an edge case for certain target platforms (e.g. PS3 or
/// Xbox 360).
pub endianness: Endianness,
/// Map of all chunks specified by `FORM`; indexed by chunk name.
/// Read chunks will be removed from this map when calling
/// [`DataReader::read_chunk`]. May contain unknown chunks (if there is
/// a GameMaker update, for example).
pub chunks: ChunkMap,
/// Metadata about the chunk of data that is currently being parsed.
/// This includes the chunk's start position and end position within the
/// data buffer. When reading data, these bounds are checked to ensure the
/// read operation stays within the chunk.
///
/// **Safety Warning**: The start/end positions must always describe a range
/// inside the data buffer. Reading code relies on this; if they are set
/// incorrectly, the program may become memory unsafe.
pub chunk: ChunkBounds,
/// The name of the last chunk in the data file.
/// Is properly initialized after parsing `FORM`.
pub last_chunk: ChunkName,
/// General info about this data file. Includes game name, GameMaker version
/// and WAD version. Contains garbage placeholders until the `GEN8`
/// chunk is deserialized. Use [`DataReader::read_gen8_version`] to get
/// the GameMaker version before `GEN8` is parsed.
pub general_info: GMGeneralInfo,
/// Will be set after chunk `STRG` is parsed (first chunk to parse).
/// Contains all GameMaker strings by ID (aka index).
/// Needed for string references in push instructions.
pub strings: Vec<String>,
/// Bounds of chunk `STRG`.
/// Is properly initialized after parsing `FORM`.
pub string_chunk: ChunkBounds,
/// The options controlling how the data file is parsed.
/// Properly initialized after parsing `FORM`.
pub options: ParsingOptions,
/// Should only be set by [`crate::wad::elements::texture_page_item`].
/// This means that `TPAG` has to be parsed before any chunk with texture
/// page item pointers.
pub texture_page_item_occurrences: HashMap<u32, GMRef<GMTexturePageItem>>,
/// Should only be set by [`crate::wad::elements::variable`].
/// This means that `VARI` has to be parsed before `CODE`.
pub variable_occurrences: HashMap<u32, (GMRef<GMVariable>, InstanceType)>,
/// Should only be set by [`crate::wad::elements::function`].
/// This means that `FUNC` has to be parsed before `CODE`.
pub function_occurrences: HashMap<u32, GMRef<GMFunction>>,
}
impl<'a> DataReader<'a> {
/// Builds a fresh [`DataReader`] over the given raw GameMaker data.
///
/// The read position starts at 0 and the current chunk initially spans the
/// whole buffer. Fields marked `stub` only hold placeholder values; they
/// are meant to be properly initialized while parsing `FORM`.
///
/// # Panics
/// Panics if the length of `data` does not fit into a `u32`.
pub fn new(data: &'a [u8]) -> Self {
    // Memory safety assertion: all positions are stored as `u32`, so the
    // buffer length must be representable as one. Callers are expected to
    // have verified this already.
    let data_len: u32 = data
        .len()
        .try_into()
        .expect("Data length out of u32 bounds");

    // Until `FORM` is parsed, treat the entire file as the current chunk.
    let whole_file = ChunkBounds { start_pos: 0, end_pos: data_len };

    Self {
        data,
        cur_pos: 0,
        // stub
        specified_version: GMVersion::default(),
        // 16 is the default padding value, if padding is used at all.
        chunk_padding: 16,
        // Little endian is assumed; big endian is an edge case.
        endianness: Endianness::Little,
        chunks: ChunkMap::new(),
        chunk: whole_file,
        // stub
        last_chunk: ChunkName::new("XXXX"),
        // stub
        general_info: GMGeneralInfo::default(),
        strings: vec![],
        // stub
        string_chunk: ChunkBounds::default(),
        // stub
        options: ParsingOptions::default(),
        texture_page_item_occurrences: HashMap::new(),
        variable_occurrences: HashMap::new(),
        function_occurrences: HashMap::new(),
    }
}
/// Returns the total byte length of the data file's buffer.
pub const fn size(&self) -> u32 {
    // For readers created through `DataReader::new`, the length is known to
    // fit into a `u32` (the constructor panics otherwise), so this cast
    // does not truncate.
    let byte_len: usize = self.data.len();
    byte_len as u32
}
/// Reads the specified number of bytes from the data file while advancing
/// the data position.
///
/// This is the core data reading abstraction. All other read methods build
/// on top of it.
///
/// # Errors
/// Returns an error, leaving the read position untouched, if
/// - `cur_pos + count` overflows `u32`,
/// - the current position lies before the current chunk's start position,
/// - the read would extend past the current chunk's end position, or
/// - the requested range is not contained in the data buffer (only possible
///   if the chunk bounds were set to values outside of the buffer).
pub fn read_bytes_dyn(&mut self, count: u32) -> Result<&'a [u8]> {
    let start: u32 = self.cur_pos;
    let end: u32 = start
        .checked_add(count)
        .ok_or("Trying to read out of u32 bounds")?;

    // Lower chunk bounds check
    if start < self.chunk.start_pos {
        bail!(
            "Trying to read {} bytes out of lower chunk bounds at position {} with start \
             position {}",
            count,
            self.cur_pos,
            self.chunk.start_pos,
        );
    }

    // Upper chunk bounds check
    if end > self.chunk.end_pos {
        bail!(
            "Trying to read {} bytes out of upper chunk bounds at position {} with end \
             position {}",
            count,
            self.cur_pos,
            self.chunk.end_pos,
        );
    }

    // `chunk` is a public field, so its bounds cannot be trusted to lie
    // within the buffer. Use checked slicing so that incorrect chunk bounds
    // produce an error instead of an out-of-bounds memory access.
    let data: &'a [u8] = self.data;
    let slice: &'a [u8] = data.get(start as usize..end as usize).ok_or_else(|| {
        format!(
            "Trying to read {count} bytes at position {start} out of data bounds with data \
             length {}",
            data.len(),
        )
    })?;

    self.cur_pos = end;
    Ok(slice)
}
/// Reads a constant number of bytes from the data file while advancing the
/// data position. Useful for reading slices with specified sizes like
/// `[u8; 16]`.
///
/// **Safety Note:** `N` must be less than `u32::MAX`.
/// The const assertion should guarantee this, though.
pub fn read_bytes_const<const N: usize>(&mut self) -> Result<&[u8; N]> {
const {
assert!(N < u32::MAX as usize);
}
let slice: &[u8] = self.read_bytes_dyn(N as u32)?;
// SAFETY: read_bytes_dyn is guaranteed to read exactly N bytes.
Ok(unsafe { &*slice.as_ptr().cast::<[u8; N]>() })
}
/// Reads a 32-bit integer and interprets it as a boolean.
/// ___
/// `0` maps to `false` and `1` maps to `true`. Any other value results in
/// an error that reports the value and the position it was read from.
pub fn read_bool32(&mut self) -> Result<bool> {
    let n = self.read_u32()?;
    if n == 0 || n == 1 {
        Ok(n == 1)
    } else {
        // The integer has already been consumed, so step back 4 bytes to
        // report the position where it started.
        bail!(
            "Read invalid boolean value {n} (0x{n:08X}) at position {}",
            self.cur_pos - 4,
        )
    }
}
/// Reads `length` bytes and decodes them as a UTF-8 string.
/// ___
/// Returns an error if the bytes cannot be read or are not valid UTF-8.
///
/// For reading standard GameMaker string references, see
/// [`DataReader::read_gm_string`].
pub fn read_literal_string(&mut self, length: u32) -> Result<String> {
    let raw: &[u8] = self
        .read_bytes_dyn(length)
        .with_context(|| format!("reading literal string with length {length}"))?;

    // The read succeeded, so the position has advanced by exactly `length`
    // bytes; subtracting it again yields where the string started.
    let string_pos = self.cur_pos - length;

    let decoded: String = String::from_utf8(raw.to_vec()).with_context_src(|| {
        format!(
            "parsing literal UTF-8 string with length {} at position {}",
            length, string_pos,
        )
    })?;
    Ok(decoded)
}
/// Consumes padding bytes until the reader position is a multiple of
/// `alignment`.
///
/// Every consumed byte must be zero; otherwise an error is returned. Bytes
/// consumed before a failure stay consumed.
pub fn align(&mut self, alignment: u32) -> Result<()> {
    loop {
        if self.cur_pos.is_multiple_of(alignment) {
            return Ok(());
        }

        let padding = self.read_u8()?;
        assert::int(padding, 0, "Padding Byte")
            .with_context(|| format!("aligning reader to {alignment}"))?;
    }
}
/// Sets the reader position to the current chunk's start position plus the
/// specified relative position.
///
/// A resulting position equal to the chunk's end position is accepted.
///
/// # Errors
/// Returns an error, leaving the reader position unchanged, if
/// - adding `relative_pos` to the chunk's start position overflows `u32`, or
/// - the resulting position lies past the chunk's end position.
pub fn set_rel_cur_pos(&mut self, relative_pos: u32) -> Result<()> {
    let start = self.chunk.start_pos;
    let end = self.chunk.end_pos;

    // The messages below are kept free of raw line breaks; a string literal
    // that spans lines needs a trailing `\` to avoid embedding the newline.
    let pos = start.checked_add(relative_pos).ok_or_else(|| {
        format!("Relative position {relative_pos} would overflow from start position {start}")
    })?;

    if pos > end {
        bail!(
            "Position {pos} (start {start} + relative {relative_pos}) exceeds chunk end \
             position {end}"
        );
    }

    self.cur_pos = pos;
    Ok(())
}
/// Deserializes an element, but only if the GameMaker version is at least
/// `ver_req`.
///
/// This is useful for handling format changes across different GameMaker
/// versions where certain chunks or fields were added, removed, or
/// modified.
///
/// # Returns
/// - `Ok(Some(T))` if the version requirement is met and deserialization
///   succeeds
/// - `Ok(None)` if the version requirement is not met (nothing is read)
/// - `Err(_)` if the version requirement is met but deserialization fails
pub fn deserialize_if_gm_version<T: GMElement, V: Into<GMVersionReq>>(
    &mut self,
    ver_req: V,
) -> Result<Option<T>> {
    // Guard clause: skip the element entirely on older versions.
    if !self.general_info.is_version_at_least(ver_req) {
        return Ok(None);
    }

    let element = T::deserialize(self)?;
    Ok(Some(element))
}
/// Deserializes an element, but only if the WAD version is at least
/// `ver_req`.
///
/// # Returns
/// - `Ok(Some(T))` if the WAD version requirement is met and
///   deserialization succeeds
/// - `Ok(None)` if the WAD version requirement is not met (nothing is read)
/// - `Err(_)` if the WAD version requirement is met but deserialization
///   fails
pub fn deserialize_if_wad_version<T: GMElement>(&mut self, ver_req: u8) -> Result<Option<T>> {
    // Guard clause: skip the element entirely on older WAD versions.
    if self.general_info.wad_version < ver_req {
        return Ok(None);
    }

    let element = T::deserialize(self)?;
    Ok(Some(element))
}
}