unity_asset_binary/
reader.rs

1//! Binary data reader for Unity files
2
3use crate::error::{BinaryError, Result};
4use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
5use std::io::{Cursor, Read, Seek, SeekFrom};
6
/// Endianness applied when decoding multi-byte values.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ByteOrder {
    /// Most significant byte first (network byte order).
    Big,
    /// Least significant byte first (most common on x86/x64).
    ///
    /// This is the variant returned by `ByteOrder::default()`.
    #[default]
    Little,
}
16
17/// Binary reader for Unity file formats
18pub struct BinaryReader<'a> {
19    cursor: Cursor<&'a [u8]>,
20    byte_order: ByteOrder,
21}
22
23impl<'a> BinaryReader<'a> {
24    /// Default maximum length for length-prefixed strings.
25    ///
26    /// Unity files can contain large text blobs (e.g. TextAsset), but unbounded allocations are a
27    /// DoS risk when parsing hostile input.
28    pub const DEFAULT_MAX_STRING_LEN: usize = 16 * 1024 * 1024; // 16 MiB
29
30    /// Create a new binary reader from byte slice
31    pub fn new(data: &'a [u8], byte_order: ByteOrder) -> Self {
32        Self {
33            cursor: Cursor::new(data),
34            byte_order,
35        }
36    }
37
38    /// Get current position in the stream
39    pub fn position(&self) -> u64 {
40        self.cursor.position()
41    }
42
43    /// Set position in the stream
44    pub fn set_position(&mut self, pos: u64) -> Result<()> {
45        self.cursor.set_position(pos);
46        Ok(())
47    }
48
49    /// Seek to a position relative to the current position
50    pub fn seek(&mut self, offset: i64) -> Result<u64> {
51        Ok(self.cursor.seek(SeekFrom::Current(offset))?)
52    }
53
54    /// Get the total length of the data
55    pub fn len(&self) -> usize {
56        self.cursor.get_ref().len()
57    }
58
59    /// Check if the reader is empty
60    pub fn is_empty(&self) -> bool {
61        self.len() == 0
62    }
63
64    /// Get remaining bytes from current position
65    pub fn remaining(&self) -> usize {
66        self.len().saturating_sub(self.position() as usize)
67    }
68
69    /// Check if we have at least `count` bytes remaining
70    pub fn has_bytes(&self, count: usize) -> bool {
71        self.remaining() >= count
72    }
73
74    /// Align to the next 4-byte boundary
75    pub fn align(&mut self) -> Result<()> {
76        self.align_to(4)
77    }
78
79    /// Align to the specified byte boundary
80    pub fn align_to(&mut self, alignment: u64) -> Result<()> {
81        let pos = self.position();
82        let aligned = (pos + alignment - 1) & !(alignment - 1);
83        if aligned != pos {
84            self.set_position(aligned)?;
85        }
86        Ok(())
87    }
88
89    /// Read a single byte
90    pub fn read_u8(&mut self) -> Result<u8> {
91        if !self.has_bytes(1) {
92            return Err(BinaryError::not_enough_data(1, self.remaining()));
93        }
94        Ok(self.cursor.read_u8()?)
95    }
96
97    /// Read a boolean (as u8, 0 = false, non-zero = true)
98    pub fn read_bool(&mut self) -> Result<bool> {
99        Ok(self.read_u8()? != 0)
100    }
101
102    /// Read a signed 8-bit integer
103    pub fn read_i8(&mut self) -> Result<i8> {
104        Ok(self.read_u8()? as i8)
105    }
106
107    /// Read an unsigned 16-bit integer
108    pub fn read_u16(&mut self) -> Result<u16> {
109        if !self.has_bytes(2) {
110            return Err(BinaryError::not_enough_data(2, self.remaining()));
111        }
112        match self.byte_order {
113            ByteOrder::Big => Ok(self.cursor.read_u16::<BigEndian>()?),
114            ByteOrder::Little => Ok(self.cursor.read_u16::<LittleEndian>()?),
115        }
116    }
117
118    /// Read a signed 16-bit integer
119    pub fn read_i16(&mut self) -> Result<i16> {
120        if !self.has_bytes(2) {
121            return Err(BinaryError::not_enough_data(2, self.remaining()));
122        }
123        match self.byte_order {
124            ByteOrder::Big => Ok(self.cursor.read_i16::<BigEndian>()?),
125            ByteOrder::Little => Ok(self.cursor.read_i16::<LittleEndian>()?),
126        }
127    }
128
129    /// Read an unsigned 32-bit integer
130    pub fn read_u32(&mut self) -> Result<u32> {
131        if !self.has_bytes(4) {
132            return Err(BinaryError::not_enough_data(4, self.remaining()));
133        }
134        match self.byte_order {
135            ByteOrder::Big => Ok(self.cursor.read_u32::<BigEndian>()?),
136            ByteOrder::Little => Ok(self.cursor.read_u32::<LittleEndian>()?),
137        }
138    }
139
140    /// Read a signed 32-bit integer
141    pub fn read_i32(&mut self) -> Result<i32> {
142        if !self.has_bytes(4) {
143            return Err(BinaryError::not_enough_data(4, self.remaining()));
144        }
145        match self.byte_order {
146            ByteOrder::Big => Ok(self.cursor.read_i32::<BigEndian>()?),
147            ByteOrder::Little => Ok(self.cursor.read_i32::<LittleEndian>()?),
148        }
149    }
150
151    /// Read an unsigned 64-bit integer
152    pub fn read_u64(&mut self) -> Result<u64> {
153        if !self.has_bytes(8) {
154            return Err(BinaryError::not_enough_data(8, self.remaining()));
155        }
156        match self.byte_order {
157            ByteOrder::Big => Ok(self.cursor.read_u64::<BigEndian>()?),
158            ByteOrder::Little => Ok(self.cursor.read_u64::<LittleEndian>()?),
159        }
160    }
161
162    /// Read a signed 64-bit integer
163    pub fn read_i64(&mut self) -> Result<i64> {
164        if !self.has_bytes(8) {
165            return Err(BinaryError::not_enough_data(8, self.remaining()));
166        }
167        match self.byte_order {
168            ByteOrder::Big => Ok(self.cursor.read_i64::<BigEndian>()?),
169            ByteOrder::Little => Ok(self.cursor.read_i64::<LittleEndian>()?),
170        }
171    }
172
173    /// Read a 32-bit floating point number
174    pub fn read_f32(&mut self) -> Result<f32> {
175        if !self.has_bytes(4) {
176            return Err(BinaryError::not_enough_data(4, self.remaining()));
177        }
178        match self.byte_order {
179            ByteOrder::Big => Ok(self.cursor.read_f32::<BigEndian>()?),
180            ByteOrder::Little => Ok(self.cursor.read_f32::<LittleEndian>()?),
181        }
182    }
183
184    /// Read a 64-bit floating point number
185    pub fn read_f64(&mut self) -> Result<f64> {
186        if !self.has_bytes(8) {
187            return Err(BinaryError::not_enough_data(8, self.remaining()));
188        }
189        match self.byte_order {
190            ByteOrder::Big => Ok(self.cursor.read_f64::<BigEndian>()?),
191            ByteOrder::Little => Ok(self.cursor.read_f64::<LittleEndian>()?),
192        }
193    }
194
195    /// Read a fixed number of bytes
196    pub fn read_bytes(&mut self, count: usize) -> Result<Vec<u8>> {
197        if !self.has_bytes(count) {
198            return Err(BinaryError::not_enough_data(count, self.remaining()));
199        }
200        let mut buffer = vec![0u8; count];
201        self.cursor.read_exact(&mut buffer)?;
202        Ok(buffer)
203    }
204
205    /// Skip a fixed number of bytes without allocating.
206    pub fn skip_bytes(&mut self, count: usize) -> Result<()> {
207        if !self.has_bytes(count) {
208            return Err(BinaryError::not_enough_data(count, self.remaining()));
209        }
210        self.seek(count as i64)?;
211        Ok(())
212    }
213
214    /// Read all remaining bytes
215    pub fn read_remaining(&mut self) -> &[u8] {
216        let pos = self.cursor.position() as usize;
217        let data = self.cursor.get_ref();
218        &data[pos..]
219    }
220
221    /// Read a null-terminated string
222    pub fn read_cstring(&mut self) -> Result<String> {
223        let mut bytes = Vec::new();
224        loop {
225            let byte = self.read_u8()?;
226            if byte == 0 {
227                break;
228            }
229            bytes.push(byte);
230        }
231        Ok(String::from_utf8(bytes)?)
232    }
233
234    /// Read a string with a length prefix (32-bit)
235    pub fn read_string(&mut self) -> Result<String> {
236        self.read_string_limited(Self::DEFAULT_MAX_STRING_LEN)
237    }
238
239    /// Read a string with a length prefix and an explicit maximum size.
240    ///
241    /// Unity typically encodes these lengths as signed 32-bit integers.
242    pub fn read_string_limited(&mut self, max_len: usize) -> Result<String> {
243        let length = self.read_i32()?;
244        if length < 0 {
245            return Err(BinaryError::invalid_data(format!(
246                "Negative string length: {}",
247                length
248            )));
249        }
250
251        let length: usize = length as usize;
252        if length > max_len {
253            return Err(BinaryError::invalid_data(format!(
254                "String length {} exceeds limit {}",
255                length, max_len
256            )));
257        }
258
259        // Hard check against remaining to avoid allocating huge buffers just to fail later.
260        let remaining = self.remaining();
261        if length > remaining {
262            return Err(BinaryError::not_enough_data(length, remaining));
263        }
264
265        let bytes = self.read_bytes(length)?;
266        Ok(String::from_utf8(bytes)?)
267    }
268
269    /// Read a string with a specific length
270    pub fn read_string_fixed(&mut self, length: usize) -> Result<String> {
271        let bytes = self.read_bytes(length)?;
272        // Remove null terminators
273        let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
274        Ok(String::from_utf8(bytes[..end].to_vec())?)
275    }
276
277    /// Read an aligned string (Unity format)
278    pub fn read_aligned_string(&mut self) -> Result<String> {
279        let string = self.read_string()?;
280        // Align to 4-byte boundary
281        self.align()?;
282        Ok(string)
283    }
284
285    /// Get the current byte order
286    pub fn byte_order(&self) -> ByteOrder {
287        self.byte_order
288    }
289
290    /// Set the byte order
291    pub fn set_byte_order(&mut self, byte_order: ByteOrder) {
292        self.byte_order = byte_order;
293    }
294
295    /// Get a slice of the remaining data
296    pub fn remaining_slice(&self) -> &[u8] {
297        let pos = self.position() as usize;
298        &self.cursor.get_ref()[pos..]
299    }
300
301    /// Create a new reader for a subset of the data
302    pub fn sub_reader(&self, offset: usize, length: usize) -> Result<BinaryReader<'a>> {
303        let data = self.cursor.get_ref();
304        if offset + length > data.len() {
305            return Err(BinaryError::not_enough_data(offset + length, data.len()));
306        }
307        Ok(BinaryReader::new(
308            &data[offset..offset + length],
309            self.byte_order,
310        ))
311    }
312}
313
#[cfg(test)]
mod tests {
    use super::*;

    /// Single-byte reads advance the position and shrink the remaining count.
    #[test]
    fn test_basic_reading() {
        let bytes = [0x01, 0x02, 0x03, 0x04];
        let mut r = BinaryReader::new(&bytes, ByteOrder::Little);

        let first = r.read_u8().unwrap();
        let second = r.read_u8().unwrap();

        assert_eq!(first, 0x01);
        assert_eq!(second, 0x02);
        assert_eq!(r.position(), 2);
        assert_eq!(r.remaining(), 2);
    }

    /// Skipping moves the position forward and rejects over-long skips.
    #[test]
    fn test_skip_bytes() {
        let bytes = [0x01, 0x02, 0x03, 0x04, 0x05];
        let mut r = BinaryReader::new(&bytes, ByteOrder::Little);

        r.skip_bytes(2).unwrap();
        assert_eq!(r.position(), 2);
        assert_eq!(r.read_u8().unwrap(), 0x03);

        // Only two bytes are left, so skipping ten must fail.
        let too_far = r.skip_bytes(10);
        assert!(too_far.is_err());
    }

    /// The same four bytes decode differently depending on the byte order.
    #[test]
    fn test_endianness() {
        let bytes = [0x01, 0x02, 0x03, 0x04];

        let mut little = BinaryReader::new(&bytes, ByteOrder::Little);
        assert_eq!(little.read_u32().unwrap(), 0x04030201);

        let mut big = BinaryReader::new(&bytes, ByteOrder::Big);
        assert_eq!(big.read_u32().unwrap(), 0x01020304);
    }

    /// Consecutive NUL-terminated strings are read one after another.
    #[test]
    fn test_string_reading() {
        let bytes = b"Hello\0World\0";
        let mut r = BinaryReader::new(bytes, ByteOrder::Little);

        let first = r.read_cstring().unwrap();
        let second = r.read_cstring().unwrap();

        assert_eq!(first, "Hello");
        assert_eq!(second, "World");
    }

    /// Aligning from offset 1 lands on the next 4-byte boundary.
    #[test]
    fn test_alignment() {
        let bytes = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
        let mut r = BinaryReader::new(&bytes, ByteOrder::Little);

        r.read_u8().unwrap(); // position is now 1
        r.align().unwrap(); // position is now 4
        assert_eq!(r.position(), 4);
    }
}