Skip to main content

wzlib_rs/wz/
binary_reader.rs

1//! WZ binary reader — reads encrypted strings, compressed ints, and offsets.
2//!
3//! Ported from MapleLib's `WzBinaryReader.cs`.
4
5use std::io::{Read, Seek, SeekFrom};
6
7use super::error::{WzError, WzResult};
8use super::header::WzHeader;
9use super::keys::WzKey;
10use crate::crypto::constants::WZ_OFFSET_CONSTANT;
11
12pub struct WzBinaryReader<R: Read + Seek> {
13    reader: R,
14    pub wz_key: WzKey,
15    pub hash: u32,
16    pub header: WzHeader,
17    pub start_offset: u64,
18}
19
// Expands each `method -> Type` pair into
// `pub fn method(&mut self) -> WzResult<Type>`, which fills a buffer of
// `size_of::<Type>()` bytes from `self.reader` and decodes it as
// little-endian.
macro_rules! impl_read_le {
    ($($method:ident -> $prim:ty),+ $(,)?) => {
        $(
            pub fn $method(&mut self) -> WzResult<$prim> {
                let mut raw = [0u8; std::mem::size_of::<$prim>()];
                self.reader.read_exact(&mut raw)?;
                Ok(<$prim>::from_le_bytes(raw))
            }
        )+
    };
}
29
30impl<R: Read + Seek> WzBinaryReader<R> {
31    pub fn new(reader: R, iv: [u8; 4], header: WzHeader, start_offset: u64) -> Self {
32        WzBinaryReader {
33            reader,
34            wz_key: WzKey::new(iv),
35            hash: 0,
36            header,
37            start_offset,
38        }
39    }
40
41    pub fn position(&mut self) -> WzResult<u64> {
42        Ok(self.reader.stream_position()?)
43    }
44
45    pub fn seek(&mut self, pos: u64) -> WzResult<()> {
46        self.reader.seek(SeekFrom::Start(pos))?;
47        Ok(())
48    }
49
50    pub fn available(&mut self) -> WzResult<u64> {
51        let pos = self.position()?;
52        let end = self.header.data_start as u64 + self.header.file_size;
53        Ok(end.saturating_sub(pos))
54    }
55
56    // ── Primitive reads ──────────────────────────────────────────────
57
58    impl_read_le! {
59        read_u8  -> u8,
60        read_u16 -> u16,
61        read_i16 -> i16,
62        read_u32 -> u32,
63        read_i32 -> i32,
64        read_i64 -> i64,
65        read_f32 -> f32,
66        read_f64 -> f64,
67    }
68
69    pub fn read_i8(&mut self) -> WzResult<i8> {
70        Ok(self.read_u8()? as i8)
71    }
72
73    pub fn read_bytes(&mut self, len: usize) -> WzResult<Vec<u8>> {
74        // Prevent OOM panics (which become WASM `unreachable` traps) from
75        // corrupted size values. No single WZ property should exceed 256 MB.
76        const MAX_READ: usize = 256 * 1024 * 1024;
77        if len > MAX_READ {
78            return Err(WzError::Custom(format!(
79                "Read request too large: {} bytes (max {})",
80                len, MAX_READ
81            )));
82        }
83        let mut buf = vec![0u8; len];
84        self.reader.read_exact(&mut buf)?;
85        Ok(buf)
86    }
87
88    // ── WZ-specific compressed reads ─────────────────────────────────
89
90    pub fn read_compressed_int(&mut self) -> WzResult<i32> {
91        let indicator = self.read_i8()?;
92        if indicator == -128 {
93            self.read_i32()
94        } else {
95            Ok(indicator as i32)
96        }
97    }
98
99    pub fn read_compressed_long(&mut self) -> WzResult<i64> {
100        let indicator = self.read_i8()?;
101        if indicator == -128 {
102            self.read_i64()
103        } else {
104            Ok(indicator as i64)
105        }
106    }
107
108    // ── WZ encrypted string reads ────────────────────────────────────
109
110    pub fn read_wz_string(&mut self) -> WzResult<String> {
111        let indicator = self.read_i8()?;
112
113        if indicator >= 0 {
114            // Unicode string
115            self.read_wz_unicode_string(indicator)
116        } else {
117            // ASCII string
118            self.read_wz_ascii_string(indicator)
119        }
120    }
121
122    fn read_wz_unicode_string(&mut self, indicator: i8) -> WzResult<String> {
123        let length = if indicator == 127 {
124            let len = self.read_i32()?;
125            if len <= 0 {
126                return Ok(String::new());
127            }
128            len as usize
129        } else {
130            indicator as usize
131        };
132
133        if length == 0 {
134            return Ok(String::new());
135        }
136
137        if length > super::MAX_WZ_STRING_LEN {
138            return Err(WzError::Custom(format!(
139                "Unicode string length too large: {}",
140                length
141            )));
142        }
143
144        self.wz_key.ensure_size(length * 2);
145
146        let mut chars = Vec::with_capacity(length);
147        let mut mask: u16 = super::WZ_UNICODE_MASK_INIT;
148
149        for i in 0..length {
150            let encrypted = self.read_u16()?;
151            let key_lo = self.wz_key[i * 2] as u16;
152            let key_hi = self.wz_key[i * 2 + 1] as u16;
153            let key_word = key_lo | (key_hi << 8);
154
155            let decrypted = encrypted ^ mask ^ key_word;
156            mask = mask.wrapping_add(1);
157            chars.push(decrypted);
158        }
159
160        Ok(String::from_utf16_lossy(&chars))
161    }
162
163    fn read_wz_ascii_string(&mut self, indicator: i8) -> WzResult<String> {
164        let length = if indicator == -128 {
165            let len = self.read_i32()?;
166            if len <= 0 {
167                return Ok(String::new());
168            }
169            len as usize
170        } else {
171            -(indicator as i32) as usize
172        };
173
174        if length == 0 {
175            return Ok(String::new());
176        }
177
178        if length > super::MAX_WZ_STRING_LEN {
179            return Err(WzError::Custom(format!(
180                "ASCII string length too large: {}",
181                length
182            )));
183        }
184
185        self.wz_key.ensure_size(length);
186
187        let mut bytes = self.read_bytes(length)?;
188        let mut mask: u8 = super::WZ_ASCII_MASK_INIT;
189
190        for (i, byte) in bytes.iter_mut().enumerate() {
191            *byte ^= mask;
192            *byte ^= self.wz_key[i];
193            mask = mask.wrapping_add(1);
194        }
195
196        Ok(String::from_utf8_lossy(&bytes).to_string())
197    }
198
199    // C#'s `ReadStringAtOffset()`: adjusts by start_offset for embedded sub-files.
200    pub fn read_string_at_offset(&mut self, offset: u64) -> WzResult<String> {
201        let saved = self.position()?;
202        self.seek(offset - self.start_offset)?;
203        let s = self.read_wz_string()?;
204        self.seek(saved)?;
205        Ok(s)
206    }
207
208    // Type byte: 0x00|0x73 = inline string, 0x01|0x1B = string at offset, else empty
209    pub fn read_string_block(&mut self, offset: u64) -> WzResult<String> {
210        let type_byte = self.read_u8()?;
211        match type_byte {
212            0x00 | 0x73 => self.read_wz_string(),
213            0x01 | 0x1B => {
214                let str_offset = self.read_i32()?;
215                self.read_string_at_offset(offset.wrapping_add(str_offset as i64 as u64))
216            }
217            _ => Ok(String::new()),
218        }
219    }
220
221    // ── WZ offset decryption ─────────────────────────────────────────
222
223    pub fn read_wz_offset(&mut self) -> WzResult<u64> {
224        let cur_pos = self.position()? as u32;
225        let fstart = self.header.data_start;
226
227        let mut offset = (cur_pos.wrapping_sub(fstart)) ^ 0xFFFF_FFFF;
228        offset = offset.wrapping_mul(self.hash);
229        offset = offset.wrapping_sub(WZ_OFFSET_CONSTANT);
230        offset = offset.rotate_left(offset & 0x1F);
231
232        let encrypted = self.read_u32()?;
233        offset ^= encrypted;
234        offset = offset.wrapping_add(fstart.wrapping_mul(2));
235
236        Ok(offset as u64 + self.start_offset)
237    }
238}
239
#[cfg(test)]
mod tests {
    //! Unit tests for `WzBinaryReader`.
    //!
    //! Readers come from `crate::wz::test_utils`; the string tests rely on
    //! the zero key ("BMS") those helpers set up, so the on-disk bytes are the
    //! plain text XOR-ed with the incrementing mask only.

    use super::*;
    use crate::wz::test_utils::*;
    use std::io::Cursor;

    // ── Compressed int ─────────────────────────────────────────────

    #[test]
    fn test_read_compressed_int_small() {
        let mut reader = make_reader(vec![42]); // indicator = 42
        assert_eq!(reader.read_compressed_int().unwrap(), 42);
    }

    #[test]
    fn test_read_compressed_int_large() {
        let mut data = vec![0x80u8]; // indicator = -128 → read i32
        data.extend_from_slice(&1000i32.to_le_bytes());
        let mut reader = make_reader(data);
        assert_eq!(reader.read_compressed_int().unwrap(), 1000);
    }

    #[test]
    fn test_read_compressed_int_negative() {
        let mut reader = make_reader(vec![0xFE]); // -2 as i8
        assert_eq!(reader.read_compressed_int().unwrap(), -2);
    }

    // ── Compressed long ────────────────────────────────────────────

    #[test]
    fn test_read_compressed_long_small() {
        let mut reader = make_reader(vec![42]);
        assert_eq!(reader.read_compressed_long().unwrap(), 42i64);
    }

    #[test]
    fn test_read_compressed_long_large() {
        let mut data = vec![0x80u8]; // indicator = -128 → read i64
        data.extend_from_slice(&999_999_999i64.to_le_bytes());
        let mut reader = make_reader(data);
        assert_eq!(reader.read_compressed_long().unwrap(), 999_999_999);
    }

    #[test]
    fn test_read_compressed_long_negative() {
        let mut reader = make_reader(vec![0xFDu8]); // -3 as i8
        assert_eq!(reader.read_compressed_long().unwrap(), -3i64);
    }

    // ── Raw byte reads ─────────────────────────────────────────────

    #[test]
    fn test_read_bytes_rejects_oversized_request() {
        // One byte past the 256 MiB guard must fail before any allocation.
        let mut reader = make_reader(vec![0; 4]);
        assert!(reader.read_bytes(256 * 1024 * 1024 + 1).is_err());
    }

    // ── ASCII string (BMS zero-key) ────────────────────────────────

    #[test]
    fn test_read_wz_string_ascii_short() {
        // Encode "Hi" with BMS zero-key
        let data = encode_wz_ascii("Hi");
        let mut reader = make_reader(data);
        assert_eq!(reader.read_wz_string().unwrap(), "Hi");
    }

    #[test]
    fn test_read_wz_string_ascii_property() {
        let data = encode_wz_ascii("Property");
        let mut reader = make_reader(data);
        assert_eq!(reader.read_wz_string().unwrap(), "Property");
    }

    #[test]
    fn test_read_wz_string_ascii_long_indicator() {
        // indicator = -128 (0x80), then i32 length, then encrypted bytes
        let s = "TestLongString";
        let len = s.len() as i32;
        let mut data = vec![0x80u8];
        data.extend_from_slice(&len.to_le_bytes());
        let mut mask: u8 = 0xAA;
        for b in s.bytes() {
            data.push(b ^ mask);
            mask = mask.wrapping_add(1);
        }
        let mut reader = make_reader(data);
        assert_eq!(reader.read_wz_string().unwrap(), s);
    }

    #[test]
    fn test_read_wz_string_ascii_long_indicator_zero_len() {
        // indicator = -128 followed by an explicit length of 0 → empty string
        let mut data = vec![0x80u8];
        data.extend_from_slice(&0i32.to_le_bytes());
        let mut reader = make_reader(data);
        assert_eq!(reader.read_wz_string().unwrap(), "");
    }

    // ── Unicode string (BMS zero-key) ──────────────────────────────

    #[test]
    fn test_read_wz_string_unicode_short() {
        let data = encode_wz_unicode("AB");
        let mut reader = make_reader(data);
        assert_eq!(reader.read_wz_string().unwrap(), "AB");
    }

    #[test]
    fn test_read_wz_string_unicode_single_char() {
        let data = encode_wz_unicode("X");
        let mut reader = make_reader(data);
        assert_eq!(reader.read_wz_string().unwrap(), "X");
    }

    // ── String block ───────────────────────────────────────────────

    #[test]
    fn test_read_string_block_inline_0x73() {
        // type=0x73 → inline WZ string
        let mut data = vec![0x73u8];
        data.extend_from_slice(&encode_wz_ascii("Hello"));
        let mut reader = make_reader(data);
        assert_eq!(reader.read_string_block(0).unwrap(), "Hello");
    }

    #[test]
    fn test_read_string_block_inline_0x00() {
        let mut data = vec![0x00u8];
        data.extend_from_slice(&encode_wz_ascii("Test"));
        let mut reader = make_reader(data);
        assert_eq!(reader.read_string_block(0).unwrap(), "Test");
    }

    #[test]
    fn test_read_string_block_unknown_type_returns_empty() {
        let data = vec![0xFFu8];
        let mut reader = make_reader(data);
        assert_eq!(reader.read_string_block(0).unwrap(), "");
    }

    #[test]
    fn test_read_string_block_offset_0x01() {
        // Layout: [type=0x01 at pos 0] [offset i32 at pos 1..5] [...padding...] [string at pos 10]
        // We set base_offset=0, and the i32 offset value = 10
        // So it reads string at position (0 + 10) - start_offset(0) = 10
        let target_str = encode_wz_ascii("AtOffset");
        let mut data = vec![0x01u8];
        data.extend_from_slice(&10i32.to_le_bytes()); // offset = 10
        // Pad to position 10
        while data.len() < 10 {
            data.push(0x00);
        }
        data.extend_from_slice(&target_str);
        let mut reader = make_reader(data);
        assert_eq!(reader.read_string_block(0).unwrap(), "AtOffset");
    }

    #[test]
    fn test_read_string_block_offset_0x1b() {
        // Same layout as the 0x01 case; 0x1B is handled by the same branch.
        let target_str = encode_wz_ascii("AtOffset");
        let mut data = vec![0x1Bu8];
        data.extend_from_slice(&10i32.to_le_bytes()); // offset = 10
        // Pad to position 10
        while data.len() < 10 {
            data.push(0x00);
        }
        data.extend_from_slice(&target_str);
        let mut reader = make_reader(data);
        assert_eq!(reader.read_string_block(0).unwrap(), "AtOffset");
    }

    // ── Position / seek / available ────────────────────────────────

    #[test]
    fn test_position_starts_at_zero() {
        let mut reader = make_reader(vec![0; 10]);
        assert_eq!(reader.position().unwrap(), 0);
    }

    #[test]
    fn test_seek_and_position_roundtrip() {
        let mut reader = make_reader(vec![0; 100]);
        reader.seek(42).unwrap();
        assert_eq!(reader.position().unwrap(), 42);
        reader.seek(0).unwrap();
        assert_eq!(reader.position().unwrap(), 0);
    }

    #[test]
    fn test_available_full() {
        // file_size=100, data_start=0, pos=0 → available = 0+100-0 = 100
        let mut reader = make_reader_with_header(vec![0; 100], 0, 100);
        assert_eq!(reader.available().unwrap(), 100);
    }

    #[test]
    fn test_available_after_read() {
        let mut reader = make_reader_with_header(vec![0; 100], 0, 100);
        reader.read_u8().unwrap(); // consume 1 byte
        assert_eq!(reader.available().unwrap(), 99);
    }

    #[test]
    fn test_available_with_data_start() {
        // file_size=50, data_start=10, pos=0 → end = 10+50=60, available = 60-0=60
        let mut reader = make_reader_with_header(vec![0; 100], 10, 50);
        assert_eq!(reader.available().unwrap(), 60);
    }

    // ── read_string_at_offset ─────────────────────────────────────

    #[test]
    fn test_read_string_at_offset() {
        let encoded = encode_wz_ascii("TargetString");
        let mut data = vec![0u8; 20];
        data.extend_from_slice(&encoded);
        data.extend_from_slice(&[0u8; 10]); // trailing padding

        let mut reader = make_reader(data);
        reader.seek(5).unwrap();
        let result = reader.read_string_at_offset(20).unwrap();
        assert_eq!(result, "TargetString");
        // Position restored
        assert_eq!(reader.position().unwrap(), 5);
    }

    #[test]
    fn test_read_string_at_offset_with_start_offset() {
        // String at buffer position 10. start_offset=5, so caller passes offset=15.
        let encoded = encode_wz_ascii("Offset");
        let mut data = vec![0u8; 10];
        data.extend_from_slice(&encoded);
        let header = dummy_header(data.len() as u64);
        let mut reader = WzBinaryReader::new(Cursor::new(data), [0; 4], header, 5);
        let result = reader.read_string_at_offset(15).unwrap();
        assert_eq!(result, "Offset");
    }

    // ── WZ offset decryption ───────────────────────────────────────

    #[test]
    fn test_read_wz_offset_deterministic() {
        // Set up: data_start=60, hash=713421, position at byte 60
        // We need 4 bytes of encrypted offset data at position 60
        let fstart: u32 = 60;
        let hash: u32 = 713421;

        // Calculate expected intermediate values:
        // cur_pos = 60, offset = (60-60) ^ 0xFFFFFFFF = 0xFFFFFFFF
        // offset *= 713421 (wrapping) → some value
        // offset -= WZ_OFFSET_CONSTANT → some value
        // offset = rotate_left(offset, offset & 0x1F)
        // Then we pick encrypted=0 so offset ^= 0 is unchanged
        // offset += fstart * 2 = 120

        let mut offset: u32 = (fstart.wrapping_sub(fstart)) ^ 0xFFFF_FFFF;
        offset = offset.wrapping_mul(hash);
        offset = offset.wrapping_sub(WZ_OFFSET_CONSTANT);
        offset = offset.rotate_left(offset & 0x1F);
        let pre_xor = offset;
        // If encrypted_u32 = 0, final = pre_xor + fstart*2
        let expected = pre_xor.wrapping_add(fstart.wrapping_mul(2)) as u64;

        // Build data: 60 bytes of padding + 4 bytes of encrypted offset (0)
        let data = vec![0u8; 64];
        // encrypted u32 = 0 (already zero)

        let mut reader = make_reader_with_header(data.clone(), fstart, data.len() as u64);
        reader.hash = hash;
        reader.seek(fstart as u64).unwrap();
        let result = reader.read_wz_offset().unwrap();
        assert_eq!(result, expected);
    }
}
484}