wzlib_rs/wz/
binary_reader.rs1use std::io::{Read, Seek, SeekFrom};
6
7use super::error::{WzError, WzResult};
8use super::header::WzHeader;
9use super::keys::WzKey;
10use crate::crypto::constants::WZ_OFFSET_CONSTANT;
11
12pub struct WzBinaryReader<R: Read + Seek> {
13 reader: R,
14 pub wz_key: WzKey,
15 pub hash: u32,
16 pub header: WzHeader,
17 pub start_offset: u64,
18}
19
// Generates fixed-width little-endian readers.
//
// Each `name -> ty` entry expands to `pub fn name(&mut self) -> WzResult<ty>`,
// which pulls exactly `size_of::<ty>()` bytes from `self.reader` and decodes
// them with `ty::from_le_bytes`. A trailing comma after the last entry is
// accepted.
macro_rules! impl_read_le {
    ($($name:ident -> $ty:ty),+ $(,)?) => {
        $(
            pub fn $name(&mut self) -> WzResult<$ty> {
                const WIDTH: usize = std::mem::size_of::<$ty>();
                let mut raw = [0u8; WIDTH];
                self.reader.read_exact(&mut raw)?;
                Ok(<$ty>::from_le_bytes(raw))
            }
        )+
    };
}
29
30impl<R: Read + Seek> WzBinaryReader<R> {
31 pub fn new(reader: R, iv: [u8; 4], header: WzHeader, start_offset: u64) -> Self {
32 WzBinaryReader {
33 reader,
34 wz_key: WzKey::new(iv),
35 hash: 0,
36 header,
37 start_offset,
38 }
39 }
40
41 pub fn position(&mut self) -> WzResult<u64> {
42 Ok(self.reader.stream_position()?)
43 }
44
45 pub fn seek(&mut self, pos: u64) -> WzResult<()> {
46 self.reader.seek(SeekFrom::Start(pos))?;
47 Ok(())
48 }
49
50 pub fn available(&mut self) -> WzResult<u64> {
51 let pos = self.position()?;
52 let end = self.header.data_start as u64 + self.header.file_size;
53 Ok(end.saturating_sub(pos))
54 }
55
// Fixed-width little-endian primitive readers. Each entry below expands
// (via `impl_read_le!`) to a `pub fn <name>(&mut self) -> WzResult<<type>>`
// that reads exactly `size_of::<type>()` bytes from the stream.
impl_read_le! {
    read_u8 -> u8,
    read_u16 -> u16,
    read_i16 -> i16,
    read_u32 -> u32,
    read_i32 -> i32,
    read_i64 -> i64,
    read_f32 -> f32,
    read_f64 -> f64,
}
68
69 pub fn read_i8(&mut self) -> WzResult<i8> {
70 Ok(self.read_u8()? as i8)
71 }
72
73 pub fn read_bytes(&mut self, len: usize) -> WzResult<Vec<u8>> {
74 const MAX_READ: usize = 256 * 1024 * 1024;
77 if len > MAX_READ {
78 return Err(WzError::Custom(format!(
79 "Read request too large: {} bytes (max {})",
80 len, MAX_READ
81 )));
82 }
83 let mut buf = vec![0u8; len];
84 self.reader.read_exact(&mut buf)?;
85 Ok(buf)
86 }
87
88 pub fn read_compressed_int(&mut self) -> WzResult<i32> {
91 let indicator = self.read_i8()?;
92 if indicator == -128 {
93 self.read_i32()
94 } else {
95 Ok(indicator as i32)
96 }
97 }
98
99 pub fn read_compressed_long(&mut self) -> WzResult<i64> {
100 let indicator = self.read_i8()?;
101 if indicator == -128 {
102 self.read_i64()
103 } else {
104 Ok(indicator as i64)
105 }
106 }
107
108 pub fn read_wz_string(&mut self) -> WzResult<String> {
111 let indicator = self.read_i8()?;
112
113 if indicator >= 0 {
114 self.read_wz_unicode_string(indicator)
116 } else {
117 self.read_wz_ascii_string(indicator)
119 }
120 }
121
122 fn read_wz_unicode_string(&mut self, indicator: i8) -> WzResult<String> {
123 let length = if indicator == 127 {
124 let len = self.read_i32()?;
125 if len <= 0 {
126 return Ok(String::new());
127 }
128 len as usize
129 } else {
130 indicator as usize
131 };
132
133 if length == 0 {
134 return Ok(String::new());
135 }
136
137 if length > super::MAX_WZ_STRING_LEN {
138 return Err(WzError::Custom(format!(
139 "Unicode string length too large: {}",
140 length
141 )));
142 }
143
144 self.wz_key.ensure_size(length * 2);
145
146 let mut chars = Vec::with_capacity(length);
147 let mut mask: u16 = super::WZ_UNICODE_MASK_INIT;
148
149 for i in 0..length {
150 let encrypted = self.read_u16()?;
151 let key_lo = self.wz_key[i * 2] as u16;
152 let key_hi = self.wz_key[i * 2 + 1] as u16;
153 let key_word = key_lo | (key_hi << 8);
154
155 let decrypted = encrypted ^ mask ^ key_word;
156 mask = mask.wrapping_add(1);
157 chars.push(decrypted);
158 }
159
160 Ok(String::from_utf16_lossy(&chars))
161 }
162
163 fn read_wz_ascii_string(&mut self, indicator: i8) -> WzResult<String> {
164 let length = if indicator == -128 {
165 let len = self.read_i32()?;
166 if len <= 0 {
167 return Ok(String::new());
168 }
169 len as usize
170 } else {
171 -(indicator as i32) as usize
172 };
173
174 if length == 0 {
175 return Ok(String::new());
176 }
177
178 if length > super::MAX_WZ_STRING_LEN {
179 return Err(WzError::Custom(format!(
180 "ASCII string length too large: {}",
181 length
182 )));
183 }
184
185 self.wz_key.ensure_size(length);
186
187 let mut bytes = self.read_bytes(length)?;
188 let mut mask: u8 = super::WZ_ASCII_MASK_INIT;
189
190 for (i, byte) in bytes.iter_mut().enumerate() {
191 *byte ^= mask;
192 *byte ^= self.wz_key[i];
193 mask = mask.wrapping_add(1);
194 }
195
196 Ok(String::from_utf8_lossy(&bytes).to_string())
197 }
198
199 pub fn read_string_at_offset(&mut self, offset: u64) -> WzResult<String> {
201 let saved = self.position()?;
202 self.seek(offset - self.start_offset)?;
203 let s = self.read_wz_string()?;
204 self.seek(saved)?;
205 Ok(s)
206 }
207
208 pub fn read_string_block(&mut self, offset: u64) -> WzResult<String> {
210 let type_byte = self.read_u8()?;
211 match type_byte {
212 0x00 | 0x73 => self.read_wz_string(),
213 0x01 | 0x1B => {
214 let str_offset = self.read_i32()?;
215 self.read_string_at_offset(offset.wrapping_add(str_offset as i64 as u64))
216 }
217 _ => Ok(String::new()),
218 }
219 }
220
221 pub fn read_wz_offset(&mut self) -> WzResult<u64> {
224 let cur_pos = self.position()? as u32;
225 let fstart = self.header.data_start;
226
227 let mut offset = (cur_pos.wrapping_sub(fstart)) ^ 0xFFFF_FFFF;
228 offset = offset.wrapping_mul(self.hash);
229 offset = offset.wrapping_sub(WZ_OFFSET_CONSTANT);
230 offset = offset.rotate_left(offset & 0x1F);
231
232 let encrypted = self.read_u32()?;
233 offset ^= encrypted;
234 offset = offset.wrapping_add(fstart.wrapping_mul(2));
235
236 Ok(offset as u64 + self.start_offset)
237 }
238}
239
#[cfg(test)]
mod tests {
    use super::*;
    use crate::wz::test_utils::*;
    use std::io::Cursor;

    // Builds `[tag] ++ tail`; used for compressed numbers and string blocks.
    fn tagged(tag: u8, tail: &[u8]) -> Vec<u8> {
        let mut out = vec![tag];
        out.extend_from_slice(tail);
        out
    }

    // ---- compressed integers -------------------------------------------

    #[test]
    fn test_read_compressed_int_small() {
        let value = make_reader(vec![42]).read_compressed_int().unwrap();
        assert_eq!(value, 42);
    }

    #[test]
    fn test_read_compressed_int_large() {
        // 0x80 (-128) announces a full i32 payload.
        let bytes = tagged(0x80, &1000i32.to_le_bytes());
        let value = make_reader(bytes).read_compressed_int().unwrap();
        assert_eq!(value, 1000);
    }

    #[test]
    fn test_read_compressed_int_negative() {
        let value = make_reader(vec![0xFE]).read_compressed_int().unwrap();
        assert_eq!(value, -2);
    }

    // ---- compressed longs ----------------------------------------------

    #[test]
    fn test_read_compressed_long_small() {
        let value = make_reader(vec![42]).read_compressed_long().unwrap();
        assert_eq!(value, 42i64);
    }

    #[test]
    fn test_read_compressed_long_large() {
        // 0x80 (-128) announces a full i64 payload.
        let bytes = tagged(0x80, &999_999_999i64.to_le_bytes());
        let value = make_reader(bytes).read_compressed_long().unwrap();
        assert_eq!(value, 999_999_999);
    }

    #[test]
    fn test_read_compressed_long_negative() {
        let value = make_reader(vec![0xFDu8]).read_compressed_long().unwrap();
        assert_eq!(value, -3i64);
    }

    // ---- ASCII strings -------------------------------------------------

    #[test]
    fn test_read_wz_string_ascii_short() {
        let decoded = make_reader(encode_wz_ascii("Hi")).read_wz_string().unwrap();
        assert_eq!(decoded, "Hi");
    }

    #[test]
    fn test_read_wz_string_ascii_property() {
        let decoded = make_reader(encode_wz_ascii("Property"))
            .read_wz_string()
            .unwrap();
        assert_eq!(decoded, "Property");
    }

    #[test]
    fn test_read_wz_string_ascii_long_indicator() {
        let text = "TestLongString";

        // Indicator 0x80 (-128) followed by an explicit i32 length.
        let mut bytes = tagged(0x80, &(text.len() as i32).to_le_bytes());
        // Payload masked with 0xAA, 0xAB, ... (one step per byte).
        bytes.extend(
            text.bytes()
                .enumerate()
                .map(|(idx, b)| b ^ 0xAAu8.wrapping_add(idx as u8)),
        );

        let decoded = make_reader(bytes).read_wz_string().unwrap();
        assert_eq!(decoded, text);
    }

    // ---- Unicode strings -----------------------------------------------

    #[test]
    fn test_read_wz_string_unicode_short() {
        let decoded = make_reader(encode_wz_unicode("AB"))
            .read_wz_string()
            .unwrap();
        assert_eq!(decoded, "AB");
    }

    #[test]
    fn test_read_wz_string_unicode_single_char() {
        let decoded = make_reader(encode_wz_unicode("X"))
            .read_wz_string()
            .unwrap();
        assert_eq!(decoded, "X");
    }

    // ---- string blocks -------------------------------------------------

    #[test]
    fn test_read_string_block_inline_0x73() {
        let bytes = tagged(0x73, &encode_wz_ascii("Hello"));
        let decoded = make_reader(bytes).read_string_block(0).unwrap();
        assert_eq!(decoded, "Hello");
    }

    #[test]
    fn test_read_string_block_inline_0x00() {
        let bytes = tagged(0x00, &encode_wz_ascii("Test"));
        let decoded = make_reader(bytes).read_string_block(0).unwrap();
        assert_eq!(decoded, "Test");
    }

    #[test]
    fn test_read_string_block_unknown_type_returns_empty() {
        let decoded = make_reader(vec![0xFFu8]).read_string_block(0).unwrap();
        assert_eq!(decoded, "");
    }

    #[test]
    fn test_read_string_block_offset_0x01() {
        // Tag 0x01 + relative offset 10, zero-padded so the string really
        // starts at byte 10.
        let mut bytes = tagged(0x01, &10i32.to_le_bytes());
        bytes.resize(10, 0x00);
        bytes.extend_from_slice(&encode_wz_ascii("AtOffset"));

        let decoded = make_reader(bytes).read_string_block(0).unwrap();
        assert_eq!(decoded, "AtOffset");
    }

    // ---- position / seek / available -----------------------------------

    #[test]
    fn test_position_starts_at_zero() {
        let start = make_reader(vec![0; 10]).position().unwrap();
        assert_eq!(start, 0);
    }

    #[test]
    fn test_seek_and_position_roundtrip() {
        let mut reader = make_reader(vec![0; 100]);
        for &target in &[42u64, 0] {
            reader.seek(target).unwrap();
            assert_eq!(reader.position().unwrap(), target);
        }
    }

    #[test]
    fn test_available_full() {
        let remaining = make_reader_with_header(vec![0; 100], 0, 100)
            .available()
            .unwrap();
        assert_eq!(remaining, 100);
    }

    #[test]
    fn test_available_after_read() {
        let mut reader = make_reader_with_header(vec![0; 100], 0, 100);
        reader.read_u8().unwrap();
        let remaining = reader.available().unwrap();
        assert_eq!(remaining, 99);
    }

    #[test]
    fn test_available_with_data_start() {
        // end = data_start (10) + file_size (50); cursor is at 0.
        let remaining = make_reader_with_header(vec![0; 100], 10, 50)
            .available()
            .unwrap();
        assert_eq!(remaining, 60);
    }

    // ---- read_string_at_offset -----------------------------------------

    #[test]
    fn test_read_string_at_offset() {
        // 20 bytes of padding, the string, then 10 trailing bytes.
        let mut bytes = vec![0u8; 20];
        bytes.extend_from_slice(&encode_wz_ascii("TargetString"));
        bytes.extend_from_slice(&[0u8; 10]);

        let mut reader = make_reader(bytes);
        reader.seek(5).unwrap();

        let decoded = reader.read_string_at_offset(20).unwrap();
        assert_eq!(decoded, "TargetString");
        // The cursor must be back where it was before the call.
        assert_eq!(reader.position().unwrap(), 5);
    }

    #[test]
    fn test_read_string_at_offset_with_start_offset() {
        let mut bytes = vec![0u8; 10];
        bytes.extend_from_slice(&encode_wz_ascii("Offset"));

        let header = dummy_header(bytes.len() as u64);
        let mut reader = WzBinaryReader::new(Cursor::new(bytes), [0; 4], header, 5);

        // start_offset = 5, so offset 15 lands on stream byte 10.
        let decoded = reader.read_string_at_offset(15).unwrap();
        assert_eq!(decoded, "Offset");
    }

    // ---- read_wz_offset ------------------------------------------------

    #[test]
    fn test_read_wz_offset_deterministic() {
        let fstart: u32 = 60;
        let hash: u32 = 713421;

        // Mirror the reader's arithmetic for a cursor sitting at `fstart`.
        let scrambled = (!(fstart.wrapping_sub(fstart)))
            .wrapping_mul(hash)
            .wrapping_sub(WZ_OFFSET_CONSTANT);
        let pre_xor = scrambled.rotate_left(scrambled & 0x1F);
        // The stream holds zeros, so the XOR with the encrypted word is a no-op.
        let expected = pre_xor.wrapping_add(fstart.wrapping_mul(2)) as u64;

        let len = 64usize;
        let mut reader = make_reader_with_header(vec![0u8; len], fstart, len as u64);
        reader.hash = hash;
        reader.seek(fstart as u64).unwrap();

        let decoded = reader.read_wz_offset().unwrap();
        assert_eq!(decoded, expected);
    }
}