wow_mpq/
header.rs

1//! MPQ header structures and parsing
2
3use crate::security::{SecurityLimits, validate_header_security};
4use crate::{Error, Result};
5use byteorder::{LittleEndian, ReadBytesExt};
6use std::io::{Read, Seek, SeekFrom};
7
8use crate::debug::{format_size, hex_string};
9
/// MPQ archive header signature ('MPQ\x1A').
///
/// Headers are read as a little-endian `u32`, so this value corresponds to
/// the on-disk byte sequence `4D 50 51 1A`.
pub const MPQ_HEADER_SIGNATURE: u32 = 0x1A51504D;

/// MPQ user data header signature ('MPQ\x1B').
///
/// Read as a little-endian `u32`, i.e. the on-disk bytes `4D 50 51 1B`.
/// When this signature is found, the real MPQ header is looked up at an
/// offset stored inside the user data header.
pub const MPQ_USERDATA_SIGNATURE: u32 = 0x1B51504D;

/// Header alignment requirement (512 bytes).
///
/// The header search only probes file offsets that are multiples of this
/// value.
pub const HEADER_ALIGNMENT: u64 = 0x200;
18
/// MPQ archive format revision.
///
/// Each variant's discriminant equals the raw 16-bit value stored in the
/// header's format-version field, and the derived ordering follows those
/// discriminants (`V1 < V2 < V3 < V4`).
#[repr(u16)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum FormatVersion {
    /// Version 1 - Original format (32-byte header)
    V1 = 0,
    /// Version 2 - Burning Crusade (44-byte header)
    V2 = 1,
    /// Version 3 - Cataclysm Beta (68-byte header)
    V3 = 2,
    /// Version 4 - Cataclysm+ (208-byte header)
    V4 = 3,
}

impl FormatVersion {
    /// Size in bytes of the header defined for this format version.
    pub fn header_size(&self) -> u32 {
        match *self {
            Self::V1 => 32,
            Self::V2 => 44,
            Self::V3 => 68,
            Self::V4 => 208,
        }
    }

    /// Convert the raw on-disk version number into a [`FormatVersion`].
    ///
    /// Returns `None` for any value greater than 3.
    pub fn from_raw(raw: u16) -> Option<Self> {
        // Indexed by raw value; anything past the end is an unknown version.
        const KNOWN_VERSIONS: [FormatVersion; 4] = [
            FormatVersion::V1,
            FormatVersion::V2,
            FormatVersion::V3,
            FormatVersion::V4,
        ];
        KNOWN_VERSIONS.get(usize::from(raw)).copied()
    }
}
55
/// MPQ user data header (optional, appears before main header)
///
/// Holds the three `u32` fields that follow the user data signature, in the
/// order they are stored.
#[derive(Debug, Clone)]
pub struct UserDataHeader {
    /// Maximum size of the user data
    pub user_data_size: u32,
    /// Offset of the MPQ header, relative to the beginning of this header
    /// (the header search adds it to the file offset of the user data header)
    pub header_offset: u32,
    /// Size of user data header (commonly used in SC2 maps)
    pub user_data_header_size: u32,
}
66
/// Main MPQ header structure
///
/// The first eight fields are present for every format version. The
/// `Option` fields are populated by the parser only when the format version
/// (or, for `v4_data`, the declared header size) provides them, and are
/// `None` otherwise.
#[derive(Debug, Clone)]
pub struct MpqHeader {
    /// Size of the archive header
    pub header_size: u32,
    /// Size of MPQ archive (deprecated in v2+)
    pub archive_size: u32,
    /// Format version
    pub format_version: FormatVersion,
    /// Block size (power of two exponent); the sector size is `512 << block_size`
    pub block_size: u16,
    /// Offset to the hash table (low 32 bits)
    pub hash_table_pos: u32,
    /// Offset to the block table (low 32 bits)
    pub block_table_pos: u32,
    /// Number of entries in the hash table
    pub hash_table_size: u32,
    /// Number of entries in the block table
    pub block_table_size: u32,

    // Version 2+ fields
    /// Extended block table position
    pub hi_block_table_pos: Option<u64>,
    /// High 16 bits of hash table offset (bits 32..48 of the full position)
    pub hash_table_pos_hi: Option<u16>,
    /// High 16 bits of block table offset (bits 32..48 of the full position)
    pub block_table_pos_hi: Option<u16>,

    // Version 3+ fields
    /// 64-bit archive size; preferred over `archive_size` when present
    pub archive_size_64: Option<u64>,
    /// Position of BET table
    pub bet_table_pos: Option<u64>,
    /// Position of HET table
    pub het_table_pos: Option<u64>,

    // Version 4 fields
    /// Compressed sizes and MD5 hashes; read for V3+ headers that declare a
    /// header size of at least 208 bytes
    pub v4_data: Option<MpqHeaderV4Data>,
}
107
/// Version 4 specific header data
///
/// Occupies 140 bytes on disk: five `u64` sizes, one `u32` chunk size and
/// six 16-byte MD5 digests. The fields are declared in the order in which
/// the parser reads them.
#[derive(Debug, Clone)]
pub struct MpqHeaderV4Data {
    /// Compressed size of hash table
    pub hash_table_size_64: u64,
    /// Compressed size of block table
    pub block_table_size_64: u64,
    /// Compressed size of hi-block table
    pub hi_block_table_size_64: u64,
    /// Compressed size of HET table
    pub het_table_size_64: u64,
    /// Compressed size of BET table
    pub bet_table_size_64: u64,
    /// Size of raw data chunk for MD5
    pub raw_chunk_size: u32,
    /// MD5 of block table
    pub md5_block_table: [u8; 16],
    /// MD5 of hash table
    pub md5_hash_table: [u8; 16],
    /// MD5 of hi-block table
    pub md5_hi_block_table: [u8; 16],
    /// MD5 of BET table
    pub md5_bet_table: [u8; 16],
    /// MD5 of HET table
    pub md5_het_table: [u8; 16],
    /// MD5 of MPQ header
    pub md5_mpq_header: [u8; 16],
}
136
137impl MpqHeader {
138    /// Read an MPQ header from the given reader with security validation
139    pub fn read<R: Read + Seek>(reader: &mut R) -> Result<Self> {
140        Self::read_with_limits(reader, &SecurityLimits::default())
141    }
142
143    /// Read an MPQ header with custom security limits
144    pub fn read_with_limits<R: Read + Seek>(
145        reader: &mut R,
146        limits: &SecurityLimits,
147    ) -> Result<Self> {
148        // Read the signature
149        let signature = reader.read_u32::<LittleEndian>()?;
150        if signature != MPQ_HEADER_SIGNATURE {
151            return Err(Error::invalid_format("Invalid MPQ header signature"));
152        }
153
154        // Remember where the header starts (after signature)
155        let header_start = reader.stream_position()? - 4;
156
157        // Read basic header fields
158        let header_size = reader.read_u32::<LittleEndian>()?;
159        let archive_size = reader.read_u32::<LittleEndian>()?;
160        let format_version_raw = reader.read_u16::<LittleEndian>()?;
161        let block_size = reader.read_u16::<LittleEndian>()?;
162        let hash_table_pos = reader.read_u32::<LittleEndian>()?;
163        let block_table_pos = reader.read_u32::<LittleEndian>()?;
164        let hash_table_size = reader.read_u32::<LittleEndian>()?;
165        let block_table_size = reader.read_u32::<LittleEndian>()?;
166
167        let format_version = FormatVersion::from_raw(format_version_raw)
168            .ok_or(Error::UnsupportedVersion(format_version_raw))?;
169
170        // Security validation - validate header before proceeding
171        validate_header_security(
172            signature,
173            header_size,
174            archive_size,
175            format_version_raw,
176            block_size,
177            hash_table_pos,
178            block_table_pos,
179            hash_table_size,
180            block_table_size,
181            limits,
182        )?;
183
184        // Validate header size
185        if header_size < format_version.header_size() {
186            return Err(Error::invalid_format(format!(
187                "Header size {header_size} too small for version {format_version:?}"
188            )));
189        }
190
191        let mut header = MpqHeader {
192            header_size,
193            archive_size,
194            format_version,
195            block_size,
196            hash_table_pos,
197            block_table_pos,
198            hash_table_size,
199            block_table_size,
200            hi_block_table_pos: None,
201            hash_table_pos_hi: None,
202            block_table_pos_hi: None,
203            archive_size_64: None,
204            bet_table_pos: None,
205            het_table_pos: None,
206            v4_data: None,
207        };
208
209        // Read version-specific fields
210        if format_version >= FormatVersion::V2 {
211            // Version 2+ fields
212            header.hi_block_table_pos = Some(reader.read_u64::<LittleEndian>()?);
213            header.hash_table_pos_hi = Some(reader.read_u16::<LittleEndian>()?);
214            header.block_table_pos_hi = Some(reader.read_u16::<LittleEndian>()?);
215        }
216
217        if format_version >= FormatVersion::V3 {
218            // Version 3+ fields
219            header.archive_size_64 = Some(reader.read_u64::<LittleEndian>()?);
220            header.bet_table_pos = Some(reader.read_u64::<LittleEndian>()?);
221            header.het_table_pos = Some(reader.read_u64::<LittleEndian>()?);
222        }
223
224        // Check if we have V4 data based on header size
225        // Some V3 archives have 208-byte headers with V4 data
226        if format_version >= FormatVersion::V3 && header_size >= 208 {
227            // First check if we have enough data to read V4 fields
228            let current_pos = reader.stream_position()?;
229            let remaining_header = (header_start + header_size as u64).saturating_sub(current_pos);
230            let v4_data_size = 8 * 5 + 4 + 16 * 6; // 5 u64s + 1 u32 + 6 MD5s
231
232            if remaining_header < v4_data_size as u64 {
233                log::warn!(
234                    "Header size is 208 but not enough space for V4 data. Remaining: {remaining_header}, needed: {v4_data_size}"
235                );
236            } else {
237                // Version 4 fields
238                let mut v4_data = MpqHeaderV4Data {
239                    hash_table_size_64: reader.read_u64::<LittleEndian>()?,
240                    block_table_size_64: reader.read_u64::<LittleEndian>()?,
241                    hi_block_table_size_64: reader.read_u64::<LittleEndian>()?,
242                    het_table_size_64: reader.read_u64::<LittleEndian>()?,
243                    bet_table_size_64: reader.read_u64::<LittleEndian>()?,
244                    raw_chunk_size: reader.read_u32::<LittleEndian>()?,
245                    md5_block_table: [0; 16],
246                    md5_hash_table: [0; 16],
247                    md5_hi_block_table: [0; 16],
248                    md5_bet_table: [0; 16],
249                    md5_het_table: [0; 16],
250                    md5_mpq_header: [0; 16],
251                };
252
253                reader.read_exact(&mut v4_data.md5_block_table)?;
254                reader.read_exact(&mut v4_data.md5_hash_table)?;
255                reader.read_exact(&mut v4_data.md5_hi_block_table)?;
256                reader.read_exact(&mut v4_data.md5_bet_table)?;
257                reader.read_exact(&mut v4_data.md5_het_table)?;
258                reader.read_exact(&mut v4_data.md5_mpq_header)?;
259
260                header.v4_data = Some(v4_data);
261            }
262        }
263
264        Ok(header)
265    }
266
267    /// Get the actual archive size (using 64-bit value if available)
268    pub fn get_archive_size(&self) -> u64 {
269        self.archive_size_64.unwrap_or(self.archive_size as u64)
270    }
271
272    /// Get the full hash table position
273    pub fn get_hash_table_pos(&self) -> u64 {
274        if let Some(hi) = self.hash_table_pos_hi {
275            ((hi as u64) << 32) | (self.hash_table_pos as u64)
276        } else {
277            self.hash_table_pos as u64
278        }
279    }
280
281    /// Get the full block table position
282    pub fn get_block_table_pos(&self) -> u64 {
283        if let Some(hi) = self.block_table_pos_hi {
284            ((hi as u64) << 32) | (self.block_table_pos as u64)
285        } else {
286            self.block_table_pos as u64
287        }
288    }
289
290    /// Calculate the sector size from block size
291    pub fn sector_size(&self) -> usize {
292        512 << self.block_size
293    }
294}
295
296/// Find the MPQ header in a file
297pub fn find_header<R: Read + Seek>(
298    reader: &mut R,
299) -> Result<(u64, Option<UserDataHeader>, MpqHeader)> {
300    find_header_with_limits(reader, &SecurityLimits::default())
301}
302
303/// Find MPQ header with custom security limits
304pub fn find_header_with_limits<R: Read + Seek>(
305    reader: &mut R,
306    limits: &SecurityLimits,
307) -> Result<(u64, Option<UserDataHeader>, MpqHeader)> {
308    let mut offset = 0u64;
309    let file_size = reader.seek(SeekFrom::End(0))?;
310    reader.seek(SeekFrom::Start(0))?;
311
312    loop {
313        if offset >= file_size {
314            return Err(Error::invalid_format("No MPQ header found"));
315        }
316
317        reader.seek(SeekFrom::Start(offset))?;
318
319        // Try to read a signature
320        let signature = match reader.read_u32::<LittleEndian>() {
321            Ok(sig) => sig,
322            Err(_) => {
323                offset += HEADER_ALIGNMENT;
324                continue;
325            }
326        };
327
328        match signature {
329            MPQ_HEADER_SIGNATURE => {
330                // Found standard MPQ header
331                reader.seek(SeekFrom::Start(offset))?;
332                let header = MpqHeader::read_with_limits(reader, limits)?;
333                return Ok((offset, None, header));
334            }
335            MPQ_USERDATA_SIGNATURE => {
336                // Found user data header
337                let user_data_size = reader.read_u32::<LittleEndian>()?;
338                let header_offset = reader.read_u32::<LittleEndian>()?;
339                let user_data_header_size = reader.read_u32::<LittleEndian>()?;
340
341                let user_data = UserDataHeader {
342                    user_data_size,
343                    header_offset,
344                    user_data_header_size,
345                };
346
347                // Calculate actual header position
348                let mpq_offset = offset + header_offset as u64;
349                if mpq_offset < file_size {
350                    reader.seek(SeekFrom::Start(mpq_offset))?;
351
352                    // Verify there's an MPQ header at the calculated position
353                    let mpq_sig = reader.read_u32::<LittleEndian>()?;
354                    if mpq_sig == MPQ_HEADER_SIGNATURE {
355                        reader.seek(SeekFrom::Start(mpq_offset))?;
356                        let header = MpqHeader::read_with_limits(reader, limits)?;
357                        return Ok((mpq_offset, Some(user_data), header));
358                    }
359                }
360            }
361            _ => {}
362        }
363
364        // Move to next potential header position
365        offset += HEADER_ALIGNMENT;
366    }
367}
368
369impl MpqHeader {
370    /// Debug dump the MPQ header
371    pub fn debug_dump(&self) -> String {
372        let mut output = String::new();
373        output.push_str("MPQ Header Debug Dump\n");
374        output.push_str("====================\n");
375        output.push_str(&format!("Header Size: {}\n", self.header_size));
376        output.push_str(&format!(
377            "Archive Size (v1): {}\n",
378            format_size(self.archive_size as u64)
379        ));
380        output.push_str(&format!(
381            "Format Version: {:?} (v{})\n",
382            self.format_version,
383            self.format_version as u16 + 1
384        ));
385        output.push_str(&format!(
386            "Block Size: {} (sector size: {})\n",
387            self.block_size,
388            format_size((512 << self.block_size) as u64)
389        ));
390        output.push_str(&format!(
391            "Hash Table: offset=0x{:08X}, size={} entries\n",
392            self.hash_table_pos, self.hash_table_size
393        ));
394        output.push_str(&format!(
395            "Block Table: offset=0x{:08X}, size={} entries\n",
396            self.block_table_pos, self.block_table_size
397        ));
398
399        if let Some(hi_pos) = self.hi_block_table_pos {
400            output.push_str("\nVersion 2+ fields:\n");
401            output.push_str(&format!("  Hi-Block Table: offset=0x{hi_pos:016X}\n"));
402            output.push_str(&format!(
403                "  Hash Table High: 0x{:04X}\n",
404                self.hash_table_pos_hi.unwrap_or(0)
405            ));
406            output.push_str(&format!(
407                "  Block Table High: 0x{:04X}\n",
408                self.block_table_pos_hi.unwrap_or(0)
409            ));
410        }
411
412        if let Some(size64) = self.archive_size_64 {
413            output.push_str("\nVersion 3+ fields:\n");
414            output.push_str(&format!(
415                "  Archive Size (64-bit): {}\n",
416                format_size(size64)
417            ));
418            output.push_str(&format!(
419                "  BET Table: offset=0x{:016X}\n",
420                self.bet_table_pos.unwrap_or(0)
421            ));
422            output.push_str(&format!(
423                "  HET Table: offset=0x{:016X}\n",
424                self.het_table_pos.unwrap_or(0)
425            ));
426        }
427
428        if let Some(ref v4) = self.v4_data {
429            output.push_str("\nVersion 4 fields:\n");
430            output.push_str(&format!(
431                "  Hash Table Size: {}\n",
432                format_size(v4.hash_table_size_64)
433            ));
434            output.push_str(&format!(
435                "  Block Table Size: {}\n",
436                format_size(v4.block_table_size_64)
437            ));
438            output.push_str(&format!(
439                "  Hi-Block Table Size: {}\n",
440                format_size(v4.hi_block_table_size_64)
441            ));
442            output.push_str(&format!(
443                "  HET Table Size: {}\n",
444                format_size(v4.het_table_size_64)
445            ));
446            output.push_str(&format!(
447                "  BET Table Size: {}\n",
448                format_size(v4.bet_table_size_64)
449            ));
450            output.push_str(&format!(
451                "  Raw Chunk Size: {}\n",
452                format_size(v4.raw_chunk_size as u64)
453            ));
454            output.push_str("  MD5 Hashes:\n");
455            output.push_str(&format!(
456                "    Block Table: {}\n",
457                hex_string(&v4.md5_block_table, 16)
458            ));
459            output.push_str(&format!(
460                "    Hash Table: {}\n",
461                hex_string(&v4.md5_hash_table, 16)
462            ));
463            output.push_str(&format!(
464                "    Hi-Block Table: {}\n",
465                hex_string(&v4.md5_hi_block_table, 16)
466            ));
467            output.push_str(&format!(
468                "    BET Table: {}\n",
469                hex_string(&v4.md5_bet_table, 16)
470            ));
471            output.push_str(&format!(
472                "    HET Table: {}\n",
473                hex_string(&v4.md5_het_table, 16)
474            ));
475            output.push_str(&format!(
476                "    MPQ Header: {}\n",
477                hex_string(&v4.md5_mpq_header, 16)
478            ));
479        }
480
481        output
482    }
483}
484
485impl UserDataHeader {
486    /// Debug dump the user data header
487    pub fn debug_dump(&self) -> String {
488        let mut output = String::new();
489        output.push_str("MPQ User Data Header\n");
490        output.push_str("===================\n");
491        output.push_str(&format!(
492            "User Data Size: {}\n",
493            format_size(self.user_data_size as u64)
494        ));
495        output.push_str(&format!("Header Offset: 0x{:08X}\n", self.header_offset));
496        output.push_str(&format!(
497            "User Data Header Size: {}\n",
498            self.user_data_header_size
499        ));
500        output
501    }
502}