nom_mpq/parser/
mod.rs

1//! Nom Parsing the MPQ file format
2//!
3//! NOTES:
4//! - All numbers in the MoPaQ format are in little endian byte order
5//! - Signed numbers use the two's complement system.
6//! - Structure members are listed in the following general form:
7//!   - offset from the beginning of the structure: data type(array size)
//!     member name : member description
9
10use crate::{MPQParserError, MPQResult};
11
12use super::{MPQBuilder, MPQ};
13use crate::dbg_dmp;
14use nom::bytes::complete::{tag, take};
15use nom::multi::count;
16use nom::number::Endianness;
17use nom::Parser;
18use std::convert::From;
19use std::convert::TryFrom;
20use std::fs::File;
21use std::io::prelude::*;
22
23pub mod mpq_block_table_entry;
24pub mod mpq_file_header;
25pub mod mpq_file_header_ext;
26pub mod mpq_hash_table_entry;
27pub mod mpq_user_data;
28pub use mpq_block_table_entry::MPQBlockTableEntry;
29pub use mpq_file_header::MPQFileHeader;
30pub use mpq_file_header_ext::MPQFileHeaderExt;
31pub use mpq_hash_table_entry::MPQHashTableEntry;
32pub use mpq_user_data::MPQUserData;
33
/// Final byte of the magic to identify particularly the Archive Header.
///
/// It is the single byte read right after the `MPQ` prefix and is mapped to
/// [`MPQSectionType::Header`].
pub const MPQ_ARCHIVE_HEADER_TYPE: u8 = 0x1a;
/// Final byte of the magic to identify particularly the User Data.
///
/// It is the single byte read right after the `MPQ` prefix and is mapped to
/// [`MPQSectionType::UserData`].
pub const MPQ_USER_DATA_HEADER_TYPE: u8 = 0x1b;
/// Byte order of the numeric values read from the archive: little endian (LE).
pub const LITTLE_ENDIAN: Endianness = Endianness::Little;
/// The lowercase hexadecimal digits, indexed by nibble value, that
/// [`peek_hex`] uses to render each byte.
pub static CHARS: &[u8] = b"0123456789abcdef";
42
43/// Validates the first three bytes of the magic, it must be followed by either the
44/// [`MPQ_ARCHIVE_HEADER_TYPE`] or the [`MPQ_USER_DATA_HEADER_TYPE`]
45fn validate_magic(input: &[u8]) -> MPQResult<&[u8], &[u8]> {
46    dbg_dmp(tag(&b"MPQ"[..]), "tag")(input).map_err(|e| e.into())
47}
48
/// Selector for the hash flavours used in MPQ archives to identify embedded
/// filenames.
///
/// The numeric value of each variant matches the `u32` conversions
/// implemented for this type.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum MPQHashType {
    /// Hash of type TableOffset, numeric value `0`.
    TableOffset = 0,
    /// Hash of type A, numeric value `1`.
    HashA = 1,
    /// Hash of type B, numeric value `2`.
    HashB = 2,
    /// Hash of type Table, numeric value `3`; [`parse`] uses it to derive the
    /// keys for the `(hash table)` and `(block table)` sections.
    Table = 3,
}
62
63impl TryFrom<u32> for MPQHashType {
64    type Error = MPQParserError;
65    fn try_from(value: u32) -> Result<Self, Self::Error> {
66        match value {
67            0 => Ok(Self::TableOffset),
68            1 => Ok(Self::HashA),
69            2 => Ok(Self::HashB),
70            3 => Ok(Self::Table),
71            _ => Err(MPQParserError::InvalidHashType(value)),
72        }
73    }
74}
75
76impl TryFrom<MPQHashType> for u32 {
77    type Error = MPQParserError;
78    fn try_from(value: MPQHashType) -> Result<Self, Self::Error> {
79        match value {
80            MPQHashType::TableOffset => Ok(0),
81            MPQHashType::HashA => Ok(1),
82            MPQHashType::HashB => Ok(2),
83            MPQHashType::Table => Ok(3),
84        }
85    }
86}
87
/// The kinds of section that can be identified from the magic of an MPQ
/// archive.
#[derive(PartialEq, Debug)]
pub enum MPQSectionType {
    /// A User Data section, identified by [`MPQ_USER_DATA_HEADER_TYPE`].
    UserData,
    /// An Archive Header section, identified by [`MPQ_ARCHIVE_HEADER_TYPE`].
    Header,
    /// Any other section marker; the section type could not be recognised.
    Unknown,
}
98
99impl From<&[u8]> for MPQSectionType {
100    fn from(input: &[u8]) -> Self {
101        if input.len() != 1 {
102            Self::Unknown
103        } else {
104            match input[0] {
105                MPQ_ARCHIVE_HEADER_TYPE => Self::Header,
106                MPQ_USER_DATA_HEADER_TYPE => Self::UserData,
107                _ => Self::Unknown,
108            }
109        }
110    }
111}
112
113/// A helper function that shows only up to the first 8 bytes of an u8 slice in
114/// xxd format.
115pub fn peek_hex(data: &[u8]) -> String {
116    let mut max_length = 8usize;
117    if data.len() < max_length {
118        max_length = data.len();
119    }
120    let data = &data[0..max_length];
121    let chunk_size = 8usize;
122    let mut v = Vec::with_capacity(data.len() * 3);
123    for chunk in data.chunks(chunk_size) {
124        v.push(b'[');
125        let mut even_space = false;
126        for &byte in chunk {
127            v.push(CHARS[(byte >> 4) as usize]);
128            v.push(CHARS[(byte & 0xf) as usize]);
129            if even_space {
130                v.push(b' ');
131            }
132            even_space = !even_space;
133        }
134        if chunk_size > chunk.len() {
135            for _j in 0..(chunk_size - chunk.len()) {
136                v.push(b' ');
137                v.push(b' ');
138                v.push(b' ');
139            }
140        }
141        v.push(b' ');
142
143        for &byte in chunk {
144            if (32..=126).contains(&byte) {
145                v.push(byte);
146            } else {
147                v.push(b'.');
148            }
149        }
150        v.push(b']');
151        v.push(b',');
152    }
153    v.pop();
154    String::from_utf8_lossy(&v[..]).into_owned()
155}
156
157/// Gets the header type from the MPQ file
158#[tracing::instrument(level = "trace", skip(input), fields(input = peek_hex(input)))]
159pub fn get_header_type(input: &[u8]) -> MPQResult<&[u8], MPQSectionType> {
160    let (input, _) = validate_magic(input)?;
161    let (input, mpq_type) = dbg_dmp(take(1usize), "mpq_type")(input)?;
162    let mpq_type = MPQSectionType::from(mpq_type);
163    Ok((input, mpq_type))
164}
165
166/// Reads the file headers, headers must contain the Archive File Header
167/// but they may optionally contain the User Data Headers.
168#[tracing::instrument(level = "trace", skip(input), fields(input = peek_hex(input)))]
169pub fn read_headers(input: &[u8]) -> MPQResult<&[u8], (MPQFileHeader, Option<MPQUserData>)> {
170    let mut user_data: Option<MPQUserData> = None;
171    let (input, mpq_type) = get_header_type(input)?;
172    let (input, archive_header) = match mpq_type {
173        MPQSectionType::UserData => {
174            let (input, parsed_user_data) = MPQUserData::parse(input)?;
175            let header_offset = parsed_user_data.archive_header_offset;
176            user_data = Some(parsed_user_data);
177            // If there is user data, it must be immediately followed by the Archive Header
178            let (input, mpq_type) = get_header_type(input)?;
179            assert!(MPQSectionType::Header == mpq_type);
180            MPQFileHeader::parse(input, header_offset as usize)?
181        }
182        MPQSectionType::Header => MPQFileHeader::parse(input, 0)?,
183        MPQSectionType::Unknown => {
184            tracing::error!("Unable to identify magic/section-type combination");
185            return MPQResult::Err(MPQParserError::MissingArchiveHeader);
186        }
187    };
188    Ok((input, (archive_header, user_data)))
189}
190
191/// Parses the whole input into an MPQ
192pub fn parse(orig_input: &[u8]) -> MPQResult<&[u8], MPQ> {
193    let builder = MPQBuilder::new();
194    let hash_table_key = builder.mpq_string_hash("(hash table)", MPQHashType::Table)?;
195    let block_table_key = builder.mpq_string_hash("(block table)", MPQHashType::Table)?;
196    let (tail, (archive_header, user_data)) = read_headers(orig_input)?;
197    // "seek" to the hash table offset.
198    let hash_table_offset = archive_header.hash_table_offset as usize + archive_header.offset;
199    let (_, encrypted_hash_table_data) = dbg_dmp(
200        take(16usize * archive_header.hash_table_entries as usize),
201        "encrypted_hash_table_data",
202    )(&orig_input[hash_table_offset..])?;
203    let decrypted_hash_table_data =
204        match builder.mpq_data_decrypt(encrypted_hash_table_data, hash_table_key) {
205            Ok((_, value)) => value,
206            Err(err) => {
207                tracing::warn!(
208                    "Unabe to use key: '{}' to decrypt MPQHashTable data: {}: {:?}",
209                    hash_table_key,
210                    peek_hex(encrypted_hash_table_data),
211                    err,
212                );
213                return Err(MPQParserError::DecryptionDataWithKey(
214                    hash_table_key.to_string(),
215                ));
216            }
217        };
218    let (_, hash_table_entries) = match count(
219        MPQHashTableEntry::parse,
220        archive_header.hash_table_entries as usize,
221    )
222    .parse(&decrypted_hash_table_data)
223    {
224        Ok((tail, value)) => (tail, value),
225        Err(err) => {
226            tracing::error!("Unable to use decrypted data: {:?}", err);
227            return Err(MPQParserError::IncompleteData);
228        }
229    };
230    // "seek" to the block table offset.
231    let block_table_offset = archive_header.block_table_offset as usize + archive_header.offset;
232    let (_, encrypted_block_table_data) = dbg_dmp(
233        take(16usize * archive_header.block_table_entries as usize),
234        "encrypted_block_table_data",
235    )(&orig_input[block_table_offset..])?;
236    let (_, decrypted_block_table_data) =
237        builder.mpq_data_decrypt(encrypted_block_table_data, block_table_key)?;
238    let (_, block_table_entries) = match count(
239        MPQBlockTableEntry::parse,
240        archive_header.block_table_entries as usize,
241    )
242    .parse(&decrypted_block_table_data)
243    {
244        Ok((tail, value)) => (tail, value),
245        Err(err) => {
246            tracing::error!("Unable to use decrypted data: {:?}", err);
247            return Err(MPQParserError::IncompleteData);
248        }
249    };
250    let mpq = builder
251        .with_archive_header(archive_header)
252        .with_user_data(user_data)
253        .with_hash_table(hash_table_entries)
254        .with_block_table(block_table_entries)
255        .build(orig_input)
256        .unwrap();
257    Ok((tail, mpq))
258}
259
/// Convenience function that loads the whole file at `path` into memory,
/// mostly for testing.
///
/// # Panics
///
/// Panics when the file cannot be opened or read.
pub fn read_file(path: &str) -> Vec<u8> {
    std::fs::read(path).unwrap()
}
268
#[cfg(test)]
mod tests {
    use super::mpq_file_header::tests::basic_file_header;
    use super::mpq_user_data::tests::basic_user_header;
    use super::*;
    use test_log::test;

    /// A user data header directly followed by an archive header must yield
    /// both headers.
    #[test]
    fn it_parses_headers() {
        // Let's build the MoPaQ progressively: user data first, archive header after it.
        let mut raw_headers = basic_user_header();
        raw_headers.extend(basic_file_header());
        let (_rest, (_file_header, user_data)) = read_headers(&raw_headers).unwrap();
        assert!(user_data.is_some());
    }

    /// Checks the well-known table keys and that they decrypt the sample hash
    /// and block tables into the expected bytes.
    #[test]
    fn it_generates_hashes() {
        let builder = MPQBuilder::new();
        let key_for_hash_table = builder
            .mpq_string_hash("(hash table)", MPQHashType::Table)
            .unwrap();
        let key_for_block_table = builder
            .mpq_string_hash("(block table)", MPQHashType::Table)
            .unwrap();
        assert_eq!(key_for_hash_table, 0xc3af3770);
        assert_eq!(key_for_block_table, 0xec83b3a3);

        // Hash table: encrypted sample and its expected plain form.
        let hash_table_cipher = vec![
            0x07, 0xf8, 0xb8, 0x55, 0x4f, 0xb4, 0x8e, 0x3c, 0x7c, 0xa8, 0x7b, 0xac, 0xae, 0x1a,
            0x00, 0xe0, 0xc7, 0xc9, 0xdc, 0xc5, 0x3e, 0x6c, 0xfe, 0xc3, 0xa2, 0x02, 0x33, 0xa7,
            0xb8, 0x1b, 0x6d, 0xb7, 0x83, 0x4f, 0x4c, 0x63, 0x15, 0x59, 0x4d, 0xf8, 0xda, 0x7e,
            0x55, 0xfa, 0xe7, 0xb5, 0x2b, 0x0b, 0xe6, 0xd8, 0x76, 0xe6, 0xef, 0x30, 0x78, 0x8b,
            0x70, 0x31, 0xdb, 0x02, 0xa2, 0x78, 0xb8, 0x89, 0x07, 0x90, 0x24, 0xb9, 0xb4, 0xec,
            0xdc, 0xa3, 0x53, 0xe9, 0x4e, 0x95, 0xfc, 0x4e, 0x52, 0x15, 0x92, 0x59, 0xe3, 0xf1,
            0x37, 0x9f, 0x4b, 0xec, 0x53, 0x8d, 0x7c, 0x04, 0x02, 0xdc, 0xe7, 0xcd, 0x95, 0xfe,
            0x32, 0x21, 0x83, 0x94, 0x8d, 0x32, 0x23, 0x36, 0xa9, 0xd4, 0x76, 0xe1, 0x58, 0x3e,
            0x12, 0x12, 0x33, 0x2a, 0xb1, 0x95, 0x30, 0x1e, 0xff, 0xac, 0x45, 0x0e, 0xb1, 0x11,
            0xd5, 0x00, 0xc1, 0xed, 0x64, 0x49, 0xd4, 0xa3, 0x4b, 0x5a, 0xe0, 0x69, 0x0a, 0x5a,
            0x35, 0x4a, 0x31, 0xd5, 0xa7, 0x53, 0xe3, 0xf8, 0xd8, 0x27, 0x11, 0x93, 0x86, 0x65,
            0x21, 0xd5, 0x3d, 0xfd, 0xd6, 0x4d, 0x45, 0x62, 0xda, 0xc3, 0x7b, 0x0c, 0xab, 0xc7,
            0x9d, 0x48, 0xbb, 0xbf, 0x15, 0x21, 0xfe, 0xe0, 0xca, 0x9e, 0x9a, 0x07, 0x3c, 0x91,
            0x65, 0x26, 0xe1, 0xbb, 0x74, 0xeb, 0xce, 0x93, 0x32, 0x20, 0xad, 0x73, 0x59, 0x9c,
            0x96, 0x24, 0xae, 0xfd, 0xf7, 0x99, 0xcf, 0xbb, 0x09, 0xf2, 0x39, 0x61, 0x4e, 0x36,
            0xd5, 0x80, 0xdb, 0x5b, 0xa2, 0x61, 0x5a, 0x3d, 0xc2, 0x0b, 0xe3, 0x23, 0x30, 0x5a,
            0xd4, 0xcd, 0xc6, 0x4a, 0x11, 0x47, 0xa1, 0x95, 0x7d, 0xbb, 0xd8, 0xcf, 0x76, 0xcf,
            0xc9, 0x04, 0x13, 0x75, 0xba, 0x19, 0x98, 0xc8, 0xd6, 0xe3, 0xbe, 0x91, 0xb2, 0x1c,
            0x6e, 0xb0, 0x8d, 0x87,
        ];
        let hash_table_plain = vec![
            0xcb, 0x37, 0x84, 0xd3, 0xec, 0xea, 0xdf, 0x07, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00,
            0x00, 0x00, 0x4b, 0xa5, 0xc2, 0xaa, 0x95, 0x2b, 0x76, 0xf4, 0x00, 0x00, 0x00, 0x00,
            0x02, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x70, 0xb7, 0xe5, 0xc9,
            0xb6, 0xf6, 0x18, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x7b, 0x08,
            0x3c, 0x34, 0x82, 0x36, 0x8e, 0x27, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
            0xa0, 0x1e, 0x2b, 0x3b, 0x57, 0xf0, 0x2e, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
            0x00, 0x00, 0xdc, 0x8b, 0x7e, 0x5a, 0x5c, 0x3f, 0x25, 0xff, 0x00, 0x00, 0x00, 0x00,
            0x01, 0x00, 0x00, 0x00, 0x10, 0x79, 0x65, 0xfd, 0xa7, 0x98, 0x9b, 0x4e, 0x00, 0x00,
            0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x9c, 0xc2, 0x83, 0xd3, 0x92, 0x2e, 0x40, 0xef,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xcf, 0xb0, 0xa8, 0x1d, 0x28, 0xff, 0xce, 0xa2, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00,
            0x00, 0x00, 0x89, 0x22, 0x95, 0x31, 0xa3, 0xfa, 0x5f, 0x6a, 0x00, 0x00, 0x00, 0x00,
            0x03, 0x00, 0x00, 0x00,
        ];
        let (_, hash_table_output) = builder
            .mpq_data_decrypt(&hash_table_cipher, key_for_hash_table)
            .unwrap();
        assert_eq!(hash_table_output, hash_table_plain);

        // Block table: encrypted sample and its expected plain form.
        let block_table_cipher = vec![
            0xa7, 0x67, 0x48, 0x3d, 0x7a, 0xd1, 0x08, 0xca, 0x4c, 0xbc, 0x35, 0xf8, 0x06, 0x04,
            0x34, 0xe9, 0xbe, 0xb3, 0xb5, 0xb3, 0x7d, 0xeb, 0x0e, 0x11, 0x05, 0xb9, 0xf4, 0x17,
            0xd3, 0x1b, 0x38, 0x21, 0x2f, 0xfd, 0x94, 0x62, 0xa1, 0xea, 0xe2, 0x2e, 0x29, 0xde,
            0xe8, 0xdf, 0x4d, 0x84, 0x0b, 0x54, 0x88, 0xe4, 0x87, 0xdc, 0xcc, 0xca, 0xd6, 0xf6,
            0xe6, 0xb4, 0x09, 0x0c, 0xf8, 0x27, 0xec, 0x87, 0x5d, 0x33, 0x7b, 0x3a, 0x9c, 0xb5,
            0xd9, 0x80, 0x8c, 0x3c, 0x19, 0x81, 0x6c, 0x76, 0xec, 0xac, 0x53, 0x55, 0xd6, 0xa6,
            0xf6, 0x7d, 0x18, 0xfb, 0xa9, 0x86, 0x30, 0x33, 0x29, 0xcb, 0x63, 0x11, 0xfa, 0xb5,
            0xe6, 0x02, 0x7f, 0x23, 0x4b, 0xe9, 0xd8, 0x77, 0x0c, 0x4d, 0xc8, 0x1e, 0x41, 0xe9,
            0xf2, 0x84, 0x6e, 0xc6, 0x75, 0xbd, 0x47, 0x8b, 0x04, 0x7d, 0x48, 0xd9, 0xc2, 0xa1,
            0x02, 0x0d, 0x04, 0xdf, 0xb3, 0xc7, 0x82, 0xf5, 0x77, 0x37, 0x81, 0x9d, 0x7f, 0xfb,
            0x65, 0x5d, 0x96, 0xe3, 0xa2, 0x0a, 0x68, 0x1b, 0xb6, 0x6b, 0x7c, 0x12, 0x3e, 0x7b,
            0x63, 0x9c, 0x00, 0x7b, 0x7e, 0x23,
        ];
        let block_table_plain = vec![
            0x2c, 0x00, 0x00, 0x00, 0xd7, 0x02, 0x00, 0x00, 0x7a, 0x03, 0x00, 0x00, 0x00, 0x02,
            0x00, 0x81, 0x03, 0x03, 0x00, 0x00, 0x21, 0x03, 0x00, 0x00, 0xe9, 0x04, 0x00, 0x00,
            0x00, 0x02, 0x00, 0x81, 0x24, 0x06, 0x00, 0x00, 0x30, 0xf6, 0x02, 0x00, 0x7d, 0x52,
            0x07, 0x00, 0x00, 0x02, 0x00, 0x81, 0x54, 0xfc, 0x02, 0x00, 0xe2, 0x00, 0x00, 0x00,
            0x4e, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x81, 0x36, 0xfd, 0x02, 0x00, 0x61, 0x00,
            0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x81, 0x97, 0xfd, 0x02, 0x00,
            0x2b, 0x05, 0x00, 0x00, 0xb2, 0x07, 0x00, 0x00, 0x00, 0x02, 0x00, 0x81, 0xc2, 0x02,
            0x03, 0x00, 0x07, 0x19, 0x00, 0x00, 0x8f, 0x30, 0x00, 0x00, 0x00, 0x02, 0x00, 0x81,
            0xc9, 0x1b, 0x03, 0x00, 0x15, 0x02, 0x00, 0x00, 0x60, 0x09, 0x00, 0x00, 0x00, 0x02,
            0x00, 0x81, 0xde, 0x1d, 0x03, 0x00, 0x78, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00,
            0x00, 0x02, 0x00, 0x81, 0x56, 0x1e, 0x03, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x20, 0x01,
            0x00, 0x00, 0x00, 0x02, 0x00, 0x81,
        ];
        let (_, block_table_output) = builder
            .mpq_data_decrypt(&block_table_cipher, key_for_block_table)
            .unwrap();
        assert_eq!(block_table_output, block_table_plain);
    }
}