nom_mpq/parser/
mod.rs

1//! Nom Parsing the MPQ file format
2//!
3//! NOTES:
4//! - All numbers in the MoPaQ format are in little endian byte order
5//! - Signed numbers use the two's complement system.
6//! - Structure members are listed in the following general form:
7//!   - offset from the beginning of the structure: data type(array size)
8//!     member name : member description
9
10use crate::{MPQParserError, MPQResult};
11
12use super::{MPQBuilder, MPQ};
13use nom::bytes::complete::{tag, take};
14use nom::error::dbg_dmp;
15use nom::multi::count;
16use nom::number::Endianness;
17use std::convert::From;
18use std::convert::TryFrom;
19use std::fs::File;
20use std::io::prelude::*;
21
22pub mod mpq_block_table_entry;
23pub mod mpq_file_header;
24pub mod mpq_file_header_ext;
25pub mod mpq_hash_table_entry;
26pub mod mpq_user_data;
27pub use mpq_block_table_entry::MPQBlockTableEntry;
28pub use mpq_file_header::MPQFileHeader;
29pub use mpq_file_header_ext::MPQFileHeaderExt;
30pub use mpq_hash_table_entry::MPQHashTableEntry;
31pub use mpq_user_data::MPQUserData;
32
33/// Final byte of the magic to identify particularly the Archive Header.
34pub const MPQ_ARCHIVE_HEADER_TYPE: u8 = 0x1a;
35/// Final byte of the magic to identify particularly the User Data.
36pub const MPQ_USER_DATA_HEADER_TYPE: u8 = 0x1b;
37/// The numeric values read are encoded in little endian LE
38pub const LITTLE_ENDIAN: Endianness = Endianness::Little;
39/// The characters used as displayable by [`peek_hex`]
40pub static CHARS: &[u8] = b"0123456789abcdef";
41
42/// Validates the first three bytes of the magic, it must be followed by either the
43/// [`MPQ_ARCHIVE_HEADER_TYPE`] or the [`MPQ_USER_DATA_HEADER_TYPE`]
44fn validate_magic(input: &[u8]) -> MPQResult<&[u8], &[u8]> {
45    dbg_dmp(tag(b"MPQ"), "tag")(input).map_err(|e| e.into())
46}
47
/// The hash types used in MPQ Archives; they are used to identify embedded
/// filenames.
///
/// Each variant converts to and from a `u32` through the `TryFrom`
/// implementations in this module.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum MPQHashType {
    /// Hash type `TableOffset`, numeric value `0`.
    TableOffset,
    /// Hash type `A`, numeric value `1`.
    HashA,
    /// Hash type `B`, numeric value `2`.
    HashB,
    /// Hash type `Table`, numeric value `3`.
    Table,
}
61
62impl TryFrom<u32> for MPQHashType {
63    type Error = MPQParserError;
64    fn try_from(value: u32) -> Result<Self, Self::Error> {
65        match value {
66            0 => Ok(Self::TableOffset),
67            1 => Ok(Self::HashA),
68            2 => Ok(Self::HashB),
69            3 => Ok(Self::Table),
70            _ => Err(MPQParserError::InvalidHashType(value)),
71        }
72    }
73}
74
75impl TryFrom<MPQHashType> for u32 {
76    type Error = MPQParserError;
77    fn try_from(value: MPQHashType) -> Result<Self, Self::Error> {
78        match value {
79            MPQHashType::TableOffset => Ok(0),
80            MPQHashType::HashA => Ok(1),
81            MPQHashType::HashB => Ok(2),
82            MPQHashType::Table => Ok(3),
83        }
84    }
85}
86
/// The kinds of section that can be identified in an MPQ archive.
#[derive(PartialEq, Debug)]
pub enum MPQSectionType {
    /// The section is of type User Data.
    UserData,
    /// The section is of type (Archive) Header.
    Header,
    /// The section type could not be identified.
    Unknown,
}
97
98impl From<&[u8]> for MPQSectionType {
99    fn from(input: &[u8]) -> Self {
100        if input.len() != 1 {
101            Self::Unknown
102        } else {
103            match input[0] {
104                MPQ_ARCHIVE_HEADER_TYPE => Self::Header,
105                MPQ_USER_DATA_HEADER_TYPE => Self::UserData,
106                _ => Self::Unknown,
107            }
108        }
109    }
110}
111
112/// A helper function that shows only up to the first 8 bytes of an u8 slice in
113/// xxd format.
114pub fn peek_hex(data: &[u8]) -> String {
115    let mut max_length = 8usize;
116    if data.len() < max_length {
117        max_length = data.len();
118    }
119    let data = &data[0..max_length];
120    let chunk_size = 8usize;
121    let mut v = Vec::with_capacity(data.len() * 3);
122    for chunk in data.chunks(chunk_size) {
123        v.push(b'[');
124        let mut even_space = false;
125        for &byte in chunk {
126            v.push(CHARS[(byte >> 4) as usize]);
127            v.push(CHARS[(byte & 0xf) as usize]);
128            if even_space {
129                v.push(b' ');
130            }
131            even_space = !even_space;
132        }
133        if chunk_size > chunk.len() {
134            for _j in 0..(chunk_size - chunk.len()) {
135                v.push(b' ');
136                v.push(b' ');
137                v.push(b' ');
138            }
139        }
140        v.push(b' ');
141
142        for &byte in chunk {
143            if (32..=126).contains(&byte) {
144                v.push(byte);
145            } else {
146                v.push(b'.');
147            }
148        }
149        v.push(b']');
150        v.push(b',');
151    }
152    v.pop();
153    String::from_utf8_lossy(&v[..]).into_owned()
154}
155
156/// Gets the header type from the MPQ file
157#[tracing::instrument(level = "trace", skip(input), fields(input = peek_hex(input)))]
158pub fn get_header_type(input: &[u8]) -> MPQResult<&[u8], MPQSectionType> {
159    let (input, _) = validate_magic(input)?;
160    let (input, mpq_type) = dbg_dmp(take(1usize), "mpq_type")(input)?;
161    let mpq_type = MPQSectionType::from(mpq_type);
162    Ok((input, mpq_type))
163}
164
165/// Reads the file headers, headers must contain the Archive File Header
166/// but they may optionally contain the User Data Headers.
167#[tracing::instrument(level = "trace", skip(input), fields(input = peek_hex(input)))]
168pub fn read_headers(input: &[u8]) -> MPQResult<&[u8], (MPQFileHeader, Option<MPQUserData>)> {
169    let mut user_data: Option<MPQUserData> = None;
170    let (input, mpq_type) = get_header_type(input)?;
171    let (input, archive_header) = match mpq_type {
172        MPQSectionType::UserData => {
173            let (input, parsed_user_data) = MPQUserData::parse(input)?;
174            let header_offset = parsed_user_data.archive_header_offset;
175            user_data = Some(parsed_user_data);
176            // If there is user data, it must be immediately followed by the Archive Header
177            let (input, mpq_type) = get_header_type(input)?;
178            assert!(MPQSectionType::Header == mpq_type);
179            MPQFileHeader::parse(input, header_offset as usize)?
180        }
181        MPQSectionType::Header => MPQFileHeader::parse(input, 0)?,
182        MPQSectionType::Unknown => {
183            tracing::error!("Unable to identify magic/section-type combination");
184            return MPQResult::Err(MPQParserError::MissingArchiveHeader);
185        }
186    };
187    Ok((input, (archive_header, user_data)))
188}
189
190/// Parses the whole input into an MPQ
191pub fn parse(orig_input: &[u8]) -> MPQResult<&[u8], MPQ> {
192    let builder = MPQBuilder::new();
193    let hash_table_key = builder.mpq_string_hash("(hash table)", MPQHashType::Table)?;
194    let block_table_key = builder.mpq_string_hash("(block table)", MPQHashType::Table)?;
195    let (tail, (archive_header, user_data)) = read_headers(orig_input)?;
196    // "seek" to the hash table offset.
197    let hash_table_offset = archive_header.hash_table_offset as usize + archive_header.offset;
198    let (_, encrypted_hash_table_data) = dbg_dmp(
199        take(16usize * archive_header.hash_table_entries as usize),
200        "encrypted_hash_table_data",
201    )(&orig_input[hash_table_offset..])?;
202    let decrypted_hash_table_data =
203        match builder.mpq_data_decrypt(encrypted_hash_table_data, hash_table_key) {
204            Ok((_, value)) => value,
205            Err(err) => {
206                tracing::warn!(
207                    "Unabe to use key: '{}' to decrypt MPQHashTable data: {}: {:?}",
208                    hash_table_key,
209                    peek_hex(encrypted_hash_table_data),
210                    err,
211                );
212                return Err(MPQParserError::DecryptionDataWithKey(
213                    hash_table_key.to_string(),
214                ));
215            }
216        };
217    let (_, hash_table_entries) = match count(
218        MPQHashTableEntry::parse,
219        archive_header.hash_table_entries as usize,
220    )(&decrypted_hash_table_data)
221    {
222        Ok((tail, value)) => (tail, value),
223        Err(err) => {
224            tracing::error!("Unable to use decrypted data: {:?}", err);
225            return Err(MPQParserError::IncompleteData);
226        }
227    };
228    // "seek" to the block table offset.
229    let block_table_offset = archive_header.block_table_offset as usize + archive_header.offset;
230    let (_, encrypted_block_table_data) = dbg_dmp(
231        take(16usize * archive_header.block_table_entries as usize),
232        "encrypted_block_table_data",
233    )(&orig_input[block_table_offset..])?;
234    let (_, decrypted_block_table_data) =
235        builder.mpq_data_decrypt(encrypted_block_table_data, block_table_key)?;
236    let (_, block_table_entries) = match count(
237        MPQBlockTableEntry::parse,
238        archive_header.block_table_entries as usize,
239    )(&decrypted_block_table_data)
240    {
241        Ok((tail, value)) => (tail, value),
242        Err(err) => {
243            tracing::error!("Unable to use decrypted data: {:?}", err);
244            return Err(MPQParserError::IncompleteData);
245        }
246    };
247    let mpq = builder
248        .with_archive_header(archive_header)
249        .with_user_data(user_data)
250        .with_hash_table(hash_table_entries)
251        .with_block_table(block_table_entries)
252        .build(orig_input)
253        .unwrap();
254    Ok((tail, mpq))
255}
256
/// Convenience function that loads a whole file into memory so it can be
/// parsed; mostly meant for testing.
///
/// # Panics
///
/// Panics when the file cannot be opened or cannot be read to its end.
pub fn read_file(path: &str) -> Vec<u8> {
    let mut contents = Vec::new();
    File::open(path)
        .and_then(|mut file| file.read_to_end(&mut contents))
        .unwrap();
    contents
}
265
#[cfg(test)]
mod tests {
    use super::mpq_file_header::tests::basic_file_header;
    use super::mpq_user_data::tests::basic_user_header;
    use super::*;
    use test_log::test;

    /// A user data header followed by an archive header must parse, and the
    /// user data must be reported.
    #[test]
    fn it_parses_headers() {
        // Let's build the MoPaQ progressively: user data first, archive header next.
        let mut headers_input = basic_user_header();
        headers_input.extend(basic_file_header());
        let (_rest, (_archive_header, user_data)) = read_headers(&headers_input).unwrap();
        assert!(user_data.is_some());
    }

    /// The table keys must match known values, and decrypting known table
    /// data with them must produce the known plain data.
    #[test]
    fn it_generates_hashes() {
        let builder = MPQBuilder::new();
        let hash_key = builder
            .mpq_string_hash("(hash table)", MPQHashType::Table)
            .unwrap();
        let block_key = builder
            .mpq_string_hash("(block table)", MPQHashType::Table)
            .unwrap();
        assert_eq!(hash_key, 0xc3af3770);
        assert_eq!(block_key, 0xec83b3a3);

        // Hash table: encrypted fixture and the plain data it must decrypt to.
        let hash_table_encrypted = vec![
            0x07, 0xf8, 0xb8, 0x55, 0x4f, 0xb4, 0x8e, 0x3c, 0x7c, 0xa8, 0x7b, 0xac, 0xae, 0x1a,
            0x00, 0xe0, 0xc7, 0xc9, 0xdc, 0xc5, 0x3e, 0x6c, 0xfe, 0xc3, 0xa2, 0x02, 0x33, 0xa7,
            0xb8, 0x1b, 0x6d, 0xb7, 0x83, 0x4f, 0x4c, 0x63, 0x15, 0x59, 0x4d, 0xf8, 0xda, 0x7e,
            0x55, 0xfa, 0xe7, 0xb5, 0x2b, 0x0b, 0xe6, 0xd8, 0x76, 0xe6, 0xef, 0x30, 0x78, 0x8b,
            0x70, 0x31, 0xdb, 0x02, 0xa2, 0x78, 0xb8, 0x89, 0x07, 0x90, 0x24, 0xb9, 0xb4, 0xec,
            0xdc, 0xa3, 0x53, 0xe9, 0x4e, 0x95, 0xfc, 0x4e, 0x52, 0x15, 0x92, 0x59, 0xe3, 0xf1,
            0x37, 0x9f, 0x4b, 0xec, 0x53, 0x8d, 0x7c, 0x04, 0x02, 0xdc, 0xe7, 0xcd, 0x95, 0xfe,
            0x32, 0x21, 0x83, 0x94, 0x8d, 0x32, 0x23, 0x36, 0xa9, 0xd4, 0x76, 0xe1, 0x58, 0x3e,
            0x12, 0x12, 0x33, 0x2a, 0xb1, 0x95, 0x30, 0x1e, 0xff, 0xac, 0x45, 0x0e, 0xb1, 0x11,
            0xd5, 0x00, 0xc1, 0xed, 0x64, 0x49, 0xd4, 0xa3, 0x4b, 0x5a, 0xe0, 0x69, 0x0a, 0x5a,
            0x35, 0x4a, 0x31, 0xd5, 0xa7, 0x53, 0xe3, 0xf8, 0xd8, 0x27, 0x11, 0x93, 0x86, 0x65,
            0x21, 0xd5, 0x3d, 0xfd, 0xd6, 0x4d, 0x45, 0x62, 0xda, 0xc3, 0x7b, 0x0c, 0xab, 0xc7,
            0x9d, 0x48, 0xbb, 0xbf, 0x15, 0x21, 0xfe, 0xe0, 0xca, 0x9e, 0x9a, 0x07, 0x3c, 0x91,
            0x65, 0x26, 0xe1, 0xbb, 0x74, 0xeb, 0xce, 0x93, 0x32, 0x20, 0xad, 0x73, 0x59, 0x9c,
            0x96, 0x24, 0xae, 0xfd, 0xf7, 0x99, 0xcf, 0xbb, 0x09, 0xf2, 0x39, 0x61, 0x4e, 0x36,
            0xd5, 0x80, 0xdb, 0x5b, 0xa2, 0x61, 0x5a, 0x3d, 0xc2, 0x0b, 0xe3, 0x23, 0x30, 0x5a,
            0xd4, 0xcd, 0xc6, 0x4a, 0x11, 0x47, 0xa1, 0x95, 0x7d, 0xbb, 0xd8, 0xcf, 0x76, 0xcf,
            0xc9, 0x04, 0x13, 0x75, 0xba, 0x19, 0x98, 0xc8, 0xd6, 0xe3, 0xbe, 0x91, 0xb2, 0x1c,
            0x6e, 0xb0, 0x8d, 0x87,
        ];
        let hash_table_plain = vec![
            0xcb, 0x37, 0x84, 0xd3, 0xec, 0xea, 0xdf, 0x07, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00,
            0x00, 0x00, 0x4b, 0xa5, 0xc2, 0xaa, 0x95, 0x2b, 0x76, 0xf4, 0x00, 0x00, 0x00, 0x00,
            0x02, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x70, 0xb7, 0xe5, 0xc9,
            0xb6, 0xf6, 0x18, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x7b, 0x08,
            0x3c, 0x34, 0x82, 0x36, 0x8e, 0x27, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
            0xa0, 0x1e, 0x2b, 0x3b, 0x57, 0xf0, 0x2e, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
            0x00, 0x00, 0xdc, 0x8b, 0x7e, 0x5a, 0x5c, 0x3f, 0x25, 0xff, 0x00, 0x00, 0x00, 0x00,
            0x01, 0x00, 0x00, 0x00, 0x10, 0x79, 0x65, 0xfd, 0xa7, 0x98, 0x9b, 0x4e, 0x00, 0x00,
            0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x9c, 0xc2, 0x83, 0xd3, 0x92, 0x2e, 0x40, 0xef,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xcf, 0xb0, 0xa8, 0x1d, 0x28, 0xff, 0xce, 0xa2, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00,
            0x00, 0x00, 0x89, 0x22, 0x95, 0x31, 0xa3, 0xfa, 0x5f, 0x6a, 0x00, 0x00, 0x00, 0x00,
            0x03, 0x00, 0x00, 0x00,
        ];
        let (_, hash_table_decrypted) = builder
            .mpq_data_decrypt(&hash_table_encrypted, hash_key)
            .unwrap();
        assert_eq!(hash_table_decrypted, hash_table_plain);

        // Block table: encrypted fixture and the plain data it must decrypt to.
        let block_table_encrypted = vec![
            0xa7, 0x67, 0x48, 0x3d, 0x7a, 0xd1, 0x08, 0xca, 0x4c, 0xbc, 0x35, 0xf8, 0x06, 0x04,
            0x34, 0xe9, 0xbe, 0xb3, 0xb5, 0xb3, 0x7d, 0xeb, 0x0e, 0x11, 0x05, 0xb9, 0xf4, 0x17,
            0xd3, 0x1b, 0x38, 0x21, 0x2f, 0xfd, 0x94, 0x62, 0xa1, 0xea, 0xe2, 0x2e, 0x29, 0xde,
            0xe8, 0xdf, 0x4d, 0x84, 0x0b, 0x54, 0x88, 0xe4, 0x87, 0xdc, 0xcc, 0xca, 0xd6, 0xf6,
            0xe6, 0xb4, 0x09, 0x0c, 0xf8, 0x27, 0xec, 0x87, 0x5d, 0x33, 0x7b, 0x3a, 0x9c, 0xb5,
            0xd9, 0x80, 0x8c, 0x3c, 0x19, 0x81, 0x6c, 0x76, 0xec, 0xac, 0x53, 0x55, 0xd6, 0xa6,
            0xf6, 0x7d, 0x18, 0xfb, 0xa9, 0x86, 0x30, 0x33, 0x29, 0xcb, 0x63, 0x11, 0xfa, 0xb5,
            0xe6, 0x02, 0x7f, 0x23, 0x4b, 0xe9, 0xd8, 0x77, 0x0c, 0x4d, 0xc8, 0x1e, 0x41, 0xe9,
            0xf2, 0x84, 0x6e, 0xc6, 0x75, 0xbd, 0x47, 0x8b, 0x04, 0x7d, 0x48, 0xd9, 0xc2, 0xa1,
            0x02, 0x0d, 0x04, 0xdf, 0xb3, 0xc7, 0x82, 0xf5, 0x77, 0x37, 0x81, 0x9d, 0x7f, 0xfb,
            0x65, 0x5d, 0x96, 0xe3, 0xa2, 0x0a, 0x68, 0x1b, 0xb6, 0x6b, 0x7c, 0x12, 0x3e, 0x7b,
            0x63, 0x9c, 0x00, 0x7b, 0x7e, 0x23,
        ];
        let block_table_plain = vec![
            0x2c, 0x00, 0x00, 0x00, 0xd7, 0x02, 0x00, 0x00, 0x7a, 0x03, 0x00, 0x00, 0x00, 0x02,
            0x00, 0x81, 0x03, 0x03, 0x00, 0x00, 0x21, 0x03, 0x00, 0x00, 0xe9, 0x04, 0x00, 0x00,
            0x00, 0x02, 0x00, 0x81, 0x24, 0x06, 0x00, 0x00, 0x30, 0xf6, 0x02, 0x00, 0x7d, 0x52,
            0x07, 0x00, 0x00, 0x02, 0x00, 0x81, 0x54, 0xfc, 0x02, 0x00, 0xe2, 0x00, 0x00, 0x00,
            0x4e, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x81, 0x36, 0xfd, 0x02, 0x00, 0x61, 0x00,
            0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x81, 0x97, 0xfd, 0x02, 0x00,
            0x2b, 0x05, 0x00, 0x00, 0xb2, 0x07, 0x00, 0x00, 0x00, 0x02, 0x00, 0x81, 0xc2, 0x02,
            0x03, 0x00, 0x07, 0x19, 0x00, 0x00, 0x8f, 0x30, 0x00, 0x00, 0x00, 0x02, 0x00, 0x81,
            0xc9, 0x1b, 0x03, 0x00, 0x15, 0x02, 0x00, 0x00, 0x60, 0x09, 0x00, 0x00, 0x00, 0x02,
            0x00, 0x81, 0xde, 0x1d, 0x03, 0x00, 0x78, 0x00, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00,
            0x00, 0x02, 0x00, 0x81, 0x56, 0x1e, 0x03, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x20, 0x01,
            0x00, 0x00, 0x00, 0x02, 0x00, 0x81,
        ];
        let (_, block_table_decrypted) = builder
            .mpq_data_decrypt(&block_table_encrypted, block_key)
            .unwrap();
        assert_eq!(block_table_decrypted, block_table_plain);
    }
}
373}