nom_mpq/
lib.rs

1//! Nom Parsing the MoPaQ file format.
2//! Sources:
3//! - [The_MoPaQ_Archive_Format](https://web.archive.org/web/20120222093346/http://wiki.devklog.net/index.php?title=The_MoPaQ_Archive_Format)
4//! - [MPyQ](https://github.com/arkx/mpyq/)
5
6#![warn(missing_docs)]
7pub use error::MPQResult;
8use nom::bytes::complete::take;
9use nom::number::complete::{i32, u32, u8};
10use nom::HexDisplay;
11use nom::IResult;
12use parser::MPQHashType;
13use std::collections::HashMap;
14use std::io::Read;
15
16pub mod builder;
17pub mod error;
18pub mod parser;
19pub use builder::MPQBuilder;
20use compress::zlib;
21pub use error::MPQParserError;
22pub use parser::MPQBlockTableEntry;
23pub use parser::MPQFileHeader;
24pub use parser::MPQHashTableEntry;
25pub use parser::MPQUserData;
26use parser::LITTLE_ENDIAN;
27
28/// TEMP waiting for https://github.com/rust-bakery/nom/pull/1845 to be merged/released.
29pub fn dbg_dmp<'a, F, O, E: std::fmt::Debug>(
30    mut f: F,
31    context: &'static str,
32) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], O, E>
33where
34    F: FnMut(&'a [u8]) -> IResult<&'a [u8], O, E>,
35{
36    move |i: &'a [u8]| match f(i) {
37        Err(e) => {
38            println!("{}: Error({:?}) at:\n{}", context, e, i.to_hex(8));
39            Err(e)
40        }
41        a => a,
42    }
43}
44// Unused flags:
45// pub const MPQ_FILE_IMPLODE: u32 = 0x00000100;
46// pub const MPQ_FILE_FIX_KEY: u32 = 0x00020000;
47// pub const MPQ_FILE_DELETE_MARKER: u32 = 0x02000000;
48
/// Block flag: the file data is stored compressed.
pub const MPQ_FILE_COMPRESS: u32 = 0x0000_0200;
/// Block flag: the file data is stored encrypted.
pub const MPQ_FILE_ENCRYPTED: u32 = 0x0001_0000;
/// Block flag: the file is stored as one single unit rather than split into sectors.
pub const MPQ_FILE_SINGLE_UNIT: u32 = 0x0100_0000;
/// Block flag: the file carries a cyclic redundancy check for its sectors.
pub const MPQ_FILE_SECTOR_CRC: u32 = 0x0400_0000;
/// Block flag: the file exists (when unset, the file is treated as deleted).
pub const MPQ_FILE_EXISTS: u32 = 0x8000_0000;

/// Compression type byte: the payload is stored as-is, without compression.
pub const COMPRESSION_PLAINTEXT: u8 = 0;
/// Compression type byte: the payload is compressed with zlib.
pub const COMPRESSION_ZLIB: u8 = 2;
/// Compression type byte: the payload is compressed with bzip2.
pub const COMPRESSION_BZ2: u8 = 16;
66
67/// The main MPQ object that contains the parsed entries
68#[derive(Debug, Default)]
69pub struct MPQ {
70    /// The Archive Header containing format version, the offsets for the
71    /// block table and hash table.
72    pub archive_header: MPQFileHeader,
73    /// The Archive may contain [`MPQUserData`], which is at the start of the file.
74    pub user_data: Option<MPQUserData>,
75    /// The hash table entries, after decryption and parsing
76    pub hash_table_entries: Vec<MPQHashTableEntry>,
77    /// The block table entries, after decryption and parsing
78    pub block_table_entries: Vec<MPQBlockTableEntry>,
79    /// The internal MPQ encryption table.
80    pub encryption_table: HashMap<u32, u32>,
81}
82
83impl MPQ {
84    /// Prepares the encryption table, this hashmap is used for block-sized
85    /// decryption operations.
86    fn prepare_encryption_table() -> HashMap<u32, u32> {
87        let mut seed: u32 = 0x00100001;
88        let mut res = HashMap::new();
89        for i in (0..256).map(|x| x as u32) {
90            let mut idx = i;
91            for _ in 0..5 {
92                seed = (seed * 125 + 3) % 0x2AAAAB;
93                let temp1 = (seed & 0xFFFF) << 0x10;
94
95                seed = (seed * 125 + 3) % 0x2AAAAB;
96                let temp2 = seed & 0xFFFF;
97
98                res.insert(idx, temp1 | temp2);
99
100                idx += 0x100;
101            }
102        }
103        res
104    }
105
106    /// Hash a string using MPQ's hash function
107    ///
108    /// `_hash` on MPyQ
109    /// This function doesn't use self as the Builder also needs to access the same functionality.
110    #[allow(clippy::precedence)]
111    pub fn mpq_string_hash(
112        encryption_table: &HashMap<u32, u32>,
113        location: &str,
114        hash_type: MPQHashType,
115    ) -> Result<u32, MPQParserError> {
116        let mut seed1: u64 = 0x7FED7FEDu64;
117        let mut seed2: u64 = 0xEEEEEEEEu64;
118        for ch in location.to_uppercase().chars() {
119            let ch_ord: u32 = ch.into();
120            let hash_type_idx: u32 = hash_type.try_into()?;
121            let value = match encryption_table.get(&((hash_type_idx << 8) + ch_ord)) {
122                Some(val) => val,
123                None => {
124                    tracing::error!(
125                        "Couldn't find index in map for: {}",
126                        (hash_type_idx << 8) + ch_ord
127                    );
128                    return Err(MPQParserError::EncryptionTableIndexNotFound);
129                }
130            };
131            seed1 = (*value as u64 ^ (seed1 + seed2)) & 0xFFFFFFFFu64;
132            seed2 = ch_ord as u64 + seed1 + seed2 + (seed2 << 5) + 3 & 0xFFFFFFFFu64;
133        }
134        tracing::trace!("Returning {} for location: {}", (seed1 as u32), location);
135        Ok(seed1 as u32)
136    }
137
138    /// Get the hash table entry corresponding to a given filename.
139    ///
140    /// A filename is hashed with both [`MPQHashType::HashA`] and [`MPQHashType::HashB`]
141    /// to uniquely identify the filename in the archive
142    pub fn get_hash_table_entry(
143        &self,
144        filename: &str,
145    ) -> Result<MPQHashTableEntry, MPQParserError> {
146        let hash_a = Self::mpq_string_hash(&self.encryption_table, filename, MPQHashType::HashA)?;
147        let hash_b = Self::mpq_string_hash(&self.encryption_table, filename, MPQHashType::HashB)?;
148        for entry in &self.hash_table_entries {
149            if entry.hash_a == hash_a && entry.hash_b == hash_b {
150                tracing::debug!("Found filename: {}, as entry: {:?}", filename, entry);
151                return Ok(entry.clone());
152            }
153        }
154        tracing::warn!("Unable to find hash table entry for {}", filename);
155        Err(MPQParserError::HashTableEntryNotFound(filename.to_string()))
156    }
157
158    /// Read the compression type and decompress file data accordingly.
159    pub fn decompress(input: &[u8]) -> MPQResult<&[u8], Vec<u8>> {
160        let mut data = vec![];
161        let (tail, compression_type) = dbg_dmp(u8, "compression_type")(input)?;
162        match compression_type {
163            COMPRESSION_PLAINTEXT => {
164                tracing::debug!("Plaintext (no compression)");
165                data = tail[..].to_vec()
166            }
167            COMPRESSION_ZLIB => {
168                tracing::debug!("Attempting ZLIB compression",);
169                let mut d = zlib::Decoder::new(std::io::BufReader::new(tail));
170
171                let _ = d.read_to_end(&mut data)?;
172            }
173            COMPRESSION_BZ2 => {
174                tracing::debug!("Attempting BZ2 compression",);
175                let mut decompressor = bzip2_rs::DecoderReader::new(tail);
176                std::io::copy(&mut decompressor, &mut data)?;
177            }
178            unknown_version => {
179                return MPQResult::Err(MPQParserError::UnsupportedCompression(unknown_version));
180            }
181        };
182
183        Ok((tail, data))
184    }
185
186    /// Reads an embedded file inside the MPQ archive.
187    #[tracing::instrument(level = "debug", skip(self, orig_input))]
188    pub fn read_mpq_file_sector<'a>(
189        &'a self,
190        filename: &str,
191        force_decompress: bool,
192        orig_input: &'a [u8],
193    ) -> MPQResult<&'a [u8], Vec<u8>> {
194        let mut res = vec![];
195        let hash_entry = self.get_hash_table_entry(filename)?;
196        let block_entry = self.block_table_entries[hash_entry.block_table_index as usize].clone();
197        tracing::debug!("block_entry {:?}", block_entry);
198        // Read the block
199        if block_entry.flags & MPQ_FILE_EXISTS == 0 {
200            tracing::debug!("file is marked as deleted. Returning empty content");
201            return Ok((orig_input, res));
202        }
203        if block_entry.archived_size == 0 {
204            tracing::debug!("File is zero size. Returning empty content");
205            return Ok((orig_input, res));
206        }
207        let offset = block_entry.offset as usize + self.archive_header.offset;
208        let (tail, file_data) =
209            dbg_dmp(take(block_entry.archived_size), "file_data")(&orig_input[offset..])?;
210
211        tracing::debug!("Block table data: {}", parser::peek_hex(file_data));
212        if block_entry.flags & MPQ_FILE_ENCRYPTED != 0 {
213            return MPQResult::Err(MPQParserError::UnsupportedEncryptionType);
214        }
215        if block_entry.flags & MPQ_FILE_SINGLE_UNIT != 0 {
216            tracing::debug!("File sector contains a single unit",);
217            // Single unit files only need to be decompressed, but
218            // compression only happens when at least one byte is gained.
219            if block_entry.flags & MPQ_FILE_COMPRESS != 0
220                && (force_decompress || block_entry.size > block_entry.archived_size)
221            {
222                tracing::debug!("File needs to be decompressed",);
223                let (_tail, decompressed_data) = Self::decompress(file_data)?;
224                return Ok((tail, decompressed_data));
225            }
226            tracing::debug!("File does not needs to be decompressed",);
227        } else {
228            tracing::debug!("File does not need to be decompressed",);
229            // File consists of many sectors. They all need to be
230            // decompressed separately and united.
231            let sector_size = 512 << self.archive_header.sector_size_shift;
232            let mut sectors =
233                (block_entry.size as f32 / sector_size as f32).floor() as usize + 1usize;
234            tracing::debug!("Total sectors: {sectors}");
235            let crc = if block_entry.flags & MPQ_FILE_SECTOR_CRC != 0 {
236                sectors += 1;
237                true
238            } else {
239                false
240            };
241            let mut positions: Vec<usize> = vec![];
242            let mut position_file_index = &file_data[..4 * (sectors + 1)];
243            for _ in 0..sectors + 1 {
244                // Note: MPyQ format for this is a list of '<I'
245                // as long as there are sectors + 1
246                // `'<%dI' % (sectors + 1)` (Not to confuse the `d` with
247                // double, it's for the `%` format operator.
248                let (new_pos_idx, position) =
249                    dbg_dmp(u32(LITTLE_ENDIAN), "positions")(position_file_index)?;
250                positions.push(position as usize);
251                position_file_index = new_pos_idx;
252            }
253            let mut sector_bytes_left = block_entry.size as usize;
254            let mut total_sectors = positions.len() - 1;
255            if crc {
256                total_sectors -= 1;
257            }
258
259            for i in 0..total_sectors {
260                let mut sector = file_data[positions[i]..positions[i + 1]].to_vec();
261                if block_entry.flags & MPQ_FILE_COMPRESS != 0
262                    && (force_decompress || sector_bytes_left > sector.len())
263                {
264                    let (_tail, mut decompressed_sector) =
265                        Self::decompress(&file_data[positions[i]..positions[i + 1]])?;
266                    res.append(&mut decompressed_sector);
267                } else {
268                    res.append(&mut sector);
269                }
270
271                sector_bytes_left -= sector.len();
272            }
273            return Ok((tail, res));
274        }
275        Ok((tail, file_data.to_vec()))
276    }
277
278    /// Decrypt hash or block table or a sector.
279    ///
280    /// `_decrypt` on MPyQ
281    #[allow(clippy::precedence)]
282    pub fn mpq_data_decrypt<'a>(
283        encryption_table: &'a HashMap<u32, u32>,
284        data: &'a [u8],
285        key: u32,
286    ) -> IResult<&'a [u8], Vec<u8>> {
287        let mut seed1 = key as i64;
288        let mut seed2 = 0xEEEEEEEEi64;
289        let mut res = vec![];
290
291        for i in 0..(data.len() as f32 / 4f32).floor() as usize {
292            let encryption_table_value =
293                match encryption_table.get(&(0x400 + (seed1 as u32 & 0xFF))) {
294                    Some(val) => *val as i64,
295                    None => {
296                        tracing::error!(
297                            "Encryption table value not found for: {}",
298                            (0x400 + (seed1 & 0xFF) as i32)
299                        );
300                        continue;
301                    }
302                };
303            seed2 += encryption_table_value;
304            seed2 &= 0xFFFFFFFFi64;
305            let (_tail, value) =
306                dbg_dmp(i32(LITTLE_ENDIAN), "encrypted_value")(&data[i * 4..i * 4 + 4])?;
307            let mut value = value as i64;
308            value = (value ^ (seed1 + seed2)) & 0xFFFFFFFFi64;
309
310            seed1 = ((!seed1 << 0x15) + 0x11111111) | (seed1 >> 0x0B);
311            seed1 &= 0xFFFFFFFF;
312            seed2 = value + seed2 + (seed2 << 5) + 3 & 0xFFFFFFFFi64;
313            let mut le_packed_value = (value as i32).to_le_bytes().to_vec();
314
315            // pack in little endian
316            res.append(&mut le_packed_value);
317        }
318
319        Ok((data, res))
320    }
321
322    /// Returns the list of filenames and their respective size as contained in the MPQ archive.
323    pub fn get_files(&self, orig_input: &[u8]) -> Result<Vec<(String, usize)>, MPQParserError> {
324        let mut res: Vec<(String, usize)> = vec![];
325        let files: Vec<String> = match self.read_mpq_file_sector("(listfile)", false, orig_input) {
326            Ok((_tail, file_buffer)) => {
327                tracing::debug!(
328                    "Successfully read '(listfile)' sector: {:?}",
329                    parser::peek_hex(&file_buffer)
330                );
331                match std::str::from_utf8(&file_buffer) {
332                    Ok(val) => val.lines().map(|x| x.to_string()).collect(),
333                    Err(err) => {
334                        tracing::error!("Invalid UTF-8 sequence: {:?}", err);
335                        return Err(MPQParserError::InvalidUTF8Sequence(
336                            "(listfile)".to_string(),
337                        ));
338                    }
339                }
340            }
341            Err(err) => {
342                tracing::error!("Unable to read '(listfile)' sector: {:?}", err);
343                return Err(MPQParserError::InvalidListFileSector);
344            }
345        };
346        for filename in files {
347            let hash_entry = match self.get_hash_table_entry(&filename) {
348                Ok(val) => val,
349                Err(err) => {
350                    tracing::warn!(
351                        "Unable to find hash entry for filename: {:?}: {:?}",
352                        filename,
353                        err
354                    );
355                    continue;
356                }
357            };
358            let block_entry = &self.block_table_entries[hash_entry.block_table_index as usize];
359            tracing::debug!("{} {1:>8} bytes", filename, block_entry.size as usize);
360            res.push((filename, block_entry.size as usize));
361        }
362        Ok(res)
363    }
364}