nom_mpq/
lib.rs

1//! Nom Parsing the MoPaQ file format.
2//! Sources:
3//! - [The_MoPaQ_Archive_Format](https://web.archive.org/web/20120222093346/http://wiki.devklog.net/index.php?title=The_MoPaQ_Archive_Format)
4//! - [MPyQ](https://github.com/arkx/mpyq/)
5
6#![warn(missing_docs)]
7pub use error::MPQResult;
8use nom::bytes::complete::take;
9use nom::error::dbg_dmp;
10use nom::number::complete::{i32, u32, u8};
11use nom::IResult;
12use parser::MPQHashType;
13use std::collections::HashMap;
14use std::io::Read;
15
16pub mod builder;
17pub mod error;
18pub mod parser;
19pub use builder::MPQBuilder;
20use compress::zlib;
21pub use error::MPQParserError;
22pub use parser::MPQBlockTableEntry;
23pub use parser::MPQFileHeader;
24pub use parser::MPQHashTableEntry;
25pub use parser::MPQUserData;
26use parser::LITTLE_ENDIAN;
27
28// Unused flags:
29// pub const MPQ_FILE_IMPLODE: u32 = 0x00000100;
30// pub const MPQ_FILE_FIX_KEY: u32 = 0x00020000;
31// pub const MPQ_FILE_DELETE_MARKER: u32 = 0x02000000;
32
/// Block table flag: the file data is stored compressed.
pub const MPQ_FILE_COMPRESS: u32 = 0x0000_0200;
/// Block table flag: the file data is stored encrypted.
pub const MPQ_FILE_ENCRYPTED: u32 = 0x0001_0000;
/// Block table flag: the file is stored as a single unit instead of
/// being split into several sectors.
pub const MPQ_FILE_SINGLE_UNIT: u32 = 0x0100_0000;
/// Block table flag: the sectors carry a cyclic redundancy check.
pub const MPQ_FILE_SECTOR_CRC: u32 = 0x0400_0000;
/// Block table flag: the file exists (as opposed to being marked as deleted).
pub const MPQ_FILE_EXISTS: u32 = 0x8000_0000;
43
/// Compression type byte: the payload is stored as-is, without compression.
pub const COMPRESSION_PLAINTEXT: u8 = 0x00;
/// Compression type byte: the payload is compressed with zlib.
pub const COMPRESSION_ZLIB: u8 = 0x02;
/// Compression type byte: the payload is compressed with bzip2.
pub const COMPRESSION_BZ2: u8 = 0x10;
50
/// The main MPQ object that contains the parsed entries of an archive.
///
/// The struct only holds the parsed tables; the methods that read embedded
/// files take the original archive bytes as a separate argument.
///
/// Note that the derived [`Default`] value has an empty `encryption_table`,
/// so hashing a non-empty name with it fails with
/// [`MPQParserError::EncryptionTableIndexNotFound`].
#[derive(Debug, Default)]
pub struct MPQ {
    /// The Archive Header containing format version, the offsets for the
    /// block table and hash table.
    pub archive_header: MPQFileHeader,
    /// The Archive may contain [`MPQUserData`], which is at the start of the file.
    pub user_data: Option<MPQUserData>,
    /// The hash table entries, after decryption and parsing.
    /// Each entry refers to a block table entry through its `block_table_index`.
    pub hash_table_entries: Vec<MPQHashTableEntry>,
    /// The block table entries, after decryption and parsing.
    pub block_table_entries: Vec<MPQBlockTableEntry>,
    /// The internal MPQ encryption table, keyed by table index.
    /// It is consulted both when hashing names and when decrypting data.
    pub encryption_table: HashMap<u32, u32>,
}
66
67impl MPQ {
68    /// Prepares the encryption table, this hashmap is used for block-sized
69    /// decryption operations.
70    fn prepare_encryption_table() -> HashMap<u32, u32> {
71        let mut seed: u32 = 0x00100001;
72        let mut res = HashMap::new();
73        for i in (0..256).map(|x| x as u32) {
74            let mut idx = i;
75            for _ in 0..5 {
76                seed = (seed * 125 + 3) % 0x2AAAAB;
77                let temp1 = (seed & 0xFFFF) << 0x10;
78
79                seed = (seed * 125 + 3) % 0x2AAAAB;
80                let temp2 = seed & 0xFFFF;
81
82                res.insert(idx, temp1 | temp2);
83
84                idx += 0x100;
85            }
86        }
87        res
88    }
89
90    /// Hash a string using MPQ's hash function
91    ///
92    /// `_hash` on MPyQ
93    /// This function doesn't use self as the Builder also needs to access the same functionality.
94    #[allow(clippy::precedence)]
95    pub fn mpq_string_hash(
96        encryption_table: &HashMap<u32, u32>,
97        location: &str,
98        hash_type: MPQHashType,
99    ) -> Result<u32, MPQParserError> {
100        let mut seed1: u64 = 0x7FED7FEDu64;
101        let mut seed2: u64 = 0xEEEEEEEEu64;
102        for ch in location.to_uppercase().chars() {
103            let ch_ord: u32 = ch.into();
104            let hash_type_idx: u32 = hash_type.try_into()?;
105            let value = match encryption_table.get(&((hash_type_idx << 8) + ch_ord)) {
106                Some(val) => val,
107                None => {
108                    tracing::error!(
109                        "Couldn't find index in map for: {}",
110                        (hash_type_idx << 8) + ch_ord
111                    );
112                    return Err(MPQParserError::EncryptionTableIndexNotFound);
113                }
114            };
115            seed1 = (*value as u64 ^ (seed1 + seed2)) & 0xFFFFFFFFu64;
116            seed2 = ch_ord as u64 + seed1 + seed2 + (seed2 << 5) + 3 & 0xFFFFFFFFu64;
117        }
118        tracing::trace!("Returning {} for location: {}", (seed1 as u32), location);
119        Ok(seed1 as u32)
120    }
121
122    /// Get the hash table entry corresponding to a given filename.
123    ///
124    /// A filename is hashed with both [`MPQHashType::HashA`] and [`MPQHashType::HashB`]
125    /// to uniquely identify the filename in the archive
126    pub fn get_hash_table_entry(
127        &self,
128        filename: &str,
129    ) -> Result<MPQHashTableEntry, MPQParserError> {
130        let hash_a = Self::mpq_string_hash(&self.encryption_table, filename, MPQHashType::HashA)?;
131        let hash_b = Self::mpq_string_hash(&self.encryption_table, filename, MPQHashType::HashB)?;
132        for entry in &self.hash_table_entries {
133            if entry.hash_a == hash_a && entry.hash_b == hash_b {
134                tracing::debug!("Found filename: {}, as entry: {:?}", filename, entry);
135                return Ok(entry.clone());
136            }
137        }
138        tracing::warn!("Unable to find hash table entry for {}", filename);
139        Err(MPQParserError::HashTableEntryNotFound(filename.to_string()))
140    }
141
142    /// Read the compression type and decompress file data accordingly.
143    pub fn decompress(input: &[u8]) -> MPQResult<&[u8], Vec<u8>> {
144        let mut data = vec![];
145        let (tail, compression_type) = dbg_dmp(u8, "compression_type")(input)?;
146        match compression_type {
147            COMPRESSION_PLAINTEXT => {
148                tracing::debug!("Plaintext (no compression)");
149                data = tail[..].to_vec()
150            }
151            COMPRESSION_ZLIB => {
152                tracing::debug!("Attempting ZLIB compression",);
153                let mut d = zlib::Decoder::new(std::io::BufReader::new(tail));
154
155                let _ = d.read_to_end(&mut data)?;
156            }
157            COMPRESSION_BZ2 => {
158                tracing::debug!("Attempting BZ2 compression",);
159                let mut decompressor = bzip2_rs::DecoderReader::new(tail);
160                std::io::copy(&mut decompressor, &mut data)?;
161            }
162            unknown_version => {
163                return MPQResult::Err(MPQParserError::UnsupportedCompression(unknown_version));
164            }
165        };
166
167        Ok((tail, data))
168    }
169
170    /// Reads an embedded file inside the MPQ archive.
171    #[tracing::instrument(level = "debug", skip(self, orig_input))]
172    pub fn read_mpq_file_sector<'a>(
173        &'a self,
174        filename: &str,
175        force_decompress: bool,
176        orig_input: &'a [u8],
177    ) -> MPQResult<&'a [u8], Vec<u8>> {
178        let mut res = vec![];
179        let hash_entry = self.get_hash_table_entry(filename)?;
180        let block_entry = self.block_table_entries[hash_entry.block_table_index as usize].clone();
181        tracing::debug!("block_entry {:?}", block_entry);
182        // Read the block
183        if block_entry.flags & MPQ_FILE_EXISTS == 0 {
184            tracing::debug!("file is marked as deleted. Returning empty content");
185            return Ok((orig_input, res));
186        }
187        if block_entry.archived_size == 0 {
188            tracing::debug!("File is zero size. Returning empty content");
189            return Ok((orig_input, res));
190        }
191        let offset = block_entry.offset as usize + self.archive_header.offset;
192        let (tail, file_data) =
193            dbg_dmp(take(block_entry.archived_size), "file_data")(&orig_input[offset..])?;
194
195        tracing::debug!("Block table data: {}", parser::peek_hex(file_data));
196        if block_entry.flags & MPQ_FILE_ENCRYPTED != 0 {
197            return MPQResult::Err(MPQParserError::UnsupportedEncryptionType);
198        }
199        if block_entry.flags & MPQ_FILE_SINGLE_UNIT != 0 {
200            tracing::debug!("File sector contains a single unit",);
201            // Single unit files only need to be decompressed, but
202            // compression only happens when at least one byte is gained.
203            if block_entry.flags & MPQ_FILE_COMPRESS != 0
204                && (force_decompress || block_entry.size > block_entry.archived_size)
205            {
206                tracing::debug!("File needs to be decompressed",);
207                let (_tail, decompressed_data) = Self::decompress(file_data)?;
208                return Ok((tail, decompressed_data));
209            }
210            tracing::debug!("File does not needs to be decompressed",);
211        } else {
212            tracing::debug!("File does not need to be decompressed",);
213            // File consists of many sectors. They all need to be
214            // decompressed separately and united.
215            let sector_size = 512 << self.archive_header.sector_size_shift;
216            let mut sectors =
217                (block_entry.size as f32 / sector_size as f32).floor() as usize + 1usize;
218            tracing::debug!("Total sectors: {sectors}");
219            let crc = if block_entry.flags & MPQ_FILE_SECTOR_CRC != 0 {
220                sectors += 1;
221                true
222            } else {
223                false
224            };
225            let mut positions: Vec<usize> = vec![];
226            let mut position_file_index = &file_data[..4 * (sectors + 1)];
227            for _ in 0..sectors + 1 {
228                // Note: MPyQ format for this is a list of '<I'
229                // as long as there are sectors + 1
230                // `'<%dI' % (sectors + 1)` (Not to confuse the `d` with
231                // double, it's for the `%` format operator.
232                let (new_pos_idx, position) =
233                    dbg_dmp(u32(LITTLE_ENDIAN), "positions")(position_file_index)?;
234                positions.push(position as usize);
235                position_file_index = new_pos_idx;
236            }
237            let mut sector_bytes_left = block_entry.size as usize;
238            let mut total_sectors = positions.len() - 1;
239            if crc {
240                total_sectors -= 1;
241            }
242
243            for i in 0..total_sectors {
244                let mut sector = file_data[positions[i]..positions[i + 1]].to_vec();
245                if block_entry.flags & MPQ_FILE_COMPRESS != 0
246                    && (force_decompress || sector_bytes_left > sector.len())
247                {
248                    let (_tail, mut decompressed_sector) =
249                        Self::decompress(&file_data[positions[i]..positions[i + 1]])?;
250                    res.append(&mut decompressed_sector);
251                } else {
252                    res.append(&mut sector);
253                }
254
255                sector_bytes_left -= sector.len();
256            }
257            return Ok((tail, res));
258        }
259        Ok((tail, file_data.to_vec()))
260    }
261
262    /// Decrypt hash or block table or a sector.
263    ///
264    /// `_decrypt` on MPyQ
265    #[allow(clippy::precedence)]
266    pub fn mpq_data_decrypt<'a>(
267        encryption_table: &'a HashMap<u32, u32>,
268        data: &'a [u8],
269        key: u32,
270    ) -> IResult<&'a [u8], Vec<u8>> {
271        let mut seed1 = key as i64;
272        let mut seed2 = 0xEEEEEEEEi64;
273        let mut res = vec![];
274
275        for i in 0..(data.len() as f32 / 4f32).floor() as usize {
276            let encryption_table_value =
277                match encryption_table.get(&(0x400 + (seed1 as u32 & 0xFF))) {
278                    Some(val) => *val as i64,
279                    None => {
280                        tracing::error!(
281                            "Encryption table value not found for: {}",
282                            (0x400 + (seed1 & 0xFF) as i32)
283                        );
284                        continue;
285                    }
286                };
287            seed2 += encryption_table_value;
288            seed2 &= 0xFFFFFFFFi64;
289            let (_tail, value) =
290                dbg_dmp(i32(LITTLE_ENDIAN), "encrypted_value")(&data[i * 4..i * 4 + 4])?;
291            let mut value = value as i64;
292            value = (value ^ (seed1 + seed2)) & 0xFFFFFFFFi64;
293
294            seed1 = ((!seed1 << 0x15) + 0x11111111) | (seed1 >> 0x0B);
295            seed1 &= 0xFFFFFFFF;
296            seed2 = value + seed2 + (seed2 << 5) + 3 & 0xFFFFFFFFi64;
297            let mut le_packed_value = (value as i32).to_le_bytes().to_vec();
298
299            // pack in little endian
300            res.append(&mut le_packed_value);
301        }
302
303        Ok((data, res))
304    }
305
306    /// Returns the list of filenames and their respective size as contained in the MPQ archive.
307    pub fn get_files(&self, orig_input: &[u8]) -> Result<Vec<(String, usize)>, MPQParserError> {
308        let mut res: Vec<(String, usize)> = vec![];
309        let files: Vec<String> = match self.read_mpq_file_sector("(listfile)", false, orig_input) {
310            Ok((_tail, file_buffer)) => {
311                tracing::debug!(
312                    "Successfully read '(listfile)' sector: {:?}",
313                    parser::peek_hex(&file_buffer)
314                );
315                match std::str::from_utf8(&file_buffer) {
316                    Ok(val) => val.lines().map(|x| x.to_string()).collect(),
317                    Err(err) => {
318                        tracing::error!("Invalid UTF-8 sequence: {:?}", err);
319                        return Err(MPQParserError::InvalidUTF8Sequence(
320                            "(listfile)".to_string(),
321                        ));
322                    }
323                }
324            }
325            Err(err) => {
326                tracing::error!("Unable to read '(listfile)' sector: {:?}", err);
327                return Err(MPQParserError::InvalidListFileSector);
328            }
329        };
330        for filename in files {
331            let hash_entry = match self.get_hash_table_entry(&filename) {
332                Ok(val) => val,
333                Err(err) => {
334                    tracing::warn!(
335                        "Unable to find hash entry for filename: {:?}: {:?}",
336                        filename,
337                        err
338                    );
339                    continue;
340                }
341            };
342            let block_entry = &self.block_table_entries[hash_entry.block_table_index as usize];
343            tracing::debug!("{} {1:>8} bytes", filename, block_entry.size as usize);
344            res.push((filename, block_entry.size as usize));
345        }
346        Ok(res)
347    }
348}