nom_mpq/
lib.rs

1//! Nom Parsing the MoPaQ file format.
2//! Sources:
3//! - [The_MoPaQ_Archive_Format](https://web.archive.org/web/20120222093346/http://wiki.devklog.net/index.php?title=The_MoPaQ_Archive_Format)
4//! - [MPyQ](https://github.com/arkx/mpyq/)
5
6#![warn(missing_docs)]
7pub use error::MPQResult;
8use nom::IResult;
9use nom::bytes::complete::take;
10use nom::number::complete::{i32, u8, u32};
11use parser::MPQHashType;
12use std::collections::HashMap;
13use std::io::Read;
14
15pub mod builder;
16pub mod error;
17pub mod parser;
18pub use builder::MPQBuilder;
19use compress::zlib;
20pub use error::MPQParserError;
21use parser::LITTLE_ENDIAN;
22pub use parser::MPQBlockTableEntry;
23pub use parser::MPQFileHeader;
24pub use parser::MPQHashTableEntry;
25pub use parser::MPQUserData;
26
27/// TEMP waiting for https://github.com/rust-bakery/nom/pull/1845 to be merged/released.
28pub fn dbg_dmp<'a, F, O, E: std::fmt::Debug>(
29    mut f: F,
30    context: &'static str,
31) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], O, E>
32where
33    F: FnMut(&'a [u8]) -> IResult<&'a [u8], O, E>,
34{
35    move |i: &'a [u8]| match f(i) {
36        Err(e) => {
37            tracing::debug!(
38                "{}: Error({:.32}) at: {}",
39                context,
40                e.to_string(),
41                parser::peek_hex(i)
42            );
43            Err(e)
44        }
45        a => a,
46    }
47}
// Block flags that exist in the format but are not used by this crate:
// pub const MPQ_FILE_IMPLODE: u32 = 0x00000100;
// pub const MPQ_FILE_FIX_KEY: u32 = 0x00020000;
// pub const MPQ_FILE_DELETE_MARKER: u32 = 0x02000000;

/// Block flag: the file data is compressed.
pub const MPQ_FILE_COMPRESS: u32 = 0x00000200;
/// Block flag: the file data is encrypted.
pub const MPQ_FILE_ENCRYPTED: u32 = 0x00010000;
/// Block flag: the file is stored as a single unit rather than split into sectors.
pub const MPQ_FILE_SINGLE_UNIT: u32 = 0x01000000;
/// Block flag: the sector offset table carries one extra entry for sector
/// cyclic redundancy checks.
pub const MPQ_FILE_SECTOR_CRC: u32 = 0x04000000;
/// Block flag: the file exists (as opposed to marked as deleted).
pub const MPQ_FILE_EXISTS: u32 = 0x80000000;

/// Compression type byte: the data that follows is not compressed.
pub const COMPRESSION_PLAINTEXT: u8 = 0;
/// Compression type byte: the data that follows is compressed using zlib.
pub const COMPRESSION_ZLIB: u8 = 2;
/// Compression type byte: the data that follows is compressed using bzip2.
pub const COMPRESSION_BZ2: u8 = 16;
70
/// The main MPQ object that contains the parsed entries.
///
/// It holds the archive metadata only; methods that extract file contents
/// take the raw archive bytes as a separate argument.
#[derive(Debug, Default)]
pub struct MPQ {
    /// The Archive Header containing format version, the offsets for the
    /// block table and hash table.
    pub archive_header: MPQFileHeader,
    /// The Archive may contain [`MPQUserData`], which is at the start of the file.
    pub user_data: Option<MPQUserData>,
    /// The hash table entries, after decryption and parsing.
    /// Searched by filename hash in [`MPQ::get_hash_table_entry`].
    pub hash_table_entries: Vec<MPQHashTableEntry>,
    /// The block table entries, after decryption and parsing.
    /// Indexed by the `block_table_index` of a hash table entry.
    pub block_table_entries: Vec<MPQBlockTableEntry>,
    /// The internal MPQ encryption table.
    /// This is the table handed to [`MPQ::mpq_string_hash`] when filenames are looked up.
    pub encryption_table: HashMap<u32, u32>,
}
86
87impl MPQ {
88    /// Prepares the encryption table, this hashmap is used for block-sized
89    /// decryption operations.
90    fn prepare_encryption_table() -> HashMap<u32, u32> {
91        let mut seed: u32 = 0x00100001;
92        let mut res = HashMap::new();
93        for i in (0..256).map(|x| x as u32) {
94            let mut idx = i;
95            for _ in 0..5 {
96                seed = (seed * 125 + 3) % 0x2AAAAB;
97                let temp1 = (seed & 0xFFFF) << 0x10;
98
99                seed = (seed * 125 + 3) % 0x2AAAAB;
100                let temp2 = seed & 0xFFFF;
101
102                res.insert(idx, temp1 | temp2);
103
104                idx += 0x100;
105            }
106        }
107        res
108    }
109
110    /// Hash a string using MPQ's hash function
111    ///
112    /// `_hash` on MPyQ
113    /// This function doesn't use self as the Builder also needs to access the same functionality.
114    #[allow(clippy::precedence)]
115    pub fn mpq_string_hash(
116        encryption_table: &HashMap<u32, u32>,
117        location: &str,
118        hash_type: MPQHashType,
119    ) -> Result<u32, MPQParserError> {
120        let mut seed1: u64 = 0x7FED7FEDu64;
121        let mut seed2: u64 = 0xEEEEEEEEu64;
122        for ch in location.to_uppercase().chars() {
123            let ch_ord: u32 = ch.into();
124            let hash_type_idx: u32 = hash_type.try_into()?;
125            let value = match encryption_table.get(&((hash_type_idx << 8) + ch_ord)) {
126                Some(val) => val,
127                None => {
128                    tracing::error!(
129                        "Couldn't find index in map for: {}",
130                        (hash_type_idx << 8) + ch_ord
131                    );
132                    return Err(MPQParserError::EncryptionTableIndexNotFound);
133                }
134            };
135            seed1 = (*value as u64 ^ (seed1 + seed2)) & 0xFFFFFFFFu64;
136            seed2 = ch_ord as u64 + seed1 + seed2 + (seed2 << 5) + 3 & 0xFFFFFFFFu64;
137        }
138        tracing::trace!("Returning {} for location: {}", (seed1 as u32), location);
139        Ok(seed1 as u32)
140    }
141
142    /// Get the hash table entry corresponding to a given filename.
143    ///
144    /// A filename is hashed with both [`MPQHashType::HashA`] and [`MPQHashType::HashB`]
145    /// to uniquely identify the filename in the archive
146    pub fn get_hash_table_entry(
147        &self,
148        filename: &str,
149    ) -> Result<MPQHashTableEntry, MPQParserError> {
150        let hash_a = Self::mpq_string_hash(&self.encryption_table, filename, MPQHashType::HashA)?;
151        let hash_b = Self::mpq_string_hash(&self.encryption_table, filename, MPQHashType::HashB)?;
152        for entry in &self.hash_table_entries {
153            if entry.hash_a == hash_a && entry.hash_b == hash_b {
154                tracing::debug!("Found filename: {}, as entry: {:?}", filename, entry);
155                return Ok(entry.clone());
156            }
157        }
158        Err(MPQParserError::HashTableEntryNotFound(filename.to_string()))
159    }
160
161    /// Read the compression type and decompress file data accordingly.
162    pub fn decompress(input: &[u8]) -> MPQResult<&[u8], Vec<u8>> {
163        let mut data = vec![];
164        let (tail, compression_type) = dbg_dmp(u8, "compression_type")(input)?;
165        match compression_type {
166            COMPRESSION_PLAINTEXT => {
167                tracing::debug!("Plaintext (no compression)");
168                data = tail[..].to_vec()
169            }
170            COMPRESSION_ZLIB => {
171                tracing::debug!("Attempting ZLIB compression",);
172                let mut d = zlib::Decoder::new(std::io::BufReader::new(tail));
173
174                let _ = d.read_to_end(&mut data)?;
175            }
176            COMPRESSION_BZ2 => {
177                tracing::debug!("Attempting BZ2 compression",);
178                let mut decompressor = bzip2_rs::DecoderReader::new(tail);
179                std::io::copy(&mut decompressor, &mut data)?;
180            }
181            unknown_version => {
182                return MPQResult::Err(MPQParserError::UnsupportedCompression(unknown_version));
183            }
184        };
185
186        Ok((tail, data))
187    }
188
189    /// Reads an embedded file inside the MPQ archive.
190    #[tracing::instrument(level = "debug", skip(self, orig_input))]
191    pub fn read_mpq_file_sector<'a>(
192        &'a self,
193        filename: &str,
194        force_decompress: bool,
195        orig_input: &'a [u8],
196    ) -> MPQResult<&'a [u8], Vec<u8>> {
197        let mut res = vec![];
198        let hash_entry = self.get_hash_table_entry(filename)?;
199        let block_entry = self.block_table_entries[hash_entry.block_table_index as usize].clone();
200        tracing::debug!("block_entry {:?}", block_entry);
201        // Read the block
202        if block_entry.flags & MPQ_FILE_EXISTS == 0 {
203            tracing::debug!("file is marked as deleted. Returning empty content");
204            return Ok((orig_input, res));
205        }
206        if block_entry.archived_size == 0 {
207            tracing::debug!("File is zero size. Returning empty content");
208            return Ok((orig_input, res));
209        }
210        let offset = block_entry.offset as usize + self.archive_header.offset;
211        let (tail, file_data) =
212            dbg_dmp(take(block_entry.archived_size), "file_data")(&orig_input[offset..])?;
213
214        tracing::debug!("Block table data: {}", parser::peek_hex(file_data));
215        if block_entry.flags & MPQ_FILE_ENCRYPTED != 0 {
216            return MPQResult::Err(MPQParserError::UnsupportedEncryptionType);
217        }
218        if block_entry.flags & MPQ_FILE_SINGLE_UNIT != 0 {
219            tracing::debug!("File sector contains a single unit",);
220            // Single unit files only need to be decompressed, but
221            // compression only happens when at least one byte is gained.
222            if block_entry.flags & MPQ_FILE_COMPRESS != 0
223                && (force_decompress || block_entry.size > block_entry.archived_size)
224            {
225                tracing::debug!("File needs to be decompressed",);
226                let (_tail, decompressed_data) = Self::decompress(file_data)?;
227                return Ok((tail, decompressed_data));
228            }
229            tracing::debug!("File does not needs to be decompressed",);
230        } else {
231            tracing::debug!("File does not need to be decompressed",);
232            // File consists of many sectors. They all need to be
233            // decompressed separately and united.
234            let sector_size = 512 << self.archive_header.sector_size_shift;
235            let mut sectors =
236                (block_entry.size as f32 / sector_size as f32).floor() as usize + 1usize;
237            tracing::debug!("Total sectors: {sectors}");
238            let crc = if block_entry.flags & MPQ_FILE_SECTOR_CRC != 0 {
239                sectors += 1;
240                true
241            } else {
242                false
243            };
244            let mut positions: Vec<usize> = vec![];
245            let mut position_file_index = &file_data[..4 * (sectors + 1)];
246            for _ in 0..sectors + 1 {
247                // Note: MPyQ format for this is a list of '<I'
248                // as long as there are sectors + 1
249                // `'<%dI' % (sectors + 1)` (Not to confuse the `d` with
250                // double, it's for the `%` format operator.
251                let (new_pos_idx, position) =
252                    dbg_dmp(u32(LITTLE_ENDIAN), "positions")(position_file_index)?;
253                positions.push(position as usize);
254                position_file_index = new_pos_idx;
255            }
256            let mut sector_bytes_left = block_entry.size as usize;
257            let mut total_sectors = positions.len() - 1;
258            if crc {
259                total_sectors -= 1;
260            }
261
262            for i in 0..total_sectors {
263                let mut sector = file_data[positions[i]..positions[i + 1]].to_vec();
264                if block_entry.flags & MPQ_FILE_COMPRESS != 0
265                    && (force_decompress || sector_bytes_left > sector.len())
266                {
267                    let (_tail, mut decompressed_sector) =
268                        Self::decompress(&file_data[positions[i]..positions[i + 1]])?;
269                    res.append(&mut decompressed_sector);
270                } else {
271                    res.append(&mut sector);
272                }
273
274                sector_bytes_left -= sector.len();
275            }
276            return Ok((tail, res));
277        }
278        Ok((tail, file_data.to_vec()))
279    }
280
281    /// Decrypt hash or block table or a sector.
282    ///
283    /// `_decrypt` on MPyQ
284    #[allow(clippy::precedence)]
285    pub fn mpq_data_decrypt<'a>(
286        encryption_table: &'a HashMap<u32, u32>,
287        data: &'a [u8],
288        key: u32,
289    ) -> IResult<&'a [u8], Vec<u8>> {
290        let mut seed1 = key as i64;
291        let mut seed2 = 0xEEEEEEEEi64;
292        let mut res = vec![];
293
294        for i in 0..(data.len() as f32 / 4f32).floor() as usize {
295            let encryption_table_value =
296                match encryption_table.get(&(0x400 + (seed1 as u32 & 0xFF))) {
297                    Some(val) => *val as i64,
298                    None => {
299                        tracing::error!(
300                            "Encryption table value not found for: {}",
301                            (0x400 + (seed1 & 0xFF) as i32)
302                        );
303                        continue;
304                    }
305                };
306            seed2 += encryption_table_value;
307            seed2 &= 0xFFFFFFFFi64;
308            let (_tail, value) =
309                dbg_dmp(i32(LITTLE_ENDIAN), "encrypted_value")(&data[i * 4..i * 4 + 4])?;
310            let mut value = value as i64;
311            value = (value ^ (seed1 + seed2)) & 0xFFFFFFFFi64;
312
313            seed1 = ((!seed1 << 0x15) + 0x11111111) | (seed1 >> 0x0B);
314            seed1 &= 0xFFFFFFFF;
315            seed2 = value + seed2 + (seed2 << 5) + 3 & 0xFFFFFFFFi64;
316            let mut le_packed_value = (value as i32).to_le_bytes().to_vec();
317
318            // pack in little endian
319            res.append(&mut le_packed_value);
320        }
321
322        Ok((data, res))
323    }
324
325    /// Returns the list of filenames and their respective size as contained in the MPQ archive.
326    pub fn get_files(&self, orig_input: &[u8]) -> Result<Vec<(String, usize)>, MPQParserError> {
327        let mut res: Vec<(String, usize)> = vec![];
328        let files: Vec<String> = match self.read_mpq_file_sector("(listfile)", false, orig_input) {
329            Ok((_tail, file_buffer)) => {
330                tracing::debug!(
331                    "Successfully read '(listfile)' sector: {:?}",
332                    parser::peek_hex(&file_buffer)
333                );
334                match std::str::from_utf8(&file_buffer) {
335                    Ok(val) => val.lines().map(|x| x.to_string()).collect(),
336                    Err(err) => {
337                        tracing::error!("Invalid UTF-8 sequence: {:?}", err);
338                        return Err(MPQParserError::InvalidUTF8Sequence(
339                            "(listfile)".to_string(),
340                        ));
341                    }
342                }
343            }
344            Err(err) => {
345                tracing::error!("Unable to read '(listfile)' sector: {:?}", err);
346                return Err(MPQParserError::InvalidListFileSector);
347            }
348        };
349        for filename in files {
350            let hash_entry = match self.get_hash_table_entry(&filename) {
351                Ok(val) => val,
352                Err(err) => {
353                    tracing::warn!(
354                        "Unable to find hash entry for filename: {:?}: {:?}",
355                        filename,
356                        err
357                    );
358                    continue;
359                }
360            };
361            let block_entry = &self.block_table_entries[hash_entry.block_table_index as usize];
362            tracing::debug!("{} {1:>8} bytes", filename, block_entry.size as usize);
363            res.push((filename, block_entry.size as usize));
364        }
365        Ok(res)
366    }
367}