Skip to main content

casc_lib/root/
parser.rs

1//! Root file binary parser.
2//!
3//! Supports three root file formats used across WoW versions:
4//!
5//! - **Legacy** (pre-8.2) - no header, blocks start at offset 0.
6//! - **MFST V1** (8.2 - 11.0.x) - `MFST` magic header, 12-byte block headers.
7//! - **MFST V2** (11.1.0+) - `MFST` magic header, 17-byte block headers with
8//!   restructured content flags.
9//!
10//! Each root file is organized as a series of blocks, where each block shares
11//! a common set of locale and content flags. Within a block, FileDataIDs are
12//! stored as delta-encoded integers followed by parallel arrays of CKeys and
13//! (optionally) name hashes.
14
15use std::collections::HashMap;
16
17use crate::error::{CascError, Result};
18use crate::util::io::{read_le_i32, read_le_u32, read_le_u64};
19
20use super::flags::{ContentFlags, LocaleFlags};
21
/// Magic number for MFST header, as seen when hand-built test data is read.
///
/// `detect_format` reads the first four bytes of the file with `read_le_u32`
/// and accepts either of the two magic constants:
///
/// - Real WoW root files store the bytes `[54 53 46 4D]` (ASCII "TSFM"),
///   which `read_le_u32` decodes as `0x4D465354` (`MFST_MAGIC_LE`).
/// - Hand-built test data writes `MFST_MAGIC_BE.to_le_bytes()`, i.e. the
///   bytes `[4D 46 53 54]` (ASCII "MFST"), which `read_le_u32` decodes as
///   `0x5453464D` (`MFST_MAGIC_BE`).
const MFST_MAGIC_BE: u32 = 0x5453464D; // on-disk bytes "MFST" decoded as a little-endian u32
const MFST_MAGIC_LE: u32 = 0x4D465354; // on-disk bytes "TSFM" decoded as a little-endian u32 (real files)
31
/// A single root file entry mapping a CKey to flags/locale/name hash.
///
/// Entries are produced block by block: every entry parsed from the same
/// block carries that block's content and locale flags.
#[derive(Debug, Clone)]
pub struct RootEntry {
    /// Content key identifying the file data in the encoding table.
    pub ckey: [u8; 16],
    /// Content flags (platform, encryption, compression hints), copied from
    /// the header of the block this entry was read from.
    pub content_flags: ContentFlags,
    /// Locale flags indicating which client locales this entry applies to,
    /// copied from the header of the block this entry was read from.
    pub locale_flags: LocaleFlags,
    /// Jenkins96 name hash of the original file path, or `None` when the
    /// `NoNameHash` content flag is set (such blocks store no hash array).
    pub name_hash: Option<u64>,
}
45
/// Detected root file format.
///
/// The format decides where the first block starts and which block header
/// layout is used while parsing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RootFormat {
    /// Pre-8.2, no MFST header - blocks start immediately (offset 0) and use
    /// the 12-byte v1 block header.
    Legacy,
    /// 8.2+, MFST header with block format version 1 (12-byte block headers).
    /// Covers both the 12-byte file header and the 24-byte file header whose
    /// version field is 1.
    MfstV1,
    /// 11.1.0+, MFST header with block format version 2 (17-byte block
    /// headers).
    MfstV2,
}
56
57/// Parsed root file with FileDataID -> CKey lookup.
58pub struct RootFile {
59    format: RootFormat,
60    /// FileDataID -> `Vec<RootEntry>` (may have multiple locale variants).
61    entries: HashMap<u32, Vec<RootEntry>>,
62    total_entries: usize,
63}
64
65impl RootFile {
66    /// Parse a root file from raw bytes.
67    pub fn parse(data: &[u8]) -> Result<Self> {
68        let (format, block_start) = detect_format(data)?;
69
70        let mut entries: HashMap<u32, Vec<RootEntry>> = HashMap::new();
71        let mut total_entries: usize = 0;
72        let mut pos = block_start;
73
74        while pos < data.len() {
75            let (block_entries, new_pos) = parse_block(data, pos, format)?;
76            total_entries += block_entries.len();
77            for (fdid, entry) in block_entries {
78                entries.entry(fdid).or_default().push(entry);
79            }
80            pos = new_pos;
81        }
82
83        Ok(Self {
84            format,
85            entries,
86            total_entries,
87        })
88    }
89
90    /// Find the first entry for a FileDataID that matches the given locale filter.
91    pub fn find_by_fdid(&self, fdid: u32, locale: LocaleFlags) -> Option<&RootEntry> {
92        self.entries
93            .get(&fdid)?
94            .iter()
95            .find(|e| e.locale_flags.matches(locale))
96    }
97
98    /// Iterate all (FileDataID, entry) pairs.
99    pub fn iter_all(&self) -> impl Iterator<Item = (u32, &RootEntry)> {
100        self.entries
101            .iter()
102            .flat_map(|(fdid, entries)| entries.iter().map(move |entry| (*fdid, entry)))
103    }
104
105    /// The detected format of this root file.
106    pub fn format(&self) -> RootFormat {
107        self.format
108    }
109
110    /// Total number of entries across all blocks.
111    pub fn len(&self) -> usize {
112        self.total_entries
113    }
114
115    /// Whether the root file contains no entries.
116    pub fn is_empty(&self) -> bool {
117        self.total_entries == 0
118    }
119
120    /// Number of unique FileDataIDs.
121    pub fn fdid_count(&self) -> usize {
122        self.entries.len()
123    }
124}
125
126/// Detect the root file format and return the byte offset where blocks begin.
127fn detect_format(data: &[u8]) -> Result<(RootFormat, usize)> {
128    if data.len() < 4 {
129        // Too short for MFST header - treat as legacy if it has any data, else empty
130        if data.is_empty() {
131            return Err(CascError::InvalidFormat("root file is empty".to_string()));
132        }
133        return Ok((RootFormat::Legacy, 0));
134    }
135
136    let magic = read_le_u32(&data[0..4]);
137    if magic != MFST_MAGIC_LE && magic != MFST_MAGIC_BE {
138        // No MFST header - legacy format, blocks start at offset 0
139        return Ok((RootFormat::Legacy, 0));
140    }
141
142    // Has MFST magic. Determine header size.
143    if data.len() < 12 {
144        return Err(CascError::InvalidFormat(
145            "MFST header too short".to_string(),
146        ));
147    }
148
149    let field_at_4 = read_le_u32(&data[4..8]);
150
151    // For pre-10.1.7 MFST: header is magic(4) + total_count(4) + named_count(4) = 12 bytes.
152    // For 10.1.7+: offset 4 = header_size (24), offset 8 = version (1 or 2).
153    // Distinguish: if field_at_4 looks like a reasonable header_size (e.g. 24),
154    // it's the 10.1.7+ format. If it's a huge number, it's the old 12-byte header
155    // where field_at_4 is total_file_count.
156    if field_at_4 == 24 && data.len() >= 24 {
157        // 10.1.7+ format with explicit header_size and version
158        let version = read_le_u32(&data[8..12]);
159        let format = match version {
160            1 => RootFormat::MfstV1,
161            2 => RootFormat::MfstV2,
162            _ => {
163                return Err(CascError::UnsupportedVersion(version));
164            }
165        };
166        Ok((format, 24))
167    } else {
168        // Pre-10.1.7 MFST: 12-byte header (magic + total_count + named_count)
169        // Block format is v1
170        Ok((RootFormat::MfstV1, 12))
171    }
172}
173
174/// Parse a single block from the root file data at the given position.
175/// Returns the list of (FileDataID, RootEntry) pairs and the new position after the block.
176fn parse_block(
177    data: &[u8],
178    pos: usize,
179    format: RootFormat,
180) -> Result<(Vec<(u32, RootEntry)>, usize)> {
181    let (num_records, content_flags, locale_flags, mut pos) =
182        parse_block_header(data, pos, format)?;
183
184    if num_records == 0 {
185        return Ok((Vec::new(), pos));
186    }
187
188    let num = num_records as usize;
189
190    // Read FileDataID deltas (i32 LE each)
191    let deltas_size = num * 4;
192    if pos + deltas_size > data.len() {
193        return Err(CascError::InvalidFormat(
194            "root block: not enough data for FileDataID deltas".to_string(),
195        ));
196    }
197
198    let mut fdids = Vec::with_capacity(num);
199    let mut current_fdid: i64 = 0;
200    for i in 0..num {
201        let delta = read_le_i32(&data[pos + i * 4..]) as i64;
202        if i == 0 {
203            // First delta is the absolute starting FileDataID
204            current_fdid = delta;
205        } else {
206            current_fdid = current_fdid + 1 + delta;
207        }
208        fdids.push(current_fdid as u32);
209    }
210    pos += deltas_size;
211
212    // Read CKeys (16 bytes each)
213    let ckeys_size = num * 16;
214    if pos + ckeys_size > data.len() {
215        return Err(CascError::InvalidFormat(
216            "root block: not enough data for content keys".to_string(),
217        ));
218    }
219
220    let mut ckeys = Vec::with_capacity(num);
221    for i in 0..num {
222        let mut ckey = [0u8; 16];
223        ckey.copy_from_slice(&data[pos + i * 16..pos + i * 16 + 16]);
224        ckeys.push(ckey);
225    }
226    pos += ckeys_size;
227
228    // Read name hashes (u64 LE each) - only if NoNameHash flag is NOT set
229    let has_name_hashes = !content_flags.has_no_name_hash();
230    let mut name_hashes: Vec<Option<u64>> = Vec::with_capacity(num);
231
232    if has_name_hashes {
233        let hashes_size = num * 8;
234        if pos + hashes_size > data.len() {
235            return Err(CascError::InvalidFormat(
236                "root block: not enough data for name hashes".to_string(),
237            ));
238        }
239        for i in 0..num {
240            name_hashes.push(Some(read_le_u64(&data[pos + i * 8..])));
241        }
242        pos += hashes_size;
243    } else {
244        name_hashes.resize(num, None);
245    }
246
247    // Assemble entries
248    let mut result = Vec::with_capacity(num);
249    for i in 0..num {
250        result.push((
251            fdids[i],
252            RootEntry {
253                ckey: ckeys[i],
254                content_flags,
255                locale_flags,
256                name_hash: name_hashes[i],
257            },
258        ));
259    }
260
261    Ok((result, pos))
262}
263
264/// Parse a block header and return (num_records, content_flags, locale_flags, new_pos).
265fn parse_block_header(
266    data: &[u8],
267    pos: usize,
268    format: RootFormat,
269) -> Result<(u32, ContentFlags, LocaleFlags, usize)> {
270    match format {
271        RootFormat::Legacy | RootFormat::MfstV1 => {
272            // Block header v1: num_records(4) + content_flags(4) + locale_flags(4) = 12 bytes
273            if pos + 12 > data.len() {
274                return Err(CascError::InvalidFormat(
275                    "root block header v1: not enough data".to_string(),
276                ));
277            }
278            let num_records = read_le_u32(&data[pos..]);
279            let content_flags = ContentFlags(read_le_u32(&data[pos + 4..]));
280            let locale_flags = LocaleFlags(read_le_u32(&data[pos + 8..]));
281            Ok((num_records, content_flags, locale_flags, pos + 12))
282        }
283        RootFormat::MfstV2 => {
284            // Block header v2: num_records(4) + locale_flags(4) + unk1(4) + unk2(4) + unk3(1) = 17 bytes
285            if pos + 17 > data.len() {
286                return Err(CascError::InvalidFormat(
287                    "root block header v2: not enough data".to_string(),
288                ));
289            }
290            let num_records = read_le_u32(&data[pos..]);
291            let locale_flags = LocaleFlags(read_le_u32(&data[pos + 4..]));
292            let unk1 = read_le_u32(&data[pos + 8..]);
293            let unk2 = read_le_u32(&data[pos + 12..]);
294            let unk3 = data[pos + 16];
295            // Convert to old-style content_flags
296            let content_flags = ContentFlags(unk1 | unk2 | ((unk3 as u32) << 17));
297            Ok((num_records, content_flags, locale_flags, pos + 17))
298        }
299    }
300}
301
#[cfg(test)]
mod tests {
    use super::*;
    use crate::root::flags::{ContentFlags, LocaleFlags};

    /// One record of a test block: (fdid_delta, ckey, name_hash).
    type RootBlockEntry = (i32, [u8; 16], Option<u64>);

    /// Content-flag bit that marks a block as carrying no name hash array.
    const NO_NAME_HASH: u32 = 0x1000_0000;

    /// Magic bytes used by hand-built test data (ASCII "MFST").
    fn test_magic() -> [u8; 4] {
        MFST_MAGIC_BE.to_le_bytes()
    }

    /// Build a 24-byte (10.1.7+) MFST header that starts with `magic`.
    fn mfst_header(magic: [u8; 4], version: u32, total_count: u32, named_count: u32) -> Vec<u8> {
        let mut data = Vec::with_capacity(24);
        data.extend_from_slice(&magic);
        data.extend_from_slice(&24u32.to_le_bytes()); // header_size
        data.extend_from_slice(&version.to_le_bytes());
        data.extend_from_slice(&total_count.to_le_bytes()); // total_file_count
        data.extend_from_slice(&named_count.to_le_bytes()); // named_file_count
        data.extend_from_slice(&0u32.to_le_bytes()); // padding
        assert_eq!(data.len(), 24);
        data
    }

    /// Append a v1 block header: num_records + content_flags + locale_flags.
    fn push_block_header_v1(
        data: &mut Vec<u8>,
        num_records: u32,
        content_flags: u32,
        locale_flags: u32,
    ) {
        data.extend_from_slice(&num_records.to_le_bytes());
        data.extend_from_slice(&content_flags.to_le_bytes());
        data.extend_from_slice(&locale_flags.to_le_bytes());
    }

    /// Build a v1 MFST root file with given blocks.
    /// Each block: (content_flags, locale_flags, entries: Vec<(fdid_delta, ckey, name_hash?)>)
    fn build_root_v1(blocks: &[(u32, u32, Vec<RootBlockEntry>)]) -> Vec<u8> {
        let total_count: u32 = blocks.iter().map(|(_, _, e)| e.len() as u32).sum();
        let named_count: u32 = blocks
            .iter()
            .filter(|(cf, _, _)| (cf & NO_NAME_HASH) == 0)
            .map(|(_, _, e)| e.len() as u32)
            .sum();

        let mut data = mfst_header(test_magic(), 1, total_count, named_count);

        for (content_flags, locale_flags, entries) in blocks {
            push_block_header_v1(
                &mut data,
                entries.len() as u32,
                *content_flags,
                *locale_flags,
            );

            // FileDataID deltas
            for (delta, _, _) in entries {
                data.extend_from_slice(&delta.to_le_bytes());
            }

            // CKeys
            for (_, ckey, _) in entries {
                data.extend_from_slice(ckey);
            }

            // Name hashes (only if NoNameHash not set)
            if (content_flags & NO_NAME_HASH) == 0 {
                for (_, _, name_hash) in entries {
                    data.extend_from_slice(&name_hash.unwrap_or(0).to_le_bytes());
                }
            }
        }

        data
    }

    #[test]
    fn detect_mfst_format() {
        let data = build_root_v1(&[]);
        let root = RootFile::parse(&data).unwrap();
        assert_eq!(root.format(), RootFormat::MfstV1);
    }

    #[test]
    fn detect_real_file_magic_bytes() {
        // Real game files start with the bytes "TSFM" rather than "MFST".
        let mut data = mfst_header(*b"TSFM", 1, 1, 0);
        push_block_header_v1(&mut data, 1, NO_NAME_HASH | 0x8, 0x2);
        data.extend_from_slice(&5i32.to_le_bytes()); // delta (fdid = 5)
        data.extend_from_slice(&[0x11; 16]); // ckey

        let root = RootFile::parse(&data).unwrap();
        assert_eq!(root.format(), RootFormat::MfstV1);
        assert_eq!(root.len(), 1);
        assert!(root.find_by_fdid(5, LocaleFlags::ALL).is_some());
    }

    #[test]
    fn parse_single_block_single_entry() {
        let ckey = [0xAA; 16];
        let blocks = vec![(0x8u32, 0x2u32, vec![(100i32, ckey, Some(0xDEADBEEF_u64))])]; // Windows, enUS
        let data = build_root_v1(&blocks);
        let root = RootFile::parse(&data).unwrap();

        assert_eq!(root.len(), 1);
        let entry = root.find_by_fdid(100, LocaleFlags::EN_US).unwrap();
        assert_eq!(entry.ckey, ckey);
        assert_eq!(entry.name_hash, Some(0xDEADBEEF));
    }

    #[test]
    fn parse_fdid_deltas_sequential() {
        let blocks = vec![(
            NO_NAME_HASH | 0x8,
            0x2u32,
            vec![
                (100i32, [0x01; 16], None), // fdid = 100
                (0i32, [0x02; 16], None),   // fdid = 101 (100 + 1 + 0)
                (0i32, [0x03; 16], None),   // fdid = 102
                (2i32, [0x04; 16], None),   // fdid = 105 (102 + 1 + 2)
            ],
        )];
        let data = build_root_v1(&blocks);
        let root = RootFile::parse(&data).unwrap();

        assert_eq!(root.len(), 4);
        assert!(root.find_by_fdid(100, LocaleFlags::ALL).is_some());
        assert!(root.find_by_fdid(101, LocaleFlags::ALL).is_some());
        assert!(root.find_by_fdid(102, LocaleFlags::ALL).is_some());
        assert!(root.find_by_fdid(103, LocaleFlags::ALL).is_none()); // gap
        assert!(root.find_by_fdid(104, LocaleFlags::ALL).is_none()); // gap
        assert!(root.find_by_fdid(105, LocaleFlags::ALL).is_some());
    }

    #[test]
    fn parse_block_with_name_hashes() {
        let blocks = vec![(
            0x8u32,
            0x2u32,
            vec![(50i32, [0xBB; 16], Some(0x1234567890ABCDEF_u64))],
        )]; // No NoNameHash flag = has name hashes
        let data = build_root_v1(&blocks);
        let root = RootFile::parse(&data).unwrap();

        let entry = root.find_by_fdid(50, LocaleFlags::ALL).unwrap();
        assert_eq!(entry.name_hash, Some(0x1234567890ABCDEF));
    }

    #[test]
    fn parse_block_without_name_hashes() {
        let blocks = vec![(NO_NAME_HASH | 0x8, 0x2u32, vec![(50i32, [0xCC; 16], None)])]; // NoNameHash flag set
        let data = build_root_v1(&blocks);
        let root = RootFile::parse(&data).unwrap();

        let entry = root.find_by_fdid(50, LocaleFlags::ALL).unwrap();
        assert_eq!(entry.name_hash, None);
    }

    #[test]
    fn parse_multiple_blocks_different_locales() {
        let blocks = vec![
            (0x8u32, 0x2u32, vec![(100i32, [0x01; 16], Some(0))]), // enUS
            (0x8u32, 0x20u32, vec![(100i32, [0x02; 16], Some(0))]), // deDE, same fdid!
        ];
        let data = build_root_v1(&blocks);
        let root = RootFile::parse(&data).unwrap();

        // Same fdid, different locales
        let en = root.find_by_fdid(100, LocaleFlags::EN_US).unwrap();
        assert_eq!(en.ckey, [0x01; 16]);

        let de = root.find_by_fdid(100, LocaleFlags::DE_DE).unwrap();
        assert_eq!(de.ckey, [0x02; 16]);
    }

    #[test]
    fn parse_locale_filter() {
        let blocks = vec![(0x8u32, 0x20u32, vec![(200i32, [0xFF; 16], Some(0))])]; // deDE only
        let data = build_root_v1(&blocks);
        let root = RootFile::parse(&data).unwrap();

        assert!(root.find_by_fdid(200, LocaleFlags::EN_US).is_none()); // not enUS
        assert!(root.find_by_fdid(200, LocaleFlags::DE_DE).is_some()); // deDE
        assert!(root.find_by_fdid(200, LocaleFlags::ALL).is_some()); // ALL matches
    }

    #[test]
    fn iter_all_entries() {
        let blocks = vec![(
            NO_NAME_HASH | 0x8,
            0x2u32,
            vec![(10i32, [0x01; 16], None), (0i32, [0x02; 16], None)],
        )];
        let data = build_root_v1(&blocks);
        let root = RootFile::parse(&data).unwrap();

        let all: Vec<_> = root.iter_all().collect();
        assert_eq!(all.len(), 2);
    }

    #[test]
    fn parse_empty_root() {
        let data = build_root_v1(&[]);
        let root = RootFile::parse(&data).unwrap();
        assert!(root.is_empty());
        assert_eq!(root.fdid_count(), 0);
    }

    #[test]
    fn detect_legacy_format() {
        // Data that doesn't start with MFST magic - should be Legacy.
        // Build a minimal legacy root with one block (block header starts at offset 0).
        let mut data = Vec::new();
        push_block_header_v1(&mut data, 1, NO_NAME_HASH | 0x8, 0x2);
        data.extend_from_slice(&42i32.to_le_bytes()); // delta: fdid = 42
        data.extend_from_slice(&[0xDD; 16]); // ckey
        // No name hashes (NoNameHash set)

        let root = RootFile::parse(&data).unwrap();
        assert_eq!(root.format(), RootFormat::Legacy);
        assert_eq!(root.len(), 1);
        assert!(root.find_by_fdid(42, LocaleFlags::ALL).is_some());
    }

    #[test]
    fn detect_pre_1017_mfst() {
        // Pre-10.1.7 MFST: 12-byte header (magic + total_count + named_count)
        let mut data = Vec::new();
        data.extend_from_slice(&test_magic()); // magic
        data.extend_from_slice(&500000u32.to_le_bytes()); // total_count (large number, not 24)
        data.extend_from_slice(&400000u32.to_le_bytes()); // named_count

        // One block after header
        push_block_header_v1(&mut data, 1, NO_NAME_HASH | 0x8, 0x2);
        data.extend_from_slice(&7i32.to_le_bytes()); // delta (fdid = 7)
        data.extend_from_slice(&[0xEE; 16]); // ckey
        // No name hashes

        let root = RootFile::parse(&data).unwrap();
        assert_eq!(root.format(), RootFormat::MfstV1);
        assert_eq!(root.len(), 1);
        assert!(root.find_by_fdid(7, LocaleFlags::ALL).is_some());
    }

    #[test]
    fn mfst_v2_block_header() {
        // Build a v2 MFST root manually
        let mut data = mfst_header(test_magic(), 2, 1, 0);

        // Block header v2: num_records(4) + locale_flags(4) + unk1(4) + unk2(4) + unk3(1) = 17 bytes
        data.extend_from_slice(&1u32.to_le_bytes()); // num_records = 1
        data.extend_from_slice(&0x2u32.to_le_bytes()); // locale_flags = enUS
        data.extend_from_slice(&0x8u32.to_le_bytes()); // unk1 = 0x8 (LoadOnWindows)
        data.extend_from_slice(&NO_NAME_HASH.to_le_bytes()); // unk2 = NoNameHash
        data.push(0); // unk3 = 0

        // Delta
        data.extend_from_slice(&99i32.to_le_bytes());
        // CKey
        data.extend_from_slice(&[0xAB; 16]);
        // No name hashes (NoNameHash is set via unk2)

        let root = RootFile::parse(&data).unwrap();
        assert_eq!(root.format(), RootFormat::MfstV2);
        assert_eq!(root.len(), 1);

        let entry = root.find_by_fdid(99, LocaleFlags::EN_US).unwrap();
        assert_eq!(entry.ckey, [0xAB; 16]);
        // content_flags should be unk1 | unk2 | (unk3 << 17) = 0x8 | 0x10000000 | 0
        assert!(entry.content_flags.has(ContentFlags::LOAD_ON_WINDOWS));
        assert!(entry.content_flags.has_no_name_hash());
        assert_eq!(entry.name_hash, None);
    }

    #[test]
    fn unsupported_header_version_is_rejected() {
        // 24-byte header whose version field is neither 1 nor 2.
        let data = mfst_header(test_magic(), 3, 0, 0);
        assert!(RootFile::parse(&data).is_err());
    }

    #[test]
    fn parse_error_on_empty_data() {
        let result = RootFile::parse(&[]);
        assert!(result.is_err());
    }

    #[test]
    fn parse_error_on_truncated_mfst_header() {
        // Magic plus a single field: shorter than the smallest (12-byte) header.
        let mut data = Vec::new();
        data.extend_from_slice(&test_magic());
        data.extend_from_slice(&24u32.to_le_bytes());
        assert!(RootFile::parse(&data).is_err());
    }

    #[test]
    fn parse_error_on_truncated_block() {
        let mut data = mfst_header(test_magic(), 1, 1, 1);
        // Block header claiming 1000 records but no body
        push_block_header_v1(&mut data, 1000, 0x8, 0x2);

        let result = RootFile::parse(&data);
        assert!(result.is_err());
    }

    #[test]
    fn parse_error_on_truncated_ckeys() {
        let mut data = mfst_header(test_magic(), 1, 1, 0);
        push_block_header_v1(&mut data, 1, NO_NAME_HASH | 0x8, 0x2);
        data.extend_from_slice(&5i32.to_le_bytes()); // delta present, CKey missing

        assert!(RootFile::parse(&data).is_err());
    }

    #[test]
    fn fdid_count_vs_len() {
        // Two entries with same fdid but different locales = fdid_count 1, len 2
        let blocks = vec![
            (0x8u32, 0x2u32, vec![(50i32, [0x01; 16], Some(0))]),
            (0x8u32, 0x20u32, vec![(50i32, [0x02; 16], Some(0))]),
        ];
        let data = build_root_v1(&blocks);
        let root = RootFile::parse(&data).unwrap();

        assert_eq!(root.len(), 2);
        assert_eq!(root.fdid_count(), 1);
    }
}