Skip to main content

casc_lib/encoding/
parser.rs

1//! Binary parser for the CASC encoding file.
2//!
3//! The encoding file begins with a 22-byte header (magic `"EN"`, version 1),
4//! followed by an ESpec string block, a page index, and then the actual
5//! CKey-to-EKey (CE) data pages. Each page contains variable-length entries
6//! consisting of a key count, a 5-byte file size (big-endian u40), the CKey,
7//! and one or more EKeys.
8//!
9//! Use [`EncodingFile::parse`](crate::encoding::parser::EncodingFile::parse) to build a lookup table, then
10//! [`EncodingFile::find_ekey`](crate::encoding::parser::EncodingFile::find_ekey) to resolve a CKey to its EKey(s).
11
12use std::collections::HashMap;
13
14use crate::error::{CascError, Result};
15use crate::util::io::{read_be_u16, read_be_u32, read_be_u40};
16
/// Fixed-size header found at the very start of the encoding file.
///
/// The header occupies 0x16 bytes; every multi-byte field is stored
/// big-endian. Offsets quoted on the fields are relative to the file start.
#[derive(Debug, Clone)]
pub struct EncodingHeader {
    /// Format version, read from offset 0x02. The parser accepts only `1`.
    pub version: u8,
    /// Length in bytes of a content key hash (offset 0x03, typically 16).
    pub hash_size_ckey: u8,
    /// Length in bytes of an encoding key hash (offset 0x04, typically 16).
    pub hash_size_ekey: u8,
    /// Size of one CE data page in KiB (offset 0x05); a page spans
    /// `ce_page_size_kb * 1024` bytes.
    pub ce_page_size_kb: u16,
    /// Size of one ESpec page in KiB (offset 0x07).
    pub espec_page_size_kb: u16,
    /// How many CKey-to-EKey data pages the file holds (offset 0x09).
    pub ce_page_count: u32,
    /// How many ESpec pages the file holds (offset 0x0D).
    pub espec_page_count: u32,
    /// Size in bytes of the ESpec string block that follows the header
    /// (offset 0x12).
    pub espec_block_size: u32,
}
37
/// One record of the encoding table: a content key together with the
/// encoding key(s) it resolves to.
#[derive(Debug, Clone)]
pub struct EncodingEntry {
    /// Content key that identifies the logical file.
    pub ckey: [u8; 16],
    /// Encoding keys under which the file's data is stored in the archives;
    /// the parser only produces entries with at least one key.
    pub ekeys: Vec<[u8; 16]>,
    /// Decompressed file size in bytes (stored on disk as a 5-byte
    /// big-endian integer).
    pub file_size: u64,
}
48
49/// Parsed encoding file with fast CKey -> EKey lookup.
50///
51/// Constructed via [`EncodingFile::parse`] from the decoded (post-BLTE) encoding
52/// file data. Use [`find_ekey`](EncodingFile::find_ekey) to resolve a content
53/// key to its encoding key(s).
54pub struct EncodingFile {
55    header: EncodingHeader,
56    /// Flat map of CKey -> EncodingEntry for O(1) lookup.
57    entries: HashMap<[u8; 16], EncodingEntry>,
58}
59
/// Length in bytes of the fixed encoding-file header (22, i.e. 0x16).
const HEADER_SIZE: usize = 22;
/// Length in bytes of the page MD5 stored after the first key in every page
/// index entry.
const PAGE_INDEX_ENTRY_MD5: usize = 16;
62
63impl EncodingFile {
64    /// Parse from decoded (post-BLTE) encoding file data.
65    pub fn parse(data: &[u8]) -> Result<Self> {
66        if data.len() < HEADER_SIZE {
67            return Err(CascError::InvalidFormat(
68                "encoding data too short for header".into(),
69            ));
70        }
71
72        // Validate signature
73        if &data[0..2] != b"EN" {
74            return Err(CascError::InvalidMagic {
75                expected: "EN".into(),
76                found: format!("{:02X}{:02X}", data[0], data[1]),
77            });
78        }
79
80        let version = data[0x02];
81        if version != 1 {
82            return Err(CascError::UnsupportedVersion(version as u32));
83        }
84
85        let hash_size_ckey = data[0x03];
86        let hash_size_ekey = data[0x04];
87        let ce_page_size_kb = read_be_u16(&data[0x05..]);
88        let espec_page_size_kb = read_be_u16(&data[0x07..]);
89        let ce_page_count = read_be_u32(&data[0x09..]);
90        let espec_page_count = read_be_u32(&data[0x0D..]);
91        let flags = data[0x11];
92        let espec_block_size = read_be_u32(&data[0x12..]);
93
94        if flags != 0 {
95            return Err(CascError::InvalidFormat(format!(
96                "encoding flags must be 0, got {}",
97                flags
98            )));
99        }
100
101        let header = EncodingHeader {
102            version,
103            hash_size_ckey,
104            hash_size_ekey,
105            ce_page_size_kb,
106            espec_page_size_kb,
107            ce_page_count,
108            espec_page_count,
109            espec_block_size,
110        };
111
112        let ckey_size = hash_size_ckey as usize;
113        let ekey_size = hash_size_ekey as usize;
114        let ce_page_size = ce_page_size_kb as usize * 1024;
115        let page_index_entry_size = ckey_size + PAGE_INDEX_ENTRY_MD5;
116
117        // Calculate offsets into the data
118        let mut offset = HEADER_SIZE;
119
120        // 1. Skip ESpec string block
121        offset += espec_block_size as usize;
122
123        // 2. Skip CEKey page index
124        let page_index_size = ce_page_count as usize * page_index_entry_size;
125        offset += page_index_size;
126
127        // 3. Parse all CEKey pages
128        let mut entries = HashMap::new();
129
130        for _ in 0..ce_page_count {
131            if offset + ce_page_size > data.len() {
132                return Err(CascError::InvalidFormat(
133                    "encoding data truncated in CEKey pages".into(),
134                ));
135            }
136
137            let page_end = offset + ce_page_size;
138            let mut pos = offset;
139
140            while pos < page_end {
141                // Check if we hit padding (key_count == 0)
142                if pos >= data.len() || data[pos] == 0 {
143                    break;
144                }
145
146                let key_count = data[pos] as usize;
147                pos += 1;
148
149                // file_size: u40 BE (5 bytes)
150                if pos + 5 > page_end {
151                    break;
152                }
153                let file_size = read_be_u40(&data[pos..]);
154                pos += 5;
155
156                // ckey
157                if pos + ckey_size > page_end {
158                    break;
159                }
160                let mut ckey = [0u8; 16];
161                ckey.copy_from_slice(&data[pos..pos + ckey_size]);
162                pos += ckey_size;
163
164                // ekeys
165                let ekeys_total = key_count * ekey_size;
166                if pos + ekeys_total > page_end {
167                    break;
168                }
169                let mut ekeys = Vec::with_capacity(key_count);
170                for _ in 0..key_count {
171                    let mut ekey = [0u8; 16];
172                    ekey.copy_from_slice(&data[pos..pos + ekey_size]);
173                    ekeys.push(ekey);
174                    pos += ekey_size;
175                }
176
177                entries.insert(
178                    ckey,
179                    EncodingEntry {
180                        ckey,
181                        ekeys,
182                        file_size,
183                    },
184                );
185            }
186
187            offset = page_end;
188        }
189
190        // Skip EKeySpec page index and pages (not needed for extraction)
191
192        Ok(Self { header, entries })
193    }
194
195    /// Find the encoding entry for a given CKey.
196    pub fn find_ekey(&self, ckey: &[u8; 16]) -> Option<&EncodingEntry> {
197        self.entries.get(ckey)
198    }
199
200    /// Total number of entries.
201    pub fn len(&self) -> usize {
202        self.entries.len()
203    }
204
205    /// Whether the encoding file has no entries.
206    pub fn is_empty(&self) -> bool {
207        self.entries.is_empty()
208    }
209
210    /// Access the parsed header.
211    pub fn header(&self) -> &EncodingHeader {
212        &self.header
213    }
214}
215
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble a minimal, well-formed encoding file from `entries`.
    ///
    /// Every tuple is `(ckey, ekey, file_size)` and becomes one entry with a
    /// single EKey. Entries are sorted by CKey and packed into 4 KiB pages;
    /// at least one (possibly empty) page is always emitted.
    fn build_encoding_file(entries: &[([u8; 16], [u8; 16], u64)]) -> Vec<u8> {
        const KEY_LEN: u8 = 16;
        const PAGE_KB: u16 = 4;
        const PAGE_BYTES: usize = PAGE_KB as usize * 1024;
        // key_count (1) + file_size (5) + ckey (16) + ekey (16)
        const ENTRY_BYTES: usize = 1 + 5 + 16 + 16;

        let per_page = PAGE_BYTES / ENTRY_BYTES;

        let mut ordered = entries.to_vec();
        ordered.sort_by_key(|entry| entry.0);

        let page_total = ordered.len().div_ceil(per_page).max(1);

        // Header: 0x16 bytes, all multi-byte fields big-endian.
        let mut out = Vec::new();
        out.extend_from_slice(b"EN"); // signature
        out.extend_from_slice(&[1, KEY_LEN, KEY_LEN]); // version, ckey size, ekey size
        out.extend_from_slice(&PAGE_KB.to_be_bytes()); // ce_page_size_kb
        out.extend_from_slice(&PAGE_KB.to_be_bytes()); // espec_page_size_kb
        out.extend_from_slice(&(page_total as u32).to_be_bytes()); // ce_page_count
        out.extend_from_slice(&0u32.to_be_bytes()); // espec_page_count
        out.push(0); // flags
        out.extend_from_slice(&0u32.to_be_bytes()); // espec_block_size
        assert_eq!(out.len(), 0x16);

        // The ESpec string block is empty (espec_block_size = 0).

        // CEKey page index: first CKey of the page followed by a zeroed MD5.
        for page in 0..page_total {
            let first_ckey = ordered
                .get(page * per_page)
                .map_or([0xFF; 16], |entry| entry.0);
            out.extend_from_slice(&first_ckey);
            out.extend_from_slice(&[0u8; 16]);
        }

        // CEKey pages, each zero-padded up to the full page size.
        for page in 0..page_total {
            let page_start = out.len();
            let lo = (page * per_page).min(ordered.len());
            let hi = (lo + per_page).min(ordered.len());

            for (ckey, ekey, file_size) in &ordered[lo..hi] {
                out.push(1); // key_count
                // The low five bytes of the big-endian u64 form the u40.
                out.extend_from_slice(&file_size.to_be_bytes()[3..]);
                out.extend_from_slice(ckey);
                out.extend_from_slice(ekey);
            }

            out.resize(page_start + PAGE_BYTES, 0);
        }

        // No EKeySpec pages follow (espec_page_count = 0).
        out
    }

    #[test]
    fn parse_encoding_header_valid() {
        let bytes = build_encoding_file(&[]);
        let parsed = EncodingFile::parse(&bytes).unwrap();
        let header = parsed.header();
        assert_eq!(header.version, 1);
        assert_eq!(header.hash_size_ckey, 16);
        assert_eq!(header.hash_size_ekey, 16);
        assert_eq!(header.ce_page_size_kb, 4);
    }

    #[test]
    fn parse_encoding_header_invalid_magic() {
        let mut bytes = build_encoding_file(&[]);
        bytes[..2].copy_from_slice(b"XX");
        assert!(EncodingFile::parse(&bytes).is_err());
    }

    #[test]
    fn parse_encoding_single_entry() {
        let (ckey, ekey) = ([0x01; 16], [0x02; 16]);
        let bytes = build_encoding_file(&[(ckey, ekey, 12345)]);
        let parsed = EncodingFile::parse(&bytes).unwrap();

        assert_eq!(parsed.len(), 1);
        let found = parsed.find_ekey(&ckey).unwrap();
        assert_eq!(found.ekeys[0], ekey);
        assert_eq!(found.file_size, 12345);
    }

    #[test]
    fn parse_encoding_multiple_entries() {
        let fixtures: [([u8; 16], [u8; 16], u64); 3] = [
            ([0x01; 16], [0xA1; 16], 100),
            ([0x02; 16], [0xA2; 16], 200),
            ([0x03; 16], [0xA3; 16], 300),
        ];
        let bytes = build_encoding_file(&fixtures);
        let parsed = EncodingFile::parse(&bytes).unwrap();

        assert_eq!(parsed.len(), 3);

        for (ckey, ekey, size) in &fixtures {
            let found = parsed.find_ekey(ckey).unwrap();
            assert_eq!(found.ekeys[0], *ekey);
            assert_eq!(found.file_size, *size);
        }
    }

    #[test]
    fn parse_encoding_lookup_miss() {
        let bytes = build_encoding_file(&[([0x01; 16], [0xA1; 16], 100)]);
        let parsed = EncodingFile::parse(&bytes).unwrap();

        let absent = [0xFF; 16];
        assert!(parsed.find_ekey(&absent).is_none());
    }

    #[test]
    fn parse_encoding_empty() {
        let bytes = build_encoding_file(&[]);
        let parsed = EncodingFile::parse(&bytes).unwrap();
        assert_eq!(parsed.len(), 0);
        assert!(parsed.is_empty());
    }

    #[test]
    fn parse_encoding_large_file_size() {
        let (ckey, ekey) = ([0x42; 16], [0x43; 16]);
        let max_u40: u64 = 0xFF_FFFF_FFFF;
        let bytes = build_encoding_file(&[(ckey, ekey, max_u40)]);
        let parsed = EncodingFile::parse(&bytes).unwrap();

        let found = parsed.find_ekey(&ckey).unwrap();
        assert_eq!(found.file_size, max_u40);
    }

    #[test]
    fn parse_encoding_preserves_ckey() {
        let ckey = [
            10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160,
        ];
        let bytes = build_encoding_file(&[(ckey, [0xFF; 16], 42)]);
        let parsed = EncodingFile::parse(&bytes).unwrap();

        let found = parsed.find_ekey(&ckey).unwrap();
        assert_eq!(found.ckey, ckey);
    }
}
385}