tact_parser/
encoding.rs

//! Encoding file parser for TACT
//!
//! The encoding file maps Content Keys (CKey) to Encoding Keys (EKey) and vice versa.
//! This is a critical component for resolving file references in the TACT system.
//!
//! IMPORTANT: Encoding files use BIG-ENDIAN byte order, unlike most other TACT formats!
7
8use byteorder::{BigEndian, ReadBytesExt};
9use std::collections::HashMap;
10use std::io::{Cursor, Read};
11use tracing::{debug, trace, warn};
12
13use crate::{Error, Result};
14
/// Signature that every encoding file starts with: the ASCII letters "EN".
const ENCODING_MAGIC: [u8; 2] = *b"EN";
17
/// Fixed-size header found at the very start of an encoding file.
///
/// On disk the header occupies 22 bytes and every multi-byte field is stored
/// in big-endian byte order. The fields below are listed in on-disk order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EncodingHeader {
    /// Magic bytes "EN"
    pub magic: [u8; 2],
    /// Format version (should be 1)
    pub version: u8,
    /// Length in bytes of each content key (usually 16 for MD5)
    pub ckey_hash_size: u8,
    /// Length in bytes of each encoding key (usually 16 for MD5)
    pub ekey_hash_size: u8,
    /// Size of one CKey page, in KiB
    pub ckey_page_size_kb: u16,
    /// Size of one EKey page, in KiB
    pub ekey_page_size_kb: u16,
    /// Number of CKey pages
    pub ckey_page_count: u32,
    /// Number of EKey pages
    pub ekey_page_count: u32,
    /// Unknown field (must be 0)
    pub unk: u8,
    /// Size in bytes of the ESpec string block that follows the header
    pub espec_block_size: u32,
}
42
/// One entry of a page table (page index).
///
/// Each entry describes a single page: the first key stored in it and the
/// MD5 digest of the page's raw bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PageInfo {
    /// First hash in this page; its length is the table's key size
    pub first_hash: Vec<u8>,
    /// MD5 checksum of the page
    pub checksum: [u8; 16],
}
51
/// Encoding entry for a content key.
///
/// Ties one content key to the encoding key(s) under which the content is
/// stored, together with the file size recorded for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EncodingEntry {
    /// The content key
    pub content_key: Vec<u8>,
    /// List of encoding keys for this content
    pub encoding_keys: Vec<Vec<u8>>,
    /// File size (stored on disk as a 40-bit integer)
    pub size: u64,
}
62
63/// Encoding file parser and lookup
64pub struct EncodingFile {
65    /// File header
66    pub header: EncodingHeader,
67    /// CKey → EncodingEntry mapping
68    ckey_entries: HashMap<Vec<u8>, EncodingEntry>,
69    /// EKey → CKey reverse mapping
70    ekey_to_ckey: HashMap<Vec<u8>, Vec<u8>>,
71}
72
73impl EncodingFile {
74    /// Parse an encoding file from raw data
75    pub fn parse(data: &[u8]) -> Result<Self> {
76        let mut cursor = Cursor::new(data);
77
78        // Parse header
79        let header = Self::parse_header(&mut cursor)?;
80        debug!(
81            "Parsed encoding header: version={}, ckey_pages={}, ekey_pages={}, ckey_page_size_kb={}, ekey_page_size_kb={}, espec_table_size={}",
82            header.version,
83            header.ckey_page_count,
84            header.ekey_page_count,
85            header.ckey_page_size_kb,
86            header.ekey_page_size_kb,
87            header.espec_block_size
88        );
89
90        // Read ESpec string table (comes immediately after header)
91        let mut espec_data = vec![0u8; header.espec_block_size as usize];
92        cursor.read_exact(&mut espec_data)?;
93        debug!("Read ESpec string table: {} bytes", espec_data.len());
94
95        // Parse CKey page table indices
96        let ckey_page_table = Self::parse_page_table(
97            &mut cursor,
98            header.ckey_page_count as usize,
99            header.ckey_hash_size as usize,
100        )?;
101        trace!("Parsed {} CKey page table entries", ckey_page_table.len());
102
103        // Parse EKey page table indices
104        let _ekey_page_table = Self::parse_page_table(
105            &mut cursor,
106            header.ekey_page_count as usize,
107            header.ekey_hash_size as usize,
108        )?;
109        trace!("Parsed {} EKey page table entries", _ekey_page_table.len());
110
111        // Parse CKey pages - read directly from cursor position like rustycasc does
112        let mut ckey_entries = HashMap::new();
113        let page_size = header.ckey_page_size_kb as usize * 1024;
114
115        // Get remaining data from cursor to validate checksums correctly
116        let remaining_data = {
117            let current_pos = cursor.position() as usize;
118            &data[current_pos..]
119        };
120        let mut data_offset = 0;
121
122        for (i, page_info) in ckey_page_table.iter().enumerate() {
123            // Validate checksum on the data at current position (like rustycasc)
124            if data_offset + page_size <= remaining_data.len() {
125                let page_slice = &remaining_data[data_offset..data_offset + page_size];
126                let checksum = ::md5::compute(page_slice);
127
128                if checksum.as_ref() != page_info.checksum {
129                    debug!(
130                        "CKey page {} checksum mismatch (expected: {:?}, got: {:?})",
131                        i,
132                        hex::encode(page_info.checksum),
133                        hex::encode(checksum.as_ref())
134                    );
135                }
136
137                Self::parse_ckey_page(
138                    page_slice,
139                    header.ckey_hash_size,
140                    header.ekey_hash_size,
141                    &mut ckey_entries,
142                )?;
143            }
144
145            data_offset += page_size;
146        }
147
148        // Advance cursor past all CKey pages
149        cursor.set_position(cursor.position() + (header.ckey_page_count as u64 * page_size as u64));
150
151        debug!("Parsed {} CKey entries", ckey_entries.len());
152
153        // Build reverse mapping (EKey → CKey)
154        let mut ekey_to_ckey = HashMap::new();
155        for entry in ckey_entries.values() {
156            for ekey in &entry.encoding_keys {
157                ekey_to_ckey.insert(ekey.clone(), entry.content_key.clone());
158            }
159        }
160
161        debug!(
162            "Built EKey→CKey reverse mapping with {} entries",
163            ekey_to_ckey.len()
164        );
165
166        Ok(Self {
167            header,
168            ckey_entries,
169            ekey_to_ckey,
170        })
171    }
172
173    /// Parse the encoding file header
174    fn parse_header<R: Read>(reader: &mut R) -> Result<EncodingHeader> {
175        let mut magic = [0u8; 2];
176        reader.read_exact(&mut magic)?;
177
178        if magic != ENCODING_MAGIC {
179            return Err(Error::BadMagic);
180        }
181
182        let version = reader.read_u8()?;
183        if version != 1 {
184            warn!("Unexpected encoding version: {}", version);
185        }
186
187        let ckey_hash_size = reader.read_u8()?;
188        let ekey_hash_size = reader.read_u8()?;
189        let ckey_page_size_kb = reader.read_u16::<BigEndian>()?; // BIG-ENDIAN!
190        let ekey_page_size_kb = reader.read_u16::<BigEndian>()?; // BIG-ENDIAN!
191        let ckey_page_count = reader.read_u32::<BigEndian>()?; // BIG-ENDIAN!
192        let ekey_page_count = reader.read_u32::<BigEndian>()?; // BIG-ENDIAN!
193        let unk = reader.read_u8()?;
194        let espec_block_size = reader.read_u32::<BigEndian>()?; // BIG-ENDIAN!
195
196        Ok(EncodingHeader {
197            magic,
198            version,
199            ckey_hash_size,
200            ekey_hash_size,
201            ckey_page_size_kb,
202            ekey_page_size_kb,
203            ckey_page_count,
204            ekey_page_count,
205            unk,
206            espec_block_size,
207        })
208    }
209
210    /// Parse a page table
211    fn parse_page_table<R: Read>(
212        reader: &mut R,
213        page_count: usize,
214        hash_size: usize,
215    ) -> Result<Vec<PageInfo>> {
216        let mut pages = Vec::with_capacity(page_count);
217
218        for _ in 0..page_count {
219            let mut first_hash = vec![0u8; hash_size];
220            reader.read_exact(&mut first_hash)?;
221
222            let mut checksum = [0u8; 16];
223            reader.read_exact(&mut checksum)?;
224
225            pages.push(PageInfo {
226                first_hash,
227                checksum,
228            });
229        }
230
231        Ok(pages)
232    }
233
234    /// Parse a CKey page
235    fn parse_ckey_page(
236        data: &[u8],
237        ckey_size: u8,
238        ekey_size: u8,
239        entries: &mut HashMap<Vec<u8>, EncodingEntry>,
240    ) -> Result<()> {
241        let mut offset = 0;
242
243        while offset < data.len() {
244            // Check for zero padding (end of page data)
245            if offset + 6 > data.len() || data[offset..].iter().all(|&b| b == 0) {
246                break;
247            }
248
249            // Read key count
250            let key_count = data[offset];
251            offset += 1;
252
253            if key_count == 0 {
254                break; // End of entries
255            }
256
257            // Read file size (40-bit integer - big-endian like the header!)
258            if offset + 5 > data.len() {
259                break;
260            }
261            let size = crate::utils::read_uint40_be(&data[offset..offset + 5])?;
262            offset += 5;
263
264            // Read content key
265            if offset + ckey_size as usize > data.len() {
266                break;
267            }
268            let ckey = data[offset..offset + ckey_size as usize].to_vec();
269            offset += ckey_size as usize;
270
271            // Read encoding keys
272            let mut ekeys = Vec::new();
273            for _ in 0..key_count {
274                if offset + ekey_size as usize > data.len() {
275                    break;
276                }
277                let ekey = data[offset..offset + ekey_size as usize].to_vec();
278                offset += ekey_size as usize;
279                ekeys.push(ekey);
280            }
281
282            entries.insert(
283                ckey.clone(),
284                EncodingEntry {
285                    content_key: ckey,
286                    encoding_keys: ekeys,
287                    size,
288                },
289            );
290        }
291
292        Ok(())
293    }
294
295    /// Look up encoding keys by content key
296    pub fn lookup_by_ckey(&self, ckey: &[u8]) -> Option<&EncodingEntry> {
297        self.ckey_entries.get(ckey)
298    }
299
300    /// Look up content key by encoding key
301    pub fn lookup_by_ekey(&self, ekey: &[u8]) -> Option<&Vec<u8>> {
302        self.ekey_to_ckey.get(ekey)
303    }
304
305    /// Get the first encoding key for a content key (most common case)
306    pub fn get_ekey_for_ckey(&self, ckey: &[u8]) -> Option<&Vec<u8>> {
307        self.ckey_entries
308            .get(ckey)
309            .and_then(|entry| entry.encoding_keys.first())
310    }
311
312    /// Get file size for a content key
313    pub fn get_file_size(&self, ckey: &[u8]) -> Option<u64> {
314        self.ckey_entries.get(ckey).map(|entry| entry.size)
315    }
316
317    /// Get total number of content keys
318    pub fn ckey_count(&self) -> usize {
319        self.ckey_entries.len()
320    }
321
322    /// Get total number of encoding keys
323    pub fn ekey_count(&self) -> usize {
324        self.ekey_to_ckey.len()
325    }
326
327    /// Get sample content keys for debugging
328    pub fn get_sample_ckeys(&self, limit: usize) -> Vec<String> {
329        self.ckey_entries
330            .keys()
331            .take(limit)
332            .map(hex::encode)
333            .collect()
334    }
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialized size of the fixed encoding header, in bytes.
    const HEADER_SIZE: usize = 22;

    /// Build a serialized header with 16-byte keys, no EKey pages and the
    /// given CKey page layout and ESpec block size.
    fn build_header(ckey_page_size_kb: u16, ckey_page_count: u32, espec_block_size: u32) -> Vec<u8> {
        let mut data = Vec::with_capacity(HEADER_SIZE);

        // Magic
        data.extend_from_slice(&ENCODING_MAGIC);
        // Version
        data.push(1);
        // Hash sizes
        data.push(16); // CKey hash size
        data.push(16); // EKey hash size
        // Page sizes (big-endian!)
        data.extend_from_slice(&ckey_page_size_kb.to_be_bytes()); // CKey page size
        data.extend_from_slice(&0u16.to_be_bytes()); // EKey page size
        // Page counts (big-endian!)
        data.extend_from_slice(&ckey_page_count.to_be_bytes()); // CKey page count
        data.extend_from_slice(&0u32.to_be_bytes()); // EKey page count
        // Unknown
        data.push(0);
        // ESpec block size (big-endian!)
        data.extend_from_slice(&espec_block_size.to_be_bytes());

        data
    }

    #[test]
    fn test_encoding_header_size() {
        // The header parser must consume exactly 22 bytes and decode the
        // multi-byte fields as big-endian.
        let data = build_header(4, 0x0102_0304, 7);
        assert_eq!(data.len(), HEADER_SIZE);

        let mut cursor = Cursor::new(data.as_slice());
        let header = EncodingFile::parse_header(&mut cursor).unwrap();

        assert_eq!(cursor.position(), HEADER_SIZE as u64);
        assert_eq!(header.magic, ENCODING_MAGIC);
        assert_eq!(header.ckey_page_size_kb, 4);
        assert_eq!(header.ckey_page_count, 0x0102_0304);
        assert_eq!(header.ekey_page_count, 0);
        assert_eq!(header.unk, 0);
        assert_eq!(header.espec_block_size, 7);
    }

    #[test]
    fn test_parse_empty_encoding() {
        // Minimal valid encoding file: header only, no pages, no ESpec data
        let data = build_header(0, 0, 0);

        let result = EncodingFile::parse(&data);
        assert!(result.is_ok());

        let encoding = result.unwrap();
        assert_eq!(encoding.header.version, 1);
        assert_eq!(encoding.header.ckey_hash_size, 16);
        assert_eq!(encoding.header.ekey_hash_size, 16);
        assert_eq!(encoding.ckey_count(), 0);
        assert_eq!(encoding.ekey_count(), 0);
    }

    #[test]
    fn test_parse_single_ckey_page() {
        let ckey = [0xAAu8; 16];
        let ekey = [0xBBu8; 16];

        // One 1 KiB page holding a single entry, zero-padded to full size
        let mut page = Vec::with_capacity(1024);
        page.push(1); // key count
        page.extend_from_slice(&[0x00, 0x00, 0x00, 0x04, 0xD2]); // size = 1234 (40-bit BE)
        page.extend_from_slice(&ckey);
        page.extend_from_slice(&ekey);
        page.resize(1024, 0);

        let mut data = build_header(1, 1, 0);
        // CKey page index: first key + MD5 of the page
        data.extend_from_slice(&ckey);
        data.extend_from_slice(&::md5::compute(&page).0);
        // CKey page
        data.extend_from_slice(&page);

        let encoding = EncodingFile::parse(&data).unwrap();
        assert_eq!(encoding.ckey_count(), 1);
        assert_eq!(encoding.ekey_count(), 1);

        let entry = encoding.lookup_by_ckey(&ckey).expect("entry for ckey");
        assert_eq!(entry.content_key, ckey.to_vec());
        assert_eq!(entry.encoding_keys, vec![ekey.to_vec()]);
        assert_eq!(entry.size, 1234);

        assert_eq!(encoding.get_file_size(&ckey), Some(1234));
        assert_eq!(
            encoding.get_ekey_for_ckey(&ckey).map(Vec::as_slice),
            Some(&ekey[..])
        );
        assert_eq!(
            encoding.lookup_by_ekey(&ekey).map(Vec::as_slice),
            Some(&ckey[..])
        );
        assert!(encoding.lookup_by_ckey(&ekey).is_none());
    }

    #[test]
    fn test_truncated_header() {
        // Valid magic, but the header stops half-way through
        let data = build_header(0, 0, 0);
        assert!(EncodingFile::parse(&data[..10]).is_err());
    }

    #[test]
    fn test_invalid_magic() {
        let mut data = vec![0xFF, 0xFF]; // Wrong magic
        data.push(1); // Version

        let result = EncodingFile::parse(&data);
        assert!(matches!(result, Err(Error::BadMagic)));
    }
}
390}