tact_parser/
encoding.rs

1//! Encoding file parser for TACT
2//!
3//! The encoding file maps Content Keys (CKey) to Encoding Keys (EKey) and vice versa.
4//! This is a critical component for resolving file references in the TACT system.
5//!
6//! IMPORTANT: Encoding files use BIG-ENDIAN byte order, unlike most other TACT formats!
7
8use byteorder::{BigEndian, ReadBytesExt};
9use std::collections::HashMap;
10use std::io::{Cursor, Read};
11use tracing::{debug, trace, warn};
12
13use crate::{Error, Result};
14
/// Two-byte signature expected at the start of an encoding file: ASCII "EN".
const ENCODING_MAGIC: [u8; 2] = *b"EN";
17
/// Encoding file header
///
/// Fields are declared in the order the parser reads them from the start of
/// the file. All multi-byte integers are read as big-endian, and the fields
/// add up to 22 bytes.
#[derive(Debug, Clone)]
pub struct EncodingHeader {
    /// Magic bytes "EN" (the parser rejects any other value)
    pub magic: [u8; 2],
    /// Version (should be 1; the parser only warns on other values)
    pub version: u8,
    /// Hash size for CKeys (usually 16 for MD5); used as the byte length of
    /// every content key and of the first-hash field in the CKey page table
    pub ckey_hash_size: u8,
    /// Hash size for EKeys (usually 16 for MD5); used as the byte length of
    /// every encoding key and of the first-hash field in the EKey page table
    pub ekey_hash_size: u8,
    /// Page size for CKey pages in KB (multiplied by 1024 to get bytes)
    pub ckey_page_size_kb: u16,
    /// Page size for EKey pages in KB
    pub ekey_page_size_kb: u16,
    /// Number of CKey pages (also the number of CKey page table entries)
    pub ckey_page_count: u32,
    /// Number of EKey pages (also the number of EKey page table entries)
    pub ekey_page_count: u32,
    /// Unknown field (expected to be 0; stored as read, not validated)
    pub unk: u8,
    /// ESpec block size
    // NOTE(review): presumably a byte count - confirm against the format spec.
    pub espec_block_size: u32,
}
42
/// Page table entry
///
/// One entry is read per page: a hash of the table's configured hash size,
/// followed by a 16-byte checksum.
#[derive(Debug, Clone)]
pub struct PageInfo {
    /// First hash in this page (length equals the hash size of the table)
    pub first_hash: Vec<u8>,
    /// MD5 checksum of the page; compared against the MD5 of the raw page
    /// bytes when CKey pages are parsed
    pub checksum: [u8; 16],
}
51
/// Encoding entry for a content key
///
/// One entry is produced for every record decoded from a CKey page.
#[derive(Debug, Clone)]
pub struct EncodingEntry {
    /// The content key (same bytes that key this entry in the lookup map)
    pub content_key: Vec<u8>,
    /// List of encoding keys for this content, in the order they were stored
    pub encoding_keys: Vec<Vec<u8>>,
    /// File size (stored on disk as a 40-bit integer, widened to `u64`)
    pub size: u64,
}
62
63/// Encoding file parser and lookup
64pub struct EncodingFile {
65    /// File header
66    pub header: EncodingHeader,
67    /// CKey → EncodingEntry mapping
68    ckey_entries: HashMap<Vec<u8>, EncodingEntry>,
69    /// EKey → CKey reverse mapping
70    ekey_to_ckey: HashMap<Vec<u8>, Vec<u8>>,
71}
72
73impl EncodingFile {
74    /// Parse an encoding file from raw data
75    pub fn parse(data: &[u8]) -> Result<Self> {
76        let mut cursor = Cursor::new(data);
77
78        // Parse header
79        let header = Self::parse_header(&mut cursor)?;
80        debug!(
81            "Parsed encoding header: version={}, ckey_pages={}, ekey_pages={}",
82            header.version, header.ckey_page_count, header.ekey_page_count
83        );
84
85        // Parse CKey page table
86        let ckey_page_table = Self::parse_page_table(
87            &mut cursor,
88            header.ckey_page_count as usize,
89            header.ckey_hash_size as usize,
90        )?;
91        trace!("Parsed {} CKey page table entries", ckey_page_table.len());
92
93        // Parse EKey page table
94        let _ekey_page_table = Self::parse_page_table(
95            &mut cursor,
96            header.ekey_page_count as usize,
97            header.ekey_hash_size as usize,
98        )?;
99        trace!("Parsed {} EKey page table entries", _ekey_page_table.len());
100
101        // Parse CKey pages
102        let mut ckey_entries = HashMap::new();
103        let page_size = header.ckey_page_size_kb as usize * 1024;
104
105        for (i, page_info) in ckey_page_table.iter().enumerate() {
106            let page_data = Self::read_page(&mut cursor, page_size)?;
107
108            // Verify page checksum
109            let checksum = ::md5::compute(&page_data);
110            if checksum.as_ref() != page_info.checksum {
111                warn!("CKey page {} checksum mismatch", i);
112            }
113
114            Self::parse_ckey_page(
115                &page_data,
116                header.ckey_hash_size,
117                header.ekey_hash_size,
118                &mut ckey_entries,
119            )?;
120        }
121
122        debug!("Parsed {} CKey entries", ckey_entries.len());
123
124        // Build reverse mapping (EKey → CKey)
125        let mut ekey_to_ckey = HashMap::new();
126        for entry in ckey_entries.values() {
127            for ekey in &entry.encoding_keys {
128                ekey_to_ckey.insert(ekey.clone(), entry.content_key.clone());
129            }
130        }
131
132        debug!(
133            "Built EKey→CKey reverse mapping with {} entries",
134            ekey_to_ckey.len()
135        );
136
137        Ok(Self {
138            header,
139            ckey_entries,
140            ekey_to_ckey,
141        })
142    }
143
144    /// Parse the encoding file header
145    fn parse_header<R: Read>(reader: &mut R) -> Result<EncodingHeader> {
146        let mut magic = [0u8; 2];
147        reader.read_exact(&mut magic)?;
148
149        if magic != ENCODING_MAGIC {
150            return Err(Error::BadMagic);
151        }
152
153        let version = reader.read_u8()?;
154        if version != 1 {
155            warn!("Unexpected encoding version: {}", version);
156        }
157
158        let ckey_hash_size = reader.read_u8()?;
159        let ekey_hash_size = reader.read_u8()?;
160        let ckey_page_size_kb = reader.read_u16::<BigEndian>()?; // BIG-ENDIAN!
161        let ekey_page_size_kb = reader.read_u16::<BigEndian>()?; // BIG-ENDIAN!
162        let ckey_page_count = reader.read_u32::<BigEndian>()?; // BIG-ENDIAN!
163        let ekey_page_count = reader.read_u32::<BigEndian>()?; // BIG-ENDIAN!
164        let unk = reader.read_u8()?;
165        let espec_block_size = reader.read_u32::<BigEndian>()?; // BIG-ENDIAN!
166
167        Ok(EncodingHeader {
168            magic,
169            version,
170            ckey_hash_size,
171            ekey_hash_size,
172            ckey_page_size_kb,
173            ekey_page_size_kb,
174            ckey_page_count,
175            ekey_page_count,
176            unk,
177            espec_block_size,
178        })
179    }
180
181    /// Parse a page table
182    fn parse_page_table<R: Read>(
183        reader: &mut R,
184        page_count: usize,
185        hash_size: usize,
186    ) -> Result<Vec<PageInfo>> {
187        let mut pages = Vec::with_capacity(page_count);
188
189        for _ in 0..page_count {
190            let mut first_hash = vec![0u8; hash_size];
191            reader.read_exact(&mut first_hash)?;
192
193            let mut checksum = [0u8; 16];
194            reader.read_exact(&mut checksum)?;
195
196            pages.push(PageInfo {
197                first_hash,
198                checksum,
199            });
200        }
201
202        Ok(pages)
203    }
204
205    /// Read a page of data
206    fn read_page<R: Read>(reader: &mut R, page_size: usize) -> Result<Vec<u8>> {
207        let mut page = vec![0u8; page_size];
208        reader.read_exact(&mut page)?;
209        Ok(page)
210    }
211
212    /// Parse a CKey page
213    fn parse_ckey_page(
214        data: &[u8],
215        ckey_size: u8,
216        ekey_size: u8,
217        entries: &mut HashMap<Vec<u8>, EncodingEntry>,
218    ) -> Result<()> {
219        let mut offset = 0;
220
221        while offset < data.len() {
222            // Check for zero padding (end of page data)
223            if offset + 6 > data.len() || data[offset..].iter().all(|&b| b == 0) {
224                break;
225            }
226
227            // Read key count
228            let key_count = data[offset];
229            offset += 1;
230
231            if key_count == 0 {
232                break; // End of entries
233            }
234
235            // Read file size (40-bit integer!)
236            if offset + 5 > data.len() {
237                break;
238            }
239            let size = crate::utils::read_uint40(&data[offset..offset + 5])?;
240            offset += 5;
241
242            // Read content key
243            if offset + ckey_size as usize > data.len() {
244                break;
245            }
246            let ckey = data[offset..offset + ckey_size as usize].to_vec();
247            offset += ckey_size as usize;
248
249            // Read encoding keys
250            let mut ekeys = Vec::new();
251            for _ in 0..key_count {
252                if offset + ekey_size as usize > data.len() {
253                    break;
254                }
255                let ekey = data[offset..offset + ekey_size as usize].to_vec();
256                offset += ekey_size as usize;
257                ekeys.push(ekey);
258            }
259
260            entries.insert(
261                ckey.clone(),
262                EncodingEntry {
263                    content_key: ckey,
264                    encoding_keys: ekeys,
265                    size,
266                },
267            );
268        }
269
270        Ok(())
271    }
272
    /// Look up encoding keys by content key
    ///
    /// Returns the whole [`EncodingEntry`] stored for `ckey` (content key,
    /// encoding keys and size), or `None` when no entry with exactly these
    /// key bytes was parsed.
    pub fn lookup_by_ckey(&self, ckey: &[u8]) -> Option<&EncodingEntry> {
        self.ckey_entries.get(ckey)
    }
277
    /// Look up content key by encoding key
    ///
    /// Consults the reverse (EKey → CKey) index built during parsing and
    /// returns the content key recorded for `ekey`, or `None` when the
    /// encoding key is unknown. The index keeps a single content key per
    /// encoding key.
    pub fn lookup_by_ekey(&self, ekey: &[u8]) -> Option<&Vec<u8>> {
        self.ekey_to_ckey.get(ekey)
    }
282
283    /// Get the first encoding key for a content key (most common case)
284    pub fn get_ekey_for_ckey(&self, ckey: &[u8]) -> Option<&Vec<u8>> {
285        self.ckey_entries
286            .get(ckey)
287            .and_then(|entry| entry.encoding_keys.first())
288    }
289
290    /// Get file size for a content key
291    pub fn get_file_size(&self, ckey: &[u8]) -> Option<u64> {
292        self.ckey_entries.get(ckey).map(|entry| entry.size)
293    }
294
    /// Get total number of content keys
    ///
    /// This is the number of entries in the CKey index, i.e. the number of
    /// distinct content keys that were parsed.
    pub fn ckey_count(&self) -> usize {
        self.ckey_entries.len()
    }
299
    /// Get total number of encoding keys
    ///
    /// This is the number of entries in the reverse (EKey → CKey) index, so
    /// an encoding key listed by several entries is counted once.
    pub fn ekey_count(&self) -> usize {
        self.ekey_to_ckey.len()
    }
304}
305
306#[cfg(test)]
307mod tests {
308    use super::*;
309
310    #[test]
311    fn test_encoding_header_size() {
312        // Header should be exactly 22 bytes
313        let header_size = 2 + 1 + 1 + 1 + 2 + 2 + 4 + 4 + 1 + 4;
314        assert_eq!(header_size, 22);
315    }
316
317    #[test]
318    fn test_parse_empty_encoding() {
319        // Create a minimal valid encoding file
320        let mut data = Vec::new();
321
322        // Magic
323        data.extend_from_slice(&ENCODING_MAGIC);
324        // Version
325        data.push(1);
326        // Hash sizes
327        data.push(16); // CKey hash size
328        data.push(16); // EKey hash size
329        // Page sizes (big-endian!)
330        data.extend_from_slice(&0u16.to_be_bytes()); // CKey page size
331        data.extend_from_slice(&0u16.to_be_bytes()); // EKey page size
332        // Page counts (big-endian!)
333        data.extend_from_slice(&0u32.to_be_bytes()); // CKey page count
334        data.extend_from_slice(&0u32.to_be_bytes()); // EKey page count
335        // Unknown
336        data.push(0);
337        // ESpec block size (big-endian!)
338        data.extend_from_slice(&0u32.to_be_bytes());
339
340        let result = EncodingFile::parse(&data);
341        assert!(result.is_ok());
342
343        let encoding = result.unwrap();
344        assert_eq!(encoding.header.version, 1);
345        assert_eq!(encoding.header.ckey_hash_size, 16);
346        assert_eq!(encoding.header.ekey_hash_size, 16);
347        assert_eq!(encoding.ckey_count(), 0);
348        assert_eq!(encoding.ekey_count(), 0);
349    }
350
351    #[test]
352    fn test_invalid_magic() {
353        let mut data = vec![0xFF, 0xFF]; // Wrong magic
354        data.push(1); // Version
355
356        let result = EncodingFile::parse(&data);
357        assert!(matches!(result, Err(Error::BadMagic)));
358    }
359}