tact_parser/
size.rs

1//! Size file parser for TACT
2//!
3//! The size file contains a mapping of encoding keys to file sizes,
4//! helping clients calculate installation requirements.
5
6use std::collections::HashMap;
7use std::io::{Cursor, Read};
8
9use byteorder::{BigEndian, ReadBytesExt};
10use tracing::{debug, trace};
11
12use crate::utils::{read_cstring_from, read_uint40_from};
13use crate::{Error, Result};
14
/// Size file header
///
/// Fixed-size preamble of a size file. The fields are declared in the order
/// in which they are read from the stream. The type is plain data, so it can
/// be compared with `==` (useful in tests and when diffing parsed files).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SizeHeader {
    /// Magic bytes; `b"DS"` in a valid size file
    pub magic: [u8; 2],
    /// Format version (typically 1)
    pub version: u8,
    /// Number of EKey bytes stored per entry (typically 9, i.e. the first
    /// 9 bytes of the MD5)
    pub ekey_size: u8,
    /// Number of entries declared by the file
    pub entry_count: u32,
    /// Number of tags declared by the file
    pub tag_count: u16,
    /// Total size of all files as declared by the file (40-bit value,
    /// widened to `u64`)
    pub total_size: u64,
}
31
32impl SizeHeader {
33    /// Parse size file header
34    pub fn parse<R: Read>(reader: &mut R) -> Result<Self> {
35        let mut magic = [0u8; 2];
36        reader.read_exact(&mut magic)?;
37
38        if magic != [b'D', b'S'] {
39            return Err(Error::IOError(std::io::Error::new(
40                std::io::ErrorKind::InvalidData,
41                format!("Invalid size file magic: {magic:?}"),
42            )));
43        }
44
45        let version = reader.read_u8()?;
46        let ekey_size = reader.read_u8()?;
47        let entry_count = reader.read_u32::<BigEndian>()?;
48        let tag_count = reader.read_u16::<BigEndian>()?;
49        let total_size = read_uint40_from(reader)?;
50
51        Ok(SizeHeader {
52            magic,
53            version,
54            ekey_size,
55            entry_count,
56            tag_count,
57            total_size,
58        })
59    }
60}
61
/// Size file entry
///
/// One record of the size file: a (possibly truncated) encoding key and the
/// size stored for it. Plain data, comparable with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SizeEntry {
    /// Encoding key (partial - only the first N bytes, where N is the
    /// header's EKey size)
    pub ekey: Vec<u8>,
    /// Compressed size (32-bit)
    pub compressed_size: u32,
}
70
71impl SizeEntry {
72    /// Parse a size entry
73    pub fn parse<R: Read>(reader: &mut R, header: &SizeHeader) -> Result<Self> {
74        let mut ekey = vec![0u8; header.ekey_size as usize];
75        reader.read_exact(&mut ekey)?;
76
77        let compressed_size = reader.read_u32::<BigEndian>()?;
78
79        Ok(SizeEntry {
80            ekey,
81            compressed_size,
82        })
83    }
84}
85
/// Size file tag
///
/// A named bitmask over the file's entries. Plain data, comparable with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SizeTag {
    /// Tag name
    pub name: String,
    /// Tag type
    pub tag_type: u16,
    /// Bitmask indicating which entries have this tag.
    ///
    /// Entry `i` (in parse order) corresponds to bit `7 - (i % 8)` of byte
    /// `i / 8`, i.e. the most significant bit of each byte comes first.
    pub mask: Vec<u8>,
}
96
97/// Size file
98#[derive(Debug, Clone)]
99pub struct SizeFile {
100    /// Header information
101    pub header: SizeHeader,
102    /// Size entries indexed by partial EKey
103    pub entries: HashMap<Vec<u8>, SizeEntry>,
104    /// Tags for conditional size calculation
105    pub tags: Vec<SizeTag>,
106    /// Entries ordered by size (largest first)
107    pub size_order: Vec<Vec<u8>>,
108    /// Entries in parse order (for tag mask application)
109    pub parse_order: Vec<Vec<u8>>,
110}
111
112impl SizeFile {
113    /// Parse a size file from bytes
114    pub fn parse(data: &[u8]) -> Result<Self> {
115        let mut cursor = Cursor::new(data);
116
117        // Parse header
118        let header = SizeHeader::parse(&mut cursor)?;
119
120        debug!(
121            "Parsing size file v{} with {} entries, total size: {}",
122            header.version, header.entry_count, header.total_size
123        );
124
125        // Parse tags first (they come before entries in this format)
126        let mut tags = Vec::with_capacity(header.tag_count as usize);
127        let bytes_per_tag = header.entry_count.div_ceil(8) as usize;
128
129        for i in 0..header.tag_count {
130            let name = read_cstring_from(&mut cursor)?;
131            let tag_type = cursor.read_u16::<BigEndian>()?;
132
133            let mut mask = vec![0u8; bytes_per_tag];
134            cursor.read_exact(&mut mask)?;
135
136            trace!("Tag {}: '{}' type={}", i, name, tag_type);
137
138            tags.push(SizeTag {
139                name,
140                tag_type,
141                mask,
142            });
143        }
144
145        // Parse entries
146        let mut entries = HashMap::with_capacity(header.entry_count as usize);
147        let mut size_list = Vec::with_capacity(header.entry_count as usize);
148        let mut parse_order = Vec::with_capacity(header.entry_count as usize);
149
150        for i in 0..header.entry_count {
151            let entry = SizeEntry::parse(&mut cursor, &header)?;
152            trace!(
153                "Entry {}: EKey {:02x?} size={}",
154                i,
155                &entry.ekey[..4.min(entry.ekey.len())],
156                entry.compressed_size
157            );
158            size_list.push((entry.compressed_size, entry.ekey.clone()));
159            parse_order.push(entry.ekey.clone());
160            entries.insert(entry.ekey.clone(), entry);
161        }
162
163        // Sort by size (largest first)
164        size_list.sort_by_key(|(size, _)| std::cmp::Reverse(*size));
165        let size_order: Vec<Vec<u8>> = size_list.into_iter().map(|(_, ekey)| ekey).collect();
166
167        // Verify total size
168        let calculated_total: u64 = entries.values().map(|e| e.compressed_size as u64).sum();
169
170        if calculated_total != header.total_size {
171            debug!(
172                "Warning: Calculated total size {} doesn't match header total {}",
173                calculated_total, header.total_size
174            );
175        }
176
177        Ok(SizeFile {
178            header,
179            entries,
180            tags,
181            size_order,
182            parse_order,
183        })
184    }
185
186    /// Get file size by partial EKey
187    pub fn get_file_size(&self, ekey: &[u8]) -> Option<u32> {
188        // If the provided key is longer than what we store, truncate it
189        let key = if ekey.len() > self.header.ekey_size as usize {
190            &ekey[..self.header.ekey_size as usize]
191        } else {
192            ekey
193        };
194
195        self.entries.get(key).map(|e| e.compressed_size)
196    }
197
198    /// Get total installation size (all files)
199    pub fn get_total_size(&self) -> u64 {
200        self.header.total_size
201    }
202
203    /// Calculate size for specific tags
204    pub fn get_size_for_tags(&self, tag_names: &[&str]) -> u64 {
205        // Find matching tags
206        let mut combined_mask = vec![0u8; self.header.entry_count.div_ceil(8) as usize];
207
208        for tag in &self.tags {
209            if tag_names.contains(&tag.name.as_str()) {
210                // OR the masks together
211                for (i, byte) in tag.mask.iter().enumerate() {
212                    combined_mask[i] |= byte;
213                }
214            }
215        }
216
217        // Calculate total size for entries matching the mask
218        // Use the parse order to match entries with the mask bits
219        let mut total = 0u64;
220
221        for (index, ekey) in self.parse_order.iter().enumerate() {
222            if let Some(entry) = self.entries.get(ekey) {
223                let byte_index = index / 8;
224                let bit_index = index % 8;
225
226                if byte_index < combined_mask.len() {
227                    let bit = (combined_mask[byte_index] >> (7 - bit_index)) & 1;
228                    if bit == 1 {
229                        total += entry.compressed_size as u64;
230                    }
231                }
232            }
233        }
234
235        total
236    }
237
238    /// Get the N largest files
239    pub fn get_largest_files(&self, count: usize) -> Vec<(&Vec<u8>, u32)> {
240        self.size_order
241            .iter()
242            .take(count)
243            .filter_map(|ekey| {
244                self.entries
245                    .get(ekey)
246                    .map(|entry| (ekey, entry.compressed_size))
247            })
248            .collect()
249    }
250
251    /// Calculate statistics
252    pub fn get_statistics(&self) -> SizeStatistics {
253        let sizes: Vec<u32> = self.entries.values().map(|e| e.compressed_size).collect();
254
255        let total = sizes.iter().map(|&s| s as u64).sum();
256        let average = if !sizes.is_empty() {
257            total / sizes.len() as u64
258        } else {
259            0
260        };
261
262        let min = sizes.iter().min().copied().unwrap_or(0);
263        let max = sizes.iter().max().copied().unwrap_or(0);
264
265        SizeStatistics {
266            total_size: total,
267            file_count: sizes.len() as u32,
268            average_size: average as u32,
269            min_size: min,
270            max_size: max,
271        }
272    }
273}
274
/// Size file statistics
///
/// Summary figures over a set of size entries. Plain data, comparable with
/// `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SizeStatistics {
    /// Sum of the compressed sizes of all counted entries
    pub total_size: u64,
    /// Number of entries counted
    pub file_count: u32,
    /// `total_size / file_count`, rounded down (0 when there are no entries)
    pub average_size: u32,
    /// Smallest compressed size (0 when there are no entries)
    pub min_size: u32,
    /// Largest compressed size (0 when there are no entries)
    pub max_size: u32,
}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an in-memory size file from `(ekey, size)` pairs listed in parse
    /// order, so the query methods can be tested without a binary fixture.
    fn size_file_from(entries_in_order: &[(Vec<u8>, u32)], tags: Vec<SizeTag>) -> SizeFile {
        let parse_order: Vec<Vec<u8>> = entries_in_order
            .iter()
            .map(|(ekey, _)| ekey.clone())
            .collect();

        let mut by_size = entries_in_order.to_vec();
        by_size.sort_by_key(|(_, size)| std::cmp::Reverse(*size));
        let size_order: Vec<Vec<u8>> = by_size.into_iter().map(|(ekey, _)| ekey).collect();

        let entries: HashMap<Vec<u8>, SizeEntry> = entries_in_order
            .iter()
            .map(|(ekey, size)| {
                (
                    ekey.clone(),
                    SizeEntry {
                        ekey: ekey.clone(),
                        compressed_size: *size,
                    },
                )
            })
            .collect();

        let total_size: u64 = entries_in_order
            .iter()
            .map(|(_, size)| u64::from(*size))
            .sum();

        SizeFile {
            header: SizeHeader {
                magic: [b'D', b'S'],
                version: 1,
                ekey_size: 9,
                entry_count: entries_in_order.len() as u32,
                tag_count: tags.len() as u16,
                total_size,
            },
            entries,
            tags,
            size_order,
            parse_order,
        }
    }

    #[test]
    fn test_size_header() {
        let data = vec![
            b'D', b'S', // Magic
            1,    // Version
            9,    // EKey size (partial MD5)
            0, 0, 0, 3, // Entry count (3, big-endian)
            0, 2, // Tag count (2, big-endian)
            0, 0x10, 0, 0, 0, // Total size (0x1000 = 4096, least significant byte first)
        ];

        let mut cursor = Cursor::new(data);
        let header = SizeHeader::parse(&mut cursor).unwrap();

        assert_eq!(header.magic, [b'D', b'S']);
        assert_eq!(header.version, 1);
        assert_eq!(header.ekey_size, 9);
        assert_eq!(header.entry_count, 3);
        assert_eq!(header.tag_count, 2);
        assert_eq!(header.total_size, 4096);
    }

    #[test]
    fn test_size_calculation() {
        let k1 = vec![1u8; 9];
        let k2 = vec![2u8; 9];
        let k3 = vec![3u8; 9];

        // Mask bits are most-significant-bit first: bit 7 of byte 0 is entry 0.
        let tags = vec![
            SizeTag {
                name: "Windows".to_string(),
                tag_type: 1,
                mask: vec![0b1010_0000], // entries 0 and 2
            },
            SizeTag {
                name: "enUS".to_string(),
                tag_type: 2,
                mask: vec![0b0100_0000], // entry 1
            },
        ];

        let size_file = size_file_from(
            &[(k1.clone(), 1000), (k2.clone(), 2000), (k3.clone(), 3000)],
            tags,
        );

        assert_eq!(size_file.get_total_size(), 6000);

        // Tag-filtered sizes
        assert_eq!(size_file.get_size_for_tags(&["Windows"]), 4000);
        assert_eq!(size_file.get_size_for_tags(&["enUS"]), 2000);
        assert_eq!(size_file.get_size_for_tags(&["Windows", "enUS"]), 6000);
        assert_eq!(size_file.get_size_for_tags(&["missing"]), 0);
        assert_eq!(size_file.get_size_for_tags(&[]), 0);

        // Largest files come back largest first and are limited to `count`
        assert_eq!(
            size_file.get_largest_files(2),
            vec![(&k3, 3000), (&k2, 2000)]
        );
        assert_eq!(size_file.get_largest_files(10).len(), 3);
        assert!(size_file.get_largest_files(0).is_empty());

        // Statistics
        let stats = size_file.get_statistics();
        assert_eq!(stats.total_size, 6000);
        assert_eq!(stats.file_count, 3);
        assert_eq!(stats.average_size, 2000);
        assert_eq!(stats.min_size, 1000);
        assert_eq!(stats.max_size, 3000);
    }

    #[test]
    fn test_statistics_empty() {
        let stats = size_file_from(&[], vec![]).get_statistics();

        assert_eq!(stats.total_size, 0);
        assert_eq!(stats.file_count, 0);
        assert_eq!(stats.average_size, 0);
        assert_eq!(stats.min_size, 0);
        assert_eq!(stats.max_size, 0);
    }

    #[test]
    fn test_partial_ekey_lookup() {
        let partial_key = vec![0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22, 0x33];
        let size_file = size_file_from(&[(partial_key.clone(), 12345)], vec![]);

        // Lookup with exact key
        assert_eq!(size_file.get_file_size(&partial_key), Some(12345));

        // Lookup with longer key (full MD5) - should truncate and match
        let full_md5 = vec![
            0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
            0x99, 0x00,
        ];
        assert_eq!(size_file.get_file_size(&full_md5), Some(12345));

        // Lookup with a key shorter than the stored length - used as given, no match
        assert_eq!(size_file.get_file_size(&partial_key[..4]), None);

        // Lookup with non-existent key
        assert_eq!(size_file.get_file_size(&[0xFF; 9]), None);
    }
}