use std::collections::HashMap;
use std::io::{Cursor, Read};

use byteorder::{BigEndian, ReadBytesExt};
use tracing::{debug, trace};

use crate::utils::{read_cstring_from, read_uint40_from};
use crate::{Error, Result};

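/// Header of a size file (`DS` magic): format version, EKey length,
/// entry and tag counts, and the 40-bit total compressed size.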
#[derive(Debug, Clone)]
pub struct SizeHeader {
    /// Magic bytes, always `b"DS"`
    pub magic: [u8; 2],
    /// Format version
    pub version: u8,
    /// Size in bytes of each EKey in the entry table
    pub ekey_size: u8,
    /// Number of size entries
    pub entry_count: u32,
    /// Number of tags
    pub tag_count: u16,
    /// Total compressed size of all entries (stored as a 40-bit integer)
    pub total_size: u64,
}

impl SizeHeader {
    /// Parses a size file header from a reader, validating the `DS` magic.
    pub fn parse<R: Read>(reader: &mut R) -> Result<Self> {
        let mut magic = [0u8; 2];
        reader.read_exact(&mut magic)?;

        if magic != [b'D', b'S'] {
            return Err(Error::IOError(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                format!("Invalid size file magic: {magic:?}"),
            )));
        }

        let version = reader.read_u8()?;
        let ekey_size = reader.read_u8()?;
        let entry_count = reader.read_u32::<BigEndian>()?;
        let tag_count = reader.read_u16::<BigEndian>()?;
        let total_size = read_uint40_from(reader)?;

        Ok(SizeHeader {
            magic,
            version,
            ekey_size,
            entry_count,
            tag_count,
            total_size,
        })
    }
}

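/// A single entry mapping an EKey to its compressed size.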
#[derive(Debug, Clone)]
pub struct SizeEntry {
    /// Encoding key, `ekey_size` bytes long
    pub ekey: Vec<u8>,
    /// Compressed size of the file in bytes
    pub compressed_size: u32,
}

impl SizeEntry {
    /// Parses a single entry; the EKey length is taken from the header.
    pub fn parse<R: Read>(reader: &mut R, header: &SizeHeader) -> Result<Self> {
        let mut ekey = vec![0u8; header.ekey_size as usize];
        reader.read_exact(&mut ekey)?;

        let compressed_size = reader.read_u32::<BigEndian>()?;

        Ok(SizeEntry {
            ekey,
            compressed_size,
        })
    }
}

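/// A tag with a bitmask selecting which entries it applies to.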
#[derive(Debug, Clone)]
pub struct SizeTag {
    /// Tag name
    pub name: String,
    /// Tag type
    pub tag_type: u16,
    /// Bitmask over entries, one bit per entry in parse order (MSB first)
    pub mask: Vec<u8>,
}

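/// A parsed size file: per-EKey compressed sizes plus tag masks,
/// with EKeys kept in both parse order and descending size order.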
#[derive(Debug, Clone)]
pub struct SizeFile {
    /// File header
    pub header: SizeHeader,
    /// Entries keyed by EKey
    pub entries: HashMap<Vec<u8>, SizeEntry>,
    /// Tags with their entry bitmasks
    pub tags: Vec<SizeTag>,
    /// EKeys sorted by compressed size, largest first
    pub size_order: Vec<Vec<u8>>,
    /// EKeys in the order they appear in the file (used for tag masks)
    pub parse_order: Vec<Vec<u8>>,
}

impl SizeFile {
    /// Parses a complete size file from a byte slice: header, tag table,
    /// then one entry per EKey.
    pub fn parse(data: &[u8]) -> Result<Self> {
        let mut cursor = Cursor::new(data);

        let header = SizeHeader::parse(&mut cursor)?;

        debug!(
            "Parsing size file v{} with {} entries, total size: {}",
            header.version, header.entry_count, header.total_size
        );

        // Tag table: each tag is a name, a type, and a bitmask with one bit per entry.
        let mut tags = Vec::with_capacity(header.tag_count as usize);
        let bytes_per_tag = header.entry_count.div_ceil(8) as usize;

        for i in 0..header.tag_count {
            let name = read_cstring_from(&mut cursor)?;
            let tag_type = cursor.read_u16::<BigEndian>()?;

            let mut mask = vec![0u8; bytes_per_tag];
            cursor.read_exact(&mut mask)?;

            trace!("Tag {}: '{}' type={}", i, name, tag_type);

            tags.push(SizeTag {
                name,
                tag_type,
                mask,
            });
        }

        // Entry table: record parse order for tag masks and collect sizes for sorting.
        let mut entries = HashMap::with_capacity(header.entry_count as usize);
        let mut size_list = Vec::with_capacity(header.entry_count as usize);
        let mut parse_order = Vec::with_capacity(header.entry_count as usize);

        for i in 0..header.entry_count {
            let entry = SizeEntry::parse(&mut cursor, &header)?;
            trace!(
                "Entry {}: EKey {:02x?} size={}",
                i,
                &entry.ekey[..4.min(entry.ekey.len())],
                entry.compressed_size
            );
            size_list.push((entry.compressed_size, entry.ekey.clone()));
            parse_order.push(entry.ekey.clone());
            entries.insert(entry.ekey.clone(), entry);
        }

        // Largest files first.
        size_list.sort_by_key(|(size, _)| std::cmp::Reverse(*size));
        let size_order: Vec<Vec<u8>> = size_list.into_iter().map(|(_, ekey)| ekey).collect();

        // Sanity check against the header's total; a mismatch is logged but not fatal.
        let calculated_total: u64 = entries.values().map(|e| e.compressed_size as u64).sum();

        if calculated_total != header.total_size {
            debug!(
                "Warning: Calculated total size {} doesn't match header total {}",
                calculated_total, header.total_size
            );
        }

        Ok(SizeFile {
            header,
            entries,
            tags,
            size_order,
            parse_order,
        })
    }

    /// Looks up the compressed size for an EKey. Keys longer than the
    /// header's `ekey_size` (e.g. full 16-byte keys) are truncated first.
    pub fn get_file_size(&self, ekey: &[u8]) -> Option<u32> {
        let key = if ekey.len() > self.header.ekey_size as usize {
            &ekey[..self.header.ekey_size as usize]
        } else {
            ekey
        };

        self.entries.get(key).map(|e| e.compressed_size)
    }

    /// Returns the total size reported in the header.
    pub fn get_total_size(&self) -> u64 {
        self.header.total_size
    }

    /// Sums the compressed sizes of all entries selected by any of the
    /// given tags, using the tags' per-entry bitmasks (MSB first).
    pub fn get_size_for_tags(&self, tag_names: &[&str]) -> u64 {
        // OR together the masks of all requested tags.
        let mut combined_mask = vec![0u8; self.header.entry_count.div_ceil(8) as usize];

        for tag in &self.tags {
            if tag_names.contains(&tag.name.as_str()) {
                for (i, byte) in tag.mask.iter().enumerate() {
                    combined_mask[i] |= byte;
                }
            }
        }

        // Bit i of the combined mask corresponds to the i-th entry in parse order.
        let mut total = 0u64;

        for (index, ekey) in self.parse_order.iter().enumerate() {
            if let Some(entry) = self.entries.get(ekey) {
                let byte_index = index / 8;
                let bit_index = index % 8;

                if byte_index < combined_mask.len() {
                    let bit = (combined_mask[byte_index] >> (7 - bit_index)) & 1;
                    if bit == 1 {
                        total += entry.compressed_size as u64;
                    }
                }
            }
        }

        total
    }

    /// Returns up to `count` (EKey, compressed size) pairs, largest first.
    pub fn get_largest_files(&self, count: usize) -> Vec<(&Vec<u8>, u32)> {
        self.size_order
            .iter()
            .take(count)
            .filter_map(|ekey| {
                self.entries
                    .get(ekey)
                    .map(|entry| (ekey, entry.compressed_size))
            })
            .collect()
    }

    /// Computes summary statistics over all entries' compressed sizes.
    pub fn get_statistics(&self) -> SizeStatistics {
        let sizes: Vec<u32> = self.entries.values().map(|e| e.compressed_size).collect();

        let total = sizes.iter().map(|&s| s as u64).sum();
        let average = if !sizes.is_empty() {
            total / sizes.len() as u64
        } else {
            0
        };

        let min = sizes.iter().min().copied().unwrap_or(0);
        let max = sizes.iter().max().copied().unwrap_or(0);

        SizeStatistics {
            total_size: total,
            file_count: sizes.len() as u32,
            average_size: average as u32,
            min_size: min,
            max_size: max,
        }
    }
}

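/// Summary statistics computed from a size file's entries.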
#[derive(Debug, Clone)]
pub struct SizeStatistics {
    pub total_size: u64,
    pub file_count: u32,
    pub average_size: u32,
    pub min_size: u32,
    pub max_size: u32,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_size_header() {
        let data = vec![
            b'D', b'S', // Magic "DS"
            1, // Version
            9, // EKey size
            0, 0, 0, 3, // Entry count (big-endian)
            0, 2, // Tag count (big-endian)
            0, 0x10, 0, 0, 0, // Total size (40-bit, decodes to 4096)
        ];

        let mut cursor = Cursor::new(data);
        let header = SizeHeader::parse(&mut cursor).unwrap();

        assert_eq!(header.magic, [b'D', b'S']);
        assert_eq!(header.version, 1);
        assert_eq!(header.ekey_size, 9);
        assert_eq!(header.entry_count, 3);
        assert_eq!(header.tag_count, 2);
        assert_eq!(header.total_size, 4096);
    }

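    // A sketch of an end-to-end parse test, assuming `read_cstring_from` consumes a
    // NUL-terminated string and `read_uint40_from` is little-endian (as implied by
    // test_size_header, where bytes [0, 0x10, 0, 0, 0] decode to 4096). The tag
    // name "base" and all keys/sizes are illustrative values, not real data.
    #[test]
    fn test_parse_roundtrip() {
        let mut data = vec![
            b'D', b'S', // Magic "DS"
            1, // Version
            4, // EKey size
            0, 0, 0, 2, // Entry count (big-endian)
            0, 1, // Tag count (big-endian)
            0xB8, 0x0B, 0, 0, 0, // Total size (40-bit) = 3000
        ];
        // Tag table: name, type (big-endian u16), then one mask byte for 2 entries.
        data.extend_from_slice(b"base\0");
        data.extend_from_slice(&[0, 0]); // Tag type
        data.push(0b1000_0000); // Mask selects entry 0 only (MSB first)
        // Entry table: EKey followed by compressed size (big-endian u32).
        data.extend_from_slice(&[0xAA, 0xBB, 0xCC, 0xDD, 0x00, 0x00, 0x03, 0xE8]); // 1000
        data.extend_from_slice(&[0x11, 0x22, 0x33, 0x44, 0x00, 0x00, 0x07, 0xD0]); // 2000

        let size_file = SizeFile::parse(&data).unwrap();

        assert_eq!(size_file.header.entry_count, 2);
        assert_eq!(size_file.get_total_size(), 3000);
        assert_eq!(size_file.get_file_size(&[0xAA, 0xBB, 0xCC, 0xDD]), Some(1000));
        // Only entry 0 is selected by the "base" tag mask.
        assert_eq!(size_file.get_size_for_tags(&["base"]), 1000);
        // size_order is sorted largest first.
        assert_eq!(size_file.size_order[0], vec![0x11, 0x22, 0x33, 0x44]);
    }
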
    #[test]
    fn test_size_calculation() {
        let mut entries = HashMap::new();

        entries.insert(
            vec![1; 9],
            SizeEntry {
                ekey: vec![1; 9],
                compressed_size: 1000,
            },
        );

        entries.insert(
            vec![2; 9],
            SizeEntry {
                ekey: vec![2; 9],
                compressed_size: 2000,
            },
        );

        entries.insert(
            vec![3; 9],
            SizeEntry {
                ekey: vec![3; 9],
                compressed_size: 3000,
            },
        );

        let total: u64 = entries.values().map(|e| e.compressed_size as u64).sum();

        assert_eq!(total, 6000);
    }

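    // A sketch of a test for tag-based size selection, built directly from the
    // public fields rather than parsed bytes. The tag name "enUS" and the sizes
    // are illustrative; the mask uses the MSB-first bit order that
    // get_size_for_tags expects (bit i selects the i-th entry in parse order).
    #[test]
    fn test_size_for_tags() {
        let keys: Vec<Vec<u8>> = vec![vec![1; 9], vec![2; 9], vec![3; 9]];
        let sizes = [100u32, 200, 300];

        let mut entries = HashMap::new();
        for (key, &size) in keys.iter().zip(sizes.iter()) {
            entries.insert(
                key.clone(),
                SizeEntry {
                    ekey: key.clone(),
                    compressed_size: size,
                },
            );
        }

        let size_file = SizeFile {
            header: SizeHeader {
                magic: [b'D', b'S'],
                version: 1,
                ekey_size: 9,
                entry_count: 3,
                tag_count: 1,
                total_size: 600,
            },
            entries,
            tags: vec![SizeTag {
                name: "enUS".to_string(),
                tag_type: 0,
                mask: vec![0b1010_0000], // Selects entries 0 and 2
            }],
            size_order: vec![keys[2].clone(), keys[1].clone(), keys[0].clone()],
            parse_order: keys.clone(),
        };

        // Entries 0 and 2 are selected: 100 + 300.
        assert_eq!(size_file.get_size_for_tags(&["enUS"]), 400);
        // A tag name that is not present selects nothing.
        assert_eq!(size_file.get_size_for_tags(&["deDE"]), 0);
    }
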
    #[test]
    fn test_partial_ekey_lookup() {
        let mut entries = HashMap::new();

        let partial_key = vec![0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22, 0x33];
        entries.insert(
            partial_key.clone(),
            SizeEntry {
                ekey: partial_key.clone(),
                compressed_size: 12345,
            },
        );

        let size_file = SizeFile {
            header: SizeHeader {
                magic: [b'D', b'S'],
                version: 1,
                ekey_size: 9,
                entry_count: 1,
                tag_count: 0,
                total_size: 12345,
            },
            entries,
            tags: vec![],
            size_order: vec![partial_key.clone()],
            parse_order: vec![partial_key.clone()],
        };

        // Lookup with the exact stored key length.
        assert_eq!(size_file.get_file_size(&partial_key), Some(12345));

        // A full 16-byte key is truncated to ekey_size before lookup.
        let full_md5 = vec![
            0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
            0x99, 0x00,
        ];
        assert_eq!(size_file.get_file_size(&full_md5), Some(12345));

        // Unknown key.
        assert_eq!(size_file.get_file_size(&[0xFF; 9]), None);
    }
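
    // A sketch of a test for get_largest_files and get_statistics, built directly
    // from the public fields; the keys and sizes are illustrative values.
    #[test]
    fn test_largest_files_and_statistics() {
        let keys: Vec<Vec<u8>> = vec![vec![1; 9], vec![2; 9], vec![3; 9]];
        let sizes = [500u32, 1500, 1000];

        let mut entries = HashMap::new();
        for (key, &size) in keys.iter().zip(sizes.iter()) {
            entries.insert(
                key.clone(),
                SizeEntry {
                    ekey: key.clone(),
                    compressed_size: size,
                },
            );
        }

        let size_file = SizeFile {
            header: SizeHeader {
                magic: [b'D', b'S'],
                version: 1,
                ekey_size: 9,
                entry_count: 3,
                tag_count: 0,
                total_size: 3000,
            },
            entries,
            tags: vec![],
            // Largest first, mirroring what SizeFile::parse produces.
            size_order: vec![keys[1].clone(), keys[2].clone(), keys[0].clone()],
            parse_order: keys.clone(),
        };

        let largest = size_file.get_largest_files(2);
        assert_eq!(largest.len(), 2);
        assert_eq!(largest[0], (&keys[1], 1500));
        assert_eq!(largest[1], (&keys[2], 1000));

        let stats = size_file.get_statistics();
        assert_eq!(stats.total_size, 3000);
        assert_eq!(stats.file_count, 3);
        assert_eq!(stats.average_size, 1000);
        assert_eq!(stats.min_size, 500);
        assert_eq!(stats.max_size, 1500);
    }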
}