Skip to main content

casc_lib/storage/
index.rs

1//! Parser for CASC `.idx` index files.
2//!
3//! Index files map the first 9 bytes of each encoding key (EKey) to its physical
4//! location (archive number + byte offset) in the `data.NNN` archives. There are
5//! 16 buckets (0x00 - 0x0F), each with a versioned `.idx` file; only the
6//! highest-version file per bucket is used.
7
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10
11use crate::error::{CascError, Result};
12use crate::util::io::{read_be_u40, read_le_u32};
13
/// One decoded record from an `.idx` file: where a blob lives on disk.
#[derive(Debug, Clone, Copy)]
pub struct IndexEntry {
    /// Leading 9 bytes of the encoding key (EKey) this record describes.
    pub key: [u8; 9],
    /// Which `data.NNN` archive holds the blob.
    pub archive_number: u32,
    /// Position of the blob inside that archive, in bytes.
    pub archive_offset: u64,
    /// Stored length of the blob (header plus BLTE payload).
    pub size: u32,
}
26
/// Per-file field layout read from an idx header.
///
/// Every entry in the file is `key_len + offset_len + size_len` bytes long.
#[derive(Debug, Clone, Copy)]
pub struct IndexSpec {
    /// Width in bytes of the per-entry size field (typically 4).
    pub size_len: u8,
    /// Width in bytes of the packed archive/offset field (typically 5).
    pub offset_len: u8,
    /// Width in bytes of the stored EKey prefix (typically 9).
    pub key_len: u8,
    /// How many low bits of the packed field hold the byte offset; the bits
    /// above them hold the archive number.
    pub offset_bits: u8,
}
40
41/// Complete index across all 16 buckets.
42pub struct CascIndex {
43    entries: HashMap<[u8; 9], IndexEntry>,
44}
45
46// ---------------------------------------------------------------------------
47// Public helpers
48// ---------------------------------------------------------------------------
49
/// Compute the bucket index (0..=15) that an EKey belongs to.
///
/// The first nine bytes of `ekey` are XOR-folded into a single byte, and the
/// high and low nibbles of that byte are then XORed together. Bytes past the
/// ninth do not influence the result.
///
/// # Panics
///
/// Panics if `ekey` holds fewer than 9 bytes.
pub fn bucket_index(ekey: &[u8]) -> u8 {
    let folded = ekey[..9].iter().fold(0u8, |acc, &byte| acc ^ byte);
    (folded >> 4) ^ (folded & 0x0F)
}
57
58/// Parse the 0x28-byte idx header. Returns the [`IndexSpec`] and total
59/// `entries_size` (byte length of all entries that follow the header).
60pub fn parse_idx_header(data: &[u8]) -> Result<(IndexSpec, u32)> {
61    if data.len() < 0x28 {
62        return Err(CascError::InvalidFormat(format!(
63            "idx header too short: {} bytes",
64            data.len()
65        )));
66    }
67
68    let version = u16::from_le_bytes([data[0x08], data[0x09]]);
69    if version != 7 {
70        return Err(CascError::UnsupportedVersion(version as u32));
71    }
72
73    let extra_bytes = data[0x0B];
74    if extra_bytes != 0 {
75        return Err(CascError::InvalidFormat(format!(
76            "idx extra_bytes must be 0, got {extra_bytes}"
77        )));
78    }
79
80    let spec = IndexSpec {
81        size_len: data[0x0C],
82        offset_len: data[0x0D],
83        key_len: data[0x0E],
84        offset_bits: data[0x0F],
85    };
86
87    let entries_size = read_le_u32(&data[0x20..0x24]);
88
89    Ok((spec, entries_size))
90}
91
92/// Parse the raw entry bytes according to `spec`. Returns all entries found.
93pub fn parse_idx_entries(data: &[u8], spec: &IndexSpec) -> Result<Vec<IndexEntry>> {
94    let entry_len = (spec.key_len as usize) + (spec.offset_len as usize) + (spec.size_len as usize);
95    if entry_len == 0 {
96        return Err(CascError::InvalidFormat("idx entry length is 0".into()));
97    }
98
99    let count = data.len() / entry_len;
100    let mut entries = Vec::with_capacity(count);
101
102    for i in 0..count {
103        let base = i * entry_len;
104        let key_end = base + spec.key_len as usize;
105        let offset_end = key_end + spec.offset_len as usize;
106        let size_end = offset_end + spec.size_len as usize;
107
108        if size_end > data.len() {
109            break;
110        }
111
112        // Key (first key_len bytes of the EKey)
113        let mut key = [0u8; 9];
114        let copy_len = (spec.key_len as usize).min(9);
115        key[..copy_len].copy_from_slice(&data[base..base + copy_len]);
116
117        // Offset - big-endian, variable length (typically 5 bytes)
118        let raw_offset = read_be_u40(&data[key_end..offset_end]);
119        let offset_mask = (1u64 << spec.offset_bits) - 1;
120        let archive_number = (raw_offset >> spec.offset_bits) as u32;
121        let archive_offset = raw_offset & offset_mask;
122
123        // Size - little-endian u32
124        let size = read_le_u32(&data[offset_end..size_end]);
125
126        entries.push(IndexEntry {
127            key,
128            archive_number,
129            archive_offset,
130            size,
131        });
132    }
133
134    Ok(entries)
135}
136
/// Parse an idx filename like `"0500000003.idx"` into `(bucket, version)`.
///
/// The expected shape is `BBVVVVVVVV.idx`: exactly ten ASCII hex digits
/// followed by a lowercase `.idx` suffix. `BB` is the bucket (0x00-0x0F) and
/// `VVVVVVVV` is the version number.
///
/// Returns `None` for anything else, including names with a sign character,
/// non-ASCII characters, the wrong number of digits, or a bucket above 0x0F.
pub fn parse_idx_filename(name: &str) -> Option<(u8, u32)> {
    let stem = name.strip_suffix(".idx")?;

    // Require pure ASCII hex. This keeps `split_at` on a character boundary
    // (a multi-byte character could otherwise straddle byte 2 and panic) and
    // rejects the leading `+` that `from_str_radix` would silently accept.
    if stem.len() != 10 || !stem.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }

    let (bucket_hex, version_hex) = stem.split_at(2);
    let bucket = u8::from_str_radix(bucket_hex, 16).ok()?;
    let version = u32::from_str_radix(version_hex, 16).ok()?;

    if bucket > 0x0F {
        return None;
    }
    Some((bucket, version))
}
153
154/// Scan `data_dir` for `.idx` files and select the highest-version file per
155/// bucket (0-15). Returns up to 16 paths.
156pub fn select_idx_files(data_dir: &Path) -> Result<Vec<PathBuf>> {
157    let pattern = data_dir.join("*.idx");
158    let pattern_str = pattern.to_string_lossy().to_string();
159
160    let mut best: HashMap<u8, (u32, PathBuf)> = HashMap::new();
161
162    for path in glob::glob(&pattern_str)
163        .map_err(|e| CascError::InvalidFormat(format!("glob error: {e}")))?
164    {
165        let path = path.map_err(|e| CascError::Io(e.into_error()))?;
166        let fname = match path.file_name().and_then(|f| f.to_str()) {
167            Some(f) => f.to_owned(),
168            None => continue,
169        };
170
171        if let Some((bucket, version)) = parse_idx_filename(&fname) {
172            let entry = best.entry(bucket).or_insert((0, PathBuf::new()));
173            if version >= entry.0 {
174                *entry = (version, path);
175            }
176        }
177    }
178
179    let mut result: Vec<PathBuf> = best.into_values().map(|(_, p)| p).collect();
180    result.sort();
181    Ok(result)
182}
183
184impl CascIndex {
185    /// Load all idx files from `data_dir`, parse them, and build a lookup map.
186    pub fn load(data_dir: &Path) -> Result<Self> {
187        let idx_files = select_idx_files(data_dir)?;
188        let mut entries = HashMap::new();
189
190        for path in &idx_files {
191            let file_data = std::fs::read(path)?;
192            if file_data.len() < 0x28 {
193                continue;
194            }
195
196            let (spec, entries_size) = parse_idx_header(&file_data)?;
197            let header_len = 0x28usize;
198            let entry_end = header_len + entries_size as usize;
199            let entry_data = if entry_end <= file_data.len() {
200                &file_data[header_len..entry_end]
201            } else {
202                &file_data[header_len..]
203            };
204
205            for entry in parse_idx_entries(entry_data, &spec)? {
206                entries.insert(entry.key, entry);
207            }
208        }
209
210        Ok(Self { entries })
211    }
212
213    /// Return the total number of index entries.
214    pub fn len(&self) -> usize {
215        self.entries.len()
216    }
217
218    /// Return whether the index contains no entries.
219    pub fn is_empty(&self) -> bool {
220        self.entries.is_empty()
221    }
222
223    /// Look up an index entry by the first 9 bytes of an EKey.
224    pub fn find(&self, ekey: &[u8]) -> Option<&IndexEntry> {
225        if ekey.len() < 9 {
226            return None;
227        }
228        let mut key = [0u8; 9];
229        key.copy_from_slice(&ekey[..9]);
230        self.entries.get(&key)
231    }
232}
233
234// ---------------------------------------------------------------------------
235// Tests
236// ---------------------------------------------------------------------------
237
#[cfg(test)]
mod tests {
    use super::*;

    /// Typical field layout: 9-byte key, 5-byte packed offset, 4-byte size,
    /// 30 offset bits.
    fn typical_spec() -> IndexSpec {
        IndexSpec {
            size_len: 4,
            offset_len: 5,
            key_len: 9,
            offset_bits: 30,
        }
    }

    /// Assemble one 18-byte record for the typical layout.
    fn raw_entry(key: [u8; 9], packed_offset: [u8; 5], size: u32) -> [u8; 18] {
        let mut record = [0u8; 18];
        record[..9].copy_from_slice(&key);
        record[9..14].copy_from_slice(&packed_offset);
        record[14..].copy_from_slice(&size.to_le_bytes());
        record
    }

    /// A full 16-byte EKey whose first nine bytes are 1..=9.
    fn sample_ekey() -> [u8; 16] {
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
    }

    #[test]
    fn bucket_index_all_zeros() {
        let result = bucket_index(&[0u8; 16]);
        assert!(result < 16);
        assert_eq!(result, 0);
    }

    #[test]
    fn bucket_index_always_less_than_16() {
        for first in 0..=255u8 {
            let mut ekey = [0u8; 16];
            ekey[0] = first;
            assert!(
                bucket_index(&ekey) < 16,
                "bucket >= 16 for ekey[0]={}",
                first
            );
        }
    }

    #[test]
    fn bucket_index_specific() {
        let mut ekey = [0u8; 16];
        ekey[0] = 0xFF;
        let folded = 0xFF_u8;
        let expected = (folded & 0xF) ^ (folded >> 4);
        assert_eq!(bucket_index(&ekey), expected);
    }

    #[test]
    fn parse_idx_header_valid() {
        let mut header = [0u8; 0x28];
        header[0..4].copy_from_slice(&0x10u32.to_le_bytes());
        // Version = 7
        header[0x08..0x0A].copy_from_slice(&7u16.to_le_bytes());
        // BucketIndex = 0x05, ExtraBytes = 0
        header[0x0A] = 0x05;
        header[0x0B] = 0;
        // SizeLen = 4, OffsetLen = 5, KeyLen = 9, OffsetBits = 30
        header[0x0C..0x10].copy_from_slice(&[4, 5, 9, 30]);
        // EntriesSize = 36 (2 entries of 18 bytes each)
        header[0x20..0x24].copy_from_slice(&36u32.to_le_bytes());

        let (spec, entries_size) = parse_idx_header(&header).unwrap();
        assert_eq!(spec.key_len, 9);
        assert_eq!(spec.offset_len, 5);
        assert_eq!(spec.size_len, 4);
        assert_eq!(spec.offset_bits, 30);
        assert_eq!(entries_size, 36);
    }

    #[test]
    fn parse_idx_entries_single_entry() {
        let key = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09];
        // archive=1, offset=0x1000 -> 0x40001000 as 5-byte BE
        let record = raw_entry(key, [0x00, 0x40, 0x00, 0x10, 0x00], 256);

        let parsed = parse_idx_entries(&record, &typical_spec()).unwrap();
        assert_eq!(parsed.len(), 1);

        let only = &parsed[0];
        assert_eq!(only.key, key);
        assert_eq!(only.archive_number, 1);
        assert_eq!(only.archive_offset, 0x1000);
        assert_eq!(only.size, 256);
    }

    #[test]
    fn parse_idx_entries_two_entries() {
        // Entry 1: key=all 0xAA, archive=0, offset=0, size=100
        let first = raw_entry([0xAA; 9], [0x00; 5], 100);
        // Entry 2: key=all 0xBB, archive=2, offset=0x100, size=200
        // archive=2 -> bits 30-31 = 10 -> 0x80000000 | 0x100 = 0x80000100
        let second = raw_entry([0xBB; 9], [0x00, 0x80, 0x00, 0x01, 0x00], 200);

        let mut data = Vec::with_capacity(36);
        data.extend_from_slice(&first);
        data.extend_from_slice(&second);

        let parsed = parse_idx_entries(&data, &typical_spec()).unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].archive_number, 0);
        assert_eq!(parsed[1].archive_number, 2);
        assert_eq!(parsed[1].archive_offset, 0x100);
    }

    #[test]
    fn idx_filename_parsing() {
        assert_eq!(parse_idx_filename("0000000001.idx"), Some((0x00, 1)));
        assert_eq!(parse_idx_filename("0f00000219.idx"), Some((0x0F, 0x219)));
        assert_eq!(parse_idx_filename("0500000003.idx"), Some((0x05, 3)));
        assert_eq!(parse_idx_filename("invalid.idx"), None);
    }

    #[test]
    fn casc_index_find_hit() {
        let stored = IndexEntry {
            key: [1, 2, 3, 4, 5, 6, 7, 8, 9],
            archive_number: 0,
            archive_offset: 0,
            size: 100,
        };
        let mut entries = HashMap::new();
        entries.insert(stored.key, stored);
        let index = CascIndex { entries };

        assert!(index.find(&sample_ekey()).is_some());
    }

    #[test]
    fn casc_index_find_miss() {
        let index = CascIndex {
            entries: HashMap::new(),
        };
        assert!(index.find(&sample_ekey()).is_none());
    }
}