use std::collections::HashMap;
use std::path::{Path, PathBuf};
use crate::error::{CascError, Result};
use crate::util::io::{read_be_u40, read_le_u32};
/// One decoded record from an `.idx` entry table.
///
/// Produced by `parse_idx_entries`, which splits each on-disk record into a
/// key, a packed offset field, and a size field.
#[derive(Debug, Clone, Copy)]
pub struct IndexEntry {
/// Up to the first 9 bytes of the on-disk key; zero-padded when the on-disk
/// key is shorter than 9 bytes.
pub key: [u8; 9],
/// High bits of the packed offset field (everything above `offset_bits`).
pub archive_number: u32,
/// Low `offset_bits` bits of the packed offset field.
pub archive_offset: u64,
/// Value of the record's size field.
/// NOTE(review): presumably the stored size of the blob in the archive — confirm.
pub size: u32,
}
/// Layout of the records in an `.idx` entry table.
///
/// `parse_idx_header` fills these fields from header bytes `0x0C..=0x0F`, and
/// `parse_idx_entries` uses them to slice each record.
#[derive(Debug, Clone, Copy)]
pub struct IndexSpec {
/// Width in bytes of the size field of each record.
pub size_len: u8,
/// Width in bytes of the packed offset field of each record.
pub offset_len: u8,
/// Width in bytes of the key field of each record.
pub key_len: u8,
/// Number of low bits of the packed offset that hold the offset inside the
/// archive; the bits above them hold the archive number.
pub offset_bits: u8,
}
/// In-memory lookup table built from the entries of a set of `.idx` files.
pub struct CascIndex {
// Entries keyed by their 9-byte key; inserting a key again replaces the
// previous entry.
entries: HashMap<[u8; 9], IndexEntry>,
}
/// Computes the 4-bit bucket number for a key.
///
/// The first 9 bytes of `ekey` are XOR-ed together and the two nibbles of the
/// resulting byte are XOR-ed with each other, so the result is always `< 16`.
/// Bytes past the ninth are ignored.
///
/// # Panics
///
/// Panics if `ekey` holds fewer than 9 bytes.
pub fn bucket_index(ekey: &[u8]) -> u8 {
    let folded = ekey[..9].iter().fold(0u8, |acc, &byte| acc ^ byte);
    let high_nibble = folded >> 4;
    let low_nibble = folded & 0x0F;
    low_nibble ^ high_nibble
}
/// Decodes the fixed-size header at the start of an `.idx` file.
///
/// Returns the record layout together with the 32-bit little-endian value
/// stored at `0x20`, which callers use as the byte length of the entry table
/// that follows the header.
///
/// # Errors
///
/// * `CascError::InvalidFormat` if `data` is shorter than `0x28` bytes or the
///   byte at `0x0B` is non-zero.
/// * `CascError::UnsupportedVersion` if the little-endian `u16` at `0x08` is
///   not `7`.
pub fn parse_idx_header(data: &[u8]) -> Result<(IndexSpec, u32)> {
    const HEADER_LEN: usize = 0x28;
    const SUPPORTED_VERSION: u16 = 7;

    let available = data.len();
    if available < HEADER_LEN {
        return Err(CascError::InvalidFormat(format!(
            "idx header too short: {} bytes",
            available
        )));
    }

    let version = u16::from_le_bytes([data[0x08], data[0x09]]);
    if version != SUPPORTED_VERSION {
        return Err(CascError::UnsupportedVersion(u32::from(version)));
    }

    match data[0x0B] {
        0 => {}
        extra_bytes => {
            return Err(CascError::InvalidFormat(format!(
                "idx extra_bytes must be 0, got {extra_bytes}"
            )));
        }
    }

    // The four layout bytes sit back to back at 0x0C..=0x0F.
    let spec = IndexSpec {
        size_len: data[0x0C],
        offset_len: data[0x0D],
        key_len: data[0x0E],
        offset_bits: data[0x0F],
    };
    let entries_size = read_le_u32(&data[0x20..0x24]);

    Ok((spec, entries_size))
}
/// Decodes a table of fixed-width index records.
///
/// Each record is `key_len + offset_len + size_len` bytes long, laid out in
/// that order. The key is truncated or zero-padded to 9 bytes; the packed
/// offset is split into an archive number (bits above `offset_bits`) and an
/// offset inside that archive (the low `offset_bits` bits). Trailing bytes
/// that do not form a whole record are ignored.
///
/// # Errors
///
/// Returns `CascError::InvalidFormat` when the record length is 0 or when
/// `spec.offset_bits` is 64 or more (the shift would overflow a `u64`).
///
/// NOTE(review): the offset and size fields are decoded with `read_be_u40`
/// and `read_le_u32`; how those helpers behave when `offset_len != 5` or
/// `size_len != 4` is not visible here — confirm before relying on it.
pub fn parse_idx_entries(data: &[u8], spec: &IndexSpec) -> Result<Vec<IndexEntry>> {
    let key_len = usize::from(spec.key_len);
    let offset_len = usize::from(spec.offset_len);
    let size_len = usize::from(spec.size_len);

    let entry_len = key_len + offset_len + size_len;
    if entry_len == 0 {
        return Err(CascError::InvalidFormat("idx entry length is 0".into()));
    }

    // `offset_bits` comes straight from the file header. A plain `<<` / `>>`
    // with a count >= 64 panics in debug builds and is silently masked in
    // release builds, so validate it once up front.
    let offset_bits = u32::from(spec.offset_bits);
    let offset_mask = match 1u64.checked_shl(offset_bits) {
        Some(bit) => bit - 1,
        None => {
            return Err(CascError::InvalidFormat(format!(
                "idx offset_bits out of range: {}",
                spec.offset_bits
            )));
        }
    };

    // Only the first 9 key bytes are kept; shorter keys stay zero-padded.
    let copy_len = key_len.min(9);

    let entries: Vec<IndexEntry> = data
        .chunks_exact(entry_len)
        .map(|record| {
            let (key_bytes, rest) = record.split_at(key_len);
            let (offset_bytes, size_bytes) = rest.split_at(offset_len);

            let mut key = [0u8; 9];
            key[..copy_len].copy_from_slice(&key_bytes[..copy_len]);

            let raw_offset = read_be_u40(offset_bytes);
            IndexEntry {
                key,
                archive_number: (raw_offset >> offset_bits) as u32,
                archive_offset: raw_offset & offset_mask,
                size: read_le_u32(size_bytes),
            }
        })
        .collect();

    Ok(entries)
}
/// Splits an index file name of the form `BBVVVVVVVV.idx` into its parts.
///
/// `BB` is the bucket as two hex digits and `VVVVVVVV` the version as eight
/// hex digits (either case). Returns `Some((bucket, version))`, or `None`
/// when the name does not end in `.idx`, the stem is not exactly ten ASCII
/// hex digits, or the bucket is greater than `0x0F`.
pub fn parse_idx_filename(name: &str) -> Option<(u8, u32)> {
    let stem = name.strip_suffix(".idx")?;

    // Insisting on pure ASCII hex does two things: it makes the byte-indexed
    // split below safe for arbitrary UTF-8 names (a multi-byte character can
    // no longer straddle the split point), and it rejects the leading `+`
    // sign that `from_str_radix` would otherwise accept.
    if stem.len() != 10 || !stem.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }

    let (bucket_hex, version_hex) = stem.split_at(2);
    let bucket = u8::from_str_radix(bucket_hex, 16).ok()?;
    let version = u32::from_str_radix(version_hex, 16).ok()?;

    if bucket > 0x0F {
        None
    } else {
        Some((bucket, version))
    }
}
pub fn select_idx_files(data_dir: &Path) -> Result<Vec<PathBuf>> {
let pattern = data_dir.join("*.idx");
let pattern_str = pattern.to_string_lossy().to_string();
let mut best: HashMap<u8, (u32, PathBuf)> = HashMap::new();
for path in glob::glob(&pattern_str)
.map_err(|e| CascError::InvalidFormat(format!("glob error: {e}")))?
{
let path = path.map_err(|e| CascError::Io(e.into_error()))?;
let fname = match path.file_name().and_then(|f| f.to_str()) {
Some(f) => f.to_owned(),
None => continue,
};
if let Some((bucket, version)) = parse_idx_filename(&fname) {
let entry = best.entry(bucket).or_insert((0, PathBuf::new()));
if version >= entry.0 {
*entry = (version, path);
}
}
}
let mut result: Vec<PathBuf> = best.into_values().map(|(_, p)| p).collect();
result.sort();
Ok(result)
}
impl CascIndex {
    /// Builds the index from the `.idx` files selected by `select_idx_files`
    /// in `data_dir`.
    ///
    /// Files shorter than the `0x28`-byte header are skipped. If the header
    /// declares a larger entry table than the file actually holds, whatever
    /// bytes follow the header are parsed instead. When the same key occurs
    /// more than once, the entry parsed last wins.
    ///
    /// # Errors
    ///
    /// Propagates errors from file selection, from reading a file, and from
    /// parsing its header or entry table.
    pub fn load(data_dir: &Path) -> Result<Self> {
        // Fixed size of the header that precedes the entry table.
        const HEADER_LEN: usize = 0x28;

        let mut entries = HashMap::new();
        for path in select_idx_files(data_dir)? {
            let file_data = std::fs::read(&path)?;
            if file_data.len() < HEADER_LEN {
                continue;
            }

            let (spec, entries_size) = parse_idx_header(&file_data)?;

            // `entries_size` is file-controlled. Saturate so the addition
            // cannot overflow `usize` on 32-bit targets, then clamp to the
            // bytes that are really present.
            let entry_end = HEADER_LEN
                .saturating_add(entries_size as usize)
                .min(file_data.len());

            let parsed = parse_idx_entries(&file_data[HEADER_LEN..entry_end], &spec)?;
            entries.extend(parsed.into_iter().map(|entry| (entry.key, entry)));
        }

        Ok(Self { entries })
    }

    /// Number of distinct keys in the index.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Returns `true` when the index holds no entries.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Looks up an entry by the first 9 bytes of `ekey`.
    ///
    /// Returns `None` when `ekey` is shorter than 9 bytes or no entry has
    /// that 9-byte prefix as its key.
    pub fn find(&self, ekey: &[u8]) -> Option<&IndexEntry> {
        let prefix = ekey.get(..9)?;
        let mut key = [0u8; 9];
        key.copy_from_slice(prefix);
        self.entries.get(&key)
    }
}
// Unit tests for bucket hashing, header/entry parsing, file-name parsing and
// index lookup. All inputs are built in memory; nothing touches the disk.
#[cfg(test)]
mod tests {
use super::*;
// An all-zero key XORs to 0, so it lands in bucket 0.
#[test]
fn bucket_index_all_zeros() {
let ekey = [0u8; 16];
let bucket = bucket_index(&ekey);
assert!(bucket < 16);
assert_eq!(bucket, 0);
}
// Sweeps every value of the first key byte; the result must fit in 4 bits.
#[test]
fn bucket_index_always_less_than_16() {
for i in 0..=255u8 {
let mut ekey = [0u8; 16];
ekey[0] = i;
assert!(bucket_index(&ekey) < 16, "bucket >= 16 for ekey[0]={}", i);
}
}
// With only the first byte set, the XOR of the key bytes is that byte, so the
// expected bucket is its two nibbles XOR-ed together.
#[test]
fn bucket_index_specific() {
let ekey = [0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let i = 0xFF_u8;
let expected = (i & 0xF) ^ (i >> 4);
assert_eq!(bucket_index(&ekey), expected);
}
// Builds a minimal 0x28-byte header and checks every field the parser returns.
#[test]
fn parse_idx_header_valid() {
let mut header = [0u8; 0x28];
// Bytes 0..4 and 0x0A are not read by `parse_idx_header`.
header[0..4].copy_from_slice(&0x10u32.to_le_bytes());
// Version 7 is the only one the parser accepts.
header[0x08..0x0A].copy_from_slice(&7u16.to_le_bytes());
header[0x0A] = 0x05;
// The byte at 0x0B must be 0.
header[0x0B] = 0;
// size_len, offset_len, key_len, offset_bits.
header[0x0C] = 4;
header[0x0D] = 5;
header[0x0E] = 9;
header[0x0F] = 30;
// Entry-table size field at 0x20.
header[0x20..0x24].copy_from_slice(&36u32.to_le_bytes());
let (spec, entries_size) = parse_idx_header(&header).unwrap();
assert_eq!(spec.key_len, 9);
assert_eq!(spec.offset_len, 5);
assert_eq!(spec.size_len, 4);
assert_eq!(spec.offset_bits, 30);
assert_eq!(entries_size, 36);
}
// One 18-byte record: 9-byte key, 5-byte packed offset, 4-byte size.
#[test]
fn parse_idx_entries_single_entry() {
let spec = IndexSpec {
size_len: 4,
offset_len: 5,
key_len: 9,
offset_bits: 30,
};
let mut entry = [0u8; 18];
entry[0..9].copy_from_slice(&[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09]);
// Big-endian 0x00_4000_1000 = (1 << 30) | 0x1000: archive 1, offset 0x1000.
entry[9..14].copy_from_slice(&[0x00, 0x40, 0x00, 0x10, 0x00]);
entry[14..18].copy_from_slice(&256u32.to_le_bytes());
let entries = parse_idx_entries(&entry, &spec).unwrap();
assert_eq!(entries.len(), 1);
assert_eq!(
entries[0].key,
[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09]
);
assert_eq!(entries[0].archive_number, 1);
assert_eq!(entries[0].archive_offset, 0x1000);
assert_eq!(entries[0].size, 256);
}
// Two back-to-back records; checks that the second one is sliced correctly.
#[test]
fn parse_idx_entries_two_entries() {
let spec = IndexSpec {
size_len: 4,
offset_len: 5,
key_len: 9,
offset_bits: 30,
};
let mut data = [0u8; 36];
data[0..9].copy_from_slice(&[0xAA; 9]);
data[9..14].copy_from_slice(&[0x00, 0x00, 0x00, 0x00, 0x00]);
data[14..18].copy_from_slice(&100u32.to_le_bytes());
data[18..27].copy_from_slice(&[0xBB; 9]);
// Big-endian 0x00_8000_0100 = (2 << 30) | 0x100: archive 2, offset 0x100.
data[27..32].copy_from_slice(&[0x00, 0x80, 0x00, 0x01, 0x00]);
data[32..36].copy_from_slice(&200u32.to_le_bytes());
let entries = parse_idx_entries(&data, &spec).unwrap();
assert_eq!(entries.len(), 2);
assert_eq!(entries[0].archive_number, 0);
assert_eq!(entries[1].archive_number, 2);
assert_eq!(entries[1].archive_offset, 0x100);
}
// File names are two hex digits of bucket followed by eight hex digits of
// version; a stem of the wrong length is rejected.
#[test]
fn idx_filename_parsing() {
assert_eq!(parse_idx_filename("0000000001.idx"), Some((0x00, 1)));
assert_eq!(parse_idx_filename("0f00000219.idx"), Some((0x0F, 0x219)));
assert_eq!(parse_idx_filename("0500000003.idx"), Some((0x05, 3)));
assert_eq!(parse_idx_filename("invalid.idx"), None);
}
// Lookup uses only the first 9 bytes of the 16-byte key passed in.
#[test]
fn casc_index_find_hit() {
let entry = IndexEntry {
key: [1, 2, 3, 4, 5, 6, 7, 8, 9],
archive_number: 0,
archive_offset: 0,
size: 100,
};
let mut entries = HashMap::new();
entries.insert(entry.key, entry);
let index = CascIndex { entries };
let ekey = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
assert!(index.find(&ekey).is_some());
}
// An empty index never finds anything.
#[test]
fn casc_index_find_miss() {
let index = CascIndex {
entries: HashMap::new(),
};
let ekey = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
assert!(index.find(&ekey).is_none());
}
}