use super::PersistenceError;
use crate::hnsw::HnswConfig;
use crate::persistence::FileHeader;
use crc32fast::Hasher;
/// Reads the file header from `data` and derives an [`HnswConfig`] from it.
///
/// Only `dimensions`, `m` and `m0` are taken from the parsed header. The
/// remaining fields are filled with fixed values (`ef_construction = 200`,
/// `ef_search = 50`, `metric = 0`, zeroed `_reserved`) and are not read from
/// the file.
///
/// # Errors
///
/// Propagates any [`PersistenceError`] produced by [`read_file_header`].
pub fn read_index_header(data: &[u8]) -> Result<HnswConfig, PersistenceError> {
    read_file_header(data).map(|file_header| HnswConfig {
        // Values recovered from the on-disk header.
        dimensions: file_header.dimensions,
        m: file_header.hnsw_m,
        m0: file_header.hnsw_m0,
        // Values not taken from the header; fixed constants are used instead.
        ef_construction: 200,
        ef_search: 50,
        metric: 0,
        _reserved: [0; 2],
    })
}
/// Parses and validates the fixed 64-byte file header at the start of `data`.
///
/// Checks are performed in this order: buffer length, magic bytes, header
/// CRC32, major version. All multi-byte integers are decoded little-endian.
///
/// Byte layout read by this function:
///
/// | Offset   | Field             | Type      |
/// |----------|-------------------|-----------|
/// | `0..4`   | `magic`           | `[u8; 4]` |
/// | `4`      | `version_major`   | `u8`      |
/// | `5`      | `version_minor`   | `u8`      |
/// | `6..8`   | `flags`           | `u16`     |
/// | `8..16`  | `vector_count`    | `u64`     |
/// | `16..24` | `index_offset`    | `u64`     |
/// | `24..32` | `metadata_offset` | `u64`     |
/// | `32..40` | `rng_seed`        | `u64`     |
/// | `40..44` | `dimensions`      | `u32`     |
/// | `44..48` | `header_crc`      | `u32`     |
/// | `48..52` | `hnsw_m`          | `u32`     |
/// | `52..56` | `hnsw_m0`         | `u32`     |
/// | `56..60` | `data_crc`        | `u32`     |
/// | `60..64` | `deleted_count`   | `u32`     |
///
/// The header CRC is computed over all 64 header bytes with the `header_crc`
/// field itself replaced by zeros.
///
/// # Errors
///
/// * [`PersistenceError::BufferTooSmall`] if `data` is shorter than 64 bytes.
/// * [`PersistenceError::InvalidMagic`] if the first four bytes differ from
///   `FileHeader::MAGIC`.
/// * [`PersistenceError::ChecksumMismatch`] if the stored header CRC does not
///   match the recomputed one.
/// * [`PersistenceError::UnsupportedVersion`] if the major version differs
///   from `FileHeader::VERSION_MAJOR`.
pub fn read_file_header(data: &[u8]) -> Result<FileHeader, PersistenceError> {
    const HEADER_LEN: usize = 64;
    const CRC_OFFSET: usize = 44;
    const CRC_LEN: usize = 4;

    // Copies `N` bytes starting at `offset` into a fixed-size array.
    // Callers only pass ranges inside the already length-checked header, so
    // the slice always has exactly `N` bytes and this cannot fail.
    fn array_at<const N: usize>(bytes: &[u8], offset: usize) -> [u8; N] {
        let mut out = [0u8; N];
        out.copy_from_slice(&bytes[offset..offset + N]);
        out
    }

    // Single up-front length check; every later field access relies on it.
    if data.len() < HEADER_LEN {
        return Err(PersistenceError::BufferTooSmall {
            expected: HEADER_LEN,
            actual: data.len(),
        });
    }
    let header = &data[..HEADER_LEN];

    let magic: [u8; 4] = array_at(header, 0);
    if magic != FileHeader::MAGIC {
        return Err(PersistenceError::InvalidMagic {
            expected: FileHeader::MAGIC,
            actual: magic,
        });
    }

    // The CRC covers the whole header with its own field zeroed out.
    let stored_crc = u32::from_le_bytes(array_at(header, CRC_OFFSET));
    let mut crc_input: [u8; HEADER_LEN] = array_at(header, 0);
    crc_input[CRC_OFFSET..CRC_OFFSET + CRC_LEN].fill(0);
    let mut hasher = Hasher::new();
    hasher.update(&crc_input);
    let calculated_crc = hasher.finalize();
    if stored_crc != calculated_crc {
        return Err(PersistenceError::ChecksumMismatch {
            expected: stored_crc,
            actual: calculated_crc,
        });
    }

    // Only the major version is checked; the minor version is reported in the
    // error but does not affect acceptance.
    let version_major = header[4];
    let version_minor = header[5];
    if version_major != FileHeader::VERSION_MAJOR {
        return Err(PersistenceError::UnsupportedVersion(
            version_major,
            version_minor,
        ));
    }

    Ok(FileHeader {
        magic,
        version_major,
        version_minor,
        flags: u16::from_le_bytes(array_at(header, 6)),
        vector_count: u64::from_le_bytes(array_at(header, 8)),
        index_offset: u64::from_le_bytes(array_at(header, 16)),
        metadata_offset: u64::from_le_bytes(array_at(header, 24)),
        rng_seed: u64::from_le_bytes(array_at(header, 32)),
        dimensions: u32::from_le_bytes(array_at(header, 40)),
        header_crc: stored_crc,
        hnsw_m: u32::from_le_bytes(array_at(header, 48)),
        hnsw_m0: u32::from_le_bytes(array_at(header, 52)),
        data_crc: u32::from_le_bytes(array_at(header, 56)),
        deleted_count: u32::from_le_bytes(array_at(header, 60)),
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::persistence::writer::write_empty_index;

    /// A freshly written empty index must round-trip its header fields.
    #[test]
    fn test_read_valid_header() {
        let config = HnswConfig::new(128);
        let bytes = write_empty_index(&config);
        let read_config = read_index_header(&bytes).unwrap();
        assert_eq!(read_config.dimensions, 128);
        assert_eq!(read_config.m, 12);
        assert_eq!(read_config.m0, 24);
    }

    /// Buffers shorter than the 64-byte header are rejected before any field
    /// is inspected.
    #[test]
    fn test_read_buffer_too_small() {
        let bytes = [0u8; 63];
        let result = read_index_header(&bytes);
        assert!(matches!(
            result,
            Err(PersistenceError::BufferTooSmall {
                expected: 64,
                actual: 63
            })
        ));
    }

    /// Corrupting the first magic byte must be reported as `InvalidMagic`.
    #[test]
    fn test_read_invalid_magic() {
        let config = HnswConfig::new(128);
        let mut bytes = write_empty_index(&config);
        // XOR guarantees the byte differs from the valid magic, whatever it is.
        bytes[0] ^= 0xFF;
        let result = read_index_header(&bytes);
        assert!(matches!(result, Err(PersistenceError::InvalidMagic { .. })));
    }

    /// Changing a CRC-covered byte without updating the CRC must be detected.
    #[test]
    fn test_read_invalid_crc() {
        let config = HnswConfig::new(128);
        let mut bytes = write_empty_index(&config);
        // Byte 40 is the first byte of `dimensions`; XOR guarantees it changes.
        bytes[40] ^= 0xFF;
        let result = read_index_header(&bytes);
        assert!(matches!(
            result,
            Err(PersistenceError::ChecksumMismatch { .. })
        ));
    }

    /// A header with a valid CRC but a different major version is rejected.
    #[test]
    fn test_read_unsupported_version() {
        let config = HnswConfig::new(128);
        let mut bytes = write_empty_index(&config);
        bytes[4] = FileHeader::VERSION_MAJOR.wrapping_add(1);

        // Re-seal the header so the version check, not the CRC check, fails:
        // the CRC covers the 64 header bytes with the CRC field zeroed.
        bytes[44..48].fill(0);
        let mut hasher = Hasher::new();
        hasher.update(&bytes[..64]);
        let crc = hasher.finalize();
        bytes[44..48].copy_from_slice(&crc.to_le_bytes());

        let result = read_index_header(&bytes);
        assert!(matches!(
            result,
            Err(PersistenceError::UnsupportedVersion(..))
        ));
    }
}