use bytemuck::{Pod, Zeroable};
use core::mem::{align_of, size_of};
use thiserror::Error;
/// Magic bytes (ASCII `EVEC`) stored in [`FileHeader::magic`] and checked by
/// [`FileHeader::from_bytes`].
pub const MAGIC: [u8; 4] = [b'E', b'V', b'E', b'C'];

/// Major format version; [`FileHeader::from_bytes`] rejects any other major.
pub const VERSION_MAJOR: u8 = 0;

/// Minor format version written by [`FileHeader::new`]; headers with a lower
/// minor version report `needs_migration()`.
pub const VERSION_MINOR: u8 = 4;

/// Oldest minor version that [`FileHeader::from_bytes`] still accepts.
pub const VERSION_MINOR_MIN: u8 = 1;

/// Magic bytes (ASCII `META`) identifying a [`MetadataSectionHeader`].
pub const METADATA_MAGIC: [u8; 4] = [b'M', b'E', b'T', b'A'];

/// Metadata section header version written by the constructors; it is also the
/// highest version `validate_version` accepts.
pub const METADATA_VERSION: u16 = 1;

/// Serialization format tag used by `MetadataSectionHeader::new_postcard`.
pub const FORMAT_POSTCARD: u8 = 1;

/// Serialization format tag used by `MetadataSectionHeader::new_json`.
pub const FORMAT_JSON: u8 = 2;
/// Bit masks for the `flags` field of [`FileHeader`].
///
/// Masks occupy distinct bits, so they can be combined with `|` and tested
/// with `&`.
// The module name is deliberately CamelCase (call sites read
// `Flags::HAS_METADATA`), which is why the naming lint is silenced here.
#[allow(non_snake_case)]
pub mod Flags {
    /// Bit 0. Not read anywhere in this file; presumably marks compressed
    /// data — confirm against the writer.
    pub const COMPRESSED: u16 = 0x0001;

    /// Bit 1. Not read anywhere in this file; presumably marks quantized
    /// vectors — confirm against the writer.
    pub const QUANTIZED: u16 = 0x0002;

    /// Bit 2. Tested by `FileHeader::has_metadata`.
    pub const HAS_METADATA: u16 = 0x0004;

    /// Bit 3. Not read anywhere in this file; presumably selects a flat
    /// index — confirm against the index code.
    pub const INDEX_TYPE_FLAT: u16 = 0x0008;
}
/// Fixed-layout file header.
///
/// The struct is `#[repr(C)]` and `Pod`, and `as_bytes` / `from_bytes` view it
/// directly as 64 raw bytes, so the order and width of the fields below define
/// the byte layout. Do not reorder or resize fields without updating the
/// layout assertions that follow the struct.
#[derive(Clone, Copy, Debug, Pod, Zeroable)]
#[repr(C)]
pub struct FileHeader {
/// Magic bytes; must equal `MAGIC` for `from_bytes` to accept the header.
pub magic: [u8; 4],
/// Major format version; `from_bytes` requires it to equal `VERSION_MAJOR`.
pub version_major: u8,
/// Minor format version; `from_bytes` requires at least `VERSION_MINOR_MIN`.
pub version_minor: u8,
/// Bit set built from the `Flags` constants (see `has_metadata`).
pub flags: u16,
/// Presumably the number of vectors stored in the file — not read in this file; confirm with the writer.
pub vector_count: u64,
/// Presumably the byte offset of the index section — not read in this file; confirm with the writer.
pub index_offset: u64,
/// Presumably the byte offset of the metadata section — not read in this file; confirm with the writer.
pub metadata_offset: u64,
/// Presumably the RNG seed used when building the index — not read in this file; confirm with the writer.
pub rng_seed: u64,
/// Vector dimensionality, as passed to `FileHeader::new`.
pub dimensions: u32,
/// CRC32 (`crc32fast`) of the 64 header bytes computed with this field set to zero.
pub header_crc: u32,
/// `FileHeader::new` initializes this to 16. Presumably the HNSW `M` parameter — confirm.
pub hnsw_m: u32,
/// `FileHeader::new` initializes this to 32. Presumably the HNSW layer-0 `M` parameter — confirm.
pub hnsw_m0: u32,
/// Presumably a checksum over the data section — not computed or checked in this file; confirm.
pub data_crc: u32,
/// Presumably the number of soft-deleted vectors — not read in this file; confirm.
pub deleted_count: u32, }
// Compile-time layout guards: `as_bytes` casts the header to `[u8; 64]`, so
// the size must stay exactly 64 bytes; the alignment is pinned at 8.
const _: () = assert!(size_of::<FileHeader>() == 64);
const _: () = assert!(align_of::<FileHeader>() == 8);
/// Fixed-layout header for a metadata section.
///
/// The struct is `#[repr(C)]` and `Pod`; `as_bytes` / `from_bytes` treat it as
/// 16 raw bytes, so the order and width of the fields below define the byte
/// layout.
#[derive(Clone, Copy, Debug, Pod, Zeroable)]
#[repr(C)]
pub struct MetadataSectionHeader {
/// Magic bytes; must equal `METADATA_MAGIC` to pass `validate_magic`.
pub magic: [u8; 4],
/// Header version; `validate_version` rejects values above `METADATA_VERSION`.
pub version: u16,
/// Serialization format tag: `FORMAT_POSTCARD` or `FORMAT_JSON`.
pub format: u8,
/// Written as 0 by the constructors; not inspected by `from_bytes`.
pub reserved: u8,
/// Stored verbatim from the constructor argument. Presumably the payload size in bytes — confirm with the writer.
pub size: u32,
/// Stored verbatim from the constructor argument. Presumably a CRC of the payload — it is not verified in this file.
pub crc: u32,
}
// Compile-time layout guards: `as_bytes` casts the header to `[u8; 16]`, so
// the size must stay exactly 16 bytes; the alignment is pinned at 4.
const _: () = assert!(size_of::<MetadataSectionHeader>() == 16);
const _: () = assert!(align_of::<MetadataSectionHeader>() == 4);
impl MetadataSectionHeader {
pub const MAGIC: [u8; 4] = METADATA_MAGIC;
pub const VERSION: u16 = METADATA_VERSION;
pub const FORMAT_POSTCARD: u8 = FORMAT_POSTCARD;
pub const FORMAT_JSON: u8 = FORMAT_JSON;
#[must_use]
pub fn new_postcard(size: u32, crc: u32) -> Self {
Self {
magic: Self::MAGIC,
version: Self::VERSION,
format: Self::FORMAT_POSTCARD,
reserved: 0,
size,
crc,
}
}
#[must_use]
pub fn new_json(size: u32, crc: u32) -> Self {
Self {
magic: Self::MAGIC,
version: Self::VERSION,
format: Self::FORMAT_JSON,
reserved: 0,
size,
crc,
}
}
#[must_use]
pub fn as_bytes(&self) -> &[u8; 16] {
bytemuck::cast_ref(self)
}
pub fn from_bytes(bytes: &[u8]) -> Result<Self, MetadataHeaderError> {
if bytes.len() < 16 {
return Err(MetadataHeaderError::BufferTooShort(bytes.len()));
}
let mut aligned_buf = [0u8; 16];
aligned_buf.copy_from_slice(&bytes[..16]);
let header = *bytemuck::from_bytes::<MetadataSectionHeader>(&aligned_buf);
header.validate_magic()?;
header.validate_version()?;
header.validate_format()?;
Ok(header)
}
pub fn validate_magic(&self) -> Result<(), MetadataHeaderError> {
if self.magic != Self::MAGIC {
return Err(MetadataHeaderError::InvalidMagic(self.magic));
}
Ok(())
}
pub fn validate_version(&self) -> Result<(), MetadataHeaderError> {
if self.version > Self::VERSION {
return Err(MetadataHeaderError::UnsupportedVersion(self.version));
}
Ok(())
}
pub fn validate_format(&self) -> Result<(), MetadataHeaderError> {
if self.format != Self::FORMAT_POSTCARD && self.format != Self::FORMAT_JSON {
return Err(MetadataHeaderError::UnsupportedFormat(self.format));
}
Ok(())
}
#[must_use]
pub fn is_postcard(&self) -> bool {
self.format == Self::FORMAT_POSTCARD
}
#[must_use]
pub fn is_json(&self) -> bool {
self.format == Self::FORMAT_JSON
}
}
/// Errors reported while parsing or validating a `MetadataSectionHeader`.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum MetadataHeaderError {
/// The magic bytes differ from `METADATA_MAGIC`; carries the bytes found.
#[error("invalid metadata magic: expected 'META', got {0:?}")]
InvalidMagic([u8; 4]),
/// The header version is newer than `METADATA_VERSION`; carries the version found.
#[error("unsupported metadata version: {0}")]
UnsupportedVersion(u16),
/// The format tag is neither `FORMAT_POSTCARD` nor `FORMAT_JSON`; carries the tag found.
#[error("unsupported serialization format: {0}")]
UnsupportedFormat(u8),
/// The input held fewer than 16 bytes; carries the actual length.
#[error("buffer too short: expected 16 bytes, got {0}")]
BufferTooShort(usize),
/// Input buffer alignment error. NOTE(review): no code visible in this file constructs this variant.
#[error("buffer is not 4-byte aligned")]
UnalignedBuffer,
/// CRC mismatch between two values. NOTE(review): no code visible in this file constructs this variant.
#[error("CRC mismatch: expected {expected:#x}, got {actual:#x}")]
CrcMismatch {
expected: u32,
actual: u32,
},
}
/// Errors reported by `FileHeader::from_bytes`.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum HeaderError {
/// The magic bytes differ from `MAGIC`; carries the bytes found.
#[error("invalid magic number: expected 'EVEC', got {0:?}")]
InvalidMagic([u8; 4]),
/// The major version differs from `VERSION_MAJOR`, or the minor version is
/// below `VERSION_MINOR_MIN`; carries `(major, minor)` as found.
#[error("unsupported version: {0}.{1}")]
UnsupportedVersion(u8, u8),
/// The stored header CRC does not match the recomputed one. As filled in by
/// `FileHeader::from_bytes`, `expected` is the value stored in the header
/// and `actual` is the value recomputed from the bytes.
#[error("checksum mismatch: expected {expected:#x}, got {actual:#x}")]
ChecksumMismatch {
expected: u32,
actual: u32,
},
/// The input held fewer than 64 bytes; carries the actual length.
#[error("buffer too short: expected 64 bytes, got {0}")]
BufferTooShort(usize),
/// The input slice could not be reinterpreted as a `FileHeader` in place
/// because it does not start on an 8-byte boundary.
#[error("buffer is not 8-byte aligned")]
UnalignedBuffer,
}
impl FileHeader {
    /// Magic bytes a valid file header must carry.
    pub const MAGIC: [u8; 4] = MAGIC;
    /// Major format version written by [`FileHeader::new`].
    pub const VERSION_MAJOR: u8 = VERSION_MAJOR;
    /// Minor format version written by [`FileHeader::new`].
    pub const VERSION_MINOR: u8 = VERSION_MINOR;

    /// Creates a header for vectors of the given dimensionality.
    ///
    /// The header carries the current format version, `hnsw_m = 16`,
    /// `hnsw_m0 = 32`, every other counter/offset/flag zeroed, and a freshly
    /// computed `header_crc`.
    #[must_use]
    pub fn new(dimensions: u32) -> Self {
        let mut fresh = Self {
            // Identification.
            magic: Self::MAGIC,
            version_major: Self::VERSION_MAJOR,
            version_minor: Self::VERSION_MINOR,
            // Caller-supplied and default parameters.
            dimensions,
            hnsw_m: 16,
            hnsw_m0: 32,
            // Everything else starts out empty.
            flags: 0,
            vector_count: 0,
            index_offset: 0,
            metadata_offset: 0,
            rng_seed: 0,
            data_crc: 0,
            deleted_count: 0,
            // Placeholder; filled in by `update_checksum` below.
            header_crc: 0,
        };
        fresh.update_checksum();
        fresh
    }

    /// Views the header as its 64 raw bytes (native field encoding).
    #[must_use]
    pub fn as_bytes(&self) -> &[u8; 64] {
        bytemuck::cast_ref::<Self, [u8; 64]>(self)
    }

    /// Parses and validates a header from the first 64 bytes of `bytes`.
    ///
    /// The slice is reinterpreted in place before being copied out, so it
    /// must start on an 8-byte boundary. Trailing bytes are ignored.
    ///
    /// # Errors
    ///
    /// Checks run in this order, and the first failure is returned:
    ///
    /// 1. [`HeaderError::BufferTooShort`] if fewer than 64 bytes are supplied.
    /// 2. [`HeaderError::UnalignedBuffer`] if the slice cannot be viewed as a header.
    /// 3. [`HeaderError::InvalidMagic`] if the magic bytes are wrong.
    /// 4. [`HeaderError::UnsupportedVersion`] if the major version differs from
    ///    `VERSION_MAJOR` or the minor version is below `VERSION_MINOR_MIN`.
    /// 5. [`HeaderError::ChecksumMismatch`] if the stored CRC does not match.
    pub fn from_bytes(bytes: &[u8]) -> Result<Self, HeaderError> {
        let raw = match bytes.get(..64) {
            Some(prefix) => prefix,
            None => return Err(HeaderError::BufferTooShort(bytes.len())),
        };

        let parsed: Self = match bytemuck::try_from_bytes::<Self>(raw) {
            Ok(view) => *view,
            Err(_) => return Err(HeaderError::UnalignedBuffer),
        };

        if parsed.magic != MAGIC {
            return Err(HeaderError::InvalidMagic(parsed.magic));
        }

        let version_supported =
            parsed.version_major == VERSION_MAJOR && parsed.version_minor >= VERSION_MINOR_MIN;
        if !version_supported {
            return Err(HeaderError::UnsupportedVersion(
                parsed.version_major,
                parsed.version_minor,
            ));
        }

        // The CRC covers the header with its own CRC field zeroed.
        let crc_input = Self {
            header_crc: 0,
            ..parsed
        };
        let recomputed = crc32fast::hash(crc_input.as_bytes());
        if recomputed != parsed.header_crc {
            return Err(HeaderError::ChecksumMismatch {
                expected: parsed.header_crc,
                actual: recomputed,
            });
        }

        Ok(parsed)
    }

    /// Recomputes `header_crc` over the header bytes with the CRC field zeroed.
    ///
    /// Call this after mutating any field, otherwise `from_bytes` will reject
    /// the serialized header with a checksum mismatch.
    pub fn update_checksum(&mut self) {
        self.header_crc = 0;
        let digest = crc32fast::hash(self.as_bytes());
        self.header_crc = digest;
    }

    /// Returns `true` when the header's minor version is older than `VERSION_MINOR`.
    #[must_use]
    pub fn needs_migration(&self) -> bool {
        VERSION_MINOR > self.version_minor
    }

    /// Returns `true` when the header's minor version is at least 3.
    #[must_use]
    pub fn supports_soft_delete(&self) -> bool {
        const SOFT_DELETE_SINCE_MINOR: u8 = 3;
        self.version_minor >= SOFT_DELETE_SINCE_MINOR
    }

    /// Returns `true` when the header's minor version is at least 4.
    #[must_use]
    pub fn supports_metadata(&self) -> bool {
        const METADATA_SINCE_MINOR: u8 = 4;
        self.version_minor >= METADATA_SINCE_MINOR
    }

    /// Returns `true` when the `Flags::HAS_METADATA` bit is set in `flags`.
    #[must_use]
    pub fn has_metadata(&self) -> bool {
        (self.flags & Flags::HAS_METADATA) != 0
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The file header must keep its 64-byte, 8-aligned layout.
    #[test]
    fn test_header_layout() {
        assert_eq!(size_of::<FileHeader>(), 64);
        assert_eq!(align_of::<FileHeader>(), 8);
    }

    /// A freshly built header carries a checksum and survives a byte roundtrip.
    #[test]
    fn test_new_header_validity() {
        let header = FileHeader::new(128);
        assert_eq!(header.magic, MAGIC);
        assert_eq!(header.dimensions, 128);
        assert_ne!(header.header_crc, 0);

        let bytes = header.as_bytes();
        let decoded = FileHeader::from_bytes(bytes).unwrap();
        assert_eq!(decoded.dimensions, 128);
    }

    /// A wrong magic is reported even when the checksum itself is consistent.
    #[test]
    fn test_invalid_magic() {
        let mut header = FileHeader::new(128);
        header.magic = [0x00, 0x00, 0x00, 0x00];
        header.update_checksum();

        let bytes = header.as_bytes();
        let result = FileHeader::from_bytes(bytes);
        assert!(matches!(result, Err(HeaderError::InvalidMagic(_))));
    }

    /// Mutating a field without refreshing the checksum must be detected.
    #[test]
    fn test_checksum_mismatch() {
        let mut header = FileHeader::new(128);
        header.dimensions = 256;

        let bytes = header.as_bytes();
        let result = FileHeader::from_bytes(bytes);
        assert!(matches!(result, Err(HeaderError::ChecksumMismatch { .. })));
    }

    /// A slice that does not start on an 8-byte boundary is rejected.
    #[test]
    fn test_unaligned_buffer_rejected() {
        // Backing storage with a guaranteed 8-byte-aligned start, so that
        // offset 1 is guaranteed to be misaligned. A plain `Vec<u8>` only
        // promises 1-byte alignment, which would make this test depend on
        // the allocator.
        #[repr(C, align(8))]
        struct AlignedStorage([u8; 72]);

        let header = FileHeader::new(64);
        let mut storage = AlignedStorage([0u8; 72]);
        storage.0[1..65].copy_from_slice(header.as_bytes());

        let misaligned = &storage.0[1..65];
        let result = FileHeader::from_bytes(misaligned);
        assert!(matches!(result, Err(HeaderError::UnalignedBuffer)));
    }

    /// The metadata header must keep its 16-byte, 4-aligned layout.
    #[test]
    fn test_metadata_header_layout() {
        assert_eq!(size_of::<MetadataSectionHeader>(), 16);
        assert_eq!(align_of::<MetadataSectionHeader>(), 4);
    }

    #[test]
    fn test_metadata_header_new_postcard() {
        let header = MetadataSectionHeader::new_postcard(1024, 0xDEAD_BEEF);
        assert_eq!(header.magic, *b"META");
        assert_eq!(header.version, 1);
        assert_eq!(header.format, FORMAT_POSTCARD);
        assert_eq!(header.reserved, 0);
        assert_eq!(header.size, 1024);
        assert_eq!(header.crc, 0xDEAD_BEEF);
        assert!(header.is_postcard());
        assert!(!header.is_json());
    }

    #[test]
    fn test_metadata_header_new_json() {
        let header = MetadataSectionHeader::new_json(2048, 0xCAFE_BABE);
        assert_eq!(header.magic, *b"META");
        assert_eq!(header.version, 1);
        assert_eq!(header.format, FORMAT_JSON);
        assert_eq!(header.reserved, 0);
        assert_eq!(header.size, 2048);
        assert_eq!(header.crc, 0xCAFE_BABE);
        assert!(!header.is_postcard());
        assert!(header.is_json());
    }

    #[test]
    fn test_metadata_header_roundtrip() {
        let header = MetadataSectionHeader::new_postcard(512, 0x1234_5678);
        let bytes = header.as_bytes();
        assert_eq!(bytes.len(), 16);

        let decoded = MetadataSectionHeader::from_bytes(bytes).unwrap();
        assert_eq!(decoded.magic, header.magic);
        assert_eq!(decoded.version, header.version);
        assert_eq!(decoded.format, header.format);
        assert_eq!(decoded.size, header.size);
        assert_eq!(decoded.crc, header.crc);
    }

    #[test]
    fn test_metadata_header_invalid_magic() {
        let mut header = MetadataSectionHeader::new_postcard(0, 0);
        header.magic = [0x00, 0x00, 0x00, 0x00];

        let bytes = header.as_bytes();
        let result = MetadataSectionHeader::from_bytes(bytes);
        assert!(matches!(result, Err(MetadataHeaderError::InvalidMagic(_))));
    }

    #[test]
    fn test_metadata_header_unsupported_version() {
        let mut header = MetadataSectionHeader::new_postcard(0, 0);
        header.version = 99;

        let bytes = header.as_bytes();
        let result = MetadataSectionHeader::from_bytes(bytes);
        assert!(matches!(
            result,
            Err(MetadataHeaderError::UnsupportedVersion(99))
        ));
    }

    #[test]
    fn test_metadata_header_unsupported_format() {
        let mut header = MetadataSectionHeader::new_postcard(0, 0);
        header.format = 99;

        let bytes = header.as_bytes();
        let result = MetadataSectionHeader::from_bytes(bytes);
        assert!(matches!(
            result,
            Err(MetadataHeaderError::UnsupportedFormat(99))
        ));
    }

    #[test]
    fn test_metadata_header_buffer_too_short() {
        let bytes = [0u8; 8];
        let result = MetadataSectionHeader::from_bytes(&bytes);
        assert!(matches!(
            result,
            Err(MetadataHeaderError::BufferTooShort(8))
        ));
    }

    /// Flag masks occupy distinct low bits and combine with `|`.
    #[test]
    fn test_flags_constants() {
        assert_eq!(Flags::COMPRESSED, 0b0001);
        assert_eq!(Flags::QUANTIZED, 0b0010);
        assert_eq!(Flags::HAS_METADATA, 0b0100);
        assert_eq!(Flags::INDEX_TYPE_FLAT, 0b1000);

        let combined = Flags::COMPRESSED | Flags::HAS_METADATA;
        assert_eq!(combined, 0b0101);

        let flat_with_metadata = Flags::INDEX_TYPE_FLAT | Flags::HAS_METADATA;
        assert_eq!(flat_with_metadata, 0b1100);
    }
}