use alloc::string::String;
use alloc::vec::Vec;
use super::hnsw::HnswIndex;
use super::types::{DistanceMetric, IndexType, VectorError, VectorIndexMeta};
/// Magic number stored in the first 8 bytes of a serialized index header.
///
/// Read as big-endian ASCII the value spells `LUNAVECT`; the header encoder
/// writes it little-endian, so the bytes appear reversed in the encoded form.
const VECTOR_INDEX_MAGIC: u64 = 0x4C55_4E41_5645_4354;
/// Format version written into new headers; header decoding rejects any
/// version greater than this value.
const FORMAT_VERSION: u32 = 1;
/// Fixed-size header that opens every serialized vector index.
///
/// [`VectorIndexHeader::to_bytes`] encodes the fields in declaration order,
/// little-endian and without padding, for a total of
/// [`VectorIndexHeader::SIZE`] bytes:
///
/// | bytes  | field                  |
/// |--------|------------------------|
/// | 0..8   | `magic`                |
/// | 8..12  | `version`              |
/// | 12..16 | `index_type`           |
/// | 16..24 | `vector_count`         |
/// | 24..28 | `dimensions`           |
/// | 28..32 | `distance_metric`      |
/// | 32..34 | `hnsw_m`               |
/// | 34..36 | `hnsw_ef_construction` |
/// | 36..38 | `hnsw_ef_search`       |
/// | 38..40 | `layer_count`          |
/// | 40..48 | `entry_point`          |
/// | 48..52 | `model_id`             |
/// | 52..64 | `reserved`             |
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct VectorIndexHeader {
    /// Format marker; decoding fails unless it equals `VECTOR_INDEX_MAGIC`.
    pub magic: u64,
    /// Format version; decoding fails if it is newer than `FORMAT_VERSION`.
    pub version: u32,
    /// Index kind code: 0 = HNSW, 1 = IVF, 2 = flat.
    pub index_type: u32,
    /// Number of vectors recorded in the index metadata.
    pub vector_count: u64,
    /// Number of `f32` components per vector.
    pub dimensions: u32,
    /// Metric code: 0 = cosine, 1 = Euclidean, 2 = dot product,
    /// 3 = Manhattan, 4 = Hamming.
    pub distance_metric: u32,
    /// HNSW `m` parameter copied from the metadata.
    pub hnsw_m: u16,
    /// HNSW `ef_construction` parameter copied from the metadata.
    pub hnsw_ef_construction: u16,
    /// HNSW `ef_search` parameter; [`VectorIndexHeader::new`] always stores 50.
    pub hnsw_ef_search: u16,
    /// Layer count; [`VectorIndexHeader::new`] stores the metadata's `max_layer`.
    pub layer_count: u16,
    /// Entry-point identifier copied from the metadata.
    pub entry_point: u64,
    /// Caller-supplied model identifier.
    pub model_id: u32,
    /// Spare bytes; zero in headers built by [`VectorIndexHeader::new`].
    pub reserved: [u8; 12],
}

impl VectorIndexHeader {
    /// Encoded size of the header in bytes.
    pub const SIZE: usize = 64;

    /// `hnsw_ef_search` value stamped into every header built by `new`
    /// (the metadata type carries no search-time parameter).
    const DEFAULT_EF_SEARCH: u16 = 50;

    /// Builds a header describing `meta`, tagged with `model_id`.
    ///
    /// The magic and version are set to the current format constants and the
    /// reserved bytes are zeroed.
    pub fn new(meta: &VectorIndexMeta, model_id: u32) -> Self {
        let index_type = match meta.index_type {
            IndexType::Hnsw => 0,
            IndexType::Ivf => 1,
            IndexType::Flat => 2,
        };
        let distance_metric = match meta.distance_metric {
            DistanceMetric::Cosine => 0,
            DistanceMetric::Euclidean => 1,
            DistanceMetric::DotProduct => 2,
            DistanceMetric::Manhattan => 3,
            DistanceMetric::Hamming => 4,
        };
        Self {
            magic: VECTOR_INDEX_MAGIC,
            version: FORMAT_VERSION,
            index_type,
            vector_count: meta.vector_count,
            dimensions: meta.dimensions,
            distance_metric,
            hnsw_m: meta.hnsw_m,
            hnsw_ef_construction: meta.hnsw_ef_construction,
            hnsw_ef_search: Self::DEFAULT_EF_SEARCH,
            layer_count: u16::from(meta.max_layer),
            entry_point: meta.entry_point,
            model_id,
            reserved: [0u8; 12],
        }
    }

    /// Encodes the header into its fixed little-endian byte layout.
    ///
    /// All fields are written, including `reserved`, so a header obtained
    /// from [`VectorIndexHeader::from_bytes`] re-encodes to the same bytes.
    pub fn to_bytes(&self) -> [u8; Self::SIZE] {
        let mut buf = [0u8; Self::SIZE];
        buf[0..8].copy_from_slice(&self.magic.to_le_bytes());
        buf[8..12].copy_from_slice(&self.version.to_le_bytes());
        buf[12..16].copy_from_slice(&self.index_type.to_le_bytes());
        buf[16..24].copy_from_slice(&self.vector_count.to_le_bytes());
        buf[24..28].copy_from_slice(&self.dimensions.to_le_bytes());
        buf[28..32].copy_from_slice(&self.distance_metric.to_le_bytes());
        buf[32..34].copy_from_slice(&self.hnsw_m.to_le_bytes());
        buf[34..36].copy_from_slice(&self.hnsw_ef_construction.to_le_bytes());
        buf[36..38].copy_from_slice(&self.hnsw_ef_search.to_le_bytes());
        buf[38..40].copy_from_slice(&self.layer_count.to_le_bytes());
        buf[40..48].copy_from_slice(&self.entry_point.to_le_bytes());
        buf[48..52].copy_from_slice(&self.model_id.to_le_bytes());
        // Previously omitted: without this, non-zero reserved bytes read by
        // `from_bytes` were silently zeroed on the next write.
        buf[52..64].copy_from_slice(&self.reserved);
        buf
    }

    /// Decodes a header from the first [`VectorIndexHeader::SIZE`] bytes of `buf`.
    ///
    /// Bytes past the header are ignored.
    ///
    /// # Errors
    ///
    /// * `VectorError::SerializationError` if `buf` is shorter than the header.
    /// * `VectorError::CorruptedIndex` if the magic number does not match.
    /// * `VectorError::NotSupported` if the version is newer than
    ///   `FORMAT_VERSION`.
    pub fn from_bytes(buf: &[u8]) -> Result<Self, VectorError> {
        if buf.len() < Self::SIZE {
            return Err(VectorError::SerializationError);
        }

        let magic = Self::read_u64(buf, 0);
        if magic != VECTOR_INDEX_MAGIC {
            return Err(VectorError::CorruptedIndex);
        }

        let version = Self::read_u32(buf, 8);
        if version > FORMAT_VERSION {
            return Err(VectorError::NotSupported(
                "Unsupported vector index version".into(),
            ));
        }

        let mut reserved = [0u8; 12];
        reserved.copy_from_slice(&buf[52..64]);

        Ok(Self {
            magic,
            version,
            index_type: Self::read_u32(buf, 12),
            vector_count: Self::read_u64(buf, 16),
            dimensions: Self::read_u32(buf, 24),
            distance_metric: Self::read_u32(buf, 28),
            hnsw_m: Self::read_u16(buf, 32),
            hnsw_ef_construction: Self::read_u16(buf, 34),
            hnsw_ef_search: Self::read_u16(buf, 36),
            layer_count: Self::read_u16(buf, 38),
            entry_point: Self::read_u64(buf, 40),
            model_id: Self::read_u32(buf, 48),
            reserved,
        })
    }

    /// Converts the header back into index metadata.
    ///
    /// Unknown `index_type` codes fall back to `IndexType::Flat` and unknown
    /// `distance_metric` codes fall back to `DistanceMetric::Cosine`. A
    /// `layer_count` above 255 is clamped to `u8::MAX` rather than wrapped.
    pub fn to_meta(&self) -> VectorIndexMeta {
        let index_type = match self.index_type {
            0 => IndexType::Hnsw,
            1 => IndexType::Ivf,
            _ => IndexType::Flat,
        };
        let distance_metric = match self.distance_metric {
            0 => DistanceMetric::Cosine,
            1 => DistanceMetric::Euclidean,
            2 => DistanceMetric::DotProduct,
            3 => DistanceMetric::Manhattan,
            // `new` writes 4 for Hamming; it used to decode as Cosine.
            4 => DistanceMetric::Hamming,
            _ => DistanceMetric::Cosine,
        };
        VectorIndexMeta {
            index_type,
            vector_count: self.vector_count,
            dimensions: self.dimensions,
            distance_metric,
            hnsw_m: self.hnsw_m,
            hnsw_ef_construction: self.hnsw_ef_construction,
            max_layer: self.layer_count.min(u16::from(u8::MAX)) as u8,
            entry_point: self.entry_point,
        }
    }

    /// Reads a little-endian `u16` at `at`; the caller guarantees the bounds.
    fn read_u16(buf: &[u8], at: usize) -> u16 {
        u16::from_le_bytes([buf[at], buf[at + 1]])
    }

    /// Reads a little-endian `u32` at `at`; the caller guarantees the bounds.
    fn read_u32(buf: &[u8], at: usize) -> u32 {
        u32::from_le_bytes([buf[at], buf[at + 1], buf[at + 2], buf[at + 3]])
    }

    /// Reads a little-endian `u64` at `at`; the caller guarantees the bounds.
    fn read_u64(buf: &[u8], at: usize) -> u64 {
        let mut raw = [0u8; 8];
        raw.copy_from_slice(&buf[at..at + 8]);
        u64::from_le_bytes(raw)
    }
}
/// One stored vector: the id of the object it belongs to plus its embedding.
///
/// Encoded as the little-endian `u64` id followed by each `f32` component in
/// little-endian order. The component count is not stored in the record; the
/// reader must supply it.
#[derive(Debug, Clone)]
pub struct VectorRecord {
    /// Identifier of the object this embedding describes.
    pub object_id: u64,
    /// Embedding components.
    pub embedding: Vec<f32>,
}

impl VectorRecord {
    /// Bytes occupied by the `object_id` prefix.
    const ID_LEN: usize = 8;
    /// Bytes occupied by one `f32` component.
    const COMPONENT_LEN: usize = 4;

    /// Encodes the record as `object_id` followed by every embedding component.
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut buf = Vec::with_capacity(Self::size(self.embedding.len()));
        buf.extend_from_slice(&self.object_id.to_le_bytes());
        for component in &self.embedding {
            buf.extend_from_slice(&component.to_le_bytes());
        }
        buf
    }

    /// Decodes a record with `dimensions` components from the start of `buf`.
    ///
    /// Bytes beyond the record are ignored.
    ///
    /// # Errors
    ///
    /// Returns `VectorError::SerializationError` if `buf` is too short, or if
    /// the byte size implied by `dimensions` does not fit in `usize` (this
    /// used to overflow and panic).
    pub fn from_bytes(buf: &[u8], dimensions: usize) -> Result<Self, VectorError> {
        let expected_size = dimensions
            .checked_mul(Self::COMPONENT_LEN)
            .and_then(|payload| payload.checked_add(Self::ID_LEN));
        let expected_size = match expected_size {
            Some(size) if size <= buf.len() => size,
            _ => return Err(VectorError::SerializationError),
        };

        let (id_bytes, payload) = buf[..expected_size].split_at(Self::ID_LEN);
        let mut raw_id = [0u8; 8];
        raw_id.copy_from_slice(id_bytes);

        let embedding = payload
            .chunks_exact(Self::COMPONENT_LEN)
            .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
            .collect();

        Ok(Self {
            object_id: u64::from_le_bytes(raw_id),
            embedding,
        })
    }

    /// Encoded size in bytes of a record with `dimensions` components.
    ///
    /// Saturates at `usize::MAX` instead of overflowing.
    pub fn size(dimensions: usize) -> usize {
        dimensions
            .saturating_mul(Self::COMPONENT_LEN)
            .saturating_add(Self::ID_LEN)
    }
}
/// One graph layer in encoded form.
///
/// `adjacencies` is a concatenation of per-node entries, each laid out as
/// `node_id: u64 | neighbor_count: u16 | neighbor_count x u64`, all
/// little-endian.
#[derive(Debug, Clone)]
pub struct SerializedLayer {
    /// Number of node entries supplied when the layer was built.
    pub node_count: u32,
    /// Encoded adjacency entries (see the type-level layout description).
    pub adjacencies: Vec<u8>,
}

impl SerializedLayer {
    /// Bytes in the `node_count` + blob-length prefix written by `to_bytes`.
    const PREFIX_LEN: usize = 8;
    /// Bytes in a node entry before its neighbor list (`u64` id + `u16` count).
    const ENTRY_HEAD_LEN: usize = 10;
    /// Bytes per encoded neighbor id.
    const NEIGHBOR_LEN: usize = 8;

    /// Encodes `(node_id, neighbors)` pairs into a layer.
    ///
    /// The neighbor count is stored as a `u16`, so at most 65,535 neighbors
    /// are written per node and any beyond that are dropped. Previously the
    /// count was truncated while every neighbor was still written, which
    /// corrupted all following entries.
    pub fn new(adjacencies: &[(u64, Vec<u64>)]) -> Self {
        let mut blob = Vec::new();
        for (node_id, neighbors) in adjacencies {
            let keep = neighbors.len().min(usize::from(u16::MAX));
            let kept = &neighbors[..keep];
            blob.extend_from_slice(&node_id.to_le_bytes());
            // `keep` is at most u16::MAX, so this cast is lossless.
            blob.extend_from_slice(&(keep as u16).to_le_bytes());
            for neighbor in kept {
                blob.extend_from_slice(&neighbor.to_le_bytes());
            }
        }
        Self {
            node_count: adjacencies.len() as u32,
            adjacencies: blob,
        }
    }

    /// Encodes the layer as `node_count: u32 | blob_len: u32 | blob`.
    ///
    /// The blob length is written as a `u32`, so blobs of 4 GiB or more are
    /// not representable in this format.
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut buf = Vec::with_capacity(Self::PREFIX_LEN + self.adjacencies.len());
        buf.extend_from_slice(&self.node_count.to_le_bytes());
        buf.extend_from_slice(&(self.adjacencies.len() as u32).to_le_bytes());
        buf.extend_from_slice(&self.adjacencies);
        buf
    }

    /// Decodes one layer from the start of `buf`.
    ///
    /// Returns the layer together with the bytes that follow it, so
    /// consecutive layers can be decoded by feeding the remainder back in.
    ///
    /// # Errors
    ///
    /// Returns `VectorError::SerializationError` if `buf` is shorter than the
    /// prefix or than the blob length the prefix declares.
    pub fn from_bytes(buf: &[u8]) -> Result<(Self, &[u8]), VectorError> {
        if buf.len() < Self::PREFIX_LEN {
            return Err(VectorError::SerializationError);
        }
        let node_count = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
        let data_len = u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]) as usize;

        // Compare against what is left after the prefix so the check cannot
        // overflow on 32-bit targets.
        let body = &buf[Self::PREFIX_LEN..];
        if body.len() < data_len {
            return Err(VectorError::SerializationError);
        }
        let (blob, remaining) = body.split_at(data_len);
        Ok((
            Self {
                node_count,
                adjacencies: blob.to_vec(),
            },
            remaining,
        ))
    }

    /// Decodes the adjacency blob back into `(node_id, neighbors)` pairs.
    ///
    /// Decoding is best-effort: it stops when fewer than 10 bytes remain, and
    /// if the final entry declares more neighbors than the blob holds, only
    /// the complete ones are returned. `node_count` is not consulted.
    pub fn parse_adjacencies(&self) -> Vec<(u64, Vec<u64>)> {
        let mut result = Vec::new();
        let mut rest: &[u8] = &self.adjacencies;

        while rest.len() >= Self::ENTRY_HEAD_LEN {
            let mut raw_id = [0u8; 8];
            raw_id.copy_from_slice(&rest[..8]);
            let node_id = u64::from_le_bytes(raw_id);
            let declared = u16::from_le_bytes([rest[8], rest[9]]) as usize;
            rest = &rest[Self::ENTRY_HEAD_LEN..];

            // Take only as many whole neighbors as are actually present.
            let present = declared.min(rest.len() / Self::NEIGHBOR_LEN);
            let (encoded, tail) = rest.split_at(present * Self::NEIGHBOR_LEN);
            let neighbors: Vec<u64> = encoded
                .chunks_exact(Self::NEIGHBOR_LEN)
                .map(|c| u64::from_le_bytes([c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]]))
                .collect();

            rest = tail;
            result.push((node_id, neighbors));
        }
        result
    }
}
/// A complete index in its serializable form.
///
/// Encoded layout (all integers little-endian):
///
/// ```text
/// header | vector_count: u64 | records... | layer_count: u32 | layers... | checksum: u32
/// ```
///
/// The checksum is the wrapping sum of every byte that precedes it.
#[derive(Debug, Clone)]
pub struct SerializedIndex {
    /// Index header.
    pub header: VectorIndexHeader,
    /// Stored vectors, in encoding order.
    pub vectors: Vec<VectorRecord>,
    /// Encoded graph layers, in encoding order.
    pub layers: Vec<SerializedLayer>,
}

impl SerializedIndex {
    /// Bytes occupied by the trailing checksum.
    const CHECKSUM_LEN: usize = 4;

    /// Wrapping sum of all bytes.
    ///
    /// Wrapping is deliberate: a plain `sum()` panics in debug builds once the
    /// payload is large enough to exceed `u32::MAX`, and wraps in release, so
    /// this matches the values release builds have always produced.
    fn checksum(bytes: &[u8]) -> u32 {
        bytes
            .iter()
            .fold(0u32, |acc, &byte| acc.wrapping_add(u32::from(byte)))
    }

    /// Reads a little-endian `u32` at `at`; the caller guarantees the bounds.
    fn read_u32(buf: &[u8], at: usize) -> u32 {
        u32::from_le_bytes([buf[at], buf[at + 1], buf[at + 2], buf[at + 3]])
    }

    /// Reads a little-endian `u64` at `at`; the caller guarantees the bounds.
    fn read_u64(buf: &[u8], at: usize) -> u64 {
        let mut raw = [0u8; 8];
        raw.copy_from_slice(&buf[at..at + 8]);
        u64::from_le_bytes(raw)
    }

    /// Encodes the whole index, ending with the checksum.
    ///
    /// Each record is written with its own embedding length, while
    /// [`SerializedIndex::from_bytes`] reads records using the header's
    /// `dimensions`; the two only agree when every embedding has exactly that
    /// many components.
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut buf = Vec::new();
        buf.extend_from_slice(&self.header.to_bytes());

        buf.extend_from_slice(&(self.vectors.len() as u64).to_le_bytes());
        for record in &self.vectors {
            buf.extend_from_slice(&record.to_bytes());
        }

        buf.extend_from_slice(&(self.layers.len() as u32).to_le_bytes());
        for layer in &self.layers {
            buf.extend_from_slice(&layer.to_bytes());
        }

        let checksum = Self::checksum(&buf);
        buf.extend_from_slice(&checksum.to_le_bytes());
        buf
    }

    /// Decodes an index produced by [`SerializedIndex::to_bytes`].
    ///
    /// Bytes after the checksum are ignored.
    ///
    /// # Errors
    ///
    /// * `VectorError::SerializationError` if the buffer is truncated or
    ///   declares more records/layers than it can contain.
    /// * `VectorError::CorruptedIndex` if the stored checksum does not match
    ///   the decoded payload.
    /// * Any error returned while decoding the header, a record or a layer.
    pub fn from_bytes(buf: &[u8]) -> Result<Self, VectorError> {
        if buf.len() < VectorIndexHeader::SIZE + 8 {
            return Err(VectorError::SerializationError);
        }
        let header = VectorIndexHeader::from_bytes(&buf[..VectorIndexHeader::SIZE])?;
        let mut offset = VectorIndexHeader::SIZE;

        let declared_vectors = Self::read_u64(buf, offset);
        offset += 8;

        // Same formula as `VectorRecord::size`, computed with checked
        // arithmetic so a hostile `dimensions` cannot overflow.
        let dimensions = header.dimensions as usize;
        let record_size = dimensions
            .checked_mul(4)
            .and_then(|payload| payload.checked_add(8))
            .ok_or(VectorError::SerializationError)?;

        // Validate the untrusted count against the bytes actually present
        // before allocating anything sized by it.
        let max_records = (buf.len() - offset) / record_size;
        if declared_vectors > max_records as u64 {
            return Err(VectorError::SerializationError);
        }
        let vector_count = declared_vectors as usize;

        let mut vectors = Vec::with_capacity(vector_count);
        for _ in 0..vector_count {
            let end = offset + record_size;
            vectors.push(VectorRecord::from_bytes(&buf[offset..end], dimensions)?);
            offset = end;
        }

        if buf.len() - offset < 4 {
            return Err(VectorError::SerializationError);
        }
        let layer_count = Self::read_u32(buf, offset) as usize;
        offset += 4;

        let mut remaining = &buf[offset..];
        // Every layer needs at least its 8-byte prefix, which bounds how many
        // can really follow; never pre-allocate more than that.
        let mut layers = Vec::with_capacity(layer_count.min(remaining.len() / 8));
        for _ in 0..layer_count {
            let (layer, rest) = SerializedLayer::from_bytes(remaining)?;
            layers.push(layer);
            remaining = rest;
        }

        // The checksum was written but never checked before.
        if remaining.len() < Self::CHECKSUM_LEN {
            return Err(VectorError::SerializationError);
        }
        let payload_len = buf.len() - remaining.len();
        let stored = Self::read_u32(remaining, 0);
        if stored != Self::checksum(&buf[..payload_len]) {
            return Err(VectorError::CorruptedIndex);
        }

        Ok(Self {
            header,
            vectors,
            layers,
        })
    }

    /// Encoded size in bytes, assuming every record has the header's
    /// `dimensions` components.
    pub fn size(&self) -> usize {
        let record_size = VectorRecord::size(self.header.dimensions as usize);
        let vector_section = 8 + self.vectors.len() * record_size;
        let layer_section: usize = self
            .layers
            .iter()
            .map(|layer| 8 + layer.adjacencies.len())
            .sum();
        VectorIndexHeader::SIZE + vector_section + 4 + layer_section + Self::CHECKSUM_LEN
    }
}
/// Serializes the vectors of `index` into the byte format read by
/// `deserialize_index`.
///
/// Only the vectors and a header are written: no graph layers are emitted,
/// and the header always records a cosine metric and an entry point of 0.
/// Ids for which `get_vector` returns `None` are skipped, while the header's
/// `vector_count` is taken from `index.stats()`.
///
/// NOTE(review): the metric and entry point are hard-coded rather than read
/// from `index` — confirm whether `HnswIndex` exposes them.
///
/// # Errors
///
/// Returns `VectorError::EmptyIndex` when `index.is_empty()` is true.
pub fn serialize_index(index: &HnswIndex, model_id: u32) -> Result<Vec<u8>, VectorError> {
    if index.is_empty() {
        return Err(VectorError::EmptyIndex);
    }

    // Collect one record per id that still has a vector attached.
    let ids = index.get_ids();
    let mut records: Vec<VectorRecord> = Vec::new();
    for &id in ids.iter() {
        if let Some(emb) = index.get_vector(id) {
            records.push(VectorRecord {
                object_id: id,
                embedding: emb.to_vec(),
            });
        }
    }

    let stats = index.stats();
    let header = VectorIndexHeader::new(
        &VectorIndexMeta {
            index_type: IndexType::Hnsw,
            vector_count: stats.vector_count as u64,
            dimensions: stats.dimensions as u32,
            distance_metric: DistanceMetric::Cosine,
            hnsw_m: stats.m as u16,
            hnsw_ef_construction: stats.ef_construction as u16,
            max_layer: stats.layer_count as u8,
            entry_point: 0,
        },
        model_id,
    );

    let payload = SerializedIndex {
        header,
        vectors: records,
        layers: Vec::new(),
    };
    Ok(payload.to_bytes())
}
/// Rebuilds an `HnswIndex` from bytes produced by `serialize_index`.
///
/// The index is created from the header's `hnsw_m`, `hnsw_ef_construction`,
/// `hnsw_ef_search` and distance metric, then every stored vector is inserted
/// again in encoding order. Serialized layers are not consulted.
///
/// Metric codes follow the header encoding (0 = cosine, 1 = Euclidean,
/// 2 = dot product, 3 = Manhattan, 4 = Hamming); unknown codes fall back to
/// cosine.
///
/// # Errors
///
/// Propagates any error from `SerializedIndex::from_bytes` or from
/// `HnswIndex::insert`.
pub fn deserialize_index(data: &[u8]) -> Result<HnswIndex, VectorError> {
    let serialized = SerializedIndex::from_bytes(data)?;
    // The header is `Copy`; take a copy so `serialized.vectors` can be moved
    // out below.
    let header = serialized.header;

    let metric = match header.distance_metric {
        0 => DistanceMetric::Cosine,
        1 => DistanceMetric::Euclidean,
        2 => DistanceMetric::DotProduct,
        3 => DistanceMetric::Manhattan,
        // The header encoder writes 4 for Hamming; it used to be decoded as
        // Cosine by the catch-all arm.
        4 => DistanceMetric::Hamming,
        _ => DistanceMetric::Cosine,
    };

    let mut index = HnswIndex::with_params(
        usize::from(header.hnsw_m),
        usize::from(header.hnsw_ef_construction),
        usize::from(header.hnsw_ef_search),
        metric,
    );
    for record in serialized.vectors {
        index.insert(record.object_id, &record.embedding)?;
    }
    Ok(index)
}
// Unit tests for the header, record, layer and whole-index encoders.
#[cfg(test)]
mod tests {
use super::*;
use alloc::vec;
// A header built from metadata must survive an encode/decode round trip;
// checks the magic, vector count, dimensions, `hnsw_m` and model id.
#[test]
fn test_header_serialization() {
let meta = VectorIndexMeta {
index_type: IndexType::Hnsw,
vector_count: 1000,
dimensions: 512,
distance_metric: DistanceMetric::Cosine,
hnsw_m: 16,
hnsw_ef_construction: 200,
max_layer: 4,
entry_point: 42,
};
let header = VectorIndexHeader::new(&meta, 12345);
let bytes = header.to_bytes();
let restored = VectorIndexHeader::from_bytes(&bytes).unwrap();
assert_eq!(restored.magic, VECTOR_INDEX_MAGIC);
assert_eq!(restored.vector_count, 1000);
assert_eq!(restored.dimensions, 512);
assert_eq!(restored.hnsw_m, 16);
assert_eq!(restored.model_id, 12345);
}
// A 4-component record must decode to the same id and the same first and
// last components.
#[test]
fn test_vector_record_serialization() {
let record = VectorRecord {
object_id: 12345,
embedding: vec![1.0, 2.0, 3.0, 4.0],
};
let bytes = record.to_bytes();
let restored = VectorRecord::from_bytes(&bytes, 4).unwrap();
assert_eq!(restored.object_id, 12345);
assert_eq!(restored.embedding.len(), 4);
assert_eq!(restored.embedding[0], 1.0);
assert_eq!(restored.embedding[3], 4.0);
}
// A three-node layer must decode with no bytes left over, and parsing its
// adjacency blob must return the first node's neighbors unchanged.
#[test]
fn test_serialized_layer() {
let adjacencies = vec![(1, vec![2, 3, 4]), (2, vec![1, 3]), (3, vec![1, 2, 4, 5])];
let layer = SerializedLayer::new(&adjacencies);
let bytes = layer.to_bytes();
let (restored, remaining) = SerializedLayer::from_bytes(&bytes).unwrap();
assert!(remaining.is_empty());
assert_eq!(restored.node_count, 3);
let parsed = restored.parse_adjacencies();
assert_eq!(parsed.len(), 3);
assert_eq!(parsed[0].0, 1);
assert_eq!(parsed[0].1, vec![2, 3, 4]);
}
// Ten 32-dimensional vectors inserted into an index must all be present
// after serialize_index followed by deserialize_index.
#[test]
fn test_index_serialization_roundtrip() {
let mut index = HnswIndex::new(16, 200);
for i in 0..10 {
let embedding: Vec<f32> = (0..32).map(|j| ((i * 32 + j) as f32) / 320.0).collect();
index.insert(i as u64, &embedding).unwrap();
}
let bytes = serialize_index(&index, 42).unwrap();
let restored = deserialize_index(&bytes).unwrap();
assert_eq!(restored.len(), 10);
assert_eq!(restored.dimensions(), 32);
let vec = restored.get_vector(5).unwrap();
assert_eq!(vec.len(), 32);
}
// A buffer that is long enough but starts with the wrong magic number must
// be reported as a corrupted index.
#[test]
fn test_invalid_magic() {
let mut bytes = vec![0u8; 100];
bytes[0..8].copy_from_slice(&0x1234567890ABCDEFu64.to_le_bytes());
let result = VectorIndexHeader::from_bytes(&bytes);
assert!(matches!(result, Err(VectorError::CorruptedIndex)));
}
}