use crate::error::{Result, StorageError};
use crate::types::RowId;
use std::path::{Path, PathBuf};
use std::fs::{File, OpenOptions};
use std::io::{Write, Seek, SeekFrom, BufWriter};
use std::collections::HashSet;
use memmap2::{Mmap, MmapOptions};
use super::{Candidate, VectorNode};
// Four-byte tag written at the very start of an SST file and verified by `parse_header`.
const MAGIC: &[u8; 4] = b"VSST";
// `VERSION` is the format version emitted by `write_header`; `HEADER_SIZE` is the number of
// bytes reserved for the header (the fixed fields followed by zero padding).
const VERSION: u32 = 4; const HEADER_SIZE: usize = 256;
// Total number of bytes emitted by `write_footer` (a 4-byte checksum slot plus zero padding).
const FOOTER_SIZE: usize = 64;
/// Section layout of an SST file, as stored in its header.
///
/// Every `*_offset` field is a byte position in the file. A value of `0` in
/// `deleted_bitmap_offset` or `raw_vectors_offset` means the section is absent, which is what
/// the parsers for older format versions report.
#[derive(Debug, Clone)]
pub struct SSTMetadata {
    /// Number of nodes stored in the file.
    pub node_count: u64,
    /// Number of components in every stored vector.
    pub dimension: u32,
    /// Row ID recorded as the primary entry point for graph search.
    pub medoid: RowId,
    /// Start of the list of little-endian 8-byte row IDs.
    pub id_list_offset: u64,
    /// Start of the deletion bitmap (one bit per node), or `0` when absent.
    pub deleted_bitmap_offset: u64,
    /// Start of the SQ8 section: centroid, scales, then one byte code per component.
    pub vectors_offset: u64,
    /// Start of the uncompressed `f32` vectors, or `0` when absent.
    pub raw_vectors_offset: u64,
    /// Start of the graph section: per-node offset table followed by adjacency records.
    pub graph_offset: u64,
    /// Start of the footer.
    pub footer_offset: u64,
}
/// A memory-mapped SST file together with the lookup structures built when it is opened.
pub struct VamanaSSTFile {
// Location the file was opened from; returned by `path()`.
// NOTE(review): since `path()` reads this field, the `dead_code` allowance looks unnecessary — confirm.
#[allow(dead_code)]
path: PathBuf,
// Header contents parsed at open time.
metadata: SSTMetadata,
// Read-only mapping of the whole file; all section reads slice into it.
mmap: Mmap,
// Maps each row ID to its position in the on-disk ID list; that position indexes every section.
id_to_index: std::collections::HashMap<RowId, usize>,
// One bit per node position; a set bit marks the node as deleted. `delete` mutates this
// in-memory copy only — nothing in the visible code writes it back to the file.
deleted_bitmap: parking_lot::RwLock<Vec<u8>>,
}
impl VamanaSSTFile {
/// Writes a new SST file at `path` from `nodes` and re-opens it through [`Self::open`].
///
/// Nodes are sorted by row ID before being written. The file is laid out as: a zeroed header
/// placeholder, the ID list, the deletion bitmap, the SQ8 vectors, the raw vectors, the graph
/// and the footer. The real header is written last, once every section offset is known.
///
/// # Errors
///
/// Returns `StorageError::InvalidData` when `nodes` is empty, when the nodes do not all share
/// the dimension of the first node, or when that dimension does not fit in a `u32`.
/// I/O failures are propagated.
pub fn create(
    path: &Path,
    mut nodes: Vec<(RowId, VectorNode)>,
    medoid: RowId,
) -> Result<Self> {
    if nodes.is_empty() {
        return Err(StorageError::InvalidData("Cannot create empty SST".into()));
    }
    nodes.sort_by_key(|(id, _)| *id);

    let dimension = nodes[0].1.vector.len();
    // Every vector section is addressed as `index * dimension`, so a single vector of a
    // different length would either panic while encoding or misalign all later records.
    if let Some((id, node)) = nodes.iter().find(|(_, node)| node.vector.len() != dimension) {
        return Err(StorageError::InvalidData(format!(
            "Vector dimension mismatch for ID {}: expected {}, got {}",
            id,
            dimension,
            node.vector.len()
        )));
    }
    // The header stores the dimension as a u32; refuse to truncate it silently.
    if dimension > u32::MAX as usize {
        return Err(StorageError::InvalidData(format!(
            "Vector dimension {} does not fit in the SST header",
            dimension
        )));
    }
    let node_count = nodes.len() as u64;

    let mut file = BufWriter::new(
        OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true)
            .open(path)?,
    );

    // Reserve the header; it is overwritten at the end with the final offsets.
    let header_pos = file.stream_position()?;
    file.write_all(&vec![0u8; HEADER_SIZE])?;

    let id_list_offset = file.stream_position()?;
    for (id, _) in &nodes {
        file.write_all(&id.to_le_bytes())?;
    }

    // One bit per node, in the same order as the ID list.
    let deleted_bitmap_offset = file.stream_position()?;
    let mut bitmap = vec![0u8; node_count.div_ceil(8) as usize];
    for (idx, (_, node)) in nodes.iter().enumerate() {
        if node.deleted {
            bitmap[idx / 8] |= 1 << (idx % 8);
        }
    }
    file.write_all(&bitmap)?;

    let vectors_offset = file.stream_position()?;
    write_sq8_vectors(&mut file, &nodes)?;

    let raw_vectors_offset = file.stream_position()?;
    write_raw_vectors(&mut file, &nodes)?;

    let graph_offset = file.stream_position()?;
    write_graph(&mut file, &nodes)?;

    let footer_offset = file.stream_position()?;
    write_footer(&mut file)?;

    file.seek(SeekFrom::Start(header_pos))?;
    write_header(
        &mut file,
        &SSTMetadata {
            node_count,
            dimension: dimension as u32,
            medoid,
            id_list_offset,
            deleted_bitmap_offset,
            vectors_offset,
            raw_vectors_offset,
            graph_offset,
            footer_offset,
        },
    )?;

    // Flush explicitly: dropping a BufWriter would swallow a failed final write.
    file.flush()?;
    drop(file);
    Self::open(path)
}
pub fn open(path: &Path) -> Result<Self> {
let file = File::open(path)?;
let mmap = unsafe { MmapOptions::new().map(&file)? };
let metadata = parse_header(&mmap)?;
let id_to_index = read_id_list(&mmap, &metadata)?;
let deleted_bitmap = if metadata.deleted_bitmap_offset > 0 {
let bitmap_size = metadata.node_count.div_ceil(8) as usize;
let start = metadata.deleted_bitmap_offset as usize;
let end = start + bitmap_size;
mmap[start..end].to_vec()
} else {
vec![0u8; metadata.node_count.div_ceil(8) as usize]
};
Ok(Self {
path: path.to_path_buf(),
metadata,
mmap,
id_to_index,
deleted_bitmap: parking_lot::RwLock::new(deleted_bitmap), })
}
/// Returns up to `k` candidates for `query`.
///
/// Files with at most 100 IDs are scanned linearly; larger ones are searched through the
/// graph with beam width `ef`. When the file stores raw vectors the candidates are re-scored
/// against them by `rerank`. Deleted rows are never returned.
///
/// # Errors
///
/// Propagates any error raised while reading the centroid, the scales, vectors or the graph.
pub fn search(&self, query: &[f32], k: usize, ef: usize) -> Result<Vec<Candidate>> {
    let centroid = self.get_centroid()?;
    let scales = self.get_scales()?;
    let mut candidates = if self.id_to_index.len() <= 100 {
        self.linear_search(query, k, &centroid, &scales)?
    } else {
        self.graph_search(query, k, ef, &centroid, &scales)?
    };
    if self.metadata.raw_vectors_offset > 0 {
        self.rerank(query, candidates, k)
    } else {
        // `rerank` is what drops deleted rows on the other path; without raw vectors the
        // graph results would otherwise be returned unfiltered.
        candidates.retain(|c| !self.is_deleted(c.id));
        Ok(candidates)
    }
}
/// Re-scores `candidates` against the raw vectors and returns the `k` closest.
///
/// Deleted rows are removed first. A candidate whose raw vector cannot be read keeps the
/// distance it arrived with. Distances that cannot be compared are treated as equal.
fn rerank(&self, query: &[f32], mut candidates: Vec<Candidate>, k: usize) -> Result<Vec<Candidate>> {
    candidates.retain(|entry| !self.is_deleted(entry.id));

    candidates.iter_mut().for_each(|entry| {
        if let Ok(exact) = self.get_raw_vector(entry.id) {
            entry.distance = l2_distance(query, &exact);
        }
    });

    candidates.sort_by(|lhs, rhs| match lhs.distance.partial_cmp(&rhs.distance) {
        Some(order) => order,
        None => std::cmp::Ordering::Equal,
    });
    candidates.truncate(k);
    Ok(candidates)
}
/// Decodes the uncompressed `f32` vector stored for `id`.
///
/// # Errors
///
/// `StorageError::InvalidData` when the file has no raw-vector section or `id` is unknown;
/// `StorageError::Corruption` when the record would extend past the end of the mapping.
fn get_raw_vector(&self, id: RowId) -> Result<Vec<f32>> {
    let section_start = self.metadata.raw_vectors_offset as usize;
    if section_start == 0 {
        return Err(StorageError::InvalidData("Raw vectors not available in this SST version".into()));
    }

    let dim = self.metadata.dimension as usize;
    let slot = match self.id_to_index.get(&id) {
        Some(&slot) => slot,
        None => return Err(StorageError::InvalidData(format!("ID {} not found", id))),
    };

    // Records are fixed-size: `dim` little-endian f32 values each.
    let record_len = dim * 4;
    let start = section_start + slot * record_len;
    if start + record_len > self.mmap.len() {
        return Err(StorageError::Corruption(
            format!("Raw vector offset out of bounds: {} + {} > {}",
                start, record_len, self.mmap.len())
        ));
    }

    let vector = self.mmap[start..start + record_len]
        .chunks_exact(4)
        .map(|word| f32::from_le_bytes([word[0], word[1], word[2], word[3]]))
        .collect();
    Ok(vector)
}
/// Runs a beam search from every start point and merges the results into the best `k`.
///
/// The effective beam width is at least `3 * k` and at least 50, capped at the number of
/// IDs in the file. Each start point explores with its own visited set. The merged
/// candidates are sorted, de-duplicated by ID and stripped of deleted rows before being
/// truncated to `k`, so deleted rows do not consume result slots.
///
/// # Errors
///
/// Propagates errors from start-point selection and from the per-start searches.
fn graph_search(
    &self,
    query: &[f32],
    k: usize,
    ef: usize,
    centroid: &[f32],
    scales: &[f32],
) -> Result<Vec<Candidate>> {
    use std::collections::{BinaryHeap, HashSet};

    let ef = ef.max(k * 3).max(50).min(self.id_to_index.len());
    let start_ids = self.get_start_points()?;

    let mut global_candidates = BinaryHeap::new();
    for &start_id in &start_ids {
        let mut local_visited = HashSet::new();
        let local_results = self.graph_search_from_point(
            query,
            k,
            ef,
            start_id,
            centroid,
            scales,
            &mut local_visited,
        )?;
        global_candidates.extend(local_results);
    }

    // Deleted nodes are still traversed (they keep the graph connected) but must not be
    // returned; filter them here, before truncation, exactly as `linear_search` does.
    let mut seen = HashSet::new();
    let mut results: Vec<Candidate> = global_candidates
        .into_sorted_vec()
        .into_iter()
        .filter(|c| !self.is_deleted(c.id) && seen.insert(c.id))
        .collect();
    results.truncate(k);
    Ok(results)
}
fn get_start_points(&self) -> Result<Vec<RowId>> {
let mut starts = vec![self.metadata.medoid];
let target_starts = 2;
let ids: Vec<_> = self.id_to_index.keys().copied().collect();
if ids.len() > target_starts {
let step = ids.len() / target_starts;
for i in 1..target_starts { let idx = i * step;
let candidate = ids[idx];
if candidate != self.metadata.medoid && !starts.contains(&candidate) {
starts.push(candidate);
}
}
} else if ids.len() > 1 {
for id in ids {
if id != self.metadata.medoid {
starts.push(id);
}
}
}
Ok(starts)
}
/// Beam search over the graph starting at `start_id`, scoring nodes by their SQ8-decoded
/// distance to `query`.
///
/// The beam width is `max(ef, 2 * k)`. IDs already present in `global_visited` are skipped,
/// and every ID scored here is added to it. The returned vector is the content of the result
/// heap in `into_sorted_vec` order (assuming `Candidate` orders by distance, closest first —
/// confirm against its `Ord` impl).
///
/// # Errors
///
/// Propagates errors from decoding vectors or reading adjacency lists.
fn graph_search_from_point(
    &self,
    query: &[f32],
    k: usize,
    ef: usize,
    start_id: RowId,
    centroid: &[f32],
    scales: &[f32],
    global_visited: &mut HashSet<RowId>,
) -> Result<Vec<Candidate>> {
    use std::cmp::Reverse;
    use std::collections::BinaryHeap;

    let beam_width = ef.max(k * 2);
    let entry_vec = self.decompress_vector(start_id, centroid, scales)?;
    let entry_dist = l2_distance(query, &entry_vec);

    // `frontier` pops in reversed order relative to `best`; `best.peek()` is the entry the
    // loop treats as the current worst result.
    let mut frontier = BinaryHeap::new();
    frontier.push(Reverse(Candidate::new(start_id, entry_dist)));
    let mut best = BinaryHeap::new();
    best.push(Candidate::new(start_id, entry_dist));
    global_visited.insert(start_id);

    while let Some(Reverse(current)) = frontier.pop() {
        // Stop once the beam is full and the next frontier node is worse than its worst entry.
        let beam_full = best.len() >= beam_width;
        if beam_full && best.peek().map_or(false, |worst| current.distance > worst.distance) {
            break;
        }

        for neighbor_id in self.get_neighbors(current.id)? {
            // `insert` returns false when the ID was already visited.
            if !global_visited.insert(neighbor_id) {
                continue;
            }
            let neighbor_vec = self.decompress_vector(neighbor_id, centroid, scales)?;
            let neighbor_dist = l2_distance(query, &neighbor_vec);

            let admit = best.len() < beam_width
                || best.peek().map_or(false, |worst| neighbor_dist < worst.distance);
            if !admit {
                continue;
            }
            frontier.push(Reverse(Candidate::new(neighbor_id, neighbor_dist)));
            best.push(Candidate::new(neighbor_id, neighbor_dist));
            if best.len() > beam_width {
                best.pop();
            }
        }
    }
    Ok(best.into_sorted_vec())
}
/// Scores every non-deleted ID against `query` using SQ8-decoded vectors and returns the
/// `k` closest. Distances that cannot be compared are treated as equal.
///
/// # Errors
///
/// Stops at, and returns, the first vector decoding error.
fn linear_search(
    &self,
    query: &[f32],
    k: usize,
    centroid: &[f32],
    scales: &[f32],
) -> Result<Vec<Candidate>> {
    let mut ranked = self
        .id_to_index
        .keys()
        .copied()
        .filter(|&id| !self.is_deleted(id))
        .map(|id| {
            let decoded = self.decompress_vector(id, centroid, scales)?;
            Ok(Candidate::new(id, l2_distance(query, &decoded)))
        })
        .collect::<Result<Vec<Candidate>>>()?;

    ranked.sort_by(|lhs, rhs| match lhs.distance.partial_cmp(&rhs.distance) {
        Some(order) => order,
        None => std::cmp::Ordering::Equal,
    });
    ranked.truncate(k);
    Ok(ranked)
}
fn get_centroid(&self) -> Result<Vec<f32>> {
let offset = self.metadata.vectors_offset as usize;
let dim = self.metadata.dimension as usize;
let bytes = &self.mmap[offset..offset + dim * 4];
let mut centroid = vec![0.0f32; dim];
for i in 0..dim {
let val = f32::from_le_bytes([
bytes[i * 4],
bytes[i * 4 + 1],
bytes[i * 4 + 2],
bytes[i * 4 + 3],
]);
centroid[i] = val;
}
Ok(centroid)
}
fn get_scales(&self) -> Result<Vec<f32>> {
let offset = self.metadata.vectors_offset as usize + self.metadata.dimension as usize * 4;
let dim = self.metadata.dimension as usize;
let bytes = &self.mmap[offset..offset + dim * 4];
let mut scales = vec![0.0f32; dim];
for i in 0..dim {
let val = f32::from_le_bytes([
bytes[i * 4],
bytes[i * 4 + 1],
bytes[i * 4 + 2],
bytes[i * 4 + 3],
]);
scales[i] = val;
}
Ok(scales)
}
/// Reconstructs the approximate vector for `id` from its SQ8 codes.
///
/// Each component is `centroid[i] + (code - 127.5) * scales[i]`. `centroid` and `scales`
/// must hold at least `dimension` values.
///
/// # Errors
///
/// `StorageError::InvalidData` when `id` is unknown; `StorageError::Corruption` when the
/// code record would extend past the end of the mapping.
fn decompress_vector(&self, id: RowId, centroid: &[f32], scales: &[f32]) -> Result<Vec<f32>> {
    let dim = self.metadata.dimension as usize;
    let slot = match self.id_to_index.get(&id) {
        Some(&slot) => slot,
        None => return Err(StorageError::InvalidData(format!("ID {} not found in SST", id))),
    };

    // Codes follow the centroid and the scales (`dim` f32 values each, hence `dim * 8` bytes).
    let start = self.metadata.vectors_offset as usize + dim * 8 + slot * dim;
    let end = start + dim;
    if end > self.mmap.len() {
        return Err(StorageError::Corruption(
            format!("Vector offset out of bounds: {} + {} > {}",
                start, dim, self.mmap.len())
        ));
    }

    let codes = &self.mmap[start..end];
    let (centroid, scales) = (&centroid[..dim], &scales[..dim]);
    let vector = codes
        .iter()
        .zip(centroid)
        .zip(scales)
        .map(|((&code, &mean), &scale)| mean + (code as f32 - 127.5) * scale)
        .collect();
    Ok(vector)
}
/// Reads the adjacency list of `id` from the graph section.
///
/// The section starts with a table of 8-byte record offsets, one per node. Each record is a
/// little-endian `u32` degree followed by that many 8-byte neighbour IDs.
///
/// All offsets come from the file, so every read is bounds-checked with overflow-safe
/// arithmetic, and the neighbour list is only allocated after its length has been validated
/// against the mapping. A corrupt degree therefore cannot trigger a huge allocation.
///
/// # Errors
///
/// `StorageError::InvalidData` when `id` is unknown; `StorageError::Corruption` when the
/// offset table entry, the record header or the neighbour list falls outside the mapping.
fn get_neighbors(&self, id: RowId) -> Result<Vec<RowId>> {
    let index = *self.id_to_index.get(&id)
        .ok_or_else(|| StorageError::InvalidData(format!("ID {} not found in SST", id)))?;
    let file_len = self.mmap.len();

    let offset_pos = (self.metadata.graph_offset as usize).saturating_add(index.saturating_mul(8));
    let entry = self.graph_bytes(offset_pos, 8).ok_or_else(|| {
        StorageError::Corruption(format!(
            "Offset table out of bounds: {} + 8 > {}",
            offset_pos, file_len
        ))
    })?;
    let mut word = [0u8; 8];
    word.copy_from_slice(entry);
    let node_offset = u64::from_le_bytes(word) as usize;

    let header = self.graph_bytes(node_offset, 4).ok_or_else(|| {
        StorageError::Corruption(format!(
            "Node offset out of bounds: {} + 4 > {}",
            node_offset, file_len
        ))
    })?;
    let degree = u32::from_le_bytes([header[0], header[1], header[2], header[3]]) as usize;

    // `node_offset + 4` cannot overflow: the header read above proved it is within the file.
    let neighbors_start = node_offset + 4;
    let list_len = degree.saturating_mul(8);
    let list = self.graph_bytes(neighbors_start, list_len).ok_or_else(|| {
        StorageError::Corruption(format!(
            "Neighbors list out of bounds: {} + {} > {}",
            neighbors_start, list_len, file_len
        ))
    })?;

    Ok(list
        .chunks_exact(8)
        .map(|chunk| {
            let mut raw = [0u8; 8];
            raw.copy_from_slice(chunk);
            u64::from_le_bytes(raw)
        })
        .collect())
}
/// Returns `len` mapped bytes starting at `start`, or `None` when the range overflows or
/// reaches past the end of the mapping.
fn graph_bytes(&self, start: usize, len: usize) -> Option<&[u8]> {
    let end = start.checked_add(len)?;
    self.mmap.get(start..end)
}
/// Returns the header metadata parsed when the file was opened.
pub fn metadata(&self) -> &SSTMetadata {
&self.metadata
}
/// Returns the path this file was opened from.
pub fn path(&self) -> &Path {
&self.path
}
/// Marks `id` as deleted in the in-memory bitmap.
///
/// # Errors
///
/// Returns `StorageError::InvalidData` when `id` is not stored in this file.
pub fn delete(&self, id: RowId) -> Result<()> {
    let slot = match self.id_to_index.get(&id) {
        Some(&slot) => slot,
        None => return Err(StorageError::InvalidData(format!("Node {} not found in SST", id))),
    };
    // Bit `slot % 8` of byte `slot / 8` belongs to this node.
    let mask = 1u8 << (slot % 8);
    self.deleted_bitmap.write()[slot / 8] |= mask;
    Ok(())
}
/// Reports whether `id` is marked deleted. IDs not stored in this file count as not deleted.
pub fn is_deleted(&self, id: RowId) -> bool {
    match self.id_to_index.get(&id) {
        None => false,
        Some(&slot) => {
            let byte = self.deleted_bitmap.read()[slot / 8];
            (byte >> (slot % 8)) & 1 == 1
        }
    }
}
/// Counts the nodes whose deletion bit is clear, looking only at the first `node_count` bits.
pub fn active_node_count(&self) -> usize {
    let bitmap = self.deleted_bitmap.read();
    let total = self.metadata.node_count as usize;
    (0..total)
        .filter(|&slot| bitmap[slot / 8] & (1u8 << (slot % 8)) == 0)
        .count()
}
/// Returns every non-deleted node with its vector and adjacency list, sorted by row ID.
///
/// When the file stores raw vectors they are exported as-is; the SQ8 codes are decoded only
/// for files without a raw section. Exporting the exact vectors keeps a rebuild based on this
/// output from re-quantizing already-quantized data. Exported nodes always carry
/// `timestamp: 0` and `deleted: false`.
///
/// # Errors
///
/// Propagates errors from reading vectors or adjacency lists.
pub fn export_active_nodes(&self) -> Result<Vec<(RowId, VectorNode)>> {
    // The centroid and scales are only needed for the lossy SQ8 fallback.
    let sq8_params = if self.metadata.raw_vectors_offset > 0 {
        None
    } else {
        Some((self.get_centroid()?, self.get_scales()?))
    };

    let mut nodes = Vec::new();
    for &row_id in self.id_to_index.keys() {
        if self.is_deleted(row_id) {
            continue;
        }
        let vector = match &sq8_params {
            None => self.get_raw_vector(row_id)?,
            Some((centroid, scales)) => self.decompress_vector(row_id, centroid, scales)?,
        };
        let neighbors = self.get_neighbors(row_id)?;
        nodes.push((
            row_id,
            VectorNode {
                vector,
                neighbors,
                timestamp: 0,
                deleted: false,
            },
        ));
    }
    nodes.sort_by_key(|(id, _)| *id);
    Ok(nodes)
}
}
/// Serializes `metadata` as a `HEADER_SIZE`-byte header.
///
/// Field order: magic, version, node count, dimension, medoid, then the ID-list,
/// deletion-bitmap, SQ8-vector, raw-vector, graph and footer offsets. Integers are
/// little-endian and the remainder of the header is zero-filled.
fn write_header<W: Write>(writer: &mut W, metadata: &SSTMetadata) -> Result<()> {
    let mut header = Vec::with_capacity(HEADER_SIZE);
    header.extend_from_slice(MAGIC);
    header.extend_from_slice(&VERSION.to_le_bytes());
    header.extend_from_slice(&metadata.node_count.to_le_bytes());
    header.extend_from_slice(&metadata.dimension.to_le_bytes());
    header.extend_from_slice(&metadata.medoid.to_le_bytes());
    let section_offsets = [
        metadata.id_list_offset,
        metadata.deleted_bitmap_offset,
        metadata.vectors_offset,
        metadata.raw_vectors_offset,
        metadata.graph_offset,
        metadata.footer_offset,
    ];
    for offset in section_offsets.iter() {
        header.extend_from_slice(&offset.to_le_bytes());
    }
    // Zero padding up to the fixed header size.
    header.resize(HEADER_SIZE, 0);
    writer.write_all(&header)?;
    Ok(())
}
/// Parses the SST header at the start of `mmap`, dispatching on the stored format version.
///
/// Versions 2 and 3 are decoded by their own parsers, the current `VERSION` by
/// `parse_header_v4`. Every field is read with a bounds check, so a file that is too short
/// produces an error instead of a panic.
///
/// # Errors
///
/// Returns `StorageError::Corruption` for a missing or wrong magic, a truncated header,
/// version 1, or any unknown version.
fn parse_header(mmap: &[u8]) -> Result<SSTMetadata> {
    if mmap.get(0..4) != Some(&MAGIC[..]) {
        return Err(StorageError::Corruption("Invalid SST magic".into()));
    }
    match header_u32(mmap, 4)? {
        1 => Err(StorageError::Corruption(
            "SST V1 format is deprecated, please rebuild the index".into()
        )),
        2 => parse_header_v2(mmap),
        3 => parse_header_v3(mmap),
        VERSION => parse_header_v4(mmap),
        other => Err(StorageError::Corruption(format!("Unsupported SST version: {}", other))),
    }
}
/// Reads the little-endian `u32` header field at byte offset `at`.
fn header_u32(mmap: &[u8], at: usize) -> Result<u32> {
    match mmap.get(at..at + 4) {
        Some(b) => Ok(u32::from_le_bytes([b[0], b[1], b[2], b[3]])),
        None => Err(StorageError::Corruption(format!(
            "SST header truncated: need {} bytes, file has {}",
            at + 4,
            mmap.len()
        ))),
    }
}
/// Reads the little-endian `u64` header field at byte offset `at`.
fn header_u64(mmap: &[u8], at: usize) -> Result<u64> {
    match mmap.get(at..at + 8) {
        Some(b) => {
            let mut word = [0u8; 8];
            word.copy_from_slice(b);
            Ok(u64::from_le_bytes(word))
        }
        None => Err(StorageError::Corruption(format!(
            "SST header truncated: need {} bytes, file has {}",
            at + 8,
            mmap.len()
        ))),
    }
}
/// Decodes the current header layout, which stores all six section offsets.
fn parse_header_v4(mmap: &[u8]) -> Result<SSTMetadata> {
    Ok(SSTMetadata {
        node_count: header_u64(mmap, 8)?,
        dimension: header_u32(mmap, 16)?,
        medoid: header_u64(mmap, 20)?,
        id_list_offset: header_u64(mmap, 28)?,
        deleted_bitmap_offset: header_u64(mmap, 36)?,
        vectors_offset: header_u64(mmap, 44)?,
        raw_vectors_offset: header_u64(mmap, 52)?,
        graph_offset: header_u64(mmap, 60)?,
        footer_offset: header_u64(mmap, 68)?,
    })
}
/// Decodes a version-2 header, which has neither a deletion bitmap nor raw vectors; both
/// offsets are reported as `0`.
fn parse_header_v2(mmap: &[u8]) -> Result<SSTMetadata> {
    Ok(SSTMetadata {
        node_count: header_u64(mmap, 8)?,
        dimension: header_u32(mmap, 16)?,
        medoid: header_u64(mmap, 20)?,
        id_list_offset: header_u64(mmap, 28)?,
        deleted_bitmap_offset: 0,
        vectors_offset: header_u64(mmap, 36)?,
        raw_vectors_offset: 0,
        graph_offset: header_u64(mmap, 44)?,
        footer_offset: header_u64(mmap, 52)?,
    })
}
/// Decodes a version-3 header, which stores raw vectors but no deletion bitmap; the bitmap
/// offset is reported as `0`.
fn parse_header_v3(mmap: &[u8]) -> Result<SSTMetadata> {
    Ok(SSTMetadata {
        node_count: header_u64(mmap, 8)?,
        dimension: header_u32(mmap, 16)?,
        medoid: header_u64(mmap, 20)?,
        id_list_offset: header_u64(mmap, 28)?,
        deleted_bitmap_offset: 0,
        vectors_offset: header_u64(mmap, 36)?,
        raw_vectors_offset: header_u64(mmap, 44)?,
        graph_offset: header_u64(mmap, 52)?,
        footer_offset: header_u64(mmap, 60)?,
    })
}
/// Builds the row-ID → position map from the on-disk ID list (8 little-endian bytes per ID).
///
/// The list bounds are computed with overflow-checked arithmetic, because `node_count` and
/// the offset come straight from the file header. A corrupt header is then reported as an
/// error before the map is sized from it. If an ID occurs more than once, the last position wins.
///
/// # Errors
///
/// Returns `StorageError::Corruption` when the list does not fit inside `mmap`.
fn read_id_list(mmap: &[u8], metadata: &SSTMetadata) -> Result<std::collections::HashMap<RowId, usize>> {
    let offset = metadata.id_list_offset as usize;
    let node_count = metadata.node_count as usize;

    let id_bytes = node_count
        .checked_mul(8)
        .and_then(|size| offset.checked_add(size))
        .and_then(|end| mmap.get(offset..end))
        .ok_or_else(|| {
            StorageError::Corruption(format!(
                "ID list out of bounds: {} + {} > {}",
                offset,
                node_count.saturating_mul(8),
                mmap.len()
            ))
        })?;

    let mut id_to_index = std::collections::HashMap::with_capacity(node_count);
    for (position, chunk) in id_bytes.chunks_exact(8).enumerate() {
        let mut word = [0u8; 8];
        word.copy_from_slice(chunk);
        id_to_index.insert(u64::from_le_bytes(word), position);
    }
    Ok(id_to_index)
}
fn write_sq8_vectors<W: Write>(writer: &mut W, nodes: &[(RowId, VectorNode)]) -> Result<()> {
if nodes.is_empty() {
return Ok(());
}
let dim = nodes[0].1.vector.len();
let mut centroid = vec![0.0f32; dim];
for (_, node) in nodes {
for i in 0..dim {
centroid[i] += node.vector[i];
}
}
for v in &mut centroid {
*v /= nodes.len() as f32;
}
let mut max_abs = vec![0.0f32; dim];
for (_, node) in nodes {
for i in 0..dim {
let shifted = node.vector[i] - centroid[i];
max_abs[i] = max_abs[i].max(shifted.abs());
}
}
let mut scales = vec![0.0f32; dim];
for i in 0..dim {
scales[i] = if max_abs[i] > 1e-6 { max_abs[i] / 127.5 } else { 1.0 };
}
for &v in ¢roid {
writer.write_all(&v.to_le_bytes())?;
}
for &s in &scales {
writer.write_all(&s.to_le_bytes())?;
}
for (_, node) in nodes {
for i in 0..dim {
let shifted = node.vector[i] - centroid[i];
let normalized = shifted / scales[i]; let code = (normalized + 127.5).clamp(0.0, 255.0) as u8;
writer.write_all(&[code])?;
}
}
Ok(())
}
/// Writes the graph section: a table of 8-byte record offsets (one per node) followed by the
/// adjacency records, each a little-endian `u32` degree and that many neighbour IDs.
///
/// The table is first written as zeros, then patched once every record position is known.
/// The writer is left positioned at the end of the section.
fn write_graph<W: Write + Seek>(writer: &mut W, nodes: &[(RowId, VectorNode)]) -> Result<()> {
    let table_start = writer.stream_position()?;
    // Placeholder for the offset table.
    writer.write_all(&vec![0u8; nodes.len() * 8])?;

    let mut record_offsets = Vec::with_capacity(nodes.len());
    for (_, node) in nodes {
        record_offsets.push(writer.stream_position()?);
        let degree = node.neighbors.len() as u32;
        writer.write_all(&degree.to_le_bytes())?;
        node.neighbors
            .iter()
            .try_for_each(|neighbor| writer.write_all(&neighbor.to_le_bytes()))?;
    }

    // Go back, fill in the table, then return to where the records ended.
    let section_end = writer.stream_position()?;
    writer.seek(SeekFrom::Start(table_start))?;
    record_offsets
        .iter()
        .try_for_each(|offset| writer.write_all(&offset.to_le_bytes()))?;
    writer.seek(SeekFrom::Start(section_end))?;
    Ok(())
}
/// Writes every component of every node's vector as a little-endian `f32`, in `nodes` order.
fn write_raw_vectors<W: Write>(writer: &mut W, nodes: &[(RowId, VectorNode)]) -> Result<()> {
    nodes
        .iter()
        .flat_map(|(_, node)| node.vector.iter())
        .try_for_each(|component| writer.write_all(&component.to_le_bytes()))?;
    Ok(())
}
/// Writes the `FOOTER_SIZE`-byte footer: a little-endian `u32` checksum slot, currently
/// always zero, followed by zero padding.
fn write_footer<W: Write>(writer: &mut W) -> Result<()> {
    let checksum: u32 = 0;
    let mut footer = [0u8; FOOTER_SIZE];
    footer[..4].copy_from_slice(&checksum.to_le_bytes());
    writer.write_all(&footer)?;
    Ok(())
}
/// Euclidean distance between `a` and `b`.
///
/// Components are paired positionally; if the slices differ in length, the extra components
/// of the longer one are ignored.
fn l2_distance(a: &[f32], b: &[f32]) -> f32 {
    let squared_sum: f32 = a
        .iter()
        .zip(b)
        .map(|(lhs, rhs)| {
            let delta = lhs - rhs;
            delta.powi(2)
        })
        .sum();
    squared_sum.sqrt()
}