#![allow(clippy::cast_possible_truncation)]
#![allow(clippy::cast_precision_loss)]
#![allow(clippy::cast_sign_loss)]
#![allow(clippy::missing_errors_doc)]
#![allow(clippy::missing_panics_doc)]
use super::config::HnswConfig;
use super::neighbor::NeighborPool;
use crate::metadata::{MetadataError, MetadataStore, MetadataValue};
use crate::quantization::variable::BinaryVector;
use crate::storage::binary::BinaryVectorStorage;
use crate::storage::VectorStorage;
use bytemuck::{Pod, Zeroable};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::collections::HashMap;
use std::collections::HashSet;
use std::vec::Vec;
use thiserror::Error;
/// Stable, user-facing identifier of a stored vector.
///
/// `0` is reserved as the invalid sentinel; valid ids start at 1.
/// `Pod`/`Zeroable` allow zero-copy serialization of node tables.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Pod, Zeroable)]
#[repr(transparent)]
pub struct VectorId(pub u64);
impl VectorId {
    /// Sentinel meaning "no vector"; never assigned to real data.
    pub const INVALID: Self = VectorId(0);
    /// First id handed out to real vectors.
    pub const FIRST: Self = VectorId(1);
}
/// Index of a node in the graph's node table (`HnswIndex::nodes`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[repr(transparent)]
pub struct NodeId(pub u32);
impl NodeId {
    /// Sentinel meaning "no node"; `u32::MAX` is never a valid index.
    pub const INVALID: Self = NodeId(u32::MAX);
}
/// An HNSW layer number (0 = densest, bottom layer).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
#[allow(dead_code)]
pub struct Layer(pub u8);
/// Errors produced by graph/index operations.
///
/// The `#[error]` strings double as the user-facing descriptions.
#[derive(Error, Debug, Clone, PartialEq, Eq)]
pub enum GraphError {
    /// The node table reached its maximum size (`u32::MAX` entries).
    #[error("node capacity exceeded")]
    CapacityExceeded,
    /// A vector id was zero, unknown, or otherwise unusable.
    #[error("invalid vector id")]
    InvalidVectorId,
    /// A node's neighbor region points outside the neighbor pool.
    #[error("neighbor data corrupted")]
    NeighborError,
    /// A `NodeId` did not refer to an existing node.
    #[error("node id out of bounds")]
    NodeIdOutOfBounds,
    /// Index config and vector storage disagree on dimensionality.
    #[error("config dimension mismatch: expected {expected}, got {actual}")]
    ConfigMismatch {
        expected: u32,
        actual: u32,
    },
    /// An input vector had the wrong number of components.
    #[error("dimension mismatch: expected {expected}, got {actual}")]
    DimensionMismatch {
        expected: usize,
        actual: usize,
    },
    /// Underlying storage failed; message carries the cause.
    #[error("storage error: {0}")]
    Storage(String),
    /// Construction-time parameter validation failed.
    #[error("invalid config: {0}")]
    InvalidConfig(String),
    /// Metadata key/value validation failed (converted from `MetadataError`).
    #[error("metadata validation failed: {0}")]
    MetadataValidation(#[from] MetadataError),
    /// A filter expression could not be parsed.
    #[error("filter parse error: {0}")]
    FilterParse(String),
    /// A filter expression failed during evaluation.
    #[error("filter evaluation error: {0}")]
    FilterEval(String),
    /// A BQ-only operation was attempted without BQ storage.
    #[error("binary quantization is not enabled; use with_bq() or enable_bq() first")]
    BqNotEnabled,
    /// Binary quantization of a vector failed.
    #[error("quantization error: {0}")]
    Quantization(String),
}
/// Summary returned by [`HnswIndex::compact`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompactionResult {
    /// Number of tombstoned nodes dropped by the rebuild.
    pub tombstones_removed: usize,
    /// Node count of the rebuilt index.
    pub new_size: usize,
    /// Wall-clock duration of the compaction in milliseconds.
    pub duration_ms: u64,
}
/// One entry in the HNSW node table.
///
/// Neighbor lists live out-of-line in the shared [`NeighborPool`];
/// `neighbor_offset`/`neighbor_len` locate this node's region there.
/// `Pod`/`Zeroable` + `repr(C)` keep the table memcpy-serializable.
#[derive(Clone, Copy, Debug, Serialize, Deserialize, Pod, Zeroable)]
#[repr(C)]
pub struct HnswNode {
    /// Id of the stored vector this node represents.
    pub vector_id: VectorId,
    /// Start of this node's encoded neighbor list in the pool buffer.
    pub neighbor_offset: u32,
    /// Length of this node's region in the pool buffer (allocation capacity).
    pub neighbor_len: u16,
    /// Highest HNSW layer this node participates in.
    pub max_layer: u8,
    /// Tombstone flag: non-zero means soft-deleted.
    pub deleted: u8,
}
/// Serializable HNSW graph index over an external [`VectorStorage`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct HnswIndex {
    /// Build/search parameters; `dimensions` must match the storage.
    pub config: HnswConfig,
    /// Node table; a `NodeId` is an index into this vector.
    pub(crate) nodes: Vec<HnswNode>,
    /// Shared arena holding every node's encoded neighbor list.
    pub(crate) neighbors: NeighborPool,
    /// Search entry node; `None` while the index is empty.
    pub(crate) entry_point: Option<NodeId>,
    /// Highest layer of any node currently in the graph.
    pub(crate) max_layer: u8,
    /// 1 / ln(m): multiplier for the random level distribution.
    pub(crate) level_mult: f32,
    // Deterministically seeded so builds are reproducible (seed 42).
    rng: ChaCha8Rng,
    /// Number of tombstoned (soft-deleted) nodes.
    #[serde(default)]
    pub(crate) deleted_count: usize,
    // Tombstone ratio above which compaction is recommended.
    #[serde(default = "default_compaction_threshold")]
    compaction_threshold: f64,
    /// Per-vector key/value metadata used by filtered search.
    #[serde(default)]
    pub(crate) metadata: MetadataStore,
    /// Optional binary-quantized copy of the vectors. Not serialized:
    /// after deserialization this is `None` until `enable_bq` is called.
    #[serde(skip)]
    pub(crate) bq_storage: Option<BinaryVectorStorage>,
}
/// Serde default for `HnswIndex::compaction_threshold`: compaction is
/// recommended once 30% of the nodes are tombstones.
fn default_compaction_threshold() -> f64 {
    const DEFAULT_RATIO: f64 = 0.3;
    DEFAULT_RATIO
}
/// Per-id outcome recorded while soft-deleting a batch.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BatchDeleteError {
    /// The id does not exist in the index.
    NotFound(VectorId),
    /// The id was already tombstoned before this batch.
    AlreadyDeleted(VectorId),
    /// An unexpected failure; the string carries the cause.
    InternalError(VectorId, String),
}
/// Aggregate outcome of a batch soft-delete.
#[derive(Debug, Clone)]
pub struct BatchDeleteResult {
    /// Vectors newly tombstoned by this batch.
    pub deleted: usize,
    /// Vectors that were already tombstoned.
    pub already_deleted: usize,
    /// Ids that did not resolve to any node.
    pub invalid_ids: usize,
    /// Number of ids supplied (including duplicates).
    pub total: usize,
    /// Number of distinct ids after deduplication.
    pub unique_count: usize,
    /// Per-id error records for the non-success cases.
    pub errors: Vec<BatchDeleteError>,
}
impl BatchDeleteResult {
    /// An all-zero result with no recorded errors.
    #[must_use]
    pub fn new() -> Self {
        Self {
            deleted: 0,
            already_deleted: 0,
            invalid_ids: 0,
            total: 0,
            unique_count: 0,
            errors: Vec::new(),
        }
    }
    /// True when every id in the batch resolved to a known vector.
    #[must_use]
    pub fn all_valid(&self) -> bool {
        self.invalid_ids == 0
    }
    /// True when at least one vector was newly tombstoned.
    #[must_use]
    pub fn any_deleted(&self) -> bool {
        self.deleted != 0
    }
    /// True when something went wrong beyond already-deleted ids:
    /// unknown ids, or any error entry that is not `AlreadyDeleted`.
    #[must_use]
    pub fn has_errors(&self) -> bool {
        if self.invalid_ids > 0 {
            return true;
        }
        self.errors
            .iter()
            .any(|e| !matches!(e, BatchDeleteError::AlreadyDeleted(_)))
    }
}
/// `Default` mirrors [`BatchDeleteResult::new`]: all counters zero.
impl Default for BatchDeleteResult {
    fn default() -> Self {
        Self::new()
    }
}
impl HnswIndex {
/// Builds an empty index for `storage`, validating the configuration.
///
/// # Errors
/// `ConfigMismatch` when config and storage disagree on dimensions;
/// `InvalidConfig` when `m <= 1` or `m0 < m`.
pub fn new(config: HnswConfig, storage: &VectorStorage) -> Result<Self, GraphError> {
    // Index and backing storage must agree on dimensionality up front.
    if config.dimensions != storage.dimensions() {
        return Err(GraphError::ConfigMismatch {
            expected: storage.dimensions(),
            actual: config.dimensions,
        });
    }
    if config.m <= 1 {
        return Err(GraphError::InvalidConfig(format!(
            "m must be > 1, got {}",
            config.m
        )));
    }
    if config.m0 < config.m {
        return Err(GraphError::InvalidConfig(format!(
            "m0 must be >= m, got {} < {}",
            config.m0, config.m
        )));
    }
    // Standard HNSW level multiplier: 1 / ln(m). The m > 1 guard above
    // makes ln(m) positive, but keep the defensive branch anyway.
    let m = config.m as f32;
    let level_mult = if m > 1.0 { m.ln().recip() } else { 0.0 };
    Ok(Self {
        config,
        nodes: Vec::new(),
        neighbors: NeighborPool::new(),
        entry_point: None,
        max_layer: 0,
        level_mult,
        // Fixed seed keeps level sampling (and thus builds) reproducible.
        rng: ChaCha8Rng::seed_from_u64(42),
        deleted_count: 0,
        compaction_threshold: default_compaction_threshold(),
        metadata: MetadataStore::new(),
        bq_storage: None,
    })
}
pub fn with_metadata(
config: HnswConfig,
storage: &VectorStorage,
metadata: MetadataStore,
) -> Result<Self, GraphError> {
if config.dimensions != storage.dimensions() {
return Err(GraphError::ConfigMismatch {
expected: storage.dimensions(),
actual: config.dimensions,
});
}
if config.m <= 1 {
return Err(GraphError::InvalidConfig(format!(
"m must be > 1, got {}",
config.m
)));
}
if config.m0 < config.m {
return Err(GraphError::InvalidConfig(format!(
"m0 must be >= m, got {} < {}",
config.m0, config.m
)));
}
let m_float = config.m as f32;
let level_mult = if m_float > 1.0 {
1.0 / m_float.ln()
} else {
0.0
};
let rng = ChaCha8Rng::seed_from_u64(42);
Ok(Self {
config,
nodes: Vec::new(),
neighbors: NeighborPool::new(),
entry_point: None,
max_layer: 0,
level_mult,
rng,
deleted_count: 0,
compaction_threshold: default_compaction_threshold(),
metadata, bq_storage: None,
})
}
/// Creates an index with binary quantization enabled from the start.
///
/// # Errors
/// `InvalidConfig` unless the dimension is a multiple of 8 (the BQ
/// storage works in whole bytes); otherwise the same errors as
/// [`Self::new`], plus `Storage` if the BQ storage cannot be created.
pub fn with_bq(config: HnswConfig, storage: &VectorStorage) -> Result<Self, GraphError> {
    let dimension = config.dimensions as usize;
    if dimension % 8 != 0 {
        return Err(GraphError::InvalidConfig(format!(
            "dimension must be divisible by 8 for binary quantization, got {dimension}"
        )));
    }
    let mut index = Self::new(config, storage)?;
    index.bq_storage = Some(
        BinaryVectorStorage::new(dimension).map_err(|e| GraphError::Storage(e.to_string()))?,
    );
    Ok(index)
}
/// Retroactively builds binary-quantized copies of all stored vectors.
///
/// The BQ storage is positionally aligned with the node table, so
/// tombstoned nodes get an all-zero placeholder to keep indices in sync.
///
/// # Errors
/// `InvalidConfig` unless the dimension is a multiple of 8;
/// `Storage`/`Quantization` when building the BQ copies fails.
pub fn enable_bq(&mut self, storage: &VectorStorage) -> Result<(), GraphError> {
    let dimension = self.config.dimensions as usize;
    if dimension % 8 != 0 {
        return Err(GraphError::InvalidConfig(format!(
            "dimension must be divisible by 8 for binary quantization, got {dimension}"
        )));
    }
    let mut bq_storage =
        BinaryVectorStorage::new(dimension).map_err(|e| GraphError::Storage(e.to_string()))?;
    for node in &self.nodes {
        if node.deleted != 0 {
            // Placeholder entry preserves index alignment for deleted nodes.
            let zeros = vec![0u8; dimension / 8];
            bq_storage
                .insert_raw(&zeros)
                .map_err(|e| GraphError::Storage(e.to_string()))?;
            continue;
        }
        let vector = storage.get_vector(node.vector_id);
        let bv = BinaryVector::quantize(&vector)
            .map_err(|e| GraphError::Quantization(e.to_string()))?;
        bq_storage
            .insert(&bv)
            .map_err(|e| GraphError::Storage(e.to_string()))?;
    }
    // Only swap in the new storage once every vector quantized cleanly.
    self.bq_storage = Some(bq_storage);
    Ok(())
}
/// Whether binary quantization is currently enabled.
#[must_use]
#[inline]
pub fn has_bq(&self) -> bool {
    self.bq_storage.is_some()
}
/// The binary-quantized storage, when enabled.
#[must_use]
#[inline]
pub fn bq_storage(&self) -> Option<&BinaryVectorStorage> {
    self.bq_storage.as_ref()
}
/// Samples a random HNSW level from the exponential distribution
/// `floor(-ln(r) * level_mult)`, capped at 16.
#[must_use]
pub fn get_random_level(&mut self) -> u8 {
    // r is drawn from (EPSILON, 1.0] so ln(r) stays finite.
    let sample: f32 = self.rng.gen_range(f32::EPSILON..=1.0);
    let level = (-sample.ln() * self.level_mult).floor().min(16.0);
    level as u8
}
/// Appends a node for `vector_id` at `max_layer` and returns its id.
///
/// # Errors
/// `InvalidVectorId` for the zero sentinel; `CapacityExceeded` when
/// the node table is full (`NodeId` is a `u32` index).
pub fn add_node(&mut self, vector_id: VectorId, max_layer: u8) -> Result<NodeId, GraphError> {
    if vector_id == VectorId::INVALID {
        return Err(GraphError::InvalidVectorId);
    }
    let slot = self.nodes.len();
    if slot >= u32::MAX as usize {
        return Err(GraphError::CapacityExceeded);
    }
    #[allow(clippy::cast_possible_truncation)]
    let id = NodeId(slot as u32);
    self.nodes.push(HnswNode {
        vector_id,
        neighbor_offset: 0,
        neighbor_len: 0,
        max_layer,
        deleted: 0,
    });
    // Track the tallest node so searches know where to start descending.
    self.max_layer = self.max_layer.max(max_layer);
    Ok(id)
}
/// Replaces `node_id`'s neighbor list with `neighbors`.
///
/// The new encoding is allocated and written first; the old region is
/// freed only afterwards, so the pool never reuses live data mid-write.
///
/// # Errors
/// `InvalidVectorId` when `node_id` is out of bounds
/// (NOTE(review): `NodeIdOutOfBounds` exists and may fit better, but
/// changing the variant would affect callers matching on it), or any
/// allocation error from the pool.
pub fn set_neighbors(
    &mut self,
    node_id: NodeId,
    neighbors: &[NodeId],
) -> Result<(), GraphError> {
    if node_id.0 as usize >= self.nodes.len() {
        return Err(GraphError::InvalidVectorId);
    }
    let neighbor_u32s: Vec<u32> = neighbors.iter().map(|n| n.0).collect();
    let encoded = NeighborPool::encode_neighbors(&neighbor_u32s);
    let (offset, capacity) = self.neighbors.alloc(encoded.len())?;
    let start = offset as usize;
    let end = start + encoded.len();
    self.neighbors.buffer[start..end].copy_from_slice(&encoded);
    let node = &mut self.nodes[node_id.0 as usize];
    // Free the previous allocation only after the new one is in place.
    if node.neighbor_len > 0 {
        self.neighbors.free(node.neighbor_offset, node.neighbor_len);
    }
    node.neighbor_offset = offset;
    node.neighbor_len = capacity;
    Ok(())
}
/// Looks up a node by id; `None` for the INVALID sentinel or an
/// out-of-range index.
#[must_use]
pub fn get_node(&self, id: NodeId) -> Option<&HnswNode> {
    match id {
        NodeId::INVALID => None,
        _ => self.nodes.get(id.0 as usize),
    }
}
/// Decodes `node`'s full neighbor list (all layers) from the pool.
///
/// # Errors
/// `NeighborError` when the node's region lies outside the pool buffer.
pub fn get_neighbors(&self, node: &HnswNode) -> Result<Vec<NodeId>, GraphError> {
    let start = node.neighbor_offset as usize;
    let end = start + node.neighbor_len as usize;
    // Checked slicing doubles as the corruption guard.
    let region = self
        .neighbors
        .buffer
        .get(start..end)
        .ok_or(GraphError::NeighborError)?;
    Ok(NeighborPool::decode_neighbors(region)
        .into_iter()
        .map(NodeId)
        .collect())
}
/// Decodes only the neighbors `node` has on `layer`.
///
/// # Errors
/// `NeighborError` when the node's region lies outside the pool buffer.
pub fn get_neighbors_layer(
    &self,
    node: &HnswNode,
    layer: u8,
) -> Result<Vec<NodeId>, GraphError> {
    let start = node.neighbor_offset as usize;
    let end = start + node.neighbor_len as usize;
    // Checked slicing doubles as the corruption guard.
    let region = self
        .neighbors
        .buffer
        .get(start..end)
        .ok_or(GraphError::NeighborError)?;
    Ok(NeighborPool::decode_layer(region, layer)
        .into_iter()
        .map(NodeId)
        .collect())
}
/// Total number of nodes, including tombstones.
#[must_use]
pub fn node_count(&self) -> usize {
    self.nodes.len()
}
/// Current search entry node, if any.
#[must_use]
pub fn entry_point(&self) -> Option<NodeId> {
    self.entry_point
}
/// Sets the search entry node.
pub fn set_entry_point(&mut self, id: NodeId) {
    self.entry_point = Some(id);
}
/// Highest layer seen across all nodes so far.
#[must_use]
pub fn max_layer(&self) -> u8 {
    self.max_layer
}
/// Read-only access to the metadata store.
#[must_use]
pub fn metadata(&self) -> &MetadataStore {
    &self.metadata
}
/// Mutable access to the metadata store.
pub fn metadata_mut(&mut self) -> &mut MetadataStore {
    &mut self.metadata
}
/// Inserts `vector` and attaches `metadata` to the new vector id.
///
/// All metadata is validated *before* the vector is inserted, so a
/// validation failure leaves the index and storage untouched.
///
/// # Errors
/// `MetadataValidation` for too many keys or an invalid key/value;
/// any error from the underlying `insert`.
pub fn insert_with_metadata(
    &mut self,
    storage: &mut VectorStorage,
    vector: &[f32],
    metadata: HashMap<String, MetadataValue>,
) -> Result<VectorId, GraphError> {
    use crate::metadata::validation::{validate_key_value, MAX_KEYS_PER_VECTOR};
    if metadata.len() > MAX_KEYS_PER_VECTOR {
        // vector_id is 0 here because no id has been assigned yet.
        return Err(GraphError::MetadataValidation(MetadataError::TooManyKeys {
            vector_id: 0,
            count: metadata.len(),
            max: MAX_KEYS_PER_VECTOR,
        }));
    }
    for (key, value) in &metadata {
        validate_key_value(key, value)?;
    }
    let vector_id = self.insert(vector, storage)?;
    // Metadata keys are u32 — assumes vector ids stay below 2^32;
    // TODO confirm against `insert`'s id-assignment strategy.
    #[allow(clippy::cast_possible_truncation)]
    let metadata_id = vector_id.0 as u32;
    for (key, value) in metadata {
        self.metadata
            .insert(metadata_id, &key, value)
            .map_err(GraphError::MetadataValidation)?;
    }
    Ok(vector_id)
}
/// k-NN search restricted to vectors whose metadata passes `filter`.
///
/// Overfetches candidates based on the filter's estimated selectivity
/// and post-filters them, stopping as soon as `k` results pass.
///
/// # Errors
/// `FilterParse` for an invalid expression; any error from `search`.
/// Per-vector evaluation failures are logged at debug level and
/// treated as non-matches rather than aborting the search.
pub fn search_filtered(
    &self,
    storage: &VectorStorage,
    query: &[f32],
    filter: &str,
    k: usize,
) -> Result<Vec<(VectorId, f32)>, GraphError> {
    use crate::filter::{
        estimate_filter_selectivity, evaluate, overfetch_from_selectivity, parse,
    };
    let expr = parse(filter).map_err(|e| GraphError::FilterParse(e.to_string()))?;
    // Selective filters reject most candidates, so fetch more of them.
    let selectivity = estimate_filter_selectivity(&expr);
    let overfetch_factor = overfetch_from_selectivity(selectivity);
    let overfetched_k = k.saturating_mul(overfetch_factor);
    let candidates = self.search(query, overfetched_k, storage)?;
    let mut passing_results = Vec::with_capacity(k);
    for result in candidates {
        #[allow(clippy::cast_possible_truncation)]
        let metadata_id = result.vector_id.0 as u32;
        // Vectors without metadata evaluate against an empty map.
        let metadata = self
            .metadata
            .get_all(metadata_id)
            .cloned()
            .unwrap_or_default();
        match evaluate(&expr, &metadata) {
            Ok(true) => {
                passing_results.push((result.vector_id, result.distance));
                if passing_results.len() >= k {
                    break;
                }
            }
            Ok(false) => {
            }
            Err(e) => {
                // Evaluation failure is non-fatal: skip the vector.
                log::debug!(
                    "Filter evaluation failed for vector {}: {}",
                    result.vector_id.0,
                    e
                );
            }
        }
    }
    Ok(passing_results)
}
/// Finds the node for `vector_id` by linear scan (O(n) in node count).
fn get_node_mut(&mut self, vector_id: VectorId) -> Result<&mut HnswNode, GraphError> {
    self.nodes
        .iter_mut()
        .find(|n| n.vector_id == vector_id)
        .ok_or(GraphError::InvalidVectorId)
}
/// Immutable counterpart of [`Self::get_node_mut`]; also O(n).
fn get_node_by_vector_id(&self, vector_id: VectorId) -> Result<&HnswNode, GraphError> {
    self.nodes
        .iter()
        .find(|n| n.vector_id == vector_id)
        .ok_or(GraphError::InvalidVectorId)
}
/// Tombstones `vector_id` and drops its metadata.
///
/// Returns `Ok(true)` when the tombstone was newly set and `Ok(false)`
/// when the vector was already deleted (idempotent).
///
/// # Errors
/// `InvalidVectorId` when no node has this vector id.
pub fn soft_delete(&mut self, vector_id: VectorId) -> Result<bool, GraphError> {
    let node = self.get_node_mut(vector_id)?;
    if node.deleted != 0 {
        return Ok(false);
    }
    node.deleted = 1;
    self.deleted_count += 1;
    #[allow(clippy::cast_possible_truncation)]
    let metadata_id = vector_id.0 as u32;
    self.metadata.delete_all(metadata_id);
    Ok(true)
}
pub fn soft_delete_batch(&mut self, ids: &[VectorId]) -> BatchDeleteResult {
const MAX_BATCH_SIZE: usize = 10_000_000;
let mut result = BatchDeleteResult {
deleted: 0,
already_deleted: 0,
invalid_ids: 0,
total: ids.len(),
unique_count: 0,
errors: Vec::new(),
};
if ids.is_empty() {
return result;
}
if ids.len() > MAX_BATCH_SIZE {
result.invalid_ids = ids.len();
result.errors.push(BatchDeleteError::InternalError(
VectorId(0),
format!(
"Batch size {} exceeds maximum {}",
ids.len(),
MAX_BATCH_SIZE
),
));
return result;
}
let mut seen = HashSet::with_capacity(ids.len().min(1024)); let mut unique_ids = Vec::with_capacity(ids.len().min(1024));
for &id in ids {
if seen.insert(id) {
unique_ids.push(id);
}
}
result.unique_count = unique_ids.len();
let estimated_errors = unique_ids.len() / 10; let mut valid_ids = Vec::with_capacity(unique_ids.len());
let mut already_deleted_count = 0;
result.errors = Vec::with_capacity(estimated_errors);
for &id in &unique_ids {
match self.is_deleted(id) {
Ok(true) => {
already_deleted_count += 1;
result.errors.push(BatchDeleteError::AlreadyDeleted(id));
}
Ok(false) => {
valid_ids.push(id);
}
Err(_) => {
result.invalid_ids += 1;
result.errors.push(BatchDeleteError::NotFound(id));
}
}
}
result.already_deleted = already_deleted_count;
for &id in &valid_ids {
match self.soft_delete(id) {
Ok(true) => result.deleted += 1,
Ok(false) => {
result.already_deleted += 1;
}
Err(e) => {
result.errors.push(BatchDeleteError::InternalError(
id,
format!("Unexpected error after validation: {e:?}"),
));
}
}
}
result
}
/// Soft-deletes a batch of vectors, invoking `callback(done, total)`
/// roughly every 10% of progress (and at start/end).
///
/// Pipeline: reject oversized batches, deduplicate ids preserving
/// first-seen order, partition into valid / already-deleted / unknown,
/// then tombstone the valid ids one by one.
pub fn soft_delete_batch_with_progress<F>(
    &mut self,
    ids: &[VectorId],
    mut callback: F,
) -> BatchDeleteResult
where
    F: FnMut(usize, usize),
{
    // Hard cap to protect against pathological batch sizes.
    const MAX_BATCH_SIZE: usize = 10_000_000;
    let mut result = BatchDeleteResult {
        deleted: 0,
        already_deleted: 0,
        invalid_ids: 0,
        total: ids.len(),
        unique_count: 0,
        errors: Vec::new(),
    };
    if ids.is_empty() {
        callback(0, 0);
        return result;
    }
    if ids.len() > MAX_BATCH_SIZE {
        // Reject the whole batch; every id counts as invalid.
        result.invalid_ids = ids.len();
        result.errors.push(BatchDeleteError::InternalError(
            VectorId(0),
            format!(
                "Batch size {} exceeds maximum {}",
                ids.len(),
                MAX_BATCH_SIZE
            ),
        ));
        return result;
    }
    // Deduplicate while keeping first-seen order.
    let mut seen = HashSet::with_capacity(ids.len().min(1024));
    let mut unique_ids = Vec::with_capacity(ids.len().min(1024));
    for &id in ids {
        if seen.insert(id) {
            unique_ids.push(id);
        }
    }
    result.unique_count = unique_ids.len();
    // Heuristic: expect ~10% of ids to produce error records.
    let estimated_errors = unique_ids.len() / 10;
    let mut valid_ids = Vec::with_capacity(unique_ids.len());
    let mut already_deleted_count = 0;
    result.errors = Vec::with_capacity(estimated_errors);
    // Classify each id before mutating anything.
    for &id in &unique_ids {
        match self.is_deleted(id) {
            Ok(true) => {
                already_deleted_count += 1;
                result.errors.push(BatchDeleteError::AlreadyDeleted(id));
            }
            Ok(false) => {
                valid_ids.push(id);
            }
            Err(_) => {
                result.invalid_ids += 1;
                result.errors.push(BatchDeleteError::NotFound(id));
            }
        }
    }
    result.already_deleted = already_deleted_count;
    let total_to_process = valid_ids.len();
    // Report at ~10% intervals, and always on the final item.
    let interval = (total_to_process / 10).max(1);
    let mut last_callback = 0;
    for (i, &id) in valid_ids.iter().enumerate() {
        match self.soft_delete(id) {
            Ok(true) => result.deleted += 1,
            Ok(false) => {
                // Raced with an earlier delete in this same loop — rare.
                result.already_deleted += 1;
            }
            Err(e) => {
                result.errors.push(BatchDeleteError::InternalError(
                    id,
                    format!("Unexpected error after validation: {e:?}"),
                ));
            }
        }
        if i + 1 - last_callback >= interval || i + 1 == total_to_process {
            callback(i + 1, total_to_process);
            last_callback = i + 1;
        }
    }
    result
}
/// Whether `vector_id` is tombstoned.
///
/// # Errors
/// `InvalidVectorId` when no node has this vector id.
pub fn is_deleted(&self, vector_id: VectorId) -> Result<bool, GraphError> {
    let node = self.get_node_by_vector_id(vector_id)?;
    Ok(node.deleted != 0)
}
/// Number of soft-deleted nodes.
#[must_use]
pub fn deleted_count(&self) -> usize {
    self.deleted_count
}
/// Fraction of nodes that are tombstones (0.0 for an empty index).
#[must_use]
pub fn tombstone_ratio(&self) -> f64 {
    let total = self.node_count();
    if total == 0 {
        return 0.0;
    }
    self.deleted_count as f64 / total as f64
}
/// Number of live (non-deleted) nodes.
#[must_use]
pub fn live_count(&self) -> usize {
    self.node_count().saturating_sub(self.deleted_count)
}
/// Upper bound on how much [`Self::adjusted_k`] may inflate `k`.
pub const MAX_ADJUSTED_K_MULTIPLIER: usize = 10;
/// Scales `k` up by `total / live` to compensate for tombstoned nodes
/// that searches will traverse but discard, capped at
/// `k * MAX_ADJUSTED_K_MULTIPLIER`.
#[must_use]
pub fn adjusted_k(&self, k: usize) -> usize {
    // No tombstones means no compensation needed.
    if self.deleted_count == 0 {
        return k;
    }
    let live = self.live_count();
    if live == 0 {
        // Everything is deleted; scaling is meaningless.
        return k;
    }
    let scaled = k.saturating_mul(self.node_count()) / live;
    scaled.min(k.saturating_mul(Self::MAX_ADJUSTED_K_MULTIPLIER))
}
/// Marks `id` deleted directly in `storage`, bypassing the index's own
/// tombstone bookkeeping — hence deprecated in favor of `soft_delete`.
#[deprecated(since = "0.3.0", note = "Use soft_delete() instead")]
pub fn delete_in_storage(&self, id: VectorId, storage: &mut VectorStorage) -> bool {
    storage.mark_deleted(id)
}
/// Prints index size statistics to stdout (debugging aid).
pub fn log_stats(&self) {
    println!("Index Stats:");
    println!("  Node Count: {}", self.nodes.len());
    println!("  Neighbor Buffer Len: {}", self.neighbors.buffer.len());
    println!(
        "  Neighbor Buffer Cap: {}",
        self.neighbors.buffer.capacity()
    );
    println!("  Total Memory Usage: {} bytes", self.memory_usage());
}
/// Approximate heap footprint in bytes: node table *capacity* plus the
/// neighbor pool. Does not account for the metadata store or BQ storage.
#[must_use]
pub fn memory_usage(&self) -> usize {
    let nodes_size = self.nodes.capacity() * std::mem::size_of::<HnswNode>();
    let neighbors_size = self.neighbors.memory_usage();
    std::mem::size_of::<Self>() + nodes_size + neighbors_size
}
/// Whether the tombstone ratio exceeds the compaction threshold.
#[must_use]
pub fn needs_compaction(&self) -> bool {
    self.tombstone_ratio() > self.compaction_threshold
}
/// Sets the compaction threshold, clamped to [0.01, 0.99].
pub fn set_compaction_threshold(&mut self, ratio: f64) {
    self.compaction_threshold = ratio.clamp(0.01, 0.99);
}
/// Current compaction threshold.
#[must_use]
pub fn compaction_threshold(&self) -> f64 {
    self.compaction_threshold
}
/// A human-readable warning when compaction is recommended, otherwise
/// `None`.
#[must_use]
pub fn compaction_warning(&self) -> Option<String> {
    if !self.needs_compaction() {
        return None;
    }
    Some(format!(
        "Compaction recommended: tombstone ratio {:.1}% exceeds threshold {:.1}%. \
         Call compact() to rebuild index without tombstones.",
        self.tombstone_ratio() * 100.0,
        self.compaction_threshold * 100.0
    ))
}
/// Rebuilds the index and storage without tombstoned vectors.
///
/// Returns the new index, the new storage, and a summary; the original
/// index is untouched and the caller swaps the new pair in. When no
/// nodes are deleted, the index is still rebuilt by re-inserting every
/// vector, and `duration_ms` is reported as 0 in that fast path.
///
/// # Errors
/// Any error from constructing the new index or re-inserting vectors.
pub fn compact(
    &self,
    storage: &VectorStorage,
) -> Result<(HnswIndex, VectorStorage, CompactionResult), GraphError> {
    // Wall-clock timing: Instant natively, performance.now() on wasm.
    #[cfg(not(target_arch = "wasm32"))]
    let start = std::time::Instant::now();
    #[cfg(target_arch = "wasm32")]
    let start_ms = web_sys::window()
        .and_then(|w| w.performance())
        .map(|p| p.now())
        .unwrap_or(0.0);
    let original_deleted = self.deleted_count;
    let original_total = self.node_count();
    if original_deleted == 0 {
        // Fast path: no tombstones, rebuild verbatim.
        let config = self.config.clone();
        let mut new_storage = VectorStorage::new(&config, None);
        let mut new_index = HnswIndex::new(config, &new_storage)?;
        new_index.compaction_threshold = self.compaction_threshold;
        for node in &self.nodes {
            let vec = storage.get_vector(node.vector_id);
            new_index.insert(&vec, &mut new_storage)?;
        }
        return Ok((
            new_index,
            new_storage,
            CompactionResult {
                tombstones_removed: 0,
                new_size: original_total,
                duration_ms: 0,
            },
        ));
    }
    // Materialize live vectors up front so the rebuild loop below does
    // not borrow `self` while inserting into the new index.
    let live_vectors: Vec<Vec<f32>> = self
        .nodes
        .iter()
        .filter(|node| node.deleted == 0)
        .map(|node| {
            let vec = storage.get_vector(node.vector_id);
            vec.into_owned()
        })
        .collect();
    let new_size = live_vectors.len();
    let config = self.config.clone();
    let mut new_storage = VectorStorage::new(&config, None);
    let mut new_index = HnswIndex::new(config, &new_storage)?;
    new_index.compaction_threshold = self.compaction_threshold;
    for vector in live_vectors {
        new_index.insert(&vector, &mut new_storage)?;
    }
    #[cfg(not(target_arch = "wasm32"))]
    let duration_ms = start.elapsed().as_millis() as u64;
    #[cfg(target_arch = "wasm32")]
    let duration_ms = web_sys::window()
        .and_then(|w| w.performance())
        .map(|p| (p.now() - start_ms) as u64)
        .unwrap_or(0);
    Ok((
        new_index,
        new_storage,
        CompactionResult {
            tombstones_removed: original_deleted,
            new_size,
            duration_ms,
        },
    ))
}
/// Inserts `vector` after validating that the caller-supplied `id` is
/// non-zero, not already present, and that dimensions match.
///
/// NOTE(review): `id` is validated but never passed to `insert`, which
/// assigns its own id — this only honors the requested id if `insert`
/// assigns ids sequentially in lockstep with the caller; verify against
/// `insert`'s id policy.
///
/// # Errors
/// `InvalidVectorId` for the zero sentinel or a duplicate id;
/// `DimensionMismatch` when `vector` has the wrong length.
pub fn insert_with_id(
    &mut self,
    id: VectorId,
    vector: &[f32],
    storage: &mut VectorStorage,
) -> Result<VectorId, GraphError> {
    if id == VectorId::INVALID {
        return Err(GraphError::InvalidVectorId);
    }
    // O(n) duplicate scan over the node table.
    if self.nodes.iter().any(|n| n.vector_id == id) {
        return Err(GraphError::InvalidVectorId);
    }
    if vector.len() != self.config.dimensions as usize {
        return Err(GraphError::DimensionMismatch {
            expected: self.config.dimensions as usize,
            actual: vector.len(),
        });
    }
    self.insert(vector, storage)
}
}
/// Abstraction over vector lookup, letting search run against
/// alternative backends.
pub trait VectorProvider {
    /// Returns the vector for `id`; `Cow` lets implementations hand out
    /// either a borrow or a freshly materialized copy.
    fn get_vector(&self, id: VectorId) -> Cow<'_, [f32]>;
    /// Whether `id` is tombstoned; defaults to "never deleted".
    fn is_deleted(&self, id: VectorId) -> bool {
        let _ = id;
        false
    }
    /// A pre-quantized form of the vector, or `None` when the backend
    /// keeps no quantized copy (the default).
    fn get_quantized_vector(&self, id: VectorId) -> Option<&[u8]> {
        let _ = id;
        None
    }
    /// Quantizes `query` into `output` and returns the filled slice;
    /// `None` (the default) when quantized queries are unsupported.
    fn quantize_query<'a>(&self, query: &[f32], output: &'a mut Vec<u8>) -> Option<&'a [u8]> {
        let _ = query;
        let _ = output;
        None
    }
}
use crate::batch::BatchInsertable;
use crate::error::BatchError;
impl HnswIndex {
/// Dimensionality this index was configured with.
#[must_use]
pub fn dimensions(&self) -> u32 {
    self.config.dimensions
}
/// Number of nodes, including tombstones (same as `node_count`).
#[must_use]
pub fn len(&self) -> usize {
    self.nodes.len()
}
/// True when the index holds no nodes at all.
#[must_use]
pub fn is_empty(&self) -> bool {
    self.nodes.is_empty()
}
/// Maximum node count: `NodeId` is a `u32` index.
#[must_use]
pub const fn capacity(&self) -> usize {
    u32::MAX as usize
}
/// Whether any node (live or tombstoned) carries vector id `id`.
/// O(n) linear scan over the node table.
#[must_use]
pub fn contains_id(&self, id: u64) -> bool {
    self.nodes.iter().any(|n| n.vector_id.0 == id)
}
/// Returns a description of the first non-finite component (NaN is
/// reported before infinity for the same element), or `None` when every
/// component is finite.
fn validate_vector(vector: &[f32]) -> Option<String> {
    vector.iter().enumerate().find_map(|(i, &val)| {
        if val.is_nan() {
            Some(format!("NaN at index {i}"))
        } else if val.is_infinite() {
            Some(format!("Infinity at index {i}"))
        } else {
            None
        }
    })
}
}
impl BatchInsertable for HnswIndex {
    /// Bulk insert with best-effort semantics: invalid entries after the
    /// first vector are skipped rather than failing the batch.
    ///
    /// Skipped (not errors): duplicate ids within the batch or already in
    /// the index, id 0, wrong dimensions (except for the first vector),
    /// and NaN/infinite components. Returns the assigned ids of the
    /// vectors actually inserted.
    ///
    /// # Errors
    /// `DimensionMismatch` when the *first* vector has the wrong length
    /// (treated as a malformed batch); `CapacityExceeded` when the batch
    /// cannot fit; `InternalError` when an insert fails mid-batch.
    fn batch_insert<I, F>(
        &mut self,
        vectors: I,
        storage: &mut VectorStorage,
        mut progress_callback: Option<F>,
    ) -> Result<Vec<u64>, BatchError>
    where
        I: IntoIterator<Item = (u64, Vec<f32>)>,
        F: FnMut(usize, usize),
    {
        let batch: Vec<(u64, Vec<f32>)> = vectors.into_iter().collect();
        let total = batch.len();
        if total == 0 {
            return Ok(vec![]);
        }
        // Only the first vector's dimension is a hard error; later
        // mismatches are silently skipped in the loop below.
        let expected_dim = self.config.dimensions as usize;
        let (first_id, first_vec) = &batch[0];
        if first_vec.len() != expected_dim {
            return Err(BatchError::DimensionMismatch {
                expected: expected_dim,
                actual: first_vec.len(),
                vector_id: *first_id,
            });
        }
        let current_count = self.nodes.len();
        if current_count.saturating_add(total) > self.capacity() {
            return Err(BatchError::CapacityExceeded {
                current: current_count,
                max: self.capacity(),
            });
        }
        let mut seen_ids: HashSet<u64> = HashSet::with_capacity(total);
        let mut inserted_ids: Vec<u64> = Vec::with_capacity(total);
        if let Some(ref mut callback) = progress_callback {
            callback(0, total);
        }
        // Report roughly every 10% of the batch.
        let progress_interval = if total >= 10 { total / 10 } else { 1 };
        for (id, vector) in batch {
            if !seen_ids.insert(id) {
                continue;
            }
            if id == 0 {
                continue;
            }
            if self.contains_id(id) {
                continue;
            }
            if vector.len() != expected_dim {
                continue;
            }
            if Self::validate_vector(&vector).is_some() {
                continue;
            }
            match self.insert(&vector, storage) {
                Ok(assigned_id) => {
                    inserted_ids.push(assigned_id.0);
                }
                Err(e) => {
                    return Err(BatchError::InternalError {
                        message: format!("HNSW insert failed for vector {id}: {e}"),
                    });
                }
            }
            // NOTE(review): keyed to inserted_count, so a run of skipped
            // entries can re-fire the callback with the same count.
            let inserted_count = inserted_ids.len();
            if inserted_count % progress_interval == 0 || inserted_count == 1 {
                if let Some(ref mut callback) = progress_callback {
                    callback(inserted_count, total);
                }
            }
        }
        if let Some(ref mut callback) = progress_callback {
            callback(inserted_ids.len(), total);
        }
        Ok(inserted_ids)
    }
}
#[cfg(test)]
mod tests {
use super::*;
// The index must be shareable and sendable across threads.
#[test]
fn test_send_sync() {
    fn assert_send_sync<T: Send + Sync>() {}
    assert_send_sync::<HnswIndex>();
}
// A fresh index is empty with no entry point and layer 0.
#[test]
fn test_initialization() {
    let config = HnswConfig::new(128);
    let storage = VectorStorage::new(&config, None);
    let index = HnswIndex::new(config.clone(), &storage).unwrap();
    assert_eq!(index.node_count(), 0);
    assert_eq!(index.entry_point(), None);
    assert_eq!(index.max_layer(), 0);
}
// Config/storage dimension disagreement is rejected at construction.
#[test]
fn test_dimension_mismatch() {
    let config_idx = HnswConfig::new(128);
    let config_store = HnswConfig::new(64);
    let storage = VectorStorage::new(&config_store, None);
    let result = HnswIndex::new(config_idx, &storage);
    assert!(matches!(
        result,
        Err(GraphError::ConfigMismatch {
            expected: 64,
            actual: 128
        })
    ));
}
// Random levels follow the expected exponential shape: level 0
// dominates and no sample reaches the cap.
#[test]
fn test_layer_distribution() {
    let config = HnswConfig::new(128);
    let storage = VectorStorage::new(&config, None);
    let mut index = HnswIndex::new(config, &storage).unwrap();
    let mut levels = vec![0u8; 1000];
    for l in &mut levels {
        *l = index.get_random_level();
    }
    #[allow(clippy::naive_bytecount)]
    let l0_count = levels.iter().filter(|&&l| l == 0).count();
    assert!(
        l0_count > 800,
        "Level 0 should be dominant (expected ~93% for M=16)"
    );
    let max = *levels.iter().max().unwrap();
    assert!(max < 16, "Level should be reasonable");
}
// Neighbor lists survive an encode/decode round trip, and replacing a
// node's list frees and rewrites it correctly.
#[test]
fn test_neighbor_roundtrip() {
    let config = HnswConfig::new(128);
    let storage = VectorStorage::new(&config, None);
    let mut index = HnswIndex::new(config, &storage).unwrap();
    let id1 = index.add_node(VectorId(1), 0).unwrap();
    let id2 = index.add_node(VectorId(2), 0).unwrap();
    let id3 = index.add_node(VectorId(3), 0).unwrap();
    let neighbors = vec![id2, id3];
    index.set_neighbors(id1, &neighbors).unwrap();
    {
        let node1 = index.get_node(id1).unwrap();
        let retrieved = index.get_neighbors(node1).unwrap();
        assert_eq!(retrieved, neighbors);
    }
    let neighbors2 = vec![id3];
    index.set_neighbors(id1, &neighbors2).unwrap();
    {
        let node1 = index.get_node(id1).unwrap();
        let retrieved2 = index.get_neighbors(node1).unwrap();
        assert_eq!(retrieved2, neighbors2);
    }
}
mod batch_insert_tests {
use super::*;
use crate::batch::BatchInsertable;
/// Builds a matching (index, storage) pair for a given dimensionality.
fn create_test_index_and_storage(dim: u32) -> (HnswIndex, VectorStorage) {
    let config = HnswConfig::new(dim);
    let storage = VectorStorage::new(&config, None);
    let index = HnswIndex::new(config, &storage).unwrap();
    (index, storage)
}
// An empty batch succeeds and inserts nothing.
#[test]
fn test_batch_insert_empty() {
    let (mut index, mut storage) = create_test_index_and_storage(128);
    let vectors: Vec<(u64, Vec<f32>)> = vec![];
    let result = index.batch_insert(vectors, &mut storage, None::<fn(usize, usize)>);
    assert!(result.is_ok());
    assert!(result.unwrap().is_empty());
    assert_eq!(index.node_count(), 0);
}
// One valid vector is inserted and its id reported.
#[test]
fn test_batch_insert_single_vector() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors = vec![(1u64, vec![1.0, 2.0, 3.0, 4.0])];
    let result = index.batch_insert(vectors, &mut storage, None::<fn(usize, usize)>);
    assert!(result.is_ok());
    let ids = result.unwrap();
    assert_eq!(ids.len(), 1);
    assert_eq!(index.node_count(), 1);
}
// All valid vectors in a batch are inserted.
#[test]
fn test_batch_insert_multiple_vectors() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors = vec![
        (1u64, vec![1.0, 2.0, 3.0, 4.0]),
        (2u64, vec![5.0, 6.0, 7.0, 8.0]),
        (3u64, vec![9.0, 10.0, 11.0, 12.0]),
    ];
    let result = index.batch_insert(vectors, &mut storage, None::<fn(usize, usize)>);
    assert!(result.is_ok());
    let ids = result.unwrap();
    assert_eq!(ids.len(), 3);
    assert_eq!(index.node_count(), 3);
}
// A wrong-length FIRST vector fails the whole batch with an error.
#[test]
fn test_batch_insert_dimension_mismatch_first_vector() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors = vec![
        (1u64, vec![1.0, 2.0, 3.0]),
        (2u64, vec![5.0, 6.0, 7.0, 8.0]),
    ];
    let result = index.batch_insert(vectors, &mut storage, None::<fn(usize, usize)>);
    assert!(result.is_err());
    match result.unwrap_err() {
        BatchError::DimensionMismatch {
            expected,
            actual,
            vector_id,
        } => {
            assert_eq!(expected, 4);
            assert_eq!(actual, 3);
            assert_eq!(vector_id, 1);
        }
        _ => panic!("Expected DimensionMismatch error"),
    }
    assert_eq!(index.node_count(), 0);
}
// A wrong-length LATER vector is skipped, not fatal.
#[test]
fn test_batch_insert_dimension_mismatch_later_vector_skipped() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors = vec![
        (1u64, vec![1.0, 2.0, 3.0, 4.0]),
        (2u64, vec![5.0, 6.0, 7.0]),
        (3u64, vec![9.0, 10.0, 11.0, 12.0]),
    ];
    let result = index.batch_insert(vectors, &mut storage, None::<fn(usize, usize)>);
    assert!(result.is_ok());
    let ids = result.unwrap();
    assert_eq!(ids.len(), 2);
    assert_eq!(index.node_count(), 2);
}
// Only the first occurrence of a duplicated id is inserted.
#[test]
fn test_batch_insert_duplicate_id_within_batch() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors = vec![
        (1u64, vec![1.0, 2.0, 3.0, 4.0]),
        (1u64, vec![5.0, 6.0, 7.0, 8.0]),
        (2u64, vec![9.0, 10.0, 11.0, 12.0]),
    ];
    let result = index.batch_insert(vectors, &mut storage, None::<fn(usize, usize)>);
    assert!(result.is_ok());
    let ids = result.unwrap();
    assert_eq!(ids.len(), 2);
    assert_eq!(index.node_count(), 2);
}
// Ids already present in the index are skipped on a second batch.
#[test]
fn test_batch_insert_duplicate_id_in_existing_index() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors1 = vec![(1u64, vec![1.0, 2.0, 3.0, 4.0])];
    let result1 = index.batch_insert(vectors1, &mut storage, None::<fn(usize, usize)>);
    assert!(result1.is_ok());
    assert_eq!(index.node_count(), 1);
    let vectors2 = vec![
        (1u64, vec![5.0, 6.0, 7.0, 8.0]),
        (2u64, vec![9.0, 10.0, 11.0, 12.0]),
    ];
    let result2 = index.batch_insert(vectors2, &mut storage, None::<fn(usize, usize)>);
    assert!(result2.is_ok());
    let ids = result2.unwrap();
    assert_eq!(ids.len(), 1);
    assert_eq!(index.node_count(), 2);
}
// Vectors containing NaN are skipped.
#[test]
fn test_batch_insert_invalid_vector_nan() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors = vec![
        (1u64, vec![1.0, 2.0, 3.0, 4.0]),
        (2u64, vec![f32::NAN, 6.0, 7.0, 8.0]),
        (3u64, vec![9.0, 10.0, 11.0, 12.0]),
    ];
    let result = index.batch_insert(vectors, &mut storage, None::<fn(usize, usize)>);
    assert!(result.is_ok());
    let ids = result.unwrap();
    assert_eq!(ids.len(), 2);
    assert_eq!(index.node_count(), 2);
}
// Vectors containing +infinity are skipped.
#[test]
fn test_batch_insert_invalid_vector_infinity() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors = vec![
        (1u64, vec![1.0, 2.0, 3.0, 4.0]),
        (2u64, vec![f32::INFINITY, 6.0, 7.0, 8.0]),
        (3u64, vec![9.0, 10.0, 11.0, 12.0]),
    ];
    let result = index.batch_insert(vectors, &mut storage, None::<fn(usize, usize)>);
    assert!(result.is_ok());
    let ids = result.unwrap();
    assert_eq!(ids.len(), 2);
    assert_eq!(index.node_count(), 2);
}
// Vectors containing -infinity are skipped.
#[test]
fn test_batch_insert_invalid_vector_neg_infinity() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors = vec![
        (1u64, vec![1.0, 2.0, 3.0, 4.0]),
        (2u64, vec![f32::NEG_INFINITY, 6.0, 7.0, 8.0]),
        (3u64, vec![9.0, 10.0, 11.0, 12.0]),
    ];
    let result = index.batch_insert(vectors, &mut storage, None::<fn(usize, usize)>);
    assert!(result.is_ok());
    let ids = result.unwrap();
    assert_eq!(ids.len(), 2);
    assert_eq!(index.node_count(), 2);
}
// Id 0 is the reserved invalid sentinel and is skipped.
#[test]
fn test_batch_insert_id_zero_skipped() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors = vec![
        (0u64, vec![1.0, 2.0, 3.0, 4.0]),
        (1u64, vec![5.0, 6.0, 7.0, 8.0]),
        (2u64, vec![9.0, 10.0, 11.0, 12.0]),
    ];
    let result = index.batch_insert(vectors, &mut storage, None::<fn(usize, usize)>);
    assert!(result.is_ok());
    let ids = result.unwrap();
    assert_eq!(ids.len(), 2);
    assert_eq!(index.node_count(), 2);
}
// The progress callback fires at start (0, total) and end (total, total).
#[test]
fn test_batch_insert_progress_callback_called() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors: Vec<(u64, Vec<f32>)> =
        (1..=10).map(|i| (i as u64, vec![i as f32; 4])).collect();
    let mut progress_calls: Vec<(usize, usize)> = vec![];
    let result = index.batch_insert(
        vectors,
        &mut storage,
        Some(|current, total| {
            progress_calls.push((current, total));
        }),
    );
    assert!(result.is_ok());
    assert_eq!(index.node_count(), 10);
    assert!(!progress_calls.is_empty());
    assert_eq!(progress_calls[0], (0, 10));
    let last = progress_calls.last().unwrap();
    assert_eq!(*last, (10, 10));
}
// Even a single-item batch reports both start and completion.
#[test]
fn test_batch_insert_progress_callback_single_vector() {
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let vectors = vec![(1u64, vec![1.0, 2.0, 3.0, 4.0])];
    let mut progress_calls: Vec<(usize, usize)> = vec![];
    let result = index.batch_insert(
        vectors,
        &mut storage,
        Some(|current, total| {
            progress_calls.push((current, total));
        }),
    );
    assert!(result.is_ok());
    assert_eq!(index.node_count(), 1);
    assert!(progress_calls.len() >= 2);
    assert_eq!(progress_calls[0], (0, 1));
    assert!(progress_calls.contains(&(1, 1)));
}
#[test]
fn test_batch_insert_mixed_errors() {
    // A batch mixing several rejection reasons (NaN component, wrong
    // dimension, duplicate id, reserved id 0): only the two clean
    // vectors are expected to survive.
    let (mut index, mut storage) = create_test_index_and_storage(4);
    let batch = vec![
        (1u64, vec![1.0, 2.0, 3.0, 4.0]),      // valid
        (2u64, vec![f32::NAN, 2.0, 3.0, 4.0]), // NaN component
        (3u64, vec![3.0, 3.0, 3.0]),           // wrong dimension (3 != 4)
        (1u64, vec![4.0, 4.0, 4.0, 4.0]),      // duplicate of id 1
        (0u64, vec![5.0, 5.0, 5.0, 5.0]),      // reserved id 0
        (4u64, vec![6.0, 6.0, 6.0, 6.0]),      // valid
    ];
    let inserted = index
        .batch_insert(batch, &mut storage, None::<fn(usize, usize)>)
        .expect("mixed batch should succeed with invalid entries skipped");
    assert_eq!(inserted.len(), 2);
    assert_eq!(index.node_count(), 2);
}
#[test]
fn test_helper_dimensions() {
    // dimensions() echoes the dimensionality the index was created with.
    let (idx, _store) = create_test_index_and_storage(128);
    assert_eq!(idx.dimensions(), 128);
}
#[test]
fn test_helper_len_empty() {
    // A freshly created index contains no vectors.
    let (idx, _store) = create_test_index_and_storage(128);
    assert!(idx.is_empty());
    assert_eq!(idx.len(), 0);
}
#[test]
fn test_helper_capacity() {
    // Node ids are u32 (see NodeId), so capacity is bounded by u32::MAX.
    let (idx, _store) = create_test_index_and_storage(128);
    assert_eq!(idx.capacity(), u32::MAX as usize);
}
#[test]
fn test_helper_contains_id() {
    // contains_id reports false before insertion and true for the id
    // actually assigned by insert. The original test only checked
    // node_count after inserting and never exercised the positive
    // contains_id case, so a broken contains_id could pass unnoticed.
    let (mut index, mut storage) = create_test_index_and_storage(4);
    assert!(!index.contains_id(1));
    let id = index
        .insert(&[1.0, 2.0, 3.0, 4.0], &mut storage)
        .expect("insert of a valid vector should succeed");
    assert_eq!(index.node_count(), 1);
    // Positive case: the assigned id must now be reported as present.
    assert!(index.contains_id(id.0));
}
#[test]
fn test_validate_vector_valid() {
    // A fully finite vector passes validation: None means "no error".
    let ok = vec![1.0f32, 2.0, 3.0, 4.0];
    assert!(HnswIndex::validate_vector(&ok).is_none());
}
#[test]
fn test_validate_vector_nan() {
    // A NaN component is rejected with a message that mentions "NaN".
    let bad = vec![1.0, f32::NAN, 3.0, 4.0];
    let msg = HnswIndex::validate_vector(&bad).expect("NaN must be rejected");
    assert!(msg.contains("NaN"));
}
#[test]
fn test_validate_vector_infinity() {
    // An infinite component is rejected with a message that mentions "Infinity".
    let bad = vec![1.0, f32::INFINITY, 3.0, 4.0];
    let msg = HnswIndex::validate_vector(&bad).expect("infinity must be rejected");
    assert!(msg.contains("Infinity"));
}
}
// Tests for soft deletion (tombstones): marking nodes deleted, the
// deleted/live counters, tombstone_ratio, and the search-time
// over-fetch factor adjusted_k. From the assertions below, adjusted_k
// is expected to behave like k / (1 - tombstone_ratio), capped at
// MAX_ADJUSTED_K_MULTIPLIER (10x), and fall back to k when every
// node is deleted.
mod delete_tests {
use super::*;
// Builds a fresh 4-dimensional index plus its backing storage.
fn create_test_index() -> (HnswIndex, VectorStorage) {
let config = HnswConfig::new(4);
let storage = VectorStorage::new(&config, None);
let index = HnswIndex::new(config, &storage).unwrap();
(index, storage)
}
// soft_delete flips is_deleted for an existing vector id.
#[test]
fn test_soft_delete_marks_node() {
let (mut index, mut storage) = create_test_index();
let vec = vec![1.0, 2.0, 3.0, 4.0];
let id = index.insert(&vec, &mut storage).unwrap();
assert!(!index.is_deleted(id).unwrap());
assert!(index.soft_delete(id).unwrap());
assert!(index.is_deleted(id).unwrap());
}
// Deleting twice is idempotent: the first call returns true, the
// second returns false, and the tombstone is counted only once.
#[test]
fn test_soft_delete_idempotent() {
let (mut index, mut storage) = create_test_index();
let vec = vec![1.0, 2.0, 3.0, 4.0];
let id = index.insert(&vec, &mut storage).unwrap();
assert!(index.soft_delete(id).unwrap()); assert!(!index.soft_delete(id).unwrap()); assert_eq!(index.deleted_count(), 1); }
// Deleting an id that was never inserted fails with InvalidVectorId.
#[test]
fn test_soft_delete_nonexistent_fails() {
let (mut index, _storage) = create_test_index();
let result = index.soft_delete(VectorId(999));
assert!(result.is_err());
assert!(matches!(result.unwrap_err(), GraphError::InvalidVectorId));
}
// deleted_count tracks tombstones; node_count stays at the insert
// total and live_count is the difference.
#[test]
fn test_deleted_count() {
let (mut index, mut storage) = create_test_index();
let id1 = index.insert(&[1.0, 0.0, 0.0, 0.0], &mut storage).unwrap();
let id2 = index.insert(&[0.0, 1.0, 0.0, 0.0], &mut storage).unwrap();
let _id3 = index.insert(&[0.0, 0.0, 1.0, 0.0], &mut storage).unwrap();
assert_eq!(index.deleted_count(), 0);
assert_eq!(index.node_count(), 3);
index.soft_delete(id1).unwrap();
index.soft_delete(id2).unwrap();
assert_eq!(index.deleted_count(), 2);
assert_eq!(index.live_count(), 1);
}
// tombstone_ratio = deleted / total: 0.0 when empty or clean, then
// 0.25 and 0.50 after deleting 1 and 2 of 4 nodes.
#[test]
#[allow(clippy::float_cmp)]
fn test_tombstone_ratio() {
let (mut index, mut storage) = create_test_index();
assert_eq!(index.tombstone_ratio(), 0.0);
let mut ids = Vec::new();
#[allow(clippy::cast_precision_loss)]
for i in 0..4 {
let id = index.insert(&[i as f32; 4], &mut storage).unwrap();
ids.push(id);
}
assert!((index.tombstone_ratio() - 0.0).abs() < f64::EPSILON);
index.soft_delete(ids[0]).unwrap();
assert!((index.tombstone_ratio() - 0.25).abs() < 0.01);
index.soft_delete(ids[1]).unwrap();
assert!((index.tombstone_ratio() - 0.50).abs() < 0.01);
}
// Querying deletion state of an unknown id is an error, not false.
#[test]
fn test_is_deleted_nonexistent_fails() {
let (index, _storage) = create_test_index();
let result = index.is_deleted(VectorId(999));
assert!(result.is_err());
assert!(matches!(result.unwrap_err(), GraphError::InvalidVectorId));
}
// live_count = node_count - deleted_count after two of five deletions.
#[test]
fn test_live_count() {
let (mut index, mut storage) = create_test_index();
let mut ids = Vec::new();
for i in 0..5 {
let id = index.insert(&[i as f32; 4], &mut storage).unwrap();
ids.push(id);
}
index.soft_delete(ids[0]).unwrap();
index.soft_delete(ids[1]).unwrap();
assert_eq!(index.node_count(), 5);
assert_eq!(index.deleted_count(), 2);
assert_eq!(index.live_count(), 3);
}
// get_node_by_vector_id resolves an inserted id to its node and
// errors for an unknown id.
#[test]
fn test_get_node_by_vector_id_helper() {
let (mut index, mut storage) = create_test_index();
let id = index.insert(&[1.0, 2.0, 3.0, 4.0], &mut storage).unwrap();
let node = index.get_node_by_vector_id(id);
assert!(node.is_ok());
assert_eq!(node.unwrap().vector_id, id);
let bad = index.get_node_by_vector_id(VectorId(999));
assert!(bad.is_err());
}
// A freshly inserted node carries deleted == 0 in its raw node record.
#[test]
fn test_deleted_field_is_zero_by_default() {
let (mut index, mut storage) = create_test_index();
let id = index.insert(&[1.0, 2.0, 3.0, 4.0], &mut storage).unwrap();
let node = index.get_node_by_vector_id(id).unwrap();
assert_eq!(node.deleted, 0);
}
// soft_delete sets the node record's deleted flag to 1.
#[test]
fn test_deleted_field_set_to_one_after_delete() {
let (mut index, mut storage) = create_test_index();
let id = index.insert(&[1.0, 2.0, 3.0, 4.0], &mut storage).unwrap();
index.soft_delete(id).unwrap();
let node = index.get_node_by_vector_id(id).unwrap();
assert_eq!(node.deleted, 1);
}
// VectorId(0) is the reserved INVALID id and can never be deleted.
#[test]
fn test_delete_invalid_vector_id_zero() {
let (mut index, _storage) = create_test_index();
let result = index.soft_delete(VectorId(0));
assert!(result.is_err());
}
// With zero tombstones adjusted_k is the identity.
#[test]
fn test_adjusted_k_no_tombstones() {
let (index, _storage) = create_test_index();
assert_eq!(index.adjusted_k(10), 10);
assert_eq!(index.adjusted_k(1), 1);
assert_eq!(index.adjusted_k(100), 100);
}
// 50% tombstones roughly doubles k (range allows rounding slack).
#[test]
fn test_adjusted_k_with_tombstones() {
let (mut index, mut storage) = create_test_index();
let mut ids = Vec::new();
for i in 0..10 {
let id = index.insert(&[i as f32; 4], &mut storage).unwrap();
ids.push(id);
}
for id in ids.iter().take(5) {
index.soft_delete(*id).unwrap();
}
let adjusted = index.adjusted_k(10);
assert!(
(18..=22).contains(&adjusted),
"Expected ~20, got {adjusted}"
);
}
// At 95% tombstones the multiplier saturates at 10x.
#[test]
fn test_adjusted_k_capped_at_10x() {
let (mut index, mut storage) = create_test_index();
let mut ids = Vec::new();
for i in 0..100 {
let id = index.insert(&[i as f32; 4], &mut storage).unwrap();
ids.push(id);
}
for id in ids.iter().take(95) {
index.soft_delete(*id).unwrap();
}
let adjusted = index.adjusted_k(10);
assert_eq!(adjusted, 100, "Should be capped at 10x (100)");
}
// 10% tombstones inflates k(=10) to ~12 (10 / 0.9 ≈ 11.1, plus slack).
#[test]
fn test_adjusted_k_10_percent_tombstones() {
let (mut index, mut storage) = create_test_index();
let mut ids = Vec::new();
for i in 0..10 {
let id = index.insert(&[i as f32; 4], &mut storage).unwrap();
ids.push(id);
}
index.soft_delete(ids[0]).unwrap();
let adjusted = index.adjusted_k(10);
assert!(
(11..=13).contains(&adjusted),
"Expected ~12, got {adjusted}"
);
}
// Boundary: populated index without deletions keeps k unchanged.
#[test]
fn test_adjusted_k_boundary_zero_tombstones() {
let (mut index, mut storage) = create_test_index();
for i in 0..10 {
index.insert(&[i as f32; 4], &mut storage).unwrap();
}
assert_eq!(index.adjusted_k(5), 5);
assert_eq!(index.adjusted_k(10), 10);
assert!((index.tombstone_ratio() - 0.0).abs() < f64::EPSILON);
}
// Boundary: exactly 50% tombstones gives exactly 2x.
#[test]
fn test_adjusted_k_boundary_50_percent() {
let (mut index, mut storage) = create_test_index();
let mut ids = Vec::new();
for i in 0..10 {
let id = index.insert(&[i as f32; 4], &mut storage).unwrap();
ids.push(id);
}
for id in ids.iter().take(5) {
index.soft_delete(*id).unwrap();
}
let adjusted = index.adjusted_k(10);
assert_eq!(adjusted, 20, "50% tombstones should give 2x");
assert!((index.tombstone_ratio() - 0.5).abs() < 0.01);
}
// Boundary: 90% tombstones hits the 10x cap exactly (10 / 0.1 = 100).
#[test]
fn test_adjusted_k_boundary_90_percent() {
let (mut index, mut storage) = create_test_index();
let mut ids = Vec::new();
for i in 0..10 {
let id = index.insert(&[i as f32; 4], &mut storage).unwrap();
ids.push(id);
}
for id in ids.iter().take(9) {
index.soft_delete(*id).unwrap();
}
let adjusted = index.adjusted_k(10);
assert_eq!(adjusted, 100, "90% tombstones should give 10x (capped)");
assert!((index.tombstone_ratio() - 0.9).abs() < 0.01);
}
// Boundary: with every node deleted (ratio 1.0, which would divide by
// zero), adjusted_k falls back to the original k.
#[test]
fn test_adjusted_k_boundary_all_deleted() {
let (mut index, mut storage) = create_test_index();
let mut ids = Vec::new();
for i in 0..5 {
let id = index.insert(&[i as f32; 4], &mut storage).unwrap();
ids.push(id);
}
for id in &ids {
index.soft_delete(*id).unwrap();
}
let adjusted = index.adjusted_k(10);
assert_eq!(adjusted, 10, "All deleted should return original k");
assert_eq!(index.live_count(), 0);
assert!((index.tombstone_ratio() - 1.0).abs() < 0.01);
}
// k may exceed the index size: 40% tombstones scales 100 to
// ~100 / 0.6 ≈ 166 regardless of only 5 nodes existing.
#[test]
fn test_adjusted_k_large_k_small_index() {
let (mut index, mut storage) = create_test_index();
let mut ids = Vec::new();
for i in 0..5 {
let id = index.insert(&[i as f32; 4], &mut storage).unwrap();
ids.push(id);
}
index.soft_delete(ids[0]).unwrap();
index.soft_delete(ids[1]).unwrap();
let adjusted = index.adjusted_k(100);
assert!(
(166..=168).contains(&adjusted),
"Should compute ~166 for 40% tombstones, got {adjusted}"
);
}
// Guards the public cap constant against accidental change.
#[test]
fn test_adjusted_k_uses_constant() {
assert_eq!(HnswIndex::MAX_ADJUSTED_K_MULTIPLIER, 10);
}
// 33% tombstones: 10 / 0.67 ≈ 14.9; the range tolerates integer
// rounding in either direction.
#[test]
fn test_adjusted_k_integer_precision() {
let (mut index, mut storage) = create_test_index();
let mut ids = Vec::new();
for i in 0..100 {
let id = index.insert(&[i as f32; 4], &mut storage).unwrap();
ids.push(id);
}
for id in ids.iter().take(33) {
index.soft_delete(*id).unwrap();
}
let adjusted = index.adjusted_k(10);
assert!(
(14..=16).contains(&adjusted),
"33% tombstones: expected ~15, got {adjusted}"
);
}
}
}