HnswIndex

Struct HnswIndex 

Source
pub struct HnswIndex {
    pub config: HnswConfig,
    /* private fields */
}
Expand description

The HNSW Graph structure managing layers and nodes.

§Memory

Uses a flattened representation for cache efficiency. Nodes are stored in a contiguous vector.

§Soft Delete (v0.3.0)

Supports soft delete via tombstone marking. Deleted nodes remain in the graph for routing but are excluded from search results. Use compact() to reclaim space from deleted nodes.

Fields§

§config: HnswConfig

Algorithm configuration

Implementations§

Source§

impl HnswIndex

Source

pub const MAX_ADJUSTED_K_MULTIPLIER: usize = 10usize

Maximum multiplier for adjusted_k to prevent excessive over-fetching.

At 90%+ tombstones, we cap at 10x the original k to bound memory usage. Beyond this ratio, compaction should be triggered.

Source

pub fn new( config: HnswConfig, storage: &VectorStorage, ) -> Result<Self, GraphError>

Creates a new empty HNSW graph.

§Arguments
  • config - HNSW configuration parameters.
  • storage - Vector storage to validate against.
§Errors

Returns GraphError::ConfigMismatch if storage dimensions differ from config. Returns GraphError::InvalidConfig if configuration parameters are invalid (e.g., M <= 1).

Source

pub fn with_metadata( config: HnswConfig, storage: &VectorStorage, metadata: MetadataStore, ) -> Result<Self, GraphError>

Creates a new HNSW graph with pre-populated metadata (v0.6.0 - RFC-002).

This constructor allows initializing the index with existing metadata, useful for deserialization or migration scenarios.

§Arguments
  • config - HNSW configuration parameters.
  • storage - Vector storage to validate against.
  • metadata - Pre-populated metadata store.
§Errors

Returns GraphError::ConfigMismatch if storage dimensions differ from config. Returns GraphError::InvalidConfig if configuration parameters are invalid.

§Example
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::metadata::{MetadataStore, MetadataValue};
use edgevec::storage::VectorStorage;

let config = HnswConfig::new(4);
let storage = VectorStorage::new(&config, None);

let mut metadata = MetadataStore::new();
metadata.insert(1, "key", MetadataValue::String("value".into())).unwrap();

let index = HnswIndex::with_metadata(config, &storage, metadata).unwrap();
assert!(index.metadata().has_key(1, "key"));
Source

pub fn with_bq( config: HnswConfig, storage: &VectorStorage, ) -> Result<Self, GraphError>

Creates a new index with binary quantization enabled (v0.7.0 - RFC-002 Phase 2).

BQ provides 32x memory reduction and 3-5x search speedup at the cost of some recall (which can be recovered via rescoring).

§Arguments
  • config - HNSW configuration parameters.
  • storage - Vector storage to validate against.
§Errors

Returns error if dimension is not divisible by 8 (required for binary packing). Returns GraphError::ConfigMismatch if storage dimensions differ from config. Returns GraphError::InvalidConfig if configuration parameters are invalid.

§Example
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::storage::VectorStorage;

let config = HnswConfig::new(128); // 128D, divisible by 8
let storage = VectorStorage::new(&config, None);
let index = HnswIndex::with_bq(config, &storage).unwrap();

assert!(index.has_bq());
Source

pub fn enable_bq(&mut self, storage: &VectorStorage) -> Result<(), GraphError>

Enables binary quantization on an existing index (v0.7.0 - RFC-002 Phase 2).

This creates a new BQ storage and quantizes all existing vectors. Time complexity: O(n × d) where n = vector count, d = dimension.

§Arguments
  • storage - The F32 vector storage containing vectors to quantize.
§Errors

Returns error if dimension is not divisible by 8.

§Example
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::storage::VectorStorage;

let config = HnswConfig::new(128);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::new(config, &storage).unwrap();

// Insert some vectors
index.insert(&vec![1.0f32; 128], &mut storage).unwrap();

// Enable BQ later
index.enable_bq(&storage).unwrap();
assert!(index.has_bq());
Source

pub fn has_bq(&self) -> bool

Returns true if binary quantization is enabled.

Source

pub fn bq_storage(&self) -> Option<&BinaryVectorStorage>

Returns a reference to the BQ storage, if enabled.

Source

pub fn get_random_level(&mut self) -> u8

Generates a random level for a new node.

Formula: floor(-ln(uniform(0,1)) * m_l) Clamped to max_level (e.g. 16) to prevent memory explosion.

Source

pub fn add_node( &mut self, vector_id: VectorId, max_layer: u8, ) -> Result<NodeId, GraphError>

Adds a node to the graph.

§Arguments
  • vector_id - The external vector identifier
  • max_layer - The maximum layer for this node
§Returns

The new NodeId assigned to this node, or a GraphError.

Source

pub fn set_neighbors( &mut self, node_id: NodeId, neighbors: &[NodeId], ) -> Result<(), GraphError>

Sets the neighbors for a node.

§Arguments
  • node_id - The node to update.
  • neighbors - The list of neighbor IDs.
Source

pub fn get_node(&self, id: NodeId) -> Option<&HnswNode>

Retrieves a node by its ID.

Source

pub fn get_neighbors(&self, node: &HnswNode) -> Result<Vec<NodeId>, GraphError>

Retrieves the neighbors for a node.

Source

pub fn get_neighbors_layer( &self, node: &HnswNode, layer: u8, ) -> Result<Vec<NodeId>, GraphError>

Retrieves the neighbors for a specific layer.

Source

pub fn node_count(&self) -> usize

Returns the number of nodes in the graph.

Source

pub fn entry_point(&self) -> Option<NodeId>

Returns the entry point node ID, if any.

Source

pub fn set_entry_point(&mut self, id: NodeId)

Sets the entry point node ID.

Source

pub fn max_layer(&self) -> u8

Returns the current maximum layer in the graph.

Source

pub fn metadata(&self) -> &MetadataStore

Returns an immutable reference to the metadata store.

The metadata store contains key-value pairs attached to vectors. Use this method to query metadata without modifying it.

§Thread Safety

MetadataStore is Send + Sync when all contained types are Send + Sync. String and MetadataValue are both Send + Sync, so this is safe.

§Example
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::storage::VectorStorage;

let config = HnswConfig::new(4);
let storage = VectorStorage::new(&config, None);
let index = HnswIndex::new(config, &storage).unwrap();

assert!(index.metadata().is_empty());
Source

pub fn metadata_mut(&mut self) -> &mut MetadataStore

Returns a mutable reference to the metadata store.

Use this method to modify metadata attached to vectors.

§Thread Safety

Concurrent modification requires external synchronization (e.g., Mutex), matching the existing pattern for other HnswIndex operations.

§Example
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::metadata::MetadataValue;
use edgevec::storage::VectorStorage;

let config = HnswConfig::new(4);
let storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::new(config, &storage).unwrap();

index.metadata_mut()
    .insert(1, "category", MetadataValue::String("books".into()))
    .unwrap();

assert!(index.metadata().has_key(1, "category"));
Source

pub fn insert_with_metadata( &mut self, storage: &mut VectorStorage, vector: &[f32], metadata: HashMap<String, MetadataValue>, ) -> Result<VectorId, GraphError>

Inserts a vector with metadata atomically (v0.6.0 - RFC-002).

This method validates metadata BEFORE inserting the vector, ensuring that either both the vector and metadata are stored, or neither is.

§Arguments
  • storage - The vector storage to insert into.
  • vector - The vector data (must match configured dimensions).
  • metadata - Key-value metadata pairs to attach to the vector.
§Returns

The assigned VectorId on success.

§Errors

Returns GraphError::MetadataValidation if metadata fails validation:

  • More than 64 keys
  • Key longer than 256 bytes
  • String value longer than 64KB
  • String array with more than 1024 elements
  • Invalid key format (must be alphanumeric + underscore)
  • Invalid float (NaN or Infinity)

On error, the index remains unchanged (no partial state).

§Example
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::metadata::MetadataValue;
use edgevec::storage::VectorStorage;
use std::collections::HashMap;

let config = HnswConfig::new(4);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::new(config, &storage).unwrap();

let mut metadata = HashMap::new();
metadata.insert("category".to_string(), MetadataValue::String("books".into()));
metadata.insert("price".to_string(), MetadataValue::Float(29.99));

let vector_id = index.insert_with_metadata(
    &mut storage,
    &[1.0, 2.0, 3.0, 4.0],
    metadata,
).unwrap();

// Note: VectorId is u64, but metadata uses u32 IDs
#[allow(clippy::cast_possible_truncation)]
let meta_id = vector_id.0 as u32;
assert!(index.metadata().has_key(meta_id, "category"));
Source

pub fn search_filtered( &self, storage: &VectorStorage, query: &[f32], filter: &str, k: usize, ) -> Result<Vec<(VectorId, f32)>, GraphError>

Search with post-filtering using metadata expressions.

Performs a vector similarity search and filters results based on a metadata filter expression. Uses adaptive overfetch to ensure enough results pass the filter.

§Arguments
  • storage - The vector storage
  • query - The query vector
  • filter - Filter expression (e.g., "category = \"books\" AND price < 100")
  • k - Number of results to return
§Returns

Up to k results that pass the filter, sorted by distance (ascending). May return fewer than k results if not enough vectors pass the filter.

§Errors
  • GraphError::FilterParse - Invalid filter syntax
  • GraphError::FilterEval - Filter evaluation failed
  • Other GraphError variants from underlying search
§Algorithm (RFC-002 §3.2)
  1. Parse filter expression
  2. Use default selectivity = 0.50 (refined in W26.3.1)
  3. Calculate overfetch_factor = min(10, max(2, 1 / selectivity))
  4. Search with k * overfetch_factor candidates
  5. Post-filter results using metadata evaluation
  6. Return top-k passing results
§Example
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::storage::VectorStorage;
use edgevec::metadata::MetadataValue;
use std::collections::HashMap;

let config = HnswConfig::new(4);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::new(config, &storage).unwrap();

// Insert vectors with metadata
let mut meta = HashMap::new();
meta.insert("category".to_string(), MetadataValue::String("books".into()));
meta.insert("price".to_string(), MetadataValue::Integer(25));
let _ = index.insert_with_metadata(&mut storage, &[1.0, 0.0, 0.0, 0.0], meta);

// Search with filter
let results = index.search_filtered(
    &storage,
    &[1.0, 0.0, 0.0, 0.0],
    "category = \"books\" AND price < 100",
    5,
).unwrap();
Source

pub fn soft_delete(&mut self, vector_id: VectorId) -> Result<bool, GraphError>

Mark a vector as deleted (soft delete).

The vector remains in the graph for routing but is excluded from search results. Space is reclaimed via compact().

§Arguments
  • vector_id - The ID of the vector to delete
§Returns
  • Ok(true) - Vector was deleted
  • Ok(false) - Vector was already deleted
  • Err(InvalidVectorId) - Vector ID not found
§Complexity
  • Time: O(n) for VectorId → Node lookup via linear scan, plus O(1) for setting the deleted byte
  • Space: O(1)

Note: The O(n) lookup is a known limitation in v0.3.0. A HashMap<VectorId, NodeId> index could provide O(1) lookup but is deferred to v0.4.0 to avoid scope creep.

§Thread Safety (RFC-001 Design)

This method requires &mut self, which means Rust’s borrow checker prevents concurrent access at compile time. This is intentional:

  • Search (&self) and delete (&mut self) cannot run concurrently
  • No atomics or locks needed - safety is enforced by the type system
  • See RFC-001 Section 6.4 “Design Decisions” for rationale
§Persistence

IMPORTANT: Delete operations are in-memory only until save() is called. If the process crashes before save(), the delete will be lost.

§Example
let deleted = index.soft_delete(VectorId(42))?;
assert!(deleted);
assert!(index.is_deleted(VectorId(42))?);
Source

pub fn soft_delete_batch(&mut self, ids: &[VectorId]) -> BatchDeleteResult

Delete multiple vectors in a single operation

[C5 FIX] Two-Phase Implementation:

  1. Pre-validation: Check all IDs exist and are not already deleted
  2. Execution: Apply deletions (guaranteed to succeed after validation)

This prevents partial failures from leaving the index in an inconsistent state.

[C2 FIX] Duplicate Handling: Duplicate IDs in the input are automatically deduplicated. Only the first occurrence is processed; duplicates are counted in total but not in unique_count.

[M2 FIX] Memory Bounds: Maximum batch size is capped at 10 million IDs to prevent memory exhaustion.

§Arguments
  • ids - Slice of VectorId values to delete (duplicates allowed)
§Returns
  • BatchDeleteResult with counts and detailed errors
§Complexity
  • Time: O(N × M) where N = unique IDs, M = index size (for ID lookup)
  • Space: O(N) for deduplication and validation structures
§Example
use edgevec::hnsw::{HnswConfig, HnswIndex, VectorId};
use edgevec::storage::VectorStorage;

let config = HnswConfig::new(4);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::new(config, &storage).unwrap();

// Insert some vectors
for i in 0..10 {
    index.insert(&vec![i as f32; 4], &mut storage).unwrap();
}

// Batch delete
let ids = vec![VectorId(1), VectorId(3), VectorId(5)];
let result = index.soft_delete_batch(&ids);

assert_eq!(result.deleted, 3);
assert_eq!(result.total, 3);
assert!(result.all_valid());
Source

pub fn soft_delete_batch_with_progress<F>( &mut self, ids: &[VectorId], callback: F, ) -> BatchDeleteResult
where F: FnMut(usize, usize),

Delete multiple vectors with progress callback

[C3 FIX] Two-Phase Implementation: Now delegates to soft_delete_batch() for validation, then reports progress during the execution phase. This ensures consistent behavior between variants.

[M3 FIX] Panic Safety: If the callback panics, the operation aborts but the index remains in a valid state (no partial deletions). The panic will unwind past this function.

Callback is invoked approximately every 10% of progress during the execution phase. Useful for UI updates during large batch operations.

§Arguments
  • ids - Slice of VectorId values to delete (duplicates allowed)
  • callback - Function called with (processed_unique, total_unique) counts
§Complexity
  • Time: O(N × M) + O(N × C) where N = unique IDs, M = index size, C = callback cost
  • Space: O(N) for deduplication and validation
§Example
use edgevec::hnsw::{HnswConfig, HnswIndex, VectorId};
use edgevec::storage::VectorStorage;

let config = HnswConfig::new(4);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::new(config, &storage).unwrap();

// Insert vectors
for i in 0..100 {
    index.insert(&vec![i as f32; 4], &mut storage).unwrap();
}

// Batch delete with progress
let ids: Vec<VectorId> = (1..=50).map(VectorId).collect();
let result = index.soft_delete_batch_with_progress(&ids, |processed, total| {
    println!("Progress: {}/{}", processed, total);
});
Source

pub fn is_deleted(&self, vector_id: VectorId) -> Result<bool, GraphError>

Check if a vector is marked as deleted.

§Arguments
  • vector_id - The ID of the vector to check
§Returns
  • Ok(true) - Vector is deleted
  • Ok(false) - Vector is live
  • Err(InvalidVectorId) - Vector ID not found
§Complexity
  • Time: O(n) for lookup + O(1) for check
  • Space: O(1)
Source

pub fn deleted_count(&self) -> usize

Get the count of deleted (tombstoned) vectors.

§Returns

Number of vectors marked as deleted.

Source

pub fn tombstone_ratio(&self) -> f64

Get the ratio of deleted vectors to total vectors.

§Returns

A value between 0.0 and 1.0 representing the tombstone ratio. Returns 0.0 if the index is empty.

Source

pub fn live_count(&self) -> usize

Get count of live (non-deleted) vectors.

§Returns

Number of vectors that are not marked as deleted.

Source

pub fn adjusted_k(&self, k: usize) -> usize

Calculate adjusted k to compensate for tombstones.

When the index has deleted vectors, we over-fetch to ensure we can return k live results after filtering. This method calculates how many candidates to fetch internally so that after filtering out deleted vectors, we likely have k results.

§Formula

adjusted_k = k * total / live

This is equivalent to k / (1 - tombstone_ratio) but uses integer arithmetic to avoid floating-point rounding error.

§Caps

The result is capped at k * MAX_ADJUSTED_K_MULTIPLIER (default 10x) to prevent excessive fetching at very high tombstone ratios.

§Examples
  • 0% tombstones: k = k (no adjustment)
  • 10% tombstones: k → ~1.11x
  • 30% tombstones: k → ~1.43x
  • 50% tombstones: k → 2x
  • 90%+ tombstones: k → 10x (capped)
§Thread Safety

This method reads deleted_count and node_count() which may change if the index is mutated. Per RFC-001, the API uses &mut self for mutations, so concurrent read+write is prevented by Rust’s borrow checker. The design accepts eventual consistency for soft delete semantics.

§Arguments
  • k - The requested number of results
§Returns

The adjusted k value to use for internal search operations. This value is always >= k (unless capped by live_count).

Source

pub fn delete_in_storage( &self, id: VectorId, storage: &mut VectorStorage, ) -> bool

👎Deprecated since 0.3.0: Use soft_delete() instead

Marks a vector as deleted in the storage (legacy API).

DEPRECATED: Use soft_delete() instead for RFC-001 compliant soft delete. This method delegates to storage and does not update deleted_count.

§Arguments
  • id - The vector ID to delete.
  • storage - The vector storage to update.
§Returns

true if the vector was active and is now deleted. false if it was already deleted.

Source

pub fn log_stats(&self)

DEBUG: Print memory stats

Source

pub fn memory_usage(&self) -> usize

Returns the approximate memory usage in bytes.

Source

pub fn needs_compaction(&self) -> bool

Check if compaction is recommended.

Returns true if the tombstone ratio exceeds the configured threshold (default 30%). When this returns true, calling compact() is recommended to reclaim space and maintain search performance.

§Thread Safety

This method is not thread-safe. HnswIndex is !Send and must be accessed from a single thread. For concurrent use, create separate index instances (e.g., via Web Workers in WASM).

§Example
if index.needs_compaction() {
    let (new_index, new_storage, result) = index.compact(&storage)?;
    index = new_index;
    storage = new_storage;
}
Source

pub fn set_compaction_threshold(&mut self, ratio: f64)

Set the compaction threshold.

When tombstone_ratio() exceeds this value, needs_compaction() returns true.

§Arguments
  • ratio - Tombstone ratio threshold (0.01 to 0.99)
§Default

Default is 0.3 (30%). Lower values trigger compaction more often but maintain better search performance.

§Clamping

Values outside [0.01, 0.99] are clamped to that range.

Source

pub fn compaction_threshold(&self) -> f64

Get the current compaction threshold.

§Returns

The ratio at which needs_compaction() returns true.

Source

pub fn compaction_warning(&self) -> Option<String>

Get a compaction warning message if compaction is recommended.

Returns Some(message) if the tombstone ratio exceeds the threshold, or None if compaction is not needed.

§Example
if let Some(warning) = index.compaction_warning() {
    log::warn!("{}", warning);
}
Source

pub fn compact( &self, storage: &VectorStorage, ) -> Result<(HnswIndex, VectorStorage, CompactionResult), GraphError>

Compact the index by rebuilding without tombstones.

This operation creates a NEW index and NEW storage containing only live (non-deleted) vectors. The original index and storage are NOT modified, allowing the caller to replace them atomically.

§Important: Vector ID Remapping

Due to storage design constraints, vector IDs are remapped during compaction. New IDs are assigned sequentially starting from 1. If you need to track the mapping, use the returned index to query by position or content.

§Returns

Returns (new_index, new_storage, result) tuple. The caller MUST replace BOTH their index and storage references:

let (new_index, new_storage, result) = old_index.compact(&old_storage)?;
println!("Removed {} tombstones in {}ms", result.tombstones_removed, result.duration_ms);
// Now use new_index and new_storage, old ones will be dropped
index = new_index;
storage = new_storage;
§Algorithm
  1. Collect all live vectors (non-deleted)
  2. Create a new empty index and storage with the same config
  3. Re-insert all live vectors using regular insert()
  4. Return the new pair
§Performance
  • Time: O(n log n) where n = live vector count
  • Space: 2x index size during compaction (temporary)
§Memory Safety
  • Returns new pair — no storage/index mismatch possible
  • On failure, original index/storage unchanged (caller keeps refs)
  • Old index/storage are NOT modified — caller drops when ready
§Warning

This is a blocking operation. For WASM, consider running during idle time or on user action.

Source

pub fn insert_with_id( &mut self, id: VectorId, vector: &[f32], storage: &mut VectorStorage, ) -> Result<VectorId, GraphError>

Insert a vector with a specific ID (validation only).

This method validates that the specified ID doesn’t conflict with existing IDs, then delegates to the standard insert() method.

Important: Due to storage design constraints where VectorIds must match storage slot indices, the returned VectorId is the one assigned by storage, NOT the requested ID. The id parameter is only used for duplicate validation.

For actual ID preservation during compaction, the storage would need to support sparse ID assignment, which is not currently implemented.

§Arguments
  • id - The specific VectorId to validate (used only for duplicate detection; the returned ID is assigned by storage)
  • vector - The vector data (must match configured dimensions)
  • storage - Mutable reference to vector storage
§Returns

The VectorId assigned by storage (sequential).

§Errors
  • InvalidVectorId - If ID already exists in index or is the sentinel value
  • DimensionMismatch - If vector dimensions don’t match config
  • Storage - If storage operation fails
Source§

impl HnswIndex

Source

pub fn dimensions(&self) -> u32

Returns the configured dimensionality of the index.

Source

pub fn len(&self) -> usize

Returns the current number of nodes in the index.

Source

pub fn is_empty(&self) -> bool

Returns true if the index contains no nodes.

Source

pub const fn capacity(&self) -> usize

Returns the maximum capacity of the index (u32::MAX nodes).

Source

pub fn contains_id(&self, id: u64) -> bool

Checks if a vector ID already exists in the index.

Returns true if the ID exists, false otherwise.

Source§

impl HnswIndex

Source

pub fn insert( &mut self, vector: &[f32], storage: &mut VectorStorage, ) -> Result<VectorId, GraphError>

Inserts a vector into the index.

This method performs the full HNSW insertion algorithm:

  1. Stores the vector in VectorStorage.
  2. Assigns a new NodeId and determines its level L.
  3. Finds the entry point at layer L.
  4. Connects the new node to neighbors from layer L down to 0.
§Arguments
  • vector - The raw vector data (must match configured dimensions).
  • storage - The storage backend (must match configuration).
§Returns

The assigned VectorId on success, or a GraphError on failure.

§Errors

Returns GraphError if:

  • Storage dimensions mismatch.
  • Config metric is invalid.
  • Internal graph corruption occurs.
Source

pub fn insert_bq( &mut self, vector: &[f32], storage: &mut VectorStorage, ) -> Result<VectorId, GraphError>

Inserts a vector with automatic binary quantization (v0.7.0 - RFC-002 Phase 2).

If BQ is enabled, the vector is stored in both F32 and BQ format. If BQ is disabled, this behaves identically to insert().

§Arguments
  • vector - The vector to insert.
  • storage - The F32 vector storage.
§Returns

The assigned vector ID.

§Errors
  • GraphError::DimensionMismatch if vector dimension is wrong.
  • GraphError::Quantization if BQ quantization fails.
§Example
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::storage::VectorStorage;

let config = HnswConfig::new(128);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::with_bq(config, &storage).unwrap();

let v = vec![1.0f32; 128];
let id = index.insert_bq(&v, &mut storage).unwrap();

assert!(index.has_bq());
assert_eq!(index.bq_storage().unwrap().len(), 1);
Source§

impl HnswIndex

Source

pub fn search( &self, query: &[f32], k: usize, storage: &VectorStorage, ) -> Result<Vec<SearchResult>, GraphError>

Searches the index for the K nearest neighbors.

§Arguments
  • query - The query vector.
  • k - The number of neighbors to return.
  • storage - The vector storage for distance calculations.
§Returns

A list of SearchResults, sorted by distance (ascending). Returns at most k results, or fewer if:

  • The index has fewer than k live (non-deleted) vectors
  • Not enough neighbors were found during traversal
§Tombstone Handling (v0.3.0)

Search automatically filters out deleted vectors:

  1. Routing: Deleted nodes are still traversed during graph navigation. This preserves graph connectivity and search quality.

  2. Filtering: Deleted nodes are excluded from the final results. Only live vectors appear in the returned SearchResult list.

  3. Compensation: The search internally over-fetches using adjusted_k() to ensure k live results are returned even with tombstones. At high tombstone ratios (>30%), consider calling compact().

§Thread Safety

Per RFC-001, this method accepts eventual consistency. If soft_delete() is called concurrently (which requires &mut self), Rust’s borrow checker prevents data races. The API is designed for single-threaded access.

§Errors

Returns GraphError if:

  • Query dimension mismatches index dimension
  • Config metric is invalid
  • Internal graph corruption occurs
Source

pub fn search_with_context( &self, query: &[f32], k: usize, storage: &VectorStorage, ctx: &mut SearchContext, ) -> Result<Vec<SearchResult>, GraphError>

Searches the index for the K nearest neighbors with a reusable context.

This method allows reusing allocations across multiple searches for better performance.

§Arguments
  • query - The query vector.
  • k - The number of neighbors to return.
  • storage - The vector storage for distance calculations.
  • ctx - A reusable search context to avoid allocations.
§Returns

A list of SearchResults, sorted by distance (ascending).

§Errors

Returns GraphError if:

  • Config metric is invalid.
  • Internal graph corruption occurs.
§Performance

Reusing SearchContext across searches can significantly improve performance by avoiding repeated allocations of HashSets and heaps. This is especially important for high-throughput scenarios or large-scale indexes (100k+ vectors).

Source§

impl HnswIndex

Source

pub fn search_bq( &self, query: &[f32], k: usize, _storage: &VectorStorage, ) -> Result<Vec<(VectorId, f32)>, GraphError>

Searches the index using binary quantization (Hamming distance).

This is faster than F32 search but has lower recall. Use search_bq_rescored() for better recall with F32 rescoring.

§Arguments
  • query - The query vector.
  • k - Number of results to return.
  • _storage - The F32 storage (needed for compatibility, not used).
§Returns

Top-k results sorted by approximate similarity (higher is better).

§Errors
  • GraphError::BqNotEnabled if BQ storage is not initialized.
  • GraphError::DimensionMismatch if query dimension is wrong.
§Example
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::storage::VectorStorage;

let config = HnswConfig::new(128);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::with_bq(config, &storage).unwrap();

let v = vec![1.0f32; 128];
index.insert_bq(&v, &mut storage).unwrap();

let query = vec![1.0f32; 128];
let results = index.search_bq(&query, 10, &storage).unwrap();

assert_eq!(results.len(), 1);
assert!((results[0].1 - 1.0).abs() < 0.01); // Similarity ~1.0
Source

pub fn search_bq_rescored( &self, query: &[f32], k: usize, rescore_factor: usize, storage: &VectorStorage, ) -> Result<Vec<(VectorId, f32)>, GraphError>

Searches using binary quantization with F32 rescoring.

This provides the best of both worlds:

  • Fast BQ search for candidate generation
  • Accurate F32 rescoring for final ranking
§Arguments
  • query - The query vector.
  • k - Number of results to return.
  • rescore_factor - Overfetch multiplier (recommended: 3). Higher values improve recall but increase latency.
  • storage - F32 vector storage.
§Returns

Top-k results sorted by exact F32 distance (converted to similarity).

§Errors
  • GraphError::BqNotEnabled if BQ storage is not initialized.
  • GraphError::DimensionMismatch if query dimension is wrong.
§Performance
  • BQ phase: O(log n × d/8) — very fast
  • Rescore phase: O(k × rescore_factor × d) — proportional to overfetch

Typical latency: 1.5-2× pure BQ, but with recall ~0.95+

§Example
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::storage::VectorStorage;

let config = HnswConfig::new(128);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::with_bq(config, &storage).unwrap();

let v = vec![1.0f32; 128];
index.insert_bq(&v, &mut storage).unwrap();

// Search for 10 results with 3× overfetch
let results = index.search_bq_rescored(&v, 10, 3, &storage).unwrap();
Source

pub fn search_bq_rescored_default( &self, query: &[f32], k: usize, storage: &VectorStorage, ) -> Result<Vec<(VectorId, f32)>, GraphError>

Convenience method with default rescore factor.

Uses rescore_factor = 5, which provides good recall/speed balance. For maximum recall (>0.90), use rescore_factor = 10 or higher.

§Errors
  • GraphError::BqNotEnabled if BQ storage is not initialized.
  • GraphError::DimensionMismatch if query dimension is wrong.
§Example
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::storage::VectorStorage;

let config = HnswConfig::new(128);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::with_bq(config, &storage).unwrap();

let v = vec![1.0f32; 128];
index.insert_bq(&v, &mut storage).unwrap();

let results = index.search_bq_rescored_default(&v, 10, &storage).unwrap();
Source

pub fn search_bq_high_recall( &self, query: &[f32], k: usize, storage: &VectorStorage, ) -> Result<Vec<(VectorId, f32)>, GraphError>

High-recall BQ search (rescore_factor = 15).

Use this when recall is critical and latency is acceptable. Achieves >0.90 recall on most datasets.

§Errors
  • GraphError::BqNotEnabled if BQ storage is not initialized.
  • GraphError::DimensionMismatch if query dimension is wrong.

Trait Implementations§

Source§

impl BatchInsertable for HnswIndex

Source§

fn batch_insert<I, F>( &mut self, vectors: I, storage: &mut VectorStorage, progress_callback: Option<F>, ) -> Result<Vec<u64>, BatchError>
where I: IntoIterator<Item = (u64, Vec<f32>)>, F: FnMut(usize, usize),

Inserts multiple vectors in a single batch operation.

This is the full implementation per RFC 0001 v1.2.

§Algorithm
  1. Collect iterator to Vec (required for progress tracking)
  2. Pre-validate first vector’s dimensionality (fail-fast)
  3. Check capacity constraints
  4. Iterate through vectors with progress callbacks
  5. Actually insert vectors into HNSW graph via storage
  6. Handle errors according to best-effort semantics
§Error Handling
  • Fatal errors (abort immediately):

    • DimensionMismatch on first vector
    • CapacityExceeded
    • InternalError (HNSW invariant violated)
  • Non-fatal errors (skip and continue):

    • DuplicateId (within batch or existing in index)
    • InvalidVector (NaN, Inf)
    • DimensionMismatch on subsequent vectors
Source§

impl Clone for HnswIndex

Source§

fn clone(&self) -> HnswIndex

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for HnswIndex

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl<'de> Deserialize<'de> for HnswIndex

Source§

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

Deserialize this value from the given Serde deserializer. Read more
Source§

impl Serialize for HnswIndex

Source§

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error>
where __S: Serializer,

Serialize this value into the given Serde serializer. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> Conv for T

Source§

fn conv<T>(self) -> T
where Self: Into<T>,

Converts self into T using Into<T>. Read more
Source§

impl<T> FmtForward for T

Source§

fn fmt_binary(self) -> FmtBinary<Self>
where Self: Binary,

Causes self to use its Binary implementation when Debug-formatted.
Source§

fn fmt_display(self) -> FmtDisplay<Self>
where Self: Display,

Causes self to use its Display implementation when Debug-formatted.
Source§

fn fmt_lower_exp(self) -> FmtLowerExp<Self>
where Self: LowerExp,

Causes self to use its LowerExp implementation when Debug-formatted.
Source§

fn fmt_lower_hex(self) -> FmtLowerHex<Self>
where Self: LowerHex,

Causes self to use its LowerHex implementation when Debug-formatted.
Source§

fn fmt_octal(self) -> FmtOctal<Self>
where Self: Octal,

Causes self to use its Octal implementation when Debug-formatted.
Source§

fn fmt_pointer(self) -> FmtPointer<Self>
where Self: Pointer,

Causes self to use its Pointer implementation when Debug-formatted.
Source§

fn fmt_upper_exp(self) -> FmtUpperExp<Self>
where Self: UpperExp,

Causes self to use its UpperExp implementation when Debug-formatted.
Source§

fn fmt_upper_hex(self) -> FmtUpperHex<Self>
where Self: UpperHex,

Causes self to use its UpperHex implementation when Debug-formatted.
Source§

fn fmt_list(self) -> FmtList<Self>
where for<'a> &'a Self: IntoIterator,

Formats each item in a sequence. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> Pipe for T
where T: ?Sized,

Source§

fn pipe<R>(self, func: impl FnOnce(Self) -> R) -> R
where Self: Sized,

Pipes by value. This is generally the method you want to use. Read more
Source§

fn pipe_ref<'a, R>(&'a self, func: impl FnOnce(&'a Self) -> R) -> R
where R: 'a,

Borrows self and passes that borrow into the pipe function. Read more
Source§

fn pipe_ref_mut<'a, R>(&'a mut self, func: impl FnOnce(&'a mut Self) -> R) -> R
where R: 'a,

Mutably borrows self and passes that borrow into the pipe function. Read more
Source§

fn pipe_borrow<'a, B, R>(&'a self, func: impl FnOnce(&'a B) -> R) -> R
where Self: Borrow<B>, B: 'a + ?Sized, R: 'a,

Borrows self, then passes self.borrow() into the pipe function. Read more
Source§

fn pipe_borrow_mut<'a, B, R>( &'a mut self, func: impl FnOnce(&'a mut B) -> R, ) -> R
where Self: BorrowMut<B>, B: 'a + ?Sized, R: 'a,

Mutably borrows self, then passes self.borrow_mut() into the pipe function. Read more
Source§

fn pipe_as_ref<'a, U, R>(&'a self, func: impl FnOnce(&'a U) -> R) -> R
where Self: AsRef<U>, U: 'a + ?Sized, R: 'a,

Borrows self, then passes self.as_ref() into the pipe function.
Source§

fn pipe_as_mut<'a, U, R>(&'a mut self, func: impl FnOnce(&'a mut U) -> R) -> R
where Self: AsMut<U>, U: 'a + ?Sized, R: 'a,

Mutably borrows self, then passes self.as_mut() into the pipe function.
Source§

fn pipe_deref<'a, T, R>(&'a self, func: impl FnOnce(&'a T) -> R) -> R
where Self: Deref<Target = T>, T: 'a + ?Sized, R: 'a,

Borrows self, then passes self.deref() into the pipe function.
Source§

fn pipe_deref_mut<'a, T, R>( &'a mut self, func: impl FnOnce(&'a mut T) -> R, ) -> R
where Self: DerefMut<Target = T> + Deref, T: 'a + ?Sized, R: 'a,

Mutably borrows self, then passes self.deref_mut() into the pipe function.
Source§

impl<T> Tap for T

Source§

fn tap(self, func: impl FnOnce(&Self)) -> Self

Immutable access to a value. Read more
Source§

fn tap_mut(self, func: impl FnOnce(&mut Self)) -> Self

Mutable access to a value. Read more
Source§

fn tap_borrow<B>(self, func: impl FnOnce(&B)) -> Self
where Self: Borrow<B>, B: ?Sized,

Immutable access to the Borrow<B> of a value. Read more
Source§

fn tap_borrow_mut<B>(self, func: impl FnOnce(&mut B)) -> Self
where Self: BorrowMut<B>, B: ?Sized,

Mutable access to the BorrowMut<B> of a value. Read more
Source§

fn tap_ref<R>(self, func: impl FnOnce(&R)) -> Self
where Self: AsRef<R>, R: ?Sized,

Immutable access to the AsRef<R> view of a value. Read more
Source§

fn tap_ref_mut<R>(self, func: impl FnOnce(&mut R)) -> Self
where Self: AsMut<R>, R: ?Sized,

Mutable access to the AsMut<R> view of a value. Read more
Source§

fn tap_deref<T>(self, func: impl FnOnce(&T)) -> Self
where Self: Deref<Target = T>, T: ?Sized,

Immutable access to the Deref::Target of a value. Read more
Source§

fn tap_deref_mut<T>(self, func: impl FnOnce(&mut T)) -> Self
where Self: DerefMut<Target = T> + Deref, T: ?Sized,

Mutable access to the Deref::Target of a value. Read more
Source§

fn tap_dbg(self, func: impl FnOnce(&Self)) -> Self

Calls .tap() only in debug builds, and is erased in release builds.
Source§

fn tap_mut_dbg(self, func: impl FnOnce(&mut Self)) -> Self

Calls .tap_mut() only in debug builds, and is erased in release builds.
Source§

fn tap_borrow_dbg<B>(self, func: impl FnOnce(&B)) -> Self
where Self: Borrow<B>, B: ?Sized,

Calls .tap_borrow() only in debug builds, and is erased in release builds.
Source§

fn tap_borrow_mut_dbg<B>(self, func: impl FnOnce(&mut B)) -> Self
where Self: BorrowMut<B>, B: ?Sized,

Calls .tap_borrow_mut() only in debug builds, and is erased in release builds.
Source§

fn tap_ref_dbg<R>(self, func: impl FnOnce(&R)) -> Self
where Self: AsRef<R>, R: ?Sized,

Calls .tap_ref() only in debug builds, and is erased in release builds.
Source§

fn tap_ref_mut_dbg<R>(self, func: impl FnOnce(&mut R)) -> Self
where Self: AsMut<R>, R: ?Sized,

Calls .tap_ref_mut() only in debug builds, and is erased in release builds.
Source§

fn tap_deref_dbg<T>(self, func: impl FnOnce(&T)) -> Self
where Self: Deref<Target = T>, T: ?Sized,

Calls .tap_deref() only in debug builds, and is erased in release builds.
Source§

fn tap_deref_mut_dbg<T>(self, func: impl FnOnce(&mut T)) -> Self
where Self: DerefMut<Target = T> + Deref, T: ?Sized,

Calls .tap_deref_mut() only in debug builds, and is erased in release builds.
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T> TryConv for T

Source§

fn try_conv<T>(self) -> Result<T, Self::Error>
where Self: TryInto<T>,

Attempts to convert self into T using TryInto<T>. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,