velesdb-core 1.6.0

High-performance vector database engine written in Rust
Documentation
//! Index implementations for efficient vector search.
//!
//! This module provides various index implementations for approximate
//! nearest neighbor (ANN) search and full-text search.

mod bm25;
#[cfg(test)]
mod bm25_tests;
pub mod hnsw;
mod posting_list;
#[cfg(test)]
mod posting_list_tests;
pub mod secondary;
pub mod sparse;
pub mod trigram;

pub use bm25::{Bm25Index, Bm25Params};
pub use hnsw::{HnswIndex, HnswParams, SearchQuality};
pub use secondary::{JsonValue, SecondaryIndex};
pub use sparse::{SparseInvertedIndex, SparseVector};
pub use trigram::{extract_trigrams, TrigramIndex};

use crate::distance::DistanceMetric;
use crate::scored_result::ScoredResult;

/// Trait for vector index implementations.
///
/// All index implementations must be thread-safe (Send + Sync).
///
/// # Performance Note
///
/// For bulk insertions, prefer batch methods like `HnswIndex::insert_batch_parallel()`
/// over calling [`Self::insert`] in a loop.
/// Individual inserts incur per-call lock overhead that batch methods avoid.
pub trait VectorIndex: Send + Sync {
    /// Inserts a vector into the index.
    ///
    /// # Arguments
    ///
    /// * `id` - Unique identifier for the vector
    /// * `vector` - The vector data to index
    ///
    /// # Performance Warning (PERF-2)
    ///
    /// This method acquires locks for each insertion. For bulk loading, use:
    /// - `HnswIndex::insert_batch_parallel()` - Best for all batches
    ///
    /// Calling `insert()` in a loop incurs ~3x lock overhead per vector compared
    /// to batch methods which acquire locks once for the entire batch.
    fn insert(&self, id: u64, vector: &[f32]);

    /// Searches for the k nearest neighbors of the query vector.
    ///
    /// # Arguments
    ///
    /// * `query` - The query vector
    /// * `k` - Number of nearest neighbors to return
    ///
    /// # Returns
    ///
    /// A vector of [`ScoredResult`] sorted by distance/similarity.
    fn search(&self, query: &[f32], k: usize) -> Vec<ScoredResult>;

    /// Removes a vector from the index.
    ///
    /// # Arguments
    ///
    /// * `id` - The identifier of the vector to remove
    ///
    /// # Returns
    ///
    /// `true` if the vector was found and removed, `false` otherwise.
    fn remove(&self, id: u64) -> bool;

    /// Returns the number of vectors in the index.
    fn len(&self) -> usize;

    /// Returns `true` if the index is empty.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the dimension of vectors in this index.
    fn dimension(&self) -> usize;

    /// Returns the distance metric used by this index.
    fn metric(&self) -> DistanceMetric;
}