manifoldb_vector/ops/
mod.rs

1//! Vector search operators.
2//!
3//! This module provides operators for vector similarity search that can be
4//! composed into query pipelines. Operators implement an iterator-like interface
5//! for streaming results.
6//!
7//! # Operators
8//!
9//! - [`AnnScan`] - Approximate nearest neighbor search using HNSW index
10//! - [`ExactKnn`] - Brute force k-NN search for small sets or validation
11//! - [`VectorFilter`] - Post-filter vector results by predicates
12//!
13//! # Multi-Vector / ColBERT
14//!
15//! The [`maxsim`] module provides MaxSim scoring for ColBERT-style late interaction
16//! models, where each query/document is represented as multiple token embeddings.
17//!
18//! # Hybrid Search
19//!
20//! The [`hybrid`] module provides support for combining dense and sparse vector
21//! similarity scores using weighted combinations or reciprocal rank fusion.
22//!
23//! # Iterator Design
24//!
25//! All operators implement the [`VectorOperator`] trait, which provides a
26//! streaming interface for results:
27//!
28//! ```ignore
29//! use manifoldb_vector::ops::{VectorOperator, AnnScan};
30//!
31//! let mut scan = AnnScan::new(index, query, k)?;
32//! while let Some(result) = scan.next()? {
33//!     println!("Entity: {:?}, Distance: {}", result.entity_id, result.distance);
34//! }
35//! ```
36//!
37//! # Search Modes
38//!
39//! The operators support two primary search modes:
40//!
41//! - **Find K nearest**: Return the K closest vectors to the query
42//! - **Find within distance**: Return all vectors within distance D of the query
43//!
44//! # Combining with Graph Traversal
45//!
46//! Vector operators can be combined with graph traversal to find similar
47//! neighbors or filter graph results by vector similarity.
48
49mod ann_scan;
50mod exact_knn;
51mod filter;
52pub mod hybrid;
53pub mod maxsim;
54
55pub use ann_scan::AnnScan;
56pub use exact_knn::ExactKnn;
57pub use filter::{FilterBuilder, VectorFilter};
58pub use hybrid::{merge_results, reciprocal_rank_fusion, HybridConfig, HybridMatch};
59pub use maxsim::{maxsim, maxsim_batch, maxsim_cosine, MaxSimScorer};
60
61use manifoldb_core::EntityId;
62
63use crate::error::VectorError;
64
65/// A match from a vector search operation.
66///
67/// Contains the entity ID and its distance to the query vector.
68#[derive(Debug, Clone, Copy)]
69pub struct VectorMatch {
70    /// The entity ID of the matching vector.
71    pub entity_id: EntityId,
72    /// The distance to the query vector (lower is more similar for most metrics).
73    pub distance: f32,
74}
75
76impl VectorMatch {
77    /// Create a new vector match.
78    #[must_use]
79    pub const fn new(entity_id: EntityId, distance: f32) -> Self {
80        Self { entity_id, distance }
81    }
82}
83
84impl From<crate::index::SearchResult> for VectorMatch {
85    fn from(result: crate::index::SearchResult) -> Self {
86        Self::new(result.entity_id, result.distance)
87    }
88}
89
90/// Trait for vector search operators.
91///
92/// Operators implement an iterator-like interface for streaming vector search
93/// results. Unlike standard iterators, the `next` method returns a `Result`
94/// to handle storage or computation errors.
95pub trait VectorOperator {
96    /// Get the next match from the operator.
97    ///
98    /// Returns `Ok(Some(match))` if a match is available, `Ok(None)` if the
99    /// operator is exhausted, or `Err` if an error occurred.
100    fn next(&mut self) -> Result<Option<VectorMatch>, VectorError>;
101
102    /// Collect all remaining matches into a vector.
103    ///
104    /// This consumes the operator and returns all matches.
105    fn collect_all(&mut self) -> Result<Vec<VectorMatch>, VectorError> {
106        let mut results = Vec::new();
107        while let Some(m) = self.next()? {
108            results.push(m);
109        }
110        Ok(results)
111    }
112
113    /// Get the dimension of vectors this operator works with.
114    fn dimension(&self) -> usize;
115}
116
/// Configuration for search operations.
///
/// Implements `PartialEq` so two configurations can be compared field by
/// field. `Eq` is deliberately not derived: `max_distance` is an `f32`, so a
/// configuration holding a NaN threshold does not compare equal to itself.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct SearchConfig {
    /// Maximum number of results to return.
    pub k: usize,
    /// Maximum distance threshold (only return results closer than this).
    /// `None` means no threshold is configured.
    pub max_distance: Option<f32>,
    /// Beam width for approximate search (HNSW `ef_search` parameter).
    /// `None` means no explicit beam width is configured.
    pub ef_search: Option<usize>,
}
127
128impl SearchConfig {
129    /// Create a new search configuration for finding K nearest neighbors.
130    #[must_use]
131    pub const fn k_nearest(k: usize) -> Self {
132        Self { k, max_distance: None, ef_search: None }
133    }
134
135    /// Create a search configuration for finding all vectors within a distance.
136    #[must_use]
137    pub const fn within_distance(max_distance: f32) -> Self {
138        Self { k: usize::MAX, max_distance: Some(max_distance), ef_search: None }
139    }
140
141    /// Set the beam width for approximate search.
142    #[must_use]
143    pub const fn with_ef_search(mut self, ef: usize) -> Self {
144        self.ef_search = Some(ef);
145        self
146    }
147
148    /// Set the maximum number of results.
149    #[must_use]
150    pub const fn with_k(mut self, k: usize) -> Self {
151        self.k = k;
152        self
153    }
154
155    /// Set the maximum distance threshold.
156    #[must_use]
157    pub const fn with_max_distance(mut self, max_distance: f32) -> Self {
158        self.max_distance = Some(max_distance);
159        self
160    }
161}
162
163impl Default for SearchConfig {
164    fn default() -> Self {
165        Self::k_nearest(10)
166    }
167}