manifoldb_vector/ops/mod.rs
1//! Vector search operators.
2//!
3//! This module provides operators for vector similarity search that can be
4//! composed into query pipelines. Operators implement an iterator-like interface
5//! for streaming results.
6//!
7//! # Operators
8//!
9//! - [`AnnScan`] - Approximate nearest neighbor search using HNSW index
10//! - [`ExactKnn`] - Brute force k-NN search for small sets or validation
11//! - [`VectorFilter`] - Post-filter vector results by predicates
12//!
13//! # Multi-Vector / ColBERT
14//!
15//! The [`maxsim`] module provides MaxSim scoring for ColBERT-style late interaction
16//! models, where each query/document is represented as multiple token embeddings.
17//!
18//! # Hybrid Search
19//!
20//! The [`hybrid`] module provides support for combining dense and sparse vector
21//! similarity scores using weighted combinations or reciprocal rank fusion.
22//!
23//! # Iterator Design
24//!
25//! All operators implement the [`VectorOperator`] trait, which provides a
26//! streaming interface for results:
27//!
28//! ```ignore
29//! use manifoldb_vector::ops::{VectorOperator, AnnScan};
30//!
31//! let mut scan = AnnScan::new(index, query, k)?;
32//! while let Some(result) = scan.next()? {
33//! println!("Entity: {:?}, Distance: {}", result.entity_id, result.distance);
34//! }
35//! ```
36//!
37//! # Search Modes
38//!
39//! The operators support two primary search modes:
40//!
41//! - **Find K nearest**: Return the K closest vectors to the query
42//! - **Find within distance**: Return all vectors within distance D of the query
43//!
44//! # Combining with Graph Traversal
45//!
46//! Vector operators can be combined with graph traversal to find similar
47//! neighbors or filter graph results by vector similarity.
48
49mod ann_scan;
50mod exact_knn;
51mod filter;
52pub mod hybrid;
53pub mod maxsim;
54
55pub use ann_scan::AnnScan;
56pub use exact_knn::ExactKnn;
57pub use filter::{FilterBuilder, VectorFilter};
58pub use hybrid::{merge_results, reciprocal_rank_fusion, HybridConfig, HybridMatch};
59pub use maxsim::{maxsim, maxsim_batch, maxsim_cosine, MaxSimScorer};
60
61use manifoldb_core::EntityId;
62
63use crate::error::VectorError;
64
65/// A match from a vector search operation.
66///
67/// Contains the entity ID and its distance to the query vector.
68#[derive(Debug, Clone, Copy)]
69pub struct VectorMatch {
70 /// The entity ID of the matching vector.
71 pub entity_id: EntityId,
72 /// The distance to the query vector (lower is more similar for most metrics).
73 pub distance: f32,
74}
75
76impl VectorMatch {
77 /// Create a new vector match.
78 #[must_use]
79 pub const fn new(entity_id: EntityId, distance: f32) -> Self {
80 Self { entity_id, distance }
81 }
82}
83
84impl From<crate::index::SearchResult> for VectorMatch {
85 fn from(result: crate::index::SearchResult) -> Self {
86 Self::new(result.entity_id, result.distance)
87 }
88}
89
90/// Trait for vector search operators.
91///
92/// Operators implement an iterator-like interface for streaming vector search
93/// results. Unlike standard iterators, the `next` method returns a `Result`
94/// to handle storage or computation errors.
95pub trait VectorOperator {
96 /// Get the next match from the operator.
97 ///
98 /// Returns `Ok(Some(match))` if a match is available, `Ok(None)` if the
99 /// operator is exhausted, or `Err` if an error occurred.
100 fn next(&mut self) -> Result<Option<VectorMatch>, VectorError>;
101
102 /// Collect all remaining matches into a vector.
103 ///
104 /// This consumes the operator and returns all matches.
105 fn collect_all(&mut self) -> Result<Vec<VectorMatch>, VectorError> {
106 let mut results = Vec::new();
107 while let Some(m) = self.next()? {
108 results.push(m);
109 }
110 Ok(results)
111 }
112
113 /// Get the dimension of vectors this operator works with.
114 fn dimension(&self) -> usize;
115}
116
/// Settings that control a vector search operation.
#[derive(Clone, Copy, Debug)]
pub struct SearchConfig {
    /// Upper bound on the number of results returned.
    pub k: usize,
    /// Optional distance cutoff (only results closer than this are returned).
    pub max_distance: Option<f32>,
    /// Optional beam width for approximate search (HNSW `ef_search` parameter).
    pub ef_search: Option<usize>,
}
127
128impl SearchConfig {
129 /// Create a new search configuration for finding K nearest neighbors.
130 #[must_use]
131 pub const fn k_nearest(k: usize) -> Self {
132 Self { k, max_distance: None, ef_search: None }
133 }
134
135 /// Create a search configuration for finding all vectors within a distance.
136 #[must_use]
137 pub const fn within_distance(max_distance: f32) -> Self {
138 Self { k: usize::MAX, max_distance: Some(max_distance), ef_search: None }
139 }
140
141 /// Set the beam width for approximate search.
142 #[must_use]
143 pub const fn with_ef_search(mut self, ef: usize) -> Self {
144 self.ef_search = Some(ef);
145 self
146 }
147
148 /// Set the maximum number of results.
149 #[must_use]
150 pub const fn with_k(mut self, k: usize) -> Self {
151 self.k = k;
152 self
153 }
154
155 /// Set the maximum distance threshold.
156 #[must_use]
157 pub const fn with_max_distance(mut self, max_distance: f32) -> Self {
158 self.max_distance = Some(max_distance);
159 self
160 }
161}
162
163impl Default for SearchConfig {
164 fn default() -> Self {
165 Self::k_nearest(10)
166 }
167}