laurus/vector/search/searcher.rs
1//! Vector searcher trait and query/response types.
2
3use serde::{Deserialize, Serialize};
4
5use crate::error::Result;
6use crate::vector::core::vector::Vector;
7
8/// Low-level query for a single-vector search against a vector index.
9///
10/// This type represents a single nearest-neighbor query at the index level,
11/// in contrast to the high-level [`VectorSearchRequest`] which can contain
12/// multiple query vectors and aggregation settings.
13///
14/// Naming convention: low-level index operations use "Query" (e.g.,
15/// `VectorIndexQuery`, `VectorIndexQueryParams`), while high-level
16/// store/engine operations use "Request" (e.g., `VectorSearchRequest`).
17#[derive(Debug, Clone)]
18pub struct VectorIndexQuery {
19 /// The query vector.
20 pub query: Vector,
21 /// Search configuration.
22 pub params: VectorIndexQueryParams,
23 /// Optional field name to filter search results.
24 /// If None, searches across all fields.
25 pub field_name: Option<String>,
26}
27
28impl VectorIndexQuery {
29 /// Create a new vector search request.
30 pub fn new(query: Vector) -> Self {
31 VectorIndexQuery {
32 query,
33 params: VectorIndexQueryParams::default(),
34 field_name: None,
35 }
36 }
37
38 /// Set the number of results to return.
39 pub fn top_k(mut self, top_k: usize) -> Self {
40 self.params.top_k = top_k;
41 self
42 }
43
44 /// Set minimum similarity threshold.
45 pub fn min_similarity(mut self, threshold: f32) -> Self {
46 self.params.min_similarity = threshold;
47 self
48 }
49
50 /// Set whether to include scores in results.
51 pub fn include_scores(mut self, include: bool) -> Self {
52 self.params.include_scores = include;
53 self
54 }
55
56 /// Set whether to include vectors in results.
57 pub fn include_vectors(mut self, include: bool) -> Self {
58 self.params.include_vectors = include;
59 self
60 }
61
62 /// Set search timeout in milliseconds.
63 pub fn timeout_ms(mut self, timeout: u64) -> Self {
64 self.params.timeout_ms = Some(timeout);
65 self
66 }
67
68 /// Set field name to filter search results.
69 pub fn field_name(mut self, field_name: String) -> Self {
70 self.field_name = Some(field_name);
71 self
72 }
73}
74
75/// Configuration for low-level vector index query operations.
76///
77/// Used with [`VectorIndexQuery`] to configure nearest-neighbor search
78/// parameters at the index level.
79#[derive(Debug, Clone, Serialize, Deserialize)]
80pub struct VectorIndexQueryParams {
81 /// Number of results to return.
82 pub top_k: usize,
83 /// Minimum similarity threshold.
84 pub min_similarity: f32,
85 /// Whether to return similarity scores.
86 pub include_scores: bool,
87 /// Whether to include vector data in results.
88 pub include_vectors: bool,
89 /// Search timeout in milliseconds.
90 pub timeout_ms: Option<u64>,
91 /// Reranking configuration.
92 pub reranking: Option<crate::vector::search::scoring::ranking::RankingConfig>,
93}
94
95impl Default for VectorIndexQueryParams {
96 fn default() -> Self {
97 Self {
98 top_k: 10,
99 min_similarity: 0.0,
100 include_scores: true,
101 include_vectors: false,
102 timeout_ms: None,
103 reranking: None,
104 }
105 }
106}
107
108/// A single result from a low-level vector index query.
109#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct VectorIndexQueryResult {
111 /// Document ID.
112 pub doc_id: u64,
113 /// Field name of the matched vector.
114 pub field_name: String,
115 /// Similarity score (higher is more similar).
116 pub similarity: f32,
117 /// Distance score (lower is more similar).
118 pub distance: f32,
119 /// Optional vector data.
120 pub vector: Option<Vector>,
121}
122
123/// Collection of results from a low-level vector index query.
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct VectorIndexQueryResults {
126 /// Individual search results.
127 pub results: Vec<VectorIndexQueryResult>,
128 /// Total number of candidates examined.
129 pub candidates_examined: usize,
130 /// Search execution time in milliseconds.
131 pub search_time_ms: f64,
132 /// Query metadata.
133 pub query_metadata: std::collections::HashMap<String, String>,
134}
135
136impl VectorIndexQueryResults {
137 /// Create new empty search results.
138 pub fn new() -> Self {
139 Self {
140 results: Vec::new(),
141 candidates_examined: 0,
142 search_time_ms: 0.0,
143 query_metadata: std::collections::HashMap::new(),
144 }
145 }
146
147 /// Check if results are empty.
148 pub fn is_empty(&self) -> bool {
149 self.results.is_empty()
150 }
151
152 /// Get the number of results.
153 pub fn len(&self) -> usize {
154 self.results.len()
155 }
156
157 /// Sort results by similarity (descending).
158 pub fn sort_by_similarity(&mut self) {
159 self.results.sort_by(|a, b| {
160 b.similarity
161 .partial_cmp(&a.similarity)
162 .unwrap_or(std::cmp::Ordering::Equal)
163 });
164 }
165
166 /// Sort results by distance (ascending).
167 pub fn sort_by_distance(&mut self) {
168 self.results.sort_by(|a, b| {
169 a.distance
170 .partial_cmp(&b.distance)
171 .unwrap_or(std::cmp::Ordering::Equal)
172 });
173 }
174
175 /// Take the top k results.
176 pub fn take_top_k(&mut self, k: usize) {
177 if self.results.len() > k {
178 self.results.truncate(k);
179 }
180 }
181
182 /// Filter results by minimum similarity.
183 pub fn filter_by_similarity(&mut self, min_similarity: f32) {
184 self.results
185 .retain(|result| result.similarity >= min_similarity);
186 }
187
188 /// Get the best (highest similarity) result.
189 pub fn best_result(&self) -> Option<&VectorIndexQueryResult> {
190 self.results.iter().max_by(|a, b| {
191 a.similarity
192 .partial_cmp(&b.similarity)
193 .unwrap_or(std::cmp::Ordering::Equal)
194 })
195 }
196}
197
198impl Default for VectorIndexQueryResults {
199 fn default() -> Self {
200 Self::new()
201 }
202}
203
204/// Trait for vector searchers.
205pub trait VectorIndexSearcher: Send + Sync + std::fmt::Debug {
206 /// Execute a vector similarity search.
207 fn search(&self, request: &VectorIndexQuery) -> Result<VectorIndexQueryResults>;
208
209 /// Count the number of vectors matching the query.
210 fn count(&self, request: VectorIndexQuery) -> Result<u64>;
211
212 /// Warm up the searcher (pre-load data, etc.).
213 fn warmup(&mut self) -> Result<()> {
214 // No-op by default. Implementations can override this method to perform
215 // any necessary warm-up steps, such as loading index data into memory.
216 Ok(())
217 }
218}
219
220// ── High-level search request types ──────────────────────────────────────────
221
222/// How a vector search query is specified.
223///
224/// Mirrors [`LexicalSearchQuery`](crate::lexical::search::searcher::LexicalSearchQuery)
225/// for symmetry:
226///
227/// | | Lexical | Vector |
228/// |---|---|---|
229/// | Deferred resolution | [`Dsl(String)`](crate::lexical::search::searcher::LexicalSearchQuery::Dsl) | [`Payloads`](Self::Payloads) |
230/// | Pre-built | [`Obj(Box<dyn Query>)`](crate::lexical::search::searcher::LexicalSearchQuery::Obj) | [`Vectors`](Self::Vectors) |
231#[derive(Debug, Clone)]
232pub enum VectorSearchQuery {
233 /// Raw payloads (text, bytes, etc.) to be embedded into vectors at
234 /// search time by the engine's configured embedder.
235 Payloads(Vec<crate::vector::store::request::QueryPayload>),
236
237 /// Pre-embedded query vectors, ready for nearest-neighbor search.
238 Vectors(Vec<crate::vector::store::request::QueryVector>),
239}
240
/// Fallback for `VectorSearchParams::limit` (used by its serde `default`
/// attribute and its `Default` impl): ten results.
fn default_query_limit() -> usize {
    const DEFAULT_LIMIT: usize = 10;
    DEFAULT_LIMIT
}
244
/// Fallback for `VectorSearchParams::overfetch` (used by its serde `default`
/// attribute and its `Default` impl): a factor of `1.0`.
fn default_overfetch() -> f32 {
    const DEFAULT_OVERFETCH: f32 = 1.0;
    DEFAULT_OVERFETCH
}
248
249/// Parameters for vector search operations.
250///
251/// Analogous to
252/// [`LexicalSearchParams`](crate::lexical::search::searcher::LexicalSearchParams),
253/// this struct groups all configuration knobs for a vector search independently
254/// of the query specification.
255#[derive(Debug, Clone, Serialize, Deserialize)]
256pub struct VectorSearchParams {
257 /// Fields to search in.
258 #[serde(default)]
259 pub fields: Option<Vec<crate::vector::store::request::FieldSelector>>,
260 /// Maximum number of results to return.
261 #[serde(default = "default_query_limit")]
262 pub limit: usize,
263 /// How to combine scores from multiple query vectors.
264 #[serde(default)]
265 pub score_mode: crate::vector::store::request::VectorScoreMode,
266 /// Overfetch factor for better result quality.
267 #[serde(default = "default_overfetch")]
268 pub overfetch: f32,
269 /// Minimum score threshold. Results below this score are filtered out.
270 #[serde(default)]
271 pub min_score: f32,
272 /// List of allowed document IDs (for internal use by Engine filtering).
273 #[serde(skip)]
274 pub allowed_ids: Option<Vec<u64>>,
275}
276
277impl Default for VectorSearchParams {
278 fn default() -> Self {
279 Self {
280 fields: None,
281 limit: default_query_limit(),
282 score_mode: crate::vector::store::request::VectorScoreMode::default(),
283 overfetch: default_overfetch(),
284 min_score: 0.0,
285 allowed_ids: None,
286 }
287 }
288}
289
290/// Request model for collection-level vector search.
291///
292/// Mirrors
293/// [`LexicalSearchRequest`](crate::lexical::search::searcher::LexicalSearchRequest)
294/// structure: a query enum paired with a params struct.
295#[derive(Debug, Clone)]
296pub struct VectorSearchRequest {
297 /// The query to execute.
298 pub query: VectorSearchQuery,
299 /// Search configuration.
300 pub params: VectorSearchParams,
301}
302
303impl Default for VectorSearchRequest {
304 fn default() -> Self {
305 Self {
306 query: VectorSearchQuery::Vectors(Vec::new()),
307 params: VectorSearchParams::default(),
308 }
309 }
310}
311
312// ── High-level searcher trait ────────────────────────────────────────────────
313
314/// Trait for high-level vector search implementations.
315///
316/// This trait defines the interface for executing searches against vector indexes,
317/// analogous to [`crate::lexical::search::searcher::LexicalSearcher`] for lexical search.
318///
319/// Unlike [`VectorIndexSearcher`] which operates at the low-level (single vector queries),
320/// `VectorSearcher` handles high-level search requests with multiple query vectors,
321/// field selection, filters, and score aggregation.
322pub trait VectorSearcher: Send + Sync + std::fmt::Debug {
323 /// Execute a search with the given request.
324 ///
325 /// This method processes a high-level search request that may contain
326 /// multiple query vectors across different fields, applies filters,
327 /// and aggregates scores according to the specified score mode.
328 fn search(
329 &self,
330 request: &VectorSearchRequest,
331 ) -> crate::error::Result<crate::vector::store::response::VectorSearchResults>;
332
333 /// Count the number of matching documents for a request.
334 ///
335 /// Returns the number of documents that match the given search request,
336 /// applying the min_score threshold if specified in the request.
337 fn count(&self, request: &VectorSearchRequest) -> crate::error::Result<u64>;
338}