Skip to main content

laurus/vector/search/
searcher.rs

1//! Vector searcher trait and query/response types.
2
3use serde::{Deserialize, Serialize};
4
5use crate::error::Result;
6use crate::vector::core::vector::Vector;
7
8/// Low-level query for a single-vector search against a vector index.
9///
10/// This type represents a single nearest-neighbor query at the index level,
11/// in contrast to the high-level [`VectorSearchRequest`] which can contain
12/// multiple query vectors and aggregation settings.
13///
14/// Naming convention: low-level index operations use "Query" (e.g.,
15/// `VectorIndexQuery`, `VectorIndexQueryParams`), while high-level
16/// store/engine operations use "Request" (e.g., `VectorSearchRequest`).
17#[derive(Debug, Clone)]
18pub struct VectorIndexQuery {
19    /// The query vector.
20    pub query: Vector,
21    /// Search configuration.
22    pub params: VectorIndexQueryParams,
23    /// Optional field name to filter search results.
24    /// If None, searches across all fields.
25    pub field_name: Option<String>,
26}
27
28impl VectorIndexQuery {
29    /// Create a new vector search request.
30    pub fn new(query: Vector) -> Self {
31        VectorIndexQuery {
32            query,
33            params: VectorIndexQueryParams::default(),
34            field_name: None,
35        }
36    }
37
38    /// Set the number of results to return.
39    pub fn top_k(mut self, top_k: usize) -> Self {
40        self.params.top_k = top_k;
41        self
42    }
43
44    /// Set minimum similarity threshold.
45    pub fn min_similarity(mut self, threshold: f32) -> Self {
46        self.params.min_similarity = threshold;
47        self
48    }
49
50    /// Set whether to include scores in results.
51    pub fn include_scores(mut self, include: bool) -> Self {
52        self.params.include_scores = include;
53        self
54    }
55
56    /// Set whether to include vectors in results.
57    pub fn include_vectors(mut self, include: bool) -> Self {
58        self.params.include_vectors = include;
59        self
60    }
61
62    /// Set search timeout in milliseconds.
63    pub fn timeout_ms(mut self, timeout: u64) -> Self {
64        self.params.timeout_ms = Some(timeout);
65        self
66    }
67
68    /// Set field name to filter search results.
69    pub fn field_name(mut self, field_name: String) -> Self {
70        self.field_name = Some(field_name);
71        self
72    }
73}
74
75/// Configuration for low-level vector index query operations.
76///
77/// Used with [`VectorIndexQuery`] to configure nearest-neighbor search
78/// parameters at the index level.
79#[derive(Debug, Clone, Serialize, Deserialize)]
80pub struct VectorIndexQueryParams {
81    /// Number of results to return.
82    pub top_k: usize,
83    /// Minimum similarity threshold.
84    pub min_similarity: f32,
85    /// Whether to return similarity scores.
86    pub include_scores: bool,
87    /// Whether to include vector data in results.
88    pub include_vectors: bool,
89    /// Search timeout in milliseconds.
90    pub timeout_ms: Option<u64>,
91    /// Reranking configuration.
92    pub reranking: Option<crate::vector::search::scoring::ranking::RankingConfig>,
93}
94
95impl Default for VectorIndexQueryParams {
96    fn default() -> Self {
97        Self {
98            top_k: 10,
99            min_similarity: 0.0,
100            include_scores: true,
101            include_vectors: false,
102            timeout_ms: None,
103            reranking: None,
104        }
105    }
106}
107
108/// A single result from a low-level vector index query.
109#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct VectorIndexQueryResult {
111    /// Document ID.
112    pub doc_id: u64,
113    /// Field name of the matched vector.
114    pub field_name: String,
115    /// Similarity score (higher is more similar).
116    pub similarity: f32,
117    /// Distance score (lower is more similar).
118    pub distance: f32,
119    /// Optional vector data.
120    pub vector: Option<Vector>,
121}
122
123/// Collection of results from a low-level vector index query.
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct VectorIndexQueryResults {
126    /// Individual search results.
127    pub results: Vec<VectorIndexQueryResult>,
128    /// Total number of candidates examined.
129    pub candidates_examined: usize,
130    /// Search execution time in milliseconds.
131    pub search_time_ms: f64,
132    /// Query metadata.
133    pub query_metadata: std::collections::HashMap<String, String>,
134}
135
136impl VectorIndexQueryResults {
137    /// Create new empty search results.
138    pub fn new() -> Self {
139        Self {
140            results: Vec::new(),
141            candidates_examined: 0,
142            search_time_ms: 0.0,
143            query_metadata: std::collections::HashMap::new(),
144        }
145    }
146
147    /// Check if results are empty.
148    pub fn is_empty(&self) -> bool {
149        self.results.is_empty()
150    }
151
152    /// Get the number of results.
153    pub fn len(&self) -> usize {
154        self.results.len()
155    }
156
157    /// Sort results by similarity (descending).
158    pub fn sort_by_similarity(&mut self) {
159        self.results.sort_by(|a, b| {
160            b.similarity
161                .partial_cmp(&a.similarity)
162                .unwrap_or(std::cmp::Ordering::Equal)
163        });
164    }
165
166    /// Sort results by distance (ascending).
167    pub fn sort_by_distance(&mut self) {
168        self.results.sort_by(|a, b| {
169            a.distance
170                .partial_cmp(&b.distance)
171                .unwrap_or(std::cmp::Ordering::Equal)
172        });
173    }
174
175    /// Take the top k results.
176    pub fn take_top_k(&mut self, k: usize) {
177        if self.results.len() > k {
178            self.results.truncate(k);
179        }
180    }
181
182    /// Filter results by minimum similarity.
183    pub fn filter_by_similarity(&mut self, min_similarity: f32) {
184        self.results
185            .retain(|result| result.similarity >= min_similarity);
186    }
187
188    /// Get the best (highest similarity) result.
189    pub fn best_result(&self) -> Option<&VectorIndexQueryResult> {
190        self.results.iter().max_by(|a, b| {
191            a.similarity
192                .partial_cmp(&b.similarity)
193                .unwrap_or(std::cmp::Ordering::Equal)
194        })
195    }
196}
197
198impl Default for VectorIndexQueryResults {
199    fn default() -> Self {
200        Self::new()
201    }
202}
203
204/// Trait for vector searchers.
205pub trait VectorIndexSearcher: Send + Sync + std::fmt::Debug {
206    /// Execute a vector similarity search.
207    fn search(&self, request: &VectorIndexQuery) -> Result<VectorIndexQueryResults>;
208
209    /// Count the number of vectors matching the query.
210    fn count(&self, request: VectorIndexQuery) -> Result<u64>;
211
212    /// Warm up the searcher (pre-load data, etc.).
213    fn warmup(&mut self) -> Result<()> {
214        // No-op by default. Implementations can override this method to perform
215        // any necessary warm-up steps, such as loading index data into memory.
216        Ok(())
217    }
218}
219
220// ── High-level search request types ──────────────────────────────────────────
221
222/// How a vector search query is specified.
223///
224/// Mirrors [`LexicalSearchQuery`](crate::lexical::search::searcher::LexicalSearchQuery)
225/// for symmetry:
226///
227/// | | Lexical | Vector |
228/// |---|---|---|
229/// | Deferred resolution | [`Dsl(String)`](crate::lexical::search::searcher::LexicalSearchQuery::Dsl) | [`Payloads`](Self::Payloads) |
230/// | Pre-built | [`Obj(Box<dyn Query>)`](crate::lexical::search::searcher::LexicalSearchQuery::Obj) | [`Vectors`](Self::Vectors) |
231#[derive(Debug, Clone)]
232pub enum VectorSearchQuery {
233    /// Raw payloads (text, bytes, etc.) to be embedded into vectors at
234    /// search time by the engine's configured embedder.
235    Payloads(Vec<crate::vector::store::request::QueryPayload>),
236
237    /// Pre-embedded query vectors, ready for nearest-neighbor search.
238    Vectors(Vec<crate::vector::store::request::QueryVector>),
239}
240
/// Default for [`VectorSearchParams::limit`]: ten results.
fn default_query_limit() -> usize {
    const DEFAULT_LIMIT: usize = 10;
    DEFAULT_LIMIT
}
244
/// Default for [`VectorSearchParams::overfetch`]: a factor of `1.0`.
fn default_overfetch() -> f32 {
    const DEFAULT_OVERFETCH: f32 = 1.0;
    DEFAULT_OVERFETCH
}
248
249/// Parameters for vector search operations.
250///
251/// Analogous to
252/// [`LexicalSearchParams`](crate::lexical::search::searcher::LexicalSearchParams),
253/// this struct groups all configuration knobs for a vector search independently
254/// of the query specification.
255#[derive(Debug, Clone, Serialize, Deserialize)]
256pub struct VectorSearchParams {
257    /// Fields to search in.
258    #[serde(default)]
259    pub fields: Option<Vec<crate::vector::store::request::FieldSelector>>,
260    /// Maximum number of results to return.
261    #[serde(default = "default_query_limit")]
262    pub limit: usize,
263    /// How to combine scores from multiple query vectors.
264    #[serde(default)]
265    pub score_mode: crate::vector::store::request::VectorScoreMode,
266    /// Overfetch factor for better result quality.
267    #[serde(default = "default_overfetch")]
268    pub overfetch: f32,
269    /// Minimum score threshold. Results below this score are filtered out.
270    #[serde(default)]
271    pub min_score: f32,
272    /// List of allowed document IDs (for internal use by Engine filtering).
273    #[serde(skip)]
274    pub allowed_ids: Option<Vec<u64>>,
275}
276
277impl Default for VectorSearchParams {
278    fn default() -> Self {
279        Self {
280            fields: None,
281            limit: default_query_limit(),
282            score_mode: crate::vector::store::request::VectorScoreMode::default(),
283            overfetch: default_overfetch(),
284            min_score: 0.0,
285            allowed_ids: None,
286        }
287    }
288}
289
290/// Request model for collection-level vector search.
291///
292/// Mirrors
293/// [`LexicalSearchRequest`](crate::lexical::search::searcher::LexicalSearchRequest)
294/// structure: a query enum paired with a params struct.
295#[derive(Debug, Clone)]
296pub struct VectorSearchRequest {
297    /// The query to execute.
298    pub query: VectorSearchQuery,
299    /// Search configuration.
300    pub params: VectorSearchParams,
301}
302
303impl Default for VectorSearchRequest {
304    fn default() -> Self {
305        Self {
306            query: VectorSearchQuery::Vectors(Vec::new()),
307            params: VectorSearchParams::default(),
308        }
309    }
310}
311
312// ── High-level searcher trait ────────────────────────────────────────────────
313
314/// Trait for high-level vector search implementations.
315///
316/// This trait defines the interface for executing searches against vector indexes,
317/// analogous to [`crate::lexical::search::searcher::LexicalSearcher`] for lexical search.
318///
319/// Unlike [`VectorIndexSearcher`] which operates at the low-level (single vector queries),
320/// `VectorSearcher` handles high-level search requests with multiple query vectors,
321/// field selection, filters, and score aggregation.
322pub trait VectorSearcher: Send + Sync + std::fmt::Debug {
323    /// Execute a search with the given request.
324    ///
325    /// This method processes a high-level search request that may contain
326    /// multiple query vectors across different fields, applies filters,
327    /// and aggregates scores according to the specified score mode.
328    fn search(
329        &self,
330        request: &VectorSearchRequest,
331    ) -> crate::error::Result<crate::vector::store::response::VectorSearchResults>;
332
333    /// Count the number of matching documents for a request.
334    ///
335    /// Returns the number of documents that match the given search request,
336    /// applying the min_score threshold if specified in the request.
337    fn count(&self, request: &VectorSearchRequest) -> crate::error::Result<u64>;
338}