Skip to main content

laurus/engine/
search.rs

1use std::collections::HashMap;
2
3use crate::lexical::query::Query;
4use crate::lexical::search::searcher::{LexicalSearchQuery, SortField};
5// Re-export VectorSearchQuery so engine.rs and query.rs can refer to it
6// via `self::search::VectorSearchQuery` without reaching into vector internals.
7use crate::vector::VectorScoreMode;
8pub use crate::vector::search::searcher::VectorSearchQuery;
9
10// ── Query types (what to search for) ─────────────────────────────────────────
11
12/// Unified search query specification.
13///
14/// Determines **what** to search for. Search parameters (limits, score
15/// thresholds, fusion, etc.) are separate fields on [`SearchRequest`].
16///
17/// Four variants cover all search modes:
18///
19/// - [`Dsl`](Self::Dsl) — unified query DSL string, parsed at search time.
20/// - [`Lexical`](Self::Lexical) — lexical (BM25) search only.
21/// - [`Vector`](Self::Vector) — vector (nearest-neighbor) search only.
22/// - [`Hybrid`](Self::Hybrid) — both lexical and vector search with fusion.
23#[derive(Debug)]
24#[allow(clippy::large_enum_variant)]
25pub enum SearchQuery {
26    /// Unified query DSL string — parsed at search time by
27    /// [`UnifiedQueryParser`](super::query::UnifiedQueryParser).
28    ///
29    /// Supports lexical, vector, and hybrid queries in a single string:
30    ///
31    /// - **Lexical**: `title:hello`, `"exact phrase"`, `AND`/`OR`, `term~2`,
32    ///   `[a TO z]`, etc.
33    /// - **Vector**: `field:"text"`, `field:text^0.8` (with boost).
34    /// - **Hybrid**: mix both — `title:hello content:"cute kitten"^0.8`.
35    Dsl(String),
36
37    /// Pre-built lexical (BM25) search query.
38    Lexical(LexicalSearchQuery),
39
40    /// Pre-built vector (nearest-neighbor) search query.
41    Vector(VectorSearchQuery),
42
43    /// Hybrid search combining lexical and vector components.
44    ///
45    /// Results are merged using the [`fusion_algorithm`](SearchRequest::fusion_algorithm)
46    /// specified on the [`SearchRequest`]. The [`mode`](HybridMode) controls
47    /// whether results are unioned (OR) or intersected (AND).
48    Hybrid {
49        /// Lexical search component.
50        lexical: LexicalSearchQuery,
51        /// Vector search component.
52        vector: VectorSearchQuery,
53        /// Controls how lexical and vector results are combined.
54        /// Defaults to [`HybridMode::Union`].
55        mode: HybridMode,
56    },
57}
58
/// Set semantics used when merging lexical and vector results in a hybrid
/// search.
///
/// - [`Union`](Self::Union) — a document qualifies if it appears in the
///   lexical results **or** the vector results (OR semantics). This is the
///   default.
/// - [`Intersection`](Self::Intersection) — a document qualifies only if it
///   appears in **both** result sets (AND semantics). In the query DSL this
///   mode is triggered by prefixing a vector field clause with `+`, e.g.
///   `title:hello +embedding:"cute kitten"`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum HybridMode {
    /// Keep documents found by either source (the default).
    #[default]
    Union,
    /// Keep only documents found by BOTH sources.
    Intersection,
}
75
76// ── Option types (how to search) ─────────────────────────────────────────────
77
78/// Parameters controlling lexical search behavior.
79///
80/// These are separated from the query itself so that the same options can
81/// be applied regardless of how the query was specified (DSL string or
82/// pre-built query object).
83#[derive(Debug, Clone)]
84pub struct LexicalSearchOptions {
85    /// Per-field boost factors for relevance scoring.
86    ///
87    /// Example: `{"title": 2.0, "body": 1.0}` gives title matches twice
88    /// the weight of body matches.
89    pub field_boosts: HashMap<String, f32>,
90
91    /// Minimum score threshold. Results below this score are discarded.
92    /// Defaults to `0.0` (no threshold).
93    pub min_score: f32,
94
95    /// Timeout for the search operation in milliseconds.
96    /// `None` means no timeout.
97    pub timeout_ms: Option<u64>,
98
99    /// Enable parallel search across index segments for better performance
100    /// on multi-core systems. Defaults to `false`.
101    pub parallel: bool,
102
103    /// Sort results by field value or by relevance score.
104    /// Defaults to [`SortField::Score`].
105    pub sort_by: SortField,
106}
107
108impl Default for LexicalSearchOptions {
109    fn default() -> Self {
110        Self {
111            field_boosts: HashMap::new(),
112            min_score: 0.0,
113            timeout_ms: None,
114            parallel: false,
115            sort_by: SortField::Score,
116        }
117    }
118}
119
120/// Parameters controlling vector search behavior.
121///
122/// These are separated from the query itself so that the same options can
123/// be applied regardless of how the query was specified (payloads or
124/// pre-embedded vectors).
125#[derive(Debug, Clone)]
126pub struct VectorSearchOptions {
127    /// How to combine scores from multiple query vectors.
128    /// Defaults to [`VectorScoreMode::WeightedSum`].
129    pub score_mode: VectorScoreMode,
130
131    /// Minimum score threshold. Results below this score are discarded.
132    /// Defaults to `0.0` (no threshold).
133    pub min_score: f32,
134}
135
136impl Default for VectorSearchOptions {
137    fn default() -> Self {
138        Self {
139            score_mode: VectorScoreMode::WeightedSum,
140            min_score: 0.0,
141        }
142    }
143}
144
145// ── SearchRequest ────────────────────────────────────────────────────────────
146
147/// Unified search request combining query specification with pagination,
148/// options, and fusion settings.
149///
150/// The query specifies **what** to search for ([`SearchQuery`]), while
151/// [`lexical_options`](Self::lexical_options) and
152/// [`vector_options`](Self::vector_options) control **how** to search.
153///
154/// Use [`SearchRequestBuilder`] for a fluent construction API.
155pub struct SearchRequest {
156    /// The search query specification.
157    pub query: SearchQuery,
158
159    /// Maximum number of results to return. Defaults to `10`.
160    pub limit: usize,
161
162    /// Number of results to skip before returning (for pagination).
163    /// Defaults to `0`.
164    pub offset: usize,
165
166    /// Fusion algorithm for combining lexical and vector scores.
167    ///
168    /// Only used when both lexical and vector search components are
169    /// present (i.e., [`SearchQuery::Hybrid`] or a [`SearchQuery::Dsl`]
170    /// that contains both clause types). Defaults to
171    /// [`FusionAlgorithm::RRF { k: 60.0 }`](FusionAlgorithm::RRF) when
172    /// `None`.
173    pub fusion_algorithm: Option<FusionAlgorithm>,
174
175    /// Optional filter query (lexical) to restrict the search space.
176    ///
177    /// When set, the filter is evaluated first and **both** lexical and
178    /// vector searches are restricted to documents matching this filter.
179    pub filter_query: Option<Box<dyn Query>>,
180
181    /// Parameters controlling lexical search behavior.
182    pub lexical_options: LexicalSearchOptions,
183
184    /// Parameters controlling vector search behavior.
185    pub vector_options: VectorSearchOptions,
186}
187
/// Strategy for merging lexical and vector scores in a hybrid search.
///
/// When a [`SearchRequest`] does not name an algorithm, the default is
/// [`RRF`](Self::RRF) with `k = 60.0`.
#[derive(Clone, Copy, Debug)]
pub enum FusionAlgorithm {
    /// Reciprocal Rank Fusion (RRF).
    ///
    /// Works from each document's rank position instead of its raw score,
    /// which makes it a good fit when the score scales cannot be compared
    /// directly (e.g. BM25 vs cosine similarity). A document's fused score
    /// is `sum(1 / (k + rank))` taken over the result lists.
    RRF {
        /// Smoothing constant `k`. The larger it is, the less weight
        /// top-ranked documents carry. A typical default is `60.0`.
        k: f64,
    },

    /// Weighted Sum with automatic min-max score normalization.
    ///
    /// Prior to weighting, the engine normalizes the lexical scores and the
    /// vector scores independently into the `[0.0, 1.0]` range, using
    /// min-max normalization over each result set.
    WeightedSum {
        /// Weight applied to the normalized lexical score. Intended range is
        /// `0.0..=1.0`; [`SearchRequestBuilder::fusion_algorithm`] clamps
        /// values into that range.
        lexical_weight: f32,
        /// Weight applied to the normalized vector score. Intended range is
        /// `0.0..=1.0`; [`SearchRequestBuilder::fusion_algorithm`] clamps
        /// values into that range.
        vector_weight: f32,
    },
}
218
219impl Default for SearchRequest {
220    fn default() -> Self {
221        Self {
222            query: SearchQuery::Dsl(String::new()),
223            limit: 10,
224            offset: 0,
225            fusion_algorithm: None,
226            filter_query: None,
227            lexical_options: LexicalSearchOptions::default(),
228            vector_options: VectorSearchOptions::default(),
229        }
230    }
231}
232
233// ── SearchRequestBuilder ─────────────────────────────────────────────────────
234
235/// Fluent builder for constructing a [`SearchRequest`].
236///
237/// Supports three construction patterns:
238///
239/// 1. **DSL string** (via [`query_dsl`](Self::query_dsl)): Pass a unified
240///    query DSL string. The engine parses it at search time.
241/// 2. **Single mode** (via [`lexical_query`](Self::lexical_query) or
242///    [`vector_query`](Self::vector_query)): Set one search mode.
243/// 3. **Hybrid** (via both [`lexical_query`](Self::lexical_query) and
244///    [`vector_query`](Self::vector_query)): Set both for hybrid search.
245///
246/// If [`query_dsl`](Self::query_dsl) is called, the builder produces a
247/// [`SearchQuery::Dsl`] variant. Otherwise, it determines the variant from
248/// which query methods were called.
249pub struct SearchRequestBuilder {
250    dsl: Option<String>,
251    lexical_query: Option<LexicalSearchQuery>,
252    vector_query: Option<VectorSearchQuery>,
253    limit: usize,
254    offset: usize,
255    fusion_algorithm: Option<FusionAlgorithm>,
256    filter_query: Option<Box<dyn Query>>,
257    lexical_options: LexicalSearchOptions,
258    vector_options: VectorSearchOptions,
259}
260
261impl Default for SearchRequestBuilder {
262    fn default() -> Self {
263        Self::new()
264    }
265}
266
267impl SearchRequestBuilder {
268    /// Create a new builder with default settings.
269    pub fn new() -> Self {
270        Self {
271            dsl: None,
272            lexical_query: None,
273            vector_query: None,
274            limit: 10,
275            offset: 0,
276            fusion_algorithm: None,
277            filter_query: None,
278            lexical_options: LexicalSearchOptions::default(),
279            vector_options: VectorSearchOptions::default(),
280        }
281    }
282
283    // ── Query setters ────────────────────────────────────────────────────
284
285    /// Set a unified query DSL string.
286    ///
287    /// When set, the built request uses [`SearchQuery::Dsl`] and any
288    /// lexical/vector queries set via other methods are ignored.
289    pub fn query_dsl(mut self, dsl: impl Into<String>) -> Self {
290        self.dsl = Some(dsl.into());
291        self
292    }
293
294    /// Set the lexical search query.
295    ///
296    /// If [`vector_query`](Self::vector_query) is also set, the result is
297    /// [`SearchQuery::Hybrid`]. Otherwise [`SearchQuery::Lexical`].
298    pub fn lexical_query(mut self, query: LexicalSearchQuery) -> Self {
299        self.lexical_query = Some(query);
300        self
301    }
302
303    /// Set the vector search query.
304    ///
305    /// If [`lexical_query`](Self::lexical_query) is also set, the result is
306    /// [`SearchQuery::Hybrid`]. Otherwise [`SearchQuery::Vector`].
307    pub fn vector_query(mut self, query: VectorSearchQuery) -> Self {
308        self.vector_query = Some(query);
309        self
310    }
311
312    // ── Pagination & fusion ──────────────────────────────────────────────
313
314    /// Set the maximum number of results to return.
315    pub fn limit(mut self, limit: usize) -> Self {
316        self.limit = limit;
317        self
318    }
319
320    /// Set the number of results to skip (for pagination).
321    pub fn offset(mut self, offset: usize) -> Self {
322        self.offset = offset;
323        self
324    }
325
326    /// Set the fusion algorithm for hybrid search.
327    ///
328    /// For [`FusionAlgorithm::WeightedSum`], the weights are clamped to
329    /// `0.0..=1.0` to prevent NaN/Inf propagation.
330    pub fn fusion_algorithm(mut self, fusion: FusionAlgorithm) -> Self {
331        let fusion = match fusion {
332            FusionAlgorithm::WeightedSum {
333                lexical_weight,
334                vector_weight,
335            } => FusionAlgorithm::WeightedSum {
336                lexical_weight: lexical_weight.clamp(0.0, 1.0),
337                vector_weight: vector_weight.clamp(0.0, 1.0),
338            },
339            other => other,
340        };
341        self.fusion_algorithm = Some(fusion);
342        self
343    }
344
345    /// Set a filter query to restrict the search space.
346    ///
347    /// The filter applies to **both** lexical and vector searches.
348    pub fn filter_query(mut self, query: Box<dyn Query>) -> Self {
349        self.filter_query = Some(query);
350        self
351    }
352
353    // ── Lexical options ──────────────────────────────────────────────────
354
355    /// Add a field-level boost for lexical search.
356    pub fn add_field_boost(mut self, field: impl Into<String>, boost: f32) -> Self {
357        self.lexical_options
358            .field_boosts
359            .insert(field.into(), boost);
360        self
361    }
362
363    /// Set the minimum score threshold for lexical search.
364    pub fn lexical_min_score(mut self, min_score: f32) -> Self {
365        self.lexical_options.min_score = min_score;
366        self
367    }
368
369    /// Set the timeout for lexical search in milliseconds.
370    pub fn lexical_timeout_ms(mut self, timeout_ms: u64) -> Self {
371        self.lexical_options.timeout_ms = Some(timeout_ms);
372        self
373    }
374
375    /// Enable or disable parallel lexical search.
376    pub fn lexical_parallel(mut self, parallel: bool) -> Self {
377        self.lexical_options.parallel = parallel;
378        self
379    }
380
381    /// Set the sort order for lexical search results.
382    pub fn sort_by(mut self, sort_by: SortField) -> Self {
383        self.lexical_options.sort_by = sort_by;
384        self
385    }
386
387    // ── Vector options ───────────────────────────────────────────────────
388
389    /// Set the score combination mode for vector search.
390    pub fn vector_score_mode(mut self, score_mode: VectorScoreMode) -> Self {
391        self.vector_options.score_mode = score_mode;
392        self
393    }
394
395    /// Set the minimum score threshold for vector search.
396    pub fn vector_min_score(mut self, min_score: f32) -> Self {
397        self.vector_options.min_score = min_score;
398        self
399    }
400
401    // ── Build ────────────────────────────────────────────────────────────
402
403    /// Consume the builder and return the constructed [`SearchRequest`].
404    pub fn build(self) -> SearchRequest {
405        let query = if let Some(dsl) = self.dsl {
406            SearchQuery::Dsl(dsl)
407        } else {
408            match (self.lexical_query, self.vector_query) {
409                (Some(lexical), Some(vector)) => SearchQuery::Hybrid {
410                    lexical,
411                    vector,
412                    mode: HybridMode::default(),
413                },
414                (Some(lexical), None) => SearchQuery::Lexical(lexical),
415                (None, Some(vector)) => SearchQuery::Vector(vector),
416                (None, None) => SearchQuery::Dsl(String::new()),
417            }
418        };
419
420        SearchRequest {
421            query,
422            limit: self.limit,
423            offset: self.offset,
424            fusion_algorithm: self.fusion_algorithm,
425            filter_query: self.filter_query,
426            lexical_options: self.lexical_options,
427            vector_options: self.vector_options,
428        }
429    }
430}
431
432// ── SearchResult ─────────────────────────────────────────────────────────────
433
434/// A single result from an [`Engine`](super::Engine) search.
435#[derive(Debug, Clone)]
436pub struct SearchResult {
437    /// External document ID (the `_id` field value).
438    pub id: String,
439    /// Relevance score. The meaning depends on the search mode:
440    /// - Lexical only: BM25 score.
441    /// - Vector only: similarity score (e.g. cosine similarity).
442    /// - Hybrid: fused score produced by the [`FusionAlgorithm`].
443    pub score: f32,
444    /// The stored fields of the document, or `None` if the document could
445    /// not be retrieved (e.g. it was deleted between scoring and retrieval).
446    pub document: Option<crate::data::Document>,
447}