laurus/engine/search.rs
1use std::collections::HashMap;
2
3use crate::lexical::query::Query;
4use crate::lexical::search::searcher::{LexicalSearchQuery, SortField};
5// Re-export VectorSearchQuery so engine.rs and query.rs can refer to it
6// via `self::search::VectorSearchQuery` without reaching into vector internals.
7use crate::vector::VectorScoreMode;
8pub use crate::vector::search::searcher::VectorSearchQuery;
9
10// ── Query types (what to search for) ─────────────────────────────────────────
11
12/// Unified search query specification.
13///
14/// Determines **what** to search for. Search parameters (limits, score
15/// thresholds, fusion, etc.) are separate fields on [`SearchRequest`].
16///
17/// Four variants cover all search modes:
18///
19/// - [`Dsl`](Self::Dsl) — unified query DSL string, parsed at search time.
20/// - [`Lexical`](Self::Lexical) — lexical (BM25) search only.
21/// - [`Vector`](Self::Vector) — vector (nearest-neighbor) search only.
22/// - [`Hybrid`](Self::Hybrid) — both lexical and vector search with fusion.
23#[derive(Debug)]
24#[allow(clippy::large_enum_variant)]
25pub enum SearchQuery {
26 /// Unified query DSL string — parsed at search time by
27 /// [`UnifiedQueryParser`](super::query::UnifiedQueryParser).
28 ///
29 /// Supports lexical, vector, and hybrid queries in a single string:
30 ///
31 /// - **Lexical**: `title:hello`, `"exact phrase"`, `AND`/`OR`, `term~2`,
32 /// `[a TO z]`, etc.
33 /// - **Vector**: `field:"text"`, `field:text^0.8` (with boost).
34 /// - **Hybrid**: mix both — `title:hello content:"cute kitten"^0.8`.
35 Dsl(String),
36
37 /// Pre-built lexical (BM25) search query.
38 Lexical(LexicalSearchQuery),
39
40 /// Pre-built vector (nearest-neighbor) search query.
41 Vector(VectorSearchQuery),
42
43 /// Hybrid search combining lexical and vector components.
44 ///
45 /// Results are merged using the [`fusion_algorithm`](SearchRequest::fusion_algorithm)
46 /// specified on the [`SearchRequest`]. The [`mode`](HybridMode) controls
47 /// whether results are unioned (OR) or intersected (AND).
48 Hybrid {
49 /// Lexical search component.
50 lexical: LexicalSearchQuery,
51 /// Vector search component.
52 vector: VectorSearchQuery,
53 /// Controls how lexical and vector results are combined.
54 /// Defaults to [`HybridMode::Union`].
55 mode: HybridMode,
56 },
57}
58
/// Strategy for combining the lexical and vector result sets of a hybrid search.
///
/// - [`Union`](Self::Union) — a document is kept when it appears in
///   **either** the lexical or the vector results (OR semantics). This is
///   the default.
/// - [`Intersection`](Self::Intersection) — a document is kept only when it
///   appears in **both** result sets (AND semantics). In the query DSL this
///   mode is triggered by the `+` prefix on a vector field clause, e.g.
///   `title:hello +embedding:"cute kitten"`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum HybridMode {
    /// Keep documents coming from either source (default).
    #[default]
    Union,
    /// Keep only documents present in BOTH result sets.
    Intersection,
}
75
76// ── Option types (how to search) ─────────────────────────────────────────────
77
78/// Parameters controlling lexical search behavior.
79///
80/// These are separated from the query itself so that the same options can
81/// be applied regardless of how the query was specified (DSL string or
82/// pre-built query object).
83#[derive(Debug, Clone)]
84pub struct LexicalSearchOptions {
85 /// Per-field boost factors for relevance scoring.
86 ///
87 /// Example: `{"title": 2.0, "body": 1.0}` gives title matches twice
88 /// the weight of body matches.
89 pub field_boosts: HashMap<String, f32>,
90
91 /// Minimum score threshold. Results below this score are discarded.
92 /// Defaults to `0.0` (no threshold).
93 pub min_score: f32,
94
95 /// Timeout for the search operation in milliseconds.
96 /// `None` means no timeout.
97 pub timeout_ms: Option<u64>,
98
99 /// Enable parallel search across index segments for better performance
100 /// on multi-core systems. Defaults to `false`.
101 pub parallel: bool,
102
103 /// Sort results by field value or by relevance score.
104 /// Defaults to [`SortField::Score`].
105 pub sort_by: SortField,
106}
107
108impl Default for LexicalSearchOptions {
109 fn default() -> Self {
110 Self {
111 field_boosts: HashMap::new(),
112 min_score: 0.0,
113 timeout_ms: None,
114 parallel: false,
115 sort_by: SortField::Score,
116 }
117 }
118}
119
120/// Parameters controlling vector search behavior.
121///
122/// These are separated from the query itself so that the same options can
123/// be applied regardless of how the query was specified (payloads or
124/// pre-embedded vectors).
125#[derive(Debug, Clone)]
126pub struct VectorSearchOptions {
127 /// How to combine scores from multiple query vectors.
128 /// Defaults to [`VectorScoreMode::WeightedSum`].
129 pub score_mode: VectorScoreMode,
130
131 /// Minimum score threshold. Results below this score are discarded.
132 /// Defaults to `0.0` (no threshold).
133 pub min_score: f32,
134}
135
136impl Default for VectorSearchOptions {
137 fn default() -> Self {
138 Self {
139 score_mode: VectorScoreMode::WeightedSum,
140 min_score: 0.0,
141 }
142 }
143}
144
145// ── SearchRequest ────────────────────────────────────────────────────────────
146
147/// Unified search request combining query specification with pagination,
148/// options, and fusion settings.
149///
150/// The query specifies **what** to search for ([`SearchQuery`]), while
151/// [`lexical_options`](Self::lexical_options) and
152/// [`vector_options`](Self::vector_options) control **how** to search.
153///
154/// Use [`SearchRequestBuilder`] for a fluent construction API.
155pub struct SearchRequest {
156 /// The search query specification.
157 pub query: SearchQuery,
158
159 /// Maximum number of results to return. Defaults to `10`.
160 pub limit: usize,
161
162 /// Number of results to skip before returning (for pagination).
163 /// Defaults to `0`.
164 pub offset: usize,
165
166 /// Fusion algorithm for combining lexical and vector scores.
167 ///
168 /// Only used when both lexical and vector search components are
169 /// present (i.e., [`SearchQuery::Hybrid`] or a [`SearchQuery::Dsl`]
170 /// that contains both clause types). Defaults to
171 /// [`FusionAlgorithm::RRF { k: 60.0 }`](FusionAlgorithm::RRF) when
172 /// `None`.
173 pub fusion_algorithm: Option<FusionAlgorithm>,
174
175 /// Optional filter query (lexical) to restrict the search space.
176 ///
177 /// When set, the filter is evaluated first and **both** lexical and
178 /// vector searches are restricted to documents matching this filter.
179 pub filter_query: Option<Box<dyn Query>>,
180
181 /// Parameters controlling lexical search behavior.
182 pub lexical_options: LexicalSearchOptions,
183
184 /// Parameters controlling vector search behavior.
185 pub vector_options: VectorSearchOptions,
186}
187
/// Algorithm used to combine lexical and vector scores in hybrid search.
///
/// When a [`SearchRequest`] does not specify an algorithm, the default is
/// [`RRF`](Self::RRF) with `k = 60.0`.
///
/// Values compare with `==` field by field. The parameters are floats, so
/// a value holding a NaN parameter is not equal to itself.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FusionAlgorithm {
    /// Reciprocal Rank Fusion (RRF).
    ///
    /// Combines results based on rank position rather than raw scores,
    /// making it effective when score magnitudes are not comparable
    /// (e.g. BM25 vs cosine similarity). The score for each document is
    /// `sum(1 / (k + rank))` across the result lists.
    RRF {
        /// Smoothing constant `k`. Higher values reduce the influence of
        /// top-ranked documents. Typical default is `60.0`.
        k: f64,
    },

    /// Weighted Sum with automatic min-max score normalization.
    ///
    /// Before weighting, the engine independently normalizes lexical and
    /// vector scores to the `[0.0, 1.0]` range using min-max normalization
    /// over their respective result sets.
    WeightedSum {
        /// Weight for the normalized lexical score. The
        /// `SearchRequestBuilder::fusion_algorithm` setter clamps it to
        /// `0.0..=1.0`.
        lexical_weight: f32,
        /// Weight for the normalized vector score. The
        /// `SearchRequestBuilder::fusion_algorithm` setter clamps it to
        /// `0.0..=1.0`.
        vector_weight: f32,
    },
}
218
219impl Default for SearchRequest {
220 fn default() -> Self {
221 Self {
222 query: SearchQuery::Dsl(String::new()),
223 limit: 10,
224 offset: 0,
225 fusion_algorithm: None,
226 filter_query: None,
227 lexical_options: LexicalSearchOptions::default(),
228 vector_options: VectorSearchOptions::default(),
229 }
230 }
231}
232
233// ── SearchRequestBuilder ─────────────────────────────────────────────────────
234
235/// Fluent builder for constructing a [`SearchRequest`].
236///
237/// Supports three construction patterns:
238///
239/// 1. **DSL string** (via [`query_dsl`](Self::query_dsl)): Pass a unified
240/// query DSL string. The engine parses it at search time.
241/// 2. **Single mode** (via [`lexical_query`](Self::lexical_query) or
242/// [`vector_query`](Self::vector_query)): Set one search mode.
243/// 3. **Hybrid** (via both [`lexical_query`](Self::lexical_query) and
244/// [`vector_query`](Self::vector_query)): Set both for hybrid search.
245///
246/// If [`query_dsl`](Self::query_dsl) is called, the builder produces a
247/// [`SearchQuery::Dsl`] variant. Otherwise, it determines the variant from
248/// which query methods were called.
249pub struct SearchRequestBuilder {
250 dsl: Option<String>,
251 lexical_query: Option<LexicalSearchQuery>,
252 vector_query: Option<VectorSearchQuery>,
253 limit: usize,
254 offset: usize,
255 fusion_algorithm: Option<FusionAlgorithm>,
256 filter_query: Option<Box<dyn Query>>,
257 lexical_options: LexicalSearchOptions,
258 vector_options: VectorSearchOptions,
259}
260
261impl Default for SearchRequestBuilder {
262 fn default() -> Self {
263 Self::new()
264 }
265}
266
267impl SearchRequestBuilder {
268 /// Create a new builder with default settings.
269 pub fn new() -> Self {
270 Self {
271 dsl: None,
272 lexical_query: None,
273 vector_query: None,
274 limit: 10,
275 offset: 0,
276 fusion_algorithm: None,
277 filter_query: None,
278 lexical_options: LexicalSearchOptions::default(),
279 vector_options: VectorSearchOptions::default(),
280 }
281 }
282
283 // ── Query setters ────────────────────────────────────────────────────
284
285 /// Set a unified query DSL string.
286 ///
287 /// When set, the built request uses [`SearchQuery::Dsl`] and any
288 /// lexical/vector queries set via other methods are ignored.
289 pub fn query_dsl(mut self, dsl: impl Into<String>) -> Self {
290 self.dsl = Some(dsl.into());
291 self
292 }
293
294 /// Set the lexical search query.
295 ///
296 /// If [`vector_query`](Self::vector_query) is also set, the result is
297 /// [`SearchQuery::Hybrid`]. Otherwise [`SearchQuery::Lexical`].
298 pub fn lexical_query(mut self, query: LexicalSearchQuery) -> Self {
299 self.lexical_query = Some(query);
300 self
301 }
302
303 /// Set the vector search query.
304 ///
305 /// If [`lexical_query`](Self::lexical_query) is also set, the result is
306 /// [`SearchQuery::Hybrid`]. Otherwise [`SearchQuery::Vector`].
307 pub fn vector_query(mut self, query: VectorSearchQuery) -> Self {
308 self.vector_query = Some(query);
309 self
310 }
311
312 // ── Pagination & fusion ──────────────────────────────────────────────
313
314 /// Set the maximum number of results to return.
315 pub fn limit(mut self, limit: usize) -> Self {
316 self.limit = limit;
317 self
318 }
319
320 /// Set the number of results to skip (for pagination).
321 pub fn offset(mut self, offset: usize) -> Self {
322 self.offset = offset;
323 self
324 }
325
326 /// Set the fusion algorithm for hybrid search.
327 ///
328 /// For [`FusionAlgorithm::WeightedSum`], the weights are clamped to
329 /// `0.0..=1.0` to prevent NaN/Inf propagation.
330 pub fn fusion_algorithm(mut self, fusion: FusionAlgorithm) -> Self {
331 let fusion = match fusion {
332 FusionAlgorithm::WeightedSum {
333 lexical_weight,
334 vector_weight,
335 } => FusionAlgorithm::WeightedSum {
336 lexical_weight: lexical_weight.clamp(0.0, 1.0),
337 vector_weight: vector_weight.clamp(0.0, 1.0),
338 },
339 other => other,
340 };
341 self.fusion_algorithm = Some(fusion);
342 self
343 }
344
345 /// Set a filter query to restrict the search space.
346 ///
347 /// The filter applies to **both** lexical and vector searches.
348 pub fn filter_query(mut self, query: Box<dyn Query>) -> Self {
349 self.filter_query = Some(query);
350 self
351 }
352
353 // ── Lexical options ──────────────────────────────────────────────────
354
355 /// Add a field-level boost for lexical search.
356 pub fn add_field_boost(mut self, field: impl Into<String>, boost: f32) -> Self {
357 self.lexical_options
358 .field_boosts
359 .insert(field.into(), boost);
360 self
361 }
362
363 /// Set the minimum score threshold for lexical search.
364 pub fn lexical_min_score(mut self, min_score: f32) -> Self {
365 self.lexical_options.min_score = min_score;
366 self
367 }
368
369 /// Set the timeout for lexical search in milliseconds.
370 pub fn lexical_timeout_ms(mut self, timeout_ms: u64) -> Self {
371 self.lexical_options.timeout_ms = Some(timeout_ms);
372 self
373 }
374
375 /// Enable or disable parallel lexical search.
376 pub fn lexical_parallel(mut self, parallel: bool) -> Self {
377 self.lexical_options.parallel = parallel;
378 self
379 }
380
381 /// Set the sort order for lexical search results.
382 pub fn sort_by(mut self, sort_by: SortField) -> Self {
383 self.lexical_options.sort_by = sort_by;
384 self
385 }
386
387 // ── Vector options ───────────────────────────────────────────────────
388
389 /// Set the score combination mode for vector search.
390 pub fn vector_score_mode(mut self, score_mode: VectorScoreMode) -> Self {
391 self.vector_options.score_mode = score_mode;
392 self
393 }
394
395 /// Set the minimum score threshold for vector search.
396 pub fn vector_min_score(mut self, min_score: f32) -> Self {
397 self.vector_options.min_score = min_score;
398 self
399 }
400
401 // ── Build ────────────────────────────────────────────────────────────
402
403 /// Consume the builder and return the constructed [`SearchRequest`].
404 pub fn build(self) -> SearchRequest {
405 let query = if let Some(dsl) = self.dsl {
406 SearchQuery::Dsl(dsl)
407 } else {
408 match (self.lexical_query, self.vector_query) {
409 (Some(lexical), Some(vector)) => SearchQuery::Hybrid {
410 lexical,
411 vector,
412 mode: HybridMode::default(),
413 },
414 (Some(lexical), None) => SearchQuery::Lexical(lexical),
415 (None, Some(vector)) => SearchQuery::Vector(vector),
416 (None, None) => SearchQuery::Dsl(String::new()),
417 }
418 };
419
420 SearchRequest {
421 query,
422 limit: self.limit,
423 offset: self.offset,
424 fusion_algorithm: self.fusion_algorithm,
425 filter_query: self.filter_query,
426 lexical_options: self.lexical_options,
427 vector_options: self.vector_options,
428 }
429 }
430}
431
432// ── SearchResult ─────────────────────────────────────────────────────────────
433
434/// A single result from an [`Engine`](super::Engine) search.
435#[derive(Debug, Clone)]
436pub struct SearchResult {
437 /// External document ID (the `_id` field value).
438 pub id: String,
439 /// Relevance score. The meaning depends on the search mode:
440 /// - Lexical only: BM25 score.
441 /// - Vector only: similarity score (e.g. cosine similarity).
442 /// - Hybrid: fused score produced by the [`FusionAlgorithm`].
443 pub score: f32,
444 /// The stored fields of the document, or `None` if the document could
445 /// not be retrieved (e.g. it was deleted between scoring and retrieval).
446 pub document: Option<crate::data::Document>,
447}