laurus/lexical/query.rs
1//! Query system for searching documents in inverted indexes.
2
3pub mod advanced_query;
4pub mod boolean;
5pub mod collector;
6pub mod fuzzy;
7pub mod geo;
8pub mod matcher;
9pub mod multi_term;
10pub mod parser;
11pub mod phrase;
12pub mod prefix;
13pub mod range;
14pub mod regexp;
15pub mod scorer;
16pub mod span;
17pub mod term;
18pub mod wildcard;
19
20// Re-exports for cleaner API
21pub use advanced_query::AdvancedQuery;
22pub use boolean::{BooleanQuery, BooleanQueryBuilder};
23pub use fuzzy::FuzzyQuery;
24pub use geo::{GeoBoundingBox, GeoBoundingBoxQuery, GeoDistanceQuery, GeoPoint, GeoQuery};
25pub use multi_term::MultiTermQuery;
26pub use parser::LexicalQueryParser;
27pub use phrase::PhraseQuery;
28pub use prefix::PrefixQuery;
29pub use range::NumericRangeQuery;
30pub use regexp::RegexpQuery;
31pub use span::{SpanNearQuery, SpanQuery, SpanTermQuery};
32pub use term::TermQuery;
33pub use wildcard::WildcardQuery;
34
35use std::any::Any;
36use std::collections::HashMap;
37use std::fmt::Debug;
38
39use serde::{Deserialize, Serialize};
40
41use crate::error::Result;
42#[allow(unused_imports)]
43use crate::lexical::core::document::Document;
44use crate::lexical::reader::LexicalIndexReader;
45
46use self::matcher::Matcher;
47use self::scorer::Scorer;
48
49/// A search hit containing a document and its score.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct Hit {
52 /// The document ID.
53 pub doc_id: u64,
54 /// The relevance score.
55 pub score: f32,
56 /// The document fields (if retrieved).
57 pub fields: HashMap<String, String>,
58}
59
60/// A single search hit containing a matched document and its relevance score.
61///
62/// Returned as part of [`LexicalSearchResults`] to represent each document
63/// that matched the search query. The `score` reflects the relevance ranking
64/// computed by the scorer (e.g., BM25), and the `document` field optionally
65/// holds the stored fields if they were requested.
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct SearchHit {
68 /// The internal document ID.
69 pub doc_id: u64,
70 /// The relevance score.
71 pub score: f32,
72 /// The document (if retrieved).
73 pub document: Option<Document>,
74}
75
76/// Aggregated results from a lexical search query.
77///
78/// Contains the ranked list of matching documents along with summary statistics.
79///
80/// # Fields
81///
82/// - `hits` - Ranked list of [`SearchHit`] entries, ordered by descending score.
83/// - `total_hits` - Total number of documents that matched the query (may exceed `hits.len()`
84/// when a limit is applied).
85/// - `max_score` - The highest relevance score among all results, useful for normalization.
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct LexicalSearchResults {
88 /// The search hits.
89 pub hits: Vec<SearchHit>,
90 /// Total number of matching documents.
91 pub total_hits: u64,
92 /// Maximum score in the results.
93 pub max_score: f32,
94}
95
/// Minimal query result: a document ID paired with its score.
///
/// Acts as a wrapper for different result types; it carries no document
/// content.
#[derive(Clone, Debug)]
pub struct QueryResult {
    /// ID of the matching document.
    pub doc_id: u64,
    /// Score assigned to the document.
    pub score: f32,
}
104
105/// Trait for search queries.
106pub trait Query: Send + Sync + Debug {
107 /// Create a matcher for this query.
108 fn matcher(&self, reader: &dyn LexicalIndexReader) -> Result<Box<dyn Matcher>>;
109
110 /// Create a scorer for this query.
111 fn scorer(&self, reader: &dyn LexicalIndexReader) -> Result<Box<dyn Scorer>>;
112
113 /// Get the boost factor for this query.
114 fn boost(&self) -> f32;
115
116 /// Set the boost factor for this query.
117 fn set_boost(&mut self, boost: f32);
118
119 /// Get a human-readable description of this query.
120 fn description(&self) -> String;
121
122 /// Clone this query.
123 fn clone_box(&self) -> Box<dyn Query>;
124
125 /// Returns `true` if this query would match no documents in the given reader.
126 ///
127 /// Each implementor defines its own emptiness semantics. For example:
128 /// - [`TermQuery`] checks whether
129 /// the term exists in the index via the reader.
130 /// - [`BooleanQuery`] returns `true`
131 /// when it has no clauses or all of its clauses are empty.
132 ///
133 /// # Parameters
134 ///
135 /// - `reader` - The index reader used to check whether the query's terms exist.
136 ///
137 /// # Returns
138 ///
139 /// `Ok(true)` if this query would not match any documents, `Ok(false)` otherwise.
140 /// Returns an error if the reader cannot be queried.
141 fn is_empty(&self, reader: &dyn LexicalIndexReader) -> Result<bool>;
142
143 /// Get the estimated cost of executing this query.
144 fn cost(&self, reader: &dyn LexicalIndexReader) -> Result<u64>;
145
146 /// Get this query as Any for downcasting.
147 fn as_any(&self) -> &dyn Any;
148
149 /// Get the field name this query searches in, if applicable.
150 /// Returns None for queries that don't target a specific field (e.g., BooleanQuery).
151 fn field(&self) -> Option<&str> {
152 None
153 }
154
155 /// Apply field-level boosts to this query and its sub-queries.
156 fn apply_field_boosts(&mut self, boosts: &HashMap<String, f32>) {
157 if let Some(f) = self.field()
158 && let Some(&b) = boosts.get(f)
159 {
160 self.set_boost(self.boost() * b);
161 }
162 }
163}