Skip to main content

laurus/lexical/
query.rs

1//! Query system for searching documents in inverted indexes.
2
3pub mod advanced_query;
4pub mod boolean;
5pub mod collector;
6pub mod fuzzy;
7pub mod geo;
8pub mod matcher;
9pub mod multi_term;
10pub mod parser;
11pub mod phrase;
12pub mod prefix;
13pub mod range;
14pub mod regexp;
15pub mod scorer;
16pub mod span;
17pub mod term;
18pub mod wildcard;
19
20// Re-exports for cleaner API
21pub use advanced_query::AdvancedQuery;
22pub use boolean::{BooleanQuery, BooleanQueryBuilder};
23pub use fuzzy::FuzzyQuery;
24pub use geo::{GeoBoundingBox, GeoBoundingBoxQuery, GeoDistanceQuery, GeoPoint, GeoQuery};
25pub use multi_term::MultiTermQuery;
26pub use parser::LexicalQueryParser;
27pub use phrase::PhraseQuery;
28pub use prefix::PrefixQuery;
29pub use range::NumericRangeQuery;
30pub use regexp::RegexpQuery;
31pub use span::{SpanNearQuery, SpanQuery, SpanTermQuery};
32pub use term::TermQuery;
33pub use wildcard::WildcardQuery;
34
35use std::any::Any;
36use std::collections::HashMap;
37use std::fmt::Debug;
38
39use serde::{Deserialize, Serialize};
40
41use crate::error::Result;
42#[allow(unused_imports)]
43use crate::lexical::core::document::Document;
44use crate::lexical::reader::LexicalIndexReader;
45
46use self::matcher::Matcher;
47use self::scorer::Scorer;
48
49/// A search hit containing a document and its score.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct Hit {
52    /// The document ID.
53    pub doc_id: u64,
54    /// The relevance score.
55    pub score: f32,
56    /// The document fields (if retrieved).
57    pub fields: HashMap<String, String>,
58}
59
60/// A single search hit containing a matched document and its relevance score.
61///
62/// Returned as part of [`LexicalSearchResults`] to represent each document
63/// that matched the search query. The `score` reflects the relevance ranking
64/// computed by the scorer (e.g., BM25), and the `document` field optionally
65/// holds the stored fields if they were requested.
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct SearchHit {
68    /// The internal document ID.
69    pub doc_id: u64,
70    /// The relevance score.
71    pub score: f32,
72    /// The document (if retrieved).
73    pub document: Option<Document>,
74}
75
76/// Aggregated results from a lexical search query.
77///
78/// Contains the ranked list of matching documents along with summary statistics.
79///
80/// # Fields
81///
82/// - `hits` - Ranked list of [`SearchHit`] entries, ordered by descending score.
83/// - `total_hits` - Total number of documents that matched the query (may exceed `hits.len()`
84///   when a limit is applied).
85/// - `max_score` - The highest relevance score among all results, useful for normalization.
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct LexicalSearchResults {
88    /// The search hits.
89    pub hits: Vec<SearchHit>,
90    /// Total number of matching documents.
91    pub total_hits: u64,
92    /// Maximum score in the results.
93    pub max_score: f32,
94}
95
/// Minimal query result: a document identifier paired with its score.
///
/// Serves as a wrapper for different result types.
#[derive(Debug, Clone)]
pub struct QueryResult {
    /// Identifier of the document this result refers to.
    pub doc_id: u64,
    /// Score assigned to the document.
    pub score: f32,
}
104
105/// Trait for search queries.
106pub trait Query: Send + Sync + Debug {
107    /// Create a matcher for this query.
108    fn matcher(&self, reader: &dyn LexicalIndexReader) -> Result<Box<dyn Matcher>>;
109
110    /// Create a scorer for this query.
111    fn scorer(&self, reader: &dyn LexicalIndexReader) -> Result<Box<dyn Scorer>>;
112
113    /// Get the boost factor for this query.
114    fn boost(&self) -> f32;
115
116    /// Set the boost factor for this query.
117    fn set_boost(&mut self, boost: f32);
118
119    /// Get a human-readable description of this query.
120    fn description(&self) -> String;
121
122    /// Clone this query.
123    fn clone_box(&self) -> Box<dyn Query>;
124
125    /// Returns `true` if this query would match no documents in the given reader.
126    ///
127    /// Each implementor defines its own emptiness semantics. For example:
128    /// - [`TermQuery`] checks whether
129    ///   the term exists in the index via the reader.
130    /// - [`BooleanQuery`] returns `true`
131    ///   when it has no clauses or all of its clauses are empty.
132    ///
133    /// # Parameters
134    ///
135    /// - `reader` - The index reader used to check whether the query's terms exist.
136    ///
137    /// # Returns
138    ///
139    /// `Ok(true)` if this query would not match any documents, `Ok(false)` otherwise.
140    /// Returns an error if the reader cannot be queried.
141    fn is_empty(&self, reader: &dyn LexicalIndexReader) -> Result<bool>;
142
143    /// Get the estimated cost of executing this query.
144    fn cost(&self, reader: &dyn LexicalIndexReader) -> Result<u64>;
145
146    /// Get this query as Any for downcasting.
147    fn as_any(&self) -> &dyn Any;
148
149    /// Get the field name this query searches in, if applicable.
150    /// Returns None for queries that don't target a specific field (e.g., BooleanQuery).
151    fn field(&self) -> Option<&str> {
152        None
153    }
154
155    /// Apply field-level boosts to this query and its sub-queries.
156    fn apply_field_boosts(&mut self, boosts: &HashMap<String, f32>) {
157        if let Some(f) = self.field()
158            && let Some(&b) = boosts.get(f)
159        {
160            self.set_boost(self.boost() * b);
161        }
162    }
163}