Skip to main content

hermes_core/query/
traits.rs

1//! Query and Scorer traits with async support
2//!
3//! Provides the core abstractions for search queries and document scoring.
4
5use std::future::Future;
6use std::pin::Pin;
7
8use crate::segment::SegmentReader;
9use crate::{DocId, Result, Score};
10
/// Tuning parameters for BM25 scoring.
///
/// `PartialEq` is derived so that parameter sets can be compared directly,
/// e.g. against [`Bm25Params::default`] in tests or configuration checks.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Bm25Params {
    /// Term frequency saturation parameter (typically 1.2-2.0)
    pub k1: f32,
    /// Length normalization parameter (typically 0.75)
    pub b: f32,
}

impl Default for Bm25Params {
    /// Returns the default parameter set: `k1 = 1.2`, `b = 0.75`.
    fn default() -> Self {
        Self { k1: 1.2, b: 0.75 }
    }
}
25
26/// Future type for scorer creation
27#[cfg(not(target_arch = "wasm32"))]
28pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + Send + 'a>>;
29#[cfg(target_arch = "wasm32")]
30pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + 'a>>;
31
32/// Future type for count estimation
33#[cfg(not(target_arch = "wasm32"))]
34pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + Send + 'a>>;
35#[cfg(target_arch = "wasm32")]
36pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + 'a>>;
37
38/// Per-document predicate closure type (platform-aware Send+Sync bounds)
39#[cfg(not(target_arch = "wasm32"))]
40pub type DocPredicate<'a> = Box<dyn Fn(DocId) -> bool + Send + Sync + 'a>;
41#[cfg(target_arch = "wasm32")]
42pub type DocPredicate<'a> = Box<dyn Fn(DocId) -> bool + 'a>;
43
44/// Info for MaxScore-optimizable term queries
45#[derive(Debug, Clone)]
46pub struct TermQueryInfo {
47    /// Field being searched
48    pub field: crate::dsl::Field,
49    /// Term bytes (lowercase)
50    pub term: Vec<u8>,
51}
52
53/// Info for MaxScore-optimizable sparse term queries
54#[derive(Debug, Clone, Copy)]
55pub struct SparseTermQueryInfo {
56    /// Sparse vector field
57    pub field: crate::dsl::Field,
58    /// Dimension ID in the sparse vector
59    pub dim_id: u32,
60    /// Query weight for this dimension
61    pub weight: f32,
62    /// MaxScore heap factor (1.0 = exact, lower = approximate)
63    pub heap_factor: f32,
64    /// Multi-value combiner for ordinal deduplication
65    pub combiner: super::MultiValueCombiner,
66    /// Multiplier on executor limit to compensate for ordinal deduplication
67    /// (1.0 = exact, 2.0 = fetch 2x then combine down)
68    pub over_fetch_factor: f32,
69}
70
71/// Matched positions for a field (field_id, list of scored positions)
72/// Each position includes its individual score contribution
73pub type MatchedPositions = Vec<(u32, Vec<super::ScoredPosition>)>;
74
// Emits the `Query` and `Scorer` trait definitions. Whatever tokens the macro
// is invoked with are spliced in as additional supertrait bounds on both
// traits, so the same definitions can be generated with or without extra
// bounds.
macro_rules! define_query_traits {
    ($($extra_bounds:tt)*) => {
        /// A search query (async).
        ///
        /// `scorer` deliberately takes `&self` rather than `&'a self`: the returned
        /// scorer is tied only to the reader's lifetime, never to the query's.
        /// That makes composition possible - a query can build sub-queries as
        /// locals and still hand out their scorers. The flip side is that an
        /// implementation has to clone or capture whatever query data the scorer
        /// needs at creation time.
        pub trait Query: std::fmt::Display + $($extra_bounds)* {
            /// Build a scorer for this query over one segment (async).
            ///
            /// `limit` is the maximum number of results to return; it is handed
            /// down from the top-level search limit.
            ///
            /// The scorer borrows the reader only. Any query data it needs
            /// (field, terms, etc.) should be captured while it is being created.
            fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a>;

            /// Estimate how many documents in the segment match (async).
            fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a>;

            /// Build a scorer synchronously (mmap/RAM only).
            ///
            /// Only compiled when the `sync` feature is enabled. Unless
            /// overridden, this returns an `Error::Query` saying that sync
            /// scoring is not supported.
            #[cfg(feature = "sync")]
            fn scorer_sync<'a>(
                &self,
                reader: &'a SegmentReader,
                limit: usize,
            ) -> Result<Box<dyn Scorer + 'a>> {
                // The default has no synchronous path; both arguments are unused.
                let _ = reader;
                let _ = limit;
                Err(crate::error::Error::Query(
                    "sync scorer not supported for this query type".into(),
                ))
            }

            /// Term info for simple term queries that can use MaxScore optimization.
            ///
            /// Complex queries (boolean, phrase, etc.) return `None`, which is
            /// also the default.
            fn as_term_query_info(&self) -> Option<TermQueryInfo> {
                None
            }

            /// Sparse term info for single-dimension sparse queries that can use
            /// MaxScore optimization. Defaults to `None`.
            fn as_sparse_term_query_info(&self) -> Option<SparseTermQueryInfo> {
                None
            }

            /// Break the query down into sparse term query infos for MaxScore
            /// optimization.
            ///
            /// `Some(vec)` means the query can be expressed as a set of sparse
            /// term queries on the same field. The BooleanQuery planner uses
            /// this to build a predicate-aware MaxScoreExecutor directly.
            /// Defaults to `None`.
            fn as_sparse_term_queries(&self) -> Option<Vec<SparseTermQueryInfo>> {
                None
            }

            /// Whether this query is a pure filter (always scores 1.0, no positions).
            ///
            /// The planner uses this to turn non-selective MUST filters into
            /// predicates. Defaults to `false`.
            fn is_filter(&self) -> bool {
                false
            }

            /// For filter queries: a cheap per-document predicate over a segment.
            ///
            /// The predicate is expected to do O(1) work per doc (e.g., a
            /// fast-field lookup). Defaults to `None`.
            fn as_doc_predicate<'a>(&self, _reader: &'a SegmentReader) -> Option<DocPredicate<'a>> {
                None
            }
        }

        /// A stream of scored documents: a DocSet that can also report scores.
        pub trait Scorer: super::docset::DocSet + $($extra_bounds)* {
            /// Score of the current document.
            fn score(&self) -> Score;

            /// Matched positions for the current document, when available.
            ///
            /// Yields (field_id, positions) pairs, with positions encoded as per
            /// PositionMode. Defaults to `None`.
            fn matched_positions(&self) -> Option<MatchedPositions> {
                None
            }
        }
    };
}
166
167#[cfg(not(target_arch = "wasm32"))]
168define_query_traits!(Send + Sync);
169
170#[cfg(target_arch = "wasm32")]
171define_query_traits!();
172
173impl Query for Box<dyn Query> {
174    fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
175        (**self).scorer(reader, limit)
176    }
177
178    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
179        (**self).count_estimate(reader)
180    }
181
182    fn as_term_query_info(&self) -> Option<TermQueryInfo> {
183        (**self).as_term_query_info()
184    }
185
186    fn as_sparse_term_query_info(&self) -> Option<SparseTermQueryInfo> {
187        (**self).as_sparse_term_query_info()
188    }
189
190    fn as_sparse_term_queries(&self) -> Option<Vec<SparseTermQueryInfo>> {
191        (**self).as_sparse_term_queries()
192    }
193
194    fn is_filter(&self) -> bool {
195        (**self).is_filter()
196    }
197
198    fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<DocPredicate<'a>> {
199        (**self).as_doc_predicate(reader)
200    }
201
202    #[cfg(feature = "sync")]
203    fn scorer_sync<'a>(
204        &self,
205        reader: &'a SegmentReader,
206        limit: usize,
207    ) -> Result<Box<dyn Scorer + 'a>> {
208        (**self).scorer_sync(reader, limit)
209    }
210}
211
/// Empty scorer for terms that don't exist.
///
/// A field-less unit struct. The common traits are derived so it can be
/// debug-printed, copied freely, and built via `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct EmptyScorer;
214
215impl super::docset::DocSet for EmptyScorer {
216    fn doc(&self) -> DocId {
217        crate::structures::TERMINATED
218    }
219
220    fn advance(&mut self) -> DocId {
221        crate::structures::TERMINATED
222    }
223
224    fn seek(&mut self, _target: DocId) -> DocId {
225        crate::structures::TERMINATED
226    }
227
228    fn size_hint(&self) -> u32 {
229        0
230    }
231}
232
233impl Scorer for EmptyScorer {
234    fn score(&self) -> Score {
235        0.0
236    }
237}