hermes_core/query/traits.rs
1//! Query and Scorer traits with async support
2//!
3//! Provides the core abstractions for search queries and document scoring.
4
5use std::future::Future;
6use std::pin::Pin;
7
8use crate::segment::SegmentReader;
9use crate::{DocId, Result, Score};
10
/// Tuning parameters for BM25 scoring.
///
/// The [`Default`] implementation yields `k1 = 1.2` and `b = 0.75`.
/// `PartialEq` is derived so parameter sets can be compared directly
/// (for example in assertions).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Bm25Params {
    /// Term frequency saturation parameter (typically 1.2-2.0)
    pub k1: f32,
    /// Length normalization parameter (typically 0.75)
    pub b: f32,
}

impl Default for Bm25Params {
    /// Returns the default parameter set: `k1 = 1.2`, `b = 0.75`.
    fn default() -> Self {
        Self { k1: 1.2, b: 0.75 }
    }
}
25
26/// Future type for scorer creation
27#[cfg(not(target_arch = "wasm32"))]
28pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + Send + 'a>>;
29#[cfg(target_arch = "wasm32")]
30pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + 'a>>;
31
32/// Future type for count estimation
33#[cfg(not(target_arch = "wasm32"))]
34pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + Send + 'a>>;
35#[cfg(target_arch = "wasm32")]
36pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + 'a>>;
37
38/// Per-document predicate closure type (platform-aware Send+Sync bounds)
39#[cfg(not(target_arch = "wasm32"))]
40pub type DocPredicate<'a> = Box<dyn Fn(DocId) -> bool + Send + Sync + 'a>;
41#[cfg(target_arch = "wasm32")]
42pub type DocPredicate<'a> = Box<dyn Fn(DocId) -> bool + 'a>;
43
44/// Info for MaxScore-optimizable term queries
45#[derive(Debug, Clone)]
46pub struct TermQueryInfo {
47 /// Field being searched
48 pub field: crate::dsl::Field,
49 /// Term bytes (lowercase)
50 pub term: Vec<u8>,
51}
52
53/// Info for MaxScore-optimizable sparse term queries
54#[derive(Debug, Clone, Copy)]
55pub struct SparseTermQueryInfo {
56 /// Sparse vector field
57 pub field: crate::dsl::Field,
58 /// Dimension ID in the sparse vector
59 pub dim_id: u32,
60 /// Query weight for this dimension
61 pub weight: f32,
62 /// MaxScore heap factor (1.0 = exact, lower = approximate)
63 pub heap_factor: f32,
64 /// Multi-value combiner for ordinal deduplication
65 pub combiner: super::MultiValueCombiner,
66 /// Multiplier on executor limit to compensate for ordinal deduplication
67 /// (1.0 = exact, 2.0 = fetch 2x then combine down)
68 pub over_fetch_factor: f32,
69}
70
71/// Decomposition of a query for MaxScore optimization.
72///
73/// The planner inspects this to decide whether to use text MaxScore,
74/// sparse MaxScore, or standard BooleanScorer execution.
75#[derive(Debug, Clone)]
76pub enum QueryDecomposition {
77 /// Single text term — eligible for text MaxScore grouping
78 TextTerm(TermQueryInfo),
79 /// One or more sparse dimensions — eligible for sparse MaxScore
80 SparseTerms(Vec<SparseTermQueryInfo>),
81 /// Not decomposable — falls back to standard execution
82 Opaque,
83}
84
85/// Matched positions for a field (field_id, list of scored positions)
86/// Each position includes its individual score contribution
87pub type MatchedPositions = Vec<(u32, Vec<super::ScoredPosition>)>;
88
// Generates the `Query` and `Scorer` traits. The tokens passed to the macro
// are appended verbatim to each trait's supertrait list, which lets the
// invocation site decide whether extra bounds (such as `Send + Sync`) apply.
macro_rules! define_query_traits {
    ($($bounds:tt)*) => {
        /// An asynchronous search query.
        ///
        /// `scorer` deliberately takes `&self` instead of `&'a self`, so the
        /// scorer it produces never borrows the query itself. That makes query
        /// composition possible: a query may build sub-queries locally and hand
        /// out their scorers. Implementations therefore have to clone or capture
        /// whatever data they need while the scorer is being created.
        pub trait Query: std::fmt::Display + $($bounds)* {
            /// Builds a scorer for this query over one segment (async).
            ///
            /// `limit` is the maximum number of results to return; it is passed
            /// down from the top-level search limit.
            ///
            /// The scorer borrows only `reader`, never the query, so any query
            /// data that is needed (field, terms, etc.) should be captured at
            /// creation time.
            fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a>;

            /// Estimates how many documents in the segment match (async).
            fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a>;

            /// Builds a scorer synchronously (mmap/RAM only).
            ///
            /// Only compiled when the `sync` feature is enabled.
            ///
            /// # Errors
            ///
            /// The default implementation always returns a query error stating
            /// that sync scoring is not supported for this query type.
            #[cfg(feature = "sync")]
            fn scorer_sync<'a>(
                &self,
                reader: &'a SegmentReader,
                limit: usize,
            ) -> Result<Box<dyn Scorer + 'a>> {
                // The default never looks at its arguments; discard them explicitly.
                let _ = reader;
                let _ = limit;
                let message = "sync scorer not supported for this query type";
                Err(crate::error::Error::Query(message.into()))
            }

            /// Breaks this query down for MaxScore optimization.
            ///
            /// Simple term queries report `TextTerm`, sparse vector queries
            /// (single or multi-dim) report `SparseTerms`, and queries that
            /// cannot be decomposed report `Opaque`. The default is `Opaque`.
            fn decompose(&self) -> QueryDecomposition {
                QueryDecomposition::Opaque
            }

            /// Whether this query is a pure filter (always scores 1.0, no positions).
            ///
            /// The planner uses this to turn non-selective MUST filters into
            /// predicates. The default is `false`.
            fn is_filter(&self) -> bool {
                false
            }

            /// For filter queries: a cheap per-document predicate over a segment.
            ///
            /// The predicate is expected to do O(1) work per document (e.g. a
            /// fast-field lookup). The default returns `None`.
            fn as_doc_predicate<'a>(&self, _reader: &'a SegmentReader) -> Option<DocPredicate<'a>> {
                None
            }
        }

        /// A stream of scored documents: a `DocSet` that can also report scores.
        pub trait Scorer: super::docset::DocSet + $($bounds)* {
            /// Score of the current document.
            fn score(&self) -> Score;

            /// Matched positions for the current document, when available.
            ///
            /// Returns (field_id, positions) pairs where positions are encoded
            /// as per PositionMode. The default returns `None`.
            fn matched_positions(&self) -> Option<MatchedPositions> {
                None
            }
        }
    };
}
167
168#[cfg(not(target_arch = "wasm32"))]
169define_query_traits!(Send + Sync);
170
171#[cfg(target_arch = "wasm32")]
172define_query_traits!();
173
174impl Query for Box<dyn Query> {
175 fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
176 (**self).scorer(reader, limit)
177 }
178
179 fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
180 (**self).count_estimate(reader)
181 }
182
183 fn decompose(&self) -> QueryDecomposition {
184 (**self).decompose()
185 }
186
187 fn is_filter(&self) -> bool {
188 (**self).is_filter()
189 }
190
191 fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<DocPredicate<'a>> {
192 (**self).as_doc_predicate(reader)
193 }
194
195 #[cfg(feature = "sync")]
196 fn scorer_sync<'a>(
197 &self,
198 reader: &'a SegmentReader,
199 limit: usize,
200 ) -> Result<Box<dyn Scorer + 'a>> {
201 (**self).scorer_sync(reader, limit)
202 }
203}
204
/// Empty scorer for terms that don't exist.
///
/// This is a zero-sized unit type, so it derives the common traits
/// (`Debug`, `Clone`, `Copy`, `Default`) to make it easy to log, duplicate
/// and construct generically.
#[derive(Debug, Clone, Copy, Default)]
pub struct EmptyScorer;
207
208impl super::docset::DocSet for EmptyScorer {
209 fn doc(&self) -> DocId {
210 crate::structures::TERMINATED
211 }
212
213 fn advance(&mut self) -> DocId {
214 crate::structures::TERMINATED
215 }
216
217 fn seek(&mut self, _target: DocId) -> DocId {
218 crate::structures::TERMINATED
219 }
220
221 fn size_hint(&self) -> u32 {
222 0
223 }
224}
225
226impl Scorer for EmptyScorer {
227 fn score(&self) -> Score {
228 0.0
229 }
230}