Skip to main content

hermes_core/query/
traits.rs

1//! Query and Scorer traits with async support
2//!
3//! Provides the core abstractions for search queries and document scoring.
4
5use std::future::Future;
6use std::pin::Pin;
7
8use crate::segment::SegmentReader;
9use crate::{DocId, Result, Score};
10
11/// Filter predicate checked per-doc during scoring.
12///
13/// Fast-field lookups are O(1) per doc — no bitset needed.
14/// Passed through the query tree so executors (MaxScore, BMP) can reject
15/// filtered documents *inside* the scoring loop, before they enter the heap.
16#[cfg(not(target_arch = "wasm32"))]
17pub type DocPredicate<'a> = Box<dyn Fn(DocId) -> bool + Send + Sync + 'a>;
18#[cfg(target_arch = "wasm32")]
19pub type DocPredicate<'a> = Box<dyn Fn(DocId) -> bool + 'a>;
20
/// BM25 parameters
///
/// Plain `Copy` data; `PartialEq` is derived so parameter sets can be
/// compared directly (for example against [`Bm25Params::default`]).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Bm25Params {
    /// Term frequency saturation parameter (typically 1.2-2.0)
    pub k1: f32,
    /// Length normalization parameter (typically 0.75)
    pub b: f32,
}

impl Default for Bm25Params {
    /// Returns the default parameter set: `k1 = 1.2`, `b = 0.75`.
    fn default() -> Self {
        Self { k1: 1.2, b: 0.75 }
    }
}
35
36/// Future type for scorer creation
37#[cfg(not(target_arch = "wasm32"))]
38pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + Send + 'a>>;
39#[cfg(target_arch = "wasm32")]
40pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + 'a>>;
41
42/// Future type for count estimation
43#[cfg(not(target_arch = "wasm32"))]
44pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + Send + 'a>>;
45#[cfg(target_arch = "wasm32")]
46pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + 'a>>;
47
48/// Info for MaxScore-optimizable term queries
49#[derive(Debug, Clone)]
50pub struct TermQueryInfo {
51    /// Field being searched
52    pub field: crate::dsl::Field,
53    /// Term bytes (lowercase)
54    pub term: Vec<u8>,
55}
56
57/// Matched positions for a field (field_id, list of scored positions)
58/// Each position includes its individual score contribution
59pub type MatchedPositions = Vec<(u32, Vec<super::ScoredPosition>)>;
60
macro_rules! define_query_traits {
    // `$bounds` is the supertrait list spliced after `trait Name:`. It may be
    // empty, in which case the traits are declared without supertraits.
    ($($bounds:tt)*) => {
        /// A search query (async)
        ///
        /// `scorer` takes `&self` rather than `&'a self`, so the scorer it returns
        /// does not borrow the query. That is what makes composition possible:
        /// a query can build sub-queries locally and still hand out their scorers.
        /// Implementations must clone/capture whatever data they need while the
        /// scorer is being created.
        pub trait Query: $($bounds)* {
            /// Build a scorer for this query over a single segment (async)
            ///
            /// * `reader` - the segment to score against; the scorer borrows only
            ///   this reader, never the query itself.
            /// * `limit` - maximum number of results to return, passed down from
            ///   the top-level search limit.
            /// * `predicate` - optional per-document filter (see `DocPredicate`).
            ///
            /// Implementations should capture any needed query data
            /// (field, terms, etc.) during creation.
            fn scorer<'a>(
                &self,
                reader: &'a SegmentReader,
                limit: usize,
                predicate: Option<DocPredicate<'a>>,
            ) -> ScorerFuture<'a>;

            /// Estimate how many documents in a segment match (async)
            fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a>;

            /// Build a scorer synchronously (mmap/RAM only).
            ///
            /// Only compiled when the `sync` feature is enabled.
            /// The default implementation ignores its arguments and returns
            /// an `Error::Query`.
            #[cfg(feature = "sync")]
            fn scorer_sync<'a>(
                &self,
                reader: &'a SegmentReader,
                limit: usize,
                predicate: Option<DocPredicate<'a>>,
            ) -> Result<Box<dyn Scorer + 'a>> {
                // The default has no use for its inputs; consume them so the
                // signature can keep descriptive parameter names.
                let _ = (reader, limit, predicate);
                let reason = "sync scorer not supported for this query type";
                Err(crate::error::Error::Query(reason.into()))
            }

            /// Term info when this is a simple term query eligible for MaxScore
            /// optimization.
            ///
            /// Complex queries (boolean, phrase, etc.) return `None`, which is
            /// also the default.
            fn as_term_query_info(&self) -> Option<TermQueryInfo> {
                None
            }
        }

        /// Iterator-like scorer over matching documents that also computes scores
        pub trait Scorer: $($bounds)* {
            /// ID of the current document, or TERMINATED once exhausted
            fn doc(&self) -> DocId;

            /// Score of the current document
            fn score(&self) -> Score;

            /// Move to the next document
            fn advance(&mut self) -> DocId;

            /// Move to the first doc >= target
            fn seek(&mut self, target: DocId) -> DocId;

            /// Size hint for the documents that remain
            fn size_hint(&self) -> u32;

            /// Matched positions for the current document (if available)
            ///
            /// Yields (field_id, positions) pairs where positions are encoded
            /// as per PositionMode. The default implementation returns `None`.
            fn matched_positions(&self) -> Option<MatchedPositions> {
                None
            }
        }
    };
}
136
137#[cfg(not(target_arch = "wasm32"))]
138define_query_traits!(Send + Sync);
139
140#[cfg(target_arch = "wasm32")]
141define_query_traits!();
142
143impl Query for Box<dyn Query> {
144    fn scorer<'a>(
145        &self,
146        reader: &'a SegmentReader,
147        limit: usize,
148        predicate: Option<DocPredicate<'a>>,
149    ) -> ScorerFuture<'a> {
150        (**self).scorer(reader, limit, predicate)
151    }
152
153    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
154        (**self).count_estimate(reader)
155    }
156
157    fn as_term_query_info(&self) -> Option<TermQueryInfo> {
158        (**self).as_term_query_info()
159    }
160
161    #[cfg(feature = "sync")]
162    fn scorer_sync<'a>(
163        &self,
164        reader: &'a SegmentReader,
165        limit: usize,
166        predicate: Option<DocPredicate<'a>>,
167    ) -> Result<Box<dyn Scorer + 'a>> {
168        (**self).scorer_sync(reader, limit, predicate)
169    }
170}
171
/// Empty scorer for terms that don't exist
///
/// A stateless unit struct, so the common traits are derived: it can be
/// debug-printed, freely copied, and built through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct EmptyScorer;
174
175impl Scorer for EmptyScorer {
176    fn doc(&self) -> DocId {
177        crate::structures::TERMINATED
178    }
179
180    fn score(&self) -> Score {
181        0.0
182    }
183
184    fn advance(&mut self) -> DocId {
185        crate::structures::TERMINATED
186    }
187
188    fn seek(&mut self, _target: DocId) -> DocId {
189        crate::structures::TERMINATED
190    }
191
192    fn size_hint(&self) -> u32 {
193        0
194    }
195}