Skip to main content

hermes_core/query/
traits.rs

1//! Query and Scorer traits with async support
2//!
3//! Provides the core abstractions for search queries and document scoring.
4
5use std::future::Future;
6use std::pin::Pin;
7
8use crate::segment::SegmentReader;
9use crate::{DocId, Result, Score};
10
/// Tuning parameters for BM25 scoring.
///
/// `PartialEq` is derived so that parameter sets can be compared directly,
/// e.g. against [`Bm25Params::default`] in tests or config validation.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Bm25Params {
    /// Term frequency saturation parameter (typically 1.2-2.0)
    pub k1: f32,
    /// Length normalization parameter (typically 0.75)
    pub b: f32,
}

impl Default for Bm25Params {
    /// Returns the defaults `k1 = 1.2` and `b = 0.75`.
    fn default() -> Self {
        Self { k1: 1.2, b: 0.75 }
    }
}
25
26/// Future type for scorer creation
27#[cfg(not(target_arch = "wasm32"))]
28pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + Send + 'a>>;
29#[cfg(target_arch = "wasm32")]
30pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + 'a>>;
31
32/// Future type for count estimation
33#[cfg(not(target_arch = "wasm32"))]
34pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + Send + 'a>>;
35#[cfg(target_arch = "wasm32")]
36pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + 'a>>;
37
38/// Info for MaxScore-optimizable term queries
39#[derive(Debug, Clone)]
40pub struct TermQueryInfo {
41    /// Field being searched
42    pub field: crate::dsl::Field,
43    /// Term bytes (lowercase)
44    pub term: Vec<u8>,
45}
46
47/// Info for MaxScore-optimizable sparse term queries
48#[derive(Debug, Clone, Copy)]
49pub struct SparseTermQueryInfo {
50    /// Sparse vector field
51    pub field: crate::dsl::Field,
52    /// Dimension ID in the sparse vector
53    pub dim_id: u32,
54    /// Query weight for this dimension
55    pub weight: f32,
56    /// MaxScore heap factor (1.0 = exact, lower = approximate)
57    pub heap_factor: f32,
58    /// Multi-value combiner for ordinal deduplication
59    pub combiner: super::MultiValueCombiner,
60    /// Multiplier on executor limit to compensate for ordinal deduplication
61    /// (1.0 = exact, 2.0 = fetch 2x then combine down)
62    pub over_fetch_factor: f32,
63}
64
65/// Matched positions for a field (field_id, list of scored positions)
66/// Each position includes its individual score contribution
67pub type MatchedPositions = Vec<(u32, Vec<super::ScoredPosition>)>;
68
// Generates the `Query` and `Scorer` traits. The macro argument is spliced in
// as extra supertrait bounds on both traits, so the same definitions can be
// emitted with or without thread-safety bounds.
macro_rules! define_query_traits {
    ($($send_bounds:tt)*) => {
        /// A search query (async).
        ///
        /// `scorer` receives `&self` rather than `&'a self`, so the returned scorer
        /// does not borrow the query. That allows composite queries to build
        /// sub-queries locally and still hand out their scorers. Implementations
        /// therefore have to clone or capture whatever they need while the scorer
        /// is being created.
        pub trait Query: std::fmt::Display + $($send_bounds)* {
            /// Build a scorer for this query over one segment (async).
            ///
            /// `limit` is the maximum number of results to return, as passed
            /// down from the top-level search limit.
            ///
            /// The resulting scorer borrows the reader only, never the query;
            /// any query data it needs (field, terms, ...) should be captured
            /// during creation.
            fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a>;

            /// Estimate how many documents in the segment match (async).
            fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a>;

            /// Build a scorer synchronously (mmap/RAM only).
            ///
            /// Only compiled when the `sync` feature is enabled.
            /// The default implementation ignores its arguments and returns
            /// an error.
            #[cfg(feature = "sync")]
            fn scorer_sync<'a>(
                &self,
                reader: &'a SegmentReader,
                limit: usize,
            ) -> Result<Box<dyn Scorer + 'a>> {
                // The default has no use for its arguments.
                let _ = reader;
                let _ = limit;
                Err(crate::error::Error::Query(
                    "sync scorer not supported for this query type".into(),
                ))
            }

            /// Term info, if this is a simple term query eligible for MaxScore
            /// optimization.
            ///
            /// The default returns `None`, which is the answer for complex
            /// queries (boolean, phrase, etc.).
            fn as_term_query_info(&self) -> Option<TermQueryInfo> {
                None
            }

            /// Sparse term info, if this is a single-dimension sparse query
            /// eligible for MaxScore optimization. Defaults to `None`.
            fn as_sparse_term_query_info(&self) -> Option<SparseTermQueryInfo> {
                None
            }
        }

        /// Scored document stream: a DocSet that also provides scores.
        pub trait Scorer: super::docset::DocSet + $($send_bounds)* {
            /// Score of the current document.
            fn score(&self) -> Score;

            /// Matched positions for the current document, when available.
            ///
            /// Yields `(field_id, positions)` pairs where positions are encoded
            /// as per PositionMode. Defaults to `None`.
            fn matched_positions(&self) -> Option<MatchedPositions> {
                None
            }
        }
    };
}
136
137#[cfg(not(target_arch = "wasm32"))]
138define_query_traits!(Send + Sync);
139
140#[cfg(target_arch = "wasm32")]
141define_query_traits!();
142
143impl Query for Box<dyn Query> {
144    fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
145        (**self).scorer(reader, limit)
146    }
147
148    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
149        (**self).count_estimate(reader)
150    }
151
152    fn as_term_query_info(&self) -> Option<TermQueryInfo> {
153        (**self).as_term_query_info()
154    }
155
156    fn as_sparse_term_query_info(&self) -> Option<SparseTermQueryInfo> {
157        (**self).as_sparse_term_query_info()
158    }
159
160    #[cfg(feature = "sync")]
161    fn scorer_sync<'a>(
162        &self,
163        reader: &'a SegmentReader,
164        limit: usize,
165    ) -> Result<Box<dyn Scorer + 'a>> {
166        (**self).scorer_sync(reader, limit)
167    }
168}
169
/// Empty scorer for terms that don't exist.
///
/// A stateless unit struct, so it derives the common traits: `Debug` for
/// diagnostics, `Clone`/`Copy` because there is nothing to duplicate, and
/// `Default` for generic construction.
#[derive(Debug, Clone, Copy, Default)]
pub struct EmptyScorer;
172
173impl super::docset::DocSet for EmptyScorer {
174    fn doc(&self) -> DocId {
175        crate::structures::TERMINATED
176    }
177
178    fn advance(&mut self) -> DocId {
179        crate::structures::TERMINATED
180    }
181
182    fn seek(&mut self, _target: DocId) -> DocId {
183        crate::structures::TERMINATED
184    }
185
186    fn size_hint(&self) -> u32 {
187        0
188    }
189}
190
191impl Scorer for EmptyScorer {
192    fn score(&self) -> Score {
193        0.0
194    }
195}