// hermes_core/query/traits.rs

//! Query and Scorer traits with async support
//!
//! Provides the core abstractions for search queries and document scoring.
4
5use std::future::Future;
6use std::pin::Pin;
7
8use crate::segment::SegmentReader;
9use crate::{DocId, Result, Score};
10
/// Tuning parameters for BM25 scoring.
///
/// Derives `PartialEq` so two parameter sets can be compared directly (for
/// example in tests, or to detect a configuration change). `Eq` is not derived
/// because both fields are `f32`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Bm25Params {
    /// Term frequency saturation parameter (typically 1.2-2.0)
    pub k1: f32,
    /// Length normalization parameter (typically 0.75)
    pub b: f32,
}
19
20impl Default for Bm25Params {
21    fn default() -> Self {
22        Self { k1: 1.2, b: 0.75 }
23    }
24}
25
26/// Future type for scorer creation
27#[cfg(not(target_arch = "wasm32"))]
28pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + Send + 'a>>;
29#[cfg(target_arch = "wasm32")]
30pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + 'a>>;
31
32/// Future type for count estimation
33#[cfg(not(target_arch = "wasm32"))]
34pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + Send + 'a>>;
35#[cfg(target_arch = "wasm32")]
36pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + 'a>>;
37
38/// Info for WAND-optimizable term queries
39#[derive(Debug, Clone)]
40pub struct TermQueryInfo {
41    /// Field being searched
42    pub field: crate::dsl::Field,
43    /// Term bytes (lowercase)
44    pub term: Vec<u8>,
45}
46
47/// A search query (async)
48///
49/// Note: `scorer` takes `&self` (not `&'a self`) so that scorers don't borrow the query.
50/// This enables query composition - queries can create sub-queries locally and get their scorers.
51/// Implementations must clone/capture any data they need during scorer creation.
52#[cfg(not(target_arch = "wasm32"))]
53pub trait Query: Send + Sync {
54    /// Create a scorer for this query against a single segment (async)
55    ///
56    /// The `limit` parameter specifies the maximum number of results to return.
57    /// This is passed from the top-level search limit.
58    ///
59    /// Note: The scorer borrows only the reader, not the query. Implementations
60    /// should capture any needed query data (field, terms, etc.) during creation.
61    fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a>;
62
63    /// Estimated number of matching documents in a segment (async)
64    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a>;
65
66    /// Return term info if this is a simple term query eligible for WAND optimization
67    ///
68    /// Returns None for complex queries (boolean, phrase, etc.)
69    fn as_term_query_info(&self) -> Option<TermQueryInfo> {
70        None
71    }
72}
73
/// Asynchronous search query, wasm32 variant.
///
/// Declares the same methods as the non-wasm32 trait but has no `Send`/`Sync`
/// supertraits.
#[cfg(target_arch = "wasm32")]
pub trait Query {
    /// Builds a scorer for this query over a single segment (async).
    ///
    /// `limit` is the maximum number of results to return; it is forwarded
    /// from the top-level search limit.
    fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a>;

    /// Estimates how many documents in the segment match this query (async).
    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a>;

    /// Describes this query as a plain term query when it can be WAND-optimized.
    ///
    /// The default implementation returns `None`.
    fn as_term_query_info(&self) -> Option<TermQueryInfo> {
        None
    }
}
91
92impl Query for Box<dyn Query> {
93    fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
94        (**self).scorer(reader, limit)
95    }
96
97    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
98        (**self).count_estimate(reader)
99    }
100
101    fn as_term_query_info(&self) -> Option<TermQueryInfo> {
102        (**self).as_term_query_info()
103    }
104}
105
106/// Matched positions for a field (field_id, list of scored positions)
107/// Each position includes its individual score contribution
108pub type MatchedPositions = Vec<(u32, Vec<super::ScoredPosition>)>;
109
110/// Scorer that iterates over matching documents and computes scores
111#[cfg(not(target_arch = "wasm32"))]
112pub trait Scorer: Send {
113    /// Current document ID, or TERMINATED if exhausted
114    fn doc(&self) -> DocId;
115
116    /// Score for current document
117    fn score(&self) -> Score;
118
119    /// Advance to next document
120    fn advance(&mut self) -> DocId;
121
122    /// Seek to first doc >= target
123    fn seek(&mut self, target: DocId) -> DocId;
124
125    /// Size hint for remaining documents
126    fn size_hint(&self) -> u32;
127
128    /// Get matched positions for the current document (if available)
129    /// Returns (field_id, positions) pairs where positions are encoded as per PositionMode
130    fn matched_positions(&self) -> Option<MatchedPositions> {
131        None
132    }
133}
134
/// Cursor over the documents matching a query, with a score per document.
///
/// This wasm32 variant declares the same methods as the non-wasm32 trait but
/// has no `Send` supertrait.
#[cfg(target_arch = "wasm32")]
pub trait Scorer {
    /// Returns the current document ID, or `TERMINATED` once exhausted.
    fn doc(&self) -> DocId;

    /// Returns the score of the current document.
    fn score(&self) -> Score;

    /// Moves to the next document.
    fn advance(&mut self) -> DocId;

    /// Moves to the first document whose ID is `>= target`.
    fn seek(&mut self, target: DocId) -> DocId;

    /// Returns a size hint for the documents that remain.
    fn size_hint(&self) -> u32;

    /// Returns the matched positions for the current document, if available.
    ///
    /// The default implementation returns `None`.
    fn matched_positions(&self) -> Option<MatchedPositions> {
        None
    }
}
158
/// Empty scorer for terms that don't exist.
///
/// A field-less unit type, so it derives `Debug`, `Clone`, `Copy` and
/// `Default`: it can be logged, duplicated and built via `Default::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct EmptyScorer;
161
162impl Scorer for EmptyScorer {
163    fn doc(&self) -> DocId {
164        crate::structures::TERMINATED
165    }
166
167    fn score(&self) -> Score {
168        0.0
169    }
170
171    fn advance(&mut self) -> DocId {
172        crate::structures::TERMINATED
173    }
174
175    fn seek(&mut self, _target: DocId) -> DocId {
176        crate::structures::TERMINATED
177    }
178
179    fn size_hint(&self) -> u32 {
180        0
181    }
182}