Skip to main content

hermes_core/query/
traits.rs

1//! Query and Scorer traits with async support
2//!
3//! Provides the core abstractions for search queries and document scoring.
4
5use std::future::Future;
6use std::pin::Pin;
7
8use crate::segment::SegmentReader;
9use crate::{DocId, Result, Score};
10
/// Tuning parameters for BM25 scoring.
///
/// [`Default`] yields `k1 = 1.2` and `b = 0.75`. The type is `Copy`, so it can be
/// passed by value, and `PartialEq`, so two parameter sets can be compared directly.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Bm25Params {
    /// Term frequency saturation parameter (typically 1.2-2.0)
    pub k1: f32,
    /// Length normalization parameter (typically 0.75)
    pub b: f32,
}

impl Default for Bm25Params {
    /// Returns the default parameter set: `k1 = 1.2`, `b = 0.75`.
    fn default() -> Self {
        Self { k1: 1.2, b: 0.75 }
    }
}
25
26/// Future type for scorer creation
27#[cfg(not(target_arch = "wasm32"))]
28pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + Send + 'a>>;
29#[cfg(target_arch = "wasm32")]
30pub type ScorerFuture<'a> = Pin<Box<dyn Future<Output = Result<Box<dyn Scorer + 'a>>> + 'a>>;
31
32/// Future type for count estimation
33#[cfg(not(target_arch = "wasm32"))]
34pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + Send + 'a>>;
35#[cfg(target_arch = "wasm32")]
36pub type CountFuture<'a> = Pin<Box<dyn Future<Output = Result<u32>> + 'a>>;
37
38/// Info for WAND-optimizable term queries
39#[derive(Debug, Clone)]
40pub struct TermQueryInfo {
41    /// Field being searched
42    pub field: crate::dsl::Field,
43    /// Term bytes (lowercase)
44    pub term: Vec<u8>,
45}
46
47/// Matched positions for a field (field_id, list of scored positions)
48/// Each position includes its individual score contribution
49pub type MatchedPositions = Vec<(u32, Vec<super::ScoredPosition>)>;
50
// Generates the `Query` and `Scorer` traits. The tokens passed to the macro are
// spliced in verbatim as the supertrait list of both traits, which lets the
// thread-safety bounds differ per target without duplicating the definitions.
macro_rules! define_query_traits {
    ($($bound:tt)*) => {
        /// An asynchronous search query.
        ///
        /// `scorer` receives `&self` rather than `&'a self`, so the scorer it produces
        /// never borrows from the query. That is what makes composition possible: a
        /// query may build sub-queries as locals and still hand out their scorers.
        /// Implementations therefore have to clone or capture whatever query data the
        /// scorer needs at creation time.
        pub trait Query: $($bound)* {
            /// Builds a scorer for this query over one segment (async).
            ///
            /// `limit` is the maximum number of results to return; it is passed down
            /// from the top-level search limit.
            ///
            /// The returned scorer borrows only `reader`, never the query, so any
            /// query data it relies on (field, terms, etc.) must be captured here.
            fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a>;

            /// Estimates how many documents in the segment match (async).
            fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a>;

            /// Returns term info when this is a simple term query eligible for WAND
            /// optimization.
            ///
            /// The default returns `None`, which is what complex queries (boolean,
            /// phrase, etc.) should report.
            fn as_term_query_info(&self) -> Option<TermQueryInfo> {
                None
            }
        }

        /// Iterates over matching documents and computes a score for each.
        pub trait Scorer: $($bound)* {
            /// The current document ID, or TERMINATED once the scorer is exhausted.
            fn doc(&self) -> DocId;

            /// The score of the current document.
            fn score(&self) -> Score;

            /// Moves to the next document.
            fn advance(&mut self) -> DocId;

            /// Moves to the first document whose ID is >= `target`.
            fn seek(&mut self, target: DocId) -> DocId;

            /// A size hint for the documents that remain.
            fn size_hint(&self) -> u32;

            /// Matched positions for the current document, when available.
            ///
            /// Yields `(field_id, positions)` pairs whose positions are encoded as per
            /// PositionMode. The default returns `None`.
            fn matched_positions(&self) -> Option<MatchedPositions> {
                None
            }
        }
    };
}
104
105#[cfg(not(target_arch = "wasm32"))]
106define_query_traits!(Send + Sync);
107
108#[cfg(target_arch = "wasm32")]
109define_query_traits!();
110
111impl Query for Box<dyn Query> {
112    fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
113        (**self).scorer(reader, limit)
114    }
115
116    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
117        (**self).count_estimate(reader)
118    }
119
120    fn as_term_query_info(&self) -> Option<TermQueryInfo> {
121        (**self).as_term_query_info()
122    }
123}
124
125/// Empty scorer for terms that don't exist
126pub struct EmptyScorer;
127
128impl Scorer for EmptyScorer {
129    fn doc(&self) -> DocId {
130        crate::structures::TERMINATED
131    }
132
133    fn score(&self) -> Score {
134        0.0
135    }
136
137    fn advance(&mut self) -> DocId {
138        crate::structures::TERMINATED
139    }
140
141    fn seek(&mut self, _target: DocId) -> DocId {
142        crate::structures::TERMINATED
143    }
144
145    fn size_hint(&self) -> u32 {
146        0
147    }
148}