Skip to main content

luci/core/
scorer.rs

1use crate::core::DocId;
2
3/// Iterates over matching documents and produces scores.
4///
5/// The `Scorer` owns its iteration state. There is no separate
6/// `DocIterator` — `doc_id()`, `next()`, `advance()` are on this trait
7/// directly. See [[architecture-query-execution#Core Traits]].
8///
9/// Object-safe: used as `Box<dyn Scorer>` in the query execution engine.
10/// Vtable dispatch cost is amortized by block-of-128 posting processing.
11///
12/// `Send` but not `Sync`: scorers are mutable (iteration state) and used
13/// by one thread at a time within a rayon segment task.
14pub trait Scorer: Send {
15    /// Current document ID. Valid after `next()` or `advance()` has been
16    /// called. Before the first call, the value is unspecified.
17    fn doc_id(&self) -> DocId;
18
19    /// Advance to the next matching document.
20    /// Returns the new doc ID, or `NO_MORE_DOCS` if exhausted.
21    fn next(&mut self) -> DocId;
22
23    /// Advance to the first matching document at or after `target`.
24    /// Returns the new doc ID, or `NO_MORE_DOCS` if no match >= target.
25    ///
26    /// Contract: `target` must be > current `doc_id()`. Callers must not
27    /// call `advance()` with a target <= the current position.
28    fn advance(&mut self, target: DocId) -> DocId;
29
30    /// Score of the current document.
31    /// Only meaningful when `ScoreMode::needs_scores()` is true.
32    fn score(&mut self) -> f32;
33
34    /// Optional two-phase iteration support.
35    ///
36    /// If `Some`, the scorer's `next()`/`advance()` produce an approximate
37    /// (superset) of true matches. The caller must invoke `matches()` on the
38    /// returned [`TwoPhaseIterator`] to confirm each candidate.
39    ///
40    /// The return value is determined at construction time and cached —
41    /// implementations return the same variant on every call. See
42    /// [[architecture-query-execution#Two-Phase Iteration Lifecycle]].
43    fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator>;
44
45    /// Upper bound on the score this scorer can produce for any document.
46    /// Used by WAND/MaxScore to skip non-competitive documents.
47    /// Default: `f32::MAX` (no bound known).
48    fn max_score(&self) -> f32 {
49        f32::MAX
50    }
51
52    /// Upper bound on score for documents in the block containing `doc`.
53    /// Returns `max_score()` by default (no block-level information).
54    /// Overridden by BlockMaxBm25Scorer for tighter per-block bounds.
55    ///
56    /// See [[architecture-query-execution#WAND / MaxScore Optimization]].
57    fn block_max_score(&mut self, _doc: DocId) -> f32 {
58        self.max_score()
59    }
60
61    /// Inform this scorer of the minimum competitive score. Documents scoring
62    /// below this threshold may be skipped. Default: no-op.
63    ///
64    /// Used by TopDocsCollector to propagate the heap minimum back to
65    /// WAND-aware scorers. See [[architecture-query-execution#WAND / MaxScore Optimization]].
66    fn set_min_competitive_score(&mut self, _min_score: f32) {}
67}
68
69/// Two-phase verification for approximate scorers.
70///
71/// Used by phrase queries, geo distance queries, regex queries, and other
72/// scorers where the fast approximation (posting list intersection) produces
73/// a superset of true matches and each candidate needs expensive
74/// verification. See [[architecture-query-execution#Core Traits]].
75pub trait TwoPhaseIterator: Send {
76    /// Verify that the current document (from the parent `Scorer`) truly
77    /// matches. Called after the scorer's `next()`/`advance()` positions on
78    /// a candidate.
79    fn matches(&mut self) -> bool;
80
81    /// Estimated cost of a single `matches()` call, relative to the cost
82    /// of `advance()`. Used to order multiple two-phase scorers
83    /// cheapest-first in conjunctions.
84    fn match_cost(&self) -> f32;
85}
86
87#[cfg(test)]
88mod tests {
89    use super::*;
90    use crate::core::NO_MORE_DOCS;
91
92    /// Scorer that immediately reports no matches. Validates that the
93    /// Scorer trait is object-safe (`Box<dyn Scorer>` compiles).
94    struct EmptyScorer;
95
96    impl Scorer for EmptyScorer {
97        fn doc_id(&self) -> DocId {
98            NO_MORE_DOCS
99        }
100
101        fn next(&mut self) -> DocId {
102            NO_MORE_DOCS
103        }
104
105        fn advance(&mut self, _target: DocId) -> DocId {
106            NO_MORE_DOCS
107        }
108
109        fn score(&mut self) -> f32 {
110            0.0
111        }
112
113        fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
114            None
115        }
116    }
117
118    /// Scorer backed by a sorted Vec of DocIds. Validates the next()/advance()
119    /// contract.
120    struct VecScorer {
121        docs: Vec<DocId>,
122        pos: usize,
123    }
124
125    impl VecScorer {
126        fn new(docs: Vec<DocId>) -> Self {
127            Self { docs, pos: 0 }
128        }
129
130        fn current(&self) -> DocId {
131            if self.pos < self.docs.len() {
132                self.docs[self.pos]
133            } else {
134                NO_MORE_DOCS
135            }
136        }
137    }
138
139    impl Scorer for VecScorer {
140        fn doc_id(&self) -> DocId {
141            self.current()
142        }
143
144        fn next(&mut self) -> DocId {
145            if self.pos < self.docs.len() {
146                self.pos += 1;
147            }
148            self.current()
149        }
150
151        fn advance(&mut self, target: DocId) -> DocId {
152            while self.pos < self.docs.len() && self.docs[self.pos] < target {
153                self.pos += 1;
154            }
155            self.current()
156        }
157
158        fn score(&mut self) -> f32 {
159            1.0
160        }
161
162        fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
163            None
164        }
165    }
166
167    #[test]
168    fn empty_scorer_is_object_safe() {
169        let scorer: Box<dyn Scorer> = Box::new(EmptyScorer);
170        assert_eq!(scorer.doc_id(), NO_MORE_DOCS);
171    }
172
173    #[test]
174    fn empty_scorer_returns_no_more_docs() {
175        let mut scorer = EmptyScorer;
176        assert_eq!(scorer.next(), NO_MORE_DOCS);
177        assert_eq!(scorer.advance(DocId(0)), NO_MORE_DOCS);
178    }
179
180    #[test]
181    fn vec_scorer_iterates_in_order() {
182        let mut scorer = VecScorer::new(vec![DocId(1), DocId(5), DocId(10)]);
183        assert_eq!(scorer.doc_id(), DocId(1));
184        assert_eq!(scorer.next(), DocId(5));
185        assert_eq!(scorer.next(), DocId(10));
186        assert_eq!(scorer.next(), NO_MORE_DOCS);
187    }
188
189    #[test]
190    fn vec_scorer_advance_skips() {
191        let mut scorer = VecScorer::new(vec![DocId(1), DocId(5), DocId(10), DocId(20)]);
192        assert_eq!(scorer.advance(DocId(5)), DocId(5));
193        assert_eq!(scorer.advance(DocId(15)), DocId(20));
194        assert_eq!(scorer.advance(DocId(21)), NO_MORE_DOCS);
195    }
196
197    #[test]
198    fn vec_scorer_advance_past_end() {
199        let mut scorer = VecScorer::new(vec![DocId(1), DocId(2)]);
200        assert_eq!(scorer.advance(DocId(100)), NO_MORE_DOCS);
201    }
202
203    #[test]
204    fn vec_scorer_default_max_score() {
205        let scorer = VecScorer::new(vec![]);
206        assert_eq!(scorer.max_score(), f32::MAX);
207    }
208}