luci/core/scorer.rs
1use crate::core::DocId;
2
3/// Iterates over matching documents and produces scores.
4///
5/// The `Scorer` owns its iteration state. There is no separate
6/// `DocIterator` — `doc_id()`, `next()`, `advance()` are on this trait
7/// directly. See [[architecture-query-execution#Core Traits]].
8///
9/// Object-safe: used as `Box<dyn Scorer>` in the query execution engine.
10/// Vtable dispatch cost is amortized by block-of-128 posting processing.
11///
12/// `Send` but not `Sync`: scorers are mutable (iteration state) and used
13/// by one thread at a time within a rayon segment task.
14pub trait Scorer: Send {
15 /// Current document ID. Valid after `next()` or `advance()` has been
16 /// called. Before the first call, the value is unspecified.
17 fn doc_id(&self) -> DocId;
18
19 /// Advance to the next matching document.
20 /// Returns the new doc ID, or `NO_MORE_DOCS` if exhausted.
21 fn next(&mut self) -> DocId;
22
23 /// Advance to the first matching document at or after `target`.
24 /// Returns the new doc ID, or `NO_MORE_DOCS` if no match >= target.
25 ///
26 /// Contract: `target` must be > current `doc_id()`. Callers must not
27 /// call `advance()` with a target <= the current position.
28 fn advance(&mut self, target: DocId) -> DocId;
29
30 /// Score of the current document.
31 /// Only meaningful when `ScoreMode::needs_scores()` is true.
32 fn score(&mut self) -> f32;
33
34 /// Optional two-phase iteration support.
35 ///
36 /// If `Some`, the scorer's `next()`/`advance()` produce an approximate
37 /// (superset) of true matches. The caller must invoke `matches()` on the
38 /// returned [`TwoPhaseIterator`] to confirm each candidate.
39 ///
40 /// The return value is determined at construction time and cached —
41 /// implementations return the same variant on every call. See
42 /// [[architecture-query-execution#Two-Phase Iteration Lifecycle]].
43 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator>;
44
45 /// Upper bound on the score this scorer can produce for any document.
46 /// Used by WAND/MaxScore to skip non-competitive documents.
47 /// Default: `f32::MAX` (no bound known).
48 fn max_score(&self) -> f32 {
49 f32::MAX
50 }
51
52 /// Upper bound on score for documents in the block containing `doc`.
53 /// Returns `max_score()` by default (no block-level information).
54 /// Overridden by BlockMaxBm25Scorer for tighter per-block bounds.
55 ///
56 /// See [[architecture-query-execution#WAND / MaxScore Optimization]].
57 fn block_max_score(&mut self, _doc: DocId) -> f32 {
58 self.max_score()
59 }
60
61 /// Inform this scorer of the minimum competitive score. Documents scoring
62 /// below this threshold may be skipped. Default: no-op.
63 ///
64 /// Used by TopDocsCollector to propagate the heap minimum back to
65 /// WAND-aware scorers. See [[architecture-query-execution#WAND / MaxScore Optimization]].
66 fn set_min_competitive_score(&mut self, _min_score: f32) {}
67}
68
69/// Two-phase verification for approximate scorers.
70///
71/// Used by phrase queries, geo distance queries, regex queries, and other
72/// scorers where the fast approximation (posting list intersection) produces
73/// a superset of true matches and each candidate needs expensive
74/// verification. See [[architecture-query-execution#Core Traits]].
75pub trait TwoPhaseIterator: Send {
76 /// Verify that the current document (from the parent `Scorer`) truly
77 /// matches. Called after the scorer's `next()`/`advance()` positions on
78 /// a candidate.
79 fn matches(&mut self) -> bool;
80
81 /// Estimated cost of a single `matches()` call, relative to the cost
82 /// of `advance()`. Used to order multiple two-phase scorers
83 /// cheapest-first in conjunctions.
84 fn match_cost(&self) -> f32;
85}
86
87#[cfg(test)]
88mod tests {
89 use super::*;
90 use crate::core::NO_MORE_DOCS;
91
92 /// Scorer that immediately reports no matches. Validates that the
93 /// Scorer trait is object-safe (`Box<dyn Scorer>` compiles).
94 struct EmptyScorer;
95
96 impl Scorer for EmptyScorer {
97 fn doc_id(&self) -> DocId {
98 NO_MORE_DOCS
99 }
100
101 fn next(&mut self) -> DocId {
102 NO_MORE_DOCS
103 }
104
105 fn advance(&mut self, _target: DocId) -> DocId {
106 NO_MORE_DOCS
107 }
108
109 fn score(&mut self) -> f32 {
110 0.0
111 }
112
113 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
114 None
115 }
116 }
117
118 /// Scorer backed by a sorted Vec of DocIds. Validates the next()/advance()
119 /// contract.
120 struct VecScorer {
121 docs: Vec<DocId>,
122 pos: usize,
123 }
124
125 impl VecScorer {
126 fn new(docs: Vec<DocId>) -> Self {
127 Self { docs, pos: 0 }
128 }
129
130 fn current(&self) -> DocId {
131 if self.pos < self.docs.len() {
132 self.docs[self.pos]
133 } else {
134 NO_MORE_DOCS
135 }
136 }
137 }
138
139 impl Scorer for VecScorer {
140 fn doc_id(&self) -> DocId {
141 self.current()
142 }
143
144 fn next(&mut self) -> DocId {
145 if self.pos < self.docs.len() {
146 self.pos += 1;
147 }
148 self.current()
149 }
150
151 fn advance(&mut self, target: DocId) -> DocId {
152 while self.pos < self.docs.len() && self.docs[self.pos] < target {
153 self.pos += 1;
154 }
155 self.current()
156 }
157
158 fn score(&mut self) -> f32 {
159 1.0
160 }
161
162 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
163 None
164 }
165 }
166
167 #[test]
168 fn empty_scorer_is_object_safe() {
169 let scorer: Box<dyn Scorer> = Box::new(EmptyScorer);
170 assert_eq!(scorer.doc_id(), NO_MORE_DOCS);
171 }
172
173 #[test]
174 fn empty_scorer_returns_no_more_docs() {
175 let mut scorer = EmptyScorer;
176 assert_eq!(scorer.next(), NO_MORE_DOCS);
177 assert_eq!(scorer.advance(DocId(0)), NO_MORE_DOCS);
178 }
179
180 #[test]
181 fn vec_scorer_iterates_in_order() {
182 let mut scorer = VecScorer::new(vec![DocId(1), DocId(5), DocId(10)]);
183 assert_eq!(scorer.doc_id(), DocId(1));
184 assert_eq!(scorer.next(), DocId(5));
185 assert_eq!(scorer.next(), DocId(10));
186 assert_eq!(scorer.next(), NO_MORE_DOCS);
187 }
188
189 #[test]
190 fn vec_scorer_advance_skips() {
191 let mut scorer = VecScorer::new(vec![DocId(1), DocId(5), DocId(10), DocId(20)]);
192 assert_eq!(scorer.advance(DocId(5)), DocId(5));
193 assert_eq!(scorer.advance(DocId(15)), DocId(20));
194 assert_eq!(scorer.advance(DocId(21)), NO_MORE_DOCS);
195 }
196
197 #[test]
198 fn vec_scorer_advance_past_end() {
199 let mut scorer = VecScorer::new(vec![DocId(1), DocId(2)]);
200 assert_eq!(scorer.advance(DocId(100)), NO_MORE_DOCS);
201 }
202
203 #[test]
204 fn vec_scorer_default_max_score() {
205 let scorer = VecScorer::new(vec![]);
206 assert_eq!(scorer.max_score(), f32::MAX);
207 }
208}