luci/query/multi_term.rs
1//! Shared per-segment supplier for constant-score multi-term queries.
2//!
3//! Used by `prefix`, `wildcard`, `fuzzy`, and `regexp`. Each calling
4//! query is responsible for enumerating its own matching terms in the
5//! segment (via `terms_with_prefix`, `automaton_search`, etc.); this
6//! module provides the post-enumeration scoring infrastructure:
7//!
8//! 1. Open each term's postings on demand (one FST lookup per term).
9//! 2. Wrap in [`FilterScorer`] (no BM25 setup, no norms I/O).
10//! 3. Union via [`BufferedUnionScorer`] (windowed bitset).
11//!
12//! Both `FilterScorer::score()` and `BufferedUnionScorer::score()`
13//! return a constant 1.0 — matches Lucene's
14//! `MultiTermQueryConstantScoreBlendedWrapper` semantics where the
15//! disjunction is wrapped in a `ConstantScoreScorer`.
16//!
17//! See [[optimization-multiterm-constant-score-rewrite]] and
18//! [[fix-disjunction-heap-inefficiency]].
19
20use crate::core::{DocId, FieldId, NO_MORE_DOCS, Result, Scorer, TwoPhaseIterator};
21
22use crate::query::ScorerSupplier;
23use crate::query::term::FilterScorer;
24use crate::search::buffered_union::BufferedUnionScorer;
25use crate::segment::reader::SegmentReader;
26
27/// Per-segment supplier for constant-score multi-term queries.
28pub(crate) struct ConstantScoreMultiTermSupplier {
29 field_id: FieldId,
30 /// `(term, doc_freq)` pairs as discovered by the calling query.
31 /// `doc_freq` is used only for cost estimation, not for scoring.
32 terms: Vec<(String, u32)>,
33 cost: u64,
34 /// Raw pointer to segment reader. Safe because the supplier's
35 /// lifetime is bounded by the search call which holds the reader.
36 segment_data: *const SegmentReader,
37}
38
39// SAFETY: Only used within a single search call where the
40// SegmentReader outlives the supplier.
41unsafe impl Send for ConstantScoreMultiTermSupplier {}
42
43impl ConstantScoreMultiTermSupplier {
44 pub(crate) fn new(
45 reader: &SegmentReader,
46 field_id: FieldId,
47 terms: Vec<(String, u32)>,
48 ) -> Self {
49 let cost: u64 = terms.iter().map(|(_, df)| *df as u64).sum();
50 Self {
51 field_id,
52 terms,
53 cost,
54 segment_data: reader as *const SegmentReader,
55 }
56 }
57}
58
59impl ScorerSupplier for ConstantScoreMultiTermSupplier {
60 fn cost(&self) -> u64 {
61 self.cost
62 }
63
64 fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
65 // SAFETY: the segment reader outlives this scorer supplier
66 let reader = unsafe { &*self.segment_data };
67
68 let mut scorers: Vec<Box<dyn Scorer>> = Vec::with_capacity(self.terms.len());
69 for (term, _) in &self.terms {
70 if let Some(postings) = reader.postings(self.field_id, term) {
71 scorers.push(Box::new(FilterScorer::new(postings)));
72 }
73 }
74
75 if scorers.is_empty() {
76 return Ok(Box::new(EmptyScorer));
77 }
78 // Single-term fast path: return the FilterScorer directly. Its
79 // score() already returns the constant 1.0 — no wrapper needed.
80 if scorers.len() == 1 {
81 return Ok(scorers.pop().unwrap());
82 }
83 // Multi-term path: union via BufferedUnionScorer (windowed bitset).
84 Ok(Box::new(BufferedUnionScorer::new(scorers)))
85 }
86}
87
88/// Empty scorer for the no-matches case.
89pub(crate) struct EmptyScorer;
90
91impl Scorer for EmptyScorer {
92 fn doc_id(&self) -> DocId {
93 NO_MORE_DOCS
94 }
95 fn next(&mut self) -> DocId {
96 NO_MORE_DOCS
97 }
98 fn advance(&mut self, _: DocId) -> DocId {
99 NO_MORE_DOCS
100 }
101 fn score(&mut self) -> f32 {
102 0.0
103 }
104 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
105 None
106 }
107}