Skip to main content

luci/query/
multi_term.rs

1//! Shared per-segment supplier for constant-score multi-term queries.
2//!
3//! Used by `prefix`, `wildcard`, `fuzzy`, and `regexp`. Each calling
4//! query is responsible for enumerating its own matching terms in the
5//! segment (via `terms_with_prefix`, `automaton_search`, etc.); this
6//! module provides the post-enumeration scoring infrastructure:
7//!
8//! 1. Open each term's postings on demand (one FST lookup per term).
9//! 2. Wrap in [`FilterScorer`] (no BM25 setup, no norms I/O).
10//! 3. Union via [`BufferedUnionScorer`] (windowed bitset).
11//!
12//! Both `FilterScorer::score()` and `BufferedUnionScorer::score()`
13//! return a constant 1.0 — matches Lucene's
14//! `MultiTermQueryConstantScoreBlendedWrapper` semantics where the
15//! disjunction is wrapped in a `ConstantScoreScorer`.
16//!
17//! See [[optimization-multiterm-constant-score-rewrite]] and
18//! [[fix-disjunction-heap-inefficiency]].
19
20use crate::core::{DocId, FieldId, NO_MORE_DOCS, Result, Scorer, TwoPhaseIterator};
21
22use crate::query::ScorerSupplier;
23use crate::query::term::FilterScorer;
24use crate::search::buffered_union::BufferedUnionScorer;
25use crate::segment::reader::SegmentReader;
26
/// Per-segment supplier for constant-score multi-term queries.
///
/// Holds the terms a calling query has already enumerated for one segment
/// and defers opening their postings until `scorer()` is invoked.
pub(crate) struct ConstantScoreMultiTermSupplier {
    /// Field whose postings are looked up for every entry in `terms`.
    field_id: FieldId,
    /// `(term, doc_freq)` pairs as discovered by the calling query.
    /// `doc_freq` is used only for cost estimation, not for scoring.
    terms: Vec<(String, u32)>,
    /// Sum of the `doc_freq` values in `terms`; computed once in `new()`
    /// and returned unchanged by `cost()`.
    cost: u64,
    /// Raw pointer to segment reader. Safe because the supplier's
    /// lifetime is bounded by the search call which holds the reader.
    /// NOTE(review): that bound is a caller-side convention; nothing in
    /// this type enforces it.
    segment_data: *const SegmentReader,
}
38
// SAFETY: Only used within a single search call where the
// SegmentReader outlives the supplier.
//
// The `*const SegmentReader` field suppresses the automatic `Send` impl,
// so it is opted back in by hand here.
// NOTE(review): if a supplier is ever moved to another thread, that thread
// ends up holding a shared `&SegmentReader`; this presumably relies on
// `SegmentReader: Sync` — confirm against its definition.
unsafe impl Send for ConstantScoreMultiTermSupplier {}
42
43impl ConstantScoreMultiTermSupplier {
44    pub(crate) fn new(
45        reader: &SegmentReader,
46        field_id: FieldId,
47        terms: Vec<(String, u32)>,
48    ) -> Self {
49        let cost: u64 = terms.iter().map(|(_, df)| *df as u64).sum();
50        Self {
51            field_id,
52            terms,
53            cost,
54            segment_data: reader as *const SegmentReader,
55        }
56    }
57}
58
59impl ScorerSupplier for ConstantScoreMultiTermSupplier {
60    fn cost(&self) -> u64 {
61        self.cost
62    }
63
64    fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
65        // SAFETY: the segment reader outlives this scorer supplier
66        let reader = unsafe { &*self.segment_data };
67
68        let mut scorers: Vec<Box<dyn Scorer>> = Vec::with_capacity(self.terms.len());
69        for (term, _) in &self.terms {
70            if let Some(postings) = reader.postings(self.field_id, term) {
71                scorers.push(Box::new(FilterScorer::new(postings)));
72            }
73        }
74
75        if scorers.is_empty() {
76            return Ok(Box::new(EmptyScorer));
77        }
78        // Single-term fast path: return the FilterScorer directly. Its
79        // score() already returns the constant 1.0 — no wrapper needed.
80        if scorers.len() == 1 {
81            return Ok(scorers.pop().unwrap());
82        }
83        // Multi-term path: union via BufferedUnionScorer (windowed bitset).
84        Ok(Box::new(BufferedUnionScorer::new(scorers)))
85    }
86}
87
88/// Empty scorer for the no-matches case.
89pub(crate) struct EmptyScorer;
90
91impl Scorer for EmptyScorer {
92    fn doc_id(&self) -> DocId {
93        NO_MORE_DOCS
94    }
95    fn next(&mut self) -> DocId {
96        NO_MORE_DOCS
97    }
98    fn advance(&mut self, _: DocId) -> DocId {
99        NO_MORE_DOCS
100    }
101    fn score(&mut self) -> f32 {
102        0.0
103    }
104    fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
105        None
106    }
107}