Skip to main content

luci/query/
term.rs

1//! TermQuery: exact term match on a field.
2//!
3//! In query context, scores with BM25. In filter context, just iterates
4//! matching doc IDs without scoring.
5//!
6//! See [[query-dsl#Term-Level Queries]] and [[architecture-query-execution#Step 6]].
7
8use crate::core::{DocId, FieldId, NO_MORE_DOCS, Result, ScoreMode, Scorer, TwoPhaseIterator};
9
10use crate::query::{BoundQuery, Query, ScorerSupplier};
11use crate::search::bm25::{BlockMaxBm25Scorer, Bm25Scorer, Bm25Weight};
12use crate::search::searcher::Searcher;
13use crate::segment::reader::SegmentReader;
14
/// Exact term match on a single field.
///
/// The term is looked up verbatim: this type passes `value` to the searcher
/// and segment readers unchanged and applies no analysis of its own.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TermQuery {
    /// Name of the field to search.
    pub field: String,
    /// Exact term that must be present in `field`.
    pub value: String,
}
20
21impl Query for TermQuery {
22    fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
23        Ok(Box::new(BoundTermQuery {
24            field: self.field.clone(),
25            value: self.value.clone(),
26            score_mode,
27            total_docs: searcher.total_docs(),
28            doc_freq: searcher.doc_freq(&self.field, &self.value),
29            avg_field_length: searcher.avg_field_length(&self.field),
30        }))
31    }
32}
33
34pub(crate) struct BoundTermQuery {
35    pub(crate) field: String,
36    pub(crate) value: String,
37    pub(crate) score_mode: ScoreMode,
38    pub(crate) total_docs: u32,
39    #[allow(dead_code)]
40    pub(crate) doc_freq: u32,
41    pub(crate) avg_field_length: f32,
42}
43
44impl BoundTermQuery {
45    /// Resolve the field ID in this segment, or None if the field doesn't exist.
46    fn resolve_field(&self, reader: &SegmentReader) -> Option<FieldId> {
47        reader
48            .header()
49            .fields
50            .iter()
51            .find(|f| f.field_name == self.field)
52            .map(|f| f.field_id)
53    }
54}
55
56impl BoundQuery for BoundTermQuery {
57    /// Monomorphic scoring: constructs concrete scorer types and runs the
58    /// scoring loop with static dispatch, avoiding vtable overhead.
59    /// See [[optimization-scoring-throughput#Phase 1]].
60    fn bulk_score(
61        &self,
62        reader: &SegmentReader,
63        collector: &mut crate::search::collector::TopDocsCollector,
64        segment_id: crate::core::SegmentId,
65    ) -> Result<Option<u64>> {
66        let field_id = match self.resolve_field(reader) {
67            Some(id) => id,
68            None => return Ok(Some(0)),
69        };
70
71        let doc_freq = reader.doc_freq(field_id, &self.value);
72        if doc_freq == 0 {
73            return Ok(Some(0));
74        }
75
76        if !self.score_mode.needs_scores() {
77            let postings = reader.postings(field_id, &self.value).unwrap();
78            let mut scorer = FilterScorer::new(postings);
79            return Ok(Some(crate::search::score_loop(
80                &mut scorer,
81                collector,
82                segment_id,
83            )));
84        }
85
86        let weight = Bm25Weight::new(self.total_docs, doc_freq, self.avg_field_length);
87        let norms = reader.norms(field_id).unwrap();
88
89        if let Some(dl) = norms.uniform_norm() {
90            let constant =
91                crate::search::bm25::bm25_score(weight.idf, 1.0, dl, weight.avg_field_length);
92            let postings = reader.postings(field_id, &self.value).unwrap();
93            let mut scorer = ConstantBm25Scorer::new(postings, constant);
94            return Ok(Some(crate::search::score_loop(
95                &mut scorer,
96                collector,
97                segment_id,
98            )));
99        }
100
101        if let Some(block_postings) = reader.postings_block_max(field_id, &self.value) {
102            let mut scorer = BlockMaxBm25Scorer::new(weight, block_postings, norms);
103            return Ok(Some(crate::search::score_loop(
104                &mut scorer,
105                collector,
106                segment_id,
107            )));
108        }
109
110        let postings = reader.postings(field_id, &self.value).unwrap();
111        let mut scorer = Bm25Scorer::new(weight, postings, norms);
112        Ok(Some(crate::search::score_loop(
113            &mut scorer,
114            collector,
115            segment_id,
116        )))
117    }
118
119    fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
120        let field_id = match self.resolve_field(reader) {
121            Some(id) => id,
122            None => return Ok(None),
123        };
124
125        let doc_freq = reader.doc_freq(field_id, &self.value);
126        if doc_freq == 0 {
127            return Ok(None);
128        }
129
130        Ok(Some(Box::new(TermScorerSupplier {
131            field_id,
132            value: self.value.clone(),
133            score_mode: self.score_mode,
134            doc_freq,
135            total_docs: self.total_docs,
136            avg_field_length: self.avg_field_length,
137            segment_data: reader as *const SegmentReader,
138        })))
139    }
140
141    /// BM25 score explanation with full formula decomposition.
142    fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<crate::search::Explanation> {
143        use crate::search::Explanation;
144        use crate::search::bm25::{bm25_idf, bm25_score};
145
146        let field_id = match self.resolve_field(reader) {
147            Some(id) => id,
148            None => {
149                return Ok(Explanation::no_match(format!(
150                    "no field '{}' in segment",
151                    self.field
152                )));
153            }
154        };
155
156        let doc_freq = reader.doc_freq(field_id, &self.value);
157        if doc_freq == 0 {
158            return Ok(Explanation::no_match(format!(
159                "term '{}' not found in field '{}'",
160                self.value, self.field
161            )));
162        }
163
164        // Find TF for this doc
165        let mut postings = match reader.postings(field_id, &self.value) {
166            Some(p) => p,
167            None => {
168                return Ok(Explanation::no_match(format!(
169                    "term '{}' not found",
170                    self.value
171                )));
172            }
173        };
174
175        let mut tf = 0u32;
176        while let Some((did, t)) = postings.next() {
177            if did == doc {
178                tf = t;
179                break;
180            }
181            if did > doc {
182                return Ok(Explanation::no_match(format!(
183                    "doc {} does not contain term '{}'",
184                    doc.as_u32(),
185                    self.value
186                )));
187            }
188        }
189        if tf == 0 {
190            return Ok(Explanation::no_match(format!(
191                "doc {} does not contain term '{}'",
192                doc.as_u32(),
193                self.value
194            )));
195        }
196
197        // Get doc length from norms
198        let norms = reader.norms(field_id).unwrap();
199        let dl = crate::inverted::norms::decode_norm(norms.raw_byte(doc));
200        let avgdl = self.avg_field_length;
201
202        let idf = bm25_idf(self.total_docs, doc_freq);
203        let score = bm25_score(idf, tf as f32, dl, avgdl);
204
205        let idf_exp = Explanation::leaf(
206            idf,
207            format!("idf(docFreq={}, docCount={})", doc_freq, self.total_docs),
208        );
209        let tf_exp = Explanation::leaf(
210            tf as f32,
211            format!("tf(freq={} in doc {})", tf, doc.as_u32()),
212        );
213        let dl_exp = Explanation::leaf(dl, format!("dl(fieldLength={})", dl));
214        let avgdl_exp = Explanation::leaf(avgdl, format!("avgdl(avgFieldLength={:.1})", avgdl));
215
216        Ok(Explanation::matched(
217            score,
218            format!(
219                "score(freq={}) = idf * tf_norm, term={}, field={}",
220                tf, self.value, self.field
221            ),
222            vec![idf_exp, tf_exp, dl_exp, avgdl_exp],
223        ))
224    }
225}
226
227struct TermScorerSupplier {
228    field_id: FieldId,
229    value: String,
230    score_mode: ScoreMode,
231    doc_freq: u32,
232    total_docs: u32,
233    avg_field_length: f32,
234    /// Raw pointer to segment reader. Safe because the supplier's lifetime
235    /// is bounded by the search call which holds the reader.
236    segment_data: *const SegmentReader,
237}
238
239// SAFETY: TermScorerSupplier is only used within a single search call where
240// the SegmentReader outlives the supplier.
241unsafe impl Send for TermScorerSupplier {}
242
243impl ScorerSupplier for TermScorerSupplier {
244    fn cost(&self) -> u64 {
245        self.doc_freq as u64
246    }
247
248    fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
249        // SAFETY: the segment reader outlives this scorer supplier
250        let reader = unsafe { &*self.segment_data };
251
252        if !self.score_mode.needs_scores() {
253            // Filter context: no scoring needed
254            let postings = reader.postings(self.field_id, &self.value).unwrap();
255            return Ok(Box::new(FilterScorer::new(postings)));
256        }
257
258        let weight = Bm25Weight::new(self.total_docs, self.doc_freq, self.avg_field_length);
259        let norms = reader.norms(self.field_id).unwrap();
260
261        // For uniform-norm fields (keyword, boolean), use the simpler posting
262        // list with a precomputed constant score. This avoids BlockMax overhead
263        // and norms lookup — WAND can't prune uniform scores anyway.
264        if let Some(dl) = norms.uniform_norm() {
265            let constant =
266                crate::search::bm25::bm25_score(weight.idf, 1.0, dl, weight.avg_field_length);
267            let postings = reader.postings(self.field_id, &self.value).unwrap();
268            return Ok(Box::new(ConstantBm25Scorer::new(postings, constant)));
269        }
270
271        // Use BlockMaxBm25Scorer if the posting list has block-max metadata
272        if let Some(block_postings) = reader.postings_block_max(self.field_id, &self.value) {
273            return Ok(Box::new(BlockMaxBm25Scorer::new(
274                weight,
275                block_postings,
276                norms,
277            )));
278        }
279
280        let postings = reader.postings(self.field_id, &self.value).unwrap();
281        Ok(Box::new(Bm25Scorer::new(weight, postings, norms)))
282    }
283}
284
285/// BM25 scorer for uniform-norm fields (keyword, boolean).
286///
287/// Uses the simple `PostingListReader` (no block-max overhead) and returns
288/// a precomputed constant score. For keyword fields where TF=1 and
289/// field_length=1, the BM25 score is identical for every document.
290///
291/// See [[investigation-20260317-01-disjunction-performance]].
292struct ConstantBm25Scorer<'a> {
293    postings: crate::inverted::postings::PostingListReader<'a>,
294    current: DocId,
295    constant_score: f32,
296}
297
298impl<'a> ConstantBm25Scorer<'a> {
299    fn new(
300        mut postings: crate::inverted::postings::PostingListReader<'a>,
301        constant_score: f32,
302    ) -> Self {
303        let current = match postings.next() {
304            Some((id, _)) => id,
305            None => NO_MORE_DOCS,
306        };
307        Self {
308            postings,
309            current,
310            constant_score,
311        }
312    }
313}
314
315impl Scorer for ConstantBm25Scorer<'_> {
316    fn doc_id(&self) -> DocId {
317        self.current
318    }
319
320    fn next(&mut self) -> DocId {
321        self.current = match self.postings.next() {
322            Some((id, _)) => id,
323            None => NO_MORE_DOCS,
324        };
325        self.current
326    }
327
328    fn advance(&mut self, target: DocId) -> DocId {
329        while self.current < target && self.current != NO_MORE_DOCS {
330            self.next();
331        }
332        self.current
333    }
334
335    fn score(&mut self) -> f32 {
336        self.constant_score
337    }
338
339    fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
340        None
341    }
342
343    fn max_score(&self) -> f32 {
344        self.constant_score
345    }
346}
347
348/// A simple scorer that iterates doc IDs without computing scores.
349/// Used in filter context.
350pub(crate) struct FilterScorer<'a> {
351    postings: crate::inverted::postings::PostingListReader<'a>,
352    current: DocId,
353}
354
355impl<'a> FilterScorer<'a> {
356    pub(crate) fn new(mut postings: crate::inverted::postings::PostingListReader<'a>) -> Self {
357        let current = match postings.next() {
358            Some((id, _)) => id,
359            None => NO_MORE_DOCS,
360        };
361        Self { postings, current }
362    }
363}
364
365impl Scorer for FilterScorer<'_> {
366    fn doc_id(&self) -> DocId {
367        self.current
368    }
369
370    fn next(&mut self) -> DocId {
371        self.current = match self.postings.next() {
372            Some((id, _)) => id,
373            None => NO_MORE_DOCS,
374        };
375        self.current
376    }
377
378    fn advance(&mut self, target: DocId) -> DocId {
379        while self.current < target && self.current != NO_MORE_DOCS {
380            self.next();
381        }
382        self.current
383    }
384
385    fn score(&mut self) -> f32 {
386        1.0 // constant score in filter context
387    }
388
389    fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
390        None
391    }
392}
393
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::{AnalyzerRegistry, Token};
    use crate::core::SegmentId;
    use crate::mapping::{FieldType, Mapping};
    use crate::search::segment_store::SegmentStore;
    use crate::segment::builder::SegmentBuilder;

    /// Turns each term into a token at its index position, with offsets
    /// `0..term.len()`.
    fn make_tokens(terms: &[&str]) -> Vec<Token> {
        let mut tokens = Vec::with_capacity(terms.len());
        for (position, term) in terms.iter().enumerate() {
            tokens.push(Token::new(*term, 0, term.len(), position as u32));
        }
        tokens
    }

    /// Two-field mapping: a text `body` and a keyword `tag`.
    fn test_schema() -> Mapping {
        Mapping::builder()
            .field("body", FieldType::Text)
            .field("tag", FieldType::Keyword)
            .build()
    }

    /// Builds a three-document segment; docs 0 and 2 carry tag "a", doc 1
    /// carries tag "b".
    fn build_test_segment() -> SegmentReader {
        let schema = test_schema();
        let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
        let body = FieldId::new(0);
        let tag = FieldId::new(1);

        builder.add_document(
            &[
                (body, make_tokens(&["hello", "world"])),
                (tag, make_tokens(&["a"])),
            ],
            br#"{"body":"hello world","tag":"a"}"#,
        );
        builder.add_document(
            &[
                (body, make_tokens(&["hello", "luci"])),
                (tag, make_tokens(&["b"])),
            ],
            br#"{"body":"hello luci","tag":"b"}"#,
        );
        builder.add_document(
            &[
                (body, make_tokens(&["goodbye"])),
                (tag, make_tokens(&["a"])),
            ],
            br#"{"body":"goodbye","tag":"a"}"#,
        );

        SegmentReader::open(builder.build()).unwrap()
    }

    /// Runs `check` with a searcher over a store holding only the test segment.
    fn with_searcher(check: impl FnOnce(&Searcher)) {
        let store = SegmentStore::new(
            vec![build_test_segment()],
            AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&store);
        check(&searcher);
    }

    /// Binds a `TermQuery` for `field`/`value`, panicking if binding fails.
    fn bind_term(
        searcher: &Searcher,
        field: &str,
        value: &str,
        mode: ScoreMode,
    ) -> Box<dyn BoundQuery> {
        let query = TermQuery {
            field: field.into(),
            value: value.into(),
        };
        query.bind(searcher, mode).unwrap()
    }

    #[test]
    fn term_query_creates_weight() {
        with_searcher(|searcher| {
            // Binding alone must not panic; nothing else is asserted.
            let weight = bind_term(searcher, "tag", "a", ScoreMode::Complete);
            drop(weight);
        });
    }

    #[test]
    fn term_query_scorer_iterates() {
        with_searcher(|searcher| {
            let weight = bind_term(searcher, "tag", "a", ScoreMode::Complete);
            let segment = &searcher.segments()[0];

            let supplier = weight.scorer_supplier(segment).unwrap().unwrap();
            assert_eq!(supplier.cost(), 2); // "a" appears in 2 docs

            let mut scorer = supplier.scorer().unwrap();
            assert_eq!(scorer.doc_id(), DocId::new(0));
            assert_eq!(scorer.next(), DocId::new(2));
            assert_eq!(scorer.next(), NO_MORE_DOCS);
        });
    }

    #[test]
    fn term_query_missing_term() {
        with_searcher(|searcher| {
            let weight = bind_term(searcher, "tag", "nonexistent", ScoreMode::Complete);
            let segment = &searcher.segments()[0];

            let supplier = weight.scorer_supplier(segment).unwrap();
            assert!(supplier.is_none());
        });
    }

    #[test]
    fn term_query_missing_field() {
        with_searcher(|searcher| {
            let weight = bind_term(searcher, "nosuchfield", "x", ScoreMode::Complete);
            let segment = &searcher.segments()[0];

            let supplier = weight.scorer_supplier(segment).unwrap();
            assert!(supplier.is_none());
        });
    }

    #[test]
    fn term_query_filter_context() {
        with_searcher(|searcher| {
            let weight = bind_term(searcher, "tag", "a", ScoreMode::CompleteNoScores);
            let segment = &searcher.segments()[0];

            let supplier = weight.scorer_supplier(segment).unwrap().unwrap();
            let mut scorer = supplier.scorer().unwrap();

            assert_eq!(scorer.doc_id(), DocId::new(0));
            assert_eq!(scorer.score(), 1.0); // constant score in filter mode
            assert_eq!(scorer.next(), DocId::new(2));
            assert_eq!(scorer.next(), NO_MORE_DOCS);
        });
    }
}