1use crate::core::{DocId, FieldId, NO_MORE_DOCS, Result, ScoreMode, Scorer, TwoPhaseIterator};
9
10use crate::query::{BoundQuery, Query, ScorerSupplier};
11use crate::search::bm25::{BlockMaxBm25Scorer, Bm25Scorer, Bm25Weight};
12use crate::search::searcher::Searcher;
13use crate::segment::reader::SegmentReader;
14
/// Query for a single term in a single field.
///
/// The query holds only the field name and the term text; the statistics
/// needed for scoring are captured later, when the query is bound to a
/// searcher.
pub struct TermQuery {
    /// Name of the field the term is looked up in.
    pub field: String,
    /// Term text; this file passes it unchanged to the doc-frequency and
    /// postings lookups.
    pub value: String,
}
20
21impl Query for TermQuery {
22 fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
23 Ok(Box::new(BoundTermQuery {
24 field: self.field.clone(),
25 value: self.value.clone(),
26 score_mode,
27 total_docs: searcher.total_docs(),
28 doc_freq: searcher.doc_freq(&self.field, &self.value),
29 avg_field_length: searcher.avg_field_length(&self.field),
30 }))
31 }
32}
33
34pub(crate) struct BoundTermQuery {
35 pub(crate) field: String,
36 pub(crate) value: String,
37 pub(crate) score_mode: ScoreMode,
38 pub(crate) total_docs: u32,
39 #[allow(dead_code)]
40 pub(crate) doc_freq: u32,
41 pub(crate) avg_field_length: f32,
42}
43
44impl BoundTermQuery {
45 fn resolve_field(&self, reader: &SegmentReader) -> Option<FieldId> {
47 reader
48 .header()
49 .fields
50 .iter()
51 .find(|f| f.field_name == self.field)
52 .map(|f| f.field_id)
53 }
54}
55
56impl BoundQuery for BoundTermQuery {
57 fn bulk_score(
61 &self,
62 reader: &SegmentReader,
63 collector: &mut crate::search::collector::TopDocsCollector,
64 segment_id: crate::core::SegmentId,
65 ) -> Result<Option<u64>> {
66 let field_id = match self.resolve_field(reader) {
67 Some(id) => id,
68 None => return Ok(Some(0)),
69 };
70
71 let doc_freq = reader.doc_freq(field_id, &self.value);
72 if doc_freq == 0 {
73 return Ok(Some(0));
74 }
75
76 if !self.score_mode.needs_scores() {
77 let postings = reader.postings(field_id, &self.value).unwrap();
78 let mut scorer = FilterScorer::new(postings);
79 return Ok(Some(crate::search::score_loop(
80 &mut scorer,
81 collector,
82 segment_id,
83 )));
84 }
85
86 let weight = Bm25Weight::new(self.total_docs, doc_freq, self.avg_field_length);
87 let norms = reader.norms(field_id).unwrap();
88
89 if let Some(dl) = norms.uniform_norm() {
90 let constant =
91 crate::search::bm25::bm25_score(weight.idf, 1.0, dl, weight.avg_field_length);
92 let postings = reader.postings(field_id, &self.value).unwrap();
93 let mut scorer = ConstantBm25Scorer::new(postings, constant);
94 return Ok(Some(crate::search::score_loop(
95 &mut scorer,
96 collector,
97 segment_id,
98 )));
99 }
100
101 if let Some(block_postings) = reader.postings_block_max(field_id, &self.value) {
102 let mut scorer = BlockMaxBm25Scorer::new(weight, block_postings, norms);
103 return Ok(Some(crate::search::score_loop(
104 &mut scorer,
105 collector,
106 segment_id,
107 )));
108 }
109
110 let postings = reader.postings(field_id, &self.value).unwrap();
111 let mut scorer = Bm25Scorer::new(weight, postings, norms);
112 Ok(Some(crate::search::score_loop(
113 &mut scorer,
114 collector,
115 segment_id,
116 )))
117 }
118
119 fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
120 let field_id = match self.resolve_field(reader) {
121 Some(id) => id,
122 None => return Ok(None),
123 };
124
125 let doc_freq = reader.doc_freq(field_id, &self.value);
126 if doc_freq == 0 {
127 return Ok(None);
128 }
129
130 Ok(Some(Box::new(TermScorerSupplier {
131 field_id,
132 value: self.value.clone(),
133 score_mode: self.score_mode,
134 doc_freq,
135 total_docs: self.total_docs,
136 avg_field_length: self.avg_field_length,
137 segment_data: reader as *const SegmentReader,
138 })))
139 }
140
141 fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<crate::search::Explanation> {
143 use crate::search::Explanation;
144 use crate::search::bm25::{bm25_idf, bm25_score};
145
146 let field_id = match self.resolve_field(reader) {
147 Some(id) => id,
148 None => {
149 return Ok(Explanation::no_match(format!(
150 "no field '{}' in segment",
151 self.field
152 )));
153 }
154 };
155
156 let doc_freq = reader.doc_freq(field_id, &self.value);
157 if doc_freq == 0 {
158 return Ok(Explanation::no_match(format!(
159 "term '{}' not found in field '{}'",
160 self.value, self.field
161 )));
162 }
163
164 let mut postings = match reader.postings(field_id, &self.value) {
166 Some(p) => p,
167 None => {
168 return Ok(Explanation::no_match(format!(
169 "term '{}' not found",
170 self.value
171 )));
172 }
173 };
174
175 let mut tf = 0u32;
176 while let Some((did, t)) = postings.next() {
177 if did == doc {
178 tf = t;
179 break;
180 }
181 if did > doc {
182 return Ok(Explanation::no_match(format!(
183 "doc {} does not contain term '{}'",
184 doc.as_u32(),
185 self.value
186 )));
187 }
188 }
189 if tf == 0 {
190 return Ok(Explanation::no_match(format!(
191 "doc {} does not contain term '{}'",
192 doc.as_u32(),
193 self.value
194 )));
195 }
196
197 let norms = reader.norms(field_id).unwrap();
199 let dl = crate::inverted::norms::decode_norm(norms.raw_byte(doc));
200 let avgdl = self.avg_field_length;
201
202 let idf = bm25_idf(self.total_docs, doc_freq);
203 let score = bm25_score(idf, tf as f32, dl, avgdl);
204
205 let idf_exp = Explanation::leaf(
206 idf,
207 format!("idf(docFreq={}, docCount={})", doc_freq, self.total_docs),
208 );
209 let tf_exp = Explanation::leaf(
210 tf as f32,
211 format!("tf(freq={} in doc {})", tf, doc.as_u32()),
212 );
213 let dl_exp = Explanation::leaf(dl, format!("dl(fieldLength={})", dl));
214 let avgdl_exp = Explanation::leaf(avgdl, format!("avgdl(avgFieldLength={:.1})", avgdl));
215
216 Ok(Explanation::matched(
217 score,
218 format!(
219 "score(freq={}) = idf * tf_norm, term={}, field={}",
220 tf, self.value, self.field
221 ),
222 vec![idf_exp, tf_exp, dl_exp, avgdl_exp],
223 ))
224 }
225}
226
227struct TermScorerSupplier {
228 field_id: FieldId,
229 value: String,
230 score_mode: ScoreMode,
231 doc_freq: u32,
232 total_docs: u32,
233 avg_field_length: f32,
234 segment_data: *const SegmentReader,
237}
238
239unsafe impl Send for TermScorerSupplier {}
242
243impl ScorerSupplier for TermScorerSupplier {
244 fn cost(&self) -> u64 {
245 self.doc_freq as u64
246 }
247
248 fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
249 let reader = unsafe { &*self.segment_data };
251
252 if !self.score_mode.needs_scores() {
253 let postings = reader.postings(self.field_id, &self.value).unwrap();
255 return Ok(Box::new(FilterScorer::new(postings)));
256 }
257
258 let weight = Bm25Weight::new(self.total_docs, self.doc_freq, self.avg_field_length);
259 let norms = reader.norms(self.field_id).unwrap();
260
261 if let Some(dl) = norms.uniform_norm() {
265 let constant =
266 crate::search::bm25::bm25_score(weight.idf, 1.0, dl, weight.avg_field_length);
267 let postings = reader.postings(self.field_id, &self.value).unwrap();
268 return Ok(Box::new(ConstantBm25Scorer::new(postings, constant)));
269 }
270
271 if let Some(block_postings) = reader.postings_block_max(self.field_id, &self.value) {
273 return Ok(Box::new(BlockMaxBm25Scorer::new(
274 weight,
275 block_postings,
276 norms,
277 )));
278 }
279
280 let postings = reader.postings(self.field_id, &self.value).unwrap();
281 Ok(Box::new(Bm25Scorer::new(weight, postings, norms)))
282 }
283}
284
285struct ConstantBm25Scorer<'a> {
293 postings: crate::inverted::postings::PostingListReader<'a>,
294 current: DocId,
295 constant_score: f32,
296}
297
298impl<'a> ConstantBm25Scorer<'a> {
299 fn new(
300 mut postings: crate::inverted::postings::PostingListReader<'a>,
301 constant_score: f32,
302 ) -> Self {
303 let current = match postings.next() {
304 Some((id, _)) => id,
305 None => NO_MORE_DOCS,
306 };
307 Self {
308 postings,
309 current,
310 constant_score,
311 }
312 }
313}
314
315impl Scorer for ConstantBm25Scorer<'_> {
316 fn doc_id(&self) -> DocId {
317 self.current
318 }
319
320 fn next(&mut self) -> DocId {
321 self.current = match self.postings.next() {
322 Some((id, _)) => id,
323 None => NO_MORE_DOCS,
324 };
325 self.current
326 }
327
328 fn advance(&mut self, target: DocId) -> DocId {
329 while self.current < target && self.current != NO_MORE_DOCS {
330 self.next();
331 }
332 self.current
333 }
334
335 fn score(&mut self) -> f32 {
336 self.constant_score
337 }
338
339 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
340 None
341 }
342
343 fn max_score(&self) -> f32 {
344 self.constant_score
345 }
346}
347
348pub(crate) struct FilterScorer<'a> {
351 postings: crate::inverted::postings::PostingListReader<'a>,
352 current: DocId,
353}
354
355impl<'a> FilterScorer<'a> {
356 pub(crate) fn new(mut postings: crate::inverted::postings::PostingListReader<'a>) -> Self {
357 let current = match postings.next() {
358 Some((id, _)) => id,
359 None => NO_MORE_DOCS,
360 };
361 Self { postings, current }
362 }
363}
364
365impl Scorer for FilterScorer<'_> {
366 fn doc_id(&self) -> DocId {
367 self.current
368 }
369
370 fn next(&mut self) -> DocId {
371 self.current = match self.postings.next() {
372 Some((id, _)) => id,
373 None => NO_MORE_DOCS,
374 };
375 self.current
376 }
377
378 fn advance(&mut self, target: DocId) -> DocId {
379 while self.current < target && self.current != NO_MORE_DOCS {
380 self.next();
381 }
382 self.current
383 }
384
385 fn score(&mut self) -> f32 {
386 1.0 }
388
389 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
390 None
391 }
392}
393
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::{AnalyzerRegistry, Token};
    use crate::core::SegmentId;
    use crate::mapping::{FieldType, Mapping};
    use crate::search::segment_store::SegmentStore;
    use crate::segment::builder::SegmentBuilder;

    /// Turns a list of terms into tokens whose position is the term's index.
    fn make_tokens(terms: &[&str]) -> Vec<Token> {
        let mut tokens = Vec::with_capacity(terms.len());
        for (position, term) in terms.iter().enumerate() {
            tokens.push(Token::new(*term, 0, term.len(), position as u32));
        }
        tokens
    }

    /// Mapping with a text field `body` and a keyword field `tag`.
    fn test_schema() -> Mapping {
        Mapping::builder()
            .field("body", FieldType::Text)
            .field("tag", FieldType::Keyword)
            .build()
    }

    /// Builds a three-document segment:
    /// doc 0 = "hello world" / tag a, doc 1 = "hello luci" / tag b,
    /// doc 2 = "goodbye" / tag a.
    fn build_test_segment() -> SegmentReader {
        let schema = test_schema();
        let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);

        // (body terms, tag term, stored source) for each document, in doc-id order.
        let documents: [(&[&str], &str, &[u8]); 3] = [
            (
                &["hello", "world"],
                "a",
                br#"{"body":"hello world","tag":"a"}"#,
            ),
            (
                &["hello", "luci"],
                "b",
                br#"{"body":"hello luci","tag":"b"}"#,
            ),
            (&["goodbye"], "a", br#"{"body":"goodbye","tag":"a"}"#),
        ];
        for (body, tag, source) in documents {
            builder.add_document(
                &[
                    (FieldId::new(0), make_tokens(body)),
                    (FieldId::new(1), make_tokens(&[tag])),
                ],
                source,
            );
        }

        SegmentReader::open(builder.build()).unwrap()
    }

    /// Runs `check` against a searcher over a store holding the test segment.
    fn with_searcher(check: impl FnOnce(&Searcher)) {
        let store = SegmentStore::new(
            vec![build_test_segment()],
            AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&store);
        check(&searcher);
    }

    /// Binds a `TermQuery` for `field`/`value` to `searcher`.
    fn bind_term(
        searcher: &Searcher,
        field: &str,
        value: &str,
        score_mode: ScoreMode,
    ) -> Box<dyn BoundQuery> {
        let query = TermQuery {
            field: field.into(),
            value: value.into(),
        };
        query.bind(searcher, score_mode).unwrap()
    }

    #[test]
    fn term_query_creates_weight() {
        with_searcher(|searcher| {
            // Binding must succeed; the bound query itself is not inspected.
            let weight = bind_term(searcher, "tag", "a", ScoreMode::Complete);
            drop(weight);
        });
    }

    #[test]
    fn term_query_scorer_iterates() {
        with_searcher(|searcher| {
            let weight = bind_term(searcher, "tag", "a", ScoreMode::Complete);

            let reader = &searcher.segments()[0];
            let supplier = weight.scorer_supplier(reader).unwrap().unwrap();
            // Tag "a" is on docs 0 and 2.
            assert_eq!(supplier.cost(), 2);

            let mut scorer = supplier.scorer().unwrap();
            assert_eq!(scorer.doc_id(), DocId::new(0));
            assert_eq!(scorer.next(), DocId::new(2));
            assert_eq!(scorer.next(), NO_MORE_DOCS);
        });
    }

    #[test]
    fn term_query_missing_term() {
        with_searcher(|searcher| {
            let weight = bind_term(searcher, "tag", "nonexistent", ScoreMode::Complete);

            let reader = &searcher.segments()[0];
            let supplier = weight.scorer_supplier(reader).unwrap();
            assert!(supplier.is_none());
        });
    }

    #[test]
    fn term_query_missing_field() {
        with_searcher(|searcher| {
            let weight = bind_term(searcher, "nosuchfield", "x", ScoreMode::Complete);

            let reader = &searcher.segments()[0];
            let supplier = weight.scorer_supplier(reader).unwrap();
            assert!(supplier.is_none());
        });
    }

    #[test]
    fn term_query_filter_context() {
        with_searcher(|searcher| {
            let weight = bind_term(searcher, "tag", "a", ScoreMode::CompleteNoScores);

            let reader = &searcher.segments()[0];
            let supplier = weight.scorer_supplier(reader).unwrap().unwrap();
            let mut scorer = supplier.scorer().unwrap();

            assert_eq!(scorer.doc_id(), DocId::new(0));
            // With scores disabled the scorer reports a fixed 1.0.
            assert_eq!(scorer.score(), 1.0);
            assert_eq!(scorer.next(), DocId::new(2));
            assert_eq!(scorer.next(), NO_MORE_DOCS);
        });
    }
}