1use std::sync::Arc;
4
5use crate::segment::SegmentReader;
6use crate::structures::TERMINATED;
7use crate::{DocId, Score};
8
9use super::{
10 CountFuture, GlobalStats, MaxScoreExecutor, Query, ScoredDoc, Scorer, ScorerFuture,
11 SparseTermQueryInfo,
12};
13
14#[derive(Default, Clone)]
19pub struct BooleanQuery {
20 pub must: Vec<Arc<dyn Query>>,
21 pub should: Vec<Arc<dyn Query>>,
22 pub must_not: Vec<Arc<dyn Query>>,
23 global_stats: Option<Arc<GlobalStats>>,
25}
26
27impl std::fmt::Debug for BooleanQuery {
28 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29 f.debug_struct("BooleanQuery")
30 .field("must_count", &self.must.len())
31 .field("should_count", &self.should.len())
32 .field("must_not_count", &self.must_not.len())
33 .field("has_global_stats", &self.global_stats.is_some())
34 .finish()
35 }
36}
37
38impl std::fmt::Display for BooleanQuery {
39 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40 write!(f, "Boolean(")?;
41 let mut first = true;
42 for q in &self.must {
43 if !first {
44 write!(f, " ")?;
45 }
46 write!(f, "+{}", q)?;
47 first = false;
48 }
49 for q in &self.should {
50 if !first {
51 write!(f, " ")?;
52 }
53 write!(f, "{}", q)?;
54 first = false;
55 }
56 for q in &self.must_not {
57 if !first {
58 write!(f, " ")?;
59 }
60 write!(f, "-{}", q)?;
61 first = false;
62 }
63 write!(f, ")")
64 }
65}
66
67impl BooleanQuery {
68 pub fn new() -> Self {
69 Self::default()
70 }
71
72 pub fn must(mut self, query: impl Query + 'static) -> Self {
73 self.must.push(Arc::new(query));
74 self
75 }
76
77 pub fn should(mut self, query: impl Query + 'static) -> Self {
78 self.should.push(Arc::new(query));
79 self
80 }
81
82 pub fn must_not(mut self, query: impl Query + 'static) -> Self {
83 self.must_not.push(Arc::new(query));
84 self
85 }
86
87 pub fn with_global_stats(mut self, stats: Arc<GlobalStats>) -> Self {
89 self.global_stats = Some(stats);
90 self
91 }
92}
93
94fn compute_idf(
96 posting_list: &crate::structures::BlockPostingList,
97 field: crate::Field,
98 term: &[u8],
99 num_docs: f32,
100 global_stats: Option<&Arc<GlobalStats>>,
101) -> f32 {
102 if let Some(stats) = global_stats {
103 let global_idf = stats.text_idf(field, &String::from_utf8_lossy(term));
104 if global_idf > 0.0 {
105 return global_idf;
106 }
107 }
108 let doc_freq = posting_list.doc_count() as f32;
109 super::bm25_idf(doc_freq, num_docs)
110}
111
112fn prepare_text_maxscore(
115 should: &[Arc<dyn Query>],
116 reader: &SegmentReader,
117 global_stats: Option<&Arc<GlobalStats>>,
118) -> Option<(Vec<super::TermQueryInfo>, crate::Field, f32, f32)> {
119 let infos: Vec<_> = should
120 .iter()
121 .filter_map(|q| q.as_term_query_info())
122 .collect();
123 if infos.len() != should.len() {
124 return None;
125 }
126 let field = infos[0].field;
127 if !infos.iter().all(|t| t.field == field) {
128 return None;
129 }
130 let avg_field_len = global_stats
131 .map(|s| s.avg_field_len(field))
132 .unwrap_or_else(|| reader.avg_field_len(field));
133 let num_docs = reader.num_docs() as f32;
134 Some((infos, field, avg_field_len, num_docs))
135}
136
137fn finish_text_maxscore<'a>(
139 posting_lists: Vec<(crate::structures::BlockPostingList, f32)>,
140 avg_field_len: f32,
141 limit: usize,
142) -> crate::Result<Box<dyn Scorer + 'a>> {
143 if posting_lists.is_empty() {
144 return Ok(Box::new(EmptyScorer) as Box<dyn Scorer + 'a>);
145 }
146 let results = MaxScoreExecutor::text(posting_lists, avg_field_len, limit).execute_sync()?;
147 Ok(Box::new(TopKResultScorer::new(results)) as Box<dyn Scorer + 'a>)
148}
149
150async fn try_maxscore_scorer<'a>(
152 should: &[Arc<dyn Query>],
153 reader: &'a SegmentReader,
154 limit: usize,
155 global_stats: Option<&Arc<GlobalStats>>,
156) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
157 let (mut infos, _field, avg_field_len, num_docs) =
158 match prepare_text_maxscore(should, reader, global_stats) {
159 Some(v) => v,
160 None => return Ok(None),
161 };
162 let mut posting_lists = Vec::with_capacity(infos.len());
163 for info in infos.drain(..) {
164 if let Some(pl) = reader.get_postings(info.field, &info.term).await? {
165 let idf = compute_idf(&pl, info.field, &info.term, num_docs, global_stats);
166 posting_lists.push((pl, idf));
167 }
168 }
169 Ok(Some(finish_text_maxscore(
170 posting_lists,
171 avg_field_len,
172 limit,
173 )?))
174}
175
/// Blocking counterpart of `try_maxscore_scorer`.
///
/// Yields `Ok(None)` when `prepare_text_maxscore` rejects the clauses.
/// Otherwise the postings of every term are loaded with `get_postings_sync`
/// (terms without postings are skipped), paired with their IDF and handed to
/// `finish_text_maxscore`.
///
/// # Errors
/// Propagates posting-list read errors and executor errors.
#[cfg(feature = "sync")]
fn try_maxscore_scorer_sync<'a>(
    should: &[Arc<dyn Query>],
    reader: &'a SegmentReader,
    limit: usize,
    global_stats: Option<&Arc<GlobalStats>>,
) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
    let Some((infos, _field, avg_field_len, num_docs)) =
        prepare_text_maxscore(should, reader, global_stats)
    else {
        return Ok(None);
    };

    let mut weighted_postings = Vec::with_capacity(infos.len());
    for info in infos {
        let Some(postings) = reader.get_postings_sync(info.field, &info.term)? else {
            continue;
        };
        let idf = compute_idf(&postings, info.field, &info.term, num_docs, global_stats);
        weighted_postings.push((postings, idf));
    }

    finish_text_maxscore(weighted_postings, avg_field_len, limit).map(Some)
}
202
203struct PerFieldGrouping {
205 multi_term_groups: Vec<(crate::Field, f32, Vec<super::TermQueryInfo>)>,
207 fallback_indices: Vec<usize>,
209 per_field_limit: usize,
211 num_docs: f32,
212}
213
214fn prepare_per_field_grouping(
217 should: &[Arc<dyn Query>],
218 reader: &SegmentReader,
219 limit: usize,
220 global_stats: Option<&Arc<GlobalStats>>,
221) -> Option<PerFieldGrouping> {
222 let mut field_groups: rustc_hash::FxHashMap<crate::Field, Vec<(usize, super::TermQueryInfo)>> =
223 rustc_hash::FxHashMap::default();
224 let mut non_term_indices: Vec<usize> = Vec::new();
225
226 for (i, q) in should.iter().enumerate() {
227 if let Some(info) = q.as_term_query_info() {
228 field_groups.entry(info.field).or_default().push((i, info));
229 } else {
230 non_term_indices.push(i);
231 }
232 }
233
234 if !field_groups.values().any(|g| g.len() >= 2) {
235 return None;
236 }
237
238 let num_groups = field_groups.len() + non_term_indices.len();
239 let per_field_limit = limit * num_groups;
240 let num_docs = reader.num_docs() as f32;
241
242 let mut multi_term_groups = Vec::new();
243 let mut fallback_indices = non_term_indices;
244
245 for group in field_groups.into_values() {
246 if group.len() >= 2 {
247 let field = group[0].1.field;
248 let avg_field_len = global_stats
249 .map(|s| s.avg_field_len(field))
250 .unwrap_or_else(|| reader.avg_field_len(field));
251 let infos: Vec<_> = group.into_iter().map(|(_, info)| info).collect();
252 multi_term_groups.push((field, avg_field_len, infos));
253 } else {
254 fallback_indices.push(group[0].0);
255 }
256 }
257
258 Some(PerFieldGrouping {
259 multi_term_groups,
260 fallback_indices,
261 per_field_limit,
262 num_docs,
263 })
264}
265
266fn build_should_scorer<'a>(scorers: Vec<Box<dyn Scorer + 'a>>) -> Box<dyn Scorer + 'a> {
268 if scorers.is_empty() {
269 return Box::new(EmptyScorer);
270 }
271 if scorers.len() == 1 {
272 return scorers.into_iter().next().unwrap();
273 }
274 let mut scorer = BooleanScorer {
275 must: vec![],
276 should: scorers,
277 must_not: vec![],
278 current_doc: 0,
279 };
280 scorer.current_doc = scorer.find_next_match();
281 Box::new(scorer)
282}
283
284async fn try_per_field_maxscore<'a>(
290 should: &[Arc<dyn Query>],
291 reader: &'a SegmentReader,
292 limit: usize,
293 global_stats: Option<&Arc<GlobalStats>>,
294) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
295 let grouping = match prepare_per_field_grouping(should, reader, limit, global_stats) {
296 Some(g) => g,
297 None => return Ok(None),
298 };
299
300 let mut scorers: Vec<Box<dyn Scorer + 'a>> = Vec::new();
301
302 for (field, avg_field_len, infos) in &grouping.multi_term_groups {
303 let mut posting_lists = Vec::with_capacity(infos.len());
304 for info in infos {
305 if let Some(pl) = reader.get_postings(info.field, &info.term).await? {
306 let idf = compute_idf(&pl, *field, &info.term, grouping.num_docs, global_stats);
307 posting_lists.push((pl, idf));
308 }
309 }
310 if !posting_lists.is_empty() {
311 scorers.push(finish_text_maxscore(
312 posting_lists,
313 *avg_field_len,
314 grouping.per_field_limit,
315 )?);
316 }
317 }
318
319 for &idx in &grouping.fallback_indices {
320 scorers.push(should[idx].scorer(reader, limit).await?);
321 }
322
323 Ok(Some(build_should_scorer(scorers)))
324}
325
/// Blocking counterpart of `try_per_field_maxscore`.
///
/// Yields `Ok(None)` when `prepare_per_field_grouping` finds no field with
/// two or more term clauses. Otherwise each multi-term field is run through
/// text MaxScore with the grouping's per-field limit (fields with no postings
/// at all are skipped), every fallback clause contributes its own scorer
/// built with the caller's `limit`, and the scorers are merged by
/// `build_should_scorer`.
///
/// # Errors
/// Propagates posting-list read errors, executor errors and sub-scorer
/// construction errors.
#[cfg(feature = "sync")]
fn try_per_field_maxscore_sync<'a>(
    should: &[Arc<dyn Query>],
    reader: &'a SegmentReader,
    limit: usize,
    global_stats: Option<&Arc<GlobalStats>>,
) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
    let Some(PerFieldGrouping {
        multi_term_groups,
        fallback_indices,
        per_field_limit,
        num_docs,
    }) = prepare_per_field_grouping(should, reader, limit, global_stats)
    else {
        return Ok(None);
    };

    let mut scorers: Vec<Box<dyn Scorer + 'a>> = Vec::new();

    for (field, avg_field_len, infos) in multi_term_groups {
        let mut weighted_postings = Vec::with_capacity(infos.len());
        for info in &infos {
            let Some(postings) = reader.get_postings_sync(info.field, &info.term)? else {
                continue;
            };
            let idf = compute_idf(&postings, field, &info.term, num_docs, global_stats);
            weighted_postings.push((postings, idf));
        }
        if weighted_postings.is_empty() {
            continue;
        }
        scorers.push(finish_text_maxscore(
            weighted_postings,
            avg_field_len,
            per_field_limit,
        )?);
    }

    for idx in fallback_indices {
        scorers.push(should[idx].scorer_sync(reader, limit)?);
    }

    Ok(Some(build_should_scorer(scorers)))
}
364
365fn prepare_sparse_maxscore<'a>(
368 should: &[Arc<dyn Query>],
369 reader: &'a SegmentReader,
370 limit: usize,
371) -> Option<Result<MaxScoreExecutor<'a>, Box<dyn Scorer + 'a>>> {
372 let infos: Vec<SparseTermQueryInfo> = should
373 .iter()
374 .filter_map(|q| q.as_sparse_term_query_info())
375 .collect();
376 if infos.len() != should.len() {
377 return None;
378 }
379 let field = infos[0].field;
380 if !infos.iter().all(|t| t.field == field) {
381 return None;
382 }
383 let si = match reader.sparse_index(field) {
384 Some(si) => si,
385 None => return Some(Err(Box::new(EmptyScorer))),
386 };
387 let query_terms: Vec<(u32, f32)> = infos
388 .iter()
389 .filter(|info| si.has_dimension(info.dim_id))
390 .map(|info| (info.dim_id, info.weight))
391 .collect();
392 if query_terms.is_empty() {
393 return Some(Err(Box::new(EmptyScorer)));
394 }
395 let executor_limit = (limit as f32 * infos[0].over_fetch_factor).ceil() as usize;
396 Some(Ok(MaxScoreExecutor::sparse(
397 si,
398 query_terms,
399 executor_limit,
400 infos[0].heap_factor,
401 )))
402}
403
404fn combine_sparse_results<'a>(
406 raw: Vec<ScoredDoc>,
407 combiner: super::MultiValueCombiner,
408 field: crate::Field,
409 limit: usize,
410) -> Box<dyn Scorer + 'a> {
411 let combined = crate::segment::combine_ordinal_results(
412 raw.into_iter().map(|r| (r.doc_id, r.ordinal, r.score)),
413 combiner,
414 limit,
415 );
416 Box::new(VectorTopKResultScorer::new(combined, field.0))
417}
418
419async fn try_sparse_maxscore_scorer<'a>(
421 should: &[Arc<dyn Query>],
422 reader: &'a SegmentReader,
423 limit: usize,
424) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
425 let executor = match prepare_sparse_maxscore(should, reader, limit) {
426 None => return Ok(None),
427 Some(Err(empty)) => return Ok(Some(empty)),
428 Some(Ok(e)) => e,
429 };
430 let info = should[0].as_sparse_term_query_info().unwrap();
431 let raw = executor.execute().await?;
432 Ok(Some(combine_sparse_results(
433 raw,
434 info.combiner,
435 info.field,
436 limit,
437 )))
438}
439
/// Blocking counterpart of `try_sparse_maxscore_scorer`.
///
/// Yields `Ok(None)` when `prepare_sparse_maxscore` says the plan does not
/// apply, and the empty scorer it supplies when nothing can match. Otherwise
/// the executor is run with `execute_sync` and its hits are merged by
/// `combine_sparse_results`, using the combiner and field of the first clause.
///
/// # Errors
/// Propagates executor errors.
#[cfg(feature = "sync")]
fn try_sparse_maxscore_scorer_sync<'a>(
    should: &[Arc<dyn Query>],
    reader: &'a SegmentReader,
    limit: usize,
) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
    let executor = match prepare_sparse_maxscore(should, reader, limit) {
        Some(Ok(executor)) => executor,
        Some(Err(empty)) => return Ok(Some(empty)),
        None => return Ok(None),
    };

    // A successful preparation means every clause, including the first,
    // exposes sparse term info.
    let first = should[0].as_sparse_term_query_info().unwrap();
    let hits = executor.execute_sync()?;
    let scorer = combine_sparse_results(hits, first.combiner, first.field, limit);
    Ok(Some(scorer))
}
461
462impl Query for BooleanQuery {
463 fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
464 let must = self.must.clone();
466 let should = self.should.clone();
467 let must_not = self.must_not.clone();
468 let global_stats = self.global_stats.clone();
469
470 Box::pin(async move {
471 if must_not.is_empty() {
473 if must.len() == 1 && should.is_empty() {
474 return must[0].scorer(reader, limit).await;
475 }
476 if should.len() == 1 && must.is_empty() {
477 return should[0].scorer(reader, limit).await;
478 }
479 }
480
481 if must.is_empty() && must_not.is_empty() && should.len() >= 2 {
484 if let Some(scorer) =
486 try_maxscore_scorer(&should, reader, limit, global_stats.as_ref()).await?
487 {
488 return Ok(scorer);
489 }
490 if let Some(scorer) = try_sparse_maxscore_scorer(&should, reader, limit).await? {
492 return Ok(scorer);
493 }
494 if let Some(scorer) =
496 try_per_field_maxscore(&should, reader, limit, global_stats.as_ref()).await?
497 {
498 return Ok(scorer);
499 }
500 }
501
502 if !should.is_empty() && !must.is_empty() && limit < usize::MAX / 4 {
507 let should_scorer = if should.len() == 1 {
508 should[0].scorer(reader, limit).await?
509 } else {
510 let sub = BooleanQuery {
511 must: Vec::new(),
512 should: should.clone(),
513 must_not: Vec::new(),
514 global_stats: global_stats.clone(),
515 };
516 sub.scorer(reader, limit).await?
517 };
518
519 if should_scorer.size_hint() >= limit as u32 {
520 let mut predicates: Vec<super::DocPredicate<'a>> = Vec::new();
522 let mut must_verifiers: Vec<Box<dyn super::Scorer + 'a>> = Vec::new();
523 let mut filter_score = 0.0f32;
524
525 for q in &must {
526 if let Some(pred) = q.as_doc_predicate(reader) {
527 predicates.push(pred);
528 filter_score += 1.0;
529 } else {
530 must_verifiers.push(q.scorer(reader, limit).await?);
531 }
532 }
533
534 let mut must_not_verifiers: Vec<Box<dyn super::Scorer + 'a>> = Vec::new();
536 for q in &must_not {
537 if let Some(pred) = q.as_doc_predicate(reader) {
538 let negated: super::DocPredicate<'a> =
539 Box::new(move |doc_id| !pred(doc_id));
540 predicates.push(negated);
541 } else {
542 must_not_verifiers.push(q.scorer(reader, limit).await?);
543 }
544 }
545
546 log::debug!(
547 "BooleanQuery planner: push-down {} predicates + {} must verifiers + {} must_not verifiers, {} SHOULD drive (size_hint={})",
548 predicates.len(),
549 must_verifiers.len(),
550 must_not_verifiers.len(),
551 should.len(),
552 should_scorer.size_hint()
553 );
554
555 return Ok(Box::new(super::PredicatedScorer::new(
556 should_scorer,
557 predicates,
558 must_verifiers,
559 must_not_verifiers,
560 filter_score,
561 )));
562 }
563
564 let mut must_scorers = Vec::with_capacity(must.len());
567 for q in &must {
568 must_scorers.push(q.scorer(reader, limit).await?);
569 }
570
571 let mut must_not_scorers = Vec::with_capacity(must_not.len());
572 for q in &must_not {
573 must_not_scorers.push(q.scorer(reader, limit).await?);
574 }
575
576 let mut scorer = BooleanScorer {
577 must: must_scorers,
578 should: vec![should_scorer],
579 must_not: must_not_scorers,
580 current_doc: 0,
581 };
582 scorer.current_doc = scorer.find_next_match();
583 return Ok(Box::new(scorer));
584 }
585
586 let mut must_scorers = Vec::with_capacity(must.len());
588 for q in &must {
589 must_scorers.push(q.scorer(reader, limit).await?);
590 }
591
592 let mut should_scorers = Vec::with_capacity(should.len());
593 for q in &should {
594 should_scorers.push(q.scorer(reader, limit).await?);
595 }
596
597 let mut must_not_scorers = Vec::with_capacity(must_not.len());
598 for q in &must_not {
599 must_not_scorers.push(q.scorer(reader, limit).await?);
600 }
601
602 let mut scorer = BooleanScorer {
603 must: must_scorers,
604 should: should_scorers,
605 must_not: must_not_scorers,
606 current_doc: 0,
607 };
608 scorer.current_doc = scorer.find_next_match();
610 Ok(Box::new(scorer) as Box<dyn Scorer + 'a>)
611 })
612 }
613
614 #[cfg(feature = "sync")]
615 fn scorer_sync<'a>(
616 &self,
617 reader: &'a SegmentReader,
618 limit: usize,
619 ) -> crate::Result<Box<dyn Scorer + 'a>> {
620 if self.must_not.is_empty() {
622 if self.must.len() == 1 && self.should.is_empty() {
623 return self.must[0].scorer_sync(reader, limit);
624 }
625 if self.should.len() == 1 && self.must.is_empty() {
626 return self.should[0].scorer_sync(reader, limit);
627 }
628 }
629
630 if self.must.is_empty() && self.must_not.is_empty() && self.should.len() >= 2 {
632 if let Some(scorer) =
633 try_maxscore_scorer_sync(&self.should, reader, limit, self.global_stats.as_ref())?
634 {
635 return Ok(scorer);
636 }
637 if let Some(scorer) = try_sparse_maxscore_scorer_sync(&self.should, reader, limit)? {
638 return Ok(scorer);
639 }
640 if let Some(scorer) = try_per_field_maxscore_sync(
642 &self.should,
643 reader,
644 limit,
645 self.global_stats.as_ref(),
646 )? {
647 return Ok(scorer);
648 }
649 }
650
651 if !self.should.is_empty() && !self.must.is_empty() && limit < usize::MAX / 4 {
653 let should_scorer = if self.should.len() == 1 {
654 self.should[0].scorer_sync(reader, limit)?
655 } else {
656 let sub = BooleanQuery {
657 must: Vec::new(),
658 should: self.should.clone(),
659 must_not: Vec::new(),
660 global_stats: self.global_stats.clone(),
661 };
662 sub.scorer_sync(reader, limit)?
663 };
664
665 if should_scorer.size_hint() >= limit as u32 {
666 let mut predicates: Vec<super::DocPredicate<'a>> = Vec::new();
667 let mut must_verifiers: Vec<Box<dyn super::Scorer + 'a>> = Vec::new();
668 let mut filter_score = 0.0f32;
669
670 for q in &self.must {
671 if let Some(pred) = q.as_doc_predicate(reader) {
672 predicates.push(pred);
673 filter_score += 1.0;
674 } else {
675 must_verifiers.push(q.scorer_sync(reader, limit)?);
676 }
677 }
678
679 let mut must_not_verifiers: Vec<Box<dyn super::Scorer + 'a>> = Vec::new();
680 for q in &self.must_not {
681 if let Some(pred) = q.as_doc_predicate(reader) {
682 let negated: super::DocPredicate<'a> =
683 Box::new(move |doc_id| !pred(doc_id));
684 predicates.push(negated);
685 } else {
686 must_not_verifiers.push(q.scorer_sync(reader, limit)?);
687 }
688 }
689
690 log::debug!(
691 "BooleanQuery planner (sync): push-down {} predicates + {} must verifiers + {} must_not verifiers, {} SHOULD drive",
692 predicates.len(),
693 must_verifiers.len(),
694 must_not_verifiers.len(),
695 self.should.len()
696 );
697
698 return Ok(Box::new(super::PredicatedScorer::new(
699 should_scorer,
700 predicates,
701 must_verifiers,
702 must_not_verifiers,
703 filter_score,
704 )));
705 }
706
707 let mut must_scorers = Vec::with_capacity(self.must.len());
710 for q in &self.must {
711 must_scorers.push(q.scorer_sync(reader, limit)?);
712 }
713
714 let mut must_not_scorers = Vec::with_capacity(self.must_not.len());
715 for q in &self.must_not {
716 must_not_scorers.push(q.scorer_sync(reader, limit)?);
717 }
718
719 let mut scorer = BooleanScorer {
720 must: must_scorers,
721 should: vec![should_scorer],
722 must_not: must_not_scorers,
723 current_doc: 0,
724 };
725 scorer.current_doc = scorer.find_next_match();
726 return Ok(Box::new(scorer));
727 }
728
729 let mut must_scorers = Vec::with_capacity(self.must.len());
731 for q in &self.must {
732 must_scorers.push(q.scorer_sync(reader, limit)?);
733 }
734
735 let mut should_scorers = Vec::with_capacity(self.should.len());
736 for q in &self.should {
737 should_scorers.push(q.scorer_sync(reader, limit)?);
738 }
739
740 let mut must_not_scorers = Vec::with_capacity(self.must_not.len());
741 for q in &self.must_not {
742 must_not_scorers.push(q.scorer_sync(reader, limit)?);
743 }
744
745 let mut scorer = BooleanScorer {
746 must: must_scorers,
747 should: should_scorers,
748 must_not: must_not_scorers,
749 current_doc: 0,
750 };
751 scorer.current_doc = scorer.find_next_match();
752 Ok(Box::new(scorer) as Box<dyn Scorer + 'a>)
753 }
754
755 fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
756 let must = self.must.clone();
757 let should = self.should.clone();
758
759 Box::pin(async move {
760 if !must.is_empty() {
761 let mut estimates = Vec::with_capacity(must.len());
762 for q in &must {
763 estimates.push(q.count_estimate(reader).await?);
764 }
765 estimates
766 .into_iter()
767 .min()
768 .ok_or_else(|| crate::Error::Corruption("Empty must clause".to_string()))
769 } else if !should.is_empty() {
770 let mut sum = 0u32;
771 for q in &should {
772 sum = sum.saturating_add(q.count_estimate(reader).await?);
773 }
774 Ok(sum)
775 } else {
776 Ok(0)
777 }
778 })
779 }
780}
781
782struct BooleanScorer<'a> {
783 must: Vec<Box<dyn Scorer + 'a>>,
784 should: Vec<Box<dyn Scorer + 'a>>,
785 must_not: Vec<Box<dyn Scorer + 'a>>,
786 current_doc: DocId,
787}
788
789impl BooleanScorer<'_> {
790 fn find_next_match(&mut self) -> DocId {
791 if self.must.is_empty() && self.should.is_empty() {
792 return TERMINATED;
793 }
794
795 loop {
796 let candidate = if !self.must.is_empty() {
797 let mut max_doc = self
798 .must
799 .iter()
800 .map(|s| s.doc())
801 .max()
802 .unwrap_or(TERMINATED);
803
804 if max_doc == TERMINATED {
805 return TERMINATED;
806 }
807
808 loop {
809 let mut all_match = true;
810 for scorer in &mut self.must {
811 let doc = scorer.seek(max_doc);
812 if doc == TERMINATED {
813 return TERMINATED;
814 }
815 if doc > max_doc {
816 max_doc = doc;
817 all_match = false;
818 break;
819 }
820 }
821 if all_match {
822 break;
823 }
824 }
825 max_doc
826 } else {
827 self.should
828 .iter()
829 .map(|s| s.doc())
830 .filter(|&d| d != TERMINATED)
831 .min()
832 .unwrap_or(TERMINATED)
833 };
834
835 if candidate == TERMINATED {
836 return TERMINATED;
837 }
838
839 let excluded = self.must_not.iter_mut().any(|scorer| {
840 let doc = scorer.seek(candidate);
841 doc == candidate
842 });
843
844 if !excluded {
845 for scorer in &mut self.should {
847 scorer.seek(candidate);
848 }
849 self.current_doc = candidate;
850 return candidate;
851 }
852
853 if !self.must.is_empty() {
855 for scorer in &mut self.must {
856 scorer.advance();
857 }
858 } else {
859 for scorer in &mut self.should {
861 if scorer.doc() <= candidate && scorer.doc() != TERMINATED {
862 scorer.seek(candidate + 1);
863 }
864 }
865 }
866 }
867 }
868}
869
870impl super::docset::DocSet for BooleanScorer<'_> {
871 fn doc(&self) -> DocId {
872 self.current_doc
873 }
874
875 fn advance(&mut self) -> DocId {
876 if !self.must.is_empty() {
877 for scorer in &mut self.must {
878 scorer.advance();
879 }
880 } else {
881 for scorer in &mut self.should {
882 if scorer.doc() == self.current_doc {
883 scorer.advance();
884 }
885 }
886 }
887
888 self.current_doc = self.find_next_match();
889 self.current_doc
890 }
891
892 fn seek(&mut self, target: DocId) -> DocId {
893 for scorer in &mut self.must {
894 scorer.seek(target);
895 }
896
897 for scorer in &mut self.should {
898 scorer.seek(target);
899 }
900
901 self.current_doc = self.find_next_match();
902 self.current_doc
903 }
904
905 fn size_hint(&self) -> u32 {
906 if !self.must.is_empty() {
907 self.must.iter().map(|s| s.size_hint()).min().unwrap_or(0)
908 } else {
909 self.should.iter().map(|s| s.size_hint()).sum()
910 }
911 }
912}
913
914impl Scorer for BooleanScorer<'_> {
915 fn score(&self) -> Score {
916 let mut total = 0.0;
917
918 for scorer in &self.must {
919 if scorer.doc() == self.current_doc {
920 total += scorer.score();
921 }
922 }
923
924 for scorer in &self.should {
925 if scorer.doc() == self.current_doc {
926 total += scorer.score();
927 }
928 }
929
930 total
931 }
932
933 fn matched_positions(&self) -> Option<super::MatchedPositions> {
934 let mut all_positions: super::MatchedPositions = Vec::new();
935
936 for scorer in &self.must {
937 if scorer.doc() == self.current_doc
938 && let Some(positions) = scorer.matched_positions()
939 {
940 all_positions.extend(positions);
941 }
942 }
943
944 for scorer in &self.should {
945 if scorer.doc() == self.current_doc
946 && let Some(positions) = scorer.matched_positions()
947 {
948 all_positions.extend(positions);
949 }
950 }
951
952 if all_positions.is_empty() {
953 None
954 } else {
955 Some(all_positions)
956 }
957 }
958}
959
960struct TopKResultScorer {
962 results: Vec<ScoredDoc>,
963 position: usize,
964}
965
966impl TopKResultScorer {
967 fn new(mut results: Vec<ScoredDoc>) -> Self {
968 results.sort_unstable_by_key(|r| r.doc_id);
970 Self {
971 results,
972 position: 0,
973 }
974 }
975}
976
977impl super::docset::DocSet for TopKResultScorer {
978 fn doc(&self) -> DocId {
979 if self.position < self.results.len() {
980 self.results[self.position].doc_id
981 } else {
982 TERMINATED
983 }
984 }
985
986 fn advance(&mut self) -> DocId {
987 self.position += 1;
988 self.doc()
989 }
990
991 fn seek(&mut self, target: DocId) -> DocId {
992 let remaining = &self.results[self.position..];
993 self.position += remaining.partition_point(|r| r.doc_id < target);
994 self.doc()
995 }
996
997 fn size_hint(&self) -> u32 {
998 (self.results.len() - self.position) as u32
999 }
1000}
1001
1002impl Scorer for TopKResultScorer {
1003 fn score(&self) -> Score {
1004 if self.position < self.results.len() {
1005 self.results[self.position].score
1006 } else {
1007 0.0
1008 }
1009 }
1010}
1011
1012struct VectorTopKResultScorer {
1015 results: Vec<crate::segment::VectorSearchResult>,
1016 position: usize,
1017 field_id: u32,
1018}
1019
1020impl VectorTopKResultScorer {
1021 fn new(mut results: Vec<crate::segment::VectorSearchResult>, field_id: u32) -> Self {
1022 results.sort_unstable_by_key(|r| r.doc_id);
1023 Self {
1024 results,
1025 position: 0,
1026 field_id,
1027 }
1028 }
1029}
1030
1031impl super::docset::DocSet for VectorTopKResultScorer {
1032 fn doc(&self) -> DocId {
1033 if self.position < self.results.len() {
1034 self.results[self.position].doc_id
1035 } else {
1036 TERMINATED
1037 }
1038 }
1039
1040 fn advance(&mut self) -> DocId {
1041 self.position += 1;
1042 self.doc()
1043 }
1044
1045 fn seek(&mut self, target: DocId) -> DocId {
1046 let remaining = &self.results[self.position..];
1047 self.position += remaining.partition_point(|r| r.doc_id < target);
1048 self.doc()
1049 }
1050
1051 fn size_hint(&self) -> u32 {
1052 (self.results.len() - self.position) as u32
1053 }
1054}
1055
1056impl Scorer for VectorTopKResultScorer {
1057 fn score(&self) -> Score {
1058 if self.position < self.results.len() {
1059 self.results[self.position].score
1060 } else {
1061 0.0
1062 }
1063 }
1064
1065 fn matched_positions(&self) -> Option<super::MatchedPositions> {
1066 if self.position >= self.results.len() {
1067 return None;
1068 }
1069 let result = &self.results[self.position];
1070 let scored_positions: Vec<super::ScoredPosition> = result
1071 .ordinals
1072 .iter()
1073 .map(|&(ordinal, score)| super::ScoredPosition::new(ordinal, score))
1074 .collect();
1075 Some(vec![(self.field_id, scored_positions)])
1076 }
1077}
1078
1079struct EmptyScorer;
1081
1082impl super::docset::DocSet for EmptyScorer {
1083 fn doc(&self) -> DocId {
1084 TERMINATED
1085 }
1086
1087 fn advance(&mut self) -> DocId {
1088 TERMINATED
1089 }
1090
1091 fn seek(&mut self, _target: DocId) -> DocId {
1092 TERMINATED
1093 }
1094
1095 fn size_hint(&self) -> u32 {
1096 0
1097 }
1098}
1099
1100impl Scorer for EmptyScorer {
1101 fn score(&self) -> Score {
1102 0.0
1103 }
1104}
1105
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dsl::Field;
    use crate::query::TermQuery;

    /// Three SHOULD term clauses on one field all expose term info for that
    /// field — the structural precondition of the text MaxScore plan.
    #[test]
    fn test_maxscore_eligible_pure_or_same_field() {
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"))
            .should(TermQuery::text(Field(0), "foo"));

        let every_clause_is_a_term = query
            .should
            .iter()
            .all(|clause| clause.as_term_query_info().is_some());
        assert!(every_clause_is_a_term);

        let infos: Vec<_> = query
            .should
            .iter()
            .filter_map(|clause| clause.as_term_query_info())
            .collect();
        assert_eq!(infos.len(), 3);
        assert!(infos.iter().all(|info| info.field == Field(0)));
    }

    /// SHOULD term clauses on different fields report different fields in
    /// their term info.
    #[test]
    fn test_maxscore_not_eligible_different_fields() {
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(1), "world"));

        let infos: Vec<_> = query
            .should
            .iter()
            .filter_map(|clause| clause.as_term_query_info())
            .collect();
        assert_eq!(infos.len(), 2);
        assert!(infos[0].field != infos[1].field);
    }

    /// Adding a MUST clause leaves the `must` list non-empty.
    #[test]
    fn test_maxscore_not_eligible_with_must() {
        let query = BooleanQuery::new()
            .must(TermQuery::text(Field(0), "required"))
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"));

        assert!(!query.must.is_empty());
    }

    /// Adding a MUST NOT clause leaves the `must_not` list non-empty.
    #[test]
    fn test_maxscore_not_eligible_with_must_not() {
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"))
            .must_not(TermQuery::text(Field(0), "excluded"));

        assert!(!query.must_not.is_empty());
    }

    /// A query built from one SHOULD clause holds exactly one SHOULD clause.
    #[test]
    fn test_maxscore_not_eligible_single_term() {
        let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));

        assert_eq!(query.should.len(), 1);
    }

    /// A text term query exposes its field and raw term bytes.
    #[test]
    fn test_term_query_info_extraction() {
        let term_query = TermQuery::text(Field(42), "test");
        let extracted = term_query.as_term_query_info();

        assert!(extracted.is_some());
        let info = extracted.unwrap();
        assert_eq!(info.field, Field(42));
        assert_eq!(info.term, b"test");
    }

    /// A boolean query does not present itself as a term query.
    #[test]
    fn test_boolean_query_no_term_info() {
        let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));

        assert!(query.as_term_query_info().is_none());
    }
}