1use std::sync::Arc;
4
5use crate::segment::SegmentReader;
6use crate::structures::TERMINATED;
7use crate::{DocId, Score};
8
9use super::{
10 CountFuture, GlobalStats, MaxScoreExecutor, Query, ScoredDoc, Scorer, ScorerFuture,
11 SparseTermQueryInfo,
12};
13
14#[derive(Default, Clone)]
19pub struct BooleanQuery {
20 pub must: Vec<Arc<dyn Query>>,
21 pub should: Vec<Arc<dyn Query>>,
22 pub must_not: Vec<Arc<dyn Query>>,
23 global_stats: Option<Arc<GlobalStats>>,
25}
26
27impl std::fmt::Debug for BooleanQuery {
28 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29 f.debug_struct("BooleanQuery")
30 .field("must_count", &self.must.len())
31 .field("should_count", &self.should.len())
32 .field("must_not_count", &self.must_not.len())
33 .field("has_global_stats", &self.global_stats.is_some())
34 .finish()
35 }
36}
37
38impl std::fmt::Display for BooleanQuery {
39 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40 write!(f, "Boolean(")?;
41 let mut first = true;
42 for q in &self.must {
43 if !first {
44 write!(f, " ")?;
45 }
46 write!(f, "+{}", q)?;
47 first = false;
48 }
49 for q in &self.should {
50 if !first {
51 write!(f, " ")?;
52 }
53 write!(f, "{}", q)?;
54 first = false;
55 }
56 for q in &self.must_not {
57 if !first {
58 write!(f, " ")?;
59 }
60 write!(f, "-{}", q)?;
61 first = false;
62 }
63 write!(f, ")")
64 }
65}
66
67impl BooleanQuery {
68 pub fn new() -> Self {
69 Self::default()
70 }
71
72 pub fn must(mut self, query: impl Query + 'static) -> Self {
73 self.must.push(Arc::new(query));
74 self
75 }
76
77 pub fn should(mut self, query: impl Query + 'static) -> Self {
78 self.should.push(Arc::new(query));
79 self
80 }
81
82 pub fn must_not(mut self, query: impl Query + 'static) -> Self {
83 self.must_not.push(Arc::new(query));
84 self
85 }
86
87 pub fn with_global_stats(mut self, stats: Arc<GlobalStats>) -> Self {
89 self.global_stats = Some(stats);
90 self
91 }
92}
93
94fn compute_idf(
96 posting_list: &crate::structures::BlockPostingList,
97 field: crate::Field,
98 term: &[u8],
99 num_docs: f32,
100 global_stats: Option<&Arc<GlobalStats>>,
101) -> f32 {
102 if let Some(stats) = global_stats {
103 let global_idf = stats.text_idf(field, &String::from_utf8_lossy(term));
104 if global_idf > 0.0 {
105 return global_idf;
106 }
107 }
108 let doc_freq = posting_list.doc_count() as f32;
109 super::bm25_idf(doc_freq, num_docs)
110}
111
112fn prepare_text_maxscore(
115 should: &[Arc<dyn Query>],
116 reader: &SegmentReader,
117 global_stats: Option<&Arc<GlobalStats>>,
118) -> Option<(Vec<super::TermQueryInfo>, crate::Field, f32, f32)> {
119 let infos: Vec<_> = should
120 .iter()
121 .filter_map(|q| q.as_term_query_info())
122 .collect();
123 if infos.len() != should.len() {
124 return None;
125 }
126 let field = infos[0].field;
127 if !infos.iter().all(|t| t.field == field) {
128 return None;
129 }
130 let avg_field_len = global_stats
131 .map(|s| s.avg_field_len(field))
132 .unwrap_or_else(|| reader.avg_field_len(field));
133 let num_docs = reader.num_docs() as f32;
134 Some((infos, field, avg_field_len, num_docs))
135}
136
137fn finish_text_maxscore<'a>(
139 posting_lists: Vec<(crate::structures::BlockPostingList, f32)>,
140 avg_field_len: f32,
141 limit: usize,
142) -> crate::Result<Box<dyn Scorer + 'a>> {
143 if posting_lists.is_empty() {
144 return Ok(Box::new(EmptyScorer) as Box<dyn Scorer + 'a>);
145 }
146 let results = MaxScoreExecutor::text(posting_lists, avg_field_len, limit).execute_sync()?;
147 Ok(Box::new(TopKResultScorer::new(results)) as Box<dyn Scorer + 'a>)
148}
149
150async fn try_maxscore_scorer<'a>(
152 should: &[Arc<dyn Query>],
153 reader: &'a SegmentReader,
154 limit: usize,
155 global_stats: Option<&Arc<GlobalStats>>,
156) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
157 let (mut infos, _field, avg_field_len, num_docs) =
158 match prepare_text_maxscore(should, reader, global_stats) {
159 Some(v) => v,
160 None => return Ok(None),
161 };
162 let mut posting_lists = Vec::with_capacity(infos.len());
163 for info in infos.drain(..) {
164 if let Some(pl) = reader.get_postings(info.field, &info.term).await? {
165 let idf = compute_idf(&pl, info.field, &info.term, num_docs, global_stats);
166 posting_lists.push((pl, idf));
167 }
168 }
169 Ok(Some(finish_text_maxscore(
170 posting_lists,
171 avg_field_len,
172 limit,
173 )?))
174}
175
176#[cfg(feature = "sync")]
178fn try_maxscore_scorer_sync<'a>(
179 should: &[Arc<dyn Query>],
180 reader: &'a SegmentReader,
181 limit: usize,
182 global_stats: Option<&Arc<GlobalStats>>,
183) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
184 let (mut infos, _field, avg_field_len, num_docs) =
185 match prepare_text_maxscore(should, reader, global_stats) {
186 Some(v) => v,
187 None => return Ok(None),
188 };
189 let mut posting_lists = Vec::with_capacity(infos.len());
190 for info in infos.drain(..) {
191 if let Some(pl) = reader.get_postings_sync(info.field, &info.term)? {
192 let idf = compute_idf(&pl, info.field, &info.term, num_docs, global_stats);
193 posting_lists.push((pl, idf));
194 }
195 }
196 Ok(Some(finish_text_maxscore(
197 posting_lists,
198 avg_field_len,
199 limit,
200 )?))
201}
202
203struct PerFieldGrouping {
205 multi_term_groups: Vec<(crate::Field, f32, Vec<super::TermQueryInfo>)>,
207 fallback_indices: Vec<usize>,
209 per_field_limit: usize,
211 num_docs: f32,
212}
213
214fn prepare_per_field_grouping(
217 should: &[Arc<dyn Query>],
218 reader: &SegmentReader,
219 limit: usize,
220 global_stats: Option<&Arc<GlobalStats>>,
221) -> Option<PerFieldGrouping> {
222 let mut field_groups: rustc_hash::FxHashMap<crate::Field, Vec<(usize, super::TermQueryInfo)>> =
223 rustc_hash::FxHashMap::default();
224 let mut non_term_indices: Vec<usize> = Vec::new();
225
226 for (i, q) in should.iter().enumerate() {
227 if let Some(info) = q.as_term_query_info() {
228 field_groups.entry(info.field).or_default().push((i, info));
229 } else {
230 non_term_indices.push(i);
231 }
232 }
233
234 if !field_groups.values().any(|g| g.len() >= 2) {
235 return None;
236 }
237
238 let num_groups = field_groups.len() + non_term_indices.len();
239 let per_field_limit = limit * num_groups;
240 let num_docs = reader.num_docs() as f32;
241
242 let mut multi_term_groups = Vec::new();
243 let mut fallback_indices = non_term_indices;
244
245 for group in field_groups.into_values() {
246 if group.len() >= 2 {
247 let field = group[0].1.field;
248 let avg_field_len = global_stats
249 .map(|s| s.avg_field_len(field))
250 .unwrap_or_else(|| reader.avg_field_len(field));
251 let infos: Vec<_> = group.into_iter().map(|(_, info)| info).collect();
252 multi_term_groups.push((field, avg_field_len, infos));
253 } else {
254 fallback_indices.push(group[0].0);
255 }
256 }
257
258 Some(PerFieldGrouping {
259 multi_term_groups,
260 fallback_indices,
261 per_field_limit,
262 num_docs,
263 })
264}
265
266fn build_should_scorer<'a>(scorers: Vec<Box<dyn Scorer + 'a>>) -> Box<dyn Scorer + 'a> {
268 if scorers.is_empty() {
269 return Box::new(EmptyScorer);
270 }
271 if scorers.len() == 1 {
272 return scorers.into_iter().next().unwrap();
273 }
274 let mut scorer = BooleanScorer {
275 must: vec![],
276 should: scorers,
277 must_not: vec![],
278 current_doc: 0,
279 };
280 scorer.current_doc = scorer.find_next_match();
281 Box::new(scorer)
282}
283
284async fn try_per_field_maxscore<'a>(
290 should: &[Arc<dyn Query>],
291 reader: &'a SegmentReader,
292 limit: usize,
293 global_stats: Option<&Arc<GlobalStats>>,
294) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
295 let grouping = match prepare_per_field_grouping(should, reader, limit, global_stats) {
296 Some(g) => g,
297 None => return Ok(None),
298 };
299
300 let mut scorers: Vec<Box<dyn Scorer + 'a>> = Vec::new();
301
302 for (field, avg_field_len, infos) in &grouping.multi_term_groups {
303 let mut posting_lists = Vec::with_capacity(infos.len());
304 for info in infos {
305 if let Some(pl) = reader.get_postings(info.field, &info.term).await? {
306 let idf = compute_idf(&pl, *field, &info.term, grouping.num_docs, global_stats);
307 posting_lists.push((pl, idf));
308 }
309 }
310 if !posting_lists.is_empty() {
311 scorers.push(finish_text_maxscore(
312 posting_lists,
313 *avg_field_len,
314 grouping.per_field_limit,
315 )?);
316 }
317 }
318
319 for &idx in &grouping.fallback_indices {
320 scorers.push(should[idx].scorer(reader, limit).await?);
321 }
322
323 Ok(Some(build_should_scorer(scorers)))
324}
325
326#[cfg(feature = "sync")]
328fn try_per_field_maxscore_sync<'a>(
329 should: &[Arc<dyn Query>],
330 reader: &'a SegmentReader,
331 limit: usize,
332 global_stats: Option<&Arc<GlobalStats>>,
333) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
334 let grouping = match prepare_per_field_grouping(should, reader, limit, global_stats) {
335 Some(g) => g,
336 None => return Ok(None),
337 };
338
339 let mut scorers: Vec<Box<dyn Scorer + 'a>> = Vec::new();
340
341 for (field, avg_field_len, infos) in &grouping.multi_term_groups {
342 let mut posting_lists = Vec::with_capacity(infos.len());
343 for info in infos {
344 if let Some(pl) = reader.get_postings_sync(info.field, &info.term)? {
345 let idf = compute_idf(&pl, *field, &info.term, grouping.num_docs, global_stats);
346 posting_lists.push((pl, idf));
347 }
348 }
349 if !posting_lists.is_empty() {
350 scorers.push(finish_text_maxscore(
351 posting_lists,
352 *avg_field_len,
353 grouping.per_field_limit,
354 )?);
355 }
356 }
357
358 for &idx in &grouping.fallback_indices {
359 scorers.push(should[idx].scorer_sync(reader, limit)?);
360 }
361
362 Ok(Some(build_should_scorer(scorers)))
363}
364
365fn prepare_sparse_maxscore<'a>(
368 should: &[Arc<dyn Query>],
369 reader: &'a SegmentReader,
370 limit: usize,
371) -> Option<Result<MaxScoreExecutor<'a>, Box<dyn Scorer + 'a>>> {
372 let infos: Vec<SparseTermQueryInfo> = should
373 .iter()
374 .filter_map(|q| q.as_sparse_term_query_info())
375 .collect();
376 if infos.len() != should.len() {
377 return None;
378 }
379 let field = infos[0].field;
380 if !infos.iter().all(|t| t.field == field) {
381 return None;
382 }
383 let si = match reader.sparse_index(field) {
384 Some(si) => si,
385 None => return Some(Err(Box::new(EmptyScorer))),
386 };
387 let query_terms: Vec<(u32, f32)> = infos
388 .iter()
389 .filter(|info| si.has_dimension(info.dim_id))
390 .map(|info| (info.dim_id, info.weight))
391 .collect();
392 if query_terms.is_empty() {
393 return Some(Err(Box::new(EmptyScorer)));
394 }
395 let executor_limit = (limit as f32 * infos[0].over_fetch_factor).ceil() as usize;
396 Some(Ok(MaxScoreExecutor::sparse(
397 si,
398 query_terms,
399 executor_limit,
400 infos[0].heap_factor,
401 )))
402}
403
404fn combine_sparse_results<'a>(
406 raw: Vec<ScoredDoc>,
407 combiner: super::MultiValueCombiner,
408 field: crate::Field,
409 limit: usize,
410) -> Box<dyn Scorer + 'a> {
411 let combined = crate::segment::combine_ordinal_results(
412 raw.into_iter().map(|r| (r.doc_id, r.ordinal, r.score)),
413 combiner,
414 limit,
415 );
416 Box::new(VectorTopKResultScorer::new(combined, field.0))
417}
418
419async fn try_sparse_maxscore_scorer<'a>(
421 should: &[Arc<dyn Query>],
422 reader: &'a SegmentReader,
423 limit: usize,
424) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
425 let executor = match prepare_sparse_maxscore(should, reader, limit) {
426 None => return Ok(None),
427 Some(Err(empty)) => return Ok(Some(empty)),
428 Some(Ok(e)) => e,
429 };
430 let info = should[0].as_sparse_term_query_info().unwrap();
431 let raw = executor.execute().await?;
432 Ok(Some(combine_sparse_results(
433 raw,
434 info.combiner,
435 info.field,
436 limit,
437 )))
438}
439
440#[cfg(feature = "sync")]
442fn try_sparse_maxscore_scorer_sync<'a>(
443 should: &[Arc<dyn Query>],
444 reader: &'a SegmentReader,
445 limit: usize,
446) -> crate::Result<Option<Box<dyn Scorer + 'a>>> {
447 let executor = match prepare_sparse_maxscore(should, reader, limit) {
448 None => return Ok(None),
449 Some(Err(empty)) => return Ok(Some(empty)),
450 Some(Ok(e)) => e,
451 };
452 let info = should[0].as_sparse_term_query_info().unwrap();
453 let raw = executor.execute_sync()?;
454 Ok(Some(combine_sparse_results(
455 raw,
456 info.combiner,
457 info.field,
458 limit,
459 )))
460}
461
462fn compile_filter_predicates<'a>(
470 must: &[std::sync::Arc<dyn Query>],
471 must_not: &[std::sync::Arc<dyn Query>],
472 reader: &'a SegmentReader,
473) -> Option<(super::DocPredicate<'a>, f32)> {
474 if must.is_empty() || !must.iter().all(|q| q.is_filter()) {
475 return None;
476 }
477
478 let predicates: Vec<_> = must
479 .iter()
480 .filter_map(|q| q.as_doc_predicate(reader))
481 .collect();
482 if predicates.len() != must.len() {
483 return None;
484 }
485
486 let filter_score = must.len() as f32;
487
488 let combined: super::DocPredicate<'a> = if predicates.len() == 1 {
489 predicates.into_iter().next().unwrap()
490 } else {
491 Box::new(move |doc_id| predicates.iter().all(|p| p(doc_id)))
492 };
493
494 if must_not.is_empty() {
495 return Some((combined, filter_score));
496 }
497
498 let not_predicates: Vec<_> = must_not
501 .iter()
502 .filter_map(|q| q.as_doc_predicate(reader))
503 .collect();
504 if not_predicates.len() != must_not.len() {
505 return None;
506 }
507
508 Some((
509 Box::new(move |doc_id| combined(doc_id) && not_predicates.iter().all(|p| !p(doc_id))),
510 filter_score,
511 ))
512}
513
514impl Query for BooleanQuery {
515 fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
516 let must = self.must.clone();
518 let should = self.should.clone();
519 let must_not = self.must_not.clone();
520 let global_stats = self.global_stats.clone();
521
522 Box::pin(async move {
523 if must_not.is_empty() {
525 if must.len() == 1 && should.is_empty() {
526 return must[0].scorer(reader, limit).await;
527 }
528 if should.len() == 1 && must.is_empty() {
529 return should[0].scorer(reader, limit).await;
530 }
531 }
532
533 if must.is_empty() && must_not.is_empty() && should.len() >= 2 {
536 if let Some(scorer) =
538 try_maxscore_scorer(&should, reader, limit, global_stats.as_ref()).await?
539 {
540 return Ok(scorer);
541 }
542 if let Some(scorer) = try_sparse_maxscore_scorer(&should, reader, limit).await? {
544 return Ok(scorer);
545 }
546 if let Some(scorer) =
548 try_per_field_maxscore(&should, reader, limit, global_stats.as_ref()).await?
549 {
550 return Ok(scorer);
551 }
552 }
553
554 if !should.is_empty()
559 && limit < usize::MAX / 4
560 && let Some((predicate, filter_score)) =
561 compile_filter_predicates(&must, &must_not, reader)
562 {
563 let should_scorer = if should.len() == 1 {
564 should[0].scorer(reader, limit).await?
565 } else {
566 let sub = BooleanQuery {
567 must: Vec::new(),
568 should: should.clone(),
569 must_not: Vec::new(),
570 global_stats: global_stats.clone(),
571 };
572 sub.scorer(reader, limit).await?
573 };
574
575 if should_scorer.size_hint() >= limit as u32 {
578 log::debug!(
579 "BooleanQuery planner: filter push-down, {} MUST filters → predicate, {} SHOULD scorers drive (size_hint={})",
580 must.len(),
581 should.len(),
582 should_scorer.size_hint()
583 );
584 return Ok(Box::new(super::PredicatedScorer::new(
585 should_scorer,
586 predicate,
587 filter_score,
588 )));
589 }
590 }
591
592 let mut must_scorers = Vec::with_capacity(must.len());
594 for q in &must {
595 must_scorers.push(q.scorer(reader, limit).await?);
596 }
597
598 let mut should_scorers = Vec::with_capacity(should.len());
599 for q in &should {
600 should_scorers.push(q.scorer(reader, limit).await?);
601 }
602
603 let mut must_not_scorers = Vec::with_capacity(must_not.len());
604 for q in &must_not {
605 must_not_scorers.push(q.scorer(reader, limit).await?);
606 }
607
608 let mut scorer = BooleanScorer {
609 must: must_scorers,
610 should: should_scorers,
611 must_not: must_not_scorers,
612 current_doc: 0,
613 };
614 scorer.current_doc = scorer.find_next_match();
616 Ok(Box::new(scorer) as Box<dyn Scorer + 'a>)
617 })
618 }
619
620 #[cfg(feature = "sync")]
621 fn scorer_sync<'a>(
622 &self,
623 reader: &'a SegmentReader,
624 limit: usize,
625 ) -> crate::Result<Box<dyn Scorer + 'a>> {
626 if self.must_not.is_empty() {
628 if self.must.len() == 1 && self.should.is_empty() {
629 return self.must[0].scorer_sync(reader, limit);
630 }
631 if self.should.len() == 1 && self.must.is_empty() {
632 return self.should[0].scorer_sync(reader, limit);
633 }
634 }
635
636 if self.must.is_empty() && self.must_not.is_empty() && self.should.len() >= 2 {
638 if let Some(scorer) =
639 try_maxscore_scorer_sync(&self.should, reader, limit, self.global_stats.as_ref())?
640 {
641 return Ok(scorer);
642 }
643 if let Some(scorer) = try_sparse_maxscore_scorer_sync(&self.should, reader, limit)? {
644 return Ok(scorer);
645 }
646 if let Some(scorer) = try_per_field_maxscore_sync(
648 &self.should,
649 reader,
650 limit,
651 self.global_stats.as_ref(),
652 )? {
653 return Ok(scorer);
654 }
655 }
656
657 if !self.should.is_empty()
659 && limit < usize::MAX / 4
660 && let Some((predicate, filter_score)) =
661 compile_filter_predicates(&self.must, &self.must_not, reader)
662 {
663 let should_scorer = if self.should.len() == 1 {
664 self.should[0].scorer_sync(reader, limit)?
665 } else {
666 let sub = BooleanQuery {
667 must: Vec::new(),
668 should: self.should.clone(),
669 must_not: Vec::new(),
670 global_stats: self.global_stats.clone(),
671 };
672 sub.scorer_sync(reader, limit)?
673 };
674
675 if should_scorer.size_hint() >= limit as u32 {
676 log::debug!(
677 "BooleanQuery planner (sync): filter push-down, {} MUST filters → predicate, {} SHOULD scorers drive",
678 self.must.len(),
679 self.should.len()
680 );
681 return Ok(Box::new(super::PredicatedScorer::new(
682 should_scorer,
683 predicate,
684 filter_score,
685 )));
686 }
687 }
688
689 let mut must_scorers = Vec::with_capacity(self.must.len());
691 for q in &self.must {
692 must_scorers.push(q.scorer_sync(reader, limit)?);
693 }
694
695 let mut should_scorers = Vec::with_capacity(self.should.len());
696 for q in &self.should {
697 should_scorers.push(q.scorer_sync(reader, limit)?);
698 }
699
700 let mut must_not_scorers = Vec::with_capacity(self.must_not.len());
701 for q in &self.must_not {
702 must_not_scorers.push(q.scorer_sync(reader, limit)?);
703 }
704
705 let mut scorer = BooleanScorer {
706 must: must_scorers,
707 should: should_scorers,
708 must_not: must_not_scorers,
709 current_doc: 0,
710 };
711 scorer.current_doc = scorer.find_next_match();
712 Ok(Box::new(scorer) as Box<dyn Scorer + 'a>)
713 }
714
715 fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
716 let must = self.must.clone();
717 let should = self.should.clone();
718
719 Box::pin(async move {
720 if !must.is_empty() {
721 let mut estimates = Vec::with_capacity(must.len());
722 for q in &must {
723 estimates.push(q.count_estimate(reader).await?);
724 }
725 estimates
726 .into_iter()
727 .min()
728 .ok_or_else(|| crate::Error::Corruption("Empty must clause".to_string()))
729 } else if !should.is_empty() {
730 let mut sum = 0u32;
731 for q in &should {
732 sum = sum.saturating_add(q.count_estimate(reader).await?);
733 }
734 Ok(sum)
735 } else {
736 Ok(0)
737 }
738 })
739 }
740}
741
742struct BooleanScorer<'a> {
743 must: Vec<Box<dyn Scorer + 'a>>,
744 should: Vec<Box<dyn Scorer + 'a>>,
745 must_not: Vec<Box<dyn Scorer + 'a>>,
746 current_doc: DocId,
747}
748
749impl BooleanScorer<'_> {
750 fn find_next_match(&mut self) -> DocId {
751 if self.must.is_empty() && self.should.is_empty() {
752 return TERMINATED;
753 }
754
755 loop {
756 let candidate = if !self.must.is_empty() {
757 let mut max_doc = self
758 .must
759 .iter()
760 .map(|s| s.doc())
761 .max()
762 .unwrap_or(TERMINATED);
763
764 if max_doc == TERMINATED {
765 return TERMINATED;
766 }
767
768 loop {
769 let mut all_match = true;
770 for scorer in &mut self.must {
771 let doc = scorer.seek(max_doc);
772 if doc == TERMINATED {
773 return TERMINATED;
774 }
775 if doc > max_doc {
776 max_doc = doc;
777 all_match = false;
778 break;
779 }
780 }
781 if all_match {
782 break;
783 }
784 }
785 max_doc
786 } else {
787 self.should
788 .iter()
789 .map(|s| s.doc())
790 .filter(|&d| d != TERMINATED)
791 .min()
792 .unwrap_or(TERMINATED)
793 };
794
795 if candidate == TERMINATED {
796 return TERMINATED;
797 }
798
799 let excluded = self.must_not.iter_mut().any(|scorer| {
800 let doc = scorer.seek(candidate);
801 doc == candidate
802 });
803
804 if !excluded {
805 for scorer in &mut self.should {
807 scorer.seek(candidate);
808 }
809 self.current_doc = candidate;
810 return candidate;
811 }
812
813 if !self.must.is_empty() {
815 for scorer in &mut self.must {
816 scorer.advance();
817 }
818 } else {
819 for scorer in &mut self.should {
821 if scorer.doc() <= candidate && scorer.doc() != TERMINATED {
822 scorer.seek(candidate + 1);
823 }
824 }
825 }
826 }
827 }
828}
829
830impl super::docset::DocSet for BooleanScorer<'_> {
831 fn doc(&self) -> DocId {
832 self.current_doc
833 }
834
835 fn advance(&mut self) -> DocId {
836 if !self.must.is_empty() {
837 for scorer in &mut self.must {
838 scorer.advance();
839 }
840 } else {
841 for scorer in &mut self.should {
842 if scorer.doc() == self.current_doc {
843 scorer.advance();
844 }
845 }
846 }
847
848 self.current_doc = self.find_next_match();
849 self.current_doc
850 }
851
852 fn seek(&mut self, target: DocId) -> DocId {
853 for scorer in &mut self.must {
854 scorer.seek(target);
855 }
856
857 for scorer in &mut self.should {
858 scorer.seek(target);
859 }
860
861 self.current_doc = self.find_next_match();
862 self.current_doc
863 }
864
865 fn size_hint(&self) -> u32 {
866 if !self.must.is_empty() {
867 self.must.iter().map(|s| s.size_hint()).min().unwrap_or(0)
868 } else {
869 self.should.iter().map(|s| s.size_hint()).sum()
870 }
871 }
872}
873
874impl Scorer for BooleanScorer<'_> {
875 fn score(&self) -> Score {
876 let mut total = 0.0;
877
878 for scorer in &self.must {
879 if scorer.doc() == self.current_doc {
880 total += scorer.score();
881 }
882 }
883
884 for scorer in &self.should {
885 if scorer.doc() == self.current_doc {
886 total += scorer.score();
887 }
888 }
889
890 total
891 }
892
893 fn matched_positions(&self) -> Option<super::MatchedPositions> {
894 let mut all_positions: super::MatchedPositions = Vec::new();
895
896 for scorer in &self.must {
897 if scorer.doc() == self.current_doc
898 && let Some(positions) = scorer.matched_positions()
899 {
900 all_positions.extend(positions);
901 }
902 }
903
904 for scorer in &self.should {
905 if scorer.doc() == self.current_doc
906 && let Some(positions) = scorer.matched_positions()
907 {
908 all_positions.extend(positions);
909 }
910 }
911
912 if all_positions.is_empty() {
913 None
914 } else {
915 Some(all_positions)
916 }
917 }
918}
919
920struct TopKResultScorer {
922 results: Vec<ScoredDoc>,
923 position: usize,
924}
925
926impl TopKResultScorer {
927 fn new(mut results: Vec<ScoredDoc>) -> Self {
928 results.sort_unstable_by_key(|r| r.doc_id);
930 Self {
931 results,
932 position: 0,
933 }
934 }
935}
936
937impl super::docset::DocSet for TopKResultScorer {
938 fn doc(&self) -> DocId {
939 if self.position < self.results.len() {
940 self.results[self.position].doc_id
941 } else {
942 TERMINATED
943 }
944 }
945
946 fn advance(&mut self) -> DocId {
947 self.position += 1;
948 self.doc()
949 }
950
951 fn seek(&mut self, target: DocId) -> DocId {
952 let remaining = &self.results[self.position..];
953 self.position += remaining.partition_point(|r| r.doc_id < target);
954 self.doc()
955 }
956
957 fn size_hint(&self) -> u32 {
958 (self.results.len() - self.position) as u32
959 }
960}
961
962impl Scorer for TopKResultScorer {
963 fn score(&self) -> Score {
964 if self.position < self.results.len() {
965 self.results[self.position].score
966 } else {
967 0.0
968 }
969 }
970}
971
972struct VectorTopKResultScorer {
975 results: Vec<crate::segment::VectorSearchResult>,
976 position: usize,
977 field_id: u32,
978}
979
980impl VectorTopKResultScorer {
981 fn new(mut results: Vec<crate::segment::VectorSearchResult>, field_id: u32) -> Self {
982 results.sort_unstable_by_key(|r| r.doc_id);
983 Self {
984 results,
985 position: 0,
986 field_id,
987 }
988 }
989}
990
991impl super::docset::DocSet for VectorTopKResultScorer {
992 fn doc(&self) -> DocId {
993 if self.position < self.results.len() {
994 self.results[self.position].doc_id
995 } else {
996 TERMINATED
997 }
998 }
999
1000 fn advance(&mut self) -> DocId {
1001 self.position += 1;
1002 self.doc()
1003 }
1004
1005 fn seek(&mut self, target: DocId) -> DocId {
1006 let remaining = &self.results[self.position..];
1007 self.position += remaining.partition_point(|r| r.doc_id < target);
1008 self.doc()
1009 }
1010
1011 fn size_hint(&self) -> u32 {
1012 (self.results.len() - self.position) as u32
1013 }
1014}
1015
1016impl Scorer for VectorTopKResultScorer {
1017 fn score(&self) -> Score {
1018 if self.position < self.results.len() {
1019 self.results[self.position].score
1020 } else {
1021 0.0
1022 }
1023 }
1024
1025 fn matched_positions(&self) -> Option<super::MatchedPositions> {
1026 if self.position >= self.results.len() {
1027 return None;
1028 }
1029 let result = &self.results[self.position];
1030 let scored_positions: Vec<super::ScoredPosition> = result
1031 .ordinals
1032 .iter()
1033 .map(|&(ordinal, score)| super::ScoredPosition::new(ordinal, score))
1034 .collect();
1035 Some(vec![(self.field_id, scored_positions)])
1036 }
1037}
1038
1039struct EmptyScorer;
1041
1042impl super::docset::DocSet for EmptyScorer {
1043 fn doc(&self) -> DocId {
1044 TERMINATED
1045 }
1046
1047 fn advance(&mut self) -> DocId {
1048 TERMINATED
1049 }
1050
1051 fn seek(&mut self, _target: DocId) -> DocId {
1052 TERMINATED
1053 }
1054
1055 fn size_hint(&self) -> u32 {
1056 0
1057 }
1058}
1059
1060impl Scorer for EmptyScorer {
1061 fn score(&self) -> Score {
1062 0.0
1063 }
1064}
1065
1066#[cfg(test)]
1067mod tests {
1068 use super::*;
1069 use crate::dsl::Field;
1070 use crate::query::TermQuery;
1071
1072 #[test]
1073 fn test_maxscore_eligible_pure_or_same_field() {
1074 let query = BooleanQuery::new()
1076 .should(TermQuery::text(Field(0), "hello"))
1077 .should(TermQuery::text(Field(0), "world"))
1078 .should(TermQuery::text(Field(0), "foo"));
1079
1080 assert!(
1082 query
1083 .should
1084 .iter()
1085 .all(|q| q.as_term_query_info().is_some())
1086 );
1087
1088 let infos: Vec<_> = query
1090 .should
1091 .iter()
1092 .filter_map(|q| q.as_term_query_info())
1093 .collect();
1094 assert_eq!(infos.len(), 3);
1095 assert!(infos.iter().all(|i| i.field == Field(0)));
1096 }
1097
1098 #[test]
1099 fn test_maxscore_not_eligible_different_fields() {
1100 let query = BooleanQuery::new()
1102 .should(TermQuery::text(Field(0), "hello"))
1103 .should(TermQuery::text(Field(1), "world")); let infos: Vec<_> = query
1106 .should
1107 .iter()
1108 .filter_map(|q| q.as_term_query_info())
1109 .collect();
1110 assert_eq!(infos.len(), 2);
1111 assert!(infos[0].field != infos[1].field);
1113 }
1114
1115 #[test]
1116 fn test_maxscore_not_eligible_with_must() {
1117 let query = BooleanQuery::new()
1119 .must(TermQuery::text(Field(0), "required"))
1120 .should(TermQuery::text(Field(0), "hello"))
1121 .should(TermQuery::text(Field(0), "world"));
1122
1123 assert!(!query.must.is_empty());
1125 }
1126
1127 #[test]
1128 fn test_maxscore_not_eligible_with_must_not() {
1129 let query = BooleanQuery::new()
1131 .should(TermQuery::text(Field(0), "hello"))
1132 .should(TermQuery::text(Field(0), "world"))
1133 .must_not(TermQuery::text(Field(0), "excluded"));
1134
1135 assert!(!query.must_not.is_empty());
1137 }
1138
1139 #[test]
1140 fn test_maxscore_not_eligible_single_term() {
1141 let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
1143
1144 assert_eq!(query.should.len(), 1);
1146 }
1147
1148 #[test]
1149 fn test_term_query_info_extraction() {
1150 let term_query = TermQuery::text(Field(42), "test");
1151 let info = term_query.as_term_query_info();
1152
1153 assert!(info.is_some());
1154 let info = info.unwrap();
1155 assert_eq!(info.field, Field(42));
1156 assert_eq!(info.term, b"test");
1157 }
1158
1159 #[test]
1160 fn test_boolean_query_no_term_info() {
1161 let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
1163
1164 assert!(query.as_term_query_info().is_none());
1165 }
1166}