1use std::sync::Arc;
4
5use crate::segment::SegmentReader;
6use crate::structures::TERMINATED;
7use crate::{DocId, Score};
8
9use super::planner::{
10 build_sparse_bmp_results, build_sparse_bmp_results_filtered, build_sparse_maxscore_executor,
11 chain_predicates, combine_sparse_results, compute_idf, extract_all_sparse_infos,
12 finish_text_maxscore, prepare_per_field_grouping, prepare_text_maxscore,
13};
14use super::{CountFuture, EmptyScorer, GlobalStats, Query, Scorer, ScorerFuture};
15
16#[derive(Default, Clone)]
21pub struct BooleanQuery {
22 pub must: Vec<Arc<dyn Query>>,
23 pub should: Vec<Arc<dyn Query>>,
24 pub must_not: Vec<Arc<dyn Query>>,
25 global_stats: Option<Arc<GlobalStats>>,
27}
28
29impl std::fmt::Debug for BooleanQuery {
30 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31 f.debug_struct("BooleanQuery")
32 .field("must_count", &self.must.len())
33 .field("should_count", &self.should.len())
34 .field("must_not_count", &self.must_not.len())
35 .field("has_global_stats", &self.global_stats.is_some())
36 .finish()
37 }
38}
39
40impl std::fmt::Display for BooleanQuery {
41 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 write!(f, "Boolean(")?;
43 let mut first = true;
44 for q in &self.must {
45 if !first {
46 write!(f, " ")?;
47 }
48 write!(f, "+{}", q)?;
49 first = false;
50 }
51 for q in &self.should {
52 if !first {
53 write!(f, " ")?;
54 }
55 write!(f, "{}", q)?;
56 first = false;
57 }
58 for q in &self.must_not {
59 if !first {
60 write!(f, " ")?;
61 }
62 write!(f, "-{}", q)?;
63 first = false;
64 }
65 write!(f, ")")
66 }
67}
68
69impl BooleanQuery {
70 pub fn new() -> Self {
71 Self::default()
72 }
73
74 pub fn must(mut self, query: impl Query + 'static) -> Self {
75 self.must.push(Arc::new(query));
76 self
77 }
78
79 pub fn should(mut self, query: impl Query + 'static) -> Self {
80 self.should.push(Arc::new(query));
81 self
82 }
83
84 pub fn must_not(mut self, query: impl Query + 'static) -> Self {
85 self.must_not.push(Arc::new(query));
86 self
87 }
88
89 pub fn with_global_stats(mut self, stats: Arc<GlobalStats>) -> Self {
91 self.global_stats = Some(stats);
92 self
93 }
94}
95
96fn build_should_scorer<'a>(scorers: Vec<Box<dyn Scorer + 'a>>) -> Box<dyn Scorer + 'a> {
98 if scorers.is_empty() {
99 return Box::new(EmptyScorer);
100 }
101 if scorers.len() == 1 {
102 return scorers.into_iter().next().unwrap();
103 }
104 let mut scorer = BooleanScorer {
105 must: vec![],
106 should: scorers,
107 must_not: vec![],
108 current_doc: 0,
109 };
110 scorer.current_doc = scorer.find_next_match();
111 Box::new(scorer)
112}
113
114macro_rules! boolean_plan {
128 ($must:expr, $should:expr, $must_not:expr, $global_stats:expr,
129 $reader:expr, $limit:expr,
130 $scorer_fn:ident, $get_postings_fn:ident, $execute_fn:ident
131 $(, $aw:tt)*) => {{
132 let must: &[Arc<dyn Query>] = &$must;
133 let should_all: &[Arc<dyn Query>] = &$should;
134 let must_not: &[Arc<dyn Query>] = &$must_not;
135 let global_stats: Option<&Arc<GlobalStats>> = $global_stats;
136 let reader: &SegmentReader = $reader;
137 let limit: usize = $limit;
138
139 let should_capped: Vec<Arc<dyn Query>>;
142 let should: &[Arc<dyn Query>] = if should_all.len() > super::MAX_QUERY_TERMS {
143 let is_predicate: Vec<bool> = should_all
144 .iter()
145 .map(|q| q.is_filter() || q.as_doc_predicate(reader).is_some())
146 .collect();
147 let cursor_count = is_predicate.iter().filter(|&&p| !p).count();
148
149 if cursor_count > super::MAX_QUERY_TERMS {
150 let mut kept = Vec::with_capacity(should_all.len());
151 let mut cursor_kept = 0usize;
152 for (q, &is_pred) in should_all.iter().zip(is_predicate.iter()) {
153 if is_pred {
154 kept.push(q.clone());
155 } else if cursor_kept < super::MAX_QUERY_TERMS {
156 kept.push(q.clone());
157 cursor_kept += 1;
158 }
159 }
160 log::debug!(
161 "BooleanQuery: capping cursor SHOULD from {} to {} ({} fast-field predicates exempt)",
162 cursor_count,
163 super::MAX_QUERY_TERMS,
164 kept.len() - cursor_kept,
165 );
166 should_capped = kept;
167 &should_capped
168 } else {
169 log::debug!(
170 "BooleanQuery: {} SHOULD clauses OK ({} need cursors, {} fast-field predicates)",
171 should_all.len(),
172 cursor_count,
173 should_all.len() - cursor_count,
174 );
175 should_all
176 }
177 } else {
178 should_all
179 };
180
181 if must_not.is_empty() {
183 if must.len() == 1 && should.is_empty() {
184 return must[0].$scorer_fn(reader, limit) $(. $aw)* ;
185 }
186 if should.len() == 1 && must.is_empty() {
187 return should[0].$scorer_fn(reader, limit) $(. $aw)* ;
188 }
189 }
190
191 if must.is_empty() && must_not.is_empty() && should.len() >= 2 {
193 if let Some((mut infos, _field, avg_field_len, num_docs)) =
195 prepare_text_maxscore(should, reader, global_stats)
196 {
197 let mut posting_lists = Vec::with_capacity(infos.len());
198 for info in infos.drain(..) {
199 if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
200 $(. $aw)* ?
201 {
202 let idf = compute_idf(&pl, info.field, &info.term, num_docs, global_stats);
203 posting_lists.push((pl, idf));
204 }
205 }
206 return finish_text_maxscore(posting_lists, avg_field_len, limit);
207 }
208
209 if let Some(infos) = extract_all_sparse_infos(should) {
212 if let Some((raw, info)) =
213 build_sparse_bmp_results(&infos, reader, limit)
214 {
215 return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
216 }
217 if let Some((executor, info)) =
218 build_sparse_maxscore_executor(&infos, reader, limit, None)
219 {
220 let raw = executor.$execute_fn() $(. $aw)* ?;
221 return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
222 }
223 }
224
225 if let Some(grouping) = prepare_per_field_grouping(should, reader, limit, global_stats)
227 {
228 let mut scorers: Vec<Box<dyn Scorer + '_>> = Vec::new();
229 for (field, avg_field_len, infos) in &grouping.multi_term_groups {
230 let mut posting_lists = Vec::with_capacity(infos.len());
231 for info in infos {
232 if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
233 $(. $aw)* ?
234 {
235 let idf = compute_idf(
236 &pl, *field, &info.term, grouping.num_docs, global_stats,
237 );
238 posting_lists.push((pl, idf));
239 }
240 }
241 if !posting_lists.is_empty() {
242 scorers.push(finish_text_maxscore(
243 posting_lists,
244 *avg_field_len,
245 grouping.per_field_limit,
246 )?);
247 }
248 }
249 for &idx in &grouping.fallback_indices {
250 scorers.push(should[idx].$scorer_fn(reader, limit) $(. $aw)* ?);
251 }
252 return Ok(build_should_scorer(scorers));
253 }
254 }
255
256 if !should.is_empty() && !must.is_empty() && limit < usize::MAX / 4 {
258 let mut predicates: Vec<super::DocPredicate<'_>> = Vec::new();
260 let mut must_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
261 for q in must {
262 if let Some(pred) = q.as_doc_predicate(reader) {
263 log::debug!("BooleanQuery planner 3a: MUST clause → predicate ({})", q);
264 predicates.push(pred);
265 } else {
266 log::debug!("BooleanQuery planner 3a: MUST clause → verifier scorer ({})", q);
267 must_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
268 }
269 }
270 let mut must_not_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
272 for q in must_not {
273 if let Some(pred) = q.as_doc_predicate(reader) {
274 let negated: super::DocPredicate<'_> =
275 Box::new(move |doc_id| !pred(doc_id));
276 predicates.push(negated);
277 } else {
278 must_not_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
279 }
280 }
281
282 if must_verifiers.is_empty()
284 && must_not_verifiers.is_empty()
285 && !predicates.is_empty()
286 {
287 if let Some(infos) = extract_all_sparse_infos(should) {
288 let combined = chain_predicates(predicates);
290 if let Some((raw, info)) =
291 build_sparse_bmp_results_filtered(&infos, reader, limit, &*combined)
292 {
293 log::debug!(
294 "BooleanQuery planner: predicate-aware sparse BMP, {} dims",
295 infos.len()
296 );
297 return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
298 }
299 if let Some((executor, info)) =
301 build_sparse_maxscore_executor(&infos, reader, limit, Some(combined))
302 {
303 log::debug!(
304 "BooleanQuery planner: predicate-aware sparse MaxScore, {} dims",
305 infos.len()
306 );
307 let raw = executor.$execute_fn() $(. $aw)* ?;
308 return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
309 }
310 predicates = Vec::new();
313 for q in must {
314 if let Some(pred) = q.as_doc_predicate(reader) {
315 predicates.push(pred);
316 }
317 }
318 for q in must_not {
319 if let Some(pred) = q.as_doc_predicate(reader) {
320 let negated: super::DocPredicate<'_> =
321 Box::new(move |doc_id| !pred(doc_id));
322 predicates.push(negated);
323 }
324 }
325 }
326 }
327
328 let should_limit = if !predicates.is_empty() { limit * 4 } else { limit };
330 let should_scorer = if should.len() == 1 {
331 should[0].$scorer_fn(reader, should_limit) $(. $aw)* ?
332 } else {
333 let sub = BooleanQuery {
334 must: Vec::new(),
335 should: should.to_vec(),
336 must_not: Vec::new(),
337 global_stats: global_stats.cloned(),
338 };
339 sub.$scorer_fn(reader, should_limit) $(. $aw)* ?
340 };
341
342 let use_predicated =
343 must_verifiers.is_empty() || should_scorer.size_hint() >= limit as u32;
344
345 if use_predicated {
346 log::debug!(
347 "BooleanQuery planner: PredicatedScorer {} preds + {} must_v + {} must_not_v, \
348 SHOULD size_hint={}, over_fetch={}",
349 predicates.len(), must_verifiers.len(), must_not_verifiers.len(),
350 should_scorer.size_hint(), should_limit
351 );
352 return Ok(Box::new(super::PredicatedScorer::new(
353 should_scorer, predicates, must_verifiers, must_not_verifiers,
354 )));
355 }
356
357 log::debug!(
359 "BooleanQuery planner: BooleanScorer fallback, size_hint={} < limit={}, \
360 {} must_v + {} must_not_v",
361 should_scorer.size_hint(), limit,
362 must_verifiers.len(), must_not_verifiers.len()
363 );
364 let mut scorer = BooleanScorer {
365 must: must_verifiers,
366 should: vec![should_scorer],
367 must_not: must_not_verifiers,
368 current_doc: 0,
369 };
370 scorer.current_doc = scorer.find_next_match();
371 return Ok(Box::new(scorer));
372 }
373
374 let mut must_scorers = Vec::with_capacity(must.len());
376 for q in must {
377 must_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
378 }
379 let mut should_scorers = Vec::with_capacity(should.len());
380 for q in should {
381 should_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
382 }
383 let mut must_not_scorers = Vec::with_capacity(must_not.len());
384 for q in must_not {
385 must_not_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
386 }
387 let mut scorer = BooleanScorer {
388 must: must_scorers,
389 should: should_scorers,
390 must_not: must_not_scorers,
391 current_doc: 0,
392 };
393 scorer.current_doc = scorer.find_next_match();
394 Ok(Box::new(scorer) as Box<dyn Scorer + '_>)
395 }};
396}
397
398impl Query for BooleanQuery {
399 fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
400 let must = self.must.clone();
401 let should = self.should.clone();
402 let must_not = self.must_not.clone();
403 let global_stats = self.global_stats.clone();
404 Box::pin(async move {
405 boolean_plan!(
406 must,
407 should,
408 must_not,
409 global_stats.as_ref(),
410 reader,
411 limit,
412 scorer,
413 get_postings,
414 execute,
415 await
416 )
417 })
418 }
419
420 #[cfg(feature = "sync")]
421 fn scorer_sync<'a>(
422 &self,
423 reader: &'a SegmentReader,
424 limit: usize,
425 ) -> crate::Result<Box<dyn Scorer + 'a>> {
426 boolean_plan!(
427 self.must,
428 self.should,
429 self.must_not,
430 self.global_stats.as_ref(),
431 reader,
432 limit,
433 scorer_sync,
434 get_postings_sync,
435 execute_sync
436 )
437 }
438
439 fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
440 if self.must.is_empty() && self.should.is_empty() {
442 return None;
443 }
444
445 let must_preds: Vec<_> = self
447 .must
448 .iter()
449 .map(|q| q.as_doc_predicate(reader))
450 .collect::<Option<Vec<_>>>()?;
451 let should_preds: Vec<_> = self
452 .should
453 .iter()
454 .map(|q| q.as_doc_predicate(reader))
455 .collect::<Option<Vec<_>>>()?;
456 let must_not_preds: Vec<_> = self
457 .must_not
458 .iter()
459 .map(|q| q.as_doc_predicate(reader))
460 .collect::<Option<Vec<_>>>()?;
461
462 let has_must = !must_preds.is_empty();
463
464 Some(Box::new(move |doc_id| {
465 if !must_preds.iter().all(|p| p(doc_id)) {
467 return false;
468 }
469 if !has_must && !should_preds.is_empty() && !should_preds.iter().any(|p| p(doc_id)) {
471 return false;
472 }
473 must_not_preds.iter().all(|p| !p(doc_id))
475 }))
476 }
477
478 fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
479 let must = self.must.clone();
480 let should = self.should.clone();
481
482 Box::pin(async move {
483 if !must.is_empty() {
484 let mut estimates = Vec::with_capacity(must.len());
485 for q in &must {
486 estimates.push(q.count_estimate(reader).await?);
487 }
488 estimates
489 .into_iter()
490 .min()
491 .ok_or_else(|| crate::Error::Corruption("Empty must clause".to_string()))
492 } else if !should.is_empty() {
493 let mut sum = 0u32;
494 for q in &should {
495 sum = sum.saturating_add(q.count_estimate(reader).await?);
496 }
497 Ok(sum)
498 } else {
499 Ok(0)
500 }
501 })
502 }
503}
504
505struct BooleanScorer<'a> {
506 must: Vec<Box<dyn Scorer + 'a>>,
507 should: Vec<Box<dyn Scorer + 'a>>,
508 must_not: Vec<Box<dyn Scorer + 'a>>,
509 current_doc: DocId,
510}
511
512impl BooleanScorer<'_> {
513 fn find_next_match(&mut self) -> DocId {
514 if self.must.is_empty() && self.should.is_empty() {
515 return TERMINATED;
516 }
517
518 loop {
519 let candidate = if !self.must.is_empty() {
520 let mut max_doc = self
521 .must
522 .iter()
523 .map(|s| s.doc())
524 .max()
525 .unwrap_or(TERMINATED);
526
527 if max_doc == TERMINATED {
528 return TERMINATED;
529 }
530
531 loop {
532 let mut all_match = true;
533 for scorer in &mut self.must {
534 let doc = scorer.seek(max_doc);
535 if doc == TERMINATED {
536 return TERMINATED;
537 }
538 if doc > max_doc {
539 max_doc = doc;
540 all_match = false;
541 break;
542 }
543 }
544 if all_match {
545 break;
546 }
547 }
548 max_doc
549 } else {
550 self.should
551 .iter()
552 .map(|s| s.doc())
553 .filter(|&d| d != TERMINATED)
554 .min()
555 .unwrap_or(TERMINATED)
556 };
557
558 if candidate == TERMINATED {
559 return TERMINATED;
560 }
561
562 let excluded = self.must_not.iter_mut().any(|scorer| {
563 let doc = scorer.seek(candidate);
564 doc == candidate
565 });
566
567 if !excluded {
568 for scorer in &mut self.should {
570 scorer.seek(candidate);
571 }
572 self.current_doc = candidate;
573 return candidate;
574 }
575
576 if !self.must.is_empty() {
578 for scorer in &mut self.must {
579 scorer.advance();
580 }
581 } else {
582 for scorer in &mut self.should {
584 if scorer.doc() <= candidate && scorer.doc() != TERMINATED {
585 scorer.seek(candidate + 1);
586 }
587 }
588 }
589 }
590 }
591}
592
593impl super::docset::DocSet for BooleanScorer<'_> {
594 fn doc(&self) -> DocId {
595 self.current_doc
596 }
597
598 fn advance(&mut self) -> DocId {
599 if !self.must.is_empty() {
600 for scorer in &mut self.must {
601 scorer.advance();
602 }
603 } else {
604 for scorer in &mut self.should {
605 if scorer.doc() == self.current_doc {
606 scorer.advance();
607 }
608 }
609 }
610
611 self.current_doc = self.find_next_match();
612 self.current_doc
613 }
614
615 fn seek(&mut self, target: DocId) -> DocId {
616 for scorer in &mut self.must {
617 scorer.seek(target);
618 }
619
620 for scorer in &mut self.should {
621 scorer.seek(target);
622 }
623
624 self.current_doc = self.find_next_match();
625 self.current_doc
626 }
627
628 fn size_hint(&self) -> u32 {
629 if !self.must.is_empty() {
630 self.must.iter().map(|s| s.size_hint()).min().unwrap_or(0)
631 } else {
632 self.should.iter().map(|s| s.size_hint()).sum()
633 }
634 }
635}
636
637impl Scorer for BooleanScorer<'_> {
638 fn score(&self) -> Score {
639 let mut total = 0.0;
640
641 for scorer in &self.must {
642 if scorer.doc() == self.current_doc {
643 total += scorer.score();
644 }
645 }
646
647 for scorer in &self.should {
648 if scorer.doc() == self.current_doc {
649 total += scorer.score();
650 }
651 }
652
653 total
654 }
655
656 fn matched_positions(&self) -> Option<super::MatchedPositions> {
657 let mut all_positions: super::MatchedPositions = Vec::new();
658
659 for scorer in &self.must {
660 if scorer.doc() == self.current_doc
661 && let Some(positions) = scorer.matched_positions()
662 {
663 all_positions.extend(positions);
664 }
665 }
666
667 for scorer in &self.should {
668 if scorer.doc() == self.current_doc
669 && let Some(positions) = scorer.matched_positions()
670 {
671 all_positions.extend(positions);
672 }
673 }
674
675 if all_positions.is_empty() {
676 None
677 } else {
678 Some(all_positions)
679 }
680 }
681}
682
683#[cfg(test)]
684mod tests {
685 use super::*;
686 use crate::dsl::Field;
687 use crate::query::{QueryDecomposition, TermQuery};
688
689 #[test]
690 fn test_maxscore_eligible_pure_or_same_field() {
691 let query = BooleanQuery::new()
693 .should(TermQuery::text(Field(0), "hello"))
694 .should(TermQuery::text(Field(0), "world"))
695 .should(TermQuery::text(Field(0), "foo"));
696
697 assert!(
699 query
700 .should
701 .iter()
702 .all(|q| matches!(q.decompose(), QueryDecomposition::TextTerm(_)))
703 );
704
705 let infos: Vec<_> = query
707 .should
708 .iter()
709 .filter_map(|q| match q.decompose() {
710 QueryDecomposition::TextTerm(info) => Some(info),
711 _ => None,
712 })
713 .collect();
714 assert_eq!(infos.len(), 3);
715 assert!(infos.iter().all(|i| i.field == Field(0)));
716 }
717
718 #[test]
719 fn test_maxscore_not_eligible_different_fields() {
720 let query = BooleanQuery::new()
722 .should(TermQuery::text(Field(0), "hello"))
723 .should(TermQuery::text(Field(1), "world")); let infos: Vec<_> = query
726 .should
727 .iter()
728 .filter_map(|q| match q.decompose() {
729 QueryDecomposition::TextTerm(info) => Some(info),
730 _ => None,
731 })
732 .collect();
733 assert_eq!(infos.len(), 2);
734 assert!(infos[0].field != infos[1].field);
736 }
737
738 #[test]
739 fn test_maxscore_not_eligible_with_must() {
740 let query = BooleanQuery::new()
742 .must(TermQuery::text(Field(0), "required"))
743 .should(TermQuery::text(Field(0), "hello"))
744 .should(TermQuery::text(Field(0), "world"));
745
746 assert!(!query.must.is_empty());
748 }
749
750 #[test]
751 fn test_maxscore_not_eligible_with_must_not() {
752 let query = BooleanQuery::new()
754 .should(TermQuery::text(Field(0), "hello"))
755 .should(TermQuery::text(Field(0), "world"))
756 .must_not(TermQuery::text(Field(0), "excluded"));
757
758 assert!(!query.must_not.is_empty());
760 }
761
762 #[test]
763 fn test_maxscore_not_eligible_single_term() {
764 let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
766
767 assert_eq!(query.should.len(), 1);
769 }
770
771 #[test]
772 fn test_term_query_info_extraction() {
773 let term_query = TermQuery::text(Field(42), "test");
774 match term_query.decompose() {
775 QueryDecomposition::TextTerm(info) => {
776 assert_eq!(info.field, Field(42));
777 assert_eq!(info.term, b"test");
778 }
779 _ => panic!("Expected TextTerm decomposition"),
780 }
781 }
782
783 #[test]
784 fn test_boolean_query_no_term_info() {
785 let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
787
788 assert!(matches!(query.decompose(), QueryDecomposition::Opaque));
789 }
790}