1use std::sync::Arc;
4
5use crate::segment::SegmentReader;
6use crate::structures::TERMINATED;
7use crate::{DocId, Score};
8
9use super::planner::{
10 build_sparse_maxscore_executor, chain_predicates, combine_sparse_results, compute_idf,
11 extract_all_sparse_infos, finish_text_maxscore, prepare_per_field_grouping,
12 prepare_text_maxscore,
13};
14use super::{CountFuture, EmptyScorer, GlobalStats, Query, Scorer, ScorerFuture};
15
16#[derive(Default, Clone)]
21pub struct BooleanQuery {
22 pub must: Vec<Arc<dyn Query>>,
23 pub should: Vec<Arc<dyn Query>>,
24 pub must_not: Vec<Arc<dyn Query>>,
25 global_stats: Option<Arc<GlobalStats>>,
27}
28
29impl std::fmt::Debug for BooleanQuery {
30 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31 f.debug_struct("BooleanQuery")
32 .field("must_count", &self.must.len())
33 .field("should_count", &self.should.len())
34 .field("must_not_count", &self.must_not.len())
35 .field("has_global_stats", &self.global_stats.is_some())
36 .finish()
37 }
38}
39
40impl std::fmt::Display for BooleanQuery {
41 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 write!(f, "Boolean(")?;
43 let mut first = true;
44 for q in &self.must {
45 if !first {
46 write!(f, " ")?;
47 }
48 write!(f, "+{}", q)?;
49 first = false;
50 }
51 for q in &self.should {
52 if !first {
53 write!(f, " ")?;
54 }
55 write!(f, "{}", q)?;
56 first = false;
57 }
58 for q in &self.must_not {
59 if !first {
60 write!(f, " ")?;
61 }
62 write!(f, "-{}", q)?;
63 first = false;
64 }
65 write!(f, ")")
66 }
67}
68
69impl BooleanQuery {
70 pub fn new() -> Self {
71 Self::default()
72 }
73
74 pub fn must(mut self, query: impl Query + 'static) -> Self {
75 self.must.push(Arc::new(query));
76 self
77 }
78
79 pub fn should(mut self, query: impl Query + 'static) -> Self {
80 self.should.push(Arc::new(query));
81 self
82 }
83
84 pub fn must_not(mut self, query: impl Query + 'static) -> Self {
85 self.must_not.push(Arc::new(query));
86 self
87 }
88
89 pub fn with_global_stats(mut self, stats: Arc<GlobalStats>) -> Self {
91 self.global_stats = Some(stats);
92 self
93 }
94}
95
96fn build_should_scorer<'a>(scorers: Vec<Box<dyn Scorer + 'a>>) -> Box<dyn Scorer + 'a> {
98 if scorers.is_empty() {
99 return Box::new(EmptyScorer);
100 }
101 if scorers.len() == 1 {
102 return scorers.into_iter().next().unwrap();
103 }
104 let mut scorer = BooleanScorer {
105 must: vec![],
106 should: scorers,
107 must_not: vec![],
108 current_doc: 0,
109 };
110 scorer.current_doc = scorer.find_next_match();
111 Box::new(scorer)
112}
113
114macro_rules! boolean_plan {
128 ($must:expr, $should:expr, $must_not:expr, $global_stats:expr,
129 $reader:expr, $limit:expr,
130 $scorer_fn:ident, $get_postings_fn:ident, $execute_fn:ident
131 $(, $aw:tt)*) => {{
132 let must: &[Arc<dyn Query>] = &$must;
133 let should_all: &[Arc<dyn Query>] = &$should;
134 let must_not: &[Arc<dyn Query>] = &$must_not;
135 let global_stats: Option<&Arc<GlobalStats>> = $global_stats;
136 let reader: &SegmentReader = $reader;
137 let limit: usize = $limit;
138
139 let should: &[Arc<dyn Query>] = if should_all.len() > super::MAX_QUERY_TOKENS {
141 log::debug!(
142 "BooleanQuery: capping SHOULD clauses from {} to {}",
143 should_all.len(),
144 super::MAX_QUERY_TOKENS,
145 );
146 &should_all[..super::MAX_QUERY_TOKENS]
147 } else {
148 should_all
149 };
150
151 if must_not.is_empty() {
153 if must.len() == 1 && should.is_empty() {
154 return must[0].$scorer_fn(reader, limit) $(. $aw)* ;
155 }
156 if should.len() == 1 && must.is_empty() {
157 return should[0].$scorer_fn(reader, limit) $(. $aw)* ;
158 }
159 }
160
161 if must.is_empty() && must_not.is_empty() && should.len() >= 2 {
163 if let Some((mut infos, _field, avg_field_len, num_docs)) =
165 prepare_text_maxscore(should, reader, global_stats)
166 {
167 let mut posting_lists = Vec::with_capacity(infos.len());
168 for info in infos.drain(..) {
169 if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
170 $(. $aw)* ?
171 {
172 let idf = compute_idf(&pl, info.field, &info.term, num_docs, global_stats);
173 posting_lists.push((pl, idf));
174 }
175 }
176 return finish_text_maxscore(posting_lists, avg_field_len, limit);
177 }
178
179 if let Some(infos) = extract_all_sparse_infos(should) {
181 if let Some((executor, info)) =
182 build_sparse_maxscore_executor(&infos, reader, limit, None)
183 {
184 let raw = executor.$execute_fn() $(. $aw)* ?;
185 return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
186 }
187 }
188
189 if let Some(grouping) = prepare_per_field_grouping(should, reader, limit, global_stats)
191 {
192 let mut scorers: Vec<Box<dyn Scorer + '_>> = Vec::new();
193 for (field, avg_field_len, infos) in &grouping.multi_term_groups {
194 let mut posting_lists = Vec::with_capacity(infos.len());
195 for info in infos {
196 if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
197 $(. $aw)* ?
198 {
199 let idf = compute_idf(
200 &pl, *field, &info.term, grouping.num_docs, global_stats,
201 );
202 posting_lists.push((pl, idf));
203 }
204 }
205 if !posting_lists.is_empty() {
206 scorers.push(finish_text_maxscore(
207 posting_lists,
208 *avg_field_len,
209 grouping.per_field_limit,
210 )?);
211 }
212 }
213 for &idx in &grouping.fallback_indices {
214 scorers.push(should[idx].$scorer_fn(reader, limit) $(. $aw)* ?);
215 }
216 return Ok(build_should_scorer(scorers));
217 }
218 }
219
220 if !should.is_empty() && !must.is_empty() && limit < usize::MAX / 4 {
222 let mut predicates: Vec<super::DocPredicate<'_>> = Vec::new();
224 let mut must_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
225 for q in must {
226 if let Some(pred) = q.as_doc_predicate(reader) {
227 log::debug!("BooleanQuery planner 3a: MUST clause → predicate ({})", q);
228 predicates.push(pred);
229 } else {
230 log::debug!("BooleanQuery planner 3a: MUST clause → verifier scorer ({})", q);
231 must_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
232 }
233 }
234 let mut must_not_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
236 for q in must_not {
237 if let Some(pred) = q.as_doc_predicate(reader) {
238 let negated: super::DocPredicate<'_> =
239 Box::new(move |doc_id| !pred(doc_id));
240 predicates.push(negated);
241 } else {
242 must_not_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
243 }
244 }
245
246 if must_verifiers.is_empty()
248 && must_not_verifiers.is_empty()
249 && !predicates.is_empty()
250 {
251 if let Some(infos) = extract_all_sparse_infos(should) {
252 let combined = chain_predicates(predicates);
253 if let Some((executor, info)) =
254 build_sparse_maxscore_executor(&infos, reader, limit, Some(combined))
255 {
256 log::debug!(
257 "BooleanQuery planner: predicate-aware sparse MaxScore, {} dims",
258 infos.len()
259 );
260 let raw = executor.$execute_fn() $(. $aw)* ?;
261 return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
262 }
263 predicates = Vec::new();
266 for q in must {
267 if let Some(pred) = q.as_doc_predicate(reader) {
268 predicates.push(pred);
269 }
270 }
271 for q in must_not {
272 if let Some(pred) = q.as_doc_predicate(reader) {
273 let negated: super::DocPredicate<'_> =
274 Box::new(move |doc_id| !pred(doc_id));
275 predicates.push(negated);
276 }
277 }
278 }
279 }
280
281 let should_limit = if !predicates.is_empty() { limit * 4 } else { limit };
283 let should_scorer = if should.len() == 1 {
284 should[0].$scorer_fn(reader, should_limit) $(. $aw)* ?
285 } else {
286 let sub = BooleanQuery {
287 must: Vec::new(),
288 should: should.to_vec(),
289 must_not: Vec::new(),
290 global_stats: global_stats.cloned(),
291 };
292 sub.$scorer_fn(reader, should_limit) $(. $aw)* ?
293 };
294
295 let use_predicated =
296 must_verifiers.is_empty() || should_scorer.size_hint() >= limit as u32;
297
298 if use_predicated {
299 log::debug!(
300 "BooleanQuery planner: PredicatedScorer {} preds + {} must_v + {} must_not_v, \
301 SHOULD size_hint={}, over_fetch={}",
302 predicates.len(), must_verifiers.len(), must_not_verifiers.len(),
303 should_scorer.size_hint(), should_limit
304 );
305 return Ok(Box::new(super::PredicatedScorer::new(
306 should_scorer, predicates, must_verifiers, must_not_verifiers,
307 )));
308 }
309
310 log::debug!(
312 "BooleanQuery planner: BooleanScorer fallback, size_hint={} < limit={}, \
313 {} must_v + {} must_not_v",
314 should_scorer.size_hint(), limit,
315 must_verifiers.len(), must_not_verifiers.len()
316 );
317 let mut scorer = BooleanScorer {
318 must: must_verifiers,
319 should: vec![should_scorer],
320 must_not: must_not_verifiers,
321 current_doc: 0,
322 };
323 scorer.current_doc = scorer.find_next_match();
324 return Ok(Box::new(scorer));
325 }
326
327 let mut must_scorers = Vec::with_capacity(must.len());
329 for q in must {
330 must_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
331 }
332 let mut should_scorers = Vec::with_capacity(should.len());
333 for q in should {
334 should_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
335 }
336 let mut must_not_scorers = Vec::with_capacity(must_not.len());
337 for q in must_not {
338 must_not_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
339 }
340 let mut scorer = BooleanScorer {
341 must: must_scorers,
342 should: should_scorers,
343 must_not: must_not_scorers,
344 current_doc: 0,
345 };
346 scorer.current_doc = scorer.find_next_match();
347 Ok(Box::new(scorer) as Box<dyn Scorer + '_>)
348 }};
349}
350
351impl Query for BooleanQuery {
352 fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
353 let must = self.must.clone();
354 let should = self.should.clone();
355 let must_not = self.must_not.clone();
356 let global_stats = self.global_stats.clone();
357 Box::pin(async move {
358 boolean_plan!(
359 must,
360 should,
361 must_not,
362 global_stats.as_ref(),
363 reader,
364 limit,
365 scorer,
366 get_postings,
367 execute,
368 await
369 )
370 })
371 }
372
373 #[cfg(feature = "sync")]
374 fn scorer_sync<'a>(
375 &self,
376 reader: &'a SegmentReader,
377 limit: usize,
378 ) -> crate::Result<Box<dyn Scorer + 'a>> {
379 boolean_plan!(
380 self.must,
381 self.should,
382 self.must_not,
383 self.global_stats.as_ref(),
384 reader,
385 limit,
386 scorer_sync,
387 get_postings_sync,
388 execute_sync
389 )
390 }
391
392 fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
393 if self.must.is_empty() && self.should.is_empty() {
395 return None;
396 }
397
398 let must_preds: Vec<_> = self
400 .must
401 .iter()
402 .map(|q| q.as_doc_predicate(reader))
403 .collect::<Option<Vec<_>>>()?;
404 let should_preds: Vec<_> = self
405 .should
406 .iter()
407 .map(|q| q.as_doc_predicate(reader))
408 .collect::<Option<Vec<_>>>()?;
409 let must_not_preds: Vec<_> = self
410 .must_not
411 .iter()
412 .map(|q| q.as_doc_predicate(reader))
413 .collect::<Option<Vec<_>>>()?;
414
415 let has_must = !must_preds.is_empty();
416
417 Some(Box::new(move |doc_id| {
418 if !must_preds.iter().all(|p| p(doc_id)) {
420 return false;
421 }
422 if !has_must && !should_preds.is_empty() && !should_preds.iter().any(|p| p(doc_id)) {
424 return false;
425 }
426 must_not_preds.iter().all(|p| !p(doc_id))
428 }))
429 }
430
431 fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
432 let must = self.must.clone();
433 let should = self.should.clone();
434
435 Box::pin(async move {
436 if !must.is_empty() {
437 let mut estimates = Vec::with_capacity(must.len());
438 for q in &must {
439 estimates.push(q.count_estimate(reader).await?);
440 }
441 estimates
442 .into_iter()
443 .min()
444 .ok_or_else(|| crate::Error::Corruption("Empty must clause".to_string()))
445 } else if !should.is_empty() {
446 let mut sum = 0u32;
447 for q in &should {
448 sum = sum.saturating_add(q.count_estimate(reader).await?);
449 }
450 Ok(sum)
451 } else {
452 Ok(0)
453 }
454 })
455 }
456}
457
458struct BooleanScorer<'a> {
459 must: Vec<Box<dyn Scorer + 'a>>,
460 should: Vec<Box<dyn Scorer + 'a>>,
461 must_not: Vec<Box<dyn Scorer + 'a>>,
462 current_doc: DocId,
463}
464
465impl BooleanScorer<'_> {
466 fn find_next_match(&mut self) -> DocId {
467 if self.must.is_empty() && self.should.is_empty() {
468 return TERMINATED;
469 }
470
471 loop {
472 let candidate = if !self.must.is_empty() {
473 let mut max_doc = self
474 .must
475 .iter()
476 .map(|s| s.doc())
477 .max()
478 .unwrap_or(TERMINATED);
479
480 if max_doc == TERMINATED {
481 return TERMINATED;
482 }
483
484 loop {
485 let mut all_match = true;
486 for scorer in &mut self.must {
487 let doc = scorer.seek(max_doc);
488 if doc == TERMINATED {
489 return TERMINATED;
490 }
491 if doc > max_doc {
492 max_doc = doc;
493 all_match = false;
494 break;
495 }
496 }
497 if all_match {
498 break;
499 }
500 }
501 max_doc
502 } else {
503 self.should
504 .iter()
505 .map(|s| s.doc())
506 .filter(|&d| d != TERMINATED)
507 .min()
508 .unwrap_or(TERMINATED)
509 };
510
511 if candidate == TERMINATED {
512 return TERMINATED;
513 }
514
515 let excluded = self.must_not.iter_mut().any(|scorer| {
516 let doc = scorer.seek(candidate);
517 doc == candidate
518 });
519
520 if !excluded {
521 for scorer in &mut self.should {
523 scorer.seek(candidate);
524 }
525 self.current_doc = candidate;
526 return candidate;
527 }
528
529 if !self.must.is_empty() {
531 for scorer in &mut self.must {
532 scorer.advance();
533 }
534 } else {
535 for scorer in &mut self.should {
537 if scorer.doc() <= candidate && scorer.doc() != TERMINATED {
538 scorer.seek(candidate + 1);
539 }
540 }
541 }
542 }
543 }
544}
545
546impl super::docset::DocSet for BooleanScorer<'_> {
547 fn doc(&self) -> DocId {
548 self.current_doc
549 }
550
551 fn advance(&mut self) -> DocId {
552 if !self.must.is_empty() {
553 for scorer in &mut self.must {
554 scorer.advance();
555 }
556 } else {
557 for scorer in &mut self.should {
558 if scorer.doc() == self.current_doc {
559 scorer.advance();
560 }
561 }
562 }
563
564 self.current_doc = self.find_next_match();
565 self.current_doc
566 }
567
568 fn seek(&mut self, target: DocId) -> DocId {
569 for scorer in &mut self.must {
570 scorer.seek(target);
571 }
572
573 for scorer in &mut self.should {
574 scorer.seek(target);
575 }
576
577 self.current_doc = self.find_next_match();
578 self.current_doc
579 }
580
581 fn size_hint(&self) -> u32 {
582 if !self.must.is_empty() {
583 self.must.iter().map(|s| s.size_hint()).min().unwrap_or(0)
584 } else {
585 self.should.iter().map(|s| s.size_hint()).sum()
586 }
587 }
588}
589
590impl Scorer for BooleanScorer<'_> {
591 fn score(&self) -> Score {
592 let mut total = 0.0;
593
594 for scorer in &self.must {
595 if scorer.doc() == self.current_doc {
596 total += scorer.score();
597 }
598 }
599
600 for scorer in &self.should {
601 if scorer.doc() == self.current_doc {
602 total += scorer.score();
603 }
604 }
605
606 total
607 }
608
609 fn matched_positions(&self) -> Option<super::MatchedPositions> {
610 let mut all_positions: super::MatchedPositions = Vec::new();
611
612 for scorer in &self.must {
613 if scorer.doc() == self.current_doc
614 && let Some(positions) = scorer.matched_positions()
615 {
616 all_positions.extend(positions);
617 }
618 }
619
620 for scorer in &self.should {
621 if scorer.doc() == self.current_doc
622 && let Some(positions) = scorer.matched_positions()
623 {
624 all_positions.extend(positions);
625 }
626 }
627
628 if all_positions.is_empty() {
629 None
630 } else {
631 Some(all_positions)
632 }
633 }
634}
635
636#[cfg(test)]
637mod tests {
638 use super::*;
639 use crate::dsl::Field;
640 use crate::query::{QueryDecomposition, TermQuery};
641
642 #[test]
643 fn test_maxscore_eligible_pure_or_same_field() {
644 let query = BooleanQuery::new()
646 .should(TermQuery::text(Field(0), "hello"))
647 .should(TermQuery::text(Field(0), "world"))
648 .should(TermQuery::text(Field(0), "foo"));
649
650 assert!(
652 query
653 .should
654 .iter()
655 .all(|q| matches!(q.decompose(), QueryDecomposition::TextTerm(_)))
656 );
657
658 let infos: Vec<_> = query
660 .should
661 .iter()
662 .filter_map(|q| match q.decompose() {
663 QueryDecomposition::TextTerm(info) => Some(info),
664 _ => None,
665 })
666 .collect();
667 assert_eq!(infos.len(), 3);
668 assert!(infos.iter().all(|i| i.field == Field(0)));
669 }
670
671 #[test]
672 fn test_maxscore_not_eligible_different_fields() {
673 let query = BooleanQuery::new()
675 .should(TermQuery::text(Field(0), "hello"))
676 .should(TermQuery::text(Field(1), "world")); let infos: Vec<_> = query
679 .should
680 .iter()
681 .filter_map(|q| match q.decompose() {
682 QueryDecomposition::TextTerm(info) => Some(info),
683 _ => None,
684 })
685 .collect();
686 assert_eq!(infos.len(), 2);
687 assert!(infos[0].field != infos[1].field);
689 }
690
691 #[test]
692 fn test_maxscore_not_eligible_with_must() {
693 let query = BooleanQuery::new()
695 .must(TermQuery::text(Field(0), "required"))
696 .should(TermQuery::text(Field(0), "hello"))
697 .should(TermQuery::text(Field(0), "world"));
698
699 assert!(!query.must.is_empty());
701 }
702
703 #[test]
704 fn test_maxscore_not_eligible_with_must_not() {
705 let query = BooleanQuery::new()
707 .should(TermQuery::text(Field(0), "hello"))
708 .should(TermQuery::text(Field(0), "world"))
709 .must_not(TermQuery::text(Field(0), "excluded"));
710
711 assert!(!query.must_not.is_empty());
713 }
714
715 #[test]
716 fn test_maxscore_not_eligible_single_term() {
717 let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
719
720 assert_eq!(query.should.len(), 1);
722 }
723
724 #[test]
725 fn test_term_query_info_extraction() {
726 let term_query = TermQuery::text(Field(42), "test");
727 match term_query.decompose() {
728 QueryDecomposition::TextTerm(info) => {
729 assert_eq!(info.field, Field(42));
730 assert_eq!(info.term, b"test");
731 }
732 _ => panic!("Expected TextTerm decomposition"),
733 }
734 }
735
736 #[test]
737 fn test_boolean_query_no_term_info() {
738 let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
740
741 assert!(matches!(query.decompose(), QueryDecomposition::Opaque));
742 }
743}