1use std::sync::Arc;
4
5use crate::segment::SegmentReader;
6use crate::structures::TERMINATED;
7use crate::{DocId, Score};
8
9use super::planner::{
10 build_sparse_maxscore_executor, chain_predicates, combine_sparse_results, compute_idf,
11 extract_all_sparse_infos, finish_text_maxscore, prepare_per_field_grouping,
12 prepare_text_maxscore,
13};
14use super::{CountFuture, EmptyScorer, GlobalStats, Query, Scorer, ScorerFuture};
15
16#[derive(Default, Clone)]
21pub struct BooleanQuery {
22 pub must: Vec<Arc<dyn Query>>,
23 pub should: Vec<Arc<dyn Query>>,
24 pub must_not: Vec<Arc<dyn Query>>,
25 global_stats: Option<Arc<GlobalStats>>,
27}
28
29impl std::fmt::Debug for BooleanQuery {
30 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31 f.debug_struct("BooleanQuery")
32 .field("must_count", &self.must.len())
33 .field("should_count", &self.should.len())
34 .field("must_not_count", &self.must_not.len())
35 .field("has_global_stats", &self.global_stats.is_some())
36 .finish()
37 }
38}
39
40impl std::fmt::Display for BooleanQuery {
41 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 write!(f, "Boolean(")?;
43 let mut first = true;
44 for q in &self.must {
45 if !first {
46 write!(f, " ")?;
47 }
48 write!(f, "+{}", q)?;
49 first = false;
50 }
51 for q in &self.should {
52 if !first {
53 write!(f, " ")?;
54 }
55 write!(f, "{}", q)?;
56 first = false;
57 }
58 for q in &self.must_not {
59 if !first {
60 write!(f, " ")?;
61 }
62 write!(f, "-{}", q)?;
63 first = false;
64 }
65 write!(f, ")")
66 }
67}
68
69impl BooleanQuery {
70 pub fn new() -> Self {
71 Self::default()
72 }
73
74 pub fn must(mut self, query: impl Query + 'static) -> Self {
75 self.must.push(Arc::new(query));
76 self
77 }
78
79 pub fn should(mut self, query: impl Query + 'static) -> Self {
80 self.should.push(Arc::new(query));
81 self
82 }
83
84 pub fn must_not(mut self, query: impl Query + 'static) -> Self {
85 self.must_not.push(Arc::new(query));
86 self
87 }
88
89 pub fn with_global_stats(mut self, stats: Arc<GlobalStats>) -> Self {
91 self.global_stats = Some(stats);
92 self
93 }
94}
95
96fn build_should_scorer<'a>(scorers: Vec<Box<dyn Scorer + 'a>>) -> Box<dyn Scorer + 'a> {
98 if scorers.is_empty() {
99 return Box::new(EmptyScorer);
100 }
101 if scorers.len() == 1 {
102 return scorers.into_iter().next().unwrap();
103 }
104 let mut scorer = BooleanScorer {
105 must: vec![],
106 should: scorers,
107 must_not: vec![],
108 current_doc: 0,
109 };
110 scorer.current_doc = scorer.find_next_match();
111 Box::new(scorer)
112}
113
114macro_rules! boolean_plan {
128 ($must:expr, $should:expr, $must_not:expr, $global_stats:expr,
129 $reader:expr, $limit:expr,
130 $scorer_fn:ident, $get_postings_fn:ident, $execute_fn:ident
131 $(, $aw:tt)*) => {{
132 let must: &[Arc<dyn Query>] = &$must;
133 let should: &[Arc<dyn Query>] = &$should;
134 let must_not: &[Arc<dyn Query>] = &$must_not;
135 let global_stats: Option<&Arc<GlobalStats>> = $global_stats;
136 let reader: &SegmentReader = $reader;
137 let limit: usize = $limit;
138
139 if must_not.is_empty() {
141 if must.len() == 1 && should.is_empty() {
142 return must[0].$scorer_fn(reader, limit) $(. $aw)* ;
143 }
144 if should.len() == 1 && must.is_empty() {
145 return should[0].$scorer_fn(reader, limit) $(. $aw)* ;
146 }
147 }
148
149 if must.is_empty() && must_not.is_empty() && should.len() >= 2 {
151 if let Some((mut infos, _field, avg_field_len, num_docs)) =
153 prepare_text_maxscore(should, reader, global_stats)
154 {
155 let mut posting_lists = Vec::with_capacity(infos.len());
156 for info in infos.drain(..) {
157 if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
158 $(. $aw)* ?
159 {
160 let idf = compute_idf(&pl, info.field, &info.term, num_docs, global_stats);
161 posting_lists.push((pl, idf));
162 }
163 }
164 return finish_text_maxscore(posting_lists, avg_field_len, limit);
165 }
166
167 if let Some(infos) = extract_all_sparse_infos(should) {
169 if let Some((executor, info)) =
170 build_sparse_maxscore_executor(&infos, reader, limit, None)
171 {
172 let raw = executor.$execute_fn() $(. $aw)* ?;
173 return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
174 }
175 }
176
177 if let Some(grouping) = prepare_per_field_grouping(should, reader, limit, global_stats)
179 {
180 let mut scorers: Vec<Box<dyn Scorer + '_>> = Vec::new();
181 for (field, avg_field_len, infos) in &grouping.multi_term_groups {
182 let mut posting_lists = Vec::with_capacity(infos.len());
183 for info in infos {
184 if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
185 $(. $aw)* ?
186 {
187 let idf = compute_idf(
188 &pl, *field, &info.term, grouping.num_docs, global_stats,
189 );
190 posting_lists.push((pl, idf));
191 }
192 }
193 if !posting_lists.is_empty() {
194 scorers.push(finish_text_maxscore(
195 posting_lists,
196 *avg_field_len,
197 grouping.per_field_limit,
198 )?);
199 }
200 }
201 for &idx in &grouping.fallback_indices {
202 scorers.push(should[idx].$scorer_fn(reader, limit) $(. $aw)* ?);
203 }
204 return Ok(build_should_scorer(scorers));
205 }
206 }
207
208 if !should.is_empty() && !must.is_empty() && limit < usize::MAX / 4 {
210 let mut predicates: Vec<super::DocPredicate<'_>> = Vec::new();
212 let mut must_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
213 for q in must {
214 if let Some(pred) = q.as_doc_predicate(reader) {
215 predicates.push(pred);
216 } else {
217 must_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
218 }
219 }
220 let mut must_not_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
222 for q in must_not {
223 if let Some(pred) = q.as_doc_predicate(reader) {
224 let negated: super::DocPredicate<'_> =
225 Box::new(move |doc_id| !pred(doc_id));
226 predicates.push(negated);
227 } else {
228 must_not_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
229 }
230 }
231
232 if must_verifiers.is_empty()
234 && must_not_verifiers.is_empty()
235 && !predicates.is_empty()
236 {
237 if let Some(infos) = extract_all_sparse_infos(should) {
238 let combined = chain_predicates(predicates);
239 if let Some((executor, info)) =
240 build_sparse_maxscore_executor(&infos, reader, limit, Some(combined))
241 {
242 log::debug!(
243 "BooleanQuery planner: predicate-aware sparse MaxScore, {} dims",
244 infos.len()
245 );
246 let raw = executor.$execute_fn() $(. $aw)* ?;
247 return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
248 }
249 predicates = Vec::new();
252 for q in must {
253 if let Some(pred) = q.as_doc_predicate(reader) {
254 predicates.push(pred);
255 }
256 }
257 for q in must_not {
258 if let Some(pred) = q.as_doc_predicate(reader) {
259 let negated: super::DocPredicate<'_> =
260 Box::new(move |doc_id| !pred(doc_id));
261 predicates.push(negated);
262 }
263 }
264 }
265 }
266
267 let should_limit = if !predicates.is_empty() { limit * 4 } else { limit };
269 let should_scorer = if should.len() == 1 {
270 should[0].$scorer_fn(reader, should_limit) $(. $aw)* ?
271 } else {
272 let sub = BooleanQuery {
273 must: Vec::new(),
274 should: should.to_vec(),
275 must_not: Vec::new(),
276 global_stats: global_stats.cloned(),
277 };
278 sub.$scorer_fn(reader, should_limit) $(. $aw)* ?
279 };
280
281 let use_predicated =
282 must_verifiers.is_empty() || should_scorer.size_hint() >= limit as u32;
283
284 if use_predicated {
285 log::debug!(
286 "BooleanQuery planner: PredicatedScorer {} preds + {} must_v + {} must_not_v, \
287 SHOULD size_hint={}, over_fetch={}",
288 predicates.len(), must_verifiers.len(), must_not_verifiers.len(),
289 should_scorer.size_hint(), should_limit
290 );
291 return Ok(Box::new(super::PredicatedScorer::new(
292 should_scorer, predicates, must_verifiers, must_not_verifiers,
293 )));
294 }
295
296 let mut scorer = BooleanScorer {
298 must: must_verifiers,
299 should: vec![should_scorer],
300 must_not: must_not_verifiers,
301 current_doc: 0,
302 };
303 scorer.current_doc = scorer.find_next_match();
304 return Ok(Box::new(scorer));
305 }
306
307 let mut must_scorers = Vec::with_capacity(must.len());
309 for q in must {
310 must_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
311 }
312 let mut should_scorers = Vec::with_capacity(should.len());
313 for q in should {
314 should_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
315 }
316 let mut must_not_scorers = Vec::with_capacity(must_not.len());
317 for q in must_not {
318 must_not_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
319 }
320 let mut scorer = BooleanScorer {
321 must: must_scorers,
322 should: should_scorers,
323 must_not: must_not_scorers,
324 current_doc: 0,
325 };
326 scorer.current_doc = scorer.find_next_match();
327 Ok(Box::new(scorer) as Box<dyn Scorer + '_>)
328 }};
329}
330
331impl Query for BooleanQuery {
332 fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
333 let must = self.must.clone();
334 let should = self.should.clone();
335 let must_not = self.must_not.clone();
336 let global_stats = self.global_stats.clone();
337 Box::pin(async move {
338 boolean_plan!(
339 must,
340 should,
341 must_not,
342 global_stats.as_ref(),
343 reader,
344 limit,
345 scorer,
346 get_postings,
347 execute,
348 await
349 )
350 })
351 }
352
353 #[cfg(feature = "sync")]
354 fn scorer_sync<'a>(
355 &self,
356 reader: &'a SegmentReader,
357 limit: usize,
358 ) -> crate::Result<Box<dyn Scorer + 'a>> {
359 boolean_plan!(
360 self.must,
361 self.should,
362 self.must_not,
363 self.global_stats.as_ref(),
364 reader,
365 limit,
366 scorer_sync,
367 get_postings_sync,
368 execute_sync
369 )
370 }
371
372 fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
373 if self.must.is_empty() && self.should.is_empty() {
375 return None;
376 }
377
378 let must_preds: Vec<_> = self
380 .must
381 .iter()
382 .map(|q| q.as_doc_predicate(reader))
383 .collect::<Option<Vec<_>>>()?;
384 let should_preds: Vec<_> = self
385 .should
386 .iter()
387 .map(|q| q.as_doc_predicate(reader))
388 .collect::<Option<Vec<_>>>()?;
389 let must_not_preds: Vec<_> = self
390 .must_not
391 .iter()
392 .map(|q| q.as_doc_predicate(reader))
393 .collect::<Option<Vec<_>>>()?;
394
395 let has_must = !must_preds.is_empty();
396
397 Some(Box::new(move |doc_id| {
398 if !must_preds.iter().all(|p| p(doc_id)) {
400 return false;
401 }
402 if !has_must && !should_preds.is_empty() && !should_preds.iter().any(|p| p(doc_id)) {
404 return false;
405 }
406 must_not_preds.iter().all(|p| !p(doc_id))
408 }))
409 }
410
411 fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
412 let must = self.must.clone();
413 let should = self.should.clone();
414
415 Box::pin(async move {
416 if !must.is_empty() {
417 let mut estimates = Vec::with_capacity(must.len());
418 for q in &must {
419 estimates.push(q.count_estimate(reader).await?);
420 }
421 estimates
422 .into_iter()
423 .min()
424 .ok_or_else(|| crate::Error::Corruption("Empty must clause".to_string()))
425 } else if !should.is_empty() {
426 let mut sum = 0u32;
427 for q in &should {
428 sum = sum.saturating_add(q.count_estimate(reader).await?);
429 }
430 Ok(sum)
431 } else {
432 Ok(0)
433 }
434 })
435 }
436}
437
438struct BooleanScorer<'a> {
439 must: Vec<Box<dyn Scorer + 'a>>,
440 should: Vec<Box<dyn Scorer + 'a>>,
441 must_not: Vec<Box<dyn Scorer + 'a>>,
442 current_doc: DocId,
443}
444
445impl BooleanScorer<'_> {
446 fn find_next_match(&mut self) -> DocId {
447 if self.must.is_empty() && self.should.is_empty() {
448 return TERMINATED;
449 }
450
451 loop {
452 let candidate = if !self.must.is_empty() {
453 let mut max_doc = self
454 .must
455 .iter()
456 .map(|s| s.doc())
457 .max()
458 .unwrap_or(TERMINATED);
459
460 if max_doc == TERMINATED {
461 return TERMINATED;
462 }
463
464 loop {
465 let mut all_match = true;
466 for scorer in &mut self.must {
467 let doc = scorer.seek(max_doc);
468 if doc == TERMINATED {
469 return TERMINATED;
470 }
471 if doc > max_doc {
472 max_doc = doc;
473 all_match = false;
474 break;
475 }
476 }
477 if all_match {
478 break;
479 }
480 }
481 max_doc
482 } else {
483 self.should
484 .iter()
485 .map(|s| s.doc())
486 .filter(|&d| d != TERMINATED)
487 .min()
488 .unwrap_or(TERMINATED)
489 };
490
491 if candidate == TERMINATED {
492 return TERMINATED;
493 }
494
495 let excluded = self.must_not.iter_mut().any(|scorer| {
496 let doc = scorer.seek(candidate);
497 doc == candidate
498 });
499
500 if !excluded {
501 for scorer in &mut self.should {
503 scorer.seek(candidate);
504 }
505 self.current_doc = candidate;
506 return candidate;
507 }
508
509 if !self.must.is_empty() {
511 for scorer in &mut self.must {
512 scorer.advance();
513 }
514 } else {
515 for scorer in &mut self.should {
517 if scorer.doc() <= candidate && scorer.doc() != TERMINATED {
518 scorer.seek(candidate + 1);
519 }
520 }
521 }
522 }
523 }
524}
525
526impl super::docset::DocSet for BooleanScorer<'_> {
527 fn doc(&self) -> DocId {
528 self.current_doc
529 }
530
531 fn advance(&mut self) -> DocId {
532 if !self.must.is_empty() {
533 for scorer in &mut self.must {
534 scorer.advance();
535 }
536 } else {
537 for scorer in &mut self.should {
538 if scorer.doc() == self.current_doc {
539 scorer.advance();
540 }
541 }
542 }
543
544 self.current_doc = self.find_next_match();
545 self.current_doc
546 }
547
548 fn seek(&mut self, target: DocId) -> DocId {
549 for scorer in &mut self.must {
550 scorer.seek(target);
551 }
552
553 for scorer in &mut self.should {
554 scorer.seek(target);
555 }
556
557 self.current_doc = self.find_next_match();
558 self.current_doc
559 }
560
561 fn size_hint(&self) -> u32 {
562 if !self.must.is_empty() {
563 self.must.iter().map(|s| s.size_hint()).min().unwrap_or(0)
564 } else {
565 self.should.iter().map(|s| s.size_hint()).sum()
566 }
567 }
568}
569
570impl Scorer for BooleanScorer<'_> {
571 fn score(&self) -> Score {
572 let mut total = 0.0;
573
574 for scorer in &self.must {
575 if scorer.doc() == self.current_doc {
576 total += scorer.score();
577 }
578 }
579
580 for scorer in &self.should {
581 if scorer.doc() == self.current_doc {
582 total += scorer.score();
583 }
584 }
585
586 total
587 }
588
589 fn matched_positions(&self) -> Option<super::MatchedPositions> {
590 let mut all_positions: super::MatchedPositions = Vec::new();
591
592 for scorer in &self.must {
593 if scorer.doc() == self.current_doc
594 && let Some(positions) = scorer.matched_positions()
595 {
596 all_positions.extend(positions);
597 }
598 }
599
600 for scorer in &self.should {
601 if scorer.doc() == self.current_doc
602 && let Some(positions) = scorer.matched_positions()
603 {
604 all_positions.extend(positions);
605 }
606 }
607
608 if all_positions.is_empty() {
609 None
610 } else {
611 Some(all_positions)
612 }
613 }
614}
615
616#[cfg(test)]
617mod tests {
618 use super::*;
619 use crate::dsl::Field;
620 use crate::query::{QueryDecomposition, TermQuery};
621
622 #[test]
623 fn test_maxscore_eligible_pure_or_same_field() {
624 let query = BooleanQuery::new()
626 .should(TermQuery::text(Field(0), "hello"))
627 .should(TermQuery::text(Field(0), "world"))
628 .should(TermQuery::text(Field(0), "foo"));
629
630 assert!(
632 query
633 .should
634 .iter()
635 .all(|q| matches!(q.decompose(), QueryDecomposition::TextTerm(_)))
636 );
637
638 let infos: Vec<_> = query
640 .should
641 .iter()
642 .filter_map(|q| match q.decompose() {
643 QueryDecomposition::TextTerm(info) => Some(info),
644 _ => None,
645 })
646 .collect();
647 assert_eq!(infos.len(), 3);
648 assert!(infos.iter().all(|i| i.field == Field(0)));
649 }
650
651 #[test]
652 fn test_maxscore_not_eligible_different_fields() {
653 let query = BooleanQuery::new()
655 .should(TermQuery::text(Field(0), "hello"))
656 .should(TermQuery::text(Field(1), "world")); let infos: Vec<_> = query
659 .should
660 .iter()
661 .filter_map(|q| match q.decompose() {
662 QueryDecomposition::TextTerm(info) => Some(info),
663 _ => None,
664 })
665 .collect();
666 assert_eq!(infos.len(), 2);
667 assert!(infos[0].field != infos[1].field);
669 }
670
671 #[test]
672 fn test_maxscore_not_eligible_with_must() {
673 let query = BooleanQuery::new()
675 .must(TermQuery::text(Field(0), "required"))
676 .should(TermQuery::text(Field(0), "hello"))
677 .should(TermQuery::text(Field(0), "world"));
678
679 assert!(!query.must.is_empty());
681 }
682
683 #[test]
684 fn test_maxscore_not_eligible_with_must_not() {
685 let query = BooleanQuery::new()
687 .should(TermQuery::text(Field(0), "hello"))
688 .should(TermQuery::text(Field(0), "world"))
689 .must_not(TermQuery::text(Field(0), "excluded"));
690
691 assert!(!query.must_not.is_empty());
693 }
694
695 #[test]
696 fn test_maxscore_not_eligible_single_term() {
697 let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
699
700 assert_eq!(query.should.len(), 1);
702 }
703
704 #[test]
705 fn test_term_query_info_extraction() {
706 let term_query = TermQuery::text(Field(42), "test");
707 match term_query.decompose() {
708 QueryDecomposition::TextTerm(info) => {
709 assert_eq!(info.field, Field(42));
710 assert_eq!(info.term, b"test");
711 }
712 _ => panic!("Expected TextTerm decomposition"),
713 }
714 }
715
716 #[test]
717 fn test_boolean_query_no_term_info() {
718 let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
720
721 assert!(matches!(query.decompose(), QueryDecomposition::Opaque));
722 }
723}