1use std::sync::Arc;
4
5use crate::segment::SegmentReader;
6use crate::structures::TERMINATED;
7use crate::{DocId, Score};
8
9use super::planner::{
10 build_sparse_maxscore_executor, chain_predicates, combine_sparse_results, compute_idf,
11 extract_all_sparse_infos, finish_text_maxscore, prepare_per_field_grouping,
12 prepare_text_maxscore,
13};
14use super::{CountFuture, EmptyScorer, GlobalStats, Query, Scorer, ScorerFuture};
15
16#[derive(Default, Clone)]
21pub struct BooleanQuery {
22 pub must: Vec<Arc<dyn Query>>,
23 pub should: Vec<Arc<dyn Query>>,
24 pub must_not: Vec<Arc<dyn Query>>,
25 global_stats: Option<Arc<GlobalStats>>,
27}
28
29impl std::fmt::Debug for BooleanQuery {
30 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31 f.debug_struct("BooleanQuery")
32 .field("must_count", &self.must.len())
33 .field("should_count", &self.should.len())
34 .field("must_not_count", &self.must_not.len())
35 .field("has_global_stats", &self.global_stats.is_some())
36 .finish()
37 }
38}
39
40impl std::fmt::Display for BooleanQuery {
41 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 write!(f, "Boolean(")?;
43 let mut first = true;
44 for q in &self.must {
45 if !first {
46 write!(f, " ")?;
47 }
48 write!(f, "+{}", q)?;
49 first = false;
50 }
51 for q in &self.should {
52 if !first {
53 write!(f, " ")?;
54 }
55 write!(f, "{}", q)?;
56 first = false;
57 }
58 for q in &self.must_not {
59 if !first {
60 write!(f, " ")?;
61 }
62 write!(f, "-{}", q)?;
63 first = false;
64 }
65 write!(f, ")")
66 }
67}
68
69impl BooleanQuery {
70 pub fn new() -> Self {
71 Self::default()
72 }
73
74 pub fn must(mut self, query: impl Query + 'static) -> Self {
75 self.must.push(Arc::new(query));
76 self
77 }
78
79 pub fn should(mut self, query: impl Query + 'static) -> Self {
80 self.should.push(Arc::new(query));
81 self
82 }
83
84 pub fn must_not(mut self, query: impl Query + 'static) -> Self {
85 self.must_not.push(Arc::new(query));
86 self
87 }
88
89 pub fn with_global_stats(mut self, stats: Arc<GlobalStats>) -> Self {
91 self.global_stats = Some(stats);
92 self
93 }
94}
95
96fn build_should_scorer<'a>(scorers: Vec<Box<dyn Scorer + 'a>>) -> Box<dyn Scorer + 'a> {
98 if scorers.is_empty() {
99 return Box::new(EmptyScorer);
100 }
101 if scorers.len() == 1 {
102 return scorers.into_iter().next().unwrap();
103 }
104 let mut scorer = BooleanScorer {
105 must: vec![],
106 should: scorers,
107 must_not: vec![],
108 current_doc: 0,
109 };
110 scorer.current_doc = scorer.find_next_match();
111 Box::new(scorer)
112}
113
114macro_rules! boolean_plan {
128 ($must:expr, $should:expr, $must_not:expr, $global_stats:expr,
129 $reader:expr, $limit:expr,
130 $scorer_fn:ident, $get_postings_fn:ident, $execute_fn:ident
131 $(, $aw:tt)*) => {{
132 let must: &[Arc<dyn Query>] = &$must;
133 let should: &[Arc<dyn Query>] = &$should;
134 let must_not: &[Arc<dyn Query>] = &$must_not;
135 let global_stats: Option<&Arc<GlobalStats>> = $global_stats;
136 let reader: &SegmentReader = $reader;
137 let limit: usize = $limit;
138
139 if must_not.is_empty() {
141 if must.len() == 1 && should.is_empty() {
142 return must[0].$scorer_fn(reader, limit) $(. $aw)* ;
143 }
144 if should.len() == 1 && must.is_empty() {
145 return should[0].$scorer_fn(reader, limit) $(. $aw)* ;
146 }
147 }
148
149 if must.is_empty() && must_not.is_empty() && should.len() >= 2 {
151 if let Some((mut infos, _field, avg_field_len, num_docs)) =
153 prepare_text_maxscore(should, reader, global_stats)
154 {
155 let mut posting_lists = Vec::with_capacity(infos.len());
156 for info in infos.drain(..) {
157 if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
158 $(. $aw)* ?
159 {
160 let idf = compute_idf(&pl, info.field, &info.term, num_docs, global_stats);
161 posting_lists.push((pl, idf));
162 }
163 }
164 return finish_text_maxscore(posting_lists, avg_field_len, limit);
165 }
166
167 if let Some(infos) = extract_all_sparse_infos(should) {
169 if let Some((executor, info)) =
170 build_sparse_maxscore_executor(&infos, reader, limit, None)
171 {
172 let raw = executor.$execute_fn() $(. $aw)* ?;
173 return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
174 }
175 }
176
177 if let Some(grouping) = prepare_per_field_grouping(should, reader, limit, global_stats)
179 {
180 let mut scorers: Vec<Box<dyn Scorer + '_>> = Vec::new();
181 for (field, avg_field_len, infos) in &grouping.multi_term_groups {
182 let mut posting_lists = Vec::with_capacity(infos.len());
183 for info in infos {
184 if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
185 $(. $aw)* ?
186 {
187 let idf = compute_idf(
188 &pl, *field, &info.term, grouping.num_docs, global_stats,
189 );
190 posting_lists.push((pl, idf));
191 }
192 }
193 if !posting_lists.is_empty() {
194 scorers.push(finish_text_maxscore(
195 posting_lists,
196 *avg_field_len,
197 grouping.per_field_limit,
198 )?);
199 }
200 }
201 for &idx in &grouping.fallback_indices {
202 scorers.push(should[idx].$scorer_fn(reader, limit) $(. $aw)* ?);
203 }
204 return Ok(build_should_scorer(scorers));
205 }
206 }
207
208 if !should.is_empty() && !must.is_empty() && limit < usize::MAX / 4 {
210 let mut predicates: Vec<super::DocPredicate<'_>> = Vec::new();
212 let mut must_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
213 for q in must {
214 if let Some(pred) = q.as_doc_predicate(reader) {
215 log::debug!("BooleanQuery planner 3a: MUST clause → predicate ({})", q);
216 predicates.push(pred);
217 } else {
218 log::debug!("BooleanQuery planner 3a: MUST clause → verifier scorer ({})", q);
219 must_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
220 }
221 }
222 let mut must_not_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
224 for q in must_not {
225 if let Some(pred) = q.as_doc_predicate(reader) {
226 let negated: super::DocPredicate<'_> =
227 Box::new(move |doc_id| !pred(doc_id));
228 predicates.push(negated);
229 } else {
230 must_not_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
231 }
232 }
233
234 if must_verifiers.is_empty()
236 && must_not_verifiers.is_empty()
237 && !predicates.is_empty()
238 {
239 if let Some(infos) = extract_all_sparse_infos(should) {
240 let combined = chain_predicates(predicates);
241 if let Some((executor, info)) =
242 build_sparse_maxscore_executor(&infos, reader, limit, Some(combined))
243 {
244 log::debug!(
245 "BooleanQuery planner: predicate-aware sparse MaxScore, {} dims",
246 infos.len()
247 );
248 let raw = executor.$execute_fn() $(. $aw)* ?;
249 return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
250 }
251 predicates = Vec::new();
254 for q in must {
255 if let Some(pred) = q.as_doc_predicate(reader) {
256 predicates.push(pred);
257 }
258 }
259 for q in must_not {
260 if let Some(pred) = q.as_doc_predicate(reader) {
261 let negated: super::DocPredicate<'_> =
262 Box::new(move |doc_id| !pred(doc_id));
263 predicates.push(negated);
264 }
265 }
266 }
267 }
268
269 let should_limit = if !predicates.is_empty() { limit * 4 } else { limit };
271 let should_scorer = if should.len() == 1 {
272 should[0].$scorer_fn(reader, should_limit) $(. $aw)* ?
273 } else {
274 let sub = BooleanQuery {
275 must: Vec::new(),
276 should: should.to_vec(),
277 must_not: Vec::new(),
278 global_stats: global_stats.cloned(),
279 };
280 sub.$scorer_fn(reader, should_limit) $(. $aw)* ?
281 };
282
283 let use_predicated =
284 must_verifiers.is_empty() || should_scorer.size_hint() >= limit as u32;
285
286 if use_predicated {
287 log::debug!(
288 "BooleanQuery planner: PredicatedScorer {} preds + {} must_v + {} must_not_v, \
289 SHOULD size_hint={}, over_fetch={}",
290 predicates.len(), must_verifiers.len(), must_not_verifiers.len(),
291 should_scorer.size_hint(), should_limit
292 );
293 return Ok(Box::new(super::PredicatedScorer::new(
294 should_scorer, predicates, must_verifiers, must_not_verifiers,
295 )));
296 }
297
298 log::debug!(
300 "BooleanQuery planner: BooleanScorer fallback, size_hint={} < limit={}, \
301 {} must_v + {} must_not_v",
302 should_scorer.size_hint(), limit,
303 must_verifiers.len(), must_not_verifiers.len()
304 );
305 let mut scorer = BooleanScorer {
306 must: must_verifiers,
307 should: vec![should_scorer],
308 must_not: must_not_verifiers,
309 current_doc: 0,
310 };
311 scorer.current_doc = scorer.find_next_match();
312 return Ok(Box::new(scorer));
313 }
314
315 let mut must_scorers = Vec::with_capacity(must.len());
317 for q in must {
318 must_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
319 }
320 let mut should_scorers = Vec::with_capacity(should.len());
321 for q in should {
322 should_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
323 }
324 let mut must_not_scorers = Vec::with_capacity(must_not.len());
325 for q in must_not {
326 must_not_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
327 }
328 let mut scorer = BooleanScorer {
329 must: must_scorers,
330 should: should_scorers,
331 must_not: must_not_scorers,
332 current_doc: 0,
333 };
334 scorer.current_doc = scorer.find_next_match();
335 Ok(Box::new(scorer) as Box<dyn Scorer + '_>)
336 }};
337}
338
339impl Query for BooleanQuery {
340 fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
341 let must = self.must.clone();
342 let should = self.should.clone();
343 let must_not = self.must_not.clone();
344 let global_stats = self.global_stats.clone();
345 Box::pin(async move {
346 boolean_plan!(
347 must,
348 should,
349 must_not,
350 global_stats.as_ref(),
351 reader,
352 limit,
353 scorer,
354 get_postings,
355 execute,
356 await
357 )
358 })
359 }
360
361 #[cfg(feature = "sync")]
362 fn scorer_sync<'a>(
363 &self,
364 reader: &'a SegmentReader,
365 limit: usize,
366 ) -> crate::Result<Box<dyn Scorer + 'a>> {
367 boolean_plan!(
368 self.must,
369 self.should,
370 self.must_not,
371 self.global_stats.as_ref(),
372 reader,
373 limit,
374 scorer_sync,
375 get_postings_sync,
376 execute_sync
377 )
378 }
379
380 fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
381 if self.must.is_empty() && self.should.is_empty() {
383 return None;
384 }
385
386 let must_preds: Vec<_> = self
388 .must
389 .iter()
390 .map(|q| q.as_doc_predicate(reader))
391 .collect::<Option<Vec<_>>>()?;
392 let should_preds: Vec<_> = self
393 .should
394 .iter()
395 .map(|q| q.as_doc_predicate(reader))
396 .collect::<Option<Vec<_>>>()?;
397 let must_not_preds: Vec<_> = self
398 .must_not
399 .iter()
400 .map(|q| q.as_doc_predicate(reader))
401 .collect::<Option<Vec<_>>>()?;
402
403 let has_must = !must_preds.is_empty();
404
405 Some(Box::new(move |doc_id| {
406 if !must_preds.iter().all(|p| p(doc_id)) {
408 return false;
409 }
410 if !has_must && !should_preds.is_empty() && !should_preds.iter().any(|p| p(doc_id)) {
412 return false;
413 }
414 must_not_preds.iter().all(|p| !p(doc_id))
416 }))
417 }
418
419 fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
420 let must = self.must.clone();
421 let should = self.should.clone();
422
423 Box::pin(async move {
424 if !must.is_empty() {
425 let mut estimates = Vec::with_capacity(must.len());
426 for q in &must {
427 estimates.push(q.count_estimate(reader).await?);
428 }
429 estimates
430 .into_iter()
431 .min()
432 .ok_or_else(|| crate::Error::Corruption("Empty must clause".to_string()))
433 } else if !should.is_empty() {
434 let mut sum = 0u32;
435 for q in &should {
436 sum = sum.saturating_add(q.count_estimate(reader).await?);
437 }
438 Ok(sum)
439 } else {
440 Ok(0)
441 }
442 })
443 }
444}
445
446struct BooleanScorer<'a> {
447 must: Vec<Box<dyn Scorer + 'a>>,
448 should: Vec<Box<dyn Scorer + 'a>>,
449 must_not: Vec<Box<dyn Scorer + 'a>>,
450 current_doc: DocId,
451}
452
453impl BooleanScorer<'_> {
454 fn find_next_match(&mut self) -> DocId {
455 if self.must.is_empty() && self.should.is_empty() {
456 return TERMINATED;
457 }
458
459 loop {
460 let candidate = if !self.must.is_empty() {
461 let mut max_doc = self
462 .must
463 .iter()
464 .map(|s| s.doc())
465 .max()
466 .unwrap_or(TERMINATED);
467
468 if max_doc == TERMINATED {
469 return TERMINATED;
470 }
471
472 loop {
473 let mut all_match = true;
474 for scorer in &mut self.must {
475 let doc = scorer.seek(max_doc);
476 if doc == TERMINATED {
477 return TERMINATED;
478 }
479 if doc > max_doc {
480 max_doc = doc;
481 all_match = false;
482 break;
483 }
484 }
485 if all_match {
486 break;
487 }
488 }
489 max_doc
490 } else {
491 self.should
492 .iter()
493 .map(|s| s.doc())
494 .filter(|&d| d != TERMINATED)
495 .min()
496 .unwrap_or(TERMINATED)
497 };
498
499 if candidate == TERMINATED {
500 return TERMINATED;
501 }
502
503 let excluded = self.must_not.iter_mut().any(|scorer| {
504 let doc = scorer.seek(candidate);
505 doc == candidate
506 });
507
508 if !excluded {
509 for scorer in &mut self.should {
511 scorer.seek(candidate);
512 }
513 self.current_doc = candidate;
514 return candidate;
515 }
516
517 if !self.must.is_empty() {
519 for scorer in &mut self.must {
520 scorer.advance();
521 }
522 } else {
523 for scorer in &mut self.should {
525 if scorer.doc() <= candidate && scorer.doc() != TERMINATED {
526 scorer.seek(candidate + 1);
527 }
528 }
529 }
530 }
531 }
532}
533
534impl super::docset::DocSet for BooleanScorer<'_> {
535 fn doc(&self) -> DocId {
536 self.current_doc
537 }
538
539 fn advance(&mut self) -> DocId {
540 if !self.must.is_empty() {
541 for scorer in &mut self.must {
542 scorer.advance();
543 }
544 } else {
545 for scorer in &mut self.should {
546 if scorer.doc() == self.current_doc {
547 scorer.advance();
548 }
549 }
550 }
551
552 self.current_doc = self.find_next_match();
553 self.current_doc
554 }
555
556 fn seek(&mut self, target: DocId) -> DocId {
557 for scorer in &mut self.must {
558 scorer.seek(target);
559 }
560
561 for scorer in &mut self.should {
562 scorer.seek(target);
563 }
564
565 self.current_doc = self.find_next_match();
566 self.current_doc
567 }
568
569 fn size_hint(&self) -> u32 {
570 if !self.must.is_empty() {
571 self.must.iter().map(|s| s.size_hint()).min().unwrap_or(0)
572 } else {
573 self.should.iter().map(|s| s.size_hint()).sum()
574 }
575 }
576}
577
578impl Scorer for BooleanScorer<'_> {
579 fn score(&self) -> Score {
580 let mut total = 0.0;
581
582 for scorer in &self.must {
583 if scorer.doc() == self.current_doc {
584 total += scorer.score();
585 }
586 }
587
588 for scorer in &self.should {
589 if scorer.doc() == self.current_doc {
590 total += scorer.score();
591 }
592 }
593
594 total
595 }
596
597 fn matched_positions(&self) -> Option<super::MatchedPositions> {
598 let mut all_positions: super::MatchedPositions = Vec::new();
599
600 for scorer in &self.must {
601 if scorer.doc() == self.current_doc
602 && let Some(positions) = scorer.matched_positions()
603 {
604 all_positions.extend(positions);
605 }
606 }
607
608 for scorer in &self.should {
609 if scorer.doc() == self.current_doc
610 && let Some(positions) = scorer.matched_positions()
611 {
612 all_positions.extend(positions);
613 }
614 }
615
616 if all_positions.is_empty() {
617 None
618 } else {
619 Some(all_positions)
620 }
621 }
622}
623
624#[cfg(test)]
625mod tests {
626 use super::*;
627 use crate::dsl::Field;
628 use crate::query::{QueryDecomposition, TermQuery};
629
630 #[test]
631 fn test_maxscore_eligible_pure_or_same_field() {
632 let query = BooleanQuery::new()
634 .should(TermQuery::text(Field(0), "hello"))
635 .should(TermQuery::text(Field(0), "world"))
636 .should(TermQuery::text(Field(0), "foo"));
637
638 assert!(
640 query
641 .should
642 .iter()
643 .all(|q| matches!(q.decompose(), QueryDecomposition::TextTerm(_)))
644 );
645
646 let infos: Vec<_> = query
648 .should
649 .iter()
650 .filter_map(|q| match q.decompose() {
651 QueryDecomposition::TextTerm(info) => Some(info),
652 _ => None,
653 })
654 .collect();
655 assert_eq!(infos.len(), 3);
656 assert!(infos.iter().all(|i| i.field == Field(0)));
657 }
658
659 #[test]
660 fn test_maxscore_not_eligible_different_fields() {
661 let query = BooleanQuery::new()
663 .should(TermQuery::text(Field(0), "hello"))
664 .should(TermQuery::text(Field(1), "world")); let infos: Vec<_> = query
667 .should
668 .iter()
669 .filter_map(|q| match q.decompose() {
670 QueryDecomposition::TextTerm(info) => Some(info),
671 _ => None,
672 })
673 .collect();
674 assert_eq!(infos.len(), 2);
675 assert!(infos[0].field != infos[1].field);
677 }
678
679 #[test]
680 fn test_maxscore_not_eligible_with_must() {
681 let query = BooleanQuery::new()
683 .must(TermQuery::text(Field(0), "required"))
684 .should(TermQuery::text(Field(0), "hello"))
685 .should(TermQuery::text(Field(0), "world"));
686
687 assert!(!query.must.is_empty());
689 }
690
691 #[test]
692 fn test_maxscore_not_eligible_with_must_not() {
693 let query = BooleanQuery::new()
695 .should(TermQuery::text(Field(0), "hello"))
696 .should(TermQuery::text(Field(0), "world"))
697 .must_not(TermQuery::text(Field(0), "excluded"));
698
699 assert!(!query.must_not.is_empty());
701 }
702
703 #[test]
704 fn test_maxscore_not_eligible_single_term() {
705 let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
707
708 assert_eq!(query.should.len(), 1);
710 }
711
712 #[test]
713 fn test_term_query_info_extraction() {
714 let term_query = TermQuery::text(Field(42), "test");
715 match term_query.decompose() {
716 QueryDecomposition::TextTerm(info) => {
717 assert_eq!(info.field, Field(42));
718 assert_eq!(info.term, b"test");
719 }
720 _ => panic!("Expected TextTerm decomposition"),
721 }
722 }
723
724 #[test]
725 fn test_boolean_query_no_term_info() {
726 let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
728
729 assert!(matches!(query.decompose(), QueryDecomposition::Opaque));
730 }
731}