hermes_core/query/
boolean.rs

1//! Boolean query with MUST, SHOULD, and MUST_NOT clauses
2
3use std::sync::Arc;
4
5use crate::segment::SegmentReader;
6use crate::structures::TERMINATED;
7use crate::{DocId, Score};
8
9use super::planner::{
10    build_combined_bitset, build_sparse_bmp_results, build_sparse_bmp_results_filtered,
11    build_sparse_maxscore_executor, chain_predicates, combine_sparse_results, compute_idf,
12    extract_all_sparse_infos, finish_text_maxscore, prepare_per_field_grouping,
13    prepare_text_maxscore,
14};
15use super::{CountFuture, EmptyScorer, GlobalStats, Query, Scorer, ScorerFuture};
16
17/// Boolean query with MUST, SHOULD, and MUST_NOT clauses
18///
19/// When all clauses are SHOULD term queries on the same field, automatically
20/// uses MaxScore optimization for efficient top-k retrieval.
21#[derive(Default, Clone)]
22pub struct BooleanQuery {
23    pub must: Vec<Arc<dyn Query>>,
24    pub should: Vec<Arc<dyn Query>>,
25    pub must_not: Vec<Arc<dyn Query>>,
26    /// Optional global statistics for cross-segment IDF
27    global_stats: Option<Arc<GlobalStats>>,
28}
29
30impl std::fmt::Debug for BooleanQuery {
31    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32        f.debug_struct("BooleanQuery")
33            .field("must_count", &self.must.len())
34            .field("should_count", &self.should.len())
35            .field("must_not_count", &self.must_not.len())
36            .field("has_global_stats", &self.global_stats.is_some())
37            .finish()
38    }
39}
40
41impl std::fmt::Display for BooleanQuery {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        write!(f, "Boolean(")?;
44        let mut first = true;
45        for q in &self.must {
46            if !first {
47                write!(f, " ")?;
48            }
49            write!(f, "+{}", q)?;
50            first = false;
51        }
52        for q in &self.should {
53            if !first {
54                write!(f, " ")?;
55            }
56            write!(f, "{}", q)?;
57            first = false;
58        }
59        for q in &self.must_not {
60            if !first {
61                write!(f, " ")?;
62            }
63            write!(f, "-{}", q)?;
64            first = false;
65        }
66        write!(f, ")")
67    }
68}
69
70impl BooleanQuery {
71    pub fn new() -> Self {
72        Self::default()
73    }
74
75    pub fn must(mut self, query: impl Query + 'static) -> Self {
76        self.must.push(Arc::new(query));
77        self
78    }
79
80    pub fn should(mut self, query: impl Query + 'static) -> Self {
81        self.should.push(Arc::new(query));
82        self
83    }
84
85    pub fn must_not(mut self, query: impl Query + 'static) -> Self {
86        self.must_not.push(Arc::new(query));
87        self
88    }
89
90    /// Set global statistics for cross-segment IDF
91    pub fn with_global_stats(mut self, stats: Arc<GlobalStats>) -> Self {
92        self.global_stats = Some(stats);
93        self
94    }
95}
96
97/// Build a SHOULD-only scorer from a vec of optimized scorers.
98fn build_should_scorer<'a>(scorers: Vec<Box<dyn Scorer + 'a>>) -> Box<dyn Scorer + 'a> {
99    if scorers.is_empty() {
100        return Box::new(EmptyScorer);
101    }
102    if scorers.len() == 1 {
103        return scorers.into_iter().next().unwrap();
104    }
105    let mut scorer = BooleanScorer {
106        must: vec![],
107        should: scorers,
108        must_not: vec![],
109        current_doc: 0,
110    };
111    scorer.current_doc = scorer.find_next_match();
112    Box::new(scorer)
113}
114
// ── Planner macro ────────────────────────────────────────────────────────
//
// Unified planner for both async and sync paths.  Parameterised on:
//   $scorer_fn      – scorer | scorer_sync
//   $get_postings_fn – get_postings | get_postings_sync
//   $execute_fn     – execute | execute_sync
//   $($aw)*         – .await  (present for async, absent for sync)
//
// Decision order:
//   1. Single-clause unwrap
//   2. Pure OR → text MaxScore | sparse MaxScore | per-field MaxScore
//   3. Filter push-down → predicate-aware sparse MaxScore | PredicatedScorer
//   4. Standard BooleanScorer fallback
//
// The expansion contains `return` statements and `?`, so it must be used as
// the body of a function (or async block) whose result type is a `Result`
// wrapping `Box<dyn Scorer + '_>`.
macro_rules! boolean_plan {
    ($must:expr, $should:expr, $must_not:expr, $global_stats:expr,
     $reader:expr, $limit:expr,
     $scorer_fn:ident, $get_postings_fn:ident, $execute_fn:ident
     $(, $aw:tt)*) => {{
        let must: &[Arc<dyn Query>] = &$must;
        let should_all: &[Arc<dyn Query>] = &$should;
        let must_not: &[Arc<dyn Query>] = &$must_not;
        let global_stats: Option<&Arc<GlobalStats>> = $global_stats;
        let reader: &SegmentReader = $reader;
        let limit: usize = $limit;

        // Cap SHOULD clauses to MAX_QUERY_TERMS, but only count queries that need
        // posting-list cursors. Fast-field predicates (O(1) per doc) are exempt.
        let should_capped: Vec<Arc<dyn Query>>;
        let should: &[Arc<dyn Query>] = if should_all.len() > super::MAX_QUERY_TERMS {
            let is_predicate: Vec<bool> = should_all
                .iter()
                .map(|q| q.is_filter() || q.as_doc_predicate(reader).is_some())
                .collect();
            let cursor_count = is_predicate.iter().filter(|&&p| !p).count();

            if cursor_count > super::MAX_QUERY_TERMS {
                // Keep every predicate clause plus the first MAX_QUERY_TERMS
                // cursor clauses, preserving the original clause order.
                let mut kept = Vec::with_capacity(should_all.len());
                let mut cursor_kept = 0usize;
                for (q, &is_pred) in should_all.iter().zip(is_predicate.iter()) {
                    if is_pred {
                        kept.push(q.clone());
                    } else if cursor_kept < super::MAX_QUERY_TERMS {
                        kept.push(q.clone());
                        cursor_kept += 1;
                    }
                }
                log::debug!(
                    "BooleanQuery: capping cursor SHOULD from {} to {} ({} fast-field predicates exempt)",
                    cursor_count,
                    super::MAX_QUERY_TERMS,
                    kept.len() - cursor_kept,
                );
                should_capped = kept;
                &should_capped
            } else {
                log::debug!(
                    "BooleanQuery: {} SHOULD clauses OK ({} need cursors, {} fast-field predicates)",
                    should_all.len(),
                    cursor_count,
                    should_all.len() - cursor_count,
                );
                should_all
            }
        } else {
            should_all
        };

        // ── 1. Single-clause optimisation ────────────────────────────────
        if must_not.is_empty() {
            if must.len() == 1 && should.is_empty() {
                return must[0].$scorer_fn(reader, limit) $(. $aw)* ;
            }
            if should.len() == 1 && must.is_empty() {
                return should[0].$scorer_fn(reader, limit) $(. $aw)* ;
            }
        }

        // ── 2. Pure OR → MaxScore optimisations ──────────────────────────
        if must.is_empty() && must_not.is_empty() && should.len() >= 2 {
            // 2a. Text MaxScore (single-field, all term queries)
            if let Some((mut infos, _field, avg_field_len, num_docs)) =
                prepare_text_maxscore(should, reader, global_stats)
            {
                let mut posting_lists = Vec::with_capacity(infos.len());
                for info in infos.drain(..) {
                    if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
                        $(. $aw)* ?
                    {
                        let idf = compute_idf(&pl, info.field, &info.term, num_docs, global_stats);
                        posting_lists.push((pl, idf));
                    }
                }
                return finish_text_maxscore(posting_lists, avg_field_len, limit, reader);
            }

            // 2b. Sparse (single-field, all sparse term queries)
            // Auto-detect: BMP executor if field has BMP index, else MaxScore
            if let Some(infos) = extract_all_sparse_infos(should) {
                if let Some((raw, info)) =
                    build_sparse_bmp_results(&infos, reader, limit)
                {
                    return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                }
                if let Some((executor, info)) =
                    build_sparse_maxscore_executor(&infos, reader, limit, None)
                {
                    let raw = executor.$execute_fn() $(. $aw)* ?;
                    return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                }
            }

            // 2c. Per-field text MaxScore (multi-field term grouping)
            if let Some(grouping) = prepare_per_field_grouping(should, reader, limit, global_stats)
            {
                let mut scorers: Vec<Box<dyn Scorer + '_>> = Vec::new();
                for (field, avg_field_len, infos) in &grouping.multi_term_groups {
                    let mut posting_lists = Vec::with_capacity(infos.len());
                    for info in infos {
                        if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
                            $(. $aw)* ?
                        {
                            let idf = compute_idf(
                                &pl, *field, &info.term, grouping.num_docs, global_stats,
                            );
                            posting_lists.push((pl, idf));
                        }
                    }
                    if !posting_lists.is_empty() {
                        scorers.push(finish_text_maxscore(
                            posting_lists,
                            *avg_field_len,
                            grouping.per_field_limit,
                            reader,
                        )?);
                    }
                }
                // Clauses that could not be grouped get their own scorer.
                for &idx in &grouping.fallback_indices {
                    scorers.push(should[idx].$scorer_fn(reader, limit) $(. $aw)* ?);
                }
                return Ok(build_should_scorer(scorers));
            }
        }

        // ── 3. Filter push-down (MUST + SHOULD) ─────────────────────────
        // The `limit < usize::MAX / 4` guard keeps the `limit * 4` over-fetch
        // in 3c from overflowing.
        if !should.is_empty() && !must.is_empty() && limit < usize::MAX / 4 {
            // 3a. Compile MUST → predicates (O(1)) vs verifier scorers (seek)
            let mut predicates: Vec<super::DocPredicate<'_>> = Vec::new();
            let mut must_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
            for q in must {
                if let Some(pred) = q.as_doc_predicate(reader) {
                    log::debug!("BooleanQuery planner 3a: MUST clause → predicate ({})", q);
                    predicates.push(pred);
                } else {
                    log::debug!("BooleanQuery planner 3a: MUST clause → verifier scorer ({})", q);
                    must_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
                }
            }
            // Compile MUST_NOT → negated predicates vs verifier scorers
            let mut must_not_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
            for q in must_not {
                if let Some(pred) = q.as_doc_predicate(reader) {
                    let negated: super::DocPredicate<'_> =
                        Box::new(move |doc_id| !pred(doc_id));
                    predicates.push(negated);
                } else {
                    must_not_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
                }
            }

            // 3b. Fast path: pure predicates + sparse SHOULD → BMP or MaxScore w/ predicate
            if must_verifiers.is_empty()
                && must_not_verifiers.is_empty()
                && !predicates.is_empty()
            {
                if let Some(infos) = extract_all_sparse_infos(should) {
                    // Try BMP with bitset first: build compact bitset from MUST/MUST_NOT
                    // posting lists (O(M) for term queries) for fast per-slot lookup.
                    let bitset_result = build_combined_bitset(must, must_not, reader);
                    if let Some(ref bitset) = bitset_result {
                        let bitset_pred = |doc_id: crate::DocId| bitset.contains(doc_id);
                        if let Some((raw, info)) =
                            build_sparse_bmp_results_filtered(&infos, reader, limit, &bitset_pred)
                        {
                            log::debug!(
                                "BooleanQuery planner: bitset-aware sparse BMP, {} dims, {} matching docs",
                                infos.len(),
                                bitset.count()
                            );
                            return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                        }
                    }

                    // Fallback: closure predicate (for queries that don't support bitsets)
                    let combined = chain_predicates(predicates);
                    if let Some((raw, info)) =
                        build_sparse_bmp_results_filtered(&infos, reader, limit, &*combined)
                    {
                        log::debug!(
                            "BooleanQuery planner: predicate-aware sparse BMP, {} dims",
                            infos.len()
                        );
                        return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                    }
                    // Try MaxScore with predicate
                    if let Some((executor, info)) =
                        build_sparse_maxscore_executor(&infos, reader, limit, Some(combined))
                    {
                        log::debug!(
                            "BooleanQuery planner: predicate-aware sparse MaxScore, {} dims",
                            infos.len()
                        );
                        let raw = executor.$execute_fn() $(. $aw)* ?;
                        return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                    }
                    // predicates consumed — cannot fall through; rebuild them
                    // (this path only triggers if neither sparse index exists)
                    predicates = Vec::new();
                    for q in must {
                        if let Some(pred) = q.as_doc_predicate(reader) {
                            predicates.push(pred);
                        }
                    }
                    for q in must_not {
                        if let Some(pred) = q.as_doc_predicate(reader) {
                            let negated: super::DocPredicate<'_> =
                                Box::new(move |doc_id| !pred(doc_id));
                            predicates.push(negated);
                        }
                    }
                }
            }

            // 3c. PredicatedScorer fallback (over-fetch 4x when predicates present)
            let should_limit = if !predicates.is_empty() { limit * 4 } else { limit };
            let should_scorer = if should.len() == 1 {
                should[0].$scorer_fn(reader, should_limit) $(. $aw)* ?
            } else {
                let sub = BooleanQuery {
                    must: Vec::new(),
                    should: should.to_vec(),
                    must_not: Vec::new(),
                    global_stats: global_stats.cloned(),
                };
                sub.$scorer_fn(reader, should_limit) $(. $aw)* ?
            };

            // Saturate instead of truncating: a plain `limit as u32` wraps for
            // limits above u32::MAX and would make the comparison below
            // succeed against a tiny, meaningless threshold.
            let limit_hint = u32::try_from(limit).unwrap_or(u32::MAX);
            let use_predicated =
                must_verifiers.is_empty() || should_scorer.size_hint() >= limit_hint;

            if use_predicated {
                log::debug!(
                    "BooleanQuery planner: PredicatedScorer {} preds + {} must_v + {} must_not_v, \
                     SHOULD size_hint={}, over_fetch={}",
                    predicates.len(), must_verifiers.len(), must_not_verifiers.len(),
                    should_scorer.size_hint(), should_limit
                );
                return Ok(Box::new(super::PredicatedScorer::new(
                    should_scorer, predicates, must_verifiers, must_not_verifiers,
                )));
            }

            // size_hint < limit with verifiers → BooleanScorer
            log::debug!(
                "BooleanQuery planner: BooleanScorer fallback, size_hint={} < limit={}, \
                 {} must_v + {} must_not_v",
                should_scorer.size_hint(), limit,
                must_verifiers.len(), must_not_verifiers.len()
            );
            let mut scorer = BooleanScorer {
                must: must_verifiers,
                should: vec![should_scorer],
                must_not: must_not_verifiers,
                current_doc: 0,
            };
            scorer.current_doc = scorer.find_next_match();
            return Ok(Box::new(scorer));
        }

        // ── 4. Standard BooleanScorer fallback ───────────────────────────
        let mut must_scorers = Vec::with_capacity(must.len());
        for q in must {
            must_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
        }
        let mut should_scorers = Vec::with_capacity(should.len());
        for q in should {
            should_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
        }
        let mut must_not_scorers = Vec::with_capacity(must_not.len());
        for q in must_not {
            must_not_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
        }
        let mut scorer = BooleanScorer {
            must: must_scorers,
            should: should_scorers,
            must_not: must_not_scorers,
            current_doc: 0,
        };
        // Position the scorer on its first matching document.
        scorer.current_doc = scorer.find_next_match();
        Ok(Box::new(scorer) as Box<dyn Scorer + '_>)
    }};
}
416
417impl Query for BooleanQuery {
418    fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
419        let must = self.must.clone();
420        let should = self.should.clone();
421        let must_not = self.must_not.clone();
422        let global_stats = self.global_stats.clone();
423        Box::pin(async move {
424            boolean_plan!(
425                must,
426                should,
427                must_not,
428                global_stats.as_ref(),
429                reader,
430                limit,
431                scorer,
432                get_postings,
433                execute,
434                await
435            )
436        })
437    }
438
439    #[cfg(feature = "sync")]
440    fn scorer_sync<'a>(
441        &self,
442        reader: &'a SegmentReader,
443        limit: usize,
444    ) -> crate::Result<Box<dyn Scorer + 'a>> {
445        boolean_plan!(
446            self.must,
447            self.should,
448            self.must_not,
449            self.global_stats.as_ref(),
450            reader,
451            limit,
452            scorer_sync,
453            get_postings_sync,
454            execute_sync
455        )
456    }
457
458    fn as_doc_bitset(&self, reader: &SegmentReader) -> Option<super::DocBitset> {
459        if self.must.is_empty() && self.should.is_empty() {
460            return None;
461        }
462
463        let num_docs = reader.num_docs();
464
465        // MUST clauses: intersect bitsets (AND)
466        let mut result: Option<super::DocBitset> = None;
467        for q in &self.must {
468            let bs = q.as_doc_bitset(reader)?;
469            match result {
470                None => result = Some(bs),
471                Some(ref mut acc) => acc.intersect_with(&bs),
472            }
473        }
474
475        // SHOULD clauses: union bitsets (OR), then intersect with MUST result
476        if !self.should.is_empty() {
477            let mut should_union = super::DocBitset::new(num_docs);
478            for q in &self.should {
479                let bs = q.as_doc_bitset(reader)?;
480                should_union.union_with(&bs);
481            }
482            match result {
483                None => result = Some(should_union),
484                Some(ref mut acc) => {
485                    // When MUST clauses exist, SHOULD is optional (doesn't filter).
486                    // When no MUST clauses, at least one SHOULD must match.
487                    if self.must.is_empty() {
488                        *acc = should_union;
489                    }
490                }
491            }
492        }
493
494        // MUST_NOT clauses: subtract bitsets (ANDNOT)
495        if let Some(ref mut acc) = result {
496            for q in &self.must_not {
497                if let Some(bs) = q.as_doc_bitset(reader) {
498                    acc.subtract(&bs);
499                } else {
500                    // Can't build bitset for this MUST_NOT clause — bail
501                    return None;
502                }
503            }
504        }
505
506        result
507    }
508
509    fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
510        // Need at least some clauses
511        if self.must.is_empty() && self.should.is_empty() {
512            return None;
513        }
514
515        // Try converting all clauses to predicates; bail if any child can't
516        let must_preds: Vec<_> = self
517            .must
518            .iter()
519            .map(|q| q.as_doc_predicate(reader))
520            .collect::<Option<Vec<_>>>()?;
521        let should_preds: Vec<_> = self
522            .should
523            .iter()
524            .map(|q| q.as_doc_predicate(reader))
525            .collect::<Option<Vec<_>>>()?;
526        let must_not_preds: Vec<_> = self
527            .must_not
528            .iter()
529            .map(|q| q.as_doc_predicate(reader))
530            .collect::<Option<Vec<_>>>()?;
531
532        let has_must = !must_preds.is_empty();
533
534        Some(Box::new(move |doc_id| {
535            // All MUST predicates must pass
536            if !must_preds.iter().all(|p| p(doc_id)) {
537                return false;
538            }
539            // When there are no MUST clauses, at least one SHOULD must pass
540            if !has_must && !should_preds.is_empty() && !should_preds.iter().any(|p| p(doc_id)) {
541                return false;
542            }
543            // No MUST_NOT predicate should pass
544            must_not_preds.iter().all(|p| !p(doc_id))
545        }))
546    }
547
548    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
549        let must = self.must.clone();
550        let should = self.should.clone();
551
552        Box::pin(async move {
553            if !must.is_empty() {
554                let mut estimates = Vec::with_capacity(must.len());
555                for q in &must {
556                    estimates.push(q.count_estimate(reader).await?);
557                }
558                estimates
559                    .into_iter()
560                    .min()
561                    .ok_or_else(|| crate::Error::Corruption("Empty must clause".to_string()))
562            } else if !should.is_empty() {
563                let mut sum = 0u32;
564                for q in &should {
565                    sum = sum.saturating_add(q.count_estimate(reader).await?);
566                }
567                Ok(sum)
568            } else {
569                Ok(0)
570            }
571        })
572    }
573}
574
575struct BooleanScorer<'a> {
576    must: Vec<Box<dyn Scorer + 'a>>,
577    should: Vec<Box<dyn Scorer + 'a>>,
578    must_not: Vec<Box<dyn Scorer + 'a>>,
579    current_doc: DocId,
580}
581
582impl BooleanScorer<'_> {
583    fn find_next_match(&mut self) -> DocId {
584        if self.must.is_empty() && self.should.is_empty() {
585            return TERMINATED;
586        }
587
588        loop {
589            let candidate = if !self.must.is_empty() {
590                let mut max_doc = self
591                    .must
592                    .iter()
593                    .map(|s| s.doc())
594                    .max()
595                    .unwrap_or(TERMINATED);
596
597                if max_doc == TERMINATED {
598                    return TERMINATED;
599                }
600
601                loop {
602                    let mut all_match = true;
603                    for scorer in &mut self.must {
604                        let doc = scorer.seek(max_doc);
605                        if doc == TERMINATED {
606                            return TERMINATED;
607                        }
608                        if doc > max_doc {
609                            max_doc = doc;
610                            all_match = false;
611                            break;
612                        }
613                    }
614                    if all_match {
615                        break;
616                    }
617                }
618                max_doc
619            } else {
620                self.should
621                    .iter()
622                    .map(|s| s.doc())
623                    .filter(|&d| d != TERMINATED)
624                    .min()
625                    .unwrap_or(TERMINATED)
626            };
627
628            if candidate == TERMINATED {
629                return TERMINATED;
630            }
631
632            let excluded = self.must_not.iter_mut().any(|scorer| {
633                let doc = scorer.seek(candidate);
634                doc == candidate
635            });
636
637            if !excluded {
638                // Seek SHOULD scorers to candidate so score() can see their contributions
639                for scorer in &mut self.should {
640                    scorer.seek(candidate);
641                }
642                self.current_doc = candidate;
643                return candidate;
644            }
645
646            // Advance past excluded candidate
647            if !self.must.is_empty() {
648                for scorer in &mut self.must {
649                    scorer.advance();
650                }
651            } else {
652                // For SHOULD-only: seek all scorers past the excluded candidate
653                for scorer in &mut self.should {
654                    if scorer.doc() <= candidate && scorer.doc() != TERMINATED {
655                        scorer.seek(candidate + 1);
656                    }
657                }
658            }
659        }
660    }
661}
662
663impl super::docset::DocSet for BooleanScorer<'_> {
664    fn doc(&self) -> DocId {
665        self.current_doc
666    }
667
668    fn advance(&mut self) -> DocId {
669        if !self.must.is_empty() {
670            for scorer in &mut self.must {
671                scorer.advance();
672            }
673        } else {
674            for scorer in &mut self.should {
675                if scorer.doc() == self.current_doc {
676                    scorer.advance();
677                }
678            }
679        }
680
681        self.current_doc = self.find_next_match();
682        self.current_doc
683    }
684
685    fn seek(&mut self, target: DocId) -> DocId {
686        for scorer in &mut self.must {
687            scorer.seek(target);
688        }
689
690        for scorer in &mut self.should {
691            scorer.seek(target);
692        }
693
694        self.current_doc = self.find_next_match();
695        self.current_doc
696    }
697
698    fn size_hint(&self) -> u32 {
699        if !self.must.is_empty() {
700            self.must.iter().map(|s| s.size_hint()).min().unwrap_or(0)
701        } else {
702            self.should.iter().map(|s| s.size_hint()).sum()
703        }
704    }
705}
706
707impl Scorer for BooleanScorer<'_> {
708    fn score(&self) -> Score {
709        let mut total = 0.0;
710
711        for scorer in &self.must {
712            if scorer.doc() == self.current_doc {
713                total += scorer.score();
714            }
715        }
716
717        for scorer in &self.should {
718            if scorer.doc() == self.current_doc {
719                total += scorer.score();
720            }
721        }
722
723        total
724    }
725
726    fn matched_positions(&self) -> Option<super::MatchedPositions> {
727        let mut all_positions: super::MatchedPositions = Vec::new();
728
729        for scorer in &self.must {
730            if scorer.doc() == self.current_doc
731                && let Some(positions) = scorer.matched_positions()
732            {
733                all_positions.extend(positions);
734            }
735        }
736
737        for scorer in &self.should {
738            if scorer.doc() == self.current_doc
739                && let Some(positions) = scorer.matched_positions()
740            {
741                all_positions.extend(positions);
742            }
743        }
744
745        if all_positions.is_empty() {
746            None
747        } else {
748            Some(all_positions)
749        }
750    }
751}
752
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dsl::Field;
    use crate::query::{QueryDecomposition, TermQuery};

    /// Pure OR over several terms of one field: every clause decomposes to a
    /// text term on that same field (the MaxScore-eligible shape).
    #[test]
    fn test_maxscore_eligible_pure_or_same_field() {
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"))
            .should(TermQuery::text(Field(0), "foo"));

        // Every clause must expose term info.
        for clause in &query.should {
            assert!(matches!(
                clause.decompose(),
                QueryDecomposition::TextTerm(_)
            ));
        }

        // And all of them must target the same field.
        let mut infos = Vec::new();
        for clause in &query.should {
            if let QueryDecomposition::TextTerm(info) = clause.decompose() {
                infos.push(info);
            }
        }
        assert_eq!(infos.len(), 3);
        for info in &infos {
            assert_eq!(info.field, Field(0));
        }
    }

    /// OR over terms of different fields: the decomposed terms report
    /// different fields, so single-field MaxScore does not apply.
    #[test]
    fn test_maxscore_not_eligible_different_fields() {
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(1), "world")); // Different field!

        let mut infos = Vec::new();
        for clause in &query.should {
            if let QueryDecomposition::TextTerm(info) = clause.decompose() {
                infos.push(info);
            }
        }
        assert_eq!(infos.len(), 2);
        // Fields are different, MaxScore should not be used
        assert_ne!(infos[0].field, infos[1].field);
    }

    /// A MUST clause is recorded on the query, which rules out the pure-OR
    /// MaxScore path.
    #[test]
    fn test_maxscore_not_eligible_with_must() {
        let query = BooleanQuery::new()
            .must(TermQuery::text(Field(0), "required"))
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"));

        // Has MUST clause, so MaxScore optimization should not kick in
        assert!(!query.must.is_empty());
    }

    /// A MUST_NOT clause is recorded on the query, which rules out the
    /// pure-OR MaxScore path.
    #[test]
    fn test_maxscore_not_eligible_with_must_not() {
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"))
            .must_not(TermQuery::text(Field(0), "excluded"));

        // Has MUST_NOT clause, so MaxScore optimization should not kick in
        assert!(!query.must_not.is_empty());
    }

    /// A single SHOULD clause gains nothing from MaxScore.
    #[test]
    fn test_maxscore_not_eligible_single_term() {
        let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));

        // Only one term, MaxScore not beneficial
        assert_eq!(query.should.len(), 1);
    }

    /// A text `TermQuery` decomposes into its field and raw term bytes.
    #[test]
    fn test_term_query_info_extraction() {
        let term_query = TermQuery::text(Field(42), "test");
        let QueryDecomposition::TextTerm(info) = term_query.decompose() else {
            panic!("Expected TextTerm decomposition");
        };
        assert_eq!(info.field, Field(42));
        assert_eq!(info.term, b"test");
    }

    /// `BooleanQuery` itself decomposes as opaque, not as a term.
    #[test]
    fn test_boolean_query_no_term_info() {
        let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));

        assert!(matches!(query.decompose(), QueryDecomposition::Opaque));
    }
}
hermes_core/query/boolean.rs

hermes_core/query/
boolean.rs