hermes_core/query/
boolean.rs

//! Boolean query with MUST, SHOULD, and MUST_NOT clauses
2
3use std::sync::Arc;
4
5use crate::segment::SegmentReader;
6use crate::structures::TERMINATED;
7use crate::{DocId, Score};
8
9use super::planner::{
10    build_combined_bitset, build_sparse_bmp_results, build_sparse_bmp_results_filtered,
11    build_sparse_maxscore_executor, chain_predicates, combine_sparse_results, compute_idf,
12    extract_all_sparse_infos, finish_text_maxscore, prepare_per_field_grouping,
13    prepare_text_maxscore,
14};
15use super::{CountFuture, EmptyScorer, GlobalStats, Query, Scorer, ScorerFuture};
16
17/// Boolean query with MUST, SHOULD, and MUST_NOT clauses
18///
19/// When all clauses are SHOULD term queries on the same field, automatically
20/// uses MaxScore optimization for efficient top-k retrieval.
21#[derive(Default, Clone)]
22pub struct BooleanQuery {
23    pub must: Vec<Arc<dyn Query>>,
24    pub should: Vec<Arc<dyn Query>>,
25    pub must_not: Vec<Arc<dyn Query>>,
26    /// Optional global statistics for cross-segment IDF
27    global_stats: Option<Arc<GlobalStats>>,
28}
29
30impl std::fmt::Debug for BooleanQuery {
31    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32        f.debug_struct("BooleanQuery")
33            .field("must_count", &self.must.len())
34            .field("should_count", &self.should.len())
35            .field("must_not_count", &self.must_not.len())
36            .field("has_global_stats", &self.global_stats.is_some())
37            .finish()
38    }
39}
40
41impl std::fmt::Display for BooleanQuery {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        write!(f, "Boolean(")?;
44        let mut first = true;
45        for q in &self.must {
46            if !first {
47                write!(f, " ")?;
48            }
49            write!(f, "+{}", q)?;
50            first = false;
51        }
52        for q in &self.should {
53            if !first {
54                write!(f, " ")?;
55            }
56            write!(f, "{}", q)?;
57            first = false;
58        }
59        for q in &self.must_not {
60            if !first {
61                write!(f, " ")?;
62            }
63            write!(f, "-{}", q)?;
64            first = false;
65        }
66        write!(f, ")")
67    }
68}
69
70impl BooleanQuery {
71    pub fn new() -> Self {
72        Self::default()
73    }
74
75    pub fn must(mut self, query: impl Query + 'static) -> Self {
76        self.must.push(Arc::new(query));
77        self
78    }
79
80    pub fn should(mut self, query: impl Query + 'static) -> Self {
81        self.should.push(Arc::new(query));
82        self
83    }
84
85    pub fn must_not(mut self, query: impl Query + 'static) -> Self {
86        self.must_not.push(Arc::new(query));
87        self
88    }
89
90    /// Set global statistics for cross-segment IDF
91    pub fn with_global_stats(mut self, stats: Arc<GlobalStats>) -> Self {
92        self.global_stats = Some(stats);
93        self
94    }
95}
96
97/// Build a SHOULD-only scorer from a vec of optimized scorers.
98fn build_should_scorer<'a>(scorers: Vec<Box<dyn Scorer + 'a>>) -> Box<dyn Scorer + 'a> {
99    if scorers.is_empty() {
100        return Box::new(EmptyScorer);
101    }
102    if scorers.len() == 1 {
103        return scorers.into_iter().next().unwrap();
104    }
105    let mut scorer = BooleanScorer {
106        must: vec![],
107        should: scorers,
108        must_not: vec![],
109        current_doc: 0,
110    };
111    scorer.current_doc = scorer.find_next_match();
112    Box::new(scorer)
113}
114
// ── Planner macro ────────────────────────────────────────────────────────
//
// Unified planner for both async and sync paths.  Parameterised on:
//   $scorer_fn       – scorer | scorer_sync
//   $get_postings_fn – get_postings | get_postings_sync
//   $execute_fn      – execute | execute_sync
//   $($aw)*          – await  (present for async, absent for sync)
//
// The expansion `return`s from the enclosing function / async block on every
// fast path, so it must be the tail expression of a body whose result type is
// `crate::Result<Box<dyn Scorer + '_>>`.
//
// Decision order:
//   1. Single-clause unwrap
//   2. Pure OR → text MaxScore | sparse BMP / MaxScore | per-field MaxScore
//   3. Filter push-down → predicate-aware sparse BMP / MaxScore
//      | PredicatedScorer | BooleanScorer (predicate-wrapped when needed)
//   4. Standard BooleanScorer fallback
macro_rules! boolean_plan {
    ($must:expr, $should:expr, $must_not:expr, $global_stats:expr,
     $reader:expr, $limit:expr,
     $scorer_fn:ident, $get_postings_fn:ident, $execute_fn:ident
     $(, $aw:tt)*) => {{
        let must: &[Arc<dyn Query>] = &$must;
        let should_all: &[Arc<dyn Query>] = &$should;
        let must_not: &[Arc<dyn Query>] = &$must_not;
        let global_stats: Option<&Arc<GlobalStats>> = $global_stats;
        let reader: &SegmentReader = $reader;
        let limit: usize = $limit;

        // Cap SHOULD clauses to MAX_QUERY_TERMS, but only count queries that need
        // posting-list cursors. Fast-field predicates (O(1) per doc) are exempt.
        let should_capped: Vec<Arc<dyn Query>>;
        let should: &[Arc<dyn Query>] = if should_all.len() > super::MAX_QUERY_TERMS {
            let is_predicate: Vec<bool> = should_all
                .iter()
                .map(|q| q.is_filter() || q.as_doc_predicate(reader).is_some())
                .collect();
            let cursor_count = is_predicate.iter().filter(|&&p| !p).count();

            if cursor_count > super::MAX_QUERY_TERMS {
                // Keep every predicate clause plus the first MAX_QUERY_TERMS
                // cursor clauses, preserving the original clause order.
                let mut kept = Vec::with_capacity(should_all.len());
                let mut cursor_kept = 0usize;
                for (q, &is_pred) in should_all.iter().zip(is_predicate.iter()) {
                    if is_pred {
                        kept.push(q.clone());
                    } else if cursor_kept < super::MAX_QUERY_TERMS {
                        kept.push(q.clone());
                        cursor_kept += 1;
                    }
                }
                log::debug!(
                    "BooleanQuery: capping cursor SHOULD from {} to {} ({} fast-field predicates exempt)",
                    cursor_count,
                    super::MAX_QUERY_TERMS,
                    kept.len() - cursor_kept,
                );
                should_capped = kept;
                &should_capped
            } else {
                log::debug!(
                    "BooleanQuery: {} SHOULD clauses OK ({} need cursors, {} fast-field predicates)",
                    should_all.len(),
                    cursor_count,
                    should_all.len() - cursor_count,
                );
                should_all
            }
        } else {
            should_all
        };

        // ── 1. Single-clause optimisation ────────────────────────────────
        if must_not.is_empty() {
            if must.len() == 1 && should.is_empty() {
                return must[0].$scorer_fn(reader, limit) $(.  $aw)* ;
            }
            if should.len() == 1 && must.is_empty() {
                return should[0].$scorer_fn(reader, limit) $(. $aw)* ;
            }
        }

        // ── 2. Pure OR → MaxScore optimisations ──────────────────────────
        if must.is_empty() && must_not.is_empty() && should.len() >= 2 {
            // 2a. Text MaxScore (single-field, all term queries)
            if let Some((mut infos, _field, avg_field_len, num_docs)) =
                prepare_text_maxscore(should, reader, global_stats)
            {
                let mut posting_lists = Vec::with_capacity(infos.len());
                for info in infos.drain(..) {
                    // Terms without a posting list are simply skipped.
                    if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
                        $(. $aw)* ?
                    {
                        let idf = compute_idf(&pl, info.field, &info.term, num_docs, global_stats);
                        posting_lists.push((pl, idf));
                    }
                }
                return finish_text_maxscore(posting_lists, avg_field_len, limit);
            }

            // 2b. Sparse (single-field, all sparse term queries)
            // Auto-detect: BMP executor if field has BMP index, else MaxScore
            if let Some(infos) = extract_all_sparse_infos(should) {
                if let Some((raw, info)) =
                    build_sparse_bmp_results(&infos, reader, limit)
                {
                    return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                }
                if let Some((executor, info)) =
                    build_sparse_maxscore_executor(&infos, reader, limit, None)
                {
                    let raw = executor.$execute_fn() $(. $aw)* ?;
                    return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                }
            }

            // 2c. Per-field text MaxScore (multi-field term grouping)
            if let Some(grouping) = prepare_per_field_grouping(should, reader, limit, global_stats)
            {
                let mut scorers: Vec<Box<dyn Scorer + '_>> = Vec::new();
                for (field, avg_field_len, infos) in &grouping.multi_term_groups {
                    let mut posting_lists = Vec::with_capacity(infos.len());
                    for info in infos {
                        if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
                            $(. $aw)* ?
                        {
                            let idf = compute_idf(
                                &pl, *field, &info.term, grouping.num_docs, global_stats,
                            );
                            posting_lists.push((pl, idf));
                        }
                    }
                    if !posting_lists.is_empty() {
                        scorers.push(finish_text_maxscore(
                            posting_lists,
                            *avg_field_len,
                            grouping.per_field_limit,
                        )?);
                    }
                }
                // Clauses that could not be grouped get an ordinary scorer.
                for &idx in &grouping.fallback_indices {
                    scorers.push(should[idx].$scorer_fn(reader, limit) $(. $aw)* ?);
                }
                return Ok(build_should_scorer(scorers));
            }
        }

        // ── 3. Filter push-down (MUST + SHOULD) ─────────────────────────
        // The `limit < usize::MAX / 4` guard keeps the `limit * 4` over-fetch
        // in 3c from overflowing.
        if !should.is_empty() && !must.is_empty() && limit < usize::MAX / 4 {
            // 3a. Compile MUST → predicates (O(1)) vs verifier scorers (seek)
            let mut predicates: Vec<super::DocPredicate<'_>> = Vec::new();
            let mut must_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
            for q in must {
                if let Some(pred) = q.as_doc_predicate(reader) {
                    log::debug!("BooleanQuery planner 3a: MUST clause → predicate ({})", q);
                    predicates.push(pred);
                } else {
                    log::debug!("BooleanQuery planner 3a: MUST clause → verifier scorer ({})", q);
                    must_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
                }
            }
            // Compile MUST_NOT → negated predicates vs verifier scorers
            let mut must_not_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
            for q in must_not {
                if let Some(pred) = q.as_doc_predicate(reader) {
                    let negated: super::DocPredicate<'_> =
                        Box::new(move |doc_id| !pred(doc_id));
                    predicates.push(negated);
                } else {
                    must_not_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
                }
            }

            // 3b. Fast path: pure predicates + sparse SHOULD → BMP or MaxScore w/ predicate
            if must_verifiers.is_empty()
                && must_not_verifiers.is_empty()
                && !predicates.is_empty()
            {
                if let Some(infos) = extract_all_sparse_infos(should) {
                    // Try BMP with bitset first: build compact bitset from MUST/MUST_NOT
                    // posting lists (O(M) for term queries) for fast per-slot lookup.
                    let bitset_result = build_combined_bitset(must, must_not, reader);
                    if let Some(ref bitset) = bitset_result {
                        let bitset_pred = |doc_id: crate::DocId| bitset.contains(doc_id);
                        if let Some((raw, info)) =
                            build_sparse_bmp_results_filtered(&infos, reader, limit, &bitset_pred)
                        {
                            log::debug!(
                                "BooleanQuery planner: bitset-aware sparse BMP, {} dims, {} matching docs",
                                infos.len(),
                                bitset.count()
                            );
                            return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                        }
                    }

                    // Fallback: closure predicate (for queries that don't support bitsets)
                    let combined = chain_predicates(predicates);
                    if let Some((raw, info)) =
                        build_sparse_bmp_results_filtered(&infos, reader, limit, &*combined)
                    {
                        log::debug!(
                            "BooleanQuery planner: predicate-aware sparse BMP, {} dims",
                            infos.len()
                        );
                        return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                    }
                    // Try MaxScore with predicate
                    if let Some((executor, info)) =
                        build_sparse_maxscore_executor(&infos, reader, limit, Some(combined))
                    {
                        log::debug!(
                            "BooleanQuery planner: predicate-aware sparse MaxScore, {} dims",
                            infos.len()
                        );
                        let raw = executor.$execute_fn() $(. $aw)* ?;
                        return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                    }
                    // predicates consumed — cannot fall through; rebuild them
                    // (this path only triggers if neither sparse index exists)
                    predicates = Vec::new();
                    for q in must {
                        if let Some(pred) = q.as_doc_predicate(reader) {
                            predicates.push(pred);
                        }
                    }
                    for q in must_not {
                        if let Some(pred) = q.as_doc_predicate(reader) {
                            let negated: super::DocPredicate<'_> =
                                Box::new(move |doc_id| !pred(doc_id));
                            predicates.push(negated);
                        }
                    }
                }
            }

            // 3c. PredicatedScorer fallback (over-fetch 4x when predicates present)
            let should_limit = if !predicates.is_empty() { limit * 4 } else { limit };
            let should_scorer = if should.len() == 1 {
                should[0].$scorer_fn(reader, should_limit) $(. $aw)* ?
            } else {
                let sub = BooleanQuery {
                    must: Vec::new(),
                    should: should.to_vec(),
                    must_not: Vec::new(),
                    global_stats: global_stats.cloned(),
                };
                sub.$scorer_fn(reader, should_limit) $(. $aw)* ?
            };

            // Saturate rather than truncate: a plain `limit as u32` wraps for
            // limits above u32::MAX and would make the comparison meaningless.
            let limit_hint = u32::try_from(limit).unwrap_or(u32::MAX);
            let use_predicated =
                must_verifiers.is_empty() || should_scorer.size_hint() >= limit_hint;

            if use_predicated {
                log::debug!(
                    "BooleanQuery planner: PredicatedScorer {} preds + {} must_v + {} must_not_v, \
                     SHOULD size_hint={}, over_fetch={}",
                    predicates.len(), must_verifiers.len(), must_not_verifiers.len(),
                    should_scorer.size_hint(), should_limit
                );
                return Ok(Box::new(super::PredicatedScorer::new(
                    should_scorer, predicates, must_verifiers, must_not_verifiers,
                )));
            }

            // size_hint < limit with verifiers → BooleanScorer
            log::debug!(
                "BooleanQuery planner: BooleanScorer fallback, size_hint={} < limit={}, \
                 {} must_v + {} must_not_v",
                should_scorer.size_hint(), limit,
                must_verifiers.len(), must_not_verifiers.len()
            );
            let mut scorer = BooleanScorer {
                must: must_verifiers,
                should: vec![should_scorer],
                must_not: must_not_verifiers,
                current_doc: 0,
            };
            scorer.current_doc = scorer.find_next_match();
            if predicates.is_empty() {
                return Ok(Box::new(scorer));
            }
            // The BooleanScorer only knows about the verifier scorers. MUST and
            // MUST_NOT clauses that were compiled to predicates in 3a would be
            // silently ignored here, so apply them on top of the BooleanScorer.
            let driver: Box<dyn super::Scorer + '_> = Box::new(scorer);
            let no_must_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
            let no_must_not_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
            return Ok(Box::new(super::PredicatedScorer::new(
                driver, predicates, no_must_verifiers, no_must_not_verifiers,
            )));
        }

        // ── 4. Standard BooleanScorer fallback ───────────────────────────
        let mut must_scorers = Vec::with_capacity(must.len());
        for q in must {
            must_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
        }
        let mut should_scorers = Vec::with_capacity(should.len());
        for q in should {
            should_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
        }
        let mut must_not_scorers = Vec::with_capacity(must_not.len());
        for q in must_not {
            must_not_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
        }
        let mut scorer = BooleanScorer {
            must: must_scorers,
            should: should_scorers,
            must_not: must_not_scorers,
            current_doc: 0,
        };
        scorer.current_doc = scorer.find_next_match();
        Ok(Box::new(scorer) as Box<dyn Scorer + '_>)
    }};
}
415
416impl Query for BooleanQuery {
417    fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
418        let must = self.must.clone();
419        let should = self.should.clone();
420        let must_not = self.must_not.clone();
421        let global_stats = self.global_stats.clone();
422        Box::pin(async move {
423            boolean_plan!(
424                must,
425                should,
426                must_not,
427                global_stats.as_ref(),
428                reader,
429                limit,
430                scorer,
431                get_postings,
432                execute,
433                await
434            )
435        })
436    }
437
438    #[cfg(feature = "sync")]
439    fn scorer_sync<'a>(
440        &self,
441        reader: &'a SegmentReader,
442        limit: usize,
443    ) -> crate::Result<Box<dyn Scorer + 'a>> {
444        boolean_plan!(
445            self.must,
446            self.should,
447            self.must_not,
448            self.global_stats.as_ref(),
449            reader,
450            limit,
451            scorer_sync,
452            get_postings_sync,
453            execute_sync
454        )
455    }
456
457    fn as_doc_bitset(&self, reader: &SegmentReader) -> Option<super::DocBitset> {
458        if self.must.is_empty() && self.should.is_empty() {
459            return None;
460        }
461
462        let num_docs = reader.num_docs();
463
464        // MUST clauses: intersect bitsets (AND)
465        let mut result: Option<super::DocBitset> = None;
466        for q in &self.must {
467            let bs = q.as_doc_bitset(reader)?;
468            match result {
469                None => result = Some(bs),
470                Some(ref mut acc) => acc.intersect_with(&bs),
471            }
472        }
473
474        // SHOULD clauses: union bitsets (OR), then intersect with MUST result
475        if !self.should.is_empty() {
476            let mut should_union = super::DocBitset::new(num_docs);
477            for q in &self.should {
478                let bs = q.as_doc_bitset(reader)?;
479                should_union.union_with(&bs);
480            }
481            match result {
482                None => result = Some(should_union),
483                Some(ref mut acc) => {
484                    // When MUST clauses exist, SHOULD is optional (doesn't filter).
485                    // When no MUST clauses, at least one SHOULD must match.
486                    if self.must.is_empty() {
487                        *acc = should_union;
488                    }
489                }
490            }
491        }
492
493        // MUST_NOT clauses: subtract bitsets (ANDNOT)
494        if let Some(ref mut acc) = result {
495            for q in &self.must_not {
496                if let Some(bs) = q.as_doc_bitset(reader) {
497                    acc.subtract(&bs);
498                } else {
499                    // Can't build bitset for this MUST_NOT clause — bail
500                    return None;
501                }
502            }
503        }
504
505        result
506    }
507
508    fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
509        // Need at least some clauses
510        if self.must.is_empty() && self.should.is_empty() {
511            return None;
512        }
513
514        // Try converting all clauses to predicates; bail if any child can't
515        let must_preds: Vec<_> = self
516            .must
517            .iter()
518            .map(|q| q.as_doc_predicate(reader))
519            .collect::<Option<Vec<_>>>()?;
520        let should_preds: Vec<_> = self
521            .should
522            .iter()
523            .map(|q| q.as_doc_predicate(reader))
524            .collect::<Option<Vec<_>>>()?;
525        let must_not_preds: Vec<_> = self
526            .must_not
527            .iter()
528            .map(|q| q.as_doc_predicate(reader))
529            .collect::<Option<Vec<_>>>()?;
530
531        let has_must = !must_preds.is_empty();
532
533        Some(Box::new(move |doc_id| {
534            // All MUST predicates must pass
535            if !must_preds.iter().all(|p| p(doc_id)) {
536                return false;
537            }
538            // When there are no MUST clauses, at least one SHOULD must pass
539            if !has_must && !should_preds.is_empty() && !should_preds.iter().any(|p| p(doc_id)) {
540                return false;
541            }
542            // No MUST_NOT predicate should pass
543            must_not_preds.iter().all(|p| !p(doc_id))
544        }))
545    }
546
547    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
548        let must = self.must.clone();
549        let should = self.should.clone();
550
551        Box::pin(async move {
552            if !must.is_empty() {
553                let mut estimates = Vec::with_capacity(must.len());
554                for q in &must {
555                    estimates.push(q.count_estimate(reader).await?);
556                }
557                estimates
558                    .into_iter()
559                    .min()
560                    .ok_or_else(|| crate::Error::Corruption("Empty must clause".to_string()))
561            } else if !should.is_empty() {
562                let mut sum = 0u32;
563                for q in &should {
564                    sum = sum.saturating_add(q.count_estimate(reader).await?);
565                }
566                Ok(sum)
567            } else {
568                Ok(0)
569            }
570        })
571    }
572}
573
574struct BooleanScorer<'a> {
575    must: Vec<Box<dyn Scorer + 'a>>,
576    should: Vec<Box<dyn Scorer + 'a>>,
577    must_not: Vec<Box<dyn Scorer + 'a>>,
578    current_doc: DocId,
579}
580
581impl BooleanScorer<'_> {
582    fn find_next_match(&mut self) -> DocId {
583        if self.must.is_empty() && self.should.is_empty() {
584            return TERMINATED;
585        }
586
587        loop {
588            let candidate = if !self.must.is_empty() {
589                let mut max_doc = self
590                    .must
591                    .iter()
592                    .map(|s| s.doc())
593                    .max()
594                    .unwrap_or(TERMINATED);
595
596                if max_doc == TERMINATED {
597                    return TERMINATED;
598                }
599
600                loop {
601                    let mut all_match = true;
602                    for scorer in &mut self.must {
603                        let doc = scorer.seek(max_doc);
604                        if doc == TERMINATED {
605                            return TERMINATED;
606                        }
607                        if doc > max_doc {
608                            max_doc = doc;
609                            all_match = false;
610                            break;
611                        }
612                    }
613                    if all_match {
614                        break;
615                    }
616                }
617                max_doc
618            } else {
619                self.should
620                    .iter()
621                    .map(|s| s.doc())
622                    .filter(|&d| d != TERMINATED)
623                    .min()
624                    .unwrap_or(TERMINATED)
625            };
626
627            if candidate == TERMINATED {
628                return TERMINATED;
629            }
630
631            let excluded = self.must_not.iter_mut().any(|scorer| {
632                let doc = scorer.seek(candidate);
633                doc == candidate
634            });
635
636            if !excluded {
637                // Seek SHOULD scorers to candidate so score() can see their contributions
638                for scorer in &mut self.should {
639                    scorer.seek(candidate);
640                }
641                self.current_doc = candidate;
642                return candidate;
643            }
644
645            // Advance past excluded candidate
646            if !self.must.is_empty() {
647                for scorer in &mut self.must {
648                    scorer.advance();
649                }
650            } else {
651                // For SHOULD-only: seek all scorers past the excluded candidate
652                for scorer in &mut self.should {
653                    if scorer.doc() <= candidate && scorer.doc() != TERMINATED {
654                        scorer.seek(candidate + 1);
655                    }
656                }
657            }
658        }
659    }
660}
661
662impl super::docset::DocSet for BooleanScorer<'_> {
663    fn doc(&self) -> DocId {
664        self.current_doc
665    }
666
667    fn advance(&mut self) -> DocId {
668        if !self.must.is_empty() {
669            for scorer in &mut self.must {
670                scorer.advance();
671            }
672        } else {
673            for scorer in &mut self.should {
674                if scorer.doc() == self.current_doc {
675                    scorer.advance();
676                }
677            }
678        }
679
680        self.current_doc = self.find_next_match();
681        self.current_doc
682    }
683
684    fn seek(&mut self, target: DocId) -> DocId {
685        for scorer in &mut self.must {
686            scorer.seek(target);
687        }
688
689        for scorer in &mut self.should {
690            scorer.seek(target);
691        }
692
693        self.current_doc = self.find_next_match();
694        self.current_doc
695    }
696
697    fn size_hint(&self) -> u32 {
698        if !self.must.is_empty() {
699            self.must.iter().map(|s| s.size_hint()).min().unwrap_or(0)
700        } else {
701            self.should.iter().map(|s| s.size_hint()).sum()
702        }
703    }
704}
705
706impl Scorer for BooleanScorer<'_> {
707    fn score(&self) -> Score {
708        let mut total = 0.0;
709
710        for scorer in &self.must {
711            if scorer.doc() == self.current_doc {
712                total += scorer.score();
713            }
714        }
715
716        for scorer in &self.should {
717            if scorer.doc() == self.current_doc {
718                total += scorer.score();
719            }
720        }
721
722        total
723    }
724
725    fn matched_positions(&self) -> Option<super::MatchedPositions> {
726        let mut all_positions: super::MatchedPositions = Vec::new();
727
728        for scorer in &self.must {
729            if scorer.doc() == self.current_doc
730                && let Some(positions) = scorer.matched_positions()
731            {
732                all_positions.extend(positions);
733            }
734        }
735
736        for scorer in &self.should {
737            if scorer.doc() == self.current_doc
738                && let Some(positions) = scorer.matched_positions()
739            {
740                all_positions.extend(positions);
741            }
742        }
743
744        if all_positions.is_empty() {
745            None
746        } else {
747            Some(all_positions)
748        }
749    }
750}
751
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dsl::Field;
    use crate::query::{QueryDecomposition, TermQuery};

    /// A pure OR over several terms of one field decomposes into text terms
    /// that all share that field — the shape MaxScore needs.
    #[test]
    fn test_maxscore_eligible_pure_or_same_field() {
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"))
            .should(TermQuery::text(Field(0), "foo"));

        // Every clause has to expose term info.
        for clause in &query.should {
            assert!(matches!(
                clause.decompose(),
                QueryDecomposition::TextTerm(_)
            ));
        }

        // And every term has to live in the same field.
        let mut infos = Vec::new();
        for clause in &query.should {
            if let QueryDecomposition::TextTerm(info) = clause.decompose() {
                infos.push(info);
            }
        }
        assert_eq!(infos.len(), 3);
        for info in &infos {
            assert!(info.field == Field(0));
        }
    }

    /// Terms spread over different fields must not look like a single-field
    /// MaxScore candidate.
    #[test]
    fn test_maxscore_not_eligible_different_fields() {
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(1), "world")); // Different field!

        let mut infos = Vec::new();
        for clause in &query.should {
            if let QueryDecomposition::TextTerm(info) = clause.decompose() {
                infos.push(info);
            }
        }
        assert_eq!(infos.len(), 2);
        // Fields are different, MaxScore should not be used
        assert_ne!(infos[0].field, infos[1].field);
    }

    /// A MUST clause rules out the pure-OR MaxScore path.
    #[test]
    fn test_maxscore_not_eligible_with_must() {
        let query = BooleanQuery::new()
            .must(TermQuery::text(Field(0), "required"))
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"));

        // Has MUST clause, so MaxScore optimization should not kick in
        let has_must = !query.must.is_empty();
        assert!(has_must);
    }

    /// A MUST_NOT clause rules out the pure-OR MaxScore path.
    #[test]
    fn test_maxscore_not_eligible_with_must_not() {
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"))
            .must_not(TermQuery::text(Field(0), "excluded"));

        // Has MUST_NOT clause, so MaxScore optimization should not kick in
        let has_must_not = !query.must_not.is_empty();
        assert!(has_must_not);
    }

    /// A single SHOULD clause gains nothing from MaxScore.
    #[test]
    fn test_maxscore_not_eligible_single_term() {
        let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));

        // Only one term, MaxScore not beneficial
        assert_eq!(query.should.len(), 1);
    }

    /// A text term query decomposes into its field and raw term bytes.
    #[test]
    fn test_term_query_info_extraction() {
        let term_query = TermQuery::text(Field(42), "test");
        let QueryDecomposition::TextTerm(info) = term_query.decompose() else {
            panic!("Expected TextTerm decomposition");
        };
        assert_eq!(info.field, Field(42));
        assert_eq!(info.term, b"test");
    }

    /// A BooleanQuery itself never exposes term info, even with one clause.
    #[test]
    fn test_boolean_query_no_term_info() {
        let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));

        let decomposition = query.decompose();
        assert!(matches!(decomposition, QueryDecomposition::Opaque));
    }
}
hermes_core/query/boolean.rs

hermes_core/query/
boolean.rs