hermes_core/query/
boolean.rs

1//! Boolean query with MUST, SHOULD, and MUST_NOT clauses
2
3use std::sync::Arc;
4
5use crate::segment::SegmentReader;
6use crate::structures::TERMINATED;
7use crate::{DocId, Score};
8
9use super::planner::{
10    build_combined_bitset, build_sparse_bmp_results, build_sparse_bmp_results_filtered,
11    build_sparse_maxscore_executor, chain_predicates, combine_sparse_results, compute_idf,
12    extract_all_sparse_infos, finish_text_maxscore, prepare_per_field_grouping,
13    prepare_text_maxscore,
14};
15use super::{CountFuture, EmptyScorer, GlobalStats, Query, Scorer, ScorerFuture};
16
17/// Boolean query with MUST, SHOULD, and MUST_NOT clauses
18///
19/// When all clauses are SHOULD term queries on the same field, automatically
20/// uses MaxScore optimization for efficient top-k retrieval.
21#[derive(Default, Clone)]
22pub struct BooleanQuery {
23    pub must: Vec<Arc<dyn Query>>,
24    pub should: Vec<Arc<dyn Query>>,
25    pub must_not: Vec<Arc<dyn Query>>,
26    /// Optional global statistics for cross-segment IDF
27    global_stats: Option<Arc<GlobalStats>>,
28}
29
30impl std::fmt::Debug for BooleanQuery {
31    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32        f.debug_struct("BooleanQuery")
33            .field("must_count", &self.must.len())
34            .field("should_count", &self.should.len())
35            .field("must_not_count", &self.must_not.len())
36            .field("has_global_stats", &self.global_stats.is_some())
37            .finish()
38    }
39}
40
41impl std::fmt::Display for BooleanQuery {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        write!(f, "Boolean(")?;
44        let mut first = true;
45        for q in &self.must {
46            if !first {
47                write!(f, " ")?;
48            }
49            write!(f, "+{}", q)?;
50            first = false;
51        }
52        for q in &self.should {
53            if !first {
54                write!(f, " ")?;
55            }
56            write!(f, "{}", q)?;
57            first = false;
58        }
59        for q in &self.must_not {
60            if !first {
61                write!(f, " ")?;
62            }
63            write!(f, "-{}", q)?;
64            first = false;
65        }
66        write!(f, ")")
67    }
68}
69
70impl BooleanQuery {
71    pub fn new() -> Self {
72        Self::default()
73    }
74
75    pub fn must(mut self, query: impl Query + 'static) -> Self {
76        self.must.push(Arc::new(query));
77        self
78    }
79
80    pub fn should(mut self, query: impl Query + 'static) -> Self {
81        self.should.push(Arc::new(query));
82        self
83    }
84
85    pub fn must_not(mut self, query: impl Query + 'static) -> Self {
86        self.must_not.push(Arc::new(query));
87        self
88    }
89
90    /// Set global statistics for cross-segment IDF
91    pub fn with_global_stats(mut self, stats: Arc<GlobalStats>) -> Self {
92        self.global_stats = Some(stats);
93        self
94    }
95}
96
97/// Build a SHOULD-only scorer from a vec of optimized scorers.
98fn build_should_scorer<'a>(scorers: Vec<Box<dyn Scorer + 'a>>) -> Box<dyn Scorer + 'a> {
99    if scorers.is_empty() {
100        return Box::new(EmptyScorer);
101    }
102    if scorers.len() == 1 {
103        return scorers.into_iter().next().unwrap();
104    }
105    let mut scorer = BooleanScorer {
106        must: vec![],
107        should: scorers,
108        must_not: vec![],
109        current_doc: 0,
110    };
111    scorer.current_doc = scorer.find_next_match();
112    Box::new(scorer)
113}
114
// ── Planner macro ────────────────────────────────────────────────────────
//
// Unified planner for both async and sync paths.  Parameterised on:
//   $must, $should, $must_not – clause lists (anything that borrows as
//                               `&[Arc<dyn Query>]`)
//   $global_stats   – `Option<&Arc<GlobalStats>>`
//   $reader, $limit – segment reader and requested top-k size
//   $scorer_fn      – scorer | scorer_sync
//   $get_postings_fn – get_postings | get_postings_sync
//   $execute_fn     – execute | execute_sync
//   $($aw)*         – .await  (present for async, absent for sync)
//
// The expansion contains `return` and `?`, so it must be used as the body of
// a function (or async block) whose result type is a `Result` of a boxed scorer.
//
// Decision order:
//   1. Single-clause unwrap
//   2. Pure OR → text MaxScore | sparse MaxScore | per-field MaxScore
//   3. Filter push-down → predicate-aware sparse MaxScore | PredicatedScorer
//   4. Standard BooleanScorer fallback
macro_rules! boolean_plan {
    ($must:expr, $should:expr, $must_not:expr, $global_stats:expr,
     $reader:expr, $limit:expr,
     $scorer_fn:ident, $get_postings_fn:ident, $execute_fn:ident
     $(, $aw:tt)*) => {{
        let must: &[Arc<dyn Query>] = &$must;
        let should_all: &[Arc<dyn Query>] = &$should;
        let must_not: &[Arc<dyn Query>] = &$must_not;
        let global_stats: Option<&Arc<GlobalStats>> = $global_stats;
        let reader: &SegmentReader = $reader;
        let limit: usize = $limit;

        // Cap SHOULD clauses to MAX_QUERY_TERMS, but only count queries that need
        // posting-list cursors. Fast-field predicates (O(1) per doc) are exempt.
        let should_capped: Vec<Arc<dyn Query>>;
        let should: &[Arc<dyn Query>] = if should_all.len() > super::MAX_QUERY_TERMS {
            let is_predicate: Vec<bool> = should_all
                .iter()
                .map(|q| q.is_filter() || q.as_doc_predicate(reader).is_some())
                .collect();
            let cursor_count = is_predicate.iter().filter(|&&p| !p).count();

            if cursor_count > super::MAX_QUERY_TERMS {
                // Keep every predicate clause, plus the first MAX_QUERY_TERMS
                // cursor-backed clauses in their original order.
                let mut kept = Vec::with_capacity(should_all.len());
                let mut cursor_kept = 0usize;
                for (q, &is_pred) in should_all.iter().zip(is_predicate.iter()) {
                    if is_pred {
                        kept.push(q.clone());
                    } else if cursor_kept < super::MAX_QUERY_TERMS {
                        kept.push(q.clone());
                        cursor_kept += 1;
                    }
                }
                log::debug!(
                    "BooleanQuery: capping cursor SHOULD from {} to {} ({} fast-field predicates exempt)",
                    cursor_count,
                    super::MAX_QUERY_TERMS,
                    kept.len() - cursor_kept,
                );
                should_capped = kept;
                &should_capped
            } else {
                log::debug!(
                    "BooleanQuery: {} SHOULD clauses OK ({} need cursors, {} fast-field predicates)",
                    should_all.len(),
                    cursor_count,
                    should_all.len() - cursor_count,
                );
                should_all
            }
        } else {
            should_all
        };

        // ── 1. Single-clause optimisation ────────────────────────────────
        if must_not.is_empty() {
            if must.len() == 1 && should.is_empty() {
                return must[0].$scorer_fn(reader, limit) $(. $aw)* ;
            }
            if should.len() == 1 && must.is_empty() {
                return should[0].$scorer_fn(reader, limit) $(. $aw)* ;
            }
        }

        // ── 2. Pure OR → MaxScore optimisations ──────────────────────────
        if must.is_empty() && must_not.is_empty() && should.len() >= 2 {
            // 2a. Text MaxScore (single-field, all term queries)
            if let Some((mut infos, _field, avg_field_len, num_docs)) =
                prepare_text_maxscore(should, reader, global_stats)
            {
                let mut posting_lists = Vec::with_capacity(infos.len());
                for info in infos.drain(..) {
                    if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
                        $(. $aw)* ?
                    {
                        let idf = compute_idf(&pl, info.field, &info.term, num_docs, global_stats);
                        posting_lists.push((pl, idf));
                    }
                }
                return finish_text_maxscore(posting_lists, avg_field_len, limit, reader);
            }

            // 2b. Sparse (single-field, all sparse term queries)
            // Auto-detect: BMP executor if field has BMP index, else MaxScore
            if let Some(infos) = extract_all_sparse_infos(should) {
                if let Some((raw, info)) =
                    build_sparse_bmp_results(&infos, reader, limit)
                {
                    return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                }
                if let Some((executor, info)) =
                    build_sparse_maxscore_executor(&infos, reader, limit, None)
                {
                    let raw = executor.$execute_fn() $(. $aw)* ?;
                    return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                }
            }

            // 2c. Per-field text MaxScore (multi-field term grouping)
            if let Some(grouping) = prepare_per_field_grouping(should, reader, limit, global_stats)
            {
                let mut scorers: Vec<Box<dyn Scorer + '_>> = Vec::new();
                for (field, avg_field_len, infos) in &grouping.multi_term_groups {
                    let mut posting_lists = Vec::with_capacity(infos.len());
                    for info in infos {
                        if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
                            $(. $aw)* ?
                        {
                            let idf = compute_idf(
                                &pl, *field, &info.term, grouping.num_docs, global_stats,
                            );
                            posting_lists.push((pl, idf));
                        }
                    }
                    if !posting_lists.is_empty() {
                        scorers.push(finish_text_maxscore(
                            posting_lists,
                            *avg_field_len,
                            grouping.per_field_limit,
                            reader,
                        )?);
                    }
                }
                for &idx in &grouping.fallback_indices {
                    scorers.push(should[idx].$scorer_fn(reader, limit) $(. $aw)* ?);
                }
                return Ok(build_should_scorer(scorers));
            }
        }

        // ── 3. Filter push-down (MUST + SHOULD) ─────────────────────────
        // The `limit < usize::MAX / 4` guard keeps the `limit * 4` over-fetch
        // in step 3c from overflowing.
        if !should.is_empty() && !must.is_empty() && limit < usize::MAX / 4 {
            // 3a. Compile MUST → predicates (O(1)) vs verifier scorers (seek)
            let mut predicates: Vec<super::DocPredicate<'_>> = Vec::new();
            let mut must_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
            for q in must {
                if let Some(pred) = q.as_doc_predicate(reader) {
                    log::debug!("BooleanQuery planner 3a: MUST clause → predicate ({})", q);
                    predicates.push(pred);
                } else {
                    log::debug!("BooleanQuery planner 3a: MUST clause → verifier scorer ({})", q);
                    must_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
                }
            }
            // Compile MUST_NOT → negated predicates vs verifier scorers
            let mut must_not_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
            for q in must_not {
                if let Some(pred) = q.as_doc_predicate(reader) {
                    let negated: super::DocPredicate<'_> =
                        Box::new(move |doc_id| !pred(doc_id));
                    predicates.push(negated);
                } else {
                    must_not_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
                }
            }

            // 3b. Fast path: pure predicates + sparse SHOULD → BMP or MaxScore w/ predicate
            if must_verifiers.is_empty()
                && must_not_verifiers.is_empty()
                && !predicates.is_empty()
            {
                if let Some(infos) = extract_all_sparse_infos(should) {
                    // Try BMP with bitset first: build compact bitset from MUST/MUST_NOT
                    // posting lists (O(M) for term queries) for fast per-slot lookup.
                    let bitset_result = build_combined_bitset(must, must_not, reader);
                    if let Some(ref bitset) = bitset_result {
                        let bitset_pred = |doc_id: crate::DocId| bitset.contains(doc_id);
                        if let Some((raw, info)) =
                            build_sparse_bmp_results_filtered(&infos, reader, limit, &bitset_pred)
                        {
                            log::debug!(
                                "BooleanQuery planner: bitset-aware sparse BMP, {} dims, {} matching docs",
                                infos.len(),
                                bitset.count()
                            );
                            return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                        }
                    }

                    // Fallback: closure predicate (for queries that don't support bitsets)
                    let combined = chain_predicates(predicates);
                    if let Some((raw, info)) =
                        build_sparse_bmp_results_filtered(&infos, reader, limit, &*combined)
                    {
                        log::debug!(
                            "BooleanQuery planner: predicate-aware sparse BMP, {} dims",
                            infos.len()
                        );
                        return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                    }
                    // Try MaxScore with predicate
                    if let Some((executor, info)) =
                        build_sparse_maxscore_executor(&infos, reader, limit, Some(combined))
                    {
                        log::debug!(
                            "BooleanQuery planner: predicate-aware sparse MaxScore, {} dims",
                            infos.len()
                        );
                        let raw = executor.$execute_fn() $(. $aw)* ?;
                        return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
                    }
                    // predicates consumed — cannot fall through; rebuild them
                    // (this path only triggers if neither sparse index exists)
                    predicates = Vec::new();
                    for q in must {
                        if let Some(pred) = q.as_doc_predicate(reader) {
                            predicates.push(pred);
                        }
                    }
                    for q in must_not {
                        if let Some(pred) = q.as_doc_predicate(reader) {
                            let negated: super::DocPredicate<'_> =
                                Box::new(move |doc_id| !pred(doc_id));
                            predicates.push(negated);
                        }
                    }
                }
            }

            // 3c. PredicatedScorer fallback (over-fetch 4x when any filter is present)
            let has_filters = !predicates.is_empty()
                || !must_verifiers.is_empty()
                || !must_not_verifiers.is_empty();
            let should_limit = if has_filters { limit * 4 } else { limit };
            let should_scorer = if should.len() == 1 {
                should[0].$scorer_fn(reader, should_limit) $(. $aw)* ?
            } else {
                let sub = BooleanQuery {
                    must: Vec::new(),
                    should: should.to_vec(),
                    must_not: Vec::new(),
                    global_stats: global_stats.cloned(),
                };
                sub.$scorer_fn(reader, should_limit) $(. $aw)* ?
            };

            // Compare in `usize`: widening the u32 hint is lossless, whereas
            // `limit as u32` would silently truncate limits above u32::MAX
            // (e.g. 1 << 32 becomes 0) and make this test trivially true.
            let use_predicated =
                must_verifiers.is_empty() || (should_scorer.size_hint() as usize) >= limit;

            if use_predicated {
                log::debug!(
                    "BooleanQuery planner: PredicatedScorer {} preds + {} must_v + {} must_not_v, \
                     SHOULD size_hint={}, over_fetch={}",
                    predicates.len(), must_verifiers.len(), must_not_verifiers.len(),
                    should_scorer.size_hint(), should_limit
                );
                return Ok(Box::new(super::PredicatedScorer::new(
                    should_scorer, predicates, must_verifiers, must_not_verifiers,
                )));
            }

            // size_hint < limit with verifiers → BooleanScorer
            log::debug!(
                "BooleanQuery planner: BooleanScorer fallback, size_hint={} < limit={}, \
                 {} must_v + {} must_not_v",
                should_scorer.size_hint(), limit,
                must_verifiers.len(), must_not_verifiers.len()
            );
            let mut scorer = BooleanScorer {
                must: must_verifiers,
                should: vec![should_scorer],
                must_not: must_not_verifiers,
                current_doc: 0,
            };
            scorer.current_doc = scorer.find_next_match();
            return Ok(Box::new(scorer));
        }

        // ── 4. Standard BooleanScorer fallback ───────────────────────────
        let mut must_scorers = Vec::with_capacity(must.len());
        for q in must {
            must_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
        }
        let mut should_scorers = Vec::with_capacity(should.len());
        for q in should {
            should_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
        }
        let mut must_not_scorers = Vec::with_capacity(must_not.len());
        for q in must_not {
            must_not_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
        }
        let mut scorer = BooleanScorer {
            must: must_scorers,
            should: should_scorers,
            must_not: must_not_scorers,
            current_doc: 0,
        };
        scorer.current_doc = scorer.find_next_match();
        Ok(Box::new(scorer) as Box<dyn Scorer + '_>)
    }};
}
419
420impl Query for BooleanQuery {
421    fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
422        let must = self.must.clone();
423        let should = self.should.clone();
424        let must_not = self.must_not.clone();
425        let global_stats = self.global_stats.clone();
426        Box::pin(async move {
427            boolean_plan!(
428                must,
429                should,
430                must_not,
431                global_stats.as_ref(),
432                reader,
433                limit,
434                scorer,
435                get_postings,
436                execute,
437                await
438            )
439        })
440    }
441
442    #[cfg(feature = "sync")]
443    fn scorer_sync<'a>(
444        &self,
445        reader: &'a SegmentReader,
446        limit: usize,
447    ) -> crate::Result<Box<dyn Scorer + 'a>> {
448        boolean_plan!(
449            self.must,
450            self.should,
451            self.must_not,
452            self.global_stats.as_ref(),
453            reader,
454            limit,
455            scorer_sync,
456            get_postings_sync,
457            execute_sync
458        )
459    }
460
461    fn as_doc_bitset(&self, reader: &SegmentReader) -> Option<super::DocBitset> {
462        if self.must.is_empty() && self.should.is_empty() {
463            return None;
464        }
465
466        let num_docs = reader.num_docs();
467
468        // MUST clauses: intersect bitsets (AND)
469        let mut result: Option<super::DocBitset> = None;
470        for q in &self.must {
471            let bs = q.as_doc_bitset(reader)?;
472            match result {
473                None => result = Some(bs),
474                Some(ref mut acc) => acc.intersect_with(&bs),
475            }
476        }
477
478        // SHOULD clauses: union bitsets (OR), then intersect with MUST result
479        if !self.should.is_empty() {
480            let mut should_union = super::DocBitset::new(num_docs);
481            for q in &self.should {
482                let bs = q.as_doc_bitset(reader)?;
483                should_union.union_with(&bs);
484            }
485            match result {
486                None => result = Some(should_union),
487                Some(ref mut acc) => {
488                    // When MUST clauses exist, SHOULD is optional (doesn't filter).
489                    // When no MUST clauses, at least one SHOULD must match.
490                    if self.must.is_empty() {
491                        *acc = should_union;
492                    }
493                }
494            }
495        }
496
497        // MUST_NOT clauses: subtract bitsets (ANDNOT)
498        if let Some(ref mut acc) = result {
499            for q in &self.must_not {
500                if let Some(bs) = q.as_doc_bitset(reader) {
501                    acc.subtract(&bs);
502                } else {
503                    // Can't build bitset for this MUST_NOT clause — bail
504                    return None;
505                }
506            }
507        }
508
509        result
510    }
511
512    fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
513        // Need at least some clauses
514        if self.must.is_empty() && self.should.is_empty() {
515            return None;
516        }
517
518        // Try converting all clauses to predicates; bail if any child can't
519        let must_preds: Vec<_> = self
520            .must
521            .iter()
522            .map(|q| q.as_doc_predicate(reader))
523            .collect::<Option<Vec<_>>>()?;
524        let should_preds: Vec<_> = self
525            .should
526            .iter()
527            .map(|q| q.as_doc_predicate(reader))
528            .collect::<Option<Vec<_>>>()?;
529        let must_not_preds: Vec<_> = self
530            .must_not
531            .iter()
532            .map(|q| q.as_doc_predicate(reader))
533            .collect::<Option<Vec<_>>>()?;
534
535        let has_must = !must_preds.is_empty();
536
537        Some(Box::new(move |doc_id| {
538            // All MUST predicates must pass
539            if !must_preds.iter().all(|p| p(doc_id)) {
540                return false;
541            }
542            // When there are no MUST clauses, at least one SHOULD must pass
543            if !has_must && !should_preds.is_empty() && !should_preds.iter().any(|p| p(doc_id)) {
544                return false;
545            }
546            // No MUST_NOT predicate should pass
547            must_not_preds.iter().all(|p| !p(doc_id))
548        }))
549    }
550
551    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
552        let must = self.must.clone();
553        let should = self.should.clone();
554
555        Box::pin(async move {
556            if !must.is_empty() {
557                let mut estimates = Vec::with_capacity(must.len());
558                for q in &must {
559                    estimates.push(q.count_estimate(reader).await?);
560                }
561                estimates
562                    .into_iter()
563                    .min()
564                    .ok_or_else(|| crate::Error::Corruption("Empty must clause".to_string()))
565            } else if !should.is_empty() {
566                let mut sum = 0u32;
567                for q in &should {
568                    sum = sum.saturating_add(q.count_estimate(reader).await?);
569                }
570                Ok(sum)
571            } else {
572                Ok(0)
573            }
574        })
575    }
576}
577
578struct BooleanScorer<'a> {
579    must: Vec<Box<dyn Scorer + 'a>>,
580    should: Vec<Box<dyn Scorer + 'a>>,
581    must_not: Vec<Box<dyn Scorer + 'a>>,
582    current_doc: DocId,
583}
584
585impl BooleanScorer<'_> {
586    fn find_next_match(&mut self) -> DocId {
587        if self.must.is_empty() && self.should.is_empty() {
588            return TERMINATED;
589        }
590
591        loop {
592            let candidate = if !self.must.is_empty() {
593                let mut max_doc = self
594                    .must
595                    .iter()
596                    .map(|s| s.doc())
597                    .max()
598                    .unwrap_or(TERMINATED);
599
600                if max_doc == TERMINATED {
601                    return TERMINATED;
602                }
603
604                loop {
605                    let mut all_match = true;
606                    for scorer in &mut self.must {
607                        let doc = scorer.seek(max_doc);
608                        if doc == TERMINATED {
609                            return TERMINATED;
610                        }
611                        if doc > max_doc {
612                            max_doc = doc;
613                            all_match = false;
614                            break;
615                        }
616                    }
617                    if all_match {
618                        break;
619                    }
620                }
621                max_doc
622            } else {
623                self.should
624                    .iter()
625                    .map(|s| s.doc())
626                    .filter(|&d| d != TERMINATED)
627                    .min()
628                    .unwrap_or(TERMINATED)
629            };
630
631            if candidate == TERMINATED {
632                return TERMINATED;
633            }
634
635            let excluded = self.must_not.iter_mut().any(|scorer| {
636                let doc = scorer.seek(candidate);
637                doc == candidate
638            });
639
640            if !excluded {
641                // Seek SHOULD scorers to candidate so score() can see their contributions
642                for scorer in &mut self.should {
643                    scorer.seek(candidate);
644                }
645                self.current_doc = candidate;
646                return candidate;
647            }
648
649            // Advance past excluded candidate
650            if !self.must.is_empty() {
651                for scorer in &mut self.must {
652                    scorer.advance();
653                }
654            } else {
655                // For SHOULD-only: seek all scorers past the excluded candidate
656                for scorer in &mut self.should {
657                    if scorer.doc() <= candidate && scorer.doc() != TERMINATED {
658                        scorer.seek(candidate + 1);
659                    }
660                }
661            }
662        }
663    }
664}
665
666impl super::docset::DocSet for BooleanScorer<'_> {
667    fn doc(&self) -> DocId {
668        self.current_doc
669    }
670
671    fn advance(&mut self) -> DocId {
672        if !self.must.is_empty() {
673            for scorer in &mut self.must {
674                scorer.advance();
675            }
676        } else {
677            for scorer in &mut self.should {
678                if scorer.doc() == self.current_doc {
679                    scorer.advance();
680                }
681            }
682        }
683
684        self.current_doc = self.find_next_match();
685        self.current_doc
686    }
687
688    fn seek(&mut self, target: DocId) -> DocId {
689        for scorer in &mut self.must {
690            scorer.seek(target);
691        }
692
693        for scorer in &mut self.should {
694            scorer.seek(target);
695        }
696
697        self.current_doc = self.find_next_match();
698        self.current_doc
699    }
700
701    fn size_hint(&self) -> u32 {
702        if !self.must.is_empty() {
703            self.must.iter().map(|s| s.size_hint()).min().unwrap_or(0)
704        } else {
705            self.should.iter().map(|s| s.size_hint()).sum()
706        }
707    }
708}
709
710impl Scorer for BooleanScorer<'_> {
711    fn score(&self) -> Score {
712        let mut total = 0.0;
713
714        for scorer in &self.must {
715            if scorer.doc() == self.current_doc {
716                total += scorer.score();
717            }
718        }
719
720        for scorer in &self.should {
721            if scorer.doc() == self.current_doc {
722                total += scorer.score();
723            }
724        }
725
726        total
727    }
728
729    fn matched_positions(&self) -> Option<super::MatchedPositions> {
730        let mut all_positions: super::MatchedPositions = Vec::new();
731
732        for scorer in &self.must {
733            if scorer.doc() == self.current_doc
734                && let Some(positions) = scorer.matched_positions()
735            {
736                all_positions.extend(positions);
737            }
738        }
739
740        for scorer in &self.should {
741            if scorer.doc() == self.current_doc
742                && let Some(positions) = scorer.matched_positions()
743            {
744                all_positions.extend(positions);
745            }
746        }
747
748        if all_positions.is_empty() {
749            None
750        } else {
751            Some(all_positions)
752        }
753    }
754}
755
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dsl::Field;
    use crate::query::{QueryDecomposition, TermQuery};

    /// Collects the text-term decompositions of a query's SHOULD clauses,
    /// silently skipping clauses that decompose to anything else.
    macro_rules! should_text_terms {
        ($query:expr) => {
            $query
                .should
                .iter()
                .filter_map(|clause| match clause.decompose() {
                    QueryDecomposition::TextTerm(info) => Some(info),
                    _ => None,
                })
                .collect::<Vec<_>>()
        };
    }

    #[test]
    fn test_maxscore_eligible_pure_or_same_field() {
        // A pure OR over several terms of one field is the MaxScore-eligible shape.
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"))
            .should(TermQuery::text(Field(0), "foo"));

        // Every clause must decompose to a text term ...
        for clause in &query.should {
            assert!(matches!(
                clause.decompose(),
                QueryDecomposition::TextTerm(_)
            ));
        }

        // ... and all of them must target the same field.
        let infos = should_text_terms!(query);
        assert_eq!(infos.len(), 3);
        for info in &infos {
            assert!(info.field == Field(0));
        }
    }

    #[test]
    fn test_maxscore_not_eligible_different_fields() {
        // Terms spread over different fields should NOT use single-field MaxScore.
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(1), "world")); // Different field!

        let infos = should_text_terms!(query);
        assert_eq!(infos.len(), 2);
        // Fields are different, MaxScore should not be used
        assert_ne!(infos[0].field, infos[1].field);
    }

    #[test]
    fn test_maxscore_not_eligible_with_must() {
        // A MUST clause takes the query out of the pure-OR MaxScore path.
        let query = BooleanQuery::new()
            .must(TermQuery::text(Field(0), "required"))
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"));

        let has_must = !query.must.is_empty();
        assert!(has_must);
    }

    #[test]
    fn test_maxscore_not_eligible_with_must_not() {
        // A MUST_NOT clause takes the query out of the pure-OR MaxScore path.
        let query = BooleanQuery::new()
            .should(TermQuery::text(Field(0), "hello"))
            .should(TermQuery::text(Field(0), "world"))
            .must_not(TermQuery::text(Field(0), "excluded"));

        let has_must_not = !query.must_not.is_empty();
        assert!(has_must_not);
    }

    #[test]
    fn test_maxscore_not_eligible_single_term() {
        // A single SHOULD clause gains nothing from MaxScore.
        let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));

        assert_eq!(query.should.len(), 1);
    }

    #[test]
    fn test_term_query_info_extraction() {
        let term_query = TermQuery::text(Field(42), "test");
        let QueryDecomposition::TextTerm(info) = term_query.decompose() else {
            panic!("Expected TextTerm decomposition");
        };
        assert_eq!(info.field, Field(42));
        assert_eq!(info.term, b"test");
    }

    #[test]
    fn test_boolean_query_no_term_info() {
        // BooleanQuery itself is opaque: it must not expose term info.
        let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));

        assert!(matches!(query.decompose(), QueryDecomposition::Opaque));
    }
}
hermes_core/query/boolean.rs

hermes_core/query/
boolean.rs