hermes_core/query/
boolean.rs

1//! Boolean query with MUST, SHOULD, and MUST_NOT clauses
2
3use std::sync::Arc;
4
5use crate::segment::SegmentReader;
6use crate::structures::TERMINATED;
7use crate::{DocId, Score};
8
9use super::planner::{
10    build_combined_bitset, build_sparse_bmp_results, build_sparse_bmp_results_filtered,
11    build_sparse_maxscore_executor, chain_predicates, combine_sparse_results, compute_idf,
12    extract_all_sparse_infos, finish_text_maxscore, prepare_per_field_grouping,
13    prepare_text_maxscore,
14};
15use super::{CountFuture, EmptyScorer, GlobalStats, Query, Scorer, ScorerFuture};
16
17/// Boolean query with MUST, SHOULD, and MUST_NOT clauses
18///
19/// When all clauses are SHOULD term queries on the same field, automatically
20/// uses MaxScore optimization for efficient top-k retrieval.
21#[derive(Default, Clone)]
22pub struct BooleanQuery {
23    pub must: Vec<Arc<dyn Query>>,
24    pub should: Vec<Arc<dyn Query>>,
25    pub must_not: Vec<Arc<dyn Query>>,
26    /// Optional global statistics for cross-segment IDF
27    global_stats: Option<Arc<GlobalStats>>,
28}
29
30impl std::fmt::Debug for BooleanQuery {
31    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32        f.debug_struct("BooleanQuery")
33            .field("must_count", &self.must.len())
34            .field("should_count", &self.should.len())
35            .field("must_not_count", &self.must_not.len())
36            .field("has_global_stats", &self.global_stats.is_some())
37            .finish()
38    }
39}
40
41impl std::fmt::Display for BooleanQuery {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        write!(f, "Boolean(")?;
44        let mut first = true;
45        for q in &self.must {
46            if !first {
47                write!(f, " ")?;
48            }
49            write!(f, "+{}", q)?;
50            first = false;
51        }
52        for q in &self.should {
53            if !first {
54                write!(f, " ")?;
55            }
56            write!(f, "{}", q)?;
57            first = false;
58        }
59        for q in &self.must_not {
60            if !first {
61                write!(f, " ")?;
62            }
63            write!(f, "-{}", q)?;
64            first = false;
65        }
66        write!(f, ")")
67    }
68}
69
70impl BooleanQuery {
71    pub fn new() -> Self {
72        Self::default()
73    }
74
75    pub fn must(mut self, query: impl Query + 'static) -> Self {
76        self.must.push(Arc::new(query));
77        self
78    }
79
80    pub fn should(mut self, query: impl Query + 'static) -> Self {
81        self.should.push(Arc::new(query));
82        self
83    }
84
85    pub fn must_not(mut self, query: impl Query + 'static) -> Self {
86        self.must_not.push(Arc::new(query));
87        self
88    }
89
90    /// Set global statistics for cross-segment IDF
91    pub fn with_global_stats(mut self, stats: Arc<GlobalStats>) -> Self {
92        self.global_stats = Some(stats);
93        self
94    }
95}
96
97/// Build a SHOULD-only scorer from a vec of optimized scorers.
98fn build_should_scorer<'a>(scorers: Vec<Box<dyn Scorer + 'a>>) -> Box<dyn Scorer + 'a> {
99    if scorers.is_empty() {
100        return Box::new(EmptyScorer);
101    }
102    if scorers.len() == 1 {
103        return scorers.into_iter().next().unwrap();
104    }
105    let mut scorer = BooleanScorer {
106        must: vec![],
107        should: scorers,
108        must_not: vec![],
109        current_doc: 0,
110    };
111    scorer.current_doc = scorer.find_next_match();
112    Box::new(scorer)
113}
114
115// ── Planner macro ────────────────────────────────────────────────────────
116//
117// Unified planner for both async and sync paths.  Parameterised on:
118//   $scorer_fn      – scorer | scorer_sync
119//   $get_postings_fn – get_postings | get_postings_sync
120//   $execute_fn     – execute | execute_sync
121//   $($aw)*         – .await  (present for async, absent for sync)
122//
123// Decision order:
124//   1. Single-clause unwrap
125//   2. Pure OR → text MaxScore | sparse MaxScore | per-field MaxScore
126//   3. Filter push-down → predicate-aware sparse MaxScore | PredicatedScorer
127//   4. Standard BooleanScorer fallback
128macro_rules! boolean_plan {
129    ($must:expr, $should:expr, $must_not:expr, $global_stats:expr,
130     $reader:expr, $limit:expr,
131     $scorer_fn:ident, $get_postings_fn:ident, $execute_fn:ident
132     $(, $aw:tt)*) => {{
133        let must: &[Arc<dyn Query>] = &$must;
134        let should_all: &[Arc<dyn Query>] = &$should;
135        let must_not: &[Arc<dyn Query>] = &$must_not;
136        let global_stats: Option<&Arc<GlobalStats>> = $global_stats;
137        let reader: &SegmentReader = $reader;
138        let limit: usize = $limit;
139
140        // Cap SHOULD clauses to MAX_QUERY_TERMS, but only count queries that need
141        // posting-list cursors. Fast-field predicates (O(1) per doc) are exempt.
142        let should_capped: Vec<Arc<dyn Query>>;
143        let should: &[Arc<dyn Query>] = if should_all.len() > super::MAX_QUERY_TERMS {
144            let is_predicate: Vec<bool> = should_all
145                .iter()
146                .map(|q| q.is_filter() || q.as_doc_predicate(reader).is_some())
147                .collect();
148            let cursor_count = is_predicate.iter().filter(|&&p| !p).count();
149
150            if cursor_count > super::MAX_QUERY_TERMS {
151                let mut kept = Vec::with_capacity(should_all.len());
152                let mut cursor_kept = 0usize;
153                for (q, &is_pred) in should_all.iter().zip(is_predicate.iter()) {
154                    if is_pred {
155                        kept.push(q.clone());
156                    } else if cursor_kept < super::MAX_QUERY_TERMS {
157                        kept.push(q.clone());
158                        cursor_kept += 1;
159                    }
160                }
161                log::debug!(
162                    "BooleanQuery: capping cursor SHOULD from {} to {} ({} fast-field predicates exempt)",
163                    cursor_count,
164                    super::MAX_QUERY_TERMS,
165                    kept.len() - cursor_kept,
166                );
167                should_capped = kept;
168                &should_capped
169            } else {
170                log::debug!(
171                    "BooleanQuery: {} SHOULD clauses OK ({} need cursors, {} fast-field predicates)",
172                    should_all.len(),
173                    cursor_count,
174                    should_all.len() - cursor_count,
175                );
176                should_all
177            }
178        } else {
179            should_all
180        };
181
182        // ── 1. Single-clause optimisation ────────────────────────────────
183        if must_not.is_empty() {
184            if must.len() == 1 && should.is_empty() {
185                return must[0].$scorer_fn(reader, limit) $(.  $aw)* ;
186            }
187            if should.len() == 1 && must.is_empty() {
188                return should[0].$scorer_fn(reader, limit) $(. $aw)* ;
189            }
190        }
191
192        // ── 2. Pure OR → MaxScore optimisations ──────────────────────────
193        if must.is_empty() && must_not.is_empty() && should.len() >= 2 {
194            // 2a. Text MaxScore (single-field, all term queries)
195            if let Some((mut infos, _field, avg_field_len, num_docs)) =
196                prepare_text_maxscore(should, reader, global_stats)
197            {
198                let mut posting_lists = Vec::with_capacity(infos.len());
199                for info in infos.drain(..) {
200                    if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
201                        $(. $aw)* ?
202                    {
203                        let idf = compute_idf(&pl, info.field, &info.term, num_docs, global_stats);
204                        posting_lists.push((pl, idf));
205                    }
206                }
207                return finish_text_maxscore(posting_lists, avg_field_len, limit, reader);
208            }
209
210            // 2b. Sparse (single-field, all sparse term queries)
211            // Auto-detect: BMP executor if field has BMP index, else MaxScore
212            if let Some(infos) = extract_all_sparse_infos(should) {
213                if let Some((raw, info)) =
214                    build_sparse_bmp_results(&infos, reader, limit)
215                {
216                    return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
217                }
218                if let Some((executor, info)) =
219                    build_sparse_maxscore_executor(&infos, reader, limit, None)
220                {
221                    let raw = executor.$execute_fn() $(. $aw)* ?;
222                    return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
223                }
224            }
225
226            // 2c. Per-field text MaxScore (multi-field term grouping)
227            if let Some(grouping) = prepare_per_field_grouping(should, reader, limit, global_stats)
228            {
229                let mut scorers: Vec<Box<dyn Scorer + '_>> = Vec::new();
230                for (field, avg_field_len, infos) in &grouping.multi_term_groups {
231                    let mut posting_lists = Vec::with_capacity(infos.len());
232                    for info in infos {
233                        if let Some(pl) = reader.$get_postings_fn(info.field, &info.term)
234                            $(. $aw)* ?
235                        {
236                            let idf = compute_idf(
237                                &pl, *field, &info.term, grouping.num_docs, global_stats,
238                            );
239                            posting_lists.push((pl, idf));
240                        }
241                    }
242                    if !posting_lists.is_empty() {
243                        scorers.push(finish_text_maxscore(
244                            posting_lists,
245                            *avg_field_len,
246                            grouping.per_field_limit,
247                            reader,
248                        )?);
249                    }
250                }
251                for &idx in &grouping.fallback_indices {
252                    scorers.push(should[idx].$scorer_fn(reader, limit) $(. $aw)* ?);
253                }
254                return Ok(build_should_scorer(scorers));
255            }
256        }
257
258        // ── 3. Filter push-down (MUST + SHOULD) ─────────────────────────
259        if !should.is_empty() && !must.is_empty() && limit < usize::MAX / 4 {
260            // Pre-check: is SHOULD all-sparse? This determines whether we can
261            // use bitset fallback for MUST clauses that lack fast-field predicates.
262            // For sparse SHOULD, the predicate is pushed into BMP/MaxScore traversal
263            // so all qualifying docs are found. For text SHOULD, we must NOT convert
264            // MUST to a predicate (PredicatedScorer would drop MUST-only docs that
265            // don't match SHOULD), so those go to verifier → BooleanScorer.
266            let should_is_sparse = extract_all_sparse_infos(should).is_some();
267
268            // 3a. Compile MUST → predicates (O(1)) vs verifier scorers (seek)
269            //
270            // Priority: as_doc_predicate (fast-field O(1)) > as_doc_bitset
271            // (posting-list materialization, O(1) lookup, sparse-SHOULD only)
272            // > verifier scorer (seek).
273            let mut predicates: Vec<super::DocPredicate<'_>> = Vec::new();
274            let mut must_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
275            for q in must {
276                if let Some(pred) = q.as_doc_predicate(reader) {
277                    log::debug!("BooleanQuery planner 3a: MUST clause → predicate ({})", q);
278                    predicates.push(pred);
279                } else if should_is_sparse {
280                    if let Some(bitset) = q.as_doc_bitset(reader) {
281                        log::debug!("BooleanQuery planner 3a: MUST clause → bitset predicate ({})", q);
282                        predicates.push(Box::new(move |doc_id| bitset.contains(doc_id)));
283                    } else {
284                        log::debug!("BooleanQuery planner 3a: MUST clause → verifier scorer ({})", q);
285                        must_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
286                    }
287                } else {
288                    log::debug!("BooleanQuery planner 3a: MUST clause → verifier scorer ({})", q);
289                    must_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
290                }
291            }
292            // Compile MUST_NOT → negated predicates vs verifier scorers
293            let mut must_not_verifiers: Vec<Box<dyn super::Scorer + '_>> = Vec::new();
294            for q in must_not {
295                if let Some(pred) = q.as_doc_predicate(reader) {
296                    let negated: super::DocPredicate<'_> =
297                        Box::new(move |doc_id| !pred(doc_id));
298                    predicates.push(negated);
299                } else if should_is_sparse {
300                    if let Some(bitset) = q.as_doc_bitset(reader) {
301                        log::debug!("BooleanQuery planner 3a: MUST_NOT clause → bitset predicate ({})", q);
302                        predicates.push(Box::new(move |doc_id| !bitset.contains(doc_id)));
303                    } else {
304                        must_not_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
305                    }
306                } else {
307                    must_not_verifiers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
308                }
309            }
310
311            // 3b. Fast path: pure predicates + sparse SHOULD → BMP or MaxScore w/ predicate
312            if must_verifiers.is_empty()
313                && must_not_verifiers.is_empty()
314                && !predicates.is_empty()
315            {
316                if let Some(infos) = extract_all_sparse_infos(should) {
317                    // Try BMP with bitset first: build compact bitset from MUST/MUST_NOT
318                    // posting lists (O(M) for term queries) for fast per-slot lookup.
319                    let bitset_result = build_combined_bitset(must, must_not, reader);
320                    if let Some(ref bitset) = bitset_result {
321                        let bitset_pred = |doc_id: crate::DocId| bitset.contains(doc_id);
322                        if let Some((raw, info)) =
323                            build_sparse_bmp_results_filtered(&infos, reader, limit, &bitset_pred)
324                        {
325                            log::debug!(
326                                "BooleanQuery planner: bitset-aware sparse BMP, {} dims, {} matching docs",
327                                infos.len(),
328                                bitset.count()
329                            );
330                            return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
331                        }
332                    }
333
334                    // Fallback: closure predicate (for queries that don't support bitsets)
335                    let combined = chain_predicates(predicates);
336                    if let Some((raw, info)) =
337                        build_sparse_bmp_results_filtered(&infos, reader, limit, &*combined)
338                    {
339                        log::debug!(
340                            "BooleanQuery planner: predicate-aware sparse BMP, {} dims",
341                            infos.len()
342                        );
343                        return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
344                    }
345                    // Try MaxScore with predicate
346                    if let Some((executor, info)) =
347                        build_sparse_maxscore_executor(&infos, reader, limit, Some(combined))
348                    {
349                        log::debug!(
350                            "BooleanQuery planner: predicate-aware sparse MaxScore, {} dims",
351                            infos.len()
352                        );
353                        let raw = executor.$execute_fn() $(. $aw)* ?;
354                        return Ok(combine_sparse_results(raw, info.combiner, info.field, limit));
355                    }
356                    // predicates consumed — cannot fall through; rebuild them
357                    // (this path only triggers if neither sparse index exists)
358                    // should_is_sparse is true here (we're inside extract_all_sparse_infos)
359                    predicates = Vec::new();
360                    for q in must {
361                        if let Some(pred) = q.as_doc_predicate(reader) {
362                            predicates.push(pred);
363                        } else if let Some(bitset) = q.as_doc_bitset(reader) {
364                            predicates.push(Box::new(move |doc_id| bitset.contains(doc_id)));
365                        }
366                    }
367                    for q in must_not {
368                        if let Some(pred) = q.as_doc_predicate(reader) {
369                            let negated: super::DocPredicate<'_> =
370                                Box::new(move |doc_id| !pred(doc_id));
371                            predicates.push(negated);
372                        } else if let Some(bitset) = q.as_doc_bitset(reader) {
373                            predicates.push(Box::new(move |doc_id| !bitset.contains(doc_id)));
374                        }
375                    }
376                }
377            }
378
379            // 3c. PredicatedScorer fallback (over-fetch 4x when any filter is present)
380            let has_filters = !predicates.is_empty()
381                || !must_verifiers.is_empty()
382                || !must_not_verifiers.is_empty();
383            let should_limit = if has_filters { limit * 4 } else { limit };
384            let should_scorer = if should.len() == 1 {
385                should[0].$scorer_fn(reader, should_limit) $(. $aw)* ?
386            } else {
387                let sub = BooleanQuery {
388                    must: Vec::new(),
389                    should: should.to_vec(),
390                    must_not: Vec::new(),
391                    global_stats: global_stats.cloned(),
392                };
393                sub.$scorer_fn(reader, should_limit) $(. $aw)* ?
394            };
395
396            let use_predicated =
397                must_verifiers.is_empty() || should_scorer.size_hint() >= limit as u32;
398
399            if use_predicated {
400                log::debug!(
401                    "BooleanQuery planner: PredicatedScorer {} preds + {} must_v + {} must_not_v, \
402                     SHOULD size_hint={}, over_fetch={}",
403                    predicates.len(), must_verifiers.len(), must_not_verifiers.len(),
404                    should_scorer.size_hint(), should_limit
405                );
406                return Ok(Box::new(super::PredicatedScorer::new(
407                    should_scorer, predicates, must_verifiers, must_not_verifiers,
408                )));
409            }
410
411            // size_hint < limit with verifiers → BooleanScorer
412            log::debug!(
413                "BooleanQuery planner: BooleanScorer fallback, size_hint={} < limit={}, \
414                 {} must_v + {} must_not_v",
415                should_scorer.size_hint(), limit,
416                must_verifiers.len(), must_not_verifiers.len()
417            );
418            let mut scorer = BooleanScorer {
419                must: must_verifiers,
420                should: vec![should_scorer],
421                must_not: must_not_verifiers,
422                current_doc: 0,
423            };
424            scorer.current_doc = scorer.find_next_match();
425            return Ok(Box::new(scorer));
426        }
427
428        // ── 4. Standard BooleanScorer fallback ───────────────────────────
429        let mut must_scorers = Vec::with_capacity(must.len());
430        for q in must {
431            must_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
432        }
433        let mut should_scorers = Vec::with_capacity(should.len());
434        for q in should {
435            should_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
436        }
437        let mut must_not_scorers = Vec::with_capacity(must_not.len());
438        for q in must_not {
439            must_not_scorers.push(q.$scorer_fn(reader, limit) $(. $aw)* ?);
440        }
441        let mut scorer = BooleanScorer {
442            must: must_scorers,
443            should: should_scorers,
444            must_not: must_not_scorers,
445            current_doc: 0,
446        };
447        scorer.current_doc = scorer.find_next_match();
448        Ok(Box::new(scorer) as Box<dyn Scorer + '_>)
449    }};
450}
451
452impl Query for BooleanQuery {
453    fn scorer<'a>(&self, reader: &'a SegmentReader, limit: usize) -> ScorerFuture<'a> {
454        let must = self.must.clone();
455        let should = self.should.clone();
456        let must_not = self.must_not.clone();
457        let global_stats = self.global_stats.clone();
458        Box::pin(async move {
459            boolean_plan!(
460                must,
461                should,
462                must_not,
463                global_stats.as_ref(),
464                reader,
465                limit,
466                scorer,
467                get_postings,
468                execute,
469                await
470            )
471        })
472    }
473
474    #[cfg(feature = "sync")]
475    fn scorer_sync<'a>(
476        &self,
477        reader: &'a SegmentReader,
478        limit: usize,
479    ) -> crate::Result<Box<dyn Scorer + 'a>> {
480        boolean_plan!(
481            self.must,
482            self.should,
483            self.must_not,
484            self.global_stats.as_ref(),
485            reader,
486            limit,
487            scorer_sync,
488            get_postings_sync,
489            execute_sync
490        )
491    }
492
493    fn as_doc_bitset(&self, reader: &SegmentReader) -> Option<super::DocBitset> {
494        if self.must.is_empty() && self.should.is_empty() {
495            return None;
496        }
497
498        let num_docs = reader.num_docs();
499
500        // MUST clauses: intersect bitsets (AND)
501        let mut result: Option<super::DocBitset> = None;
502        for q in &self.must {
503            let bs = q.as_doc_bitset(reader)?;
504            match result {
505                None => result = Some(bs),
506                Some(ref mut acc) => acc.intersect_with(&bs),
507            }
508        }
509
510        // SHOULD clauses: union bitsets (OR), then intersect with MUST result
511        if !self.should.is_empty() {
512            let mut should_union = super::DocBitset::new(num_docs);
513            for q in &self.should {
514                let bs = q.as_doc_bitset(reader)?;
515                should_union.union_with(&bs);
516            }
517            match result {
518                None => result = Some(should_union),
519                Some(ref mut acc) => {
520                    // When MUST clauses exist, SHOULD is optional (doesn't filter).
521                    // When no MUST clauses, at least one SHOULD must match.
522                    if self.must.is_empty() {
523                        *acc = should_union;
524                    }
525                }
526            }
527        }
528
529        // MUST_NOT clauses: subtract bitsets (ANDNOT)
530        if let Some(ref mut acc) = result {
531            for q in &self.must_not {
532                if let Some(bs) = q.as_doc_bitset(reader) {
533                    acc.subtract(&bs);
534                } else {
535                    // Can't build bitset for this MUST_NOT clause — bail
536                    return None;
537                }
538            }
539        }
540
541        result
542    }
543
544    fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
545        // Need at least some clauses
546        if self.must.is_empty() && self.should.is_empty() {
547            return None;
548        }
549
550        // Try converting all clauses to predicates; bail if any child can't
551        let must_preds: Vec<_> = self
552            .must
553            .iter()
554            .map(|q| q.as_doc_predicate(reader))
555            .collect::<Option<Vec<_>>>()?;
556        let should_preds: Vec<_> = self
557            .should
558            .iter()
559            .map(|q| q.as_doc_predicate(reader))
560            .collect::<Option<Vec<_>>>()?;
561        let must_not_preds: Vec<_> = self
562            .must_not
563            .iter()
564            .map(|q| q.as_doc_predicate(reader))
565            .collect::<Option<Vec<_>>>()?;
566
567        let has_must = !must_preds.is_empty();
568
569        Some(Box::new(move |doc_id| {
570            // All MUST predicates must pass
571            if !must_preds.iter().all(|p| p(doc_id)) {
572                return false;
573            }
574            // When there are no MUST clauses, at least one SHOULD must pass
575            if !has_must && !should_preds.is_empty() && !should_preds.iter().any(|p| p(doc_id)) {
576                return false;
577            }
578            // No MUST_NOT predicate should pass
579            must_not_preds.iter().all(|p| !p(doc_id))
580        }))
581    }
582
583    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
584        let must = self.must.clone();
585        let should = self.should.clone();
586
587        Box::pin(async move {
588            if !must.is_empty() {
589                let mut estimates = Vec::with_capacity(must.len());
590                for q in &must {
591                    estimates.push(q.count_estimate(reader).await?);
592                }
593                estimates
594                    .into_iter()
595                    .min()
596                    .ok_or_else(|| crate::Error::Corruption("Empty must clause".to_string()))
597            } else if !should.is_empty() {
598                let mut sum = 0u32;
599                for q in &should {
600                    sum = sum.saturating_add(q.count_estimate(reader).await?);
601                }
602                Ok(sum)
603            } else {
604                Ok(0)
605            }
606        })
607    }
608}
609
610struct BooleanScorer<'a> {
611    must: Vec<Box<dyn Scorer + 'a>>,
612    should: Vec<Box<dyn Scorer + 'a>>,
613    must_not: Vec<Box<dyn Scorer + 'a>>,
614    current_doc: DocId,
615}
616
617impl BooleanScorer<'_> {
618    fn find_next_match(&mut self) -> DocId {
619        if self.must.is_empty() && self.should.is_empty() {
620            return TERMINATED;
621        }
622
623        loop {
624            let candidate = if !self.must.is_empty() {
625                let mut max_doc = self
626                    .must
627                    .iter()
628                    .map(|s| s.doc())
629                    .max()
630                    .unwrap_or(TERMINATED);
631
632                if max_doc == TERMINATED {
633                    return TERMINATED;
634                }
635
636                loop {
637                    let mut all_match = true;
638                    for scorer in &mut self.must {
639                        let doc = scorer.seek(max_doc);
640                        if doc == TERMINATED {
641                            return TERMINATED;
642                        }
643                        if doc > max_doc {
644                            max_doc = doc;
645                            all_match = false;
646                            break;
647                        }
648                    }
649                    if all_match {
650                        break;
651                    }
652                }
653                max_doc
654            } else {
655                self.should
656                    .iter()
657                    .map(|s| s.doc())
658                    .filter(|&d| d != TERMINATED)
659                    .min()
660                    .unwrap_or(TERMINATED)
661            };
662
663            if candidate == TERMINATED {
664                return TERMINATED;
665            }
666
667            let excluded = self.must_not.iter_mut().any(|scorer| {
668                let doc = scorer.seek(candidate);
669                doc == candidate
670            });
671
672            if !excluded {
673                // Seek SHOULD scorers to candidate so score() can see their contributions
674                for scorer in &mut self.should {
675                    scorer.seek(candidate);
676                }
677                self.current_doc = candidate;
678                return candidate;
679            }
680
681            // Advance past excluded candidate
682            if !self.must.is_empty() {
683                for scorer in &mut self.must {
684                    scorer.advance();
685                }
686            } else {
687                // For SHOULD-only: seek all scorers past the excluded candidate
688                for scorer in &mut self.should {
689                    if scorer.doc() <= candidate && scorer.doc() != TERMINATED {
690                        scorer.seek(candidate + 1);
691                    }
692                }
693            }
694        }
695    }
696}
697
698impl super::docset::DocSet for BooleanScorer<'_> {
699    fn doc(&self) -> DocId {
700        self.current_doc
701    }
702
703    fn advance(&mut self) -> DocId {
704        if !self.must.is_empty() {
705            for scorer in &mut self.must {
706                scorer.advance();
707            }
708        } else {
709            for scorer in &mut self.should {
710                if scorer.doc() == self.current_doc {
711                    scorer.advance();
712                }
713            }
714        }
715
716        self.current_doc = self.find_next_match();
717        self.current_doc
718    }
719
720    fn seek(&mut self, target: DocId) -> DocId {
721        for scorer in &mut self.must {
722            scorer.seek(target);
723        }
724
725        for scorer in &mut self.should {
726            scorer.seek(target);
727        }
728
729        self.current_doc = self.find_next_match();
730        self.current_doc
731    }
732
733    fn size_hint(&self) -> u32 {
734        if !self.must.is_empty() {
735            self.must.iter().map(|s| s.size_hint()).min().unwrap_or(0)
736        } else {
737            self.should.iter().map(|s| s.size_hint()).sum()
738        }
739    }
740}
741
742impl Scorer for BooleanScorer<'_> {
743    fn score(&self) -> Score {
744        let mut total = 0.0;
745
746        for scorer in &self.must {
747            if scorer.doc() == self.current_doc {
748                total += scorer.score();
749            }
750        }
751
752        for scorer in &self.should {
753            if scorer.doc() == self.current_doc {
754                total += scorer.score();
755            }
756        }
757
758        total
759    }
760
761    fn matched_positions(&self) -> Option<super::MatchedPositions> {
762        let mut all_positions: super::MatchedPositions = Vec::new();
763
764        for scorer in &self.must {
765            if scorer.doc() == self.current_doc
766                && let Some(positions) = scorer.matched_positions()
767            {
768                all_positions.extend(positions);
769            }
770        }
771
772        for scorer in &self.should {
773            if scorer.doc() == self.current_doc
774                && let Some(positions) = scorer.matched_positions()
775            {
776                all_positions.extend(positions);
777            }
778        }
779
780        if all_positions.is_empty() {
781            None
782        } else {
783            Some(all_positions)
784        }
785    }
786}
787
788#[cfg(test)]
789mod tests {
790    use super::*;
791    use crate::dsl::Field;
792    use crate::query::{QueryDecomposition, TermQuery};
793
794    #[test]
795    fn test_maxscore_eligible_pure_or_same_field() {
796        // Pure OR query with multiple terms in same field should be MaxScore-eligible
797        let query = BooleanQuery::new()
798            .should(TermQuery::text(Field(0), "hello"))
799            .should(TermQuery::text(Field(0), "world"))
800            .should(TermQuery::text(Field(0), "foo"));
801
802        // All clauses should return term info
803        assert!(
804            query
805                .should
806                .iter()
807                .all(|q| matches!(q.decompose(), QueryDecomposition::TextTerm(_)))
808        );
809
810        // All should be same field
811        let infos: Vec<_> = query
812            .should
813            .iter()
814            .filter_map(|q| match q.decompose() {
815                QueryDecomposition::TextTerm(info) => Some(info),
816                _ => None,
817            })
818            .collect();
819        assert_eq!(infos.len(), 3);
820        assert!(infos.iter().all(|i| i.field == Field(0)));
821    }
822
823    #[test]
824    fn test_maxscore_not_eligible_different_fields() {
825        // OR query with terms in different fields should NOT use MaxScore
826        let query = BooleanQuery::new()
827            .should(TermQuery::text(Field(0), "hello"))
828            .should(TermQuery::text(Field(1), "world")); // Different field!
829
830        let infos: Vec<_> = query
831            .should
832            .iter()
833            .filter_map(|q| match q.decompose() {
834                QueryDecomposition::TextTerm(info) => Some(info),
835                _ => None,
836            })
837            .collect();
838        assert_eq!(infos.len(), 2);
839        // Fields are different, MaxScore should not be used
840        assert!(infos[0].field != infos[1].field);
841    }
842
843    #[test]
844    fn test_maxscore_not_eligible_with_must() {
845        // Query with MUST clause should NOT use MaxScore optimization
846        let query = BooleanQuery::new()
847            .must(TermQuery::text(Field(0), "required"))
848            .should(TermQuery::text(Field(0), "hello"))
849            .should(TermQuery::text(Field(0), "world"));
850
851        // Has MUST clause, so MaxScore optimization should not kick in
852        assert!(!query.must.is_empty());
853    }
854
855    #[test]
856    fn test_maxscore_not_eligible_with_must_not() {
857        // Query with MUST_NOT clause should NOT use MaxScore optimization
858        let query = BooleanQuery::new()
859            .should(TermQuery::text(Field(0), "hello"))
860            .should(TermQuery::text(Field(0), "world"))
861            .must_not(TermQuery::text(Field(0), "excluded"));
862
863        // Has MUST_NOT clause, so MaxScore optimization should not kick in
864        assert!(!query.must_not.is_empty());
865    }
866
867    #[test]
868    fn test_maxscore_not_eligible_single_term() {
869        // Single SHOULD clause should NOT use MaxScore (no benefit)
870        let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
871
872        // Only one term, MaxScore not beneficial
873        assert_eq!(query.should.len(), 1);
874    }
875
876    #[test]
877    fn test_term_query_info_extraction() {
878        let term_query = TermQuery::text(Field(42), "test");
879        match term_query.decompose() {
880            QueryDecomposition::TextTerm(info) => {
881                assert_eq!(info.field, Field(42));
882                assert_eq!(info.term, b"test");
883            }
884            _ => panic!("Expected TextTerm decomposition"),
885        }
886    }
887
888    #[test]
889    fn test_boolean_query_no_term_info() {
890        // BooleanQuery itself should not return term info
891        let query = BooleanQuery::new().should(TermQuery::text(Field(0), "hello"));
892
893        assert!(matches!(query.decompose(), QueryDecomposition::Opaque));
894    }
895}
hermes_core/query/boolean.rs

hermes_core/query/
boolean.rs