1use crate::core::{DocId, FieldId, NO_MORE_DOCS, Result, ScoreMode, Scorer, TwoPhaseIterator};
10
11use crate::inverted::norms::FieldNormsReader;
12use crate::inverted::postings::PositionPostingListReader;
13use crate::query::{BoundQuery, BoundSpanQuery, Query, ScorerSupplier, SpanQuery};
14use crate::search::bm25::{bm25_idf, bm25_score};
15use crate::search::searcher::Searcher;
16use crate::segment::reader::SegmentReader;
17
/// Sentinel returned by the position accessors in this file when there is no
/// current or further position to report.
const NO_MORE_POSITIONS: u32 = u32::MAX;
19
/// Cursor over matches: iterates documents and, inside the current document,
/// the individual match positions.
///
/// The implementations in this file report `NO_MORE_DOCS` once documents are
/// exhausted and `NO_MORE_POSITIONS` once positions are exhausted.
trait Spans: Send {
    /// Returns the document the cursor currently sits on.
    fn doc_id(&self) -> DocId;
    /// Moves to the next document and returns its id.
    fn next_doc(&mut self) -> DocId;
    /// Moves the cursor using `target` as the seek key and returns the
    /// resulting document id.
    fn advance_doc(&mut self, target: DocId) -> DocId;

    /// Steps to the next match in the current document and returns its start.
    fn next_start_position(&mut self) -> u32;

    /// Returns the start of the current match.
    fn start_position(&self) -> u32;
    /// Returns the end of the current match.
    fn end_position(&self) -> u32;

    /// Returns the width of the current match; defaults to `0`.
    fn width(&self) -> u32 {
        0
    }
}
52
/// [`Spans`] over the occurrences of a single term, driven by a positional
/// postings reader.
struct TermSpans<'a> {
    /// Postings cursor that supplies documents and their positions.
    reader: PositionPostingListReader<'a>,
    /// Number of positions already handed out for the current document.
    pos_index: usize,
    /// Current document, or `NO_MORE_DOCS` when unpositioned/exhausted.
    current_doc: DocId,
    /// Term frequency recorded for the current document.
    current_tf: u32,
}

// NOTE(review): `Send` is asserted manually; whether that is sound depends on
// what `PositionPostingListReader` holds internally — confirm.
unsafe impl Send for TermSpans<'_> {}
66
67impl<'a> TermSpans<'a> {
68 fn new(reader: PositionPostingListReader<'a>) -> Self {
69 Self {
70 reader,
71 pos_index: 0,
72 current_doc: NO_MORE_DOCS,
73 current_tf: 0,
74 }
75 }
76}
77
78impl Spans for TermSpans<'_> {
79 fn doc_id(&self) -> DocId {
80 self.current_doc
81 }
82
83 fn next_doc(&mut self) -> DocId {
84 self.pos_index = 0;
85 match self.reader.next_doc() {
86 Some(doc) => {
87 self.current_doc = doc;
88 self.current_tf = self.reader.current_tf();
89 doc
90 }
91 None => {
92 self.current_doc = NO_MORE_DOCS;
93 self.current_tf = 0;
94 NO_MORE_DOCS
95 }
96 }
97 }
98
99 fn advance_doc(&mut self, target: DocId) -> DocId {
100 self.pos_index = 0;
101 match self.reader.advance(target) {
102 Some(doc) => {
103 self.current_doc = doc;
104 self.current_tf = self.reader.positions().len() as u32;
106 doc
107 }
108 None => {
109 self.current_doc = NO_MORE_DOCS;
110 self.current_tf = 0;
111 NO_MORE_DOCS
112 }
113 }
114 }
115
116 fn next_start_position(&mut self) -> u32 {
117 if self.current_doc == NO_MORE_DOCS {
118 return NO_MORE_POSITIONS;
119 }
120
121 if self.current_tf == 1 {
122 if self.pos_index == 0 {
124 self.pos_index = 1;
125 return self.reader.first_position();
126 }
127 return NO_MORE_POSITIONS;
128 }
129
130 let positions = self.reader.positions();
132 if self.pos_index < positions.len() {
133 let pos = positions[self.pos_index];
134 self.pos_index += 1;
135 pos
136 } else {
137 NO_MORE_POSITIONS
138 }
139 }
140
141 fn start_position(&self) -> u32 {
142 if self.pos_index == 0 {
143 return NO_MORE_POSITIONS;
144 }
145 if self.current_tf == 1 {
146 self.reader.first_position()
147 } else {
148 self.reader.positions()[self.pos_index - 1]
149 }
150 }
151
152 fn end_position(&self) -> u32 {
153 if self.pos_index == 0 {
154 return NO_MORE_POSITIONS;
155 }
156 self.start_position() + 1
157 }
158}
159
/// Wrapper around another [`Spans`] that stops reporting matches once a
/// match's end exceeds `max_end`.
struct FilterSpans<S: Spans> {
    /// The wrapped span cursor.
    inner: S,
    /// Largest end position a match may have and still be reported.
    max_end: u32,
}
173
174impl<S: Spans> Spans for FilterSpans<S> {
175 fn doc_id(&self) -> DocId {
176 self.inner.doc_id()
177 }
178 fn next_doc(&mut self) -> DocId {
179 self.inner.next_doc()
180 }
181 fn advance_doc(&mut self, target: DocId) -> DocId {
182 self.inner.advance_doc(target)
183 }
184 fn next_start_position(&mut self) -> u32 {
185 let pos = self.inner.next_start_position();
186 if pos == NO_MORE_POSITIONS {
187 return NO_MORE_POSITIONS;
188 }
189 if self.inner.end_position() > self.max_end {
190 NO_MORE_POSITIONS
192 } else {
193 pos
194 }
195 }
196 fn start_position(&self) -> u32 {
197 self.inner.start_position()
198 }
199 fn end_position(&self) -> u32 {
200 self.inner.end_position()
201 }
202 fn width(&self) -> u32 {
203 self.inner.width()
204 }
205}
206
/// [`Spans`] that matches its sub-spans in the order given, with the summed
/// gaps between consecutive sub-spans compared against `slop`.
struct NearSpansOrdered<'a> {
    /// One term cursor per sub-span, in the required order.
    sub_spans: Vec<TermSpans<'a>>,
    /// Upper bound on the accumulated gap width of a match.
    slop: u32,
    /// Document all sub-spans are aligned on, or `NO_MORE_DOCS`.
    current_doc: DocId,
    /// Start of the current match (start of the first sub-span).
    match_start: u32,
    /// End of the current match (end of the last sub-span).
    match_end: u32,
    /// Sum of the gaps between consecutive sub-spans of the current match.
    match_width: u32,
    /// True until the first `next_start_position` call after a document move.
    first_in_doc: bool,
}

// NOTE(review): `Send` is asserted manually; it relies on `TermSpans` (and the
// reader inside it) being safe to move across threads — confirm.
unsafe impl Send for NearSpansOrdered<'_> {}
225
226impl<'a> NearSpansOrdered<'a> {
227 fn new(sub_spans: Vec<TermSpans<'a>>, slop: u32) -> Self {
228 Self {
229 sub_spans,
230 slop,
231 current_doc: NO_MORE_DOCS,
232 match_start: NO_MORE_POSITIONS,
233 match_end: NO_MORE_POSITIONS,
234 match_width: 0,
235 first_in_doc: false,
236 }
237 }
238
239 fn advance_to_common_doc(&mut self) -> DocId {
242 if self.sub_spans.is_empty() {
243 return NO_MORE_DOCS;
244 }
245
246 let mut target = self.sub_spans[0].doc_id();
248 if target == NO_MORE_DOCS {
249 return NO_MORE_DOCS;
250 }
251
252 let mut i = 1;
253 while i < self.sub_spans.len() {
254 let doc = self.sub_spans[i].doc_id();
255 if doc == target {
256 i += 1;
257 continue;
258 }
259 if doc == NO_MORE_DOCS {
260 return NO_MORE_DOCS;
261 }
262 if doc < target {
263 let new_doc = self.sub_spans[i].advance_doc(target);
265 if new_doc == NO_MORE_DOCS {
266 return NO_MORE_DOCS;
267 }
268 if new_doc > target {
269 target = new_doc;
271 let d0 = self.sub_spans[0].advance_doc(target);
273 if d0 == NO_MORE_DOCS {
274 return NO_MORE_DOCS;
275 }
276 target = d0;
277 i = 1; continue;
279 }
280 i += 1;
281 } else {
282 target = doc;
284 let d0 = self.sub_spans[0].advance_doc(target);
285 if d0 == NO_MORE_DOCS {
286 return NO_MORE_DOCS;
287 }
288 target = d0;
289 i = 1;
290 }
291 }
292 target
293 }
294
295 fn stretch_to_order(&mut self) -> bool {
298 self.match_start = self.sub_spans[0].start_position();
299 if self.match_start == NO_MORE_POSITIONS {
300 return false;
301 }
302 self.match_width = 0;
303
304 for i in 1..self.sub_spans.len() {
305 let prev_end = self.sub_spans[i - 1].end_position();
306
307 while self.sub_spans[i].start_position() < prev_end {
309 if self.sub_spans[i].next_start_position() == NO_MORE_POSITIONS {
310 return false;
311 }
312 }
313
314 let gap = self.sub_spans[i].start_position() - prev_end;
315 self.match_width += gap;
316 }
317
318 self.match_end = self.sub_spans.last().unwrap().end_position();
319 self.match_width <= self.slop
320 }
321
322 fn find_next_match_in_doc(&mut self) -> bool {
324 loop {
325 if !self.stretch_to_order() {
326 return false;
327 }
328 if self.match_width <= self.slop {
329 return true;
330 }
331 if self.sub_spans[0].next_start_position() == NO_MORE_POSITIONS {
333 return false;
334 }
335 self.match_start = self.sub_spans[0].start_position();
336 }
337 }
338}
339
340impl Spans for NearSpansOrdered<'_> {
341 fn doc_id(&self) -> DocId {
342 self.current_doc
343 }
344
345 fn next_doc(&mut self) -> DocId {
346 let next = self.sub_spans[0].next_doc();
348 if next == NO_MORE_DOCS {
349 self.current_doc = NO_MORE_DOCS;
350 return NO_MORE_DOCS;
351 }
352 for i in 1..self.sub_spans.len() {
353 self.sub_spans[i].next_doc();
354 }
355 self.current_doc = self.advance_to_common_doc();
356 self.first_in_doc = true;
357 self.current_doc
358 }
359
360 fn advance_doc(&mut self, target: DocId) -> DocId {
361 for s in &mut self.sub_spans {
362 s.advance_doc(target);
363 }
364 self.current_doc = self.advance_to_common_doc();
365 self.first_in_doc = true;
366 self.current_doc
367 }
368
369 fn next_start_position(&mut self) -> u32 {
370 if self.current_doc == NO_MORE_DOCS {
371 return NO_MORE_POSITIONS;
372 }
373
374 if self.first_in_doc {
375 self.first_in_doc = false;
376 for s in &mut self.sub_spans {
378 if s.next_start_position() == NO_MORE_POSITIONS {
379 return NO_MORE_POSITIONS;
380 }
381 }
382 } else {
383 if self.sub_spans[0].next_start_position() == NO_MORE_POSITIONS {
385 return NO_MORE_POSITIONS;
386 }
387 }
388
389 if self.find_next_match_in_doc() {
390 self.match_start
391 } else {
392 NO_MORE_POSITIONS
393 }
394 }
395
396 fn start_position(&self) -> u32 {
397 self.match_start
398 }
399 fn end_position(&self) -> u32 {
400 self.match_end
401 }
402 fn width(&self) -> u32 {
403 self.match_width
404 }
405}
406
/// [`Spans`] that matches its sub-spans in any order, as long as one position
/// per sub-span fits into a window bounded by `slop`.
struct NearSpansUnordered<'a> {
    /// One term cursor per sub-span.
    sub_spans: Vec<TermSpans<'a>>,
    /// Slack allowed on top of the `n - 1` positions the terms themselves span.
    slop: u32,
    /// Document all sub-spans are aligned on, or `NO_MORE_DOCS`.
    current_doc: DocId,
    /// Smallest position of the current match.
    match_start: u32,
    /// One past the largest position of the current match.
    match_end: u32,
    /// Width recorded for the current match.
    match_width: u32,
    /// Per-sub-span cursor into its position list during window search.
    indices: Vec<usize>,
    /// True until the first `next_start_position` call after a document move.
    first_in_doc: bool,
}

// NOTE(review): `Send` is asserted manually; it relies on `TermSpans` (and the
// reader inside it) being safe to move across threads — confirm.
unsafe impl Send for NearSpansUnordered<'_> {}
427
428impl<'a> NearSpansUnordered<'a> {
429 fn new(sub_spans: Vec<TermSpans<'a>>, slop: u32) -> Self {
430 let n = sub_spans.len();
431 Self {
432 sub_spans,
433 slop,
434 current_doc: NO_MORE_DOCS,
435 match_start: NO_MORE_POSITIONS,
436 match_end: NO_MORE_POSITIONS,
437 match_width: 0,
438 indices: vec![0; n],
439 first_in_doc: false,
440 }
441 }
442
443 fn advance_to_common_doc(&mut self) -> DocId {
445 if self.sub_spans.is_empty() {
446 return NO_MORE_DOCS;
447 }
448 let mut target = self.sub_spans[0].doc_id();
449 if target == NO_MORE_DOCS {
450 return NO_MORE_DOCS;
451 }
452 let mut i = 1;
453 while i < self.sub_spans.len() {
454 let doc = self.sub_spans[i].doc_id();
455 if doc == target {
456 i += 1;
457 continue;
458 }
459 if doc == NO_MORE_DOCS {
460 return NO_MORE_DOCS;
461 }
462 if doc < target {
463 let new_doc = self.sub_spans[i].advance_doc(target);
464 if new_doc == NO_MORE_DOCS {
465 return NO_MORE_DOCS;
466 }
467 if new_doc > target {
468 target = new_doc;
469 let d0 = self.sub_spans[0].advance_doc(target);
470 if d0 == NO_MORE_DOCS {
471 return NO_MORE_DOCS;
472 }
473 target = d0;
474 i = 1;
475 continue;
476 }
477 i += 1;
478 } else {
479 target = doc;
480 let d0 = self.sub_spans[0].advance_doc(target);
481 if d0 == NO_MORE_DOCS {
482 return NO_MORE_DOCS;
483 }
484 target = d0;
485 i = 1;
486 }
487 }
488 target
489 }
490
491 fn get_positions(&self, i: usize) -> Vec<u32> {
493 let s = &self.sub_spans[i];
494 if s.current_tf == 1 {
495 vec![s.reader.first_position()]
496 } else {
497 s.reader.positions().to_vec()
498 }
499 }
500
501 fn find_match_unordered(&mut self) -> bool {
504 let n = self.sub_spans.len();
505 let all_positions: Vec<Vec<u32>> = (0..n).map(|i| self.get_positions(i)).collect();
506
507 for positions in &all_positions {
509 if positions.is_empty() {
510 return false;
511 }
512 }
513
514 for idx in &mut self.indices {
516 *idx = 0;
517 }
518
519 let max_span = self.slop + n as u32 - 1;
520
521 loop {
522 let mut min_pos = u32::MAX;
523 let mut max_pos = 0u32;
524 let mut min_idx = 0;
525
526 for (i, &idx) in self.indices.iter().enumerate() {
527 if idx >= all_positions[i].len() {
528 return false;
529 }
530 let pos = all_positions[i][idx];
531 if pos < min_pos {
532 min_pos = pos;
533 min_idx = i;
534 }
535 if pos > max_pos {
536 max_pos = pos;
537 }
538 }
539
540 let window = max_pos - min_pos;
541 if window <= max_span {
542 self.match_start = min_pos;
543 self.match_end = max_pos + 1;
544 self.match_width = window - (n as u32 - 1); return true;
546 }
547
548 self.indices[min_idx] += 1;
550 if self.indices[min_idx] >= all_positions[min_idx].len() {
551 return false;
552 }
553 }
554 }
555}
556
557impl Spans for NearSpansUnordered<'_> {
558 fn doc_id(&self) -> DocId {
559 self.current_doc
560 }
561
562 fn next_doc(&mut self) -> DocId {
563 let next = self.sub_spans[0].next_doc();
564 if next == NO_MORE_DOCS {
565 self.current_doc = NO_MORE_DOCS;
566 return NO_MORE_DOCS;
567 }
568 for i in 1..self.sub_spans.len() {
569 self.sub_spans[i].next_doc();
570 }
571 self.current_doc = self.advance_to_common_doc();
572 self.first_in_doc = true;
573 self.current_doc
574 }
575
576 fn advance_doc(&mut self, target: DocId) -> DocId {
577 for s in &mut self.sub_spans {
578 s.advance_doc(target);
579 }
580 self.current_doc = self.advance_to_common_doc();
581 self.first_in_doc = true;
582 self.current_doc
583 }
584
585 fn next_start_position(&mut self) -> u32 {
586 if self.current_doc == NO_MORE_DOCS {
587 return NO_MORE_POSITIONS;
588 }
589 if self.first_in_doc {
590 self.first_in_doc = false;
591 if self.find_match_unordered() {
592 return self.match_start;
593 }
594 return NO_MORE_POSITIONS;
595 }
596 NO_MORE_POSITIONS
599 }
600
601 fn start_position(&self) -> u32 {
602 self.match_start
603 }
604 fn end_position(&self) -> u32 {
605 self.match_end
606 }
607 fn width(&self) -> u32 {
608 self.match_width
609 }
610}
611
/// Query pairing an `include` span query with an `exclude` span query.
///
/// The scorer built for it in this file (`SpanNotScorer`) drops every
/// `include` document on which the `exclude` scorer is also positioned.
pub struct SpanNotQuery {
    /// Span query supplying the candidate documents and their scores.
    pub(crate) include: Box<dyn SpanQuery>,
    /// Span query whose matching documents are removed from the result.
    pub(crate) exclude: Box<dyn SpanQuery>,
}
620
621impl Query for SpanNotQuery {
622 fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
623 Ok(<Self as SpanQuery>::bind_span(self, searcher, score_mode)?)
625 }
626}
627
628impl SpanQuery for SpanNotQuery {
629 fn bind_span(
630 &self,
631 searcher: &Searcher,
632 score_mode: ScoreMode,
633 ) -> Result<Box<dyn BoundSpanQuery>> {
634 let include_weight = self.include.bind_span(searcher, score_mode)?;
635 let exclude_weight = self.exclude.bind_span(searcher, score_mode)?;
636 Ok(Box::new(BoundSpanNotQuery {
637 include_weight,
638 exclude_weight,
639 }))
640 }
641}
642
/// Bound form of `SpanNotQuery`: holds the bound include and exclude queries.
struct BoundSpanNotQuery {
    /// Bound query supplying candidate documents.
    include_weight: Box<dyn BoundSpanQuery>,
    /// Bound query supplying documents to drop.
    exclude_weight: Box<dyn BoundSpanQuery>,
}
647
648impl BoundQuery for BoundSpanNotQuery {
649 fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
650 let include = match self.include_weight.scorer_supplier(reader)? {
651 Some(s) => s,
652 None => return Ok(None),
653 };
654 let exclude = self.exclude_weight.scorer_supplier(reader)?;
655 Ok(Some(Box::new(SpanNotScorerSupplier { include, exclude })))
656 }
657}
658
659impl BoundSpanQuery for BoundSpanNotQuery {
660 fn span_scorer_supplier(
661 &self,
662 reader: &SegmentReader,
663 max_end: u32,
664 ) -> Result<Option<Box<dyn ScorerSupplier>>> {
665 let include = match self.include_weight.span_scorer_supplier(reader, max_end)? {
668 Some(s) => s,
669 None => return Ok(None),
670 };
671 let exclude = self.exclude_weight.scorer_supplier(reader)?;
672 Ok(Some(Box::new(SpanNotScorerSupplier { include, exclude })))
673 }
674}
675
/// Supplier that combines an include supplier with an optional exclude
/// supplier into a `SpanNotScorer`.
struct SpanNotScorerSupplier {
    /// Supplier for the candidate documents.
    include: Box<dyn ScorerSupplier>,
    /// Supplier for the documents to drop; `None` means nothing is dropped.
    exclude: Option<Box<dyn ScorerSupplier>>,
}
680
681impl ScorerSupplier for SpanNotScorerSupplier {
682 fn cost(&self) -> u64 {
683 self.include.cost()
684 }
685 fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
686 let include = self.include.scorer()?;
687 let exclude = match self.exclude {
688 Some(e) => Some(e.scorer()?),
689 None => None,
690 };
691 let mut scorer = SpanNotScorer { include, exclude };
692 scorer.find_next_non_excluded();
693 Ok(Box::new(scorer))
694 }
695}
696
/// Scorer that walks `include` and skips every document on which `exclude`
/// is also positioned.
struct SpanNotScorer {
    /// Scorer supplying candidate documents and scores.
    include: Box<dyn Scorer>,
    /// Scorer supplying documents to skip, if any.
    exclude: Option<Box<dyn Scorer>>,
}
702
703impl SpanNotScorer {
704 fn is_excluded(&mut self) -> bool {
705 let Some(ref mut exc) = self.exclude else {
706 return false;
707 };
708 let doc = self.include.doc_id();
709 if exc.doc_id() < doc {
710 exc.advance(doc);
711 }
712 exc.doc_id() == doc
713 }
714
715 fn find_next_non_excluded(&mut self) -> DocId {
716 loop {
717 let doc = self.include.doc_id();
718 if doc == NO_MORE_DOCS {
719 return NO_MORE_DOCS;
720 }
721 if !self.is_excluded() {
722 return doc;
723 }
724 self.include.next();
725 }
726 }
727}
728
729impl Scorer for SpanNotScorer {
730 fn doc_id(&self) -> DocId {
731 self.include.doc_id()
732 }
733 fn next(&mut self) -> DocId {
734 self.include.next();
735 self.find_next_non_excluded()
736 }
737 fn advance(&mut self, target: DocId) -> DocId {
738 self.include.advance(target);
739 self.find_next_non_excluded()
740 }
741 fn score(&mut self) -> f32 {
742 self.include.score()
744 }
745 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
746 None
747 }
748}
749
/// Query that restricts `inner` to matches whose end position does not
/// exceed `end` (the limit is passed down as `max_end` when binding scorers).
pub struct SpanFirstQuery {
    /// The span query being restricted.
    pub(crate) inner: Box<dyn SpanQuery>,
    /// Largest end position a match may have.
    pub end: u32,
}
758
759impl Query for SpanFirstQuery {
760 fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
761 Ok(<Self as SpanQuery>::bind_span(self, searcher, score_mode)?)
762 }
763}
764
765impl SpanQuery for SpanFirstQuery {
766 fn bind_span(
767 &self,
768 searcher: &Searcher,
769 score_mode: ScoreMode,
770 ) -> Result<Box<dyn BoundSpanQuery>> {
771 let inner_weight = self.inner.bind_span(searcher, score_mode)?;
772 Ok(Box::new(BoundSpanFirstQuery {
773 inner_weight,
774 end: self.end,
775 }))
776 }
777}
778
/// Bound form of `SpanFirstQuery`.
struct BoundSpanFirstQuery {
    /// Bound inner query that produces the actual scorers.
    inner_weight: Box<dyn BoundSpanQuery>,
    /// End-position limit passed to the inner query as `max_end`.
    end: u32,
}
783
784impl BoundQuery for BoundSpanFirstQuery {
785 fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
786 self.inner_weight.span_scorer_supplier(reader, self.end)
792 }
793}
794
795impl BoundSpanQuery for BoundSpanFirstQuery {
796 fn span_scorer_supplier(
797 &self,
798 reader: &SegmentReader,
799 max_end: u32,
800 ) -> Result<Option<Box<dyn ScorerSupplier>>> {
801 self.inner_weight
803 .span_scorer_supplier(reader, max_end.min(self.end))
804 }
805}
806
/// Span query matching a single term in a single field.
pub struct SpanTermQuery {
    /// Name of the field to search.
    pub field: String,
    /// Term to look up in that field.
    pub value: String,
}
816
817impl Query for SpanTermQuery {
818 fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
819 Ok(<Self as SpanQuery>::bind_span(self, searcher, score_mode)?)
820 }
821}
822
823impl SpanQuery for SpanTermQuery {
824 fn bind_span(&self, searcher: &Searcher, _: ScoreMode) -> Result<Box<dyn BoundSpanQuery>> {
825 let total_docs = searcher.total_docs();
826 let doc_freq = searcher.doc_freq(&self.field, &self.value);
827 let idf = bm25_idf(total_docs, doc_freq);
828 let avg_field_length = searcher.avg_field_length(&self.field);
829 Ok(Box::new(BoundSpanTermQuery {
830 field: self.field.clone(),
831 value: self.value.clone(),
832 idf,
833 avg_field_length,
834 }))
835 }
836}
837
/// Bound form of `SpanTermQuery` with the scoring statistics captured at
/// bind time.
struct BoundSpanTermQuery {
    /// Name of the field to search.
    field: String,
    /// Term to look up.
    value: String,
    /// BM25 IDF computed for the term.
    idf: f32,
    /// Average length of the field as reported by the searcher.
    avg_field_length: f32,
}
844
845impl BoundQuery for BoundSpanTermQuery {
846 fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
847 let field_id = match reader
848 .header()
849 .fields
850 .iter()
851 .find(|f| f.field_name == self.field)
852 .map(|f| f.field_id)
853 {
854 Some(id) => id,
855 None => return Ok(None),
856 };
857 if reader
858 .postings_with_positions(field_id, &self.value)
859 .is_none()
860 {
861 return Ok(None);
862 }
863 Ok(Some(Box::new(SpanTermScorerSupplier {
864 segment: reader as *const SegmentReader,
865 field_id,
866 value: self.value.clone(),
867 idf: self.idf,
868 avg_field_length: self.avg_field_length,
869 })))
870 }
871}
872
873impl BoundSpanQuery for BoundSpanTermQuery {
874 fn span_scorer_supplier(
875 &self,
876 reader: &SegmentReader,
877 max_end: u32,
878 ) -> Result<Option<Box<dyn ScorerSupplier>>> {
879 let field_id = match reader
880 .header()
881 .fields
882 .iter()
883 .find(|f| f.field_name == self.field)
884 .map(|f| f.field_id)
885 {
886 Some(id) => id,
887 None => return Ok(None),
888 };
889 if reader
890 .postings_with_positions(field_id, &self.value)
891 .is_none()
892 {
893 return Ok(None);
894 }
895 Ok(Some(Box::new(FilteredSpanTermScorerSupplier {
896 segment: reader as *const SegmentReader,
897 field_id,
898 value: self.value.clone(),
899 idf: self.idf,
900 avg_field_length: self.avg_field_length,
901 max_end,
902 })))
903 }
904}
905
/// Supplier for an unfiltered single-term span scorer.
struct SpanTermScorerSupplier {
    /// Raw pointer to the segment the scorer will read from.
    // NOTE(review): dereferenced in `scorer()`; the segment must outlive both
    // this supplier and the scorer it creates — confirm callers guarantee it.
    segment: *const SegmentReader,
    /// Field the term is looked up in.
    field_id: FieldId,
    /// Term to look up.
    value: String,
    /// BM25 IDF of the term.
    idf: f32,
    /// Average length of the field.
    avg_field_length: f32,
}
// NOTE(review): `Send` is asserted manually because of the raw pointer;
// soundness depends on how `SegmentReader` may be shared — confirm.
unsafe impl Send for SpanTermScorerSupplier {}
914
915impl ScorerSupplier for SpanTermScorerSupplier {
916 fn cost(&self) -> u64 {
917 1000
918 }
919 fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
920 let reader = unsafe { &*self.segment };
921 let pos_reader = reader
922 .postings_with_positions(self.field_id, &self.value)
923 .unwrap();
924 let norms = reader.norms(self.field_id);
925 let mut spans = TermSpans::new(pos_reader);
926 spans.next_doc(); Ok(Box::new(SimpleSpanScorer {
928 spans,
929 idf: self.idf,
930 avg_field_length: self.avg_field_length,
931 norms,
932 }))
933 }
934}
935
/// Supplier for a single-term span scorer whose matches are limited by
/// `max_end`.
struct FilteredSpanTermScorerSupplier {
    /// Raw pointer to the segment the scorer will read from.
    // NOTE(review): dereferenced in `scorer()`; the segment must outlive both
    // this supplier and the scorer it creates — confirm callers guarantee it.
    segment: *const SegmentReader,
    /// Field the term is looked up in.
    field_id: FieldId,
    /// Term to look up.
    value: String,
    /// BM25 IDF of the term.
    idf: f32,
    /// Average length of the field.
    avg_field_length: f32,
    /// Largest end position a match may have.
    max_end: u32,
}
// NOTE(review): `Send` is asserted manually because of the raw pointer;
// soundness depends on how `SegmentReader` may be shared — confirm.
unsafe impl Send for FilteredSpanTermScorerSupplier {}
949
950impl ScorerSupplier for FilteredSpanTermScorerSupplier {
951 fn cost(&self) -> u64 {
952 1000
953 }
954 fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
955 let reader = unsafe { &*self.segment };
956 let pos_reader = reader
957 .postings_with_positions(self.field_id, &self.value)
958 .unwrap();
959 let norms = reader.norms(self.field_id);
960 let mut inner = TermSpans::new(pos_reader);
961 inner.next_doc();
962 let spans = FilterSpans {
963 inner,
964 max_end: self.max_end,
965 };
966 let mut scorer = FilteredSpanTermScorer {
967 spans,
968 idf: self.idf,
969 avg_field_length: self.avg_field_length,
970 norms,
971 freq: 0.0,
972 };
973 scorer.find_next_matching_doc();
974 Ok(Box::new(scorer))
975 }
976}
977
/// Scorer over a position-limited single-term span cursor.
struct FilteredSpanTermScorer<'a> {
    /// Term positions filtered by a maximum end position.
    spans: FilterSpans<TermSpans<'a>>,
    /// BM25 IDF of the term.
    idf: f32,
    /// Average length of the field.
    avg_field_length: f32,
    /// Field norms, when the segment provides them.
    norms: Option<FieldNormsReader<'a>>,
    /// Number of accepted matches in the current document.
    freq: f32,
}

// NOTE(review): `Send` is asserted manually; it relies on the readers held
// here being safe to move across threads — confirm.
unsafe impl Send for FilteredSpanTermScorer<'_> {}
990
991impl FilteredSpanTermScorer<'_> {
992 fn find_next_matching_doc(&mut self) -> DocId {
993 loop {
994 if self.spans.doc_id() == NO_MORE_DOCS {
995 self.freq = 0.0;
996 return NO_MORE_DOCS;
997 }
998 let mut freq = 0.0f32;
999 while self.spans.next_start_position() != NO_MORE_POSITIONS {
1000 freq += 1.0;
1001 }
1002 if freq > 0.0 {
1003 self.freq = freq;
1004 return self.spans.doc_id();
1005 }
1006 self.spans.next_doc();
1007 }
1008 }
1009}
1010
1011impl Scorer for FilteredSpanTermScorer<'_> {
1012 fn doc_id(&self) -> DocId {
1013 self.spans.doc_id()
1014 }
1015 fn next(&mut self) -> DocId {
1016 self.spans.next_doc();
1017 self.find_next_matching_doc()
1018 }
1019 fn advance(&mut self, target: DocId) -> DocId {
1020 self.spans.advance_doc(target);
1021 self.find_next_matching_doc()
1022 }
1023 fn score(&mut self) -> f32 {
1024 let dl = self
1025 .norms
1026 .as_ref()
1027 .map(|n| n.norm(self.doc_id()))
1028 .unwrap_or(1.0);
1029 bm25_score(self.idf, self.freq, dl, self.avg_field_length)
1030 }
1031 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
1032 None
1033 }
1034}
1035
/// Span query matching several terms of one field close to each other.
pub struct SpanNearQuery {
    /// Name of the field to search.
    pub field: String,
    /// Terms that must all occur.
    pub terms: Vec<String>,
    /// Allowed slack between the terms.
    pub slop: u32,
    /// Whether the terms must appear in the order given.
    pub in_order: bool,
}
1046
1047impl Query for SpanNearQuery {
1048 fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
1049 Ok(<Self as SpanQuery>::bind_span(self, searcher, score_mode)?)
1050 }
1051}
1052
1053impl SpanQuery for SpanNearQuery {
1054 fn bind_span(&self, searcher: &Searcher, _: ScoreMode) -> Result<Box<dyn BoundSpanQuery>> {
1055 let total_docs = searcher.total_docs();
1058 let idf: f32 = self
1059 .terms
1060 .iter()
1061 .map(|t| bm25_idf(total_docs, searcher.doc_freq(&self.field, t)))
1062 .sum();
1063 let avg_field_length = searcher.avg_field_length(&self.field);
1064 Ok(Box::new(BoundSpanNearQuery {
1065 field: self.field.clone(),
1066 terms: self.terms.clone(),
1067 slop: self.slop,
1068 in_order: self.in_order,
1069 idf,
1070 avg_field_length,
1071 }))
1072 }
1073}
1074
/// Bound form of `SpanNearQuery` with the scoring statistics captured at
/// bind time.
struct BoundSpanNearQuery {
    /// Name of the field to search.
    field: String,
    /// Terms that must all occur.
    terms: Vec<String>,
    /// Allowed slack between the terms.
    slop: u32,
    /// Whether the terms must appear in the order given.
    in_order: bool,
    /// Sum of the BM25 IDFs of all terms.
    idf: f32,
    /// Average length of the field.
    avg_field_length: f32,
}
1083
1084impl BoundQuery for BoundSpanNearQuery {
1085 fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
1086 let field_id = match reader
1087 .header()
1088 .fields
1089 .iter()
1090 .find(|f| f.field_name == self.field)
1091 .map(|f| f.field_id)
1092 {
1093 Some(id) => id,
1094 None => return Ok(None),
1095 };
1096 for term in &self.terms {
1098 if reader.postings_with_positions(field_id, term).is_none() {
1099 return Ok(None);
1100 }
1101 }
1102 Ok(Some(Box::new(SpanNearScorerSupplier {
1103 segment: reader as *const SegmentReader,
1104 field_id,
1105 terms: self.terms.clone(),
1106 slop: self.slop,
1107 in_order: self.in_order,
1108 idf: self.idf,
1109 avg_field_length: self.avg_field_length,
1110 max_end: None,
1111 })))
1112 }
1113}
1114
1115impl BoundSpanQuery for BoundSpanNearQuery {
1116 fn span_scorer_supplier(
1117 &self,
1118 reader: &SegmentReader,
1119 max_end: u32,
1120 ) -> Result<Option<Box<dyn ScorerSupplier>>> {
1121 let field_id = match reader
1122 .header()
1123 .fields
1124 .iter()
1125 .find(|f| f.field_name == self.field)
1126 .map(|f| f.field_id)
1127 {
1128 Some(id) => id,
1129 None => return Ok(None),
1130 };
1131 for term in &self.terms {
1132 if reader.postings_with_positions(field_id, term).is_none() {
1133 return Ok(None);
1134 }
1135 }
1136 Ok(Some(Box::new(SpanNearScorerSupplier {
1137 segment: reader as *const SegmentReader,
1138 field_id,
1139 terms: self.terms.clone(),
1140 slop: self.slop,
1141 in_order: self.in_order,
1142 idf: self.idf,
1143 avg_field_length: self.avg_field_length,
1144 max_end: Some(max_end),
1145 })))
1146 }
1147}
1148
/// Supplier for near-span scorers (ordered or unordered, optionally limited
/// by a maximum end position).
struct SpanNearScorerSupplier {
    /// Raw pointer to the segment the scorer will read from.
    // NOTE(review): dereferenced in `scorer()`; the segment must outlive both
    // this supplier and the scorer it creates — confirm callers guarantee it.
    segment: *const SegmentReader,
    /// Field the terms are looked up in.
    field_id: FieldId,
    /// Terms that must all occur.
    terms: Vec<String>,
    /// Allowed slack between the terms.
    slop: u32,
    /// Selects the ordered (`true`) or unordered (`false`) matcher.
    in_order: bool,
    /// Sum of the BM25 IDFs of all terms.
    idf: f32,
    /// Average length of the field.
    avg_field_length: f32,
    /// When set, matches are filtered by this maximum end position.
    max_end: Option<u32>,
}
// NOTE(review): `Send` is asserted manually because of the raw pointer;
// soundness depends on how `SegmentReader` may be shared — confirm.
unsafe impl Send for SpanNearScorerSupplier {}
1163
1164impl ScorerSupplier for SpanNearScorerSupplier {
1165 fn cost(&self) -> u64 {
1166 1000
1167 }
1168 fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
1169 let reader = unsafe { &*self.segment };
1170 let sub_spans: Vec<TermSpans> = self
1171 .terms
1172 .iter()
1173 .map(|t| TermSpans::new(reader.postings_with_positions(self.field_id, t).unwrap()))
1174 .collect();
1175 let norms = reader.norms(self.field_id);
1176
1177 match (self.in_order, self.max_end) {
1178 (true, None) => {
1179 let mut spans = NearSpansOrdered::new(sub_spans, self.slop);
1180 spans.next_doc();
1181 let mut scorer = TwoPhaseSpanScorer {
1182 spans,
1183 idf: self.idf,
1184 avg_field_length: self.avg_field_length,
1185 norms,
1186 sloppy_freq: 0.0,
1187 };
1188 scorer.find_next_matching_doc();
1189 Ok(Box::new(scorer))
1190 }
1191 (false, None) => {
1192 let mut spans = NearSpansUnordered::new(sub_spans, self.slop);
1193 spans.next_doc();
1194 let mut scorer = TwoPhaseSpanScorerUnordered {
1195 spans,
1196 idf: self.idf,
1197 avg_field_length: self.avg_field_length,
1198 norms,
1199 sloppy_freq: 0.0,
1200 };
1201 scorer.find_next_matching_doc();
1202 Ok(Box::new(scorer))
1203 }
1204 (true, Some(max_end)) => {
1205 let mut inner = NearSpansOrdered::new(sub_spans, self.slop);
1206 inner.next_doc();
1207 let spans = FilterSpans { inner, max_end };
1208 let mut scorer = FilteredNearSpanScorer {
1209 spans,
1210 idf: self.idf,
1211 avg_field_length: self.avg_field_length,
1212 norms,
1213 sloppy_freq: 0.0,
1214 };
1215 scorer.find_next_matching_doc();
1216 Ok(Box::new(scorer))
1217 }
1218 (false, Some(max_end)) => {
1219 let mut inner = NearSpansUnordered::new(sub_spans, self.slop);
1220 inner.next_doc();
1221 let spans = FilterSpans { inner, max_end };
1222 let mut scorer = FilteredNearSpanScorer {
1223 spans,
1224 idf: self.idf,
1225 avg_field_length: self.avg_field_length,
1226 norms,
1227 sloppy_freq: 0.0,
1228 };
1229 scorer.find_next_matching_doc();
1230 Ok(Box::new(scorer))
1231 }
1232 }
1233 }
1234}
1235
/// Scorer over a position-limited near-span matcher of type `S`.
struct FilteredNearSpanScorer<'a, S: Spans> {
    /// Near-span matches filtered by a maximum end position.
    spans: FilterSpans<S>,
    /// Sum of the BM25 IDFs of the query terms.
    idf: f32,
    /// Average length of the field.
    avg_field_length: f32,
    /// Field norms, when the segment provides them.
    norms: Option<FieldNormsReader<'a>>,
    /// Sum of `1 / (1 + width)` over the matches of the current document.
    sloppy_freq: f32,
}

// NOTE(review): `Send` is asserted manually; it relies on the readers held
// here being safe to move across threads — confirm.
unsafe impl<S: Spans> Send for FilteredNearSpanScorer<'_, S> {}
1252
1253impl<S: Spans> FilteredNearSpanScorer<'_, S> {
1254 fn find_next_matching_doc(&mut self) -> DocId {
1255 loop {
1256 if self.spans.doc_id() == NO_MORE_DOCS {
1257 self.sloppy_freq = 0.0;
1258 return NO_MORE_DOCS;
1259 }
1260 let mut freq = 0.0f32;
1261 while self.spans.next_start_position() != NO_MORE_POSITIONS {
1262 freq += 1.0 / (1.0 + self.spans.width() as f32);
1263 }
1264 if freq > 0.0 {
1265 self.sloppy_freq = freq;
1266 return self.spans.doc_id();
1267 }
1268 self.spans.next_doc();
1269 }
1270 }
1271}
1272
1273impl<S: Spans> Scorer for FilteredNearSpanScorer<'_, S> {
1274 fn doc_id(&self) -> DocId {
1275 self.spans.doc_id()
1276 }
1277 fn next(&mut self) -> DocId {
1278 self.spans.next_doc();
1279 self.find_next_matching_doc()
1280 }
1281 fn advance(&mut self, target: DocId) -> DocId {
1282 self.spans.advance_doc(target);
1283 self.find_next_matching_doc()
1284 }
1285 fn score(&mut self) -> f32 {
1286 let dl = self
1287 .norms
1288 .as_ref()
1289 .map(|n| n.norm(self.doc_id()))
1290 .unwrap_or(1.0);
1291 bm25_score(self.idf, self.sloppy_freq, dl, self.avg_field_length)
1292 }
1293 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
1294 None
1295 }
1296}
1297
/// Scorer over an unfiltered single-term span cursor; scores with the term
/// frequency recorded by the cursor.
struct SimpleSpanScorer<'a> {
    /// Term cursor driving document iteration.
    spans: TermSpans<'a>,
    /// BM25 IDF of the term.
    idf: f32,
    /// Average length of the field.
    avg_field_length: f32,
    /// Field norms, when the segment provides them.
    norms: Option<FieldNormsReader<'a>>,
}

// NOTE(review): `Send` is asserted manually; it relies on the readers held
// here being safe to move across threads — confirm.
unsafe impl Send for SimpleSpanScorer<'_> {}
1312
1313impl Scorer for SimpleSpanScorer<'_> {
1314 fn doc_id(&self) -> DocId {
1315 self.spans.doc_id()
1316 }
1317 fn next(&mut self) -> DocId {
1318 self.spans.next_doc()
1319 }
1320 fn advance(&mut self, target: DocId) -> DocId {
1321 self.spans.advance_doc(target)
1322 }
1323 fn score(&mut self) -> f32 {
1324 let tf = self.spans.current_tf as f32;
1325 let dl = self
1326 .norms
1327 .as_ref()
1328 .map(|n| n.norm(self.doc_id()))
1329 .unwrap_or(1.0);
1330 bm25_score(self.idf, tf, dl, self.avg_field_length)
1331 }
1332 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
1333 None
1334 }
1335}
1336
/// Scorer over an ordered near-span matcher.
///
/// Despite the name, `two_phase()` on this scorer returns `None`.
struct TwoPhaseSpanScorer<'a> {
    /// Ordered near-span matcher driving iteration.
    spans: NearSpansOrdered<'a>,
    /// Sum of the BM25 IDFs of the query terms.
    idf: f32,
    /// Average length of the field.
    avg_field_length: f32,
    /// Field norms, when the segment provides them.
    norms: Option<FieldNormsReader<'a>>,
    /// Sum of `1 / (1 + width)` over the matches of the current document.
    sloppy_freq: f32,
}

// NOTE(review): `Send` is asserted manually; it relies on the readers held
// here being safe to move across threads — confirm.
unsafe impl Send for TwoPhaseSpanScorer<'_> {}
1353
1354impl TwoPhaseSpanScorer<'_> {
1355 fn find_next_matching_doc(&mut self) -> DocId {
1358 loop {
1359 if self.spans.current_doc == NO_MORE_DOCS {
1360 self.sloppy_freq = 0.0;
1361 return NO_MORE_DOCS;
1362 }
1363 let mut freq: f32 = 0.0;
1366 while self.spans.next_start_position() != NO_MORE_POSITIONS {
1367 freq += 1.0 / (1.0 + self.spans.width() as f32);
1368 }
1369 if freq > 0.0 {
1370 self.sloppy_freq = freq;
1371 return self.spans.current_doc;
1372 }
1373 self.spans.next_doc();
1374 }
1375 }
1376}
1377
1378impl Scorer for TwoPhaseSpanScorer<'_> {
1379 fn doc_id(&self) -> DocId {
1380 self.spans.doc_id()
1381 }
1382 fn next(&mut self) -> DocId {
1383 self.spans.next_doc();
1384 self.find_next_matching_doc()
1385 }
1386 fn advance(&mut self, target: DocId) -> DocId {
1387 self.spans.advance_doc(target);
1388 self.find_next_matching_doc()
1389 }
1390 fn score(&mut self) -> f32 {
1391 let dl = self
1392 .norms
1393 .as_ref()
1394 .map(|n| n.norm(self.doc_id()))
1395 .unwrap_or(1.0);
1396 bm25_score(self.idf, self.sloppy_freq, dl, self.avg_field_length)
1397 }
1398 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
1399 None
1400 }
1401}
1402
/// Scorer over an unordered near-span matcher.
///
/// Despite the name, `two_phase()` on this scorer returns `None`.
struct TwoPhaseSpanScorerUnordered<'a> {
    /// Unordered near-span matcher driving iteration.
    spans: NearSpansUnordered<'a>,
    /// Sum of the BM25 IDFs of the query terms.
    idf: f32,
    /// Average length of the field.
    avg_field_length: f32,
    /// Field norms, when the segment provides them.
    norms: Option<FieldNormsReader<'a>>,
    /// Sum of `1 / (1 + width)` over the matches of the current document.
    sloppy_freq: f32,
}

// NOTE(review): `Send` is asserted manually; it relies on the readers held
// here being safe to move across threads — confirm.
unsafe impl Send for TwoPhaseSpanScorerUnordered<'_> {}
1417
1418impl TwoPhaseSpanScorerUnordered<'_> {
1419 fn find_next_matching_doc(&mut self) -> DocId {
1420 loop {
1421 if self.spans.current_doc == NO_MORE_DOCS {
1422 self.sloppy_freq = 0.0;
1423 return NO_MORE_DOCS;
1424 }
1425 let mut freq: f32 = 0.0;
1426 while self.spans.next_start_position() != NO_MORE_POSITIONS {
1427 freq += 1.0 / (1.0 + self.spans.width() as f32);
1428 }
1429 if freq > 0.0 {
1430 self.sloppy_freq = freq;
1431 return self.spans.current_doc;
1432 }
1433 self.spans.next_doc();
1434 }
1435 }
1436}
1437
1438impl Scorer for TwoPhaseSpanScorerUnordered<'_> {
1439 fn doc_id(&self) -> DocId {
1440 self.spans.doc_id()
1441 }
1442 fn next(&mut self) -> DocId {
1443 self.spans.next_doc();
1444 self.find_next_matching_doc()
1445 }
1446 fn advance(&mut self, target: DocId) -> DocId {
1447 self.spans.advance_doc(target);
1448 self.find_next_matching_doc()
1449 }
1450 fn score(&mut self) -> f32 {
1451 let dl = self
1452 .norms
1453 .as_ref()
1454 .map(|n| n.norm(self.doc_id()))
1455 .unwrap_or(1.0);
1456 bm25_score(self.idf, self.sloppy_freq, dl, self.avg_field_length)
1457 }
1458
1459 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
1460 None
1461 }
1462}
1463
// Tests for the span queries defined in this file (`SpanTermQuery`,
// `SpanNearQuery`, `SpanNotQuery`). Each test builds a one-segment store from
// hand-written token lists and checks either the hit count returned by the
// searcher or the relative scores produced by the scorer.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::Token;
    use crate::core::SegmentId;
    use crate::mapping::{FieldType, Mapping};
    use crate::segment::builder::SegmentBuilder;

    /// Turns a list of terms into one `Token` per term.
    ///
    /// Each token is built from the term text, `0`, the term's byte length and
    /// the term's index in `terms` (presumably start offset, end offset and
    /// position — confirm against `Token::new`).
    fn make_tokens(terms: &[&str]) -> Vec<Token> {
        terms
            .iter()
            .enumerate()
            .map(|(i, t)| Token::new(*t, 0, t.len(), i as u32))
            .collect()
    }

    /// Builds a `SegmentStore` holding a single segment with one document per
    /// entry of `docs`.
    ///
    /// The mapping declares one `Text` field named "text"; every document's
    /// tokens are added under `FieldId::new(0)` (presumably that same field —
    /// confirm) with `b"{}"` as the accompanying bytes. Panics if the built
    /// segment cannot be opened.
    fn build_store(docs: &[&[&str]]) -> crate::search::segment_store::SegmentStore {
        let schema = Mapping::builder().field("text", FieldType::Text).build();
        let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
        for terms in docs {
            builder.add_document(&[(FieldId::new(0), make_tokens(terms))], b"{}");
        }
        let reader = SegmentReader::open(builder.build()).unwrap();
        crate::search::segment_store::SegmentStore::new(
            vec![reader],
            crate::analysis::AnalyzerRegistry::new(),
            None,
            None,
        )
    }

    /// A span term query counts every document containing the term.
    #[test]
    fn span_term_basic() {
        let store = build_store(&[
            &["the", "quick", "brown", "fox"],
            &["the", "lazy", "dog"],
            &["quick", "fox"],
        ]);
        let searcher = Searcher::new(&store);
        let results = searcher
            .search_query(
                &SpanTermQuery {
                    field: "text".into(),
                    value: "quick".into(),
                },
                10,
                0,
            )
            .unwrap();
        // "quick" appears in the first and third documents.
        assert_eq!(results.total_hits.value, 2);
    }

    /// A span term query for a term no document contains yields no hits.
    #[test]
    fn span_term_missing() {
        let store = build_store(&[&["the", "quick"]]);
        let searcher = Searcher::new(&store);
        let results = searcher
            .search_query(
                &SpanTermQuery {
                    field: "text".into(),
                    value: "nonexistent".into(),
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 0);
    }

    /// Ordered span-near with slop 0 behaves like an exact phrase match.
    #[test]
    fn span_near_exact_phrase() {
        let store = build_store(&[
            // "quick" directly followed by "brown".
            &["the", "quick", "brown", "fox"],
            // "brown" comes before "quick".
            &["brown", "quick", "fox"],
            // "quick" directly followed by "brown".
            &["quick", "brown"],
        ]);
        let searcher = Searcher::new(&store);
        let results = searcher
            .search_query(
                &SpanNearQuery {
                    field: "text".into(),
                    terms: vec!["quick".into(), "brown".into()],
                    slop: 0,
                    in_order: true,
                },
                10,
                0,
            )
            .unwrap();
        // Only the two documents with the terms adjacent and in order count.
        assert_eq!(results.total_hits.value, 2);
    }

    /// Ordered span-near with slop 1 tolerates at most one intervening token.
    #[test]
    fn span_near_with_slop() {
        let store = build_store(&[
            // One token between the terms.
            &["quick", "brown", "fox"],
            // Terms adjacent.
            &["quick", "fox"],
            // Two tokens between the terms.
            &["quick", "a", "b", "fox"],
        ]);
        let searcher = Searcher::new(&store);
        let results = searcher
            .search_query(
                &SpanNearQuery {
                    field: "text".into(),
                    terms: vec!["quick".into(), "fox".into()],
                    slop: 1,
                    in_order: true,
                },
                10,
                0,
            )
            .unwrap();
        // The document with two intervening tokens is expected to be left out.
        assert_eq!(results.total_hits.value, 2);
    }

    /// Ordered span-near finds nothing when the gap exceeds the slop.
    #[test]
    fn span_near_no_match() {
        let store = build_store(&[
            // Three tokens between the terms, slop is only 1.
            &["quick", "a", "b", "c", "fox"],
        ]);
        let searcher = Searcher::new(&store);
        let results = searcher
            .search_query(
                &SpanNearQuery {
                    field: "text".into(),
                    terms: vec!["quick".into(), "fox".into()],
                    slop: 1,
                    in_order: true,
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 0);
    }

    /// Ordered span-near over three terms with slop 0 requires all three terms
    /// consecutively and in query order.
    #[test]
    fn span_near_three_terms() {
        let store = build_store(&[
            // Contains "quick brown fox" consecutively.
            &["the", "quick", "brown", "fox"],
            // Same terms, but "fox" precedes "brown".
            &["quick", "fox", "brown"],
        ]);
        let searcher = Searcher::new(&store);
        let results = searcher
            .search_query(
                &SpanNearQuery {
                    field: "text".into(),
                    terms: vec!["quick".into(), "brown".into(), "fox".into()],
                    slop: 0,
                    in_order: true,
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 1);
    }

    /// With `in_order: true`, terms in reverse order never match, even with a
    /// generous slop.
    #[test]
    fn span_near_wrong_order() {
        let store = build_store(&[
            // "fox" comes before "quick".
            &["fox", "quick"],
        ]);
        let searcher = Searcher::new(&store);
        let results = searcher
            .search_query(
                &SpanNearQuery {
                    field: "text".into(),
                    terms: vec!["quick".into(), "fox".into()],
                    slop: 5,
                    in_order: true,
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 0);
    }

    /// Span-near yields no hits when one of its terms is absent from the data.
    #[test]
    fn span_near_one_term_missing() {
        let store = build_store(&[&["quick", "brown"]]);
        let searcher = Searcher::new(&store);
        let results = searcher
            .search_query(
                &SpanNearQuery {
                    field: "text".into(),
                    terms: vec!["quick".into(), "nonexistent".into()],
                    slop: 10,
                    in_order: true,
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 0);
    }

    /// Unordered span-near accepts the terms in either order but still
    /// enforces the slop.
    #[test]
    fn span_near_unordered_basic() {
        let store = build_store(&[
            // Terms adjacent, in reverse query order.
            &["the", "fox", "quick"],
            // Three tokens between the terms, slop is only 1.
            &["quick", "a", "b", "c", "fox"],
        ]);
        let searcher = Searcher::new(&store);
        let results = searcher
            .search_query(
                &SpanNearQuery {
                    field: "text".into(),
                    terms: vec!["quick".into(), "fox".into()],
                    slop: 1,
                    in_order: false,
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 1);
    }

    /// The same reversed document is rejected by the ordered query and
    /// accepted by the unordered one.
    #[test]
    fn span_near_unordered_reversed() {
        let store = build_store(&[&["fox", "quick"]]);
        let searcher = Searcher::new(&store);
        let ordered = searcher
            .search_query(
                &SpanNearQuery {
                    field: "text".into(),
                    terms: vec!["quick".into(), "fox".into()],
                    slop: 1,
                    in_order: true,
                },
                10,
                0,
            )
            .unwrap();
        // Ordered: "fox" precedes "quick", so nothing matches.
        assert_eq!(ordered.total_hits.value, 0);
        let unordered = searcher
            .search_query(
                &SpanNearQuery {
                    field: "text".into(),
                    terms: vec!["quick".into(), "fox".into()],
                    slop: 0,
                    in_order: false,
                },
                10,
                0,
            )
            .unwrap();
        // Unordered: the adjacent terms match even with slop 0.
        assert_eq!(unordered.total_hits.value, 1);
    }

    /// Span-not drops documents in which the excluded term appears alongside
    /// the included one.
    #[test]
    fn span_not_basic() {
        let store = build_store(&[
            // Has the included term and the excluded term.
            &["quick", "fox"],
            // Has only the included term.
            &["quick", "brown"],
            // Has neither term.
            &["slow", "dog"],
        ]);
        let searcher = Searcher::new(&store);
        let results = searcher
            .search_query(
                &SpanNotQuery {
                    include: Box::new(SpanTermQuery {
                        field: "text".into(),
                        value: "quick".into(),
                    }),
                    exclude: Box::new(SpanTermQuery {
                        field: "text".into(),
                        value: "fox".into(),
                    }),
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 1);
    }

    /// Span-not keeps every include match when the excluded term occurs
    /// nowhere.
    #[test]
    fn span_not_no_exclusions() {
        let store = build_store(&[&["quick", "fox"], &["quick", "brown"]]);
        let searcher = Searcher::new(&store);
        let results = searcher
            .search_query(
                &SpanNotQuery {
                    include: Box::new(SpanTermQuery {
                        field: "text".into(),
                        value: "quick".into(),
                    }),
                    exclude: Box::new(SpanTermQuery {
                        field: "text".into(),
                        value: "nonexistent".into(),
                    }),
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 2);
    }

    /// The span term score must grow with term frequency: the document with
    /// two occurrences has to outscore the one with a single occurrence.
    #[test]
    fn span_term_score_uses_bm25_tf() {
        let store = build_store(&[
            // "search" occurs twice.
            &["search", "engine", "search"],
            // "search" occurs once.
            &["search", "tools"],
        ]);
        let searcher = Searcher::new(&store);
        let query = SpanTermQuery {
            field: "text".into(),
            value: "search".into(),
        };

        let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
        let supplier = weight
            .scorer_supplier(&searcher.segments()[0])
            .unwrap()
            .unwrap();
        let mut scorer = supplier.scorer().unwrap();

        // The scorer is expected to sit on the first match right after
        // creation, without a prior call to `next`.
        assert_eq!(scorer.doc_id(), DocId::new(0));
        let doc0_score = scorer.score();
        scorer.next();
        assert_eq!(scorer.doc_id(), DocId::new(1));
        let doc1_score = scorer.score();

        assert!(
            doc0_score > doc1_score,
            "doc with tf=2 ({doc0_score}) must score higher than doc with tf=1 \
             ({doc1_score}) — span_term must use BM25 TF, not hardcoded 1.0"
        );
    }

    /// A span term query and a plain term query for the same term must visit
    /// the same documents and give them the same score (within 1e-5).
    #[test]
    fn span_term_score_matches_term_query() {
        let store = build_store(&[&["search", "engine", "search"], &["search", "tools"]]);
        let searcher = Searcher::new(&store);

        let span_query = SpanTermQuery {
            field: "text".into(),
            value: "search".into(),
        };
        let term_query = crate::query::term::TermQuery {
            field: "text".into(),
            value: "search".into(),
        };

        let span_weight = span_query.bind(&searcher, ScoreMode::Complete).unwrap();
        let span_supplier = span_weight
            .scorer_supplier(&searcher.segments()[0])
            .unwrap()
            .unwrap();
        let mut span_scorer = span_supplier.scorer().unwrap();

        let term_weight = term_query.bind(&searcher, ScoreMode::Complete).unwrap();
        let term_supplier = term_weight
            .scorer_supplier(&searcher.segments()[0])
            .unwrap()
            .unwrap();
        let mut term_scorer = term_supplier.scorer().unwrap();

        // Walk both scorers in lockstep over the two documents.
        for _ in 0..2 {
            assert_eq!(span_scorer.doc_id(), term_scorer.doc_id());
            let span_score = span_scorer.score();
            let term_score = term_scorer.score();
            assert!(
                (span_score - term_score).abs() < 1e-5,
                "span_term score ({span_score}) must equal term query score ({term_score}) \
                 for doc {:?}",
                span_scorer.doc_id()
            );
            span_scorer.next();
            term_scorer.next();
        }
    }

    /// A tight match must outscore a wide one: with both documents matching
    /// under slop 5, the one whose terms are adjacent has to score higher.
    #[test]
    fn span_near_sloppy_freq_penalizes_width() {
        let store = build_store(&[
            // Both documents have five tokens (presumably so that length
            // normalization does not influence the comparison — confirm).
            // First: terms adjacent. Second: two tokens between the terms.
            &["quick", "brown", "a", "b", "c"],
            &["quick", "a", "b", "brown", "c"],
        ]);
        let searcher = Searcher::new(&store);
        let query = SpanNearQuery {
            field: "text".into(),
            terms: vec!["quick".into(), "brown".into()],
            slop: 5,
            in_order: true,
        };

        let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
        let supplier = weight
            .scorer_supplier(&searcher.segments()[0])
            .unwrap()
            .unwrap();
        let mut scorer = supplier.scorer().unwrap();

        // The scorer is expected to sit on the first match right after
        // creation.
        assert_eq!(scorer.doc_id(), DocId::new(0));
        let exact_score = scorer.score();
        scorer.next();
        assert_eq!(scorer.doc_id(), DocId::new(1));
        let sloppy_score = scorer.score();

        assert!(
            exact_score > sloppy_score,
            "exact match ({exact_score}) must score higher than sloppy match ({sloppy_score}) — \
             sloppy frequency must penalize width"
        );
    }

    /// The span-near score must depend on how often the phrase occurs: two
    /// occurrences have to outscore one, and the score must not be a constant
    /// 1.0.
    #[test]
    fn span_near_score_uses_bm25() {
        let store = build_store(&[
            // Both documents have six tokens; the first contains
            // "quick brown" twice, the second once.
            &["quick", "brown", "and", "quick", "brown", "fox"],
            &["quick", "brown", "fox", "and", "lazy", "dog"],
        ]);
        let searcher = Searcher::new(&store);
        let query = SpanNearQuery {
            field: "text".into(),
            terms: vec!["quick".into(), "brown".into()],
            slop: 0,
            in_order: true,
        };

        let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
        let supplier = weight
            .scorer_supplier(&searcher.segments()[0])
            .unwrap()
            .unwrap();
        let mut scorer = supplier.scorer().unwrap();

        assert_eq!(scorer.doc_id(), DocId::new(0));
        let doc0_score = scorer.score();
        scorer.next();
        assert_eq!(scorer.doc_id(), DocId::new(1));
        let doc1_score = scorer.score();

        assert_ne!(doc0_score, 1.0, "span_near score must not be hardcoded 1.0");
        assert!(
            doc0_score > doc1_score,
            "doc with 2 near matches ({doc0_score}) must score higher than \
             doc with 1 near match ({doc1_score})"
        );
    }

    /// Span-not must score like its include clause: with an exclude term that
    /// occurs nowhere, the document with the higher term frequency has to
    /// score higher and the score must not be a constant 1.0.
    #[test]
    fn span_not_delegates_score() {
        let store = build_store(&[
            // "search" occurs twice.
            &["search", "engine", "search"],
            // "search" occurs once.
            &["search", "tools"],
        ]);
        let searcher = Searcher::new(&store);
        let query = SpanNotQuery {
            include: Box::new(SpanTermQuery {
                field: "text".into(),
                value: "search".into(),
            }),
            exclude: Box::new(SpanTermQuery {
                field: "text".into(),
                value: "lazy".into(),
            }),
        };

        let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
        let supplier = weight
            .scorer_supplier(&searcher.segments()[0])
            .unwrap()
            .unwrap();
        let mut scorer = supplier.scorer().unwrap();

        // The scorer is expected to sit on the first match right after
        // creation.
        assert_eq!(scorer.doc_id(), DocId::new(0));
        let doc0_score = scorer.score();
        scorer.next();
        assert_eq!(scorer.doc_id(), DocId::new(1));
        let doc1_score = scorer.score();

        assert_ne!(doc0_score, 1.0, "span_not score must not be hardcoded 1.0");
        assert!(
            doc0_score > doc1_score,
            "span_not must delegate to include score: doc0 ({doc0_score}) should > doc1 ({doc1_score})"
        );
    }
}