1use crate::core::{DocId, FieldId, NO_MORE_DOCS, Result, ScoreMode, Scorer, TwoPhaseIterator};
11
12use crate::query::term::TermQuery;
13use crate::query::{BoundQuery, Query, ScorerSupplier};
14use crate::search::bm25::{bm25_idf, bm25_score};
15use crate::search::searcher::Searcher;
16use crate::segment::reader::SegmentReader;
17
/// A phrase query: matches documents in which the analyzed terms of
/// `query_text` occur at consecutive positions, in order, within `field`.
///
/// The text is analyzed when the query is bound to a searcher, not when the
/// struct is constructed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MatchPhraseQuery {
    /// Name of the field to search.
    pub field: String,
    /// Raw (un-analyzed) phrase text.
    pub query_text: String,
    /// Optional analyzer name; passed to the searcher's analyzer resolution
    /// together with `field`. `None` leaves the choice to the searcher.
    pub analyzer: Option<String>,
}
23
24impl Query for MatchPhraseQuery {
25 fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
26 let analyzer_name = searcher.resolve_search_analyzer(&self.field, self.analyzer.as_deref());
27 let analyzer = searcher.analyzers().get(analyzer_name);
28 let tokens = analyzer.analyze(&self.query_text);
29
30 if tokens.is_empty() {
31 return Ok(Box::new(BoundEmptyQuery));
32 }
33
34 if tokens.len() == 1 {
35 let tq = TermQuery {
36 field: self.field.clone(),
37 value: tokens[0].text.clone(),
38 };
39 return tq.bind(searcher, score_mode);
40 }
41
42 let terms: Vec<String> = tokens.iter().map(|t| t.text.clone()).collect();
43
44 Ok(Box::new(BoundPhraseQuery {
45 field: self.field.clone(),
46 terms,
47 total_docs: searcher.total_docs(),
48 avg_field_length: searcher.avg_field_length(&self.field),
49 }))
50 }
51}
52
/// A phrase query bound to a searcher: the analyzed terms plus the
/// collection statistics captured at bind time.
struct BoundPhraseQuery {
    /// Name of the field the phrase is searched in.
    field: String,
    /// Analyzed phrase terms, in query order.
    terms: Vec<String>,
    /// `Searcher::total_docs()` at bind time; later passed to `bm25_idf`.
    total_docs: u32,
    /// `Searcher::avg_field_length(field)` at bind time; later passed to `bm25_score`.
    avg_field_length: f32,
}
59
60impl BoundQuery for BoundPhraseQuery {
61 fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
62 let field_id = match reader
63 .header()
64 .fields
65 .iter()
66 .find(|f| f.field_name == self.field)
67 .map(|f| f.field_id)
68 {
69 Some(id) => id,
70 None => return Ok(None),
71 };
72
73 let mut term_doc_freqs = Vec::new();
75 for term in &self.terms {
76 let df = reader.doc_freq(field_id, term);
77 if df == 0 {
78 return Ok(None);
79 }
80 term_doc_freqs.push(df);
81 }
82
83 let cost = *term_doc_freqs.iter().min().unwrap() as u64;
84
85 Ok(Some(Box::new(PhraseScorerSupplier {
86 field_id,
87 terms: self.terms.clone(),
88 term_doc_freqs,
89 total_docs: self.total_docs,
90 avg_field_length: self.avg_field_length,
91 cost,
92 segment_data: reader as *const SegmentReader,
93 })))
94 }
95}
96
/// Per-segment factory for a phrase scorer, created by
/// `BoundPhraseQuery::scorer_supplier`.
struct PhraseScorerSupplier {
    /// Id of the searched field within the segment.
    field_id: FieldId,
    /// Phrase terms in query order.
    terms: Vec<String>,
    /// Document frequency of each term in the segment, parallel to `terms`.
    term_doc_freqs: Vec<u32>,
    /// Collection-wide document count, passed to `bm25_idf`.
    total_docs: u32,
    /// Average length of the field, passed to `bm25_score`.
    avg_field_length: f32,
    /// Value returned by `ScorerSupplier::cost`.
    cost: u64,
    /// Raw pointer to the segment reader this supplier was created from; it is
    /// dereferenced in `scorer`.
    /// NOTE(review): nothing in this type keeps the reader alive, so the
    /// reader must outlive the supplier and every scorer built from it —
    /// confirm against callers.
    segment_data: *const SegmentReader,
}

// SAFETY: NOTE(review): the raw `*const SegmentReader` field is what prevents
// an automatic `Send` impl. This presumably relies on the reader being safe to
// read from another thread and outliving the supplier — TODO confirm.
unsafe impl Send for PhraseScorerSupplier {}
108
109impl ScorerSupplier for PhraseScorerSupplier {
110 fn cost(&self) -> u64 {
111 self.cost
112 }
113
114 fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
115 let reader = unsafe { &*self.segment_data };
116
117 let mut position_readers: Vec<crate::inverted::postings::PositionPostingListReader> =
121 Vec::new();
122 for term in &self.terms {
123 match reader.postings_with_positions(self.field_id, term) {
124 Some(r) => position_readers.push(r),
125 None => return Ok(Box::new(SimpleIterScorer::empty())),
126 }
127 }
128
129 let mut term_order: Vec<usize> = (0..self.terms.len()).collect();
132 term_order.sort_by_key(|&i| self.term_doc_freqs[i]);
133
134 let sorted_readers: Vec<_> = term_order
135 .iter()
136 .map(|&i| {
137 std::mem::replace(
138 &mut position_readers[i],
139 crate::inverted::postings::PositionPostingListReader::new(&[]),
140 )
141 })
142 .collect();
143 let sorted_offsets: Vec<u32> = term_order.iter().map(|&i| i as u32).collect();
144
145 let idf: f32 = self
147 .term_doc_freqs
148 .iter()
149 .map(|&df| bm25_idf(self.total_docs, df))
150 .sum();
151
152 let reader_state: Vec<(u32, Vec<u32>)> = sorted_readers
154 .iter()
155 .map(|_| (u32::MAX, Vec::new()))
156 .collect();
157
158 let mut scorer = PhraseScorer {
159 readers: sorted_readers,
160 term_offsets: sorted_offsets,
161 reader_state,
162 current: NO_MORE_DOCS,
163 idf,
164 avg_field_length: self.avg_field_length,
165 norms: reader.norms(self.field_id),
166 constant_score: reader
167 .norms(self.field_id)
168 .and_then(|n| n.uniform_norm())
169 .map(|dl| bm25_score(idf, 1.0, dl, self.avg_field_length)),
170 ptrs_buf: Vec::new(),
171 phrase_freq: 0,
172 };
173
174 scorer.advance_to_next_phrase();
176
177 Ok(Box::new(scorer))
178 }
179}
180
/// Scorer that iterates the documents containing an exact phrase.
struct PhraseScorer<'a> {
    /// One positional postings reader per phrase term. `readers[0]` drives
    /// the iteration; the others are advanced to follow it.
    readers: Vec<crate::inverted::postings::PositionPostingListReader<'a>>,
    /// For each reader, the offset of its term inside the phrase. A phrase
    /// occurrence requires `position - offset` to be equal across all readers.
    term_offsets: Vec<u32>,
    /// Per-reader scratch: (doc id, copy of that reader's positions in the doc).
    reader_state: Vec<(u32, Vec<u32>)>,
    /// Current matching document, or `NO_MORE_DOCS`.
    current: DocId,
    /// idf value passed to `bm25_score`.
    idf: f32,
    /// Average field length passed to `bm25_score`.
    avg_field_length: f32,
    /// Field norms reader, if available; `score` falls back to a length of
    /// `1.0` without it.
    norms: Option<crate::inverted::norms::FieldNormsReader<'a>>,
    /// Precomputed score returned by `score` when the phrase frequency is at most 1.
    constant_score: Option<f32>,
    /// Reusable per-reader cursor buffer used by `count_positions`.
    ptrs_buf: Vec<usize>,
    /// Number of phrase occurrences counted in the current document.
    phrase_freq: u32,
}

// SAFETY: NOTE(review): this asserts the scorer may be moved across threads
// regardless of whether the borrowed readers are `Send` themselves — presumably
// they only reference immutable segment data; TODO confirm.
unsafe impl Send for PhraseScorer<'_> {}
216
217impl PhraseScorer<'_> {
    /// Moves `self.current` to the next document, strictly after the current
    /// one, that contains the phrase; sets it to `NO_MORE_DOCS` when any
    /// reader runs out.
    ///
    /// A `current` of `NO_MORE_DOCS` on entry is treated as "start from doc 0".
    /// Two-term phrases are delegated to `advance_two_term_phrase`.
    fn advance_to_next_phrase(&mut self) {
        let num_readers = self.readers.len();
        if num_readers == 0 {
            self.current = NO_MORE_DOCS;
            return;
        }

        if num_readers == 2 {
            self.advance_two_term_phrase();
            return;
        }

        // Move the lead reader to the first candidate after the current doc.
        let mut lead_doc =
            match self.readers[0].advance(DocId::new(if self.current == NO_MORE_DOCS {
                0
            } else {
                self.current.as_u32() + 1
            })) {
                Some(id) => id,
                None => {
                    self.current = NO_MORE_DOCS;
                    return;
                }
            };

        loop {
            // Leapfrog until every reader sits on the same document.
            'align: loop {
                let target = lead_doc.as_u32();
                let mut aligned = true;

                for i in 1..num_readers {
                    match self.readers[i].advance(lead_doc) {
                        Some(id) if id.as_u32() == target => {}
                        // Follower landed on a different doc: move the lead
                        // there and restart the alignment pass.
                        Some(id) => match self.readers[0].advance(id) {
                            Some(new_lead) => {
                                lead_doc = new_lead;
                                aligned = false;
                                break;
                            }
                            None => {
                                self.current = NO_MORE_DOCS;
                                return;
                            }
                        },
                        None => {
                            self.current = NO_MORE_DOCS;
                            return;
                        }
                    }
                }

                if aligned {
                    break 'align;
                }
            }

            // All terms occur in `lead_doc`; now check their positions.
            let freq = if num_readers == 1 {
                1
            } else {
                self.count_phrase_positions(lead_doc)
            };
            if freq > 0 {
                self.phrase_freq = freq;
                self.current = lead_doc;
                return;
            }

            // Terms co-occur but not as a phrase: try the next candidate.
            self.current = lead_doc;
            lead_doc = match self.readers[0].advance(DocId::new(lead_doc.as_u32() + 1)) {
                Some(id) => id,
                None => {
                    self.current = NO_MORE_DOCS;
                    return;
                }
            };
        }
    }
310
    /// Two-reader specialization of `advance_to_next_phrase`: moves
    /// `self.current` to the next document after the current one in which
    /// both terms occur at matching phrase positions, or to `NO_MORE_DOCS`.
    fn advance_two_term_phrase(&mut self) {
        let off0 = self.term_offsets[0];
        let off1 = self.term_offsets[1];

        // Move the lead reader to the first candidate after the current doc
        // (`NO_MORE_DOCS` on entry means "start from doc 0").
        let mut lead_doc =
            match self.readers[0].advance(DocId::new(if self.current == NO_MORE_DOCS {
                0
            } else {
                self.current.as_u32() + 1
            })) {
                Some(id) => id,
                None => {
                    self.current = NO_MORE_DOCS;
                    return;
                }
            };

        loop {
            // Leapfrog the two readers until both sit on the same document.
            loop {
                match self.readers[1].advance(lead_doc) {
                    Some(id) if id == lead_doc => break,
                    Some(id) => match self.readers[0].advance(id) {
                        Some(new_lead) => {
                            lead_doc = new_lead;
                        }
                        None => {
                            self.current = NO_MORE_DOCS;
                            return;
                        }
                    },
                    None => {
                        self.current = NO_MORE_DOCS;
                        return;
                    }
                }
            }

            let tf0 = self.readers[0].current_tf();
            let tf1 = self.readers[1].current_tf();

            if tf0 == 1 && tf1 == 1 {
                // Each term occurs once: compare the two positions directly,
                // without copying position lists.
                let pos0 = self.readers[0].first_position();
                let pos1 = self.readers[1].first_position();
                // Both sides normalize to the implied phrase start position.
                if pos0.wrapping_sub(off0) == pos1.wrapping_sub(off1) {
                    self.phrase_freq = 1;
                    self.current = lead_doc;
                    return;
                }
            } else {
                // General case: copy both position lists into the scratch
                // state and count the aligned occurrences.
                self.reader_state[0].0 = lead_doc.as_u32();
                self.reader_state[0].1.clear();
                self.reader_state[0]
                    .1
                    .extend_from_slice(self.readers[0].positions());
                self.reader_state[1].0 = lead_doc.as_u32();
                self.reader_state[1].1.clear();
                self.reader_state[1]
                    .1
                    .extend_from_slice(self.readers[1].positions());

                let freq = self.count_positions();
                if freq > 0 {
                    self.phrase_freq = freq;
                    self.current = lead_doc;
                    return;
                }
            }

            // Both terms occur but not as a phrase: step the lead reader.
            self.current = lead_doc;
            lead_doc = match self.readers[0].next_doc() {
                Some(id) => id,
                None => {
                    self.current = NO_MORE_DOCS;
                    return;
                }
            };
        }
    }
400
401 fn count_phrase_positions(&mut self, lead_doc: DocId) -> u32 {
406 let num_readers = self.readers.len();
407
408 if self.readers.iter().all(|r| r.current_tf() == 1) {
410 let base = self.readers[0]
411 .first_position()
412 .wrapping_sub(self.term_offsets[0]);
413 let aligned = (1..num_readers).all(|i| {
414 self.readers[i]
415 .first_position()
416 .wrapping_sub(self.term_offsets[i])
417 == base
418 });
419 return if aligned { 1 } else { 0 };
420 }
421
422 let target = lead_doc.as_u32();
424 for i in 0..num_readers {
425 self.reader_state[i].0 = target;
426 self.reader_state[i].1.clear();
427 self.reader_state[i]
428 .1
429 .extend_from_slice(self.readers[i].positions());
430 }
431 self.count_positions()
432 }
433
434 fn count_positions(&mut self) -> u32 {
439 let num = self.readers.len();
440 self.ptrs_buf.clear();
441 self.ptrs_buf.resize(num, 0);
442
443 let anchor = self
445 .term_offsets
446 .iter()
447 .enumerate()
448 .min_by_key(|(_, off)| *off)
449 .map(|(i, _)| i)
450 .unwrap();
451 let anchor_offset = self.term_offsets[anchor];
452 let anchor_positions = &self.reader_state[anchor].1;
453
454 let mut count: u32 = 0;
455
456 for &anchor_pos in anchor_positions.iter() {
457 let start = anchor_pos - anchor_offset; let mut matched = true;
459
460 for i in 0..num {
461 if i == anchor {
462 continue;
463 }
464 let expected = start + self.term_offsets[i];
465 let positions = &self.reader_state[i].1;
466
467 while self.ptrs_buf[i] < positions.len() && positions[self.ptrs_buf[i]] < expected {
468 self.ptrs_buf[i] += 1;
469 }
470
471 if self.ptrs_buf[i] >= positions.len() || positions[self.ptrs_buf[i]] != expected {
472 matched = false;
473 break;
474 }
475 }
476
477 if matched {
478 count += 1;
479 }
480 }
481 count
482 }
483}
484
485impl Scorer for PhraseScorer<'_> {
486 fn doc_id(&self) -> DocId {
487 self.current
488 }
489
490 fn next(&mut self) -> DocId {
491 self.advance_to_next_phrase();
492 self.current
493 }
494
495 fn advance(&mut self, target: DocId) -> DocId {
496 if self.current < target {
497 self.current = DocId::new(target.as_u32().saturating_sub(1));
498 }
499 self.advance_to_next_phrase();
500 self.current
501 }
502
503 fn score(&mut self) -> f32 {
504 if self.phrase_freq <= 1 {
507 if let Some(cs) = self.constant_score {
508 return cs;
509 }
510 }
511 let dl = self
512 .norms
513 .as_ref()
514 .map(|n| n.norm(self.doc_id()))
515 .unwrap_or(1.0);
516 bm25_score(self.idf, self.phrase_freq as f32, dl, self.avg_field_length)
517 }
518
519 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
520 None
521 }
522}
523
/// Minimal scorer over a plain postings list; every hit scores `1.0`.
/// Within this file it is only constructed through `empty()`.
struct SimpleIterScorer<'a> {
    /// Postings list being iterated.
    postings: crate::inverted::postings::PostingListReader<'a>,
    /// Current document, or `NO_MORE_DOCS`.
    current: DocId,
}
528
529impl<'a> SimpleIterScorer<'a> {
530 fn empty() -> Self {
531 Self {
532 postings: crate::inverted::postings::PostingListReader::new(&[0, 0, 0, 0, 0]),
533 current: NO_MORE_DOCS,
534 }
535 }
536}
537
538impl Scorer for SimpleIterScorer<'_> {
539 fn doc_id(&self) -> DocId {
540 self.current
541 }
542 fn next(&mut self) -> DocId {
543 self.current = match self.postings.next() {
544 Some((id, _)) => id,
545 None => NO_MORE_DOCS,
546 };
547 self.current
548 }
549 fn advance(&mut self, target: DocId) -> DocId {
550 while self.current < target && self.current != NO_MORE_DOCS {
551 self.next();
552 }
553 self.current
554 }
555 fn score(&mut self) -> f32 {
556 1.0
557 }
558 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
559 None
560 }
561}
562
563struct BoundEmptyQuery;
564impl BoundQuery for BoundEmptyQuery {
565 fn scorer_supplier(&self, _: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
566 Ok(None)
567 }
568}
569
570#[cfg(test)]
571mod tests {
572 use super::*;
573 use crate::analysis::{AnalyzerRegistry, Token};
574 use crate::core::SegmentId;
575 use crate::mapping::{FieldType, Mapping};
576 use crate::segment::builder::SegmentBuilder;
577
578 fn make_tokens(terms: &[&str]) -> Vec<Token> {
579 terms
580 .iter()
581 .enumerate()
582 .map(|(i, t)| Token::new(*t, 0, t.len(), i as u32))
583 .collect()
584 }
585
586 fn build_phrase_store() -> crate::search::segment_store::SegmentStore {
587 let schema = Mapping::builder().field("body", FieldType::Text).build();
588 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
589
590 builder.add_document(
592 &[(
593 FieldId::new(0),
594 make_tokens(&["the", "quick", "brown", "fox"]),
595 )],
596 br#"{"body":"the quick brown fox"}"#,
597 );
598
599 builder.add_document(
601 &[(
602 FieldId::new(0),
603 make_tokens(&["the", "brown", "quick", "fox"]),
604 )],
605 br#"{"body":"the brown quick fox"}"#,
606 );
607
608 builder.add_document(
610 &[(FieldId::new(0), make_tokens(&["quick", "fox", "brown"]))],
611 br#"{"body":"quick fox brown"}"#,
612 );
613
614 let reader = SegmentReader::open(builder.build()).unwrap();
615 crate::search::segment_store::SegmentStore::new(
616 vec![reader],
617 AnalyzerRegistry::new(),
618 None,
619 None,
620 )
621 }
622
623 #[test]
624 fn phrase_exact_match() {
625 let store = build_phrase_store();
626 let searcher = Searcher::new(&store);
627 let query = MatchPhraseQuery {
628 field: "body".into(),
629 query_text: "quick brown".into(),
630 analyzer: None,
631 };
632
633 let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
634 let supplier = weight
635 .scorer_supplier(&searcher.segments()[0])
636 .unwrap()
637 .unwrap();
638 let mut scorer = supplier.scorer().unwrap();
639
640 assert_eq!(scorer.doc_id(), DocId::new(0));
642 assert_eq!(scorer.next(), NO_MORE_DOCS);
643 }
644
645 #[test]
646 fn phrase_wrong_order_no_match() {
647 let store = build_phrase_store();
648 let searcher = Searcher::new(&store);
649 let query = MatchPhraseQuery {
650 field: "body".into(),
651 query_text: "brown quick".into(),
652 analyzer: None,
653 };
654
655 let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
656 let supplier = weight
657 .scorer_supplier(&searcher.segments()[0])
658 .unwrap()
659 .unwrap();
660 let mut scorer = supplier.scorer().unwrap();
661
662 assert_eq!(scorer.doc_id(), DocId::new(1));
664 assert_eq!(scorer.next(), NO_MORE_DOCS);
665 }
666
667 #[test]
668 fn phrase_no_match() {
669 let store = build_phrase_store();
670 let searcher = Searcher::new(&store);
671 let query = MatchPhraseQuery {
672 field: "body".into(),
673 query_text: "fox quick".into(), analyzer: None,
675 };
676
677 let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
678 let supplier = weight
679 .scorer_supplier(&searcher.segments()[0])
680 .unwrap()
681 .unwrap();
682 let scorer = supplier.scorer().unwrap();
683
684 assert_eq!(scorer.doc_id(), NO_MORE_DOCS);
693 }
694
695 #[test]
696 fn phrase_single_term_degenerates() {
697 let store = build_phrase_store();
698 let searcher = Searcher::new(&store);
699 let query = MatchPhraseQuery {
700 field: "body".into(),
701 query_text: "quick".into(),
702 analyzer: None,
703 };
704
705 let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
706 let supplier = weight
707 .scorer_supplier(&searcher.segments()[0])
708 .unwrap()
709 .unwrap();
710 let mut scorer = supplier.scorer().unwrap();
711
712 let mut ids = Vec::new();
714 while scorer.doc_id() != NO_MORE_DOCS {
715 ids.push(scorer.doc_id().as_u32());
716 scorer.next();
717 }
718 assert_eq!(ids, vec![0, 1, 2]);
719 }
720
721 #[test]
722 fn phrase_three_terms() {
723 let store = build_phrase_store();
724 let searcher = Searcher::new(&store);
725 let query = MatchPhraseQuery {
726 field: "body".into(),
727 query_text: "the quick brown".into(),
728 analyzer: None,
729 };
730
731 let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
732 let supplier = weight
733 .scorer_supplier(&searcher.segments()[0])
734 .unwrap()
735 .unwrap();
736 let mut scorer = supplier.scorer().unwrap();
737
738 assert_eq!(scorer.doc_id(), DocId::new(0));
740 assert_eq!(scorer.next(), NO_MORE_DOCS);
741 }
742
743 #[test]
744 fn phrase_has_positive_score() {
745 let store = build_phrase_store();
746 let searcher = Searcher::new(&store);
747 let query = MatchPhraseQuery {
748 field: "body".into(),
749 query_text: "quick brown".into(),
750 analyzer: None,
751 };
752
753 let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
754 let supplier = weight
755 .scorer_supplier(&searcher.segments()[0])
756 .unwrap()
757 .unwrap();
758 let mut scorer = supplier.scorer().unwrap();
759
760 assert_eq!(scorer.doc_id(), DocId::new(0));
761 let score = scorer.score();
762 assert!(score > 0.0, "phrase score should be positive, got {score}");
763 }
764
765 #[test]
771 fn phrase_2term_catchup_not_skipped() {
772 let schema = Mapping::builder().field("body", FieldType::Text).build();
773 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
774
775 builder.add_document(
785 &[(FieldId::new(0), make_tokens(&["alpha", "gamma"]))],
786 b"{}",
787 );
788 builder.add_document(&[(FieldId::new(0), make_tokens(&["beta", "delta"]))], b"{}");
790 builder.add_document(
792 &[(FieldId::new(0), make_tokens(&["beta", "epsilon"]))],
793 b"{}",
794 );
795 builder.add_document(&[(FieldId::new(0), make_tokens(&["alpha", "beta"]))], b"{}");
797 builder.add_document(&[(FieldId::new(0), make_tokens(&["beta", "zeta"]))], b"{}");
799 builder.add_document(&[(FieldId::new(0), make_tokens(&["alpha", "beta"]))], b"{}");
801
802 let reader = SegmentReader::open(builder.build()).unwrap();
803 let store = crate::search::segment_store::SegmentStore::new(
804 vec![reader],
805 AnalyzerRegistry::new(),
806 None,
807 None,
808 );
809 let searcher = Searcher::new(&store);
810 let query = MatchPhraseQuery {
811 field: "body".into(),
812 query_text: "alpha beta".into(),
813 analyzer: None,
814 };
815
816 let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
817 let supplier = weight
818 .scorer_supplier(&searcher.segments()[0])
819 .unwrap()
820 .unwrap();
821 let mut scorer = supplier.scorer().unwrap();
822
823 assert_eq!(
825 scorer.doc_id(),
826 DocId::new(3),
827 "doc 3 must not be skipped after catch-up"
828 );
829 assert_eq!(scorer.next(), DocId::new(5));
830 assert_eq!(scorer.next(), NO_MORE_DOCS);
831 }
832
833 #[test]
840 fn phrase_nterm_catchup_not_skipped() {
841 let schema = Mapping::builder().field("body", FieldType::Text).build();
842 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
843
844 builder.add_document(
862 &[(FieldId::new(0), make_tokens(&["alpha", "gamma", "delta"]))],
863 b"{}",
864 );
865 builder.add_document(
867 &[(FieldId::new(0), make_tokens(&["beta", "delta", "epsilon"]))],
868 b"{}",
869 );
870 builder.add_document(
872 &[(FieldId::new(0), make_tokens(&["beta", "epsilon", "zeta"]))],
873 b"{}",
874 );
875 builder.add_document(
877 &[(FieldId::new(0), make_tokens(&["alpha", "beta", "gamma"]))],
878 b"{}",
879 );
880 builder.add_document(
882 &[(FieldId::new(0), make_tokens(&["beta", "eta", "theta"]))],
883 b"{}",
884 );
885 builder.add_document(
887 &[(FieldId::new(0), make_tokens(&["alpha", "beta", "gamma"]))],
888 b"{}",
889 );
890
891 let reader = SegmentReader::open(builder.build()).unwrap();
892 let store = crate::search::segment_store::SegmentStore::new(
893 vec![reader],
894 AnalyzerRegistry::new(),
895 None,
896 None,
897 );
898 let searcher = Searcher::new(&store);
899 let query = MatchPhraseQuery {
900 field: "body".into(),
901 query_text: "alpha beta gamma".into(),
902 analyzer: None,
903 };
904
905 let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
906 let supplier = weight
907 .scorer_supplier(&searcher.segments()[0])
908 .unwrap()
909 .unwrap();
910 let mut scorer = supplier.scorer().unwrap();
911
912 assert_eq!(
914 scorer.doc_id(),
915 DocId::new(3),
916 "N-term: doc 3 must not be skipped after catch-up"
917 );
918 assert_eq!(scorer.next(), DocId::new(5));
919 assert_eq!(scorer.next(), NO_MORE_DOCS);
920 }
921
922 #[test]
927 fn phrase_advance_respects_target() {
928 let schema = Mapping::builder().field("body", FieldType::Text).build();
929 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
930
931 builder.add_document(&[(FieldId::new(0), make_tokens(&["alpha", "beta"]))], b"{}");
934 for _ in 1..5 {
936 builder.add_document(&[(FieldId::new(0), make_tokens(&["gamma"]))], b"{}");
937 }
938 builder.add_document(&[(FieldId::new(0), make_tokens(&["alpha", "beta"]))], b"{}");
940 for _ in 6..10 {
942 builder.add_document(&[(FieldId::new(0), make_tokens(&["gamma"]))], b"{}");
943 }
944 builder.add_document(&[(FieldId::new(0), make_tokens(&["alpha", "beta"]))], b"{}");
946
947 let reader = SegmentReader::open(builder.build()).unwrap();
948 let store = crate::search::segment_store::SegmentStore::new(
949 vec![reader],
950 AnalyzerRegistry::new(),
951 None,
952 None,
953 );
954 let searcher = Searcher::new(&store);
955 let query = MatchPhraseQuery {
956 field: "body".into(),
957 query_text: "alpha beta".into(),
958 analyzer: None,
959 };
960
961 let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
962 let supplier = weight
963 .scorer_supplier(&searcher.segments()[0])
964 .unwrap()
965 .unwrap();
966 let mut scorer = supplier.scorer().unwrap();
967
968 assert_eq!(scorer.doc_id(), DocId::new(0));
970
971 let result = scorer.advance(DocId::new(7));
973 assert!(
974 result >= DocId::new(7),
975 "advance(7) returned {result:?}, expected >= DocId(7)"
976 );
977 assert_eq!(result, DocId::new(10));
978 }
979
980 #[test]
987 fn phrase_freq_repeated_phrase_scores_higher() {
988 let schema = Mapping::builder().field("body", FieldType::Text).build();
989 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
990
991 builder.add_document(
994 &[(
995 FieldId::new(0),
996 make_tokens(&["alpha", "beta", "alpha", "beta", "alpha", "beta"]),
997 )],
998 b"{}",
999 );
1000 builder.add_document(
1003 &[(
1004 FieldId::new(0),
1005 make_tokens(&["alpha", "beta", "gamma", "delta", "epsilon", "zeta"]),
1006 )],
1007 b"{}",
1008 );
1009
1010 let reader = SegmentReader::open(builder.build()).unwrap();
1011 let store = crate::search::segment_store::SegmentStore::new(
1012 vec![reader],
1013 AnalyzerRegistry::new(),
1014 None,
1015 None,
1016 );
1017 let searcher = Searcher::new(&store);
1018 let query = MatchPhraseQuery {
1019 field: "body".into(),
1020 query_text: "alpha beta".into(),
1021 analyzer: None,
1022 };
1023
1024 let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
1025 let supplier = weight
1026 .scorer_supplier(&searcher.segments()[0])
1027 .unwrap()
1028 .unwrap();
1029 let mut scorer = supplier.scorer().unwrap();
1030
1031 assert_eq!(scorer.doc_id(), DocId::new(0));
1033 let doc0_score = scorer.score();
1034 scorer.next();
1035 assert_eq!(scorer.doc_id(), DocId::new(1));
1036 let doc1_score = scorer.score();
1037
1038 assert!(
1042 doc0_score > doc1_score,
1043 "doc with 3 phrase occurrences ({doc0_score}) must score higher than \
1044 doc with 1 occurrence ({doc1_score}) — phrase TF must be counted"
1045 );
1046 }
1047}