summavy/query/phrase_query/
phrase_query.rs

1use async_trait::async_trait;
2
3use super::PhraseWeight;
4use crate::query::bm25::Bm25Weight;
5use crate::query::{EnableScoring, Query, Weight};
6use crate::schema::{Field, IndexRecordOption, Term};
7
8/// `PhraseQuery` matches a specific sequence of words.
9///
10/// For instance the phrase query for `"part time"` will match
11/// the sentence
12///
13/// **Alan just got a part time job.**
14///
15/// On the other hand it will not match the sentence.
16///
17/// **This is my favorite part of the job.**
18///
19/// [Slop](PhraseQuery::set_slop) allows leniency in term proximity
20/// for some performance tradeof.
21///
22/// Using a `PhraseQuery` on a field requires positions
23/// to be indexed for this field.
24#[derive(Clone, Debug)]
25pub struct PhraseQuery {
26    field: Field,
27    phrase_terms: Vec<(usize, Term)>,
28    slop: u32,
29}
30
31impl PhraseQuery {
32    /// Creates a new `PhraseQuery` given a list of terms.
33    ///
34    /// There must be at least two terms, and all terms
35    /// must belong to the same field.
36    /// Offset for each term will be same as index in the Vector
37    pub fn new(terms: Vec<Term>) -> PhraseQuery {
38        let terms_with_offset = terms.into_iter().enumerate().collect();
39        PhraseQuery::new_with_offset(terms_with_offset)
40    }
41
42    /// Creates a new `PhraseQuery` given a list of terms and their offsets.
43    ///
44    /// Can be used to provide custom offset for each term.
45    pub fn new_with_offset(terms: Vec<(usize, Term)>) -> PhraseQuery {
46        PhraseQuery::new_with_offset_and_slop(terms, 0)
47    }
48
49    /// Creates a new `PhraseQuery` given a list of terms, their offsets and a slop
50    pub fn new_with_offset_and_slop(mut terms: Vec<(usize, Term)>, slop: u32) -> PhraseQuery {
51        assert!(
52            terms.len() > 1,
53            "A phrase query is required to have strictly more than one term."
54        );
55        terms.sort_by_key(|&(offset, _)| offset);
56        let field = terms[0].1.field();
57        assert!(
58            terms[1..].iter().all(|term| term.1.field() == field),
59            "All terms from a phrase query must belong to the same field"
60        );
61        PhraseQuery {
62            field,
63            phrase_terms: terms,
64            slop,
65        }
66    }
67
68    /// Slop allowed for the phrase.
69    ///
70    /// The query will match if its terms are separated by `slop` terms at most.
71    /// By default the slop is 0 meaning query terms need to be adjacent.
72    pub fn set_slop(&mut self, value: u32) {
73        self.slop = value;
74    }
75
76    /// The [`Field`] this `PhraseQuery` is targeting.
77    pub fn field(&self) -> Field {
78        self.field
79    }
80
81    /// `Term`s in the phrase without the associated offsets.
82    pub fn phrase_terms(&self) -> Vec<Term> {
83        self.phrase_terms
84            .iter()
85            .map(|(_, term)| term.clone())
86            .collect::<Vec<Term>>()
87    }
88
89    /// Returns the [`PhraseWeight`] for the given phrase query given a specific `searcher`.
90    ///
91    /// This function is the same as [`Query::weight()`] except it returns
92    /// a specialized type [`PhraseWeight`] instead of a Boxed trait.
93    pub(crate) fn phrase_weight(
94        &self,
95        enable_scoring: EnableScoring<'_>,
96    ) -> crate::Result<PhraseWeight> {
97        let schema = enable_scoring.schema();
98        let field_entry = schema.get_field_entry(self.field);
99        let has_positions = field_entry
100            .field_type()
101            .get_index_record_option()
102            .map(IndexRecordOption::has_positions)
103            .unwrap_or(false);
104        if !has_positions {
105            let field_name = field_entry.name();
106            return Err(crate::TantivyError::SchemaError(format!(
107                "Applied phrase query on field {:?}, which does not have positions indexed",
108                field_name
109            )));
110        }
111        let terms = self.phrase_terms();
112        let bm25_weight_opt = match enable_scoring {
113            EnableScoring::Enabled(searcher) => Some(Bm25Weight::for_terms(searcher, &terms)?),
114            EnableScoring::Disabled { .. } => None,
115        };
116        let mut weight = PhraseWeight::new(self.phrase_terms.clone(), bm25_weight_opt);
117        if self.slop > 0 {
118            weight.slop(self.slop);
119        }
120        Ok(weight)
121    }
122}
123
#[cfg(feature = "quickwit")]
impl PhraseQuery {
    /// Asynchronous variant of the phrase weight construction, only compiled
    /// when the `quickwit` feature is enabled.
    ///
    /// Performs the same position check as the synchronous path, then awaits
    /// `Bm25Weight::for_terms_async` when scoring is enabled.
    ///
    /// # Errors
    ///
    /// Returns a `SchemaError` if the targeted field does not have positions
    /// indexed. Errors raised while computing the BM25 weight are propagated.
    pub(crate) async fn phrase_weight_async(
        &self,
        enable_scoring: EnableScoring<'_>,
    ) -> crate::Result<PhraseWeight> {
        let schema = enable_scoring.schema();
        let field_entry = schema.get_field_entry(self.field);
        let has_positions = field_entry
            .field_type()
            .get_index_record_option()
            .map_or(false, IndexRecordOption::has_positions);
        if !has_positions {
            let field_name = field_entry.name();
            return Err(crate::TantivyError::SchemaError(format!(
                "Applied phrase query on field {:?}, which does not have positions indexed",
                field_name
            )));
        }
        let bm25_weight_opt = match enable_scoring {
            EnableScoring::Enabled(searcher) => {
                // The offset-free term list is only needed for BM25, so the
                // terms are cloned only when scoring is enabled.
                let terms = self.phrase_terms();
                Some(Bm25Weight::for_terms_async(searcher, &terms).await?)
            }
            EnableScoring::Disabled { .. } => None,
        };
        let mut weight = PhraseWeight::new(self.phrase_terms.clone(), bm25_weight_opt);
        if self.slop > 0 {
            weight.slop(self.slop);
        }
        Ok(weight)
    }
}
158
159#[async_trait]
160impl Query for PhraseQuery {
161    /// Create the weight associated with a query.
162    ///
163    /// See [`Weight`].
164    fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
165        let phrase_weight = self.phrase_weight(enable_scoring)?;
166        Ok(Box::new(phrase_weight))
167    }
168
169    #[cfg(feature = "quickwit")]
170    async fn weight_async(
171        &self,
172        enable_scoring: EnableScoring<'_>,
173    ) -> crate::Result<Box<dyn Weight>> {
174        let phrase_weight = self.phrase_weight_async(enable_scoring).await?;
175        Ok(Box::new(phrase_weight))
176    }
177
178    fn query_terms<'a>(&'a self, visitor: &mut dyn FnMut(&'a Term, bool)) {
179        for (_, term) in &self.phrase_terms {
180            visitor(term, true);
181        }
182    }
183}