tantivy/query/phrase_prefix_query/
phrase_prefix_query.rs

1use std::ops::Bound;
2
3use super::{prefix_end, PhrasePrefixWeight};
4use crate::query::bm25::Bm25Weight;
5use crate::query::{EnableScoring, InvertedIndexRangeWeight, Query, Weight};
6use crate::schema::{Field, IndexRecordOption, Term};
7
/// Value given to `max_expansions` when a `PhrasePrefixQuery` is constructed, i.e. the default
/// cap on the number of terms the prefix term may expand to.
const DEFAULT_MAX_EXPANSIONS: u32 = 50;
9
/// `PhrasePrefixQuery` matches a specific sequence of words followed by a term of which only a
/// prefix is known.
///
/// For instance the phrase prefix query for `"part t"` will match
/// the sentence
///
/// **Alan just got a part time job.**
///
/// On the other hand, it will not match the sentence
///
/// **This is my favorite part of the job.**
///
/// Using a `PhrasePrefixQuery` on a field requires positions
/// to be indexed for this field.
#[derive(Clone, Debug)]
pub struct PhrasePrefixQuery {
    /// Field shared by every term of the query (taken from the first term at construction).
    field: Field,
    /// `(offset, term)` pairs of the fully specified terms, sorted by offset.
    /// The prefix term is not part of this list.
    phrase_terms: Vec<(usize, Term)>,
    /// `(offset, term)` pair of the trailing term, which is only a prefix.
    prefix: (usize, Term),
    /// Maximum number of terms to which the prefix term will expand.
    max_expansions: u32,
}
31
32impl PhrasePrefixQuery {
33    /// Creates a new `PhrasePrefixQuery` given a list of terms.
34    ///
35    /// There must be at least two terms, and all terms
36    /// must belong to the same field.
37    /// Offset for each term will be same as index in the Vector
38    /// The last Term is a prefix and not a full value
39    pub fn new(terms: Vec<Term>) -> PhrasePrefixQuery {
40        let terms_with_offset = terms.into_iter().enumerate().collect();
41        PhrasePrefixQuery::new_with_offset(terms_with_offset)
42    }
43
44    /// Creates a new `PhrasePrefixQuery` given a list of terms and their offsets.
45    ///
46    /// Can be used to provide custom offset for each term.
47    pub fn new_with_offset(mut terms: Vec<(usize, Term)>) -> PhrasePrefixQuery {
48        assert!(
49            !terms.is_empty(),
50            "A phrase prefix query is required to have at least one term."
51        );
52        terms.sort_by_key(|&(offset, _)| offset);
53        let field = terms[0].1.field();
54        assert!(
55            terms[1..].iter().all(|term| term.1.field() == field),
56            "All terms from a phrase query must belong to the same field"
57        );
58        PhrasePrefixQuery {
59            field,
60            prefix: terms.pop().unwrap(),
61            phrase_terms: terms,
62            max_expansions: DEFAULT_MAX_EXPANSIONS,
63        }
64    }
65
66    /// Maximum number of terms to which the last provided term will expand.
67    pub fn set_max_expansions(&mut self, value: u32) {
68        self.max_expansions = value;
69    }
70
71    /// The [`Field`] this `PhrasePrefixQuery` is targeting.
72    pub fn field(&self) -> Field {
73        self.field
74    }
75
76    /// `Term`s in the phrase without the associated offsets.
77    pub fn phrase_terms(&self) -> Vec<Term> {
78        // TODO should we include the last term too?
79        self.phrase_terms
80            .iter()
81            .map(|(_, term)| term.clone())
82            .collect::<Vec<Term>>()
83    }
84
85    /// Returns the [`PhrasePrefixWeight`] for the given phrase query given a specific `searcher`.
86    ///
87    /// This function is the same as [`Query::weight()`] except it returns
88    /// a specialized type [`PhraseQueryWeight`] instead of a Boxed trait.
89    /// If the query was only one term long, this returns `None` whereas [`Query::weight`]
90    /// returns a boxed [`RangeWeight`]
91    pub(crate) fn phrase_prefix_query_weight(
92        &self,
93        enable_scoring: EnableScoring<'_>,
94    ) -> crate::Result<Option<PhrasePrefixWeight>> {
95        if self.phrase_terms.is_empty() {
96            return Ok(None);
97        }
98        let schema = enable_scoring.schema();
99        let field_entry = schema.get_field_entry(self.field);
100        let has_positions = field_entry
101            .field_type()
102            .get_index_record_option()
103            .map(IndexRecordOption::has_positions)
104            .unwrap_or(false);
105        if !has_positions {
106            let field_name = field_entry.name();
107            return Err(crate::TantivyError::SchemaError(format!(
108                "Applied phrase query on field {field_name:?}, which does not have positions \
109                 indexed"
110            )));
111        }
112        let terms = self.phrase_terms();
113        let bm25_weight_opt = match enable_scoring {
114            EnableScoring::Enabled { searcher, .. } => {
115                Some(Bm25Weight::for_terms(searcher, &terms)?)
116            }
117            EnableScoring::Disabled { .. } => None,
118        };
119        let weight = PhrasePrefixWeight::new(
120            self.phrase_terms.clone(),
121            self.prefix.clone(),
122            bm25_weight_opt,
123            self.max_expansions,
124        );
125        Ok(Some(weight))
126    }
127}
128
129impl Query for PhrasePrefixQuery {
130    /// Create the weight associated with a query.
131    ///
132    /// See [`Weight`].
133    fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
134        if let Some(phrase_weight) = self.phrase_prefix_query_weight(enable_scoring)? {
135            Ok(Box::new(phrase_weight))
136        } else {
137            // There are no prefix. Let's just match the suffix.
138            let end_term =
139                if let Some(end_value) = prefix_end(self.prefix.1.serialized_value_bytes()) {
140                    let mut end_term = Term::with_capacity(end_value.len());
141                    end_term.set_field_and_type(self.field, self.prefix.1.typ());
142                    end_term.append_bytes(&end_value);
143                    Bound::Excluded(end_term)
144                } else {
145                    Bound::Unbounded
146                };
147
148            let lower_bound = Bound::Included(self.prefix.1.clone());
149            let upper_bound = end_term;
150
151            Ok(Box::new(InvertedIndexRangeWeight::new(
152                self.field,
153                &lower_bound,
154                &upper_bound,
155                Some(self.max_expansions as u64),
156            )))
157        }
158    }
159
160    fn query_terms<'a>(&'a self, visitor: &mut dyn FnMut(&'a Term, bool)) {
161        for (_, term) in &self.phrase_terms {
162            visitor(term, true);
163        }
164    }
165}