use super::regex_phrase_weight::RegexPhraseWeight;
use crate::query::bm25::Bm25Weight;
use crate::query::{EnableScoring, Query, Weight};
use crate::schema::{Field, IndexRecordOption, Term, Type};
/// Phrase query over a single field, described by an ordered list of
/// `(offset, term)` pairs.
///
/// The pairs are kept sorted by offset (the constructors sort them). Building
/// a weight from this query requires a `Type::Str` field that has positions
/// indexed; otherwise a schema error is returned.
#[derive(Clone, Debug)]
pub struct RegexPhraseQuery {
/// Field the phrase is searched in.
field: Field,
/// `(offset, term)` pairs, sorted by offset at construction time.
// NOTE(review): the term strings are presumably regex patterns (see
// `wildcard_query_to_regex_str`) — confirm against `RegexPhraseWeight`.
phrase_terms: Vec<(usize, String)>,
/// Slop value forwarded to `RegexPhraseWeight`; the constructors that do not
/// take a slop argument use `0`.
slop: u32,
/// Expansion limit forwarded to `RegexPhraseWeight`; initialised to `1 << 14`.
// NOTE(review): presumably caps how many index terms the patterns may expand
// to — confirm in `RegexPhraseWeight`.
max_expansions: u32,
}
/// Converts a wildcard expression into a regex pattern string.
///
/// Every character of `term` is regex-escaped, after which each escaped
/// asterisk (`\*`) is turned back into the "match anything" pattern `.*`.
/// For example `AB*CD` becomes `AB.*CD`, while `a.b` becomes `a\.b`.
pub fn wildcard_query_to_regex_str(term: &str) -> String {
    // After escaping, a `*` from the input always appears as the pair `\*`.
    let escaped = regex::escape(term);
    escaped.replace(r"\*", ".*")
}
impl RegexPhraseQuery {
    /// Builds a query from `terms`, using each term's index in the vector as
    /// its offset. The slop is `0`.
    ///
    /// # Panics
    ///
    /// Panics if fewer than two terms are supplied.
    pub fn new(field: Field, terms: Vec<String>) -> RegexPhraseQuery {
        Self::new_with_offset(field, terms.into_iter().enumerate().collect())
    }

    /// Builds a query from explicit `(offset, term)` pairs with a slop of `0`.
    ///
    /// # Panics
    ///
    /// Panics if fewer than two terms are supplied.
    pub fn new_with_offset(field: Field, terms: Vec<(usize, String)>) -> RegexPhraseQuery {
        Self::new_with_offset_and_slop(field, terms, 0)
    }

    /// Builds a query from explicit `(offset, term)` pairs and a slop value.
    ///
    /// The pairs are sorted by offset (stable sort, so pairs sharing an offset
    /// keep their relative order). `max_expansions` starts at `1 << 14`.
    ///
    /// # Panics
    ///
    /// Panics if fewer than two terms are supplied.
    pub fn new_with_offset_and_slop(
        field: Field,
        mut terms: Vec<(usize, String)>,
        slop: u32,
    ) -> RegexPhraseQuery {
        assert!(
            terms.len() > 1,
            "A phrase query is required to have strictly more than one term."
        );
        terms.sort_by_key(|entry| entry.0);
        Self {
            field,
            phrase_terms: terms,
            slop,
            max_expansions: 1 << 14,
        }
    }

    /// Overwrites the slop value of this query.
    pub fn set_slop(&mut self, value: u32) {
        self.slop = value;
    }

    /// Overwrites the expansion limit handed to the weight.
    pub fn set_max_expansions(&mut self, value: u32) {
        self.max_expansions = value;
    }

    /// Returns the field this query targets.
    pub fn field(&self) -> Field {
        self.field
    }

    /// Returns one `Term` per stored phrase term, in offset order, each built
    /// from this query's field and the term text.
    pub fn phrase_terms(&self) -> Vec<Term> {
        let mut terms = Vec::with_capacity(self.phrase_terms.len());
        for (_, text) in &self.phrase_terms {
            terms.push(Term::from_field_text(self.field, text));
        }
        terms
    }

    /// Builds the `RegexPhraseWeight` for this query.
    ///
    /// A BM25 weight is computed only when scoring is enabled; with scoring
    /// disabled the weight is created without one.
    ///
    /// # Errors
    ///
    /// Returns a `SchemaError` when the field's value type is not `Type::Str`,
    /// or when the field does not have positions indexed. Errors from
    /// `Bm25Weight::for_terms` are propagated.
    pub(crate) fn regex_phrase_weight(
        &self,
        enable_scoring: EnableScoring<'_>,
    ) -> crate::Result<RegexPhraseWeight> {
        let field_entry = enable_scoring.schema().get_field_entry(self.field);

        // Guard 1: only text fields are supported.
        let field_type = field_entry.field_type().value_type();
        if field_type != Type::Str {
            return Err(crate::LucivyError::SchemaError(format!(
                "RegexPhraseQuery can only be used with a field of type text currently, but got \
                 {field_type:?}"
            )));
        }

        // Guard 2: the field must have positions indexed. A field without any
        // index record option is treated as having none.
        let has_positions = field_entry
            .field_type()
            .get_index_record_option()
            .map_or(false, |record_option| record_option.has_positions());
        if !has_positions {
            let field_name = field_entry.name();
            return Err(crate::LucivyError::SchemaError(format!(
                "Applied phrase query on field {field_name:?}, which does not have positions \
                 indexed"
            )));
        }

        let terms = self.phrase_terms();
        let bm25_weight_opt = match enable_scoring {
            EnableScoring::Disabled { .. } => None,
            EnableScoring::Enabled {
                statistics_provider,
                ..
            } => Some(Bm25Weight::for_terms(statistics_provider, &terms)?),
        };

        Ok(RegexPhraseWeight::new(
            self.field,
            self.phrase_terms.clone(),
            bm25_weight_opt,
            self.max_expansions,
            self.slop,
        ))
    }
}
impl Query for RegexPhraseQuery {
    /// Creates the boxed weight for this query by delegating to
    /// `regex_phrase_weight`; any error from that call is returned unchanged.
    fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
        self.regex_phrase_weight(enable_scoring)
            .map(|phrase_weight| Box::new(phrase_weight) as Box<dyn Weight>)
    }
}