use serde::{Deserialize, Serialize};
use crate::error::Result;
use crate::lexical::index::inverted::core::terms::{TermDictionaryAccess, TermsEnum};
use crate::lexical::index::inverted::reader::InvertedIndexReader;
use crate::lexical::query::Query;
use crate::lexical::query::matcher::Matcher;
use crate::lexical::query::multi_term::{MultiTermQuery, RewriteMethod};
use crate::lexical::query::scorer::Scorer;
use crate::lexical::reader::LexicalIndexReader;
/// Query that expands a target term into the terms of one field accepted by a
/// Levenshtein automaton built from that target.
///
/// Values are created with `FuzzyQuery::new` and adjusted through the
/// consuming builder methods on the inherent impl.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FuzzyQuery {
/// Name of the field whose terms are enumerated.
field: String,
/// Target term the automaton is built from.
term: String,
/// Edit budget passed to the Levenshtein automaton.
max_edits: u32,
/// Prefix length passed to the Levenshtein automaton (widened to `usize`).
prefix_length: u32,
/// Transposition flag passed to the Levenshtein automaton.
transpositions: bool,
/// Configured cap on the number of expanded terms.
/// NOTE(review): in the visible code this value is only read back by
/// `get_max_expansions` — confirm where the cap is enforced.
max_expansions: usize,
/// Boost value reported and updated through the `Query` trait methods.
boost: f32,
/// Rewrite strategy reported through `rewrite_method`.
rewrite_method: RewriteMethod,
}
/// Construction, builder-style configuration, and read accessors.
impl FuzzyQuery {
    /// Creates a fuzzy query for `term` in `field`.
    ///
    /// Defaults: `max_edits = 2`, `prefix_length = 0`, transpositions enabled,
    /// `max_expansions = 50`, `boost = 1.0`, and `RewriteMethod::default()`.
    pub fn new<F, T>(field: F, term: T) -> Self
    where
        F: Into<String>,
        T: Into<String>,
    {
        Self {
            field: field.into(),
            term: term.into(),
            max_edits: 2,
            prefix_length: 0,
            transpositions: true,
            max_expansions: 50,
            boost: 1.0,
            rewrite_method: RewriteMethod::default(),
        }
    }

    /// Returns the query with its edit budget replaced by `max_edits`.
    pub fn max_edits(self, max_edits: u32) -> Self {
        Self { max_edits, ..self }
    }

    /// Returns the query with its prefix length replaced by `prefix_length`.
    pub fn prefix_length(self, prefix_length: u32) -> Self {
        Self {
            prefix_length,
            ..self
        }
    }

    /// Returns the query with its transposition flag replaced.
    pub fn transpositions(self, transpositions: bool) -> Self {
        Self {
            transpositions,
            ..self
        }
    }

    /// Returns the query with its expansion cap replaced by `max_expansions`.
    ///
    /// NOTE(review): within this impl the stored value is only read back by
    /// `get_max_expansions` — confirm where the cap is actually applied.
    pub fn max_expansions(self, max_expansions: usize) -> Self {
        Self {
            max_expansions,
            ..self
        }
    }

    /// Returns the query with its boost replaced by `boost`.
    pub fn with_boost(self, boost: f32) -> Self {
        Self { boost, ..self }
    }

    /// Name of the field this query targets.
    pub fn field(&self) -> &str {
        self.field.as_str()
    }

    /// Target term the fuzzy expansion is built from.
    pub fn term(&self) -> &str {
        self.term.as_str()
    }

    /// Currently configured edit budget.
    pub fn get_max_edits(&self) -> u32 {
        self.max_edits
    }

    /// Currently configured prefix length.
    pub fn get_prefix_length(&self) -> u32 {
        self.prefix_length
    }

    /// Currently configured transposition flag.
    pub fn get_transpositions(&self) -> bool {
        self.transpositions
    }

    /// Currently configured expansion cap.
    pub fn get_max_expansions(&self) -> usize {
        self.max_expansions
    }

    /// Returns the query with its rewrite method replaced by `rewrite_method`.
    pub fn with_rewrite_method(self, rewrite_method: RewriteMethod) -> Self {
        Self {
            rewrite_method,
            ..self
        }
    }

    /// Currently configured rewrite method (returned by value).
    pub fn rewrite_method(&self) -> RewriteMethod {
        self.rewrite_method
    }

    /// Builds an enumerator over the terms of `self.field` that the
    /// Levenshtein automaton for `self.term` accepts.
    ///
    /// Returns `Ok(None)` when `reader` is not an `InvertedIndexReader`, or
    /// when that reader's `terms` call yields `None` for the field. Errors
    /// from the `terms` lookup or from creating the term iterator are
    /// propagated.
    fn get_terms_enum(
        &self,
        reader: &dyn LexicalIndexReader,
    ) -> Result<Option<Box<dyn TermsEnum>>> {
        // Only the inverted-index reader implementation is handled here.
        let Some(inverted) = reader.as_any().downcast_ref::<InvertedIndexReader>() else {
            return Ok(None);
        };
        let Some(field_terms) = inverted.terms(&self.field)? else {
            return Ok(None);
        };

        let automaton = crate::lexical::index::inverted::core::automaton::LevenshteinAutomaton::new(
            &self.term,
            self.max_edits,
            self.prefix_length as usize,
            self.transpositions,
        );
        let filtered = crate::lexical::index::inverted::core::automaton::AutomatonTermsEnum::new(
            field_terms.iterator()?,
            automaton,
        );
        Ok(Some(Box::new(filtered)))
    }
}
/// Multi-term expansion hooks for [`FuzzyQuery`].
impl MultiTermQuery for FuzzyQuery {
    /// Name of the field whose terms are expanded.
    fn field(&self) -> &str {
        self.field.as_str()
    }

    /// Rewrite method configured on this query.
    fn rewrite_method(&self) -> RewriteMethod {
        self.rewrite_method
    }

    /// Collects every term produced by the fuzzy term enumerator.
    ///
    /// Each entry is `(term, doc_freq, 1.0)`; the third component is the same
    /// constant for every term. An empty vector is returned when no enumerator
    /// is available for `reader`. Errors from building or advancing the
    /// enumerator are propagated.
    fn enumerate_terms(
        &self,
        reader: &dyn LexicalIndexReader,
    ) -> Result<Vec<(String, u64, f32)>> {
        let Some(mut candidates) = self.get_terms_enum(reader)? else {
            return Ok(Vec::new());
        };

        let mut expansions = Vec::new();
        while let Some(stats) = candidates.next()? {
            expansions.push((stats.term.clone(), stats.doc_freq, 1.0));
        }
        Ok(expansions)
    }
}
/// [`Query`] implementation: matching and scoring are delegated to the query
/// produced by `rewrite`.
impl Query for FuzzyQuery {
    /// Rewrites this query against `reader` and returns the rewritten query's matcher.
    fn matcher(&self, reader: &dyn LexicalIndexReader) -> Result<Box<dyn Matcher>> {
        self.rewrite(reader)?.matcher(reader)
    }

    /// Rewrites this query against `reader` and returns the rewritten query's scorer.
    fn scorer(&self, reader: &dyn LexicalIndexReader) -> Result<Box<dyn Scorer>> {
        self.rewrite(reader)?.scorer(reader)
    }

    /// Current boost value.
    fn boost(&self) -> f32 {
        self.boost
    }

    /// Overwrites the boost value in place.
    fn set_boost(&mut self, boost: f32) {
        self.boost = boost;
    }

    /// Returns a boxed clone of this query.
    fn clone_box(&self) -> Box<dyn Query> {
        Box::new(self.clone())
    }

    /// Human-readable summary listing the field, term, edit budget, and prefix length.
    fn description(&self) -> String {
        let Self {
            field,
            term,
            max_edits,
            prefix_length,
            ..
        } = self;
        format!(
            "FuzzyQuery(field: {}, term: {}, max_edits: {}, prefix: {})",
            field, term, max_edits, prefix_length
        )
    }

    /// Reports the query as empty exactly when the target term is an empty
    /// string; the reader is not consulted.
    fn is_empty(&self, _reader: &dyn LexicalIndexReader) -> Result<bool> {
        let no_term = self.term.is_empty();
        Ok(no_term)
    }

    /// Uses `reader.doc_count()` as the cost figure.
    fn cost(&self, reader: &dyn LexicalIndexReader) -> Result<u64> {
        let docs = reader.doc_count();
        Ok(docs)
    }

    /// Exposes the query as `Any` for downcasting.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Always returns the target field name wrapped in `Some`.
    fn field(&self) -> Option<&str> {
        Some(self.field.as_str())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Values set through the builder must be visible through the accessors.
    #[test]
    fn test_fuzzy_query_creation() {
        let fuzzy = FuzzyQuery::new("content", "hello")
            .max_edits(1)
            .prefix_length(2)
            .transpositions(false)
            .with_boost(1.5);

        assert_eq!(fuzzy.field(), "content");
        assert_eq!(fuzzy.term(), "hello");
        assert_eq!(fuzzy.get_max_edits(), 1);
        assert_eq!(fuzzy.get_prefix_length(), 2);
        assert!(!fuzzy.get_transpositions());
        assert_eq!(fuzzy.boost(), 1.5);
    }

    /// The description must mention the query type and its key settings.
    #[test]
    fn test_fuzzy_query_description() {
        let rendered = FuzzyQuery::new("title", "test")
            .max_edits(2)
            .prefix_length(1)
            .description();

        let expected_fragments = ["FuzzyQuery", "title", "test", "max_edits: 2", "prefix: 1"];
        for fragment in expected_fragments {
            assert!(
                rendered.contains(fragment),
                "description {rendered:?} is missing {fragment:?}"
            );
        }
    }
}