use crate::core::{Result, ScoreMode};
use levenshtein_automata::{DFA, LevenshteinAutomatonBuilder};
use crate::query::multi_term::ConstantScoreMultiTermSupplier;
use crate::query::{BoundQuery, Query, ScorerSupplier};
use crate::search::searcher::Searcher;
use crate::segment::reader::SegmentReader;
pub struct FuzzyQuery {
pub field: String,
pub value: String,
pub fuzziness: u32,
}
impl Query for FuzzyQuery {
fn bind(&self, _searcher: &Searcher, _score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
let max_dist = self.fuzziness.min(2) as u8;
let builder = LevenshteinAutomatonBuilder::new(max_dist, true);
let dfa = builder.build_dfa(&self.value);
Ok(Box::new(BoundFuzzyQuery {
field: self.field.clone(),
dfa,
}))
}
}
struct BoundFuzzyQuery {
field: String,
dfa: DFA,
}
impl BoundQuery for BoundFuzzyQuery {
fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
let field_id = match reader
.header()
.fields
.iter()
.find(|f| f.field_name == self.field)
.map(|f| f.field_id)
{
Some(id) => id,
None => return Ok(None),
};
let terms: Vec<(String, u32)> = reader.automaton_search(field_id, &self.dfa);
if terms.is_empty() {
return Ok(None);
}
Ok(Some(Box::new(ConstantScoreMultiTermSupplier::new(
reader, field_id, terms,
))))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::analysis::Token;
use crate::core::{FieldId, SegmentId};
use crate::mapping::{FieldType, Mapping};
use crate::segment::builder::SegmentBuilder;
use crate::segment::reader::SegmentReader;
#[test]
fn fuzzy_query_basic() {
let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
for tag in &["technology", "technlogy", "techology", "science", "tennis"] {
builder.add_document(
&[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
b"{}",
);
}
let reader = SegmentReader::open(builder.build()).unwrap();
let store = crate::search::segment_store::SegmentStore::new(
vec![reader],
crate::analysis::AnalyzerRegistry::new(),
None,
None,
);
let searcher = Searcher::new(&store);
let results = searcher
.search_query(
&FuzzyQuery {
field: "tag".into(),
value: "technology".into(),
fuzziness: 1,
},
10,
0,
)
.unwrap();
assert_eq!(results.total_hits.value, 3); }
#[test]
fn fuzzy_query_fuzziness_2() {
let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
for tag in &["apple", "aple", "apples", "apply", "orange"] {
builder.add_document(
&[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
b"{}",
);
}
let reader = SegmentReader::open(builder.build()).unwrap();
let store = crate::search::segment_store::SegmentStore::new(
vec![reader],
crate::analysis::AnalyzerRegistry::new(),
None,
None,
);
let searcher = Searcher::new(&store);
let results = searcher
.search_query(
&FuzzyQuery {
field: "tag".into(),
value: "apple".into(),
fuzziness: 2,
},
10,
0,
)
.unwrap();
assert_eq!(results.total_hits.value, 4);
}
#[test]
fn fuzzy_constant_score_all_ones() {
let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
for tag in &["cat", "bat", "hat", "dog"] {
builder.add_document(
&[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
b"{}",
);
}
let reader = SegmentReader::open(builder.build()).unwrap();
let store = crate::search::segment_store::SegmentStore::new(
vec![reader],
crate::analysis::AnalyzerRegistry::new(),
None,
None,
);
let searcher = Searcher::new(&store);
let results = searcher
.search_query(
&FuzzyQuery {
field: "tag".into(),
value: "cat".into(),
fuzziness: 1,
},
10,
0,
)
.unwrap();
assert_eq!(results.total_hits.value, 3);
for hit in &results.hits {
assert_eq!(
hit.score, 1.0,
"fuzzy hit should have constant score 1.0, got {}",
hit.score
);
}
}
#[test]
fn fuzzy_no_matches() {
let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
builder.add_document(
&[(FieldId::new(0), vec![Token::new("hello", 0, 5, 0)])],
b"{}",
);
let reader = SegmentReader::open(builder.build()).unwrap();
let store = crate::search::segment_store::SegmentStore::new(
vec![reader],
crate::analysis::AnalyzerRegistry::new(),
None,
None,
);
let searcher = Searcher::new(&store);
let results = searcher
.search_query(
&FuzzyQuery {
field: "tag".into(),
value: "xyz".into(),
fuzziness: 1,
},
10,
0,
)
.unwrap();
assert_eq!(results.total_hits.value, 0);
}
#[test]
fn fuzzy_first_char_substitution() {
let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
for tag in &["cat", "bat", "hat", "dog"] {
builder.add_document(
&[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
b"{}",
);
}
let reader = SegmentReader::open(builder.build()).unwrap();
let store = crate::search::segment_store::SegmentStore::new(
vec![reader],
crate::analysis::AnalyzerRegistry::new(),
None,
None,
);
let searcher = Searcher::new(&store);
let results = searcher
.search_query(
&FuzzyQuery {
field: "tag".into(),
value: "cat".into(),
fuzziness: 1,
},
10,
0,
)
.unwrap();
assert_eq!(results.total_hits.value, 3);
}
}