1use crate::core::{Result, ScoreMode};
20use levenshtein_automata::{DFA, LevenshteinAutomatonBuilder};
21
22use crate::query::multi_term::ConstantScoreMultiTermSupplier;
23use crate::query::{BoundQuery, Query, ScorerSupplier};
24use crate::search::searcher::Searcher;
25use crate::segment::reader::SegmentReader;
26
27pub struct FuzzyQuery {
29 pub field: String,
30 pub value: String,
31 pub fuzziness: u32,
32}
33
34impl Query for FuzzyQuery {
35 fn bind(&self, _searcher: &Searcher, _score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
36 let max_dist = self.fuzziness.min(2) as u8;
39 let builder = LevenshteinAutomatonBuilder::new(max_dist, true);
40 let dfa = builder.build_dfa(&self.value);
41
42 Ok(Box::new(BoundFuzzyQuery {
43 field: self.field.clone(),
44 dfa,
45 }))
46 }
47}
48
49struct BoundFuzzyQuery {
52 field: String,
53 dfa: DFA,
54}
55
56impl BoundQuery for BoundFuzzyQuery {
57 fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
58 let field_id = match reader
59 .header()
60 .fields
61 .iter()
62 .find(|f| f.field_name == self.field)
63 .map(|f| f.field_id)
64 {
65 Some(id) => id,
66 None => return Ok(None),
67 };
68
69 let terms: Vec<(String, u32)> = reader.automaton_search(field_id, &self.dfa);
72 if terms.is_empty() {
73 return Ok(None);
74 }
75
76 Ok(Some(Box::new(ConstantScoreMultiTermSupplier::new(
77 reader, field_id, terms,
78 ))))
79 }
80}
81
82#[cfg(test)]
83mod tests {
84 use super::*;
85 use crate::analysis::Token;
86 use crate::core::{FieldId, SegmentId};
87 use crate::mapping::{FieldType, Mapping};
88 use crate::segment::builder::SegmentBuilder;
89 use crate::segment::reader::SegmentReader;
90
91 #[test]
92 fn fuzzy_query_basic() {
93 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
94 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
95 for tag in &["technology", "technlogy", "techology", "science", "tennis"] {
96 builder.add_document(
97 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
98 b"{}",
99 );
100 }
101 let reader = SegmentReader::open(builder.build()).unwrap();
102 let store = crate::search::segment_store::SegmentStore::new(
103 vec![reader],
104 crate::analysis::AnalyzerRegistry::new(),
105 None,
106 None,
107 );
108 let searcher = Searcher::new(&store);
109
110 let results = searcher
111 .search_query(
112 &FuzzyQuery {
113 field: "tag".into(),
114 value: "technology".into(),
115 fuzziness: 1,
116 },
117 10,
118 0,
119 )
120 .unwrap();
121 assert_eq!(results.total_hits.value, 3); }
123
124 #[test]
125 fn fuzzy_query_fuzziness_2() {
126 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
127 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
128 for tag in &["apple", "aple", "apples", "apply", "orange"] {
129 builder.add_document(
130 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
131 b"{}",
132 );
133 }
134 let reader = SegmentReader::open(builder.build()).unwrap();
135 let store = crate::search::segment_store::SegmentStore::new(
136 vec![reader],
137 crate::analysis::AnalyzerRegistry::new(),
138 None,
139 None,
140 );
141 let searcher = Searcher::new(&store);
142
143 let results = searcher
144 .search_query(
145 &FuzzyQuery {
146 field: "tag".into(),
147 value: "apple".into(),
148 fuzziness: 2,
149 },
150 10,
151 0,
152 )
153 .unwrap();
154 assert_eq!(results.total_hits.value, 4);
156 }
157
158 #[test]
159 fn fuzzy_constant_score_all_ones() {
160 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
164 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
165 for tag in &["cat", "bat", "hat", "dog"] {
166 builder.add_document(
167 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
168 b"{}",
169 );
170 }
171 let reader = SegmentReader::open(builder.build()).unwrap();
172 let store = crate::search::segment_store::SegmentStore::new(
173 vec![reader],
174 crate::analysis::AnalyzerRegistry::new(),
175 None,
176 None,
177 );
178 let searcher = Searcher::new(&store);
179
180 let results = searcher
181 .search_query(
182 &FuzzyQuery {
183 field: "tag".into(),
184 value: "cat".into(),
185 fuzziness: 1,
186 },
187 10,
188 0,
189 )
190 .unwrap();
191 assert_eq!(results.total_hits.value, 3);
193 for hit in &results.hits {
194 assert_eq!(
195 hit.score, 1.0,
196 "fuzzy hit should have constant score 1.0, got {}",
197 hit.score
198 );
199 }
200 }
201
202 #[test]
203 fn fuzzy_no_matches() {
204 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
205 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
206 builder.add_document(
207 &[(FieldId::new(0), vec![Token::new("hello", 0, 5, 0)])],
208 b"{}",
209 );
210 let reader = SegmentReader::open(builder.build()).unwrap();
211 let store = crate::search::segment_store::SegmentStore::new(
212 vec![reader],
213 crate::analysis::AnalyzerRegistry::new(),
214 None,
215 None,
216 );
217 let searcher = Searcher::new(&store);
218
219 let results = searcher
220 .search_query(
221 &FuzzyQuery {
222 field: "tag".into(),
223 value: "xyz".into(),
224 fuzziness: 1,
225 },
226 10,
227 0,
228 )
229 .unwrap();
230 assert_eq!(results.total_hits.value, 0);
231 }
232
233 #[test]
234 fn fuzzy_first_char_substitution() {
235 let schema = Mapping::builder().field("tag", FieldType::Keyword).build();
237 let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);
238 for tag in &["cat", "bat", "hat", "dog"] {
239 builder.add_document(
240 &[(FieldId::new(0), vec![Token::new(*tag, 0, tag.len(), 0)])],
241 b"{}",
242 );
243 }
244 let reader = SegmentReader::open(builder.build()).unwrap();
245 let store = crate::search::segment_store::SegmentStore::new(
246 vec![reader],
247 crate::analysis::AnalyzerRegistry::new(),
248 None,
249 None,
250 );
251 let searcher = Searcher::new(&store);
252
253 let results = searcher
254 .search_query(
255 &FuzzyQuery {
256 field: "tag".into(),
257 value: "cat".into(),
258 fuzziness: 1,
259 },
260 10,
261 0,
262 )
263 .unwrap();
264 assert_eq!(results.total_hits.value, 3);
266 }
267}