1use crate::search::simd;
8use rapidfuzz::distance::{jaro_winkler, levenshtein};
9
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
12pub enum MatchAlgorithm {
13 Levenshtein,
15 #[default]
17 JaroWinkler,
18}
19
20#[derive(Debug, Clone)]
22pub struct MatchConfig {
23 pub algorithm: MatchAlgorithm,
25
26 pub min_score: f64,
30
31 pub case_sensitive: bool,
33}
34
35impl Default for MatchConfig {
36 fn default() -> Self {
37 Self {
38 algorithm: MatchAlgorithm::JaroWinkler,
39 min_score: 0.6,
40 case_sensitive: false,
41 }
42 }
43}
44
45#[derive(Debug, Clone, PartialEq)]
47pub struct MatchResult {
48 pub score: f64,
50 pub entry_id: usize,
52}
53
54impl MatchResult {
55 #[must_use]
57 pub fn new(score: f64, entry_id: usize) -> Self {
58 Self { score, entry_id }
59 }
60}
61
62pub struct FuzzyMatcher {
64 config: MatchConfig,
65}
66
67impl FuzzyMatcher {
68 #[must_use]
70 pub fn new() -> Self {
71 Self {
72 config: MatchConfig::default(),
73 }
74 }
75
76 #[must_use]
78 pub fn with_config(config: MatchConfig) -> Self {
79 Self { config }
80 }
81
82 #[must_use]
90 pub fn score(&self, query: &str, target: &str) -> f64 {
91 let (query_normalized, target_normalized) = if self.config.case_sensitive {
92 (query.to_string(), target.to_string())
93 } else {
94 let query_norm = query.to_lowercase();
96
97 let target_norm = if target.is_ascii() {
99 simd::to_lowercase_ascii(target)
100 } else {
101 target.to_lowercase()
102 };
103
104 (query_norm, target_norm)
105 };
106
107 match self.config.algorithm {
108 MatchAlgorithm::Levenshtein => {
109 self.levenshtein_similarity(&query_normalized, &target_normalized)
110 }
111 MatchAlgorithm::JaroWinkler => {
112 jaro_winkler::similarity(query_normalized.chars(), target_normalized.chars())
113 }
114 }
115 }
116
117 #[allow(clippy::cast_precision_loss, clippy::unused_self)]
119 fn levenshtein_similarity(&self, query: &str, target: &str) -> f64 {
120 let distance = levenshtein::distance(query.chars(), target.chars());
121
122 let max_len = query.chars().count().max(target.chars().count()) as f64;
124
125 if max_len == 0.0 {
126 return 1.0; }
128
129 1.0 - (distance as f64 / max_len)
130 }
131
132 #[must_use]
136 pub fn match_one(&self, query: &str, target: &str) -> Option<f64> {
137 let score = self.score(query, target);
138
139 if score >= self.config.min_score {
140 Some(score)
141 } else {
142 None
143 }
144 }
145
146 pub fn match_many<'a, I>(&self, query: &str, targets: I) -> Vec<MatchResult>
172 where
173 I: Iterator<Item = (usize, &'a str)>,
174 {
175 let mut results: Vec<MatchResult> = targets
176 .filter_map(|(entry_id, target)| {
177 self.match_one(query, target)
178 .map(|score| MatchResult::new(score, entry_id))
179 })
180 .collect();
181
182 results.sort_by(|a, b| {
184 b.score
185 .partial_cmp(&a.score)
186 .unwrap_or(std::cmp::Ordering::Equal)
187 });
188
189 results
190 }
191
192 #[must_use]
194 pub fn config(&self) -> &MatchConfig {
195 &self.config
196 }
197}
198
199impl Default for FuzzyMatcher {
200 fn default() -> Self {
201 Self::new()
202 }
203}
204
205#[cfg(test)]
206mod tests {
207 use super::*;
208 use approx::assert_abs_diff_eq;
209
210 #[test]
211 fn test_levenshtein_exact_match() {
212 let config = MatchConfig {
213 algorithm: MatchAlgorithm::Levenshtein,
214 min_score: 0.6,
215 case_sensitive: false,
216 };
217 let matcher = FuzzyMatcher::with_config(config);
218
219 let score = matcher.score("hello", "hello");
220 assert_abs_diff_eq!(score, 1.0, epsilon = 1e-10);
221 }
222
223 #[test]
224 fn test_levenshtein_similar() {
225 let config = MatchConfig {
226 algorithm: MatchAlgorithm::Levenshtein,
227 min_score: 0.6,
228 case_sensitive: false,
229 };
230 let matcher = FuzzyMatcher::with_config(config);
231
232 let score = matcher.score("hello", "helo");
233 assert!(score > 0.7); }
235
236 #[test]
237 fn test_jaro_winkler_exact_match() {
238 let matcher = FuzzyMatcher::new(); let score = matcher.score("hello", "hello");
240 assert_abs_diff_eq!(score, 1.0, epsilon = 1e-10);
241 }
242
243 #[test]
244 fn test_jaro_winkler_similar() {
245 let matcher = FuzzyMatcher::new();
246 let score = matcher.score("hello", "helo");
247 assert!(score > 0.8); }
249
250 #[test]
251 fn test_case_insensitive() {
252 let matcher = FuzzyMatcher::new();
253 let score = matcher.score("HELLO", "hello");
254 assert_abs_diff_eq!(score, 1.0, epsilon = 1e-10);
255 }
256
257 #[test]
258 fn test_case_sensitive() {
259 let config = MatchConfig {
260 algorithm: MatchAlgorithm::JaroWinkler,
261 min_score: 0.6,
262 case_sensitive: true,
263 };
264 let matcher = FuzzyMatcher::with_config(config);
265
266 let score = matcher.score("HELLO", "hello");
267 assert!(score < 1.0);
268 }
269
270 #[test]
271 fn test_match_one_threshold() {
272 let config = MatchConfig {
273 algorithm: MatchAlgorithm::JaroWinkler,
274 min_score: 0.8,
275 case_sensitive: false,
276 };
277 let matcher = FuzzyMatcher::with_config(config);
278
279 assert!(matcher.match_one("hello", "hello").is_some());
280 assert!(matcher.match_one("hello", "helo").is_some());
281 assert!(matcher.match_one("hello", "goodbye").is_none());
282 }
283
284 #[test]
285 fn test_match_many_sorted() {
286 let matcher = FuzzyMatcher::new();
287 let targets = vec![
288 (0, "goodbye"),
289 (1, "helo_world"),
290 (2, "hello_world"),
291 (3, "hello"),
292 ];
293
294 let match_results = matcher.match_many("hello", targets.into_iter());
295
296 assert!(!match_results.is_empty());
298 assert_eq!(match_results[0].entry_id, 3); assert!(match_results[0].score >= match_results[1].score);
300 }
301
302 #[test]
303 fn test_match_many_filters_low_scores() {
304 let config = MatchConfig {
305 algorithm: MatchAlgorithm::JaroWinkler,
306 min_score: 0.9,
307 case_sensitive: false,
308 };
309 let matcher = FuzzyMatcher::with_config(config);
310
311 let targets = vec![(0, "hello"), (1, "helo"), (2, "goodbye")];
312
313 let match_results = matcher.match_many("hello", targets.into_iter());
314
315 assert!(match_results.len() <= 2); assert!(match_results.iter().all(|m| m.score >= 0.9));
318 }
319
320 #[test]
321 fn test_empty_strings() {
322 let matcher = FuzzyMatcher::new();
323 let score = matcher.score("", "");
324 assert_abs_diff_eq!(score, 1.0, epsilon = 1e-10);
325 }
326
327 #[test]
328 fn test_levenshtein_similarity_calculation() {
329 let config = MatchConfig {
330 algorithm: MatchAlgorithm::Levenshtein,
331 min_score: 0.6,
332 case_sensitive: false,
333 };
334 let matcher = FuzzyMatcher::with_config(config);
335
336 let score = matcher.score("kitten", "sitting");
339 assert!((score - 0.571).abs() < 0.01);
340 }
341
342 #[test]
343 fn test_levenshtein_unicode_normalization() {
344 let config = MatchConfig {
345 algorithm: MatchAlgorithm::Levenshtein,
346 min_score: 0.0,
347 case_sensitive: false,
348 };
349 let matcher = FuzzyMatcher::with_config(config);
350
351 let score = matcher.score("café", "cafe");
356 assert!((score - 0.75).abs() < 0.01);
357
358 let score = matcher.score("hello世界", "hello");
363 assert!((score - 0.714).abs() < 0.02);
364 }
365}