do_memory_core/search/
fuzzy.rs1use strsim::normalized_levenshtein;
8
9#[must_use]
40pub fn fuzzy_match(text: &str, query: &str, threshold: f64) -> Option<f64> {
41 let text_lower = text.to_lowercase();
42 let query_lower = query.to_lowercase();
43
44 if text_lower.contains(&query_lower) {
46 return Some(1.0);
47 }
48
49 let score = normalized_levenshtein(&text_lower, &query_lower);
51
52 if score >= threshold {
53 Some(score)
54 } else {
55 None
56 }
57}
58
59#[must_use]
86pub fn fuzzy_search_in_text(text: &str, query: &str, threshold: f64) -> Vec<(usize, f64)> {
87 let mut matches = Vec::new();
88 let text_lower = text.to_lowercase();
89 let query_lower = query.to_lowercase();
90 let query_words: Vec<&str> = query_lower.split_whitespace().collect();
91 let text_words: Vec<&str> = text_lower.split_whitespace().collect();
92
93 if let Some(pos) = text_lower.find(&query_lower) {
95 matches.push((pos, 1.0));
96 return matches;
97 }
98
99 for (word_idx, word) in text_words.iter().enumerate() {
101 if let Some(score) = fuzzy_match(word, &query_lower, threshold) {
102 let position = text_words[..word_idx]
104 .iter()
105 .map(|w| w.len() + 1)
106 .sum::<usize>();
107 matches.push((position, score));
108 }
109 }
110
111 if query_words.len() > 1 {
113 for window_size in 2..=query_words.len().min(5) {
114 for window in text_words.windows(window_size) {
115 let window_text = window.join(" ");
116 if let Some(score) = fuzzy_match(&window_text, &query_lower, threshold) {
117 let word_idx = text_words.iter().position(|&w| w == window[0]).unwrap_or(0);
119 let position = text_words[..word_idx]
120 .iter()
121 .map(|w| w.len() + 1)
122 .sum::<usize>();
123 matches.push((position, score));
124 }
125 }
126 }
127 }
128
129 matches.sort_by(|a, b| {
132 b.1.partial_cmp(&a.1)
133 .unwrap_or(std::cmp::Ordering::Equal)
134 .then(a.0.cmp(&b.0))
135 });
136
137 let mut deduped = Vec::new();
139 for (pos, score) in matches {
140 if deduped.is_empty()
141 || deduped
142 .iter()
143 .all(|(p, _)| (*p as i64 - pos as i64).abs() > 5)
144 {
145 deduped.push((pos, score));
146 }
147 }
148
149 deduped
150}
151
152#[must_use]
167pub fn best_fuzzy_match<'a, I>(texts: I, query: &str, threshold: f64) -> Option<f64>
168where
169 I: IntoIterator<Item = &'a str>,
170{
171 texts
172 .into_iter()
173 .filter_map(|text| fuzzy_match(text, query, threshold))
174 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
175}
176
177#[cfg(test)]
178mod tests {
179 use super::*;
180
181 #[test]
182 fn test_exact_match() {
183 assert_eq!(fuzzy_match("database", "database", 0.8), Some(1.0));
184 assert_eq!(fuzzy_match("hello world", "hello", 0.8), Some(1.0));
185 }
186
187 #[test]
188 fn test_fuzzy_match_typo() {
189 let score = fuzzy_match("database", "databse", 0.7).unwrap();
191 assert!(score > 0.7);
192
193 let score = fuzzy_match("connection", "conection", 0.7).unwrap();
194 assert!(score > 0.7);
195 }
196
197 #[test]
198 fn test_fuzzy_match_below_threshold() {
199 assert_eq!(fuzzy_match("database", "xyz", 0.8), None);
201 assert_eq!(fuzzy_match("hello", "goodbye", 0.8), None);
202 }
203
204 #[test]
205 fn test_case_insensitive() {
206 assert_eq!(fuzzy_match("Database", "DATABASE", 0.8), Some(1.0));
207 assert_eq!(fuzzy_match("Hello World", "hello world", 0.8), Some(1.0));
208 }
209
210 #[test]
211 fn test_fuzzy_search_in_text() {
212 let text = "This is a database connection example";
213 let matches = fuzzy_search_in_text(text, "databse", 0.7);
214
215 assert!(!matches.is_empty());
216 assert!(matches[0].1 > 0.7);
217 }
218
219 #[test]
220 fn test_fuzzy_search_exact_substring() {
221 let text = "This is a database connection example";
222 let matches = fuzzy_search_in_text(text, "database", 0.8);
223
224 assert_eq!(matches.len(), 1);
225 assert_eq!(matches[0].1, 1.0);
226 }
227
228 #[test]
229 fn test_fuzzy_search_multi_word() {
230 let text = "This is a database connection example";
231 let matches = fuzzy_search_in_text(text, "databse conection", 0.7);
232
233 assert!(!matches.is_empty());
234 }
235
236 #[test]
237 fn test_fuzzy_search_no_match() {
238 let text = "This is a database connection example";
239 let matches = fuzzy_search_in_text(text, "xyz", 0.8);
240
241 assert!(matches.is_empty());
242 }
243
244 #[test]
245 fn test_best_fuzzy_match() {
246 let texts = ["hello", "database", "connection"];
247 let score = best_fuzzy_match(texts.iter().copied(), "databse", 0.7).unwrap();
248
249 assert!(score > 0.7);
250 }
251
252 #[test]
253 fn test_best_fuzzy_match_no_match() {
254 let texts = ["hello", "world"];
255 let score = best_fuzzy_match(texts.iter().copied(), "xyz", 0.8);
256
257 assert_eq!(score, None);
258 }
259
260 #[test]
261 fn test_fuzzy_match_empty_strings() {
262 assert_eq!(fuzzy_match("", "", 0.8), Some(1.0));
264 assert_eq!(fuzzy_match("text", "", 0.8), Some(1.0));
266 assert_eq!(fuzzy_match("", "text", 0.8), None);
268 }
269
270 #[test]
271 fn test_fuzzy_search_special_characters() {
272 let text = "error: database-connection failed!";
273 let matches = fuzzy_search_in_text(text, "database", 0.7);
276
277 assert!(!matches.is_empty());
279 }
280}