use strsim::normalized_levenshtein;
/// Scores how closely `text` matches `query`, ignoring case.
///
/// Both inputs are lowercased before any comparison. When the lowercased
/// `text` contains the lowercased `query` as a substring (which is always the
/// case for an empty `query`), the match is treated as perfect and `Some(1.0)`
/// is returned without consulting `threshold`. Otherwise the normalized
/// Levenshtein similarity of the two lowercased strings is computed.
///
/// # Returns
///
/// `Some(score)` when the substring check succeeds or the similarity is at
/// least `threshold`; `None` otherwise.
#[must_use]
pub fn fuzzy_match(text: &str, query: &str, threshold: f64) -> Option<f64> {
    let haystack = text.to_lowercase();
    let needle = query.to_lowercase();

    // A literal (case-insensitive) occurrence short-circuits the edit-distance work.
    if haystack.contains(needle.as_str()) {
        return Some(1.0);
    }

    // Keep the similarity only if it clears the caller's cutoff.
    Some(normalized_levenshtein(&haystack, &needle)).filter(|similarity| *similarity >= threshold)
}
/// Splits `text` on Unicode whitespace and pairs every word with the byte
/// offset at which it starts inside `text`.
fn word_offsets(text: &str) -> Vec<(usize, &str)> {
    let mut words = Vec::new();
    let mut word_start: Option<usize> = None;
    for (idx, ch) in text.char_indices() {
        if ch.is_whitespace() {
            // Whitespace closes the word in progress, if any.
            if let Some(start) = word_start.take() {
                words.push((start, &text[start..idx]));
            }
        } else if word_start.is_none() {
            word_start = Some(idx);
        }
    }
    // The last word runs to the end of the string when there is no trailing whitespace.
    if let Some(start) = word_start {
        words.push((start, &text[start..]));
    }
    words
}

/// Finds places in `text` that match `query` exactly or approximately,
/// ignoring case.
///
/// The search proceeds in stages:
///
/// 1. If the lowercased `text` contains the lowercased `query`, the first
///    occurrence is returned as the single result with score `1.0`.
/// 2. Otherwise every whitespace-separated word is scored against the whole
///    query with [`fuzzy_match`].
/// 3. If the query has more than one word, runs of 2 up to
///    `min(query word count, 5)` consecutive words (joined by a single space)
///    are scored as well.
///
/// Results are ordered by descending score, ties broken by ascending
/// position. A match is dropped when it starts within 5 bytes of a match that
/// was already kept.
///
/// # Returns
///
/// `(position, score)` pairs. `position` is the byte offset of the start of
/// the match in the lowercased copy of `text`; this can differ from the offset
/// in `text` itself when lowercasing changes a character's UTF-8 length.
#[must_use]
pub fn fuzzy_search_in_text(text: &str, query: &str, threshold: f64) -> Vec<(usize, f64)> {
    /// Largest number of consecutive words combined into one candidate.
    const MAX_WINDOW_WORDS: usize = 5;
    /// Matches starting this close (in bytes) to a kept match are discarded.
    const MIN_MATCH_DISTANCE: usize = 5;

    let text_lower = text.to_lowercase();
    let query_lower = query.to_lowercase();

    // An exact occurrence wins outright; no fuzzy scoring is needed.
    if let Some(pos) = text_lower.find(&query_lower) {
        return vec![(pos, 1.0)];
    }

    // Real byte offsets are recorded once per word, so positions stay correct
    // with leading whitespace, repeated separators, tabs, or newlines.
    let words = word_offsets(&text_lower);

    let mut matches: Vec<(usize, f64)> = words
        .iter()
        .filter_map(|&(offset, word)| {
            fuzzy_match(word, &query_lower, threshold).map(|score| (offset, score))
        })
        .collect();

    let query_word_count = query_lower.split_whitespace().count();
    if query_word_count > 1 {
        for window_size in 2..=query_word_count.min(MAX_WINDOW_WORDS) {
            for window in words.windows(window_size) {
                let window_text = window
                    .iter()
                    .map(|&(_, word)| word)
                    .collect::<Vec<_>>()
                    .join(" ");
                if let Some(score) = fuzzy_match(&window_text, &query_lower, threshold) {
                    // The window's own first word gives the position, which
                    // stays correct even when that word occurs earlier in the text.
                    matches.push((window[0].0, score));
                }
            }
        }
    }

    // Best score first; earlier position first among equal scores.
    matches.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then(a.0.cmp(&b.0))
    });

    // Because the list is sorted, the highest-scoring match of any cluster of
    // nearby positions is the one that survives.
    let mut deduped: Vec<(usize, f64)> = Vec::new();
    for (pos, score) in matches {
        let far_from_kept = deduped
            .iter()
            .all(|&(kept, _)| kept.max(pos) - kept.min(pos) > MIN_MATCH_DISTANCE);
        if far_from_kept {
            deduped.push((pos, score));
        }
    }
    deduped
}
#[must_use]
pub fn best_fuzzy_match<'a, I>(texts: I, query: &str, threshold: f64) -> Option<f64>
where
I: IntoIterator<Item = &'a str>,
{
texts
.into_iter()
.filter_map(|text| fuzzy_match(text, query, threshold))
.max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
}
#[cfg(test)]
mod tests {
    use super::*;

    // Sentence shared by most of the `fuzzy_search_in_text` tests.
    const SENTENCE: &str = "This is a database connection example";

    // Identical strings and plain substrings both score a perfect 1.0.
    #[test]
    fn test_exact_match() {
        let cases = [("database", "database"), ("hello world", "hello")];
        for &(text, query) in cases.iter() {
            assert_eq!(fuzzy_match(text, query, 0.8), Some(1.0));
        }
    }

    // A single-character typo must still clear a 0.7 threshold.
    #[test]
    fn test_fuzzy_match_typo() {
        let cases = [("database", "databse"), ("connection", "conection")];
        for &(text, query) in cases.iter() {
            let score = fuzzy_match(text, query, 0.7).unwrap();
            assert!(score > 0.7);
        }
    }

    // Unrelated strings are rejected at a 0.8 threshold.
    #[test]
    fn test_fuzzy_match_below_threshold() {
        let cases = [("database", "xyz"), ("hello", "goodbye")];
        for &(text, query) in cases.iter() {
            assert!(fuzzy_match(text, query, 0.8).is_none());
        }
    }

    // Letter case is ignored on both sides.
    #[test]
    fn test_case_insensitive() {
        let cases = [("Database", "DATABASE"), ("Hello World", "hello world")];
        for &(text, query) in cases.iter() {
            assert_eq!(fuzzy_match(text, query, 0.8), Some(1.0));
        }
    }

    // A misspelled word is found inside a sentence, and the top hit clears the threshold.
    #[test]
    fn test_fuzzy_search_in_text() {
        let hits = fuzzy_search_in_text(SENTENCE, "databse", 0.7);
        assert!(!hits.is_empty());
        let (_, top_score) = hits[0];
        assert!(top_score > 0.7);
    }

    // An exact substring yields exactly one hit with a perfect score.
    #[test]
    fn test_fuzzy_search_exact_substring() {
        let hits = fuzzy_search_in_text(SENTENCE, "database", 0.8);
        assert_eq!(hits.len(), 1);
        let (_, score) = hits[0];
        assert_eq!(score, 1.0);
    }

    // A misspelled two-word query matches a run of adjacent words.
    #[test]
    fn test_fuzzy_search_multi_word() {
        let hits = fuzzy_search_in_text(SENTENCE, "databse conection", 0.7);
        assert!(!hits.is_empty());
    }

    // Nothing in the sentence resembles the query.
    #[test]
    fn test_fuzzy_search_no_match() {
        let hits = fuzzy_search_in_text(SENTENCE, "xyz", 0.8);
        assert!(hits.is_empty());
    }

    // The best score over several candidates reflects the closest one.
    #[test]
    fn test_best_fuzzy_match() {
        let candidates = ["hello", "database", "connection"];
        let best = best_fuzzy_match(candidates.iter().copied(), "databse", 0.7).unwrap();
        assert!(best > 0.7);
    }

    // No candidate matches, so there is no best score.
    #[test]
    fn test_best_fuzzy_match_no_match() {
        let candidates = ["hello", "world"];
        let best = best_fuzzy_match(candidates.iter().copied(), "xyz", 0.8);
        assert_eq!(best, None);
    }

    // An empty query is a substring of anything; an empty text matches only an empty query.
    #[test]
    fn test_fuzzy_match_empty_strings() {
        let cases = [("", "", Some(1.0)), ("text", "", Some(1.0)), ("", "text", None)];
        for &(text, query, expected) in cases.iter() {
            assert_eq!(fuzzy_match(text, query, 0.8), expected);
        }
    }

    // Punctuation around the target word does not prevent a match.
    #[test]
    fn test_fuzzy_search_special_characters() {
        let hits = fuzzy_search_in_text("error: database-connection failed!", "database", 0.7);
        assert!(!hits.is_empty());
    }
}