fn levenshtein_distance(a: &str, b: &str) -> usize {
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let m = a_chars.len();
let n = b_chars.len();
if m == 0 {
return n;
}
if n == 0 {
return m;
}
let mut prev = vec![0; n + 1];
let mut curr = vec![0; n + 1];
for j in 0..=n {
prev[j] = j;
}
for i in 1..=m {
curr[0] = i;
for j in 1..=n {
let cost = usize::from(a_chars[i - 1] != b_chars[j - 1]);
curr[j] = (prev[j] + 1) .min(curr[j - 1] + 1) .min(prev[j - 1] + cost); }
std::mem::swap(&mut prev, &mut curr);
}
prev[n]
}
pub fn find_similar<'a, S: AsRef<str>>(query: &str, candidates: &'a [S]) -> Option<&'a str> {
if candidates.is_empty() {
return None;
}
let query_lower = query.to_lowercase();
let mut best_match: Option<&str> = None;
let mut best_distance = usize::MAX;
for candidate in candidates {
let candidate_str = candidate.as_ref();
let candidate_lower = candidate_str.to_lowercase();
if query_lower == candidate_lower {
return Some(candidate_str);
}
let distance = levenshtein_distance(&query_lower, &candidate_lower);
if distance < best_distance {
best_distance = distance;
best_match = Some(candidate_str);
}
}
let max_distance = if query.len() <= 3 {
1 } else if query.len() <= 5 {
2 } else {
3 };
if best_distance <= max_distance {
best_match
} else {
None
}
}
pub fn format_suggestion(suggestion: &str) -> String {
format!("Did you mean '{suggestion}'?")
}
pub fn format_suggestions(suggestions: &[&str]) -> String {
match suggestions.len() {
0 => String::new(),
1 => format_suggestion(suggestions[0]),
_ => {
let quoted: Vec<String> = suggestions.iter().map(|s| format!("'{s}'")).collect();
format!("Did you mean one of: {}?", quoted.join(", "))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_levenshtein_distance() {
assert_eq!(levenshtein_distance("", ""), 0);
assert_eq!(levenshtein_distance("a", ""), 1);
assert_eq!(levenshtein_distance("", "a"), 1);
assert_eq!(levenshtein_distance("abc", "abc"), 0);
assert_eq!(levenshtein_distance("abc", "abd"), 1);
assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
assert_eq!(levenshtein_distance("Person", "Peron"), 1);
assert_eq!(levenshtein_distance("Person", "person"), 1); }
#[test]
fn test_find_similar() {
let labels = ["Person", "Place", "Organization", "Event"];
assert_eq!(find_similar("Peron", &labels), Some("Person"));
assert_eq!(find_similar("person", &labels), Some("Person"));
assert_eq!(find_similar("PERSON", &labels), Some("Person"));
assert_eq!(find_similar("Plase", &labels), Some("Place"));
assert_eq!(find_similar("XYZ", &labels), None);
assert_eq!(find_similar("FooBar", &labels), None);
let empty: Vec<&str> = vec![];
assert_eq!(find_similar("Person", &empty), None);
}
#[test]
fn test_format_suggestion() {
assert_eq!(format_suggestion("Person"), "Did you mean 'Person'?");
}
#[test]
fn test_format_suggestions() {
assert_eq!(format_suggestions(&[]), "");
assert_eq!(format_suggestions(&["Person"]), "Did you mean 'Person'?");
assert_eq!(
format_suggestions(&["Person", "Place"]),
"Did you mean one of: 'Person', 'Place'?"
);
}
}