#[must_use]
pub fn levenshtein(a: &str, b: &str) -> usize {
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
let (n, m) = (a_chars.len(), b_chars.len());
if n == 0 {
return m;
}
if m == 0 {
return n;
}
let mut prev: Vec<usize> = (0..=m).collect();
let mut curr: Vec<usize> = vec![0; m + 1];
for i in 1..=n {
curr[0] = i;
for j in 1..=m {
let cost = if a_chars[i - 1] == b_chars[j - 1] {
0
} else {
1
};
curr[j] = (prev[j] + 1) .min(curr[j - 1] + 1) .min(prev[j - 1] + cost); }
std::mem::swap(&mut prev, &mut curr);
}
prev[m]
}
#[must_use]
pub fn suggest_builtin(word: &str, max_distance: usize) -> Vec<&'static str> {
suggest(word, crate::spell_dict::dict(), max_distance)
}
#[must_use]
pub fn suggest<'a, I, S>(word: &str, dict: I, max_distance: usize) -> Vec<&'a str>
where
I: IntoIterator<Item = &'a S>,
S: AsRef<str> + 'a + ?Sized,
{
let mut hits: Vec<(usize, &'a str)> = dict
.into_iter()
.map(|w| w.as_ref())
.filter_map(|w| {
let d = levenshtein(word, w);
(d <= max_distance).then_some((d, w))
})
.collect();
hits.sort_by_key(|(d, _)| *d);
hits.into_iter().map(|(_, w)| w).collect()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn distance_zero_for_equal() {
assert_eq!(levenshtein("کتاب", "کتاب"), 0);
assert_eq!(levenshtein("", ""), 0);
}
#[test]
fn distance_simple_edits() {
assert_eq!(levenshtein("کتاب", "کتیب"), 1);
assert_eq!(levenshtein("سلام", "سلامتی"), 2);
assert_eq!(levenshtein("kitten", "sitting"), 3);
}
#[test]
fn distance_empty_inputs() {
assert_eq!(levenshtein("", "abc"), 3);
assert_eq!(levenshtein("abc", ""), 3);
}
#[test]
fn suggest_basic() {
let dict = ["سلام", "کتاب", "سفر"];
let s = suggest("سلامی", &dict, 1);
assert_eq!(s, vec!["سلام"]);
}
#[test]
fn suggest_sorted_by_distance() {
let dict = ["سفر", "سفره", "سلام"];
let s = suggest("سفری", &dict, 2);
assert_eq!(s[0], "سفر");
}
#[test]
fn suggest_no_matches() {
let dict = ["alpha", "beta"];
assert!(suggest("سلام", &dict, 1).is_empty());
}
}