pub(crate) fn levenshtein(a: &str, b: &str, max: usize) -> usize {
let a: Vec<char> = a.chars().collect();
let b: Vec<char> = b.chars().collect();
if a.len().abs_diff(b.len()) > max {
return usize::MAX;
}
let mut prev: Vec<usize> = (0..=b.len()).collect();
let mut curr = vec![0usize; b.len() + 1];
for (i, ca) in a.iter().enumerate() {
curr[0] = i + 1;
let mut row_min = curr[0];
for (j, cb) in b.iter().enumerate() {
let cost = usize::from(ca != cb);
curr[j + 1] = (prev[j] + cost).min(prev[j + 1] + 1).min(curr[j] + 1);
row_min = row_min.min(curr[j + 1]);
}
if row_min > max {
return usize::MAX;
}
std::mem::swap(&mut prev, &mut curr);
}
let dist = prev[b.len()];
if dist > max { usize::MAX } else { dist }
}
pub(crate) fn nearest_match<'a>(
target: &str,
candidates: impl IntoIterator<Item = &'a str>,
max_distance: usize,
) -> Option<&'a str> {
let mut best: Option<(usize, &'a str)> = None;
for cand in candidates {
let d = levenshtein(target, cand, max_distance);
if d == 0 || d == usize::MAX {
continue;
}
let better = match best {
None => true,
Some((bd, bc)) => d < bd || (d == bd && cand < bc),
};
if better {
best = Some((d, cand));
}
}
best.map(|(_, c)| c)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn levenshtein_caps() {
assert_eq!(levenshtein("format", "format", 2), 0);
assert_eq!(levenshtein("forrmat", "format", 2), 1);
assert_eq!(levenshtein("abcdefg", "xyz", 2), usize::MAX);
assert_eq!(levenshtein("cran", "brand", 1), usize::MAX);
}
#[test]
fn nearest_match_skips_exact_and_over_budget() {
assert_eq!(nearest_match("pdf", ["pdf", "pdfa"], 1), Some("pdfa"));
assert_eq!(nearest_match("cran", ["brand"], 1), None);
}
#[test]
fn nearest_match_breaks_ties_alphabetically() {
assert_eq!(
nearest_match("ellips", ["vellip", "hellip"], 2),
Some("hellip")
);
}
}