/// Returns the largest byte offset `<= index` that lies on a UTF-8 character
/// boundary of `s`.
///
/// An `index` at or past the end of the string is clamped to `s.len()`.
/// The result is therefore always a valid position for slicing `s`.
pub fn floor_char_boundary(s: &str, index: usize) -> usize {
    if index >= s.len() {
        return s.len();
    }
    // Scan backwards from `index`. Offset 0 is always a boundary, so the
    // search cannot come up empty; the fallback only satisfies the type.
    (0..=index)
        .rev()
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(0)
}
/// Computes the Levenshtein distance between `a` and `b`, counted in `char`s.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn `a` into `b`. Only two rows of
/// the dynamic-programming table are kept, so extra memory is proportional to
/// the number of chars in `b`.
pub fn edit_distance(a: &str, b: &str) -> usize {
    let target: Vec<char> = b.chars().collect();
    let width = target.len() + 1;

    // `above` is the previous table row; it starts as the cost of building
    // each prefix of `b` from the empty string.
    let mut above: Vec<usize> = (0..width).collect();
    let mut row = vec![0usize; width];

    for (row_idx, source_ch) in a.chars().enumerate() {
        // Reaching the empty prefix of `b` means deleting every char seen so far.
        row[0] = row_idx + 1;
        for (col, &target_ch) in target.iter().enumerate() {
            let delete = above[col + 1] + 1;
            let insert = row[col] + 1;
            let substitute = above[col] + usize::from(source_ch != target_ch);
            row[col + 1] = delete.min(insert).min(substitute);
        }
        // The row just filled becomes the "previous" row for the next char.
        std::mem::swap(&mut above, &mut row);
    }

    above[width - 1]
}
/// Returns the candidate closest to `got` by case-insensitive edit distance,
/// provided it is close enough to be a plausible near-miss.
///
/// Both `got` and each candidate are lowercased before [`edit_distance`] is
/// computed. A candidate is eligible only when all of the following hold:
///
/// * its distance is non-zero (exact matches are never suggested),
/// * its distance is at most 2, and
/// * twice its distance does not exceed the longer of the two strings,
///   measured in `char`s of the original (non-lowercased) inputs.
///
/// Among eligible candidates the smallest distance wins; ties go to the
/// candidate that appears first. The winner is returned in its original
/// casing. Returns `None` when no candidate is eligible.
pub fn closest_match<'a>(
    got: &str,
    candidates: impl IntoIterator<Item = &'a str>,
) -> Option<String> {
    let got_lower = got.to_lowercase();
    let got_len = got.chars().count();
    let mut best: Option<(usize, &'a str)> = None;

    for candidate in candidates {
        let distance = edit_distance(&got_lower, &candidate.to_lowercase());
        if distance == 0 {
            continue;
        }

        // Apply the plausibility guard per candidate, before it can compete
        // for "best". Checking only the final winner made the result depend
        // on iteration order: an implausible candidate seen first would
        // shadow an equally distant plausible one and yield `None`.
        let max_len = got_len.max(candidate.chars().count());
        if distance > 2 || distance * 2 > max_len {
            continue;
        }

        // Strictly-better only, so the first of several ties is kept.
        let improves = match best {
            Some((best_distance, _)) => distance < best_distance,
            None => true,
        };
        if improves {
            best = Some((distance, candidate));
        }
    }

    best.map(|(_, candidate)| candidate.to_string())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Distance for equal, one-edit, empty-source and multi-edit inputs.
    #[test]
    fn test_edit_distance() {
        let cases = [
            ("de", "de", 0),
            ("ge", "de", 1),
            ("", "abc", 3),
            ("kitten", "sitting", 3),
        ];
        for (left, right, expected) in cases {
            // The inputs ride along in the compared tuple so a failure
            // identifies which case broke.
            assert_eq!(
                (left, right, edit_distance(left, right)),
                (left, right, expected)
            );
        }
    }

    /// Suggestions against a small set of two-letter codes.
    #[test]
    fn test_closest_match() {
        let langs = ["de", "fr", "ru", "ja", "zh", "es"];

        // A single edit away from a known code yields that code.
        assert_eq!(closest_match("ge", langs).as_deref(), Some("de"));
        assert_eq!(closest_match("rus", langs).as_deref(), Some("ru"));

        // No suggestion: nothing close enough, or the input already matches
        // a code (in any case) and every other code is too far away.
        for input in ["xx", "de", "DE", "zzzz"] {
            assert_eq!((input, closest_match(input, langs)), (input, None));
        }
    }

    /// Lengths and distances are counted in chars, not bytes.
    #[test]
    fn test_closest_match_guard_is_char_based() {
        // "é" is two bytes but one char; with a byte-based length the
        // one-edit candidate "e" would pass the guard.
        assert_eq!(closest_match("é", ["e", "a"]), None);
        assert_eq!(edit_distance("café", "cafe"), 1);
    }

    /// In pure ASCII every offset is a boundary; past-the-end clamps to len.
    #[test]
    fn test_floor_char_boundary_ascii() {
        let text = "hello";
        assert_eq!(floor_char_boundary(text, 3), 3);
        assert_eq!(floor_char_boundary(text, 10), 5);
        assert_eq!(floor_char_boundary(text, 0), 0);
    }

    /// A trailing two-byte char: offset 4 falls inside it and floors to 3.
    #[test]
    fn test_floor_char_boundary_multibyte() {
        let word = "caf\u{00e9}";
        assert_eq!(floor_char_boundary(word, 5), 5);
        assert_eq!(floor_char_boundary(word, 4), 3);
        assert_eq!(floor_char_boundary(word, 3), 3);
    }

    /// Two three-byte chars: the only boundaries are 0, 3 and 6.
    #[test]
    fn test_floor_char_boundary_cjk() {
        let text = "\u{4e16}\u{754c}";
        let cases = [(6, 6), (5, 3), (4, 3), (3, 3), (2, 0), (1, 0)];
        for (index, expected) in cases {
            assert_eq!(
                (index, floor_char_boundary(text, index)),
                (index, expected)
            );
        }
    }

    /// The empty string has a single boundary at 0.
    #[test]
    fn test_floor_char_boundary_empty() {
        let empty = "";
        assert_eq!(floor_char_boundary(empty, 0), 0);
        assert_eq!(floor_char_boundary(empty, 5), 0);
    }
}