use std::cmp;
/// Computes the Levenshtein (edit) distance between `a` and `b`, counted in `char`s.
///
/// Returns `Some(distance)` when the distance does not exceed `limit`, and `None` otherwise.
/// The difference in length is a lower bound on the distance, so inputs whose lengths differ
/// by more than `limit` are rejected before any table is built.
pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
    let a_len = a.chars().count();
    let b_len = b.chars().count();

    // At least this many insertions/deletions are unavoidable.
    let len_gap = cmp::max(a_len, b_len) - cmp::min(a_len, b_len);
    if len_gap > limit {
        return None;
    }

    // Against an empty string the distance is exactly the other string's length, which is
    // `len_gap` here and is already known to be within `limit`.
    if a_len == 0 || b_len == 0 {
        return Some(len_gap);
    }

    // One rolling row of the DP table: `row[j]` is the distance between the prefix of `a`
    // processed so far and the first `j` chars of `b`.
    let mut row = (0..=b_len).collect::<Vec<usize>>();

    for (i, a_char) in a.chars().enumerate() {
        // Value of the cell diagonally up-left of the one about to be written.
        let mut diagonal = i;
        row[0] = i + 1;

        for (j, b_char) in b.chars().enumerate() {
            let above = row[j + 1];
            row[j + 1] = if a_char == b_char {
                // Matching chars cost nothing: carry the diagonal forward.
                diagonal
            } else {
                // Cheapest of substitution, deletion and insertion, plus one edit.
                cmp::min(cmp::min(diagonal, above), row[j]) + 1
            };
            diagonal = above;
        }
    }

    Some(row[b_len]).filter(|&distance| distance <= limit)
}
/// Edit-distance score that is lenient towards substring matches.
///
/// The plain Levenshtein distance is computed with the limit widened by the length difference,
/// and then that length difference is subtracted, so a string that is fully contained in the
/// other scores zero before adjustment. The result is then adjusted:
///
/// * one string is less than half the length of the other: the full length difference is
///   added back;
/// * otherwise, an exact substring match between strings of different lengths scores `1`, so
///   it can never be mistaken for an identical string;
/// * otherwise, half the length difference (rounded up) is added back.
///
/// Returns `Some(score)` when the adjusted score is within `limit`, `None` otherwise.
pub fn lev_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<usize> {
    let a_len = a.chars().count();
    let b_len = b.chars().count();
    let (shorter, longer) = if a_len <= b_len {
        (a_len, b_len)
    } else {
        (b_len, a_len)
    };
    let len_gap = longer - shorter;

    // "Lopsided" means the shorter string is less than half as long as the longer one.
    let lopsided = shorter * 2 < longer;

    let raw_distance = lev_distance(a, b, limit + len_gap)?;

    // Removing the unavoidable length cost makes a pure substring match come out as zero.
    let residual = raw_distance - len_gap;

    let score = if lopsided {
        // Lengths differ a lot: charge the whole length difference again.
        residual + len_gap
    } else if residual == 0 && len_gap > 0 {
        // Exact substring, but not the same string: report a non-zero score.
        1
    } else {
        // Similar lengths: charge only half of the length difference, rounded up.
        residual + (len_gap + 1) / 2
    };

    Some(score).filter(|&value| value <= limit)
}
/// Finds the candidate that best matches `lookup`, scoring with the substring-aware metric.
///
/// Forwards to `find_best_match_for_name_impl` with `use_substring_score` set to `true`, so
/// candidates are compared using `lev_distance_with_substrings`.
///
/// * `candidates` - the names to search.
/// * `lookup` - the name to find a match for.
/// * `dist` - optional maximum accepted distance; when `None`, the implementation derives a
///   default from the length of `lookup`.
///
/// Returns the selected candidate, or `None` if nothing matched.
pub fn find_best_match_for_name_with_substrings<'c>(
candidates: &[&'c str],
lookup: &str,
dist: Option<usize>,
) -> Option<&'c str> {
find_best_match_for_name_impl(true, candidates, lookup, dist)
}
/// Finds the candidate that best matches `lookup`, scoring with plain Levenshtein distance.
///
/// Forwards to `find_best_match_for_name_impl` with `use_substring_score` set to `false`, so
/// candidates are compared using `lev_distance`.
///
/// * `candidates` - the names to search.
/// * `lookup` - the name to find a match for.
/// * `dist` - optional maximum accepted distance; when `None`, the implementation derives a
///   default from the length of `lookup`.
///
/// Returns the selected candidate, or `None` if nothing matched.
// NOTE(review): the `dead_code` allowance suggests nothing in this crate currently calls this
// function — confirm before removing either the attribute or the function.
#[allow(dead_code)]
pub fn find_best_match_for_name<'c>(
candidates: &[&'c str],
lookup: &str,
dist: Option<usize>,
) -> Option<&'c str> {
find_best_match_for_name_impl(false, candidates, lookup, dist)
}
/// Shared implementation behind the `find_best_match_for_name*` entry points.
///
/// Matches are tried in this order of priority:
///
/// 1. a candidate equal to `lookup` after upper-casing both (the first such candidate wins);
/// 2. the candidate with the smallest edit distance within the allowed budget (the first
///    candidate reaching that smallest distance wins);
/// 3. a candidate made of the same `_`-separated words as `lookup`, in any order.
///
/// `use_substring_score` selects `lev_distance_with_substrings` over `lev_distance` as the
/// metric for step 2. `dist` is the starting budget; when it is `None`, one third of the byte
/// length of `lookup` is used, with a floor of `1`.
#[cold]
fn find_best_match_for_name_impl<'c>(
    use_substring_score: bool,
    candidates: &[&'c str],
    lookup: &str,
    dist: Option<usize>,
) -> Option<&'c str> {
    // 1. Case-insensitive exact match.
    let wanted_upper = lookup.to_uppercase();
    let exact = candidates
        .iter()
        .copied()
        .find(|name| name.to_uppercase() == wanted_upper);
    if exact.is_some() {
        return exact;
    }

    // 2. Closest candidate by edit distance.
    let mut budget = match dist {
        Some(explicit) => explicit,
        None => cmp::max(lookup.len(), 3) / 3,
    };
    let measure = |candidate: &str, max_allowed: usize| {
        if use_substring_score {
            lev_distance_with_substrings(lookup, candidate, max_allowed)
        } else {
            lev_distance(lookup, candidate, max_allowed)
        }
    };

    let mut closest = None;
    for &name in candidates {
        match measure(name, budget) {
            None => {}
            // Nothing can beat a zero distance, so stop searching.
            Some(0) => return Some(name),
            Some(found) => {
                // Only a strictly better candidate may replace this one.
                budget = found - 1;
                closest = Some(name);
            }
        }
    }

    // 3. Fall back to comparing the sorted word lists.
    closest.or_else(|| find_match_by_sorted_words(candidates, lookup))
}
/// Finds a candidate that consists of the same `_`-separated words as `lookup`, regardless of
/// the order in which the words appear (for example `b_a` matches `a_b`).
///
/// When several candidates qualify, the last one in `iter_names` is returned. Returns `None`
/// when no candidate qualifies.
fn find_match_by_sorted_words<'c>(iter_names: &[&'c str], lookup: &str) -> Option<&'c str> {
    // The normalised lookup is the same for every candidate, so build it only once.
    let lookup_sorted = sort_by_words(lookup);

    // Searching from the back returns the last matching candidate and stops at the first hit.
    iter_names
        .iter()
        .rfind(|candidate| sort_by_words(candidate) == lookup_sorted)
        .copied()
}
/// Splits `name` on `_`, sorts the resulting pieces, and glues them back together with `_`.
///
/// Two names made of the same words in a different order therefore produce the same string.
/// Empty pieces (from leading, trailing or doubled underscores) are kept and sort first.
fn sort_by_words(name: &str) -> String {
    let mut pieces = name.split('_').collect::<Vec<_>>();
    pieces.sort_unstable();

    // The result has the same pieces and separators as the input, hence the same length.
    let mut joined = String::with_capacity(name.len());
    for (index, piece) in pieces.iter().enumerate() {
        if index > 0 {
            joined.push('_');
        }
        joined.push_str(piece);
    }
    joined
}