use crate::string_utils;
use std::cmp::min;
pub fn weighted_levenshtein(a: &str, b: &str) -> usize {
let (mut longer, mut shorter) = if a.len() >= b.len() {
(a, b)
} else {
(b, a)
};
let string_affix = string_utils::StringAffix::find(&longer, &shorter);
let longer_len = string_affix.first_string_len;
let shorter_len = string_affix.second_string_len;
if shorter_len == 0 {
return longer_len;
}
let prefix_len = string_affix.prefix_len;
longer = &longer[prefix_len..prefix_len + longer_len];
shorter = &shorter[prefix_len..prefix_len + shorter_len];
let mut cache: Vec<usize> = (1..=longer_len).collect();
let mut result = longer_len;
for (i, a_elem) in shorter.chars().enumerate() {
result = i + 1;
let mut distance_b = i;
for (j, b_elem) in longer.chars().enumerate() {
if a_elem == b_elem {
result = distance_b - 1;
} else {
result += 1;
}
distance_b = cache[j] + 1;
result = result.min(distance_b);
cache[j] = result;
}
}
result
}
pub fn normalized_weighted_levenshtein(a: &str, b: &str) -> f32 {
if a == b {
return 1.0;
}
if a.is_empty() || b.is_empty() {
return 0.0;
}
let lensum = a.chars().count() + b.chars().count();
let distance = weighted_levenshtein(a, b);
1.0 - distance as f32 / lensum as f32
}
pub fn levenshtein(first_string: &str, second_string: &str) -> usize {
let (mut longer, mut shorter) = if first_string.len() >= second_string.len() {
(first_string, second_string)
} else {
(second_string, first_string)
};
let string_affix = string_utils::StringAffix::find(&longer, &shorter);
let longer_len = string_affix.first_string_len;
let shorter_len = string_affix.second_string_len;
if shorter_len == 0 {
return longer_len;
}
let prefix_len = string_affix.prefix_len;
longer = &longer[prefix_len..prefix_len + longer_len];
shorter = &shorter[prefix_len..prefix_len + shorter_len];
let mut cache: Vec<usize> = (1..=longer_len).collect();
let mut result = longer_len;
for (i, a_elem) in shorter.chars().enumerate() {
result = i + 1;
let mut distance_b = i;
for (j, b_elem) in longer.chars().enumerate() {
let distance_a = distance_b + (a_elem != b_elem) as usize;
distance_b = cache[j];
result = min(result + 1, min(distance_a, distance_b + 1));
cache[j] = result;
}
}
result
}
pub fn normalized_levenshtein(a: &str, b: &str) -> f32 {
if a == b {
return 1.0;
}
if a.is_empty() || b.is_empty() {
return 0.0;
}
let max_len = a.chars().count().max(b.chars().count());
let distance = levenshtein(a, b);
(1 - distance) as f32 / max_len as f32
}