/// Computes the Jaro similarity between `s1` and `s2`.
///
/// The strings are compared character by character (`char`, i.e. Unicode
/// scalar values), not byte by byte.
///
/// # Returns
///
/// * `1.0` when the strings are equal (including when both are empty),
/// * `0.0` when exactly one string is empty or no characters match,
/// * otherwise `(m / |s1| + m / |s2| + (m - t) / m) / 3`, where `m` is the
///   number of matching characters and `t` is half the number of matched
///   characters that appear in a different order in the two strings.
///
/// Two equal characters count as a match only when their positions differ by
/// at most `max(|s1|, |s2|) / 2 - 1` (clamped at zero), and each character can
/// take part in at most one match.
pub fn jaro(s1: &str, s2: &str) -> f64 {
    if s1 == s2 {
        return 1.0;
    }
    if s1.is_empty() || s2.is_empty() {
        return 0.0;
    }

    let s1: Vec<char> = s1.chars().collect();
    let s2: Vec<char> = s2.chars().collect();

    // The window is measured in characters so that multi-byte text is scored
    // the same way as ASCII text. `saturating_sub` keeps the window at zero
    // when the longer string has a single character instead of underflowing.
    let max_dist = (s1.len().max(s2.len()) / 2).saturating_sub(1);

    let mut s1_matched = vec![false; s1.len()];
    let mut s2_matched = vec![false; s2.len()];
    let mut match_count = 0_usize;

    for (i, &c) in s1.iter().enumerate() {
        // Only positions within `max_dist` of `i` can match; an empty range
        // (window entirely past the end of `s2`) simply yields no match.
        let lo = i.saturating_sub(max_dist);
        let hi = (i + max_dist + 1).min(s2.len());
        let found = (lo..hi).find(|&j| !s2_matched[j] && s2[j] == c);
        if let Some(j) = found {
            s1_matched[i] = true;
            s2_matched[j] = true;
            match_count += 1;
        }
    }

    if match_count == 0 {
        return 0.0;
    }

    // Walk the matched characters of both strings in order; every position
    // where they disagree is one half of a transposition.
    let s1_seq = s1
        .iter()
        .zip(&s1_matched)
        .filter(|&(_, &hit)| hit)
        .map(|(c, _)| c);
    let s2_seq = s2
        .iter()
        .zip(&s2_matched)
        .filter(|&(_, &hit)| hit)
        .map(|(c, _)| c);
    let out_of_order = s1_seq.zip(s2_seq).filter(|(a, b)| a != b).count();

    let matches = match_count as f64;
    let transpositions = out_of_order as f64 / 2.0;

    ((matches / s1.len() as f64)
        + (matches / s2.len() as f64)
        + ((matches - transpositions) / matches))
        / 3.0
}
/// Computes the Jaro-Winkler similarity between `s1` and `s2`.
///
/// The result is the Jaro similarity of the two strings, raised towards `1.0`
/// in proportion to the length of their common prefix. At most the first four
/// characters of the prefix are counted, and each counted character
/// contributes a weight of `0.1`:
///
/// `jaro + prefix_len * 0.1 * (1 - jaro)`
pub fn jaro_winkler(s1: &str, s2: &str) -> f64 {
    const PREFIX_CAP: usize = 4;
    const PREFIX_WEIGHT: f64 = 0.1;

    let base = jaro(s1, s2);

    // Capping the zipped characters before counting gives the same value as
    // counting the whole common prefix and clamping afterwards.
    let shared_prefix = s1
        .chars()
        .zip(s2.chars())
        .take(PREFIX_CAP)
        .take_while(|(a, b)| a == b)
        .count();

    let boost = shared_prefix as f64 * PREFIX_WEIGHT;
    // Fused multiply-add: boost * (1 - base) + base.
    boost.mul_add(1.0 - base, base)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `jaro` against fixed reference scores, compared exactly.
    #[test]
    fn test_jaro() {
        let cases = [
            ("fly", "ant", 0.0),
            ("martha", "marhta", 0.9444444444444445),
            ("dwayne", "duane", 0.8222222222222223),
            ("dixon", "dicksonx", 0.7666666666666666),
            ("jellyfish", "smellyfish", 0.8962962962962964),
            ("FAREMVIEL", "FARMVILLE", 0.8842592592592592),
        ];
        for &(left, right, expected) in &cases {
            assert_eq!(jaro(left, right), expected);
        }
    }

    /// Checks `jaro_winkler` against fixed reference scores, compared exactly.
    #[test]
    fn test_jaro_winkler() {
        let cases = [
            ("fly", "ant", 0.0),
            ("DwAyNE", "DuANE", 0.8400000000000001),
            ("TRATE", "TRACE", 0.9066666666666667),
        ];
        for &(left, right, expected) in &cases {
            assert_eq!(jaro_winkler(left, right), expected);
        }
    }
}