use std::collections::HashMap;
use std::char;
use utils;
pub fn damerau_levenshtein(s: &str, t: &str) -> usize {
let len_s = s.chars().count();
let len_t = t.chars().count();
let max_distance = len_t + len_s;
let mut mat: Vec<Vec<usize>> = vec![vec![0; len_t + 2]; len_s + 2];
mat[0][0] = max_distance;
for i in 0..(len_s + 1) {
mat[i+1][0] = max_distance;
mat[i+1][1] = i;
}
for i in 0..(len_t + 1) {
mat[0][i+1] = max_distance;
mat[1][i+1] = i;
}
let mut char_map: HashMap<char, usize> = HashMap::new();
for (i, s_char) in s.chars().enumerate() {
let mut db = 0;
let i = i + 1;
for (j, t_char) in t.chars().enumerate() {
let j = j + 1;
let last = *char_map.get(&t_char).unwrap_or(&0);
let cost = if s_char == t_char { 0 } else { 1 };
mat[i+1][j+1] = utils::min4(
mat[i+1][j] + 1, mat[i][j+1] + 1, mat[i][j] + cost, mat[last][db] + (i - last - 1) + 1 + (j - db - 1) );
if cost == 0 {
db = j;
}
}
char_map.insert(s_char, i);
}
return mat[len_s + 1][len_t + 1];
}
#[cfg(test)]
mod tests {
use super::damerau_levenshtein;
#[test]
fn basic() {
assert_eq!(1, damerau_levenshtein("hannah", "hannha"));
assert_eq!(2, damerau_levenshtein("FOO", "BOR"));
assert_eq!(1, damerau_levenshtein("BAR", "BOR"));
assert_eq!(1, damerau_levenshtein("hansi", "hasni"));
assert_eq!(2, damerau_levenshtein("zzaabbio", "zzababoi"));
assert_eq!(1, damerau_levenshtein("zzaabb", "zzabab"));
assert_eq!(3, damerau_levenshtein("abcdef", "badcfe"));
assert_eq!(1, damerau_levenshtein("klmb", "klm"));
assert_eq!(1, damerau_levenshtein("klm", "klmb"));
}
#[test]
fn empty() {
assert_eq!(0, damerau_levenshtein("", ""));
assert_eq!(3, damerau_levenshtein("", "foo"));
assert_eq!(3, damerau_levenshtein("bar", ""));
}
#[test]
fn cases() {
assert_eq!(1, damerau_levenshtein("Hello", "hello"));
assert_eq!(1, damerau_levenshtein("World", "world"));
}
#[test]
fn same() {
assert_eq!(0, damerau_levenshtein("pomme de terre", "pomme de terre"));
}
#[test]
fn reversed() {
assert_eq!(5, damerau_levenshtein("damerau", "iiqjau"));
assert_eq!(5, damerau_levenshtein("iiqjau", "damerau"));
}
#[test]
fn lorem() {
assert_eq!(80, damerau_levenshtein(
"Lorem ipsum dolor sit amet, autem mucius eirmod ea per. Nec adhuc laudem id, vix dolor interesset ea.",
"Id mundi ponderum constituam nam. Nam in legendos definiebas. Pri commune senserit omittantur cu.")
);
}
#[test]
fn unicode() {
assert_eq!(6, damerau_levenshtein("さようなら", "༆˥ʘöpa"));
assert_eq!(3, damerau_levenshtein("abc", "öঙ香"));
assert_eq!(3, damerau_levenshtein("", "öঙ香"));
assert_eq!(1, damerau_levenshtein("よさ", "äさ"));
}
}