agnostic_levenshtein/
lib.rs#![forbid(unsafe_code)]
#![deny(missing_docs)]
#![warn(clippy::pedantic, clippy::nursery, clippy::cargo)]
#![allow(clippy::cast_possible_truncation)]
#[must_use]
pub fn edit_distance(a: &str, b: &str, ascii: bool) -> u32 {
if a == b {
0
} else if ascii {
min_distance(a.as_bytes(), b.as_bytes())
} else {
let a_chars: Vec<char> = a.chars().collect();
let b_chars: Vec<char> = b.chars().collect();
min_distance(&a_chars, &b_chars)
}
}
fn min_distance<T: PartialEq>(a: &[T], b: &[T]) -> u32 {
let m = a.len();
let n = b.len();
if m == 0 {
return n as u32;
}
if n == 0 {
return m as u32;
}
let mut dp: Vec<u32> = (1..).take(m).collect();
for (row, char_b) in b.iter().enumerate() {
let mut left = row as u32;
let mut diag = row as u32;
for (col, char_a) in a.iter().enumerate() {
let insert = left + 1;
let delete = dp[col] + 1;
let subst = if char_a == char_b { diag } else { diag + 1 };
let min_cost = insert.min(delete).min(subst);
diag = dp[col]; left = min_cost;
dp[col] = min_cost;
}
}
dp[m - 1]
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn sitting_kitten() {
assert_eq!(edit_distance("sitting", "kitten", true), 3);
}
#[test]
fn ascii_no_difference() {
let a = "If I were a wise man";
let b = "I would do my part";
let ascii_dist = edit_distance(a, b, true);
let unicode_dist = edit_distance(a, b, false);
assert_eq!(ascii_dist, unicode_dist);
}
#[test]
fn accents_difference() {
let a = "ʿAlī ibn Abī Ṭālib";
let b = "ʿUthmān ibn ʿAffān";
let ascii_dist = edit_distance(a, b, true);
let unicode_dist = edit_distance(a, b, false);
assert_ne!(ascii_dist, unicode_dist);
}
#[test]
fn shahnama_unicode() {
let a = "شاهنامه";
let b = "شهنامه";
assert_eq!(edit_distance(a, b, false), 1);
}
#[test]
fn shahnama_ascii() {
let a = "شاهنامه";
let b = "شهنامه";
assert_eq!(edit_distance(a, b, true), 2);
}
#[test]
fn empty_ascii() {
let a = "levenshtein";
let b = "";
assert_eq!(edit_distance(a, b, true), 11);
}
#[test]
fn empty_unicode() {
let a = "maḥmūd";
let b = "";
assert_eq!(edit_distance(a, b, false), 6);
}
#[test]
fn equal_regardless() {
let a = "Ghiyāth al-Dīn";
let b = "Ghiyāth al-Dīn";
assert_eq!(edit_distance(a, b, true), 0);
}
}