agnostic_levenshtein/
lib.rs1#![forbid(unsafe_code)]
12#![deny(missing_docs)]
13#![warn(clippy::pedantic, clippy::nursery, clippy::cargo)]
14#![allow(clippy::cast_possible_truncation)]
15
16use std::mem::swap;
17
18#[must_use]
21pub fn edit_distance(a: &str, b: &str, ascii: bool) -> u32 {
22 if a == b {
24 return 0;
25 }
26
27 if a.is_empty() {
28 if ascii {
29 return b.len() as u32;
30 }
31 return b.chars().count() as u32;
32 }
33
34 if b.is_empty() {
35 if ascii {
36 return a.len() as u32;
37 }
38 return a.chars().count() as u32;
39 }
40
41 if ascii {
42 min_distance(a.as_bytes(), b.as_bytes())
43 } else {
44 let a_chars: Vec<char> = a.chars().collect();
45 let b_chars: Vec<char> = b.chars().collect();
46 min_distance(&a_chars, &b_chars)
47 }
48}
49
50fn min_distance<T: PartialEq>(a: &[T], b: &[T]) -> u32 {
51 let m = a.len();
53
54 let mut dp_prev: Vec<u32> = (0..=m as u32).collect();
57 let mut dp_curr: Vec<u32> = vec![0; m + 1];
58
59 for (i, b_char) in b.iter().enumerate() {
60 dp_curr[0] = i as u32 + 1;
62
63 for j in 1..=m {
64 if a[j - 1] == *b_char {
65 dp_curr[j] = dp_prev[j - 1];
66 continue;
67 }
68
69 let insert = dp_curr[j - 1] + 1;
70 let delete = dp_prev[j] + 1;
71 let substitute = dp_prev[j - 1] + 1;
72
73 dp_curr[j] = insert.min(delete).min(substitute);
74 }
75
76 swap(&mut dp_prev, &mut dp_curr);
78 }
79
80 dp_prev[m]
81}
82
83#[cfg(test)]
84mod tests {
85 use super::*;
86
87 #[test]
88 fn sitting_kitten() {
89 assert_eq!(edit_distance("sitting", "kitten", true), 3);
90 }
91
92 #[test]
93 fn ascii_no_difference() {
94 let a = "If I were a wise man";
95 let b = "I would do my part";
96 let ascii_dist = edit_distance(a, b, true);
97 let unicode_dist = edit_distance(a, b, false);
98 assert_eq!(ascii_dist, unicode_dist);
99 }
100
101 #[test]
102 fn accents_difference() {
103 let a = "ʿAlī ibn Abī Ṭālib";
104 let b = "ʿUthmān ibn ʿAffān";
105 let ascii_dist = edit_distance(a, b, true);
106 let unicode_dist = edit_distance(a, b, false);
107 assert_ne!(ascii_dist, unicode_dist);
108 }
109
110 #[test]
111 fn shahnama_unicode() {
112 let a = "شاهنامه";
113 let b = "شهنامه";
114 assert_eq!(edit_distance(a, b, false), 1);
115 }
116
117 #[test]
118 fn shahnama_ascii() {
119 let a = "شاهنامه";
120 let b = "شهنامه";
121 assert_eq!(edit_distance(a, b, true), 2);
122 }
123
124 #[test]
125 fn empty_ascii() {
126 let a = "levenshtein";
127 let b = "";
128 assert_eq!(edit_distance(a, b, true), 11);
129 }
130
131 #[test]
132 fn empty_unicode() {
133 let a = "maḥmūd";
134 let b = "";
135 assert_eq!(edit_distance(a, b, false), 6);
136 }
137
138 #[test]
139 fn equal_regardless() {
140 let a = "Ghiyāth al-Dīn";
141 let b = "Ghiyāth al-Dīn";
142 assert_eq!(edit_distance(a, b, true), 0);
143 }
144}