levenshtein_diff/
distance.rs1use std::cmp::{max, min};
2
3use crate::util::*;
4
5pub fn levenshtein_naive<T: PartialEq>(source: &[T], target: &[T]) -> usize {
30 if source.is_empty() || target.is_empty() {
32 return max(source.len(), target.len());
33 }
34
35 if source.last() == target.last() {
36 return levenshtein_naive(up_to_last(source), up_to_last(target));
38 }
39
40 let delete = levenshtein_naive(up_to_last(source), target) + 1;
43 let insert = levenshtein_naive(source, up_to_last(target)) + 1;
44 let substitute = levenshtein_naive(up_to_last(source), up_to_last(target)) + 1;
45
46 min(min(insert, delete), substitute)
47}
48
49pub fn levenshtein_tabulation<T: PartialEq>(source: &[T], target: &[T]) -> (usize, DistanceMatrix) {
72 let m = source.len();
73 let n = target.len();
74
75 let mut distances = get_distance_table(m, n);
77
78 for i in 1..distances.len() {
79 for j in 1..distances[0].len() {
80 if source[i - 1] == target[j - 1] {
81 distances[i][j] = distances[i - 1][j - 1];
83 continue;
84 }
85
86 let delete = distances[i - 1][j] + 1;
87 let insert = distances[i][j - 1] + 1;
88 let substitute = distances[i - 1][j - 1] + 1;
89
90 distances[i][j] = min(min(delete, insert), substitute);
91 }
92 }
93
94 (distances[m][n], distances)
95}
96
97pub fn levenshtein_memoization<T: PartialEq>(
120 source: &[T],
121 target: &[T],
122) -> (usize, DistanceMatrix) {
123 fn levenshtein_memoization_helper<T: PartialEq>(
124 source: &[T],
125 target: &[T],
126 distances: &mut DistanceMatrix,
127 ) -> usize {
128 if distances[source.len()][target.len()] < usize::MAX {
130 return distances[source.len()][target.len()];
131 }
132
133 if source.is_empty() || target.is_empty() {
135 return max(source.len(), target.len());
136 }
137
138 let k = if source.last() == target.last() { 0 } else { 1 };
141
142 let delete = levenshtein_memoization_helper(up_to_last(source), target, distances) + 1;
143 let insert = levenshtein_memoization_helper(source, up_to_last(target), distances) + 1;
144 let substitute =
145 levenshtein_memoization_helper(up_to_last(source), up_to_last(target), distances) + k;
146
147 let distance = min(min(delete, insert), substitute);
148
149 distances[source.len()][target.len()] = distance;
151
152 distance
153 }
154
155 let mut distances = get_distance_table(source.len(), target.len());
156
157 let distance = levenshtein_memoization_helper(source, target, &mut distances);
158
159 (distance, distances)
160}
161
162#[cfg(test)]
163mod tests {
164 use crate::distance::*;
165
166 #[test]
167 fn levenshtein_naive_test() {
168 let s1 = String::from("LAWN");
169 let s2 = String::from("FFLAWANN");
170 let expected_leven = 4;
171
172 let leven_naive = levenshtein_naive(s1.as_bytes(), s2.as_bytes());
173
174 assert_eq!(leven_naive, expected_leven);
175 }
176
177 #[test]
178 fn levenshtein_memoization_test() {
179 let s1 = String::from("LAWN");
180 let s2 = String::from("FFLAWANN");
181 let expected_leven = 4;
182
183 let (leven_memo, _) = levenshtein_memoization(s1.as_bytes(), s2.as_bytes());
184
185 assert_eq!(leven_memo, expected_leven);
186 }
187
188 #[test]
189 fn levenshtein_tabulation_test() {
190 let s1 = String::from("LAWN");
191 let s2 = String::from("FFLAWANN");
192 let expected_leven = 4;
193
194 let (leven_tab, _) = levenshtein_tabulation(s1.as_bytes(), s2.as_bytes());
195
196 assert_eq!(leven_tab, expected_leven);
197 }
198}