texcraft_stdext/algorithms/
spellcheck.rs1use crate::collections::circularbuffer::CircularBuffer;
64use colored::*;
65use std::ops::Index;
66
67pub fn find_close_words(dictionary: Vec<String>, word: &str) -> Vec<WordDiff> {
72 let size_hint = dictionary.len();
74 let mut comparisons = Vec::with_capacity(size_hint);
79 for valid_word in dictionary {
80 let comparison = levenshtein_distance(word, valid_word.as_str()); comparisons.push(comparison);
82 }
83 comparisons.sort_by(|a, b| a.distance.cmp(&b.distance));
84 comparisons
85}
86
87fn levenshtein_distance(a: &str, b: &str) -> WordDiff {
88 let a: Vec<char> = a.chars().collect();
89 let b: Vec<char> = b.chars().collect();
90
91 let m = b.len();
92 let mut c = CircularBuffer::new(m + 2);
93 let idx_modify = m + 1;
94 let idx_subtract = m;
95 let idx_add = 0;
96
97 c.push(WordDiff {
99 distance: 0,
100 ops: Vec::with_capacity(std::cmp::max(a.len(), b.len())),
101 });
102
103 for b_j in &b {
104 let mut cmp = c.clone_to_front(idx_add);
107 cmp.ops.push(DiffOp::Add(*b_j));
108 cmp.distance += 1;
109 }
110
111 for a_i in &a {
112 let mut cmp = c.clone_to_front(idx_subtract);
116 cmp.ops.push(DiffOp::Subtract(*a_i));
117 cmp.distance += 1;
118
119 for b_j in &b {
120 let (idx_to_clone, diff, distance_delta) = if a_i == b_j {
122 (idx_modify, DiffOp::Keep(*a_i), 0)
123 } else {
124 let cost_modify = c.index(idx_modify).distance;
125 let cost_add = c.index(idx_add).distance;
126 let cost_subtract = c.index(idx_subtract).distance;
127 if cost_modify <= std::cmp::min(cost_subtract, cost_add) {
128 (idx_modify, DiffOp::Modify(*a_i, *b_j), 1)
129 } else if cost_subtract <= std::cmp::min(cost_modify, cost_add) {
130 (idx_subtract, DiffOp::Subtract(*a_i), 1)
131 } else {
132 (idx_add, DiffOp::Add(*b_j), 1)
133 }
134 };
135
136 let mut cmp = c.clone_to_front(idx_to_clone);
137 cmp.ops.push(diff);
138 cmp.distance += distance_delta;
139 }
140 }
141 c.index(0).clone()
142}
143
144#[derive(Debug, Eq, PartialEq, Copy, Clone)]
145pub enum DiffOp {
146 Keep(char),
147 Add(char),
148 Subtract(char),
149 Modify(char, char),
150}
151
152impl DiffOp {
153 #[cfg(test)]
154 fn invert(&self) -> DiffOp {
155 match self {
156 DiffOp::Keep(c) => DiffOp::Keep(*c),
157 DiffOp::Add(c) => DiffOp::Subtract(*c),
158 DiffOp::Subtract(c) => DiffOp::Add(*c),
159 DiffOp::Modify(a, b) => DiffOp::Modify(*b, *a),
160 }
161 }
162
163 #[cfg(test)]
164 fn distance(&self) -> usize {
165 match self {
166 DiffOp::Keep(_) => 0,
167 DiffOp::Add(_) => 1,
168 DiffOp::Subtract(_) => 1,
169 DiffOp::Modify(_, _) => 1,
170 }
171 }
172
173 fn left(&self) -> Option<char> {
174 match self {
175 DiffOp::Keep(c) => Some(*c),
176 DiffOp::Add(_) => None,
177 DiffOp::Subtract(c) => Some(*c),
178 DiffOp::Modify(c, _) => Some(*c),
179 }
180 }
181
182 fn right(&self) -> Option<char> {
183 match self {
184 DiffOp::Keep(c) => Some(*c),
185 DiffOp::Add(c) => Some(*c),
186 DiffOp::Subtract(_) => None,
187 DiffOp::Modify(_, c) => Some(*c),
188 }
189 }
190
191 fn colored(&self) -> colored::ColoredString {
192 match self {
193 DiffOp::Keep(c) => c.to_string().normal(),
194 DiffOp::Add(c) => c.to_string().on_green(), DiffOp::Subtract(c) => c.to_string().on_red(),
196 DiffOp::Modify(_, c) => c.to_string().on_yellow(), }
198 }
199}
200
201#[derive(Debug, Eq, PartialEq)]
202pub struct WordDiff {
203 distance: usize,
204 ops: Vec<DiffOp>,
205}
206
207impl WordDiff {
208 pub fn left(&self) -> String {
209 self.ops.iter().filter_map(DiffOp::left).collect()
210 }
211
212 pub fn right(&self) -> String {
213 self.ops.iter().filter_map(DiffOp::right).collect()
214 }
215
216 pub fn colored(&self) -> String {
217 let mut s = String::default();
218 for diff in self.ops.iter() {
219 s = format!["{}{}", s, diff.colored()];
220 }
221 s
222 }
223}
224
225impl Clone for WordDiff {
226 fn clone(&self) -> Self {
227 let mut cmp = WordDiff {
228 distance: self.distance,
229 ops: Vec::with_capacity(self.ops.capacity()),
230 };
231 cmp.ops.clone_from(&self.ops);
232 cmp
233 }
234
235 fn clone_from(&mut self, source: &Self) {
236 self.distance = source.distance;
237 self.ops.clear();
238 for diff in source.ops.iter() {
239 self.ops.push(*diff);
240 }
241 }
242}
243
244#[cfg(test)]
245mod tests {
246 use super::*;
247
248 macro_rules! levenshtein_tests {
249 ($( ($name: ident, $a: expr, $b: expr, $i: expr),)+) => {
250 $(
251 #[test]
252 fn $name() {
253 let mut cmp = WordDiff {
254 distance: 0,
255 ops: $i,
256 };
257 for diff in cmp.ops.iter() {
258 cmp.distance += diff.distance();
259 }
260 assert_eq![levenshtein_distance($a, $b), cmp];
261
262 let mut inverse_diffs = Vec::new();
263 for diff in cmp.ops.iter() {
264 inverse_diffs.push(diff.invert());
265 }
266 let cmp = WordDiff {
267 distance: cmp.distance,
268 ops: inverse_diffs,
269 };
270 assert_eq![levenshtein_distance($b, $a), cmp];
271 }
272 )+
273 };
274 }
275
276 levenshtein_tests![
277 (case_1, "", "", Vec::<DiffOp>::new()),
278 (case_2, "a", "", vec![DiffOp::Subtract('a')]),
279 (case_3, "a", "a", vec![DiffOp::Keep('a')]),
280 (case_4, "a", "b", vec![DiffOp::Modify('a', 'b')]),
281 (
282 case_5,
283 "aa",
284 "a",
285 vec![DiffOp::Subtract('a'), DiffOp::Keep('a')]
286 ),
287 (
288 case_6,
289 "aa",
290 "ab",
291 vec![DiffOp::Keep('a'), DiffOp::Modify('a', 'b')]
292 ),
293 (
294 case_7,
295 "abb",
296 "acbb",
297 vec![
298 DiffOp::Keep('a'),
299 DiffOp::Add('c'),
300 DiffOp::Keep('b'),
301 DiffOp::Keep('b'),
302 ]
303 ),
304 (
305 case_8,
306 "aabb",
307 "abb",
308 vec![
309 DiffOp::Subtract('a'),
310 DiffOp::Keep('a'),
311 DiffOp::Keep('b'),
312 DiffOp::Keep('b'),
313 ]
314 ),
315 (
316 case_9,
317 "james",
318 "laura",
319 vec![
320 DiffOp::Modify('j', 'l'),
321 DiffOp::Keep('a'),
322 DiffOp::Modify('m', 'u'),
323 DiffOp::Modify('e', 'r'),
324 DiffOp::Modify('s', 'a'),
325 ]
326 ),
327 (
328 case_10,
329 "ab12345e",
330 "a12345de",
331 vec![
332 DiffOp::Keep('a'),
333 DiffOp::Subtract('b'),
334 DiffOp::Keep('1'),
335 DiffOp::Keep('2'),
336 DiffOp::Keep('3'),
337 DiffOp::Keep('4'),
338 DiffOp::Keep('5'),
339 DiffOp::Add('d'),
340 DiffOp::Keep('e'),
341 ]
342 ),
343 ];
344
345 #[test]
346 fn find_close_words_test() {
347 let dictionary = vec!["james".to_string(), "laura".to_string(), "mint".to_string()];
348 let word = "janes";
349 let result = find_close_words(dictionary, &word);
350
351 assert_eq![result[0].right(), "james"];
352 assert_eq![result[1].right(), "laura"];
353 assert_eq![result[2].right(), "mint"];
354 assert_eq![result.len(), 3];
355 }
356}