texcraft_stdext/algorithms/
spellcheck.rs1use crate::collections::circularbuffer::CircularBuffer;
64use colored::*;
65use std::ops::Index;
66
67pub fn find_close_words(dictionary: Vec<String>, word: &str) -> Vec<WordDiff> {
72    let size_hint = dictionary.len();
74    let mut comparisons = Vec::with_capacity(size_hint);
79    for valid_word in dictionary {
80        let comparison = levenshtein_distance(word, valid_word.as_str()); comparisons.push(comparison);
82    }
83    comparisons.sort_by(|a, b| a.distance.cmp(&b.distance));
84    comparisons
85}
86
87fn levenshtein_distance(a: &str, b: &str) -> WordDiff {
88    let a: Vec<char> = a.chars().collect();
89    let b: Vec<char> = b.chars().collect();
90
91    let m = b.len();
92    let mut c = CircularBuffer::new(m + 2);
93    let idx_modify = m + 1;
94    let idx_subtract = m;
95    let idx_add = 0;
96
97    c.push(WordDiff {
99        distance: 0,
100        ops: Vec::with_capacity(std::cmp::max(a.len(), b.len())),
101    });
102
103    for b_j in &b {
104        let mut cmp = c.clone_to_front(idx_add);
107        cmp.ops.push(DiffOp::Add(*b_j));
108        cmp.distance += 1;
109    }
110
111    for a_i in &a {
112        let mut cmp = c.clone_to_front(idx_subtract);
116        cmp.ops.push(DiffOp::Subtract(*a_i));
117        cmp.distance += 1;
118
119        for b_j in &b {
120            let (idx_to_clone, diff, distance_delta) = if a_i == b_j {
122                (idx_modify, DiffOp::Keep(*a_i), 0)
123            } else {
124                let cost_modify = c.index(idx_modify).distance;
125                let cost_add = c.index(idx_add).distance;
126                let cost_subtract = c.index(idx_subtract).distance;
127                if cost_modify <= std::cmp::min(cost_subtract, cost_add) {
128                    (idx_modify, DiffOp::Modify(*a_i, *b_j), 1)
129                } else if cost_subtract <= std::cmp::min(cost_modify, cost_add) {
130                    (idx_subtract, DiffOp::Subtract(*a_i), 1)
131                } else {
132                    (idx_add, DiffOp::Add(*b_j), 1)
133                }
134            };
135
136            let mut cmp = c.clone_to_front(idx_to_clone);
137            cmp.ops.push(diff);
138            cmp.distance += distance_delta;
139        }
140    }
141    c.index(0).clone()
142}
143
144#[derive(Debug, Eq, PartialEq, Copy, Clone)]
145pub enum DiffOp {
146    Keep(char),
147    Add(char),
148    Subtract(char),
149    Modify(char, char),
150}
151
152impl DiffOp {
153    #[cfg(test)]
154    fn invert(&self) -> DiffOp {
155        match self {
156            DiffOp::Keep(c) => DiffOp::Keep(*c),
157            DiffOp::Add(c) => DiffOp::Subtract(*c),
158            DiffOp::Subtract(c) => DiffOp::Add(*c),
159            DiffOp::Modify(a, b) => DiffOp::Modify(*b, *a),
160        }
161    }
162
163    #[cfg(test)]
164    fn distance(&self) -> usize {
165        match self {
166            DiffOp::Keep(_) => 0,
167            DiffOp::Add(_) => 1,
168            DiffOp::Subtract(_) => 1,
169            DiffOp::Modify(_, _) => 1,
170        }
171    }
172
173    fn left(&self) -> Option<char> {
174        match self {
175            DiffOp::Keep(c) => Some(*c),
176            DiffOp::Add(_) => None,
177            DiffOp::Subtract(c) => Some(*c),
178            DiffOp::Modify(c, _) => Some(*c),
179        }
180    }
181
182    fn right(&self) -> Option<char> {
183        match self {
184            DiffOp::Keep(c) => Some(*c),
185            DiffOp::Add(c) => Some(*c),
186            DiffOp::Subtract(_) => None,
187            DiffOp::Modify(_, c) => Some(*c),
188        }
189    }
190
191    fn colored(&self) -> colored::ColoredString {
192        match self {
193            DiffOp::Keep(c) => c.to_string().normal(),
194            DiffOp::Add(c) => c.to_string().on_green(), DiffOp::Subtract(c) => c.to_string().on_red(),
196            DiffOp::Modify(_, c) => c.to_string().on_yellow(), }
198    }
199}
200
201#[derive(Debug, Eq, PartialEq)]
202pub struct WordDiff {
203    distance: usize,
204    ops: Vec<DiffOp>,
205}
206
207impl WordDiff {
208    pub fn left(&self) -> String {
209        self.ops.iter().filter_map(DiffOp::left).collect()
210    }
211
212    pub fn right(&self) -> String {
213        self.ops.iter().filter_map(DiffOp::right).collect()
214    }
215
216    pub fn colored(&self) -> String {
217        let mut s = String::default();
218        for diff in self.ops.iter() {
219            s = format!["{}{}", s, diff.colored()];
220        }
221        s
222    }
223}
224
225impl Clone for WordDiff {
226    fn clone(&self) -> Self {
227        let mut cmp = WordDiff {
228            distance: self.distance,
229            ops: Vec::with_capacity(self.ops.capacity()),
230        };
231        cmp.ops.clone_from(&self.ops);
232        cmp
233    }
234
235    fn clone_from(&mut self, source: &Self) {
236        self.distance = source.distance;
237        self.ops.clear();
238        for diff in source.ops.iter() {
239            self.ops.push(*diff);
240        }
241    }
242}
243
244#[cfg(test)]
245mod tests {
246    use super::*;
247
248    macro_rules! levenshtein_tests {
249        ($( ($name: ident, $a: expr, $b: expr, $i: expr),)+) => {
250            $(
251            #[test]
252            fn $name() {
253                let mut cmp = WordDiff {
254                    distance: 0,
255                    ops: $i,
256                };
257                for diff in cmp.ops.iter() {
258                    cmp.distance += diff.distance();
259                }
260                assert_eq![levenshtein_distance($a, $b), cmp];
261
262                let mut inverse_diffs = Vec::new();
263                for diff in cmp.ops.iter() {
264                    inverse_diffs.push(diff.invert());
265                }
266                let cmp = WordDiff {
267                    distance: cmp.distance,
268                    ops: inverse_diffs,
269                };
270                assert_eq![levenshtein_distance($b, $a), cmp];
271            }
272            )+
273        };
274    }
275
276    levenshtein_tests![
277        (case_1, "", "", Vec::<DiffOp>::new()),
278        (case_2, "a", "", vec![DiffOp::Subtract('a')]),
279        (case_3, "a", "a", vec![DiffOp::Keep('a')]),
280        (case_4, "a", "b", vec![DiffOp::Modify('a', 'b')]),
281        (
282            case_5,
283            "aa",
284            "a",
285            vec![DiffOp::Subtract('a'), DiffOp::Keep('a')]
286        ),
287        (
288            case_6,
289            "aa",
290            "ab",
291            vec![DiffOp::Keep('a'), DiffOp::Modify('a', 'b')]
292        ),
293        (
294            case_7,
295            "abb",
296            "acbb",
297            vec![
298                DiffOp::Keep('a'),
299                DiffOp::Add('c'),
300                DiffOp::Keep('b'),
301                DiffOp::Keep('b'),
302            ]
303        ),
304        (
305            case_8,
306            "aabb",
307            "abb",
308            vec![
309                DiffOp::Subtract('a'),
310                DiffOp::Keep('a'),
311                DiffOp::Keep('b'),
312                DiffOp::Keep('b'),
313            ]
314        ),
315        (
316            case_9,
317            "james",
318            "laura",
319            vec![
320                DiffOp::Modify('j', 'l'),
321                DiffOp::Keep('a'),
322                DiffOp::Modify('m', 'u'),
323                DiffOp::Modify('e', 'r'),
324                DiffOp::Modify('s', 'a'),
325            ]
326        ),
327        (
328            case_10,
329            "ab12345e",
330            "a12345de",
331            vec![
332                DiffOp::Keep('a'),
333                DiffOp::Subtract('b'),
334                DiffOp::Keep('1'),
335                DiffOp::Keep('2'),
336                DiffOp::Keep('3'),
337                DiffOp::Keep('4'),
338                DiffOp::Keep('5'),
339                DiffOp::Add('d'),
340                DiffOp::Keep('e'),
341            ]
342        ),
343    ];
344
345    #[test]
346    fn find_close_words_test() {
347        let dictionary = vec!["james".to_string(), "laura".to_string(), "mint".to_string()];
348        let word = "janes";
349        let result = find_close_words(dictionary, &word);
350
351        assert_eq![result[0].right(), "james"];
352        assert_eq![result[1].right(), "laura"];
353        assert_eq![result[2].right(), "mint"];
354        assert_eq![result.len(), 3];
355    }
356}