edit_distance/
lib.rs

1#![deny(missing_docs)]
2
3//! # Edit distance
4//!
5//! The [Levenshtein edit distance][wikipedia] between two strings is
6//! the number of individual single-character changes (insert, delete,
7//! substitute) necessary to change string `a` into `b`.
8//!
9//! This can be a used to order search results, for fuzzy
10//! auto-completion, and to find candidates for spelling correction.
11//!
12//! ## References
13//! [Wikipedia: Levenshtein distance][wikipedia]  
14//! [NIST: Levenshtein distance][nist]
15//!
16//! [wikipedia]: http://en.wikipedia.org/wiki/Levenshtein_distance
17//! [nist]: http://xlinux.nist.gov/dads/HTML/Levenshtein.html
18
19/// Returns the edit distance between strings `a` and `b`.
20///
21/// The runtime complexity is `O(m*n)`, where `m` and `n` are the
22/// strings' lengths.
23///
24/// # Examples
25///
26/// ```
27/// use edit_distance::edit_distance;
28///
29/// edit_distance("kitten", "sitting"); // => 3
30/// ```
31pub fn edit_distance(a: impl AsRef<str>, b: impl AsRef<str>) -> usize {
32    let len_a = a.as_ref().chars().count();
33    let len_b = b.as_ref().chars().count();
34    if len_a < len_b {
35        return edit_distance(b, a);
36    }
37    // handle special case of 0 length
38    if len_a == 0 {
39        return len_b;
40    } else if len_b == 0 {
41        return len_a;
42    }
43
44    let len_b = len_b + 1;
45
46    let mut pre;
47    let mut tmp;
48    let mut cur = vec![0; len_b];
49
50    // initialize string b
51    for i in 1..len_b {
52        cur[i] = i;
53    }
54
55    // calculate edit distance
56    for (i, ca) in a.as_ref().chars().enumerate() {
57        // get first column for this row
58        pre = cur[0];
59        cur[0] = i + 1;
60        for (j, cb) in b.as_ref().chars().enumerate() {
61            tmp = cur[j + 1];
62            cur[j + 1] = std::cmp::min(
63                // deletion
64                tmp + 1,
65                std::cmp::min(
66                    // insertion
67                    cur[j] + 1,
68                    // match or substitution
69                    pre + if ca == cb { 0 } else { 1 },
70                ),
71            );
72            pre = tmp;
73        }
74    }
75    cur[len_b - 1]
76}