edit_distance/lib.rs
1#![deny(missing_docs)]
2
3//! # Edit distance
4//!
5//! The [Levenshtein edit distance][wikipedia] between two strings is
6//! the number of individual single-character changes (insert, delete,
7//! substitute) necessary to change string `a` into `b`.
8//!
9//! This can be a used to order search results, for fuzzy
10//! auto-completion, and to find candidates for spelling correction.
11//!
12//! ## References
13//! [Wikipedia: Levenshtein distance][wikipedia]
14//! [NIST: Levenshtein distance][nist]
15//!
16//! [wikipedia]: http://en.wikipedia.org/wiki/Levenshtein_distance
17//! [nist]: http://xlinux.nist.gov/dads/HTML/Levenshtein.html
18
19/// Returns the edit distance between strings `a` and `b`.
20///
21/// The runtime complexity is `O(m*n)`, where `m` and `n` are the
22/// strings' lengths.
23///
24/// # Examples
25///
26/// ```
27/// use edit_distance::edit_distance;
28///
29/// edit_distance("kitten", "sitting"); // => 3
30/// ```
31pub fn edit_distance(a: impl AsRef<str>, b: impl AsRef<str>) -> usize {
32 let len_a = a.as_ref().chars().count();
33 let len_b = b.as_ref().chars().count();
34 if len_a < len_b {
35 return edit_distance(b, a);
36 }
37 // handle special case of 0 length
38 if len_a == 0 {
39 return len_b;
40 } else if len_b == 0 {
41 return len_a;
42 }
43
44 let len_b = len_b + 1;
45
46 let mut pre;
47 let mut tmp;
48 let mut cur = vec![0; len_b];
49
50 // initialize string b
51 for i in 1..len_b {
52 cur[i] = i;
53 }
54
55 // calculate edit distance
56 for (i, ca) in a.as_ref().chars().enumerate() {
57 // get first column for this row
58 pre = cur[0];
59 cur[0] = i + 1;
60 for (j, cb) in b.as_ref().chars().enumerate() {
61 tmp = cur[j + 1];
62 cur[j + 1] = std::cmp::min(
63 // deletion
64 tmp + 1,
65 std::cmp::min(
66 // insertion
67 cur[j] + 1,
68 // match or substitution
69 pre + if ca == cb { 0 } else { 1 },
70 ),
71 );
72 pre = tmp;
73 }
74 }
75 cur[len_b - 1]
76}