1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
#![deny(missing_docs)] //! # Edit distance //! //! The [Levenshtein edit distance][wikipedia] between two strings is //! the number of individual single-character changes (insert, delete, //! substitute) necessary to change string `a` into `b`. //! //! This can be a used to order search results, for fuzzy //! auto-completion, and to find candidates for spelling correction. //! //! ## References //! [Wikipedia: Levenshtein distance][wikipedia] //! [NIST: Levenshtein distance][nist] //! //! [wikipedia]: http://en.wikipedia.org/wiki/Levenshtein_distance //! [nist]: http://xlinux.nist.gov/dads/HTML/Levenshtein.html /// Returns the edit distance between strings `a` and `b`. /// /// The runtime complexity is `O(m*n)`, where `m` and `n` are the /// strings' lengths. /// /// # Examples /// /// ``` /// use edit_distance::edit_distance; /// /// edit_distance("kitten", "sitting"); // => 3 /// ``` pub fn edit_distance(a: &str, b: &str) -> usize { let len_a = a.chars().count(); let len_b = b.chars().count(); if len_a < len_b{ return edit_distance(b, a) } // handle special case of 0 length if len_a == 0 { return len_b } else if len_b == 0 { return len_a } let len_b = len_b + 1; let mut pre; let mut tmp; let mut cur = vec![0; len_b]; // initialize string b for i in 1..len_b { cur[i] = i; } // calculate edit distance for (i,ca) in a.chars().enumerate() { // get first column for this row pre = cur[0]; cur[0] = i + 1; for (j, cb) in b.chars().enumerate() { tmp = cur[j + 1]; cur[j + 1] = std::cmp::min( // deletion tmp + 1, std::cmp::min( // insertion cur[j] + 1, // match or substitution pre + if ca == cb { 0 } else { 1 })); pre = tmp; } } cur[len_b - 1] }