1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
// Copyright (c) 2016. See AUTHORS file.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use utils;

/// Calculates the Levenshtein distance between two strings.
/// 
/// # Levenshtein distance
/// The [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) is the number of per-character changes (insertion, deletion & substitution)
/// that are neccessary to convert one string into annother.
/// This implementation does fully support unicode strings.
///
/// ## Complexity 
/// m := len(s) + 1  
/// n := len(t) + 1  
///
/// Time complexity:   O(mn)   
/// Space complexity:  O(mn)
///
/// ## Examples
/// ```
/// use distance::*;
/// 
/// // Levenshtein distance
/// let distance = levenshtein("hannah", "hanna");   
/// assert_eq!(1, distance);
/// ```
///
pub fn levenshtein(s: &str, t: &str) -> usize {
    // get length of unicode chars
    let len_s = s.chars().count();
    let len_t = t.chars().count();

    // initialize the matrix
    let mut mat: Vec<Vec<usize>> = vec![vec![0; len_t + 1]; len_s + 1];
    for i in 1..(len_s + 1) { 
        mat[i][0] = i; 
    }
    for i in 1..(len_t + 1) { 
        mat[0][i] = i; 
    }

    // apply edit operations
    for (i, s_char) in s.chars().enumerate() {
        for (j, t_char) in t.chars().enumerate() {
            let substitution = if s_char == t_char {0} else {1};
            mat[i+1][j+1] = utils::min3(
                mat[i][j+1] + 1,            // deletion
                mat[i+1][j] + 1,            // insertion 
                mat[i][j] + substitution    // substitution
            );
        }
    }

    return mat[len_s][len_t];
}

#[cfg(test)]
mod tests {
    use super::levenshtein;

    #[test]
    fn basic() {
        assert_eq!(3, levenshtein("kitten", "sitting"));
        assert_eq!(2, levenshtein("book", "back"));
        assert_eq!(5, levenshtein("table", "dinner"));
        assert_eq!(2, levenshtein("person", "pardon"));
        assert_eq!(1, levenshtein("person", "persons"));
    }

    #[test]
    fn equal() {
        assert_eq!(0, levenshtein("kitten", "kitten"));
        assert_eq!(0, levenshtein("a", "a"));
    }

    #[test]
    fn cases() {
        assert_eq!(1, levenshtein("Hello", "hello"));
        assert_eq!(1, levenshtein("World", "world"));
    }

    #[test]
    fn empty() {
        assert_eq!(4, levenshtein("book", ""));
        assert_eq!(4, levenshtein("", "book"));
        assert_eq!(0, levenshtein("", ""));
    }

    #[test]
    fn unicode() {
        assert_eq!(2, levenshtein("Späße", "Spaß"));
        assert_eq!(5, levenshtein("さようなら", "こんにちは"));
        assert_eq!(1, levenshtein("さようなら", "さようなう"));
        assert_eq!(4, levenshtein("こんにちは", "こんにちは abc"));
        assert_eq!(1, levenshtein("༆༃ʘ", "༆˥ʘ"));
    }

}