textdistance 1.1.1

Lots of algorithms to compare how similar two sequences are
Documentation
//! Levenshtein distance
use crate::{Algorithm, Result};
use alloc::vec::Vec;

/// [Levenshtein distance] is an edit distance between two sequences.
///
/// It is the minimum number of single-character edits (insertions, deletions or substitutions)
/// required to change one word into the other.
///
/// See also [`DamerauLevenshtein`](crate::DamerauLevenshtein) which is an extended
/// version of this algorithm that also includes transpositions.
///
/// [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
pub struct Levenshtein {
    /// The cost of removing a character.
    pub del_cost: usize,

    /// The cost of adding a new character.
    pub ins_cost: usize,

    /// The cost of replacing a character with another one.
    pub sub_cost: usize,
}

impl Default for Levenshtein {
    fn default() -> Self {
        Self {
            del_cost: 1,
            ins_cost: 1,
            sub_cost: 1,
        }
    }
}

impl Algorithm<usize> for Levenshtein {
    fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<usize>
    where
        C: Iterator<Item = E>,
        E: Eq,
    {
        let s1: Vec<E> = s1.collect();
        let l1 = s1.len();
        if l1 == 0 {
            let l2 = s2.count();
            return Result {
                abs: l2,
                is_distance: true,
                max: l1.max(l2),
                len1: l1,
                len2: l2,
            };
        }

        let mut cache: Vec<usize> = (1..).take(l1).collect();
        let mut dist1;
        let mut dist2;

        let mut result = 0;
        let mut l2 = 0;
        for (i2, c2) in s2.enumerate() {
            result = i2;
            dist1 = i2;
            l2 += 1;

            for (i1, c1) in s1.iter().enumerate() {
                dist2 = if c1 == &c2 {
                    dist1
                } else {
                    dist1 + self.sub_cost
                };
                dist1 = cache[i1];
                result = if dist1 > result {
                    if dist2 > result {
                        result + self.del_cost
                    } else {
                        dist2
                    }
                } else if dist2 > dist1 {
                    dist1 + self.ins_cost
                } else {
                    dist2
                };
                cache[i1] = result;
            }
        }
        if l2 == 0 {
            return Result {
                abs: l1,
                is_distance: true,
                max: l1.max(l2),
                len1: l1,
                len2: l2,
            };
        }
        Result {
            abs: result,
            is_distance: true,
            max: l1.max(l2),
            len1: l1,
            len2: l2,
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::str::levenshtein;
    use assert2::assert;
    use proptest::prelude::*;
    use rstest::rstest;

    #[rstest]
    #[case("", "", 0)]
    #[case("", "\0", 1)]
    #[case("", "abc", 3)]
    #[case("abc", "", 3)]
    #[case("sitting", "sitting", 0)]
    #[case("sitting", "kitten", 3)]
    #[case("example", "samples", 3)]
    #[case("distance", "difference", 5)]
    #[case("test", "text", 1)]
    #[case("test", "tset", 2)]
    #[case("test", "qwe", 4)]
    #[case("test", "testit", 2)]
    #[case("test", "tesst", 1)]
    #[case("test", "tet", 1)]
    // parity with levenshtein-rs
    #[case("sitting", "kitten", 3)]
    #[case("gumbo", "gambol", 2)]
    #[case("saturday", "sunday", 3)]
    #[case("a", "b", 1)]
    #[case("ab", "ac", 1)]
    #[case("ac", "bc", 1)]
    #[case("abc", "axc", 1)]
    #[case("xabxcdxxefxgx", "1ab2cd34ef5g6", 6)]
    #[case("xabxcdxxefxgx", "abcdefg", 6)]
    #[case("javawasneat", "scalaisgreat", 7)]
    #[case("example", "samples", 3)]
    #[case("sturgeon", "urgently", 6)]
    #[case("levenshtein", "frankenstein", 6)]
    #[case("distance", "difference", 5)]
    #[case("kitten", "sitting", 3)]
    #[case("Tier", "Tor", 2)]
    fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) {
        assert!(levenshtein(s1, s2) == exp);
    }

    proptest! {
        #[test]
        fn prop(s1 in ".*", s2 in ".*") {
            let res = levenshtein(&s1, &s2);
            let res2 = levenshtein(&s2, &s1);
            prop_assert_eq!(res, res2);
            prop_assert!(res <= s1.len() || res <= s2.len());
        }
    }
}