athena_rs 3.5.0

Hyper performant polyglot Database driver
Documentation
//! Levenshtein distance calculation for string similarity.
//!
//! Returns the number of single-character edits (insertions, deletions, substitutions)
//! required to transform `s1` into `s2`.
//!
//! # Complexity
//!
//! The time complexity of the algorithm is O(n * m), where n and m are the lengths of the two strings.
//! The space complexity is O(n * m), as we need to store the entire dynamic programming table.
//!
//! # References
//!
//! - [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance)
//! - [Normalized Levenshtein distance](https://en.wikipedia.org/wiki/Normalized_Levenshtein_distance)

/// Compute the Levenshtein distance between two string slices.
/// Returns the number of single-character edits (insertions, deletions, substitutions)
/// required to transform `s1` into `s2`.
///
/// # Arguments
///
/// * `s1` - The first string slice to compare
/// * `s2` - The second string slice to compare
///
/// # Returns
///
/// # Examples
///
/// ```
/// let distance = levenshtein_distance("hello", "world");
/// assert_eq!(distance, 4);
/// ```
///
pub fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let len1: usize = s1.len();
    let len2: usize = s2.len();
    let mut dp: Vec<Vec<usize>> = vec![vec![0; len2 + 1]; len1 + 1];

    for i in 0..=len1 {
        dp[i][0] = i;
    }
    for j in 0..=len2 {
        dp[0][j] = j;
    }

    for i in 1..=len1 {
        for j in 1..=len2 {
            let cost = if s1.chars().nth(i - 1) == s2.chars().nth(j - 1) {
                0
            } else {
                1
            };
            dp[i][j] = *[dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + cost]
                .iter()
                .min()
                .unwrap();
        }
    }

    dp[len1][len2]
}

/// Compute the normalized Levenshtein distance between two string slices.
/// Returns a distance between 0.0 (completely different) and 1.0 (identical).
///
/// # Arguments
///
/// * `s1` - The first string slice to compare
/// * `s2` - The second string slice to compare
///
/// # Returns
///
/// A distance between 0.0 (completely different) and 1.0 (identical).
///
/// # Examples
///
/// ```
/// let distance = levenshtein_distance_normalized("hello", "world");
/// assert_eq!(distance, 0.5);
/// ```
pub fn levenshtein_distance_normalized(s1: &str, s2: &str) -> f64 {
    let distance: usize = levenshtein_distance(s1, s2);
    let max_len: usize = std::cmp::max(s1.len(), s2.len());
    if max_len == 0 {
        1.0
    } else {
        distance as f64 / max_len as f64
    }
}