athena_rs 3.5.0

Hyper-performant polyglot database driver
Documentation
/// Computes a normalized similarity score between two strings from their Levenshtein distance.
///
/// The edit distance (insertions, deletions and substitutions, each costing 1) is
/// measured in Unicode scalar values (`char`s), not bytes. The score is
/// `1.0 - distance / max(len_a, len_b)`, so identical strings score `1.0` and
/// strings that share nothing score `0.0`.
///
/// # Arguments
///
/// * `a` - First string to compare
/// * `b` - Second string to compare
///
/// # Returns
///
/// A `f64` value between 0.0 and 1.0 indicating the similarity. Two empty strings
/// yield `1.0`; exactly one empty string yields `0.0`.
pub fn fuzzy_str_match(a: &str, b: &str) -> f64 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }

    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();

    // Levenshtein distance is symmetric, so put the shorter string on the row axis:
    // the two working rows then only need `min(len_a, len_b) + 1` cells each.
    let (outer, inner) = if a_chars.len() >= b_chars.len() {
        (a_chars.as_slice(), b_chars.as_slice())
    } else {
        (b_chars.as_slice(), a_chars.as_slice())
    };

    // `prev[j]` is the distance between the already-processed prefix of `outer` and
    // the first `j` chars of `inner`; it starts as the row for the empty prefix.
    let mut prev: Vec<usize> = (0..=inner.len()).collect();
    let mut curr: Vec<usize> = vec![0; inner.len() + 1];

    for (i, &outer_ch) in outer.iter().enumerate() {
        // Turning `i + 1` chars into the empty string takes `i + 1` deletions.
        curr[0] = i + 1;
        for (j, &inner_ch) in inner.iter().enumerate() {
            curr[j + 1] = if outer_ch == inner_ch {
                // Matching chars extend the diagonal at no extra cost.
                prev[j]
            } else {
                1 + prev[j] // substitution
                    .min(curr[j]) // insertion
                    .min(prev[j + 1]) // deletion
            };
        }
        std::mem::swap(&mut prev, &mut curr);
    }

    // After the final swap `prev` holds the last completed row.
    let distance = prev[inner.len()] as f64;
    // Both inputs are non-empty here, so the longer length is at least 1.
    let max_len = outer.len() as f64;
    1.0 - distance / max_len
}