textdistance 1.1.1

Lots of algorithms to compare how similar two sequences are
Documentation
//! MLIPNS similarity
use super::hamming::Hamming;
use crate::{Algorithm, Result};
use core::hash::Hash;

/// [MLIPNS similarity] is a normalization for [`Hamming`] that returns either 0 or 1.
///
/// MLIPNS stands for Modified Language-Independent Product Name Search.
///
/// The [`Hamming`] result of the two sequences is reduced to a yes/no verdict:
/// `1` when the sequences are considered similar and `0` otherwise.
///
/// [MLIPNS similarity]: https://www.sial.iias.spb.su/files/386-386-1-PB.pdf
pub struct MLIPNS {
    /// Hamming algorithm whose result is turned into the 0/1 verdict.
    hamming: Hamming,
    /// Upper bound (inclusive) on the ratio of the Hamming value to the Hamming
    /// result's `max` for the sequences to count as similar. Defaults to `0.25`.
    threshold: f64,
    /// How many times one mismatch may be discounted (the Hamming value and the
    /// length are both decremented) before giving up. Defaults to `2`.
    max_mismatches: usize,
}

impl Default for MLIPNS {
    /// Builds the stock configuration: a default [`Hamming`], a threshold of
    /// `0.25` and at most `2` discounted mismatches.
    fn default() -> Self {
        let hamming = Hamming::default();
        let threshold = 0.25;
        let max_mismatches = 2;
        Self {
            hamming,
            threshold,
            max_mismatches,
        }
    }
}

impl MLIPNS {
    /// Decides whether a Hamming result is close enough to count as a match.
    ///
    /// Starting from the result's `max` and value, up to `max_mismatches + 1`
    /// rounds are tried. A round succeeds when the remaining length is zero or
    /// when the ratio of the remaining value to the remaining length does not
    /// exceed `threshold`. A failed round discounts one mismatch by lowering
    /// both numbers by one. Once the rounds are used up, the answer is `true`
    /// only if the remaining length has reached zero.
    fn check(&self, ham: &Result<usize>) -> bool {
        let mut remaining_len = ham.max;
        let mut remaining_diff = ham.val();
        for _ in 0..=self.max_mismatches {
            if remaining_len == 0 {
                return true;
            }
            let agreeing = (remaining_len - remaining_diff) as f64;
            let mismatch_ratio = 1.0 - agreeing / remaining_len as f64;
            if mismatch_ratio <= self.threshold {
                return true;
            }
            // A zero `remaining_diff` gives a ratio of exactly 0.0, so with a
            // non-negative threshold the function has already returned and
            // this subtraction never starts from zero.
            remaining_diff -= 1;
            remaining_len -= 1;
        }
        remaining_len == 0
    }
}

impl Algorithm<usize> for MLIPNS {
    /// Runs [`Hamming`] over the two iterators and collapses its result into a
    /// similarity of `1` (match) or `0` (no match), keeping the sequence
    /// lengths reported by the Hamming result.
    fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<usize>
    where
        C: Iterator<Item = E>,
        E: Eq + Hash,
    {
        let hamming_result = self.hamming.for_iter(s1, s2);
        let is_match = self.check(&hamming_result);
        let (len1, len2) = (hamming_result.len1, hamming_result.len2);
        Result {
            abs: usize::from(is_match),
            is_distance: false,
            max: 1,
            len1,
            len2,
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::str::mlipns;
    use assert2::assert;
    use rstest::rstest;

    /// Checks the string-level `mlipns` helper against known 0/1 verdicts.
    #[rstest]
    #[case("", "", 1)]
    // parity with abydos and talisman
    #[case("cat", "hat", 1)]
    #[case("Niall", "Neil", 0)]
    #[case("aluminum", "Catalan", 0)]
    #[case("ATCG", "TAGC", 0)]
    fn function_str(#[case] left: &str, #[case] right: &str, #[case] expected: usize) {
        let actual = mlipns(left, right);
        assert!(actual == expected);
    }
}