textdistance 1.1.1

Lots of algorithms to compare how similar two sequences are
Documentation
//! Roberts similarity
#![cfg(feature = "std")]
use crate::counter::Counter;
use crate::{Algorithm, Result};

/// [Roberts similarity].
///
/// Both sequences are compared as bags of elements. With `x` and `y` being
/// the number of times an element occurs in the first and in the second
/// sequence, the similarity is:
///
/// ```text
/// sum over elements present in both of (x + y) * min(x, y) / max(x, y)
/// --------------------------------------------------------------------
///                 sum over all elements of (x + y)
/// ```
///
/// Two empty sequences are reported as fully similar (1.0).
///
/// The metric is always normalized on the interval from 0.0 to 1.0.
///
/// [Roberts similarity]: https://github.com/chrislit/abydos/blob/master/abydos/distance/_roberts.py
#[derive(Debug, Clone, Default)]
pub struct Roberts {}

impl Algorithm<f64> for Roberts {
    /// Computes the Roberts similarity of two element streams.
    ///
    /// Each iterator is collected into a `Counter`. For every key of the
    /// merged counter the two per-side counts `a` and `b` (0 when the key is
    /// missing on a side) contribute `a + b` to the denominator, and, when
    /// both are non-zero, `(a + b) * min(a, b) / max(a, b)` to the numerator.
    ///
    /// The returned `Result` is a similarity (`is_distance: false`) with
    /// `max` fixed at 1.0; `len1` and `len2` hold what `Counter::count`
    /// reports for each side. When both of those are zero the similarity
    /// is 1.0.
    fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
    where
        C: Iterator<Item = E>,
        E: Eq + core::hash::Hash,
    {
        let left = Counter::from_iter(s1);
        let right = Counter::from_iter(s2);
        let len1 = left.count();
        let len2 = right.count();

        let abs = if len1 == 0 && len2 == 0 {
            // Nothing on either side: reported as a perfect match, which also
            // keeps the division below away from a zero denominator.
            1.0
        } else {
            let merged = left.merge(&right);
            let mut shared: f64 = 0.;
            let mut total: usize = 0;
            for key in merged.keys() {
                let a = left.get(key).copied().unwrap_or(0);
                let b = right.get(key).copied().unwrap_or(0);
                let both = a + b;
                // Only keys seen on both sides add to the numerator.
                if a != 0 && b != 0 {
                    let (low, high) = if a <= b { (a, b) } else { (b, a) };
                    shared += (both * low) as f64 / high as f64;
                }
                total += both;
            }
            shared / total as f64
        };

        Result {
            abs,
            is_distance: false,
            max: 1.,
            len1,
            len2,
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::str::roberts;
    use assert2::assert;
    use rstest::rstest;

    /// Absolute difference below which two scores count as equal.
    const TOLERANCE: f64 = 1E-5;

    /// Reports whether `actual` is strictly within `TOLERANCE` of `expected`.
    fn approx_eq(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < TOLERANCE
    }

    #[rstest]
    #[case("", "", 1.)]
    #[case("a", "a", 1.)]
    #[case("", "a", 0.)]
    #[case("a", "", 0.)]
    // Parity with abydos.
    // abydos tokenizes text into bi-grams with word separators by default, and its
    // own tests cover that configuration. textdistance compares bags of chars and
    // never adds word separators, so the expected values below are not taken from
    // the abydos tests; they come from running `Roberts(qval=1).sim(a, b)`.
    #[case("cat", "hat", 0.6666666666666666)]
    #[case("Niall", "Neil", 0.6111111111111112)]
    #[case("aluminum", "Catalan", 0.3555555555555555)]
    #[case("ATCG", "TAGC", 1.0)]
    #[case("Nigel", "Niall", 0.55)]
    #[case("Niall", "Nigel", 0.55)]
    #[case("Colin", "Coiln", 1.0)]
    #[case("Coiln", "Colin", 1.0)]
    #[case("ATCAACGAGT", "AACGATTAG", 0.9210526315789473)]
    fn function_str(#[case] left: &str, #[case] right: &str, #[case] expected: f64) {
        let actual = roberts(left, right);
        let ok = approx_eq(actual, expected);
        assert!(ok, "roberts({}, {}) is {}, not {}", left, right, actual, expected);
    }
}