textdistance 1.1.1

Lots of algorithms to compare how similar two sequences are
Documentation
//! Jaccard index
#![cfg(feature = "std")]
use crate::counter::Counter;
use crate::{Algorithm, Result};

/// [Jaccard similarity] is a ratio of intersection to union of two sets.
///
/// The metric works with the set of input elements, so strings "abc" and "bca"
/// are the same if compared by letters.
///
/// The metric is always normalized on the interval from 0.0 to 1.0.
///
/// [Jaccard similarity]: https://en.wikipedia.org/wiki/Jaccard_index
#[derive(Default)]
pub struct Jaccard {}

impl Algorithm<f64> for Jaccard {
    fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
    where
        C: Iterator<Item = E>,
        E: Eq + core::hash::Hash,
    {
        let c1 = Counter::from_iter(s1);
        let c2 = Counter::from_iter(s2);
        let uc = c1.union_count(&c2);
        let res = if uc == 0 {
            1.
        } else {
            let ic = c1.intersect_count(&c2);
            ic as f64 / uc as f64
        };
        Result {
            abs: res,
            is_distance: false,
            max: 1.,
            len1: c1.count(),
            len2: c2.count(),
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::str::jaccard;
    use assert2::assert;
    use rstest::rstest;

    fn is_close(a: f64, b: f64) -> bool {
        (a - b).abs() < 1E-5
    }

    #[rstest]
    #[case("", "", 1.)]
    #[case("nelson", "", 0.)]
    #[case("", "neilsen", 0.)]
    #[case("abc", "abc", 1.)]
    #[case("abc", "bac", 1.)]
    // parity with textdistance
    #[case("nelson", "neilsen", 5. / 8.)]
    #[case("test", "text", 3. / 5.)]
    #[case("decide", "resize", 3. / 9.)]
    fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: f64) {
        let act = jaccard(s1, s2);
        let ok = is_close(act, exp);
        assert!(ok, "jaccard({}, {}) is {}, not {}", s1, s2, act, exp);
    }
}