textdistance/algorithms/
bag.rs

1//! Bag distance
2#![cfg(feature = "std")]
3use crate::counter::Counter;
4use crate::{Algorithm, Result};
5
/// [Bag distance] treats both sequences as multisets and counts the items of each
/// that have no counterpart in the other; the larger of the two counts is the distance.
///
/// [Bag distance]: http://www-db.disi.unibo.it/research/papers/SPIRE02.pdf
#[derive(Default)]
pub struct Bag {}
11
12impl Algorithm<usize> for Bag {
13    fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<usize>
14    where
15        C: Iterator<Item = E>,
16        E: Eq + core::hash::Hash,
17    {
18        let c1 = Counter::from_iter(s1);
19        let c2 = Counter::from_iter(s2);
20        let d1 = c1.diff_count(&c2);
21        let d2 = c2.diff_count(&c1);
22        let l1 = c1.count();
23        let l2 = c2.count();
24
25        Result {
26            abs: d1.max(d2),
27            is_distance: true,
28            max: l1.max(l2),
29            len1: l1,
30            len2: l2,
31        }
32    }
33}
34
#[cfg(test)]
mod tests {
    use crate::str::bag;
    use assert2::assert;
    use rstest::rstest;

    // Table-driven check of the string-level `bag` helper: each case lists
    // the two inputs followed by the expected distance.
    #[rstest]
    // both inputs empty
    #[case("", "", 0)]
    // parity with textdistance
    #[case("qwe", "qwe", 0)]
    #[case("qwe", "erty", 3)]
    #[case("qwe", "ewq", 0)]
    #[case("qwe", "rtys", 4)]
    // parity with talisman
    #[case("cat", "hat", 1)]
    #[case("Niall", "Neil", 2)]
    #[case("aluminum", "Catalan", 5)]
    #[case("ATCG", "TAGC", 0)]
    fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) {
        assert!(bag(s1, s2) == exp);
    }
}