textdistance/
algorithm.rs

1use super::Result;
2use alloc::vec::Vec;
3use core::hash::Hash;
4
5/// A base trait for all distance/similarity algorithms.
6///
7///     use textdistance::{Algorithm, Hamming};
8///     let h = Hamming::default();
9///     let res = h.for_str("abc", "acbd");
10///     assert!(res.val() == 3);
11///
12pub trait Algorithm<R> {
13    /// Calculate distance/similarity for iterators.
14    ///
15    ///     use textdistance::{Algorithm, Hamming};
16    ///     let h = Hamming::default();
17    ///     let res = h.for_iter(1..4, 1..6);
18    ///     assert!(res.val() == 2);
19    ///
20    fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<R>
21    where
22        C: Iterator<Item = E>,
23        E: Eq + Hash,
24    {
25        let s1: Vec<E> = s1.collect();
26        let s2: Vec<E> = s2.collect();
27        self.for_vec(&s1, &s2)
28    }
29
30    /// Calculate distance/similarity for vectors.
31    ///
32    ///     use textdistance::{Algorithm, Hamming};
33    ///     let h = Hamming::default();
34    ///     let res = h.for_vec(&vec![1, 2, 3], &vec![1, 3, 2, 4]);
35    ///     assert!(res.val() == 3);
36    ///
37    fn for_vec<E>(&self, s1: &[E], s2: &[E]) -> Result<R>
38    where
39        E: Eq + Hash,
40    {
41        self.for_iter(s1.iter(), s2.iter())
42    }
43
44    /// Calculate distance/similarity for strings.
45    ///
46    ///     use textdistance::{Algorithm, Hamming};
47    ///     let h = Hamming::default();
48    ///     let res = h.for_str("abc", "acbd");
49    ///     assert!(res.val() == 3);
50    ///
51    fn for_str(&self, s1: &str, s2: &str) -> Result<R> {
52        self.for_iter(s1.chars(), s2.chars())
53    }
54
55    /// Calculate distance/similarity for words in strings.
56    ///
57    ///     use textdistance::{Algorithm, Hamming};
58    ///     let h = Hamming::default();
59    ///     let res = h.for_words("the first edition", "the second edition");
60    ///     assert!(res.val() == 1);
61    ///
62    fn for_words(&self, s1: &str, s2: &str) -> Result<R> {
63        self.for_iter(s1.split_whitespace(), s2.split_whitespace())
64    }
65
66    /// Calculate distance/similarity for bigrams in strings.
67    ///
68    ///     use textdistance::{Algorithm, Hamming};
69    ///     let h = Hamming::default();
70    ///     let res = h.for_str("abd", "abcd");
71    ///     assert!(res.val() == 2); // 3 bigrams (ab, bc, cd), only "ab" matches
72    ///
73    fn for_bigrams(&self, s1: &str, s2: &str) -> Result<R> {
74        self.for_iter(bigrams(s1), bigrams(s2))
75    }
76}
77
/// Iterate over every pair of adjacent `char`s in `s`, in order.
///
/// A string with fewer than two characters yields nothing.
fn bigrams(s: &str) -> impl Iterator<Item = (char, char)> + '_ {
    let leading = s.chars();
    // The same characters shifted by one, so each item pairs a char with its successor.
    let trailing = s.chars().skip(1);
    leading.zip(trailing)
}
81
#[cfg(test)]
mod tests {
    use super::Algorithm;
    use crate::Hamming;
    use assert2::assert;
    // use proptest::prelude::*;
    use rstest::rstest;

    // Slices of integers go through `Algorithm::for_vec`.
    #[rstest]
    #[case(vec![], vec![], 0)]
    #[case(vec![1], vec![1], 0)]
    #[case(vec![1], vec![5], 1)]
    #[case(vec![3], vec![5], 1)]
    #[case(vec![3, 4, 5, 6], vec![1, 4, 5, 6, 7], 2)]
    fn for_vec(#[case] left: Vec<usize>, #[case] right: Vec<usize>, #[case] expected: usize) {
        let hamming = Hamming::default();
        let actual = hamming.for_vec(&left, &right).val();
        assert!(actual == expected);
    }

    // Strings are compared character by character.
    #[rstest]
    #[case("", "", 0)]
    #[case("", "\0", 1)]
    #[case("", "abc", 3)]
    #[case("abc", "", 3)]
    #[case("sitting", "sitting", 0)]
    #[case("abcdefg", "hijklmn", 7)]
    #[case("karolin", "kathrin", 3)]
    #[case("hello", "world", 4)]
    fn for_str(#[case] left: &str, #[case] right: &str, #[case] expected: usize) {
        let hamming = Hamming::default();
        let actual = hamming.for_str(left, right).val();
        assert!(actual == expected);
    }

    // Strings are split on whitespace and compared word by word.
    #[rstest]
    #[case("", "", 0)]
    #[case("", "\0", 1)]
    #[case("", "abc", 1)]
    #[case("abc", "", 1)]
    #[case("oh hi mark", "oh hi world", 1)]
    #[case("oh hi mark", "oh hi mad world", 2)]
    #[case("oh hi mark", "greeting you mad world", 4)]
    fn for_words(#[case] left: &str, #[case] right: &str, #[case] expected: usize) {
        let hamming = Hamming::default();
        let actual = hamming.for_words(left, right).val();
        assert!(actual == expected);
    }

    // Strings are compared as sequences of adjacent character pairs.
    #[rstest]
    #[case("", "", 0)]
    // NOTE(review): presumably disabled because a one-character string has no
    // bigrams, so the result would be 0 rather than 1 -- confirm.
    // #[case("", "a", 1)]
    #[case("", "abc", 2)]
    #[case("abc", "", 2)]
    #[case("oh hi mark", "oh ho mark", 2)]
    fn for_bigrams(#[case] left: &str, #[case] right: &str, #[case] expected: usize) {
        let hamming = Hamming::default();
        let actual = hamming.for_bigrams(left, right).val();
        assert!(actual == expected);
    }
}