// Core `Algorithm` trait shared by the distance/similarity implementations.
use super::Result;
use std::hash::Hash;
/// A base trait for all distance/similarity algorithms.
///
/// Every method has a default implementation, but the defaults of
/// [`Algorithm::for_iter`] and [`Algorithm::for_vec`] are written in terms of
/// each other. An implementor must therefore override at least one of the two;
/// otherwise the defaults only ever call one another and never produce a result.
///
/// ```
/// use textdistance::{Algorithm, Hamming};
/// let h = Hamming::default();
/// let res = h.for_str("abc", "acbd");
/// assert!(res.val() == 3);
/// ```
pub trait Algorithm<R> {
    /// Calculate distance/similarity for iterators.
    ///
    /// Both sequences must be iterators of the same concrete type `C`.
    /// The default implementation collects each iterator into a `Vec` and
    /// delegates to [`Algorithm::for_vec`].
    ///
    /// ```
    /// use textdistance::{Algorithm, Hamming};
    /// let h = Hamming::default();
    /// let res = h.for_iter(1..4, 1..6);
    /// assert!(res.val() == 2);
    /// ```
    fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<R>
    where
        C: Iterator<Item = E>,
        E: Eq + Hash,
    {
        let s1: Vec<E> = s1.collect();
        let s2: Vec<E> = s2.collect();
        self.for_vec(&s1, &s2)
    }

    /// Calculate distance/similarity for vectors.
    ///
    /// The default implementation iterates over references to the elements
    /// of both slices and delegates to [`Algorithm::for_iter`].
    ///
    /// ```
    /// use textdistance::{Algorithm, Hamming};
    /// let h = Hamming::default();
    /// let res = h.for_vec(&vec![1, 2, 3], &vec![1, 3, 2, 4]);
    /// assert!(res.val() == 3);
    /// ```
    fn for_vec<E>(&self, s1: &[E], s2: &[E]) -> Result<R>
    where
        E: Eq + Hash,
    {
        self.for_iter(s1.iter(), s2.iter())
    }

    /// Calculate distance/similarity for strings.
    ///
    /// The strings are compared `char` by `char` (as produced by
    /// `str::chars`), not byte by byte.
    ///
    /// ```
    /// use textdistance::{Algorithm, Hamming};
    /// let h = Hamming::default();
    /// let res = h.for_str("abc", "acbd");
    /// assert!(res.val() == 3);
    /// ```
    fn for_str(&self, s1: &str, s2: &str) -> Result<R> {
        self.for_iter(s1.chars(), s2.chars())
    }

    /// Calculate distance/similarity for words in strings.
    ///
    /// Each string is split with `str::split_whitespace`, and the resulting
    /// words are the elements being compared.
    ///
    /// ```
    /// use textdistance::{Algorithm, Hamming};
    /// let h = Hamming::default();
    /// let res = h.for_words("the first edition", "the second edition");
    /// assert!(res.val() == 1);
    /// ```
    fn for_words(&self, s1: &str, s2: &str) -> Result<R> {
        self.for_iter(s1.split_whitespace(), s2.split_whitespace())
    }

    /// Calculate distance/similarity for bigrams in strings.
    ///
    /// A bigram is a pair of adjacent characters, so a string with fewer
    /// than two characters contributes no elements at all.
    ///
    /// ```
    /// use textdistance::{Algorithm, Hamming};
    /// let h = Hamming::default();
    /// let res = h.for_bigrams("abd", "abcd");
    /// assert!(res.val() == 2); // 3 bigrams (ab, bc, cd), only "ab" matches
    /// ```
    fn for_bigrams(&self, s1: &str, s2: &str) -> Result<R> {
        self.for_iter(bigrams(s1), bigrams(s2))
    }
}
/// Iterate over every pair of adjacent characters of `s`, in order.
///
/// The first item of each pair comes from the string itself and the second
/// from the same string shifted by one character, so a string with fewer
/// than two characters yields nothing.
fn bigrams(s: &str) -> impl Iterator<Item = (char, char)> + '_ {
    let leading = s.chars();
    let trailing = s.chars().skip(1);
    leading.zip(trailing)
}
#[cfg(test)]
mod tests {
    use super::Algorithm;
    use crate::Hamming;
    use assert2::assert;
    // use proptest::prelude::*;
    use rstest::rstest;

    /// `for_vec` compares the two slices element by element.
    #[rstest]
    #[case(vec![], vec![], 0)]
    #[case(vec![1], vec![1], 0)]
    #[case(vec![1], vec![5], 1)]
    #[case(vec![3], vec![5], 1)]
    #[case(vec![3, 4, 5, 6], vec![1, 4, 5, 6, 7], 2)]
    fn for_vec(#[case] s1: Vec<usize>, #[case] s2: Vec<usize>, #[case] exp: usize) {
        let hamming = Hamming::default();
        let actual = hamming.for_vec(&s1, &s2).val();
        assert!(actual == exp);
    }

    /// `for_str` compares the two strings character by character.
    #[rstest]
    #[case("", "", 0)]
    #[case("", "\0", 1)]
    #[case("", "abc", 3)]
    #[case("abc", "", 3)]
    #[case("sitting", "sitting", 0)]
    #[case("abcdefg", "hijklmn", 7)]
    #[case("karolin", "kathrin", 3)]
    #[case("hello", "world", 4)]
    fn for_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) {
        let hamming = Hamming::default();
        let actual = hamming.for_str(s1, s2).val();
        assert!(actual == exp);
    }

    /// `for_words` compares whitespace-separated words, so "abc" counts as one element.
    #[rstest]
    #[case("", "", 0)]
    #[case("", "\0", 1)]
    #[case("", "abc", 1)]
    #[case("abc", "", 1)]
    #[case("oh hi mark", "oh hi world", 1)]
    #[case("oh hi mark", "oh hi mad world", 2)]
    #[case("oh hi mark", "greeting you mad world", 4)]
    fn for_words(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) {
        let hamming = Hamming::default();
        let actual = hamming.for_words(s1, s2).val();
        assert!(actual == exp);
    }

    /// `for_bigrams` compares pairs of adjacent characters.
    #[rstest]
    #[case("", "", 0)]
    // Disabled case: a one-character string yields no bigrams, so both sides
    // would be empty sequences — presumably why the expected 1 does not hold.
    // #[case("", "a", 1)]
    #[case("", "abc", 2)]
    #[case("abc", "", 2)]
    #[case("oh hi mark", "oh ho mark", 2)]
    fn for_bigrams(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) {
        let hamming = Hamming::default();
        let actual = hamming.for_bigrams(s1, s2).val();
        assert!(actual == exp);
    }
}