#![cfg(feature = "std")]
use crate::counter::Counter;
use crate::{Algorithm, Result};
/// Sørensen–Dice similarity between two sequences.
///
/// The type carries no state; the computation lives in its
/// [`Algorithm<f64>`](crate::Algorithm) implementation, which yields
/// `2 * shared / total`: twice the intersection count of the two element
/// counters divided by the sum of both counters' totals.
///
/// `Clone` and `Debug` are derived alongside `Default` so that values can be
/// duplicated and shown in logs or assertion output like any other public type.
#[derive(Clone, Debug, Default)]
pub struct SorensenDice {}
impl Algorithm<f64> for SorensenDice {
    /// Scores two element sequences as `2 * shared / (len1 + len2)`.
    ///
    /// Each sequence is collected into a `Counter`. `len1` and `len2` are the
    /// values reported by `Counter::count` for the first and second sequence,
    /// and `shared` is `intersect_count` between the two counters.
    ///
    /// When both counts are zero the score is `1.0`, which also avoids a
    /// division by zero.
    ///
    /// The returned `Result` is flagged as a similarity (`is_distance: false`)
    /// with an upper bound of `1.0`.
    fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<f64>
    where
        C: Iterator<Item = E>,
        E: Eq + core::hash::Hash,
    {
        let left = Counter::from_iter(s1);
        let right = Counter::from_iter(s2);

        // Take each total once; the same values feed the ratio and the result.
        let len1 = left.count();
        let len2 = right.count();
        let total = len1 + len2;

        let similarity = if total != 0 {
            let shared = left.intersect_count(&right);
            (2 * shared) as f64 / total as f64
        } else {
            // Two empty inputs are treated as identical.
            1.
        };

        Result {
            abs: similarity,
            is_distance: false,
            max: 1.,
            len1,
            len2,
        }
    }
}
#[cfg(test)]
mod tests {
    use crate::str::sorensen_dice;
    use crate::{Algorithm, SorensenDice};
    use assert2::assert;
    use rstest::rstest;

    /// Two scores are considered equal when they differ by less than this.
    const TOLERANCE: f64 = 1E-5;

    /// Reports whether `a` and `b` are within `TOLERANCE` of each other.
    fn is_close(a: f64, b: f64) -> bool {
        let gap = (a - b).abs();
        gap < TOLERANCE
    }

    /// Returns `text` with every ASCII space character removed.
    fn without_spaces(text: &str) -> String {
        text.chars().filter(|&ch| ch != ' ').collect()
    }

    /// Checks the string-level `sorensen_dice` helper against expected scores.
    #[rstest]
    #[case("", "", 1.)]
    #[case("nelson", "", 0.)]
    #[case("", "neilsen", 0.)]
    #[case("test", "text", 2.0 * 3. / 8.)]
    fn function_str(#[case] s1: &str, #[case] s2: &str, #[case] expected: f64) {
        let actual = sorensen_dice(s1, s2);
        let ok = is_close(actual, expected);
        assert!(
            ok,
            "sorensen_dice({}, {}) is {}, not {}",
            s1,
            s2,
            actual,
            expected
        );
    }

    /// Checks `for_bigrams(..).nval()` against expected scores.
    ///
    /// Spaces are stripped from both inputs before they are compared.
    #[rstest]
    #[case("a", "a", 1.0)]
    #[case("", "", 1.0)]
    #[case("apple event", "apple event", 1.0)]
    #[case("iphone", "iphone x", 0.9090909090909091)]
    #[case("french", "quebec", 0.0)]
    #[case("france", "france", 1.0)]
    #[case("fRaNce", "france", 0.2)]
    #[case("healed", "sealed", 0.8)]
    #[case("web applications", "applications of the web", 0.7878787878)]
    #[case("this has one extra word", "this has one word", 0.7741935483870968)]
    #[case(
        "this will have a typo somewhere",
        "this will huve a typo somewhere",
        0.92
    )]
    #[case(
        "Olive-green table for sale, in extremely good condition.",
        "For sale: table in very good condition, olive green in colour.",
        0.6060606060606061
    )]
    #[case(
        "Olive-green table for sale, in extremely good condition.",
        "For sale: green Subaru Impreza, 210,000 miles",
        0.2558139534883721
    )]
    #[case(
        "Olive-green table for sale, in extremely good condition.",
        "Wanted: mountain bike with at least 21 gears.",
        0.1411764705882353
    )]
    fn for_bigrams(#[case] s1: &str, #[case] s2: &str, #[case] expected: f64) {
        let s1 = without_spaces(s1);
        let s2 = without_spaces(s2);
        let actual = SorensenDice::default().for_bigrams(&s1, &s2).nval();
        let ok = is_close(actual, expected);
        assert!(
            ok,
            "sorensen_dice({}, {}) is {}, not {}",
            s1,
            s2,
            actual,
            expected
        );
    }
}