1#![cfg_attr(not(any(feature = "std", test)), no_std)]
2#![doc = include_str!("../README.md")]
3#![deny(missing_docs)]
4#![deny(clippy::all, clippy::pedantic)]
5#![allow(
6 clippy::cast_precision_loss,
7 clippy::must_use_candidate,
8 clippy::similar_names,
9 clippy::unreadable_literal,
10 clippy::doc_markdown,
11 clippy::wildcard_imports
12)]
13
14extern crate alloc;
15
16mod algorithm;
17mod counter;
18mod result;
19
20pub mod nstr;
21pub mod str;
22
23mod algorithms {
24 pub mod bag;
25 pub mod cosine;
26 pub mod damerau_levenshtein;
27 pub mod entropy_ncd;
28 pub mod hamming;
29 pub mod jaccard;
30 pub mod jaro;
31 pub mod jaro_winkler;
32 pub mod lcsseq;
33 pub mod lcsstr;
34 pub mod length;
35 pub mod levenshtein;
36 pub mod lig3;
37 pub mod mlipns;
38 pub mod overlap;
39 pub mod prefix;
40 pub mod ratcliff_obershelp;
41 pub mod roberts;
42 pub mod sift4_common;
43 pub mod sift4_simple;
44 pub mod smith_waterman;
45 pub mod sorensen_dice;
46 pub mod suffix;
47 pub mod tversky;
48 pub mod yujian_bo;
49}
50
51pub use self::algorithm::Algorithm;
52#[cfg(feature = "std")]
53pub use self::algorithms::bag::Bag;
54#[cfg(feature = "std")]
55pub use self::algorithms::cosine::Cosine;
56#[cfg(feature = "std")]
57pub use self::algorithms::damerau_levenshtein::DamerauLevenshtein;
58#[cfg(feature = "std")]
59pub use self::algorithms::entropy_ncd::EntropyNCD;
60pub use self::algorithms::hamming::Hamming;
61#[cfg(feature = "std")]
62pub use self::algorithms::jaccard::Jaccard;
63pub use self::algorithms::jaro::Jaro;
64pub use self::algorithms::jaro_winkler::JaroWinkler;
65pub use self::algorithms::lcsseq::LCSSeq;
66pub use self::algorithms::lcsstr::LCSStr;
67pub use self::algorithms::length::Length;
68pub use self::algorithms::levenshtein::Levenshtein;
69pub use self::algorithms::lig3::LIG3;
70pub use self::algorithms::mlipns::MLIPNS;
71#[cfg(feature = "std")]
72pub use self::algorithms::overlap::Overlap;
73pub use self::algorithms::prefix::Prefix;
74pub use self::algorithms::ratcliff_obershelp::RatcliffObershelp;
75#[cfg(feature = "std")]
76pub use self::algorithms::roberts::Roberts;
77pub use self::algorithms::sift4_common::Sift4Common;
78pub use self::algorithms::sift4_simple::Sift4Simple;
79pub use self::algorithms::smith_waterman::SmithWaterman;
80#[cfg(feature = "std")]
81pub use self::algorithms::sorensen_dice::SorensenDice;
82pub use self::algorithms::suffix::Suffix;
83#[cfg(feature = "std")]
84pub use self::algorithms::tversky::Tversky;
85pub use self::algorithms::yujian_bo::YujianBo;
86pub use self::result::Result;
87
88#[cfg(test)]
89mod tests {
90 #![allow(clippy::float_cmp)]
91
92 use super::*;
93 use assert2::assert;
94 use proptest::prelude::*;
95 use rstest::rstest;
96
97 const ALGS: usize = 8;
98
99 fn get_result(alg: usize, s1: &str, s2: &str) -> Result<usize> {
100 match alg {
101 1 => Hamming::default().for_str(s1, s2),
102 2 => LCSSeq::default().for_str(s1, s2),
103 3 => LCSStr::default().for_str(s1, s2),
104 4 => RatcliffObershelp::default().for_str(s1, s2),
105 5 => Levenshtein::default().for_str(s1, s2),
106 #[cfg(feature = "std")]
107 6 => DamerauLevenshtein::default().for_str(s1, s2),
108 7 => Sift4Simple::default().for_str(s1, s2),
109 8 => MLIPNS::default().for_str(s1, s2),
110 9 => Prefix::default().for_str(s1, s2),
111 10 => Suffix::default().for_str(s1, s2),
112 11 => Length::default().for_str(s1, s2),
113 12 => Bag::default().for_str(s1, s2),
114 13 => SmithWaterman::default().for_str(s1, s2),
115 14 => Sift4Common::default().for_str(s1, s2),
116 _ => panic!("there are not so many algorithms!"),
117 }
118 }
119
120 fn get_result_f64(alg: usize, s1: &str, s2: &str) -> Result<f64> {
121 match alg {
122 1 => Jaro::default().for_str(s1, s2),
123 2 => JaroWinkler::default().for_str(s1, s2),
124 3 => YujianBo::default().for_str(s1, s2),
125 4 => Jaccard::default().for_str(s1, s2),
126 5 => SorensenDice::default().for_str(s1, s2),
127 6 => Tversky::default().for_str(s1, s2),
128 7 => Overlap::default().for_str(s1, s2),
129 8 => Cosine::default().for_str(s1, s2),
130 9 => EntropyNCD::default().for_str(s1, s2),
131 10 => LIG3::default().for_str(s1, s2),
132 11 => Roberts::default().for_str(s1, s2),
133 _ => panic!("there are not so many algorithms!"),
134 }
135 }
136
137 #[rstest]
138 #[case::hamming(1)]
139 #[case::lcsseq(2)]
140 #[case::lcsstr(3)]
141 #[case::ratcliff_obershelp(4)]
142 #[case::levenshtein(5)]
143 #[case::damerau_levenshtein(6)]
144 #[case::sift4_simple(7)]
145 #[case::mlipns(8)]
146 #[case::prefix(9)]
147 #[case::suffix(10)]
148 #[case::length(11)]
149 #[case::bag(12)]
150 #[case::smith_waterman(13)]
151 #[case::sift4_common(14)]
152 fn basic_usize(#[case] alg: usize) {
153 let empty_res = get_result(alg, "", "");
154 assert!(empty_res.dist() == 0);
155 if alg != 8 {
156 assert!(get_result(alg, "ab", "cde").dist() > 0);
157 assert!(get_result(alg, "ab", "cde").ndist() > 0.);
158 }
159 if alg != 11 {
160 assert!(get_result(alg, "spam", "qwer").sim() == 0);
161 assert!(get_result(alg, "spam", "qwer").nsim() == 0.);
162 }
163 assert!(empty_res.ndist() == 0.);
164 assert!(empty_res.nsim() == 1.);
165 }
166
167 #[rstest]
168 #[case::jaro(1)]
169 #[case::jaro_winkler(2)]
170 #[case::yujian_bo(3)]
171 #[case::jaccard(4)]
172 #[case::sorensen_dice(5)]
173 #[case::tversky(6)]
174 #[case::overlap(7)]
175 #[case::cosine(8)]
176 #[case::entropy_ncd(9)]
177 #[case::lig3(10)]
178 #[case::roberts(11)]
179 fn basic_f64(#[case] alg: usize) {
180 let empty_res = get_result_f64(alg, "", "");
181 assert!(get_result_f64(alg, "ab", "cde").ndist() > 0.);
182 if alg != 3 && alg != 9 {
183 assert!(get_result_f64(alg, "spam", "qwer").nsim() == 0.);
184 }
185 assert!(empty_res.ndist() == 0.);
186 assert!(empty_res.nsim() == 1.);
187 assert!(empty_res.max == 1.);
188 }
189
190 fn is_close(a: f64, b: f64) -> bool {
191 (a - b).abs() < 1E-9
192 }
193
194 proptest! {
195 #[test]
196 fn prop(s1 in ".*", s2 in ".*") {
197 for alg in 1..ALGS {
198 let res = get_result(alg, &s1, &s2);
199 let d = res.dist();
200 let s = res.sim();
201
202 let nd = res.ndist();
203 prop_assert!(nd.is_finite());
204 prop_assert!(nd >= 0.);
205 prop_assert!(nd <= 1.);
206
207 let ns = res.nsim();
208 prop_assert!(ns.is_finite());
209 prop_assert!(ns >= 0.);
210 prop_assert!(ns <= 1.);
211
212 prop_assert!(is_close(ns + nd, 1.), "{} + {} == 1", nd, ns);
213
214 if d < s {
215 prop_assert!(nd < ns, "{} < {}", nd, ns);
216 } else if d > s {
217 prop_assert!(nd > ns, "{} > {}", nd, ns);
218 } else if !s1.is_empty() && !s2.is_empty() {
219 prop_assert!(nd == ns, "{} == {}", nd, ns);
220 }
221 prop_assert!(res.val() == d || res.val() == s);
222
223 prop_assert_eq!(res.len1, s1.chars().count());
224 prop_assert_eq!(res.len2, s2.chars().count());
225 prop_assert!(res.max >= res.len1.min(res.len2));
226 }
227 }
228
229 #[test]
230 fn prop_same(s in ".*") {
231 for alg in 1..ALGS {
232 let res = get_result(alg, &s, &s);
233 let nd = res.ndist();
234 prop_assert_eq!(nd, 0., "{}: {} == 0.0", alg, nd);
235 let ns = res.nsim();
236 prop_assert_eq!(ns, 1., "{}: {} == 1.0", alg, ns);
237 }
238 }
239
240 fn prop_prefix(prefix in ".+", s1 in ".+", s2 in ".+") {
242 for alg in 1..ALGS {
243 let r1 = get_result(alg, &s1, &s2).ndist();
244 let mut p1 = prefix.clone();
245 let mut p2 = prefix.clone();
246 p1.push_str(&s1);
247 p2.push_str(&s2);
248 let r2 = get_result(alg, &p1, &p2).ndist();
249 prop_assert!(r1 > r2, "{}: {} > {}", alg, r1, r2);
250 }
251 }
252 }
253}