1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#![warn(missing_docs)]
use structopt::clap::arg_enum;
// String-similarity metrics a caller can select for an `Fzq` filter.
// `metric_fn` maps each variant to the `strsim` function that implements it.
//
// The enum is declared through `arg_enum!` (imported from `structopt::clap`),
// presumably so the variant names can be parsed from command-line values —
// confirm against the clap documentation for the traits the macro generates.
arg_enum! {
// Overrides the crate-level `#![warn(missing_docs)]` for this enum.
#[allow(missing_docs)]
#[derive(Debug)]
pub enum Metric {
// Resolved to `strsim::normalized_damerau_levenshtein`.
DamerauLevenshtein,
// Resolved to `strsim::normalized_levenshtein`.
Levenshtein,
// Resolved to `strsim::jaro`.
Jaro,
// Resolved to `strsim::jaro_winkler`.
JaroWinkler,
}
}
/// Resolves a [`Metric`] variant to the `strsim` function that computes it.
///
/// Every returned function takes two string slices and yields an `f64` score.
/// Both Levenshtein-based variants resolve to `strsim`'s `normalized_*`
/// functions rather than the raw edit-distance ones.
fn metric_fn(m: Metric) -> fn(&str, &str) -> f64 {
    // Arms follow the declaration order of `Metric`; the match is exhaustive
    // on purpose so that adding a variant is a compile error here.
    match m {
        Metric::DamerauLevenshtein => strsim::normalized_damerau_levenshtein,
        Metric::Levenshtein => strsim::normalized_levenshtein,
        Metric::Jaro => strsim::jaro,
        Metric::JaroWinkler => strsim::jaro_winkler,
    }
}
/// A bounded memory of recently seen strings, used to decide whether a new
/// string is similar to one seen before.
///
/// Similarity is scored by a configurable metric and compared against a
/// configurable threshold.
pub struct Fzq {
    /// Remembered strings; new entries are inserted at index 0.
    buffer: Vec<String>,
    /// Upper bound on the number of entries kept in `buffer`.
    buffer_size: usize,
    /// Scoring function applied to the candidate and each remembered string.
    metric_fn: fn(&str, &str) -> f64,
    /// Minimum score at which two strings are treated as similar.
    threshold: f64,
}
impl Fzq {
    /// Creates a filter with the default configuration: an empty buffer
    /// holding at most 100 strings, the Jaro metric, and a similarity
    /// threshold of `0.85`.
    pub fn new() -> Fzq {
        Fzq {
            buffer: Vec::new(),
            buffer_size: 100,
            metric_fn: metric_fn(Metric::Jaro),
            threshold: 0.85,
        }
    }

    /// Sets the maximum number of strings remembered.
    ///
    /// If the buffer currently holds more than `size` entries, the entries
    /// at the end (the oldest ones) are dropped immediately. Returns `self`
    /// so configuration calls can be chained.
    pub fn buffer_size(&mut self, size: usize) -> &mut Fzq {
        self.buffer_size = size;
        self.buffer.truncate(size);
        self
    }

    /// Selects the similarity metric used by [`Fzq::is_similar`].
    ///
    /// Returns `self` so configuration calls can be chained.
    pub fn metric(&mut self, metric: Metric) -> &mut Fzq {
        self.metric_fn = metric_fn(metric);
        self
    }

    /// Sets the minimum score at which two strings count as similar.
    ///
    /// Returns `self` so configuration calls can be chained.
    pub fn threshold(&mut self, threshold: f64) -> &mut Fzq {
        self.threshold = threshold;
        self
    }

    /// Reports whether `s` is similar to a remembered string, then remembers `s`.
    ///
    /// Remembered strings are checked from the most recently inserted to the
    /// oldest; the first one whose score against `s` is at least the
    /// threshold counts as a match and is removed from the buffer. In every
    /// case `s` is then inserted at the front and the buffer is cut back to
    /// the configured size, discarding the oldest entries.
    pub fn is_similar(&mut self, s: &str) -> bool {
        // Copy the scalar settings out so the search closure does not need to
        // borrow `self` while the buffer is being iterated.
        let score = self.metric_fn;
        let threshold = self.threshold;

        let hit = self
            .buffer
            .iter()
            .position(|seen| score(s, seen) >= threshold);

        // Drop the matched entry so only the newest of a group of similar
        // strings stays in the buffer.
        if let Some(index) = hit {
            self.buffer.remove(index);
        }

        self.buffer.insert(0, String::from(s));
        self.buffer.truncate(self.buffer_size);
        hit.is_some()
    }
}

impl Default for Fzq {
    /// Equivalent to [`Fzq::new`].
    fn default() -> Self {
        Self::new()
    }
}