#![warn(missing_docs)]
use structopt::clap::arg_enum;
// String-similarity metrics a caller can select. `metric_fn` maps each variant to the
// `strsim` function that computes it.
//
// The enum is declared through clap's `arg_enum!` macro (imported via structopt), which
// per the clap 2.x documentation also generates `FromStr`, `Display` and a `variants()`
// helper so the type can be parsed straight from a command-line value.
arg_enum! {
// NOTE(review): the lint is presumably silenced because `arg_enum!` does not accept
// per-variant doc comments — confirm against the clap macro definition.
#[allow(missing_docs)]
#[derive(Debug)]
pub enum Metric {
// Scored with `strsim::normalized_damerau_levenshtein`.
DamerauLevenshtein,
// Scored with `strsim::normalized_levenshtein`.
Levenshtein,
// Scored with `strsim::jaro`.
Jaro,
// Scored with `strsim::jaro_winkler`.
JaroWinkler,
}
}
/// Resolves a [`Metric`] to the `strsim` function that scores it.
///
/// The two edit-distance metrics are mapped to their `normalized_*` forms so that every
/// variant yields a function with the same `fn(&str, &str) -> f64` signature.
fn metric_fn(kind: Metric) -> fn(&str, &str) -> f64 {
    match kind {
        Metric::Levenshtein => strsim::normalized_levenshtein,
        Metric::DamerauLevenshtein => strsim::normalized_damerau_levenshtein,
        Metric::JaroWinkler => strsim::jaro_winkler,
        Metric::Jaro => strsim::jaro,
    }
}
/// Fuzzy filter that remembers recently seen strings and reports whether a new string is
/// similar to any of them.
///
/// The remembered strings are kept newest-first in a bounded buffer; similarity is decided
/// by comparing the configured metric's score against a threshold.
#[derive(Clone)]
pub struct Fzq {
    /// Recently seen strings, most recent first.
    buffer: Vec<String>,
    /// Maximum number of strings kept in `buffer`.
    buffer_size: usize,
    /// Scoring function applied to the candidate and each buffered string.
    metric_fn: fn(&str, &str) -> f64,
    /// Minimum score (inclusive) for two strings to count as similar.
    threshold: f64,
}

impl std::fmt::Debug for Fzq {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Fzq")
            .field("buffer", &self.buffer)
            .field("buffer_size", &self.buffer_size)
            // Written by hand rather than derived: a function pointer has no useful textual
            // form, so only its address is shown.
            .field(
                "metric_fn",
                &format_args!("{:#x}", self.metric_fn as usize),
            )
            .field("threshold", &self.threshold)
            .finish()
    }
}
impl Fzq {
pub fn new() -> Fzq {
Fzq {
buffer: Vec::new(),
buffer_size: 100,
metric_fn: metric_fn(Metric::Jaro),
threshold: 0.85,
}
}
pub fn buffer_size<'a>(&'a mut self, size: usize) -> &'a mut Fzq {
self.buffer_size = size;
self.buffer.truncate(self.buffer_size);
self
}
pub fn metric<'a>(&'a mut self, metric: Metric) -> &'a mut Fzq {
self.metric_fn = metric_fn(metric);
self
}
pub fn threshold<'a>(&'a mut self, threshold: f64) -> &'a mut Fzq {
self.threshold = threshold;
self
}
pub fn is_similar(&mut self, s: &str) -> bool {
let mut is_similar = false;
for (i, b) in (&self.buffer).iter().enumerate() {
if (self.metric_fn)(s, b) >= self.threshold {
self.buffer.remove(i);
is_similar = true;
break;
}
}
self.buffer.insert(0, String::from(s));
self.buffer.truncate(self.buffer_size);
is_similar
}
}