use super::*;
use std::cmp;
use strsim;
/// Selects which `strsim` edit-distance function [`baseline`] uses to compare
/// the pattern against each candidate substring of the text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DistanceFn {
    /// Compare with `strsim::levenshtein`.
    Levenshtein,
    /// Compare with `strsim::damerau_levenshtein`.
    DamerauLevenshtein,
    /// Compare with `strsim::osa_distance`.
    OptimalStringAlignment,
}
/// Brute-force reference matcher.
///
/// For every end position `i` (a char index into `text`), computes the
/// smallest distance between `pattern` and any substring of `text` that ends
/// at `i` and is at most `pattern_len + max_distance + 1` chars long. A
/// [`Match`] is reported for each `i` whose best distance does not exceed
/// `max_distance`.
///
/// `max_distance` is clamped to the pattern's length in chars, and
/// `distance_fn` chooses which `strsim` function performs the comparison.
///
/// # Errors
///
/// Returns `Err(ERR_INVALID_PATTERN)` when `pattern_length_is_valid` rejects
/// the pattern's char count.
pub fn baseline(
    pattern: &str,
    text: &str,
    max_distance: usize,
    distance_fn: DistanceFn,
) -> super::BitapResult {
    let pattern_len = pattern.chars().count();
    if !pattern_length_is_valid(pattern_len) {
        return Err(ERR_INVALID_PATTERN);
    }
    let max_distance = cmp::min(max_distance, pattern_len);
    // How far back from the end position a candidate substring may start.
    // Loop-invariant, so it is computed once up front.
    let max_diff = max_distance + pattern_len;

    // Byte offset of every char boundary in `text`, with `text.len()` as a
    // trailing sentinel. Char `c` occupies `boundaries[c]..boundaries[c + 1]`,
    // which lets candidates be borrowed as `&str` slices instead of being
    // rebuilt as a fresh `String` for every (start, end) pair.
    let mut boundaries: Vec<usize> = text.char_indices().map(|(offset, _)| offset).collect();
    boundaries.push(text.len());
    let text_len = boundaries.len() - 1;

    let mut results = Vec::new();
    for i in 0..text_len {
        let start = i.saturating_sub(max_diff);
        let end_byte = boundaries[i + 1];
        // Sentinel one past the threshold, meaning "nothing acceptable yet".
        let mut best_distance: usize = max_distance + 1;
        for j in start..=i {
            let sub_text = &text[boundaries[j]..end_byte];
            let distance = match distance_fn {
                DistanceFn::Levenshtein => strsim::levenshtein(pattern, sub_text),
                DistanceFn::DamerauLevenshtein => strsim::damerau_levenshtein(pattern, sub_text),
                DistanceFn::OptimalStringAlignment => strsim::osa_distance(pattern, sub_text),
            };
            if distance < best_distance {
                best_distance = distance;
            }
            // An exact match cannot be improved upon; stop scanning early.
            if best_distance == 0 {
                break;
            }
        }
        if best_distance <= max_distance {
            results.push(Match {
                distance: best_distance,
                end: i,
            })
        }
    }
    Ok(results)
}
/// Exact-match search built on [`baseline`].
///
/// Runs `baseline` with a maximum distance of zero and the Levenshtein
/// metric, then converts each reported end index into a start index by
/// subtracting `pattern`'s char count minus one. Any error from `baseline`
/// is passed through unchanged.
pub fn find(pattern: &str, text: &str) -> FindResult {
    let exact_matches = baseline(pattern, text, 0, DistanceFn::Levenshtein)?;
    // Distance from the first char of a match to its last char.
    let offset = pattern.chars().count() - 1;
    let starts = exact_matches
        .into_iter()
        .map(|found| found.end - offset)
        .collect::<Vec<_>>();
    Ok(starts)
}
/// Runs [`baseline`] with `k` as the maximum distance, comparing with
/// [`DistanceFn::Levenshtein`].
pub fn lev(pattern: &str, text: &str, k: usize) -> BitapResult {
    let metric = DistanceFn::Levenshtein;
    baseline(pattern, text, k, metric)
}
/// Runs [`baseline`] with `k` as the maximum distance, comparing with
/// [`DistanceFn::DamerauLevenshtein`].
pub fn damerau(pattern: &str, text: &str, k: usize) -> BitapResult {
    let metric = DistanceFn::DamerauLevenshtein;
    baseline(pattern, text, k, metric)
}
/// Runs [`baseline`] with `k` as the maximum distance, comparing with
/// [`DistanceFn::OptimalStringAlignment`].
pub fn osa(pattern: &str, text: &str, k: usize) -> BitapResult {
    let metric = DistanceFn::OptimalStringAlignment;
    baseline(pattern, text, k, metric)
}