use std::collections::HashMap;
use matching::Match;
use parsing::Occurrences;
use scoring::Scoring;
use crate::{
parsing::{build_occurrences, process_query, Occurrence, QueryChar, QueryChars},
scoring::DEFAULT_SCORING,
};
pub struct FuzzySearch<'a> {
query: &'a str,
target: &'a str,
scoring: Option<&'a Scoring>,
case_insensitive: bool,
}
impl<'a> FuzzySearch<'a> {
pub fn new(query: &'a str, target: &'a str) -> Self {
FuzzySearch {
query,
target,
scoring: None,
case_insensitive: true,
}
}
pub fn score_with(mut self, scoring: &'a Scoring) -> Self {
self.scoring = Some(scoring);
self
}
pub fn case_sensitive(mut self) -> Self {
self.case_insensitive = false;
self
}
pub fn case_insensitive(mut self) -> Self {
self.case_insensitive = true;
self
}
pub fn best_match(self) -> Option<Match> {
let processed_query = process_query(self.query);
if processed_query.len() == 0 || self.target.len() == 0 {
return None;
}
let occurrences = build_occurrences(&processed_query, self.target, self.case_insensitive);
let searcher = FuzzySearcher::new(
processed_query,
self.scoring.unwrap_or(&DEFAULT_SCORING),
self.case_insensitive,
);
searcher.best_match(&occurrences)
}
}
struct FuzzySearcher<'a> {
query: QueryChars,
scoring: &'a Scoring,
match_cache: HashMap<(usize, usize, usize), Option<Match>>,
case_insensitive: bool,
}
impl<'a> FuzzySearcher<'a> {
fn new(query: QueryChars, scoring: &'a Scoring, case_insensitive: bool) -> Self {
FuzzySearcher {
match_cache: HashMap::with_capacity(query.len() * query.len()),
query,
scoring,
case_insensitive,
}
}
#[inline(always)]
fn queried_char(&self, qc: &QueryChar) -> char {
if self.case_insensitive {
qc.lower
} else {
qc.original
}
}
#[inline(always)]
fn case_bonus(&self, query_idx: usize, occurrence: &Occurrence) -> isize {
if self.case_insensitive {
self.query
.get(query_idx)
.map_or(0, |c| (c.original == occurrence.char) as isize)
* self.scoring.bonus_match_case
} else {
0
}
}
fn best_match(mut self, occurrences: &Occurrences) -> Option<Match> {
let qc = self.query.get(0)?;
occurrences
.get(&self.queried_char(qc))?
.iter()
.filter_map(|o| self.match_(1, o, 0, &occurrences))
.max()
}
fn match_(
&mut self,
query_idx: usize,
occurrence: &Occurrence,
consecutive: usize,
occurrences: &Occurrences,
) -> Option<Match> {
let this_key = (query_idx, occurrence.target_idx, consecutive);
if let Some(cached) = self.match_cache.get(&this_key) {
return cached.clone();
}
let next_char = self.query.get(query_idx);
let score = consecutive as isize * self.scoring.bonus_consecutive
+ occurrence.is_start as isize * self.scoring.bonus_word_start
+ self.case_bonus(query_idx - 1, occurrence);
let mut this_match = Match::with_matched(score, consecutive, vec![occurrence.target_idx]);
if next_char.is_none() {
self.match_cache.insert(this_key, Some(this_match.clone()));
return Some(this_match);
}
let occs = occurrences.get(&self.queried_char(next_char.unwrap()));
if occs.is_none() {
self.match_cache.insert(this_key, None);
return None;
}
let best_match = occs
.unwrap()
.iter()
.filter(|&o| o.target_idx > occurrence.target_idx)
.filter_map(|o| {
let distance = o.target_idx - occurrence.target_idx;
let new_consecutive = if distance == 1 { consecutive + 1 } else { 0 };
self.match_(query_idx + 1, o, new_consecutive, occurrences)
})
.max()
.and_then(|m| {
this_match.extend_with(&m, &self.scoring);
Some(this_match)
});
self.match_cache.insert(this_key, best_match.clone());
best_match
}
}