use {
super::NameMatch,
secular,
smallvec::smallvec,
std::fmt::{self, Write},
};
const BONUS_MATCH: i32 = 50_000;
const BONUS_EXACT: i32 = 1_000;
const BONUS_START: i32 = 10;
const BONUS_START_WORD: i32 = 5;
const BONUS_CANDIDATE_LENGTH: i32 = -1; const BONUS_MATCH_LENGTH: i32 = -10; const BONUS_NB_HOLES: i32 = -30; const BONUS_SINGLED_CHAR: i32 = -15;
#[derive(Debug, Clone)]
pub struct FuzzyPattern {
chars: Box<[char]>, max_nb_holes: usize,
}
impl fmt::Display for FuzzyPattern {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for &c in self.chars.iter() {
f.write_char(c)?
}
Ok(())
}
}
enum MatchSearchResult {
Perfect(NameMatch), Some(NameMatch),
None,
}
fn is_word_separator(c: char) -> bool {
matches!(c, '_' | ' ' | '-')
}
impl FuzzyPattern {
pub fn from(pat: &str) -> Self {
let chars = pat
.chars()
.map(secular::lower_lay_char)
.collect::<Vec<char>>()
.into_boxed_slice();
let max_nb_holes = match chars.len() {
1 => 0,
2 => 1,
3 => 2,
4 => 2,
5 => 3,
6 => 3,
7 => 3,
8 => 4,
_ => chars.len() * 4 / 7,
};
FuzzyPattern {
chars,
max_nb_holes,
}
}
fn match_starting_at_index(
&self,
cand_chars: &[char],
start_idx: usize, ) -> MatchSearchResult {
if cand_chars[start_idx] != self.chars[0] {
return MatchSearchResult::None;
}
let mut pos = smallvec![0; self.chars.len()]; let mut nb_holes = 0;
let mut nb_singled_chars = 0;
let mut cand_idx = 1 + start_idx;
if self.chars.len() > 1 {
let mut group_len = 1;
let mut pat_idx = 1; let mut in_hole = false;
loop {
if cand_chars[cand_idx] == self.chars[pat_idx] {
if in_hole {
in_hole = false;
if nb_holes > 0 {
let mut can_be_joined = true;
for ri in 0..group_len {
if cand_chars[cand_idx-ri-1] != self.chars[pat_idx-ri-1] {
can_be_joined = false;
break;
}
}
if can_be_joined {
for ri in 0..group_len {
pos[pat_idx-ri-1] = cand_idx-ri-1;
}
} else {
if group_len == 1 {
nb_singled_chars += 1;
}
nb_holes += 1;
group_len = 0;
}
} else {
nb_holes += 1;
group_len = 0;
}
}
pos[pat_idx] = cand_idx;
pat_idx += 1;
if pat_idx == self.chars.len() {
break; }
cand_idx += 1;
group_len += 1;
} else {
if cand_chars.len() - cand_idx <= self.chars.len() - pat_idx {
return MatchSearchResult::None;
}
cand_idx += 1;
in_hole = true;
}
}
}
pos[0] = start_idx;
let match_len = 1 + cand_idx - start_idx;
let mut score = BONUS_MATCH;
score += BONUS_CANDIDATE_LENGTH * (cand_chars.len() as i32);
score += BONUS_SINGLED_CHAR * (nb_singled_chars as i32);
score += BONUS_NB_HOLES * (nb_holes as i32);
score += match_len as i32 * BONUS_MATCH_LENGTH;
if start_idx == 0 {
score += BONUS_START + BONUS_START_WORD;
if cand_chars.len() == self.chars.len() {
score += BONUS_EXACT;
return MatchSearchResult::Perfect(NameMatch { score, pos });
}
} else {
let previous = cand_chars[start_idx - 1];
if is_word_separator(previous) {
score += BONUS_START_WORD;
if cand_chars.len() - start_idx == self.chars.len() {
return MatchSearchResult::Perfect(NameMatch { score, pos });
}
}
}
MatchSearchResult::Some(NameMatch { score, pos })
}
pub fn find(&self, candidate: &str) -> Option<NameMatch> {
if candidate.len() < self.chars.len() {
return None;
}
let mut cand_chars: Vec<char> = Vec::with_capacity(candidate.len());
cand_chars.extend(candidate.chars().map(secular::lower_lay_char));
if cand_chars.len() < self.chars.len() {
return None;
}
let mut best_score = 0;
let mut best_match: Option<NameMatch> = None;
let n = cand_chars.len() - self.chars.len();
for start_idx in 0..=n {
match self.match_starting_at_index(&cand_chars, start_idx) {
MatchSearchResult::Perfect(m) => {
return Some(m);
}
MatchSearchResult::Some(m) => {
if m.score > best_score {
best_score = m.score;
best_match = Some(m);
}
}
_ => {}
}
}
best_match
}
pub fn score_of(&self, candidate: &str) -> Option<i32> {
self.find(candidate)
.map(|nm| nm.score)
}
}
#[cfg(test)]
mod fuzzy_pattern_tests {
use super::*;
fn check_ordering_for(pattern: &str, names: &[&str]) {
let fp = FuzzyPattern::from(pattern);
let mut last_score = fp.find(pattern).map(|m| m.score);
let mut last_name = pattern;
for name in names {
let score = fp.find(name).map(|m| m.score);
assert!(
score < last_score,
"score({:?}) should be lower than score({:?}) (using find)",
name,
last_name
);
last_name = name;
last_score = score;
}
}
#[test]
fn check_orderings() {
check_ordering_for(
"broot",
&[
"a broot",
"abbroot",
"abcbroot",
" abdbroot",
"1234broot1",
"12345brrrroooottt",
"12345brrr roooottt",
"brot",
],
);
check_ordering_for(
"Abc",
&[
"abCd",
"aBdc",
" abdc",
" abdbccccc",
" a b c",
"nothing",
],
);
check_ordering_for(
"réveil",
&[
"Réveillon",
"Réveillons",
" réveils",
"πréveil",
"déréveil",
" rêves",
],
);
}
}