#![no_std]
extern crate alloc;
use alloc::vec::Vec;
pub struct FuzzyMatcher {
target_chars: Vec<char>,
first_possible_match: Vec<usize>,
prev_seq_match_counts: Vec<usize>,
prev_score: Vec<usize>,
seq_match_counts: Vec<usize>,
score: Vec<usize>,
}
fn char_matches(query_char: char, target_char: char) -> bool {
match query_char {
'/' => matches!(target_char, '/' | '\\'),
'\\' => matches!(target_char, '/' | '\\'),
_ => {
if query_char.is_ascii() {
query_char.eq_ignore_ascii_case(&target_char)
} else {
query_char
.to_lowercase()
.zip(target_char.to_lowercase())
.all(|(a, b)| a == b)
}
}
}
}
impl FuzzyMatcher {
pub fn new() -> Self {
FuzzyMatcher {
target_chars: Vec::new(),
first_possible_match: Vec::new(),
prev_seq_match_counts: Vec::new(),
prev_score: Vec::new(),
seq_match_counts: Vec::new(),
score: Vec::new(),
}
}
pub fn fuzzy_match(&mut self, target: &str, query: &str) -> Option<usize> {
self.target_chars.clear();
self.target_chars.extend(target.chars());
self.first_possible_match.clear();
let mut query_chars = query.chars();
let mut cur_query_char = query_chars.next();
for (target_idx, target_char) in target.chars().enumerate() {
if let Some(query_char) = cur_query_char {
if char_matches(query_char, target_char) {
self.first_possible_match.push(target_idx);
cur_query_char = query_chars.next();
}
}
}
if cur_query_char.is_some() {
return None;
}
debug_assert_eq!(target.chars().count(), self.target_chars.len());
debug_assert_eq!(query.chars().count(), self.first_possible_match.len());
self.prev_seq_match_counts.clear();
self.prev_score.clear();
self.prev_seq_match_counts
.resize(self.target_chars.len(), 0);
self.prev_score.resize(self.target_chars.len(), 0);
self.seq_match_counts.clear();
self.score.clear();
self.seq_match_counts.resize(self.target_chars.len(), 0);
self.score.resize(self.target_chars.len(), 0);
let mut first_possible_target_idx: usize = 0;
let mut first_query_char = true;
for (query_char, first_possible_match) in
query.chars().zip(self.first_possible_match.iter())
{
if first_possible_target_idx >= self.target_chars.len() {
return None;
}
first_possible_target_idx = first_possible_target_idx.max(*first_possible_match);
(&mut self.seq_match_counts[first_possible_target_idx..self.target_chars.len()])
.fill(0);
(&mut self.score[first_possible_target_idx..self.target_chars.len()]).fill(0);
let mut first_nonzero_score = None;
for i in first_possible_target_idx..self.target_chars.len() {
let target_char = self.target_chars[i];
let prev_target_score = if i == first_possible_target_idx {
0
} else {
self.score[i - 1]
};
let prev_query_score = if i == 0 { 0 } else { self.prev_score[i - 1] };
let seq_match_count = if i == 0 {
0
} else {
self.prev_seq_match_counts[i - 1]
};
if !first_query_char && prev_query_score == 0 {
self.score[i] = prev_target_score;
continue;
}
if !char_matches(query_char, target_char) {
self.score[i] = prev_target_score;
continue;
}
let mut char_score = 1;
char_score += seq_match_count * 5;
if target_char == query_char {
char_score += 1;
}
if i == 0 {
char_score += 8;
} else {
if matches!(target_char, '/' | '\\') {
char_score += 5;
} else if matches!(target_char, '_' | '-' | '.' | ' ' | '\'' | '"' | ':') {
char_score += 4;
} else if seq_match_count == 0 {
if i > 0
&& matches!(
self.target_chars[i - 1],
'_' | '-' | '.' | ' ' | '\'' | '"' | ':'
)
{
char_score += 2;
} else if target_char.is_ascii() {
if target_char.is_ascii_uppercase() {
char_score += 2;
}
} else if target_char.is_uppercase() {
char_score += 2;
}
}
}
if i + 1 == self.target_chars.len() {
char_score += 2;
}
let new_score = prev_query_score + char_score;
if new_score >= prev_target_score {
self.score[i] = new_score;
self.seq_match_counts[i] = seq_match_count + 1;
if first_nonzero_score.is_none() {
first_nonzero_score = Some(i);
}
} else {
self.score[i] = prev_target_score;
}
}
if let Some(first_nonzero_score) = first_nonzero_score {
first_possible_target_idx = first_nonzero_score + 1;
(&mut self.prev_score[first_nonzero_score..self.target_chars.len()])
.copy_from_slice(&self.score[first_nonzero_score..self.target_chars.len()]);
(&mut self.prev_seq_match_counts[first_nonzero_score..self.target_chars.len()])
.copy_from_slice(
&self.seq_match_counts[first_nonzero_score..self.target_chars.len()],
);
first_query_char = false;
} else {
return None;
}
}
let score = *self.prev_score.last().unwrap_or(&0);
if score == 0 {
None
} else {
Some(score)
}
}
}
pub fn fuzzy_match(target: &str, query: &str) -> Option<usize> {
let mut matcher = FuzzyMatcher::new();
matcher.fuzzy_match(target, query)
}
#[cfg(test)]
mod tests {
use alloc::vec::Vec;
#[test]
fn test_match() {
let result = crate::fuzzy_match("The quick brown fox jumps over the lazy dog.", "fox");
assert!(result.is_some());
let result = crate::fuzzy_match(
"The quick brown fox jumps over the lazy dog.",
"Quick fox jumps the dog",
);
assert!(result.is_some());
}
#[test]
fn test_no_match() {
let result = crate::fuzzy_match("The quick brown fox jumps over the lazy dog.", "cat");
assert!(result.is_none());
let result = crate::fuzzy_match(
"The quick brown fox jumps over the lazy dog.",
"Quick fox jumps the cat",
);
assert!(result.is_none());
}
#[test]
fn test_ranking() {
const TARGET: &str = "The quick brown fox jumps over the lazy dog.";
const QUERIES: &[&str] = &[
"fx", "fox", "jump", "JUMP", "The", "the", "fx over", "quick cat", "The quick", "the quick", "jump the dog", "jmp the do", "jmp the cat", "dog the fox", "het", "xz", "xx", "ee", ];
let mut results = QUERIES
.iter()
.map(|query| (query, crate::fuzzy_match(TARGET, query)))
.collect::<Vec<_>>();
results.retain(|(_, result)| result.is_some());
results.sort_by_key(|(_, result)| result.unwrap());
assert_eq!(
results.iter().map(|(query, _)| *query).collect::<Vec<_>>(),
&[
&"xz",
&"ee",
&"fx",
&"het",
&"fox",
&"the",
&"The",
&"JUMP",
&"jump",
&"fx over",
&"jmp the do",
&"jump the dog",
&"the quick",
&"The quick",
]
);
}
#[test]
fn test_slash() {
let result = crate::fuzzy_match("/bin/ls", "/ls");
assert!(result.is_some());
let result = crate::fuzzy_match("/bin/ls", "\\ls");
assert!(result.is_some());
let result = crate::fuzzy_match("c:\\windows\\notepad.exe", "/windows");
assert!(result.is_some());
let result = crate::fuzzy_match("c:\\windows\\notepad.exe", "\\windows");
assert!(result.is_some());
}
#[test]
fn test_word_bonus() {
let higher = crate::fuzzy_match("words with spaces", "spa");
let lower = crate::fuzzy_match("words with spaces", "pac");
assert!(higher.is_some());
assert!(lower.is_some());
assert!(
higher.unwrap() > lower.unwrap(),
"higher = {:?}, lower = {:?}",
higher,
lower
);
let higher = crate::fuzzy_match("words_with_underscores", "und");
let lower = crate::fuzzy_match("words_with_underscores", "nde");
assert!(higher.is_some());
assert!(lower.is_some());
assert!(
higher.unwrap() > lower.unwrap(),
"higher = {:?}, lower = {:?}",
higher,
lower
);
let higher = crate::fuzzy_match("camelCaseWords", "Wor");
let lower = crate::fuzzy_match("camelCaseWords", "ord");
assert!(higher.is_some());
assert!(lower.is_some());
assert!(
higher.unwrap() > lower.unwrap(),
"higher = {:?}, lower = {:?}",
higher,
lower
);
}
}