use {
super::NameMatch,
secular,
smallvec::{
SmallVec,
smallvec,
},
std::{
cmp::Reverse,
ops::Range,
},
};
/// Buffer holding the lower-cased chars of a candidate while it is searched;
/// up to 32 chars are stored inline before spilling to the heap.
type CandChars = SmallVec<[char; 32]>;
/// Characters recognized as token separators when parsing a pattern.
static SEPARATORS: &[char] = &[',', ';'];
/// Base score of any candidate matching the pattern.
const BONUS_MATCH: i32 = 50_000;
/// Score contribution per byte of the candidate (`str::len`); being negative,
/// it makes shorter candidates score higher.
const BONUS_CANDIDATE_LENGTH: i32 = -1;
/// Returns the chars of `s`, after it has gone through
/// `secular::normalized_lower_lay_string`, as a boxed slice.
///
/// This is the form in which pattern tokens are stored for comparison
/// against candidates.
pub fn norm_chars(s: &str) -> Box<[char]> {
    let normalized = secular::normalized_lower_lay_string(s);
    let chars: Vec<char> = normalized.chars().collect();
    chars.into_boxed_slice()
}
/// A pattern made of one or several tokens which must all be found in a
/// candidate, in any order and without overlapping each other.
#[derive(Debug, Clone, PartialEq)]
pub struct TokPattern {
/// The tokens, each one normalized with `norm_chars`, sorted by
/// decreasing length.
toks: Vec<Box<[char]>>,
/// Sum of the lengths (in chars) of all tokens.
sum_len: usize,
}
impl TokPattern {
    /// Builds a pattern from its textual form.
    ///
    /// The first character of `pattern` which is one of `SEPARATORS` becomes
    /// the separator for the whole pattern: the pattern is split on that
    /// character only, so any other separator character is kept as a regular
    /// character of its token. Empty tokens are dropped and every remaining
    /// token is normalized with `norm_chars`.
    ///
    /// Without any separator, the whole pattern is a single token (or no
    /// token at all when the pattern is empty).
    pub fn new(pattern: &str) -> Self {
        let sep = pattern.chars().find(|c| SEPARATORS.contains(c));
        let mut toks: Vec<Box<[char]>> = match sep {
            Some(sep) => pattern
                .split(sep)
                .filter(|s| !s.is_empty())
                .map(norm_chars)
                .collect(),
            None if pattern.is_empty() => Vec::new(),
            None => vec![norm_chars(pattern)],
        };
        // Longest tokens first, so that a short token can't take the place
        // a longer one needs. The sort is stable: tokens of the same length
        // keep the order they have in the pattern.
        toks.sort_by_key(|t| Reverse(t.len()));
        let sum_len = toks.iter().map(|t| t.len()).sum();
        Self { toks, sum_len }
    }

    /// Tells whether the pattern has no token to look for.
    pub fn is_empty(&self) -> bool {
        self.sum_len == 0
    }

    /// Looks for every token of the pattern in `candidate`.
    ///
    /// Returns one range of char indices per token, in the order of
    /// `self.toks`, or `None` when the pattern is empty or when some token
    /// can't be placed without overlapping an already placed one.
    ///
    /// The search is greedy: each token takes the first free occurrence and
    /// earlier choices are never reconsidered.
    fn find_ranges(
        &self,
        candidate: &str,
    ) -> Option<Vec<Range<usize>>> {
        // A string never has more chars than bytes, so this rejects empty
        // patterns and obviously too short candidates before any allocation.
        if self.sum_len == 0 || candidate.len() < self.sum_len {
            return None;
        }
        let mut cand_chars: CandChars = SmallVec::with_capacity(candidate.len());
        cand_chars.extend(candidate.chars().map(secular::lower_lay_char));
        if cand_chars.len() < self.sum_len {
            return None;
        }
        let mut matching_ranges: Vec<Range<usize>> = Vec::with_capacity(self.toks.len());
        for tok in &self.toks {
            // No underflow below: l <= sum_len <= cand_chars.len()
            let l = tok.len();
            let matching_range = (0..cand_chars.len() + 1 - l)
                .map(|idx| idx..idx + l)
                .filter(|r| &cand_chars[r.start..r.end] == &tok[..])
                .find(|r| {
                    // Keep the occurrence only if it intersects none of the
                    // ranges already taken (half-open interval test).
                    matching_ranges
                        .iter()
                        .all(|pr| r.start >= pr.end || pr.start >= r.end)
                });
            matching_ranges.push(matching_range?);
        }
        Some(matching_ranges)
    }

    /// Computes the score of a candidate already known to match: a fixed
    /// bonus, lowered by the length in bytes of the candidate.
    fn score_of_matching(
        &self,
        candidate: &str,
    ) -> i32 {
        BONUS_MATCH + BONUS_CANDIDATE_LENGTH * candidate.len() as i32
    }

    /// Searches the pattern in `candidate`.
    ///
    /// On success, the returned match holds the score and the sorted char
    /// indices of all matched chars. Returns `None` when the candidate
    /// doesn't match.
    pub fn find(
        &self,
        candidate: &str,
    ) -> Option<NameMatch> {
        let matching_ranges = self.find_ranges(candidate)?;
        // The ranges have the lengths of the tokens, so together they fill
        // exactly sum_len positions.
        let mut pos = smallvec![0; self.sum_len];
        for (i, p) in matching_ranges.into_iter().flatten().enumerate() {
            pos[i] = p;
        }
        // Ranges come in token order, not in candidate order
        pos.sort_unstable();
        let score = self.score_of_matching(candidate);
        Some(NameMatch { score, pos })
    }

    /// Computes the score of `candidate`, or `None` when it doesn't match.
    ///
    /// Same as `find` without building the list of matched positions.
    pub fn score_of(
        &self,
        candidate: &str,
    ) -> Option<i32> {
        self.find_ranges(candidate)
            .map(|_| self.score_of_matching(candidate))
    }
}
#[cfg(test)]
mod tok_pattern_tests {
    use {
        super::*,
        crate::pattern::Pos,
    };

    /// Checks that `pattern` matches `name` exactly at the char indices
    /// marked with a `^` in `pos`.
    ///
    /// Only the columns of the `^` matter: `pos` must be aligned char for
    /// char with `name`, so the spaces in it are significant.
    fn check_pos(
        pattern: &str,
        name: &str,
        pos: &str,
    ) {
        println!("checking pattern={pattern:?} name={name:?}");
        let pat = TokPattern::new(pattern);
        let match_pos = pat.find(name).unwrap().pos;
        let target_pos: Pos = pos
            .chars()
            .enumerate()
            .filter(|(_, c)| *c == '^')
            .map(|(i, _)| i)
            .collect();
        assert_eq!(match_pos, target_pos);
    }

    #[test]
    fn check_match_pos() {
        // Every marker string has the length of the name it annotates.
        check_pos("m,", "miaou", "^    ");
        check_pos("bat", "cabat", "  ^^^");
        check_pos(";ba", "babababaaa", "^^        ");
        check_pos("ba,ca", "bababacaa", "^^    ^^ ");
        check_pos(
            "sub,doc,2",
            "/home/user/path2/subpath/Documents/",
            "               ^ ^^^     ^^^       ",
        );
        // "abc" is placed first (longest token), so "ab" must use the
        // second occurrence of "ab"
        check_pos("ab,abc", "0123/abc/ab/cdg", "     ^^^ ^^    ");
    }

    /// Checks whether `pattern` matches `name` or not.
    fn check_match(
        pattern: &str,
        name: &str,
        do_match: bool,
    ) {
        assert_eq!(TokPattern::new(pattern).find(name).is_some(), do_match);
    }

    #[test]
    fn test_separators() {
        // Both separators give the same tokens
        let a = TokPattern::new("ab;cd;ef");
        let b = TokPattern::new("ab,cd,ef");
        assert_eq!(a, b);
        // Only the first separator met is used for splitting; the other one
        // is then a regular character of the token
        let a = TokPattern::new(",ab;cd;ef");
        assert_eq!(a.toks.len(), 1);
        assert_eq!(a.toks[0].len(), 8);
        let a = TokPattern::new(";ab,cd,ef;");
        assert_eq!(a.toks.len(), 1);
        assert_eq!(a.toks[0].len(), 8);
    }

    #[test]
    fn test_match() {
        check_match("mia", "android/phonegap", false);
        check_match("mi", "a", false);
        // Candidates with more bytes than chars
        check_match("mi", "π", false);
        check_match("mi", "miaou/a", true);
        check_match("imm", "😍", false);
    }

    #[test]
    fn test_tok_repetitions() {
        // A repeated token needs as many non overlapping occurrences
        check_match("sub", "rasub", true);
        check_match("sub,sub", "rasub", false);
        check_match("sub,sub", "rasubandsub", true);
        check_match("sub,sub,sub", "rasubandsub", false);
        // "ccc" can't share its chars with "abc"
        check_match("ghi,abc,def,ccc", "abccc/Defghi", false);
        check_match("ghi,abc,def,ccc", "abcccc/Defghi", true);
    }
}