1use {
6 super::NameMatch,
7 secular,
8 smallvec::{
9 SmallVec,
10 smallvec,
11 },
12 std::fmt::{
13 self,
14 Write,
15 },
16};
17
18type CandChars = SmallVec<[char; 32]>;
19
20const BONUS_MATCH: i32 = 50_000;
22const BONUS_EXACT: i32 = 1_000;
23const BONUS_START: i32 = 10;
24const BONUS_START_WORD: i32 = 5;
25const BONUS_CANDIDATE_LENGTH: i32 = -1; const BONUS_MATCH_LENGTH: i32 = -10; const BONUS_NB_HOLES: i32 = -30; const BONUS_SINGLED_CHAR: i32 = -15; #[derive(Debug, Clone)]
32pub struct FuzzyPattern {
33 chars: Box<[char]>, max_nb_holes: usize,
35}
36
37impl fmt::Display for FuzzyPattern {
38 fn fmt(
39 &self,
40 f: &mut fmt::Formatter<'_>,
41 ) -> fmt::Result {
42 for &c in self.chars.iter() {
43 f.write_char(c)?
44 }
45 Ok(())
46 }
47}
48
49enum MatchSearchResult {
50 Perfect(NameMatch), Some(NameMatch),
52 None,
53}
54
55fn is_word_separator(c: char) -> bool {
56 matches!(c, '_' | ' ' | '-')
57}
58
59impl FuzzyPattern {
60 pub fn from(pat: &str) -> Self {
63 let chars = secular::normalized_lower_lay_string(pat)
64 .chars()
65 .collect::<Vec<char>>()
66 .into_boxed_slice();
67 let max_nb_holes = match chars.len() {
68 1 => 0,
69 2 => 1,
70 3 => 2,
71 4 => 2,
72 5 => 2,
73 6 => 3,
74 7 => 3,
75 8 => 4,
76 _ => chars.len() * 4 / 7,
77 };
78 FuzzyPattern {
79 chars,
80 max_nb_holes,
81 }
82 }
83
84 pub fn is_empty(&self) -> bool {
87 self.chars.is_empty()
88 }
89
90 fn tight_match_from_index(
91 &self,
92 cand_chars: &CandChars,
93 start_idx: usize, ) -> MatchSearchResult {
95 let mut pos = smallvec![0; self.chars.len()]; let mut cand_idx = start_idx;
97 let mut pat_idx = 0; let mut in_hole = false;
99 loop {
100 if cand_chars[cand_idx] == self.chars[pat_idx] {
101 pos[pat_idx] = cand_idx;
102 if in_hole {
103 let mut rev_idx = 1;
106 loop {
107 if pat_idx < rev_idx {
108 break;
109 }
110 if cand_chars[cand_idx - rev_idx] == self.chars[pat_idx - rev_idx] {
111 pos[pat_idx - rev_idx] = cand_idx - rev_idx;
113 } else {
114 break;
115 }
116 rev_idx += 1;
117 }
118 in_hole = false;
119 }
120 pat_idx += 1;
121 if pat_idx == self.chars.len() {
122 break; }
124 } else {
125 if cand_chars.len() - cand_idx <= self.chars.len() - pat_idx {
127 return MatchSearchResult::None;
128 }
129 in_hole = true;
130 }
131 cand_idx += 1;
132 }
133 let mut nb_holes = 0;
134 let mut nb_singled_chars = 0;
135 for idx in 1..pos.len() {
136 if pos[idx] > 1 + pos[idx - 1] {
137 nb_holes += 1;
138 if idx > 1 && pos[idx - 1] > 1 + pos[idx - 2] {
139 if cand_chars[pos[idx - 2] + 1] == cand_chars[pos[idx - 1]] {
142 pos[idx - 1] = pos[idx - 2] + 1;
145 nb_holes -= 1;
146 } else {
147 nb_singled_chars += 1;
148 }
149 }
150 }
151 }
152 if nb_holes > self.max_nb_holes {
153 return MatchSearchResult::None;
154 }
155 let match_len = 1 + cand_idx - pos[0];
156 let mut score = BONUS_MATCH;
157 score += BONUS_CANDIDATE_LENGTH * (cand_chars.len() as i32);
158 score += BONUS_SINGLED_CHAR * nb_singled_chars;
159 score += BONUS_NB_HOLES * (nb_holes as i32);
160 score += match_len as i32 * BONUS_MATCH_LENGTH;
161 if pos[0] == 0 {
162 score += BONUS_START + BONUS_START_WORD;
163 if cand_chars.len() == self.chars.len() {
164 score += BONUS_EXACT;
165 return MatchSearchResult::Perfect(NameMatch { score, pos });
166 }
167 } else {
168 let previous = cand_chars[pos[0] - 1];
169 if is_word_separator(previous) {
170 score += BONUS_START_WORD;
171 if cand_chars.len() - pos[0] == self.chars.len() {
172 return MatchSearchResult::Perfect(NameMatch { score, pos });
173 }
174 }
175 }
176 MatchSearchResult::Some(NameMatch { score, pos })
177 }
178
179 pub fn find(
183 &self,
184 candidate: &str,
185 ) -> Option<NameMatch> {
186 if candidate.len() < self.chars.len() {
187 return None;
188 }
189 let mut cand_chars: CandChars = SmallVec::with_capacity(candidate.len());
190 cand_chars.extend(candidate.chars().map(secular::lower_lay_char));
191 if cand_chars.len() < self.chars.len() {
192 return None;
193 }
194 let mut best_score = 0;
195 let mut best_match: Option<NameMatch> = None;
196 let n = cand_chars.len() + 1 - self.chars.len();
197 for start_idx in 0..n {
198 if cand_chars[start_idx] == self.chars[0] {
199 match self.tight_match_from_index(&cand_chars, start_idx) {
200 MatchSearchResult::Perfect(m) => {
201 return Some(m);
202 }
203 MatchSearchResult::Some(m) => {
204 if m.score > best_score {
205 best_score = m.score;
206 best_match = Some(m);
207 }
208 }
213 _ => {}
214 }
215 }
216 }
217 best_match
218 }
219
220 pub fn score_of(
222 &self,
223 candidate: &str,
224 ) -> Option<i32> {
225 self.find(candidate).map(|nm| nm.score)
226 }
227}
228
229#[cfg(test)]
230mod fuzzy_pattern_tests {
231
232 use {
233 super::*,
234 crate::pattern::Pos,
235 };
236
237 fn check_pos(
239 pattern: &str,
240 name: &str,
241 pos: &str,
242 ) {
243 let pat = FuzzyPattern::from(pattern);
244 let match_pos = pat.find(name).unwrap().pos;
245 let target_pos: Pos = pos
246 .chars()
247 .enumerate()
248 .filter(|(_, c)| *c == '^')
249 .map(|(i, _)| i)
250 .collect();
251 assert_eq!(match_pos, target_pos);
252 }
253
254 #[test]
256 fn check_match_pos() {
257 check_pos("ba", "babababaaa", "^^ ");
258 check_pos("baa", "babababaaa", " ^^^ ");
259 check_pos("bbabaa", "babababaaa", " ^ ^^^^^ ");
260 check_pos("aoml", "bacon.coml", " ^ ^^^");
261 check_pos("broot", "ab br bro oxt ", " ^^^ ^ ^ ");
262 check_pos("broot", "b_broooooot_broot", " ^^^^^");
263 check_pos(
264 "buityp",
265 "builder-styles-less-typing.d.ts",
266 "^^^ ^^^ ",
267 );
268 check_pos(
269 "broot",
270 "ветер br x o vent ootot",
271 " ^^ ^^^ ",
272 );
273 check_pos(
274 "broot",
275 "brbroorrbbbbbrrooorobrototooooot.txt",
276 " ^^^ ^^ ",
277 );
278 check_pos("besh", "benches/shared", "^^ ^^ ");
279 }
280
281 fn check_ordering_for(
285 pattern: &str,
286 names: &[&str],
287 ) {
288 let fp = FuzzyPattern::from(pattern);
289 let mut last_score = fp.find(pattern).map(|m| m.score);
290 let mut last_name = pattern;
291 for name in names {
292 let score = fp.find(name).map(|m| m.score);
293 assert!(
294 score < last_score,
295 "score({name:?}) should be lower than score({last_name:?}) (using find)"
296 );
297 last_name = name;
298 last_score = score;
299 }
300 }
301
302 #[test]
303 fn check_orderings() {
304 check_ordering_for(
305 "broot",
306 &[
307 "a broot",
308 "abbroot",
309 "abcbroot",
310 " abdbroot",
311 "1234broot1",
312 "12345brrrroooottt",
313 "12345brrr roooottt",
314 "brot",
315 ],
316 );
317 check_ordering_for(
318 "Abc",
319 &["abCd", "aBdc", " abdc", " abdbccccc", " a b c", "nothing"],
320 );
321 check_ordering_for(
322 "réveil",
323 &[
324 "Réveillon",
325 "Réveillons",
326 " réveils",
327 "πréveil",
328 "déréveil",
329 "Rêve-t-il ?",
330 " rêves",
331 ],
332 );
333 }
334
335 #[test]
343 fn check_equivalences() {
344 fn check_equivalences_in(arr: &[&str]) {
345 for pattern in arr.iter() {
346 let fp = FuzzyPattern::from(pattern);
347 for name in arr.iter() {
348 println!("looking for pattern {pattern:?} in name {name:?}");
349 assert!(fp.find(name).unwrap().score > 0);
350 }
351 }
352 }
353 check_equivalences_in(&["aB", "ab", "àb", "âB"]);
354 let c12 = "Comunicações";
355 assert_eq!(c12.len(), 14);
356 assert_eq!(c12.chars().count(), 12);
357 let c14 = "Comunicações";
358 assert_eq!(c14.len(), 16);
359 assert_eq!(c14.chars().count(), 14);
360 check_equivalences_in(&["comunicacoes", c12, c14]);
361 check_equivalences_in(&["у", "У"]);
362 }
363 #[test]
367 fn issue_746() {
368 let fp = FuzzyPattern::from("устр");
369 assert!(fp.find("Устройства").is_some());
370 }
371}