fuzzy_matcher/
simple.rs

1use std::cell::RefCell;
2use std::ops::Mul;
3
4use crate::FuzzyMatcher;
5use crate::IndexType;
6use crate::ScoreType;
7
8const BASELINE: i64 = 0i64;
9
10thread_local! {
11    static FORWARD: RefCell<Vec<usize>> = RefCell::new(Vec::new());
12    static REVERSE: RefCell<Vec<usize>> = RefCell::new(Vec::new());
13}
14
15impl FuzzyMatcher for SimpleMatcher {
16    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
17        self.fuzzy(choice, pattern)
18    }
19
20    fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<ScoreType> {
21        self.fuzzy(choice, pattern).map(|(score, _)| score)
22    }
23}
24
25#[derive(Eq, PartialEq, Debug, Copy, Clone)]
26enum CaseMatching {
27    Respect,
28    Ignore,
29    Smart,
30}
31
32pub struct SimpleMatcher {
33    case: CaseMatching,
34}
35
36impl Default for SimpleMatcher {
37    fn default() -> Self {
38        SimpleMatcher {
39            case: CaseMatching::Smart,
40        }
41    }
42}
43
44impl SimpleMatcher {
45    fn fuzzy(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
46        let new_match = SimpleMatch::new(choice, pattern, self);
47        new_match.fuzzy()
48    }
49
50    pub fn ignore_case(mut self) -> Self {
51        self.case = CaseMatching::Ignore;
52        self
53    }
54
55    pub fn smart_case(mut self) -> Self {
56        self.case = CaseMatching::Smart;
57        self
58    }
59
60    pub fn respect_case(mut self) -> Self {
61        self.case = CaseMatching::Respect;
62        self
63    }
64
65    fn contains_upper(&self, string: &str) -> bool {
66        string.bytes().any(|b| b.is_ascii_uppercase())
67    }
68
69    fn is_case_sensitive(&self, pattern: &str) -> bool {
70        match self.case {
71            CaseMatching::Respect => true,
72            CaseMatching::Ignore => false,
73            CaseMatching::Smart => self.contains_upper(pattern),
74        }
75    }
76}
77
78struct SimpleMatch<'a> {
79    choice: &'a str,
80    pattern: &'a str,
81    choice_len: usize,
82    pattern_len: usize,
83    case_sensitive: bool,
84    is_ascii: bool,
85}
86
87impl<'a> SimpleMatch<'a> {
88    fn new(choice: &'a str, pattern: &'a str, matcher: &'a SimpleMatcher) -> Self {
89        let case_sensitive = matcher.is_case_sensitive(pattern);
90        let mut choice_len = choice.len();
91        let mut pattern_len = pattern.len();
92
93        let is_ascii = choice.is_ascii() && pattern.is_ascii();
94        if !is_ascii {
95            choice_len = choice.chars().count();
96            pattern_len = pattern.chars().count();
97        }
98
99        Self {
100            choice,
101            pattern,
102            choice_len,
103            pattern_len,
104            case_sensitive,
105            is_ascii,
106        }
107    }
108
109    fn fuzzy(&self) -> Option<(ScoreType, Vec<IndexType>)> {
110        if self.pattern_len == 0 {
111            return Some((0, Vec::new()));
112        }
113
114        if self.choice_len == 0 || self.pattern_len > self.choice_len {
115            return None;
116        }
117
118        let score = FORWARD.with_borrow_mut(|mut pattern_indices| {
119            pattern_indices.clear();
120
121            self.forward_matches(pattern_indices)?;
122
123            let forward_closeness = self.closeness(&pattern_indices);
124
125            if forward_closeness != 0 {
126                self.reverse_matches(&mut pattern_indices, forward_closeness)
127            }
128
129            if self.pattern_len > 3 && Self::none_consecutive(&pattern_indices) {
130                return None;
131            }
132
133            Some(self.score(&pattern_indices))
134        })?;
135
136        if score >= BASELINE {
137            return Some((score, FORWARD.replace(Vec::with_capacity(self.pattern_len))));
138        }
139
140        None
141    }
142
143    #[inline]
144    fn closeness(&self, matches: &[usize]) -> usize {
145        let matches_len = matches.len();
146
147        let start_idx = *matches.first().unwrap_or(&0);
148        let end_idx = *matches.last().unwrap_or(&0);
149
150        self.pattern_len.abs_diff(matches_len).mul(4)
151            + self.pattern_len.abs_diff(end_idx.abs_diff(start_idx) + 1)
152    }
153
154    #[inline]
155    fn none_consecutive(matches: &[usize]) -> bool {
156        matches.iter().enumerate().all(|(idx, val)| {
157            let next_proposed = Some(val + &1);
158            let next_actual = matches.get(idx + 1);
159
160            next_actual != next_proposed.as_ref()
161        })
162    }
163
164    #[inline]
165    fn score(&self, matches: &[usize]) -> i64 {
166        let start_idx = matches.first().unwrap_or(&0);
167
168        let closeness_score = 1_048_576 - (self.closeness(matches) * 32_768);
169
170        let start_idx_bonus = if let Some((first_alpha_idx, _)) = self
171            .choice
172            .bytes()
173            .enumerate()
174            .filter(|(_idx, c_char)| !c_char.is_ascii_alphabetic())
175            .next()
176        {
177            if &first_alpha_idx == start_idx {
178                32_768
179            } else {
180                0
181            }
182        } else {
183            0
184        };
185
186        let first_letter_case_bonus = if self.first_letter_uppercase(start_idx) {
187            16_384
188        } else {
189            0
190        };
191
192        let word_boundary_bonus = self.word_boundary(matches) * 16_384;
193
194        let follows_special_char_bonus = self.follows_special_char(matches) * 4_096;
195
196        (closeness_score
197            + start_idx_bonus
198            + follows_special_char_bonus
199            + word_boundary_bonus
200            + first_letter_case_bonus
201            - 131_072) as i64
202    }
203
204    #[inline]
205    fn forward_matches(&self, pattern_indices: &mut Vec<usize>) -> Option<()> {
206        self.forward(pattern_indices);
207
208        if pattern_indices.is_empty() {
209            return None;
210        }
211
212        if pattern_indices.len() + 2 <= self.pattern_len {
213            return None;
214        }
215
216        Some(())
217    }
218
219    #[inline]
220    fn reverse_matches(&self, matches: &mut Vec<usize>, forward_closeness: usize) {
221        let reverse_closeness = REVERSE.with_borrow_mut(|mut pattern_indices| {
222            pattern_indices.clear();
223
224            self.reverse(&mut pattern_indices);
225
226            self.closeness(&pattern_indices)
227        });
228
229        if reverse_closeness < forward_closeness {
230            *matches = REVERSE.replace(Vec::with_capacity(self.pattern_len));
231        }
232    }
233
234    #[inline]
235    fn word_boundary(&self, matches: &[usize]) -> usize {
236        matches
237            .iter()
238            .filter(|idx| {
239                if idx == &&0 {
240                    return true;
241                }
242
243                let previous = *idx - 1;
244
245                self.choice
246                    .bytes()
247                    .enumerate()
248                    .nth(previous)
249                    .map(|(idx, b)| self.choice.is_char_boundary(idx) && b == b'\t' || b == b' ')
250                    .unwrap_or(false)
251            })
252            .count()
253    }
254
255    #[inline]
256    fn follows_special_char(&self, matches: &[usize]) -> usize {
257        matches
258            .iter()
259            .map(|idx| {
260                let previous = idx - 1;
261
262                if previous <= 0 {
263                    return None;
264                }
265
266                self.choice
267                    .bytes()
268                    .enumerate()
269                    .nth(previous)
270                    .map(|(idx, b)| {
271                        self.choice.is_char_boundary(idx) && b == b'\t'
272                            || b == b'/'
273                            || b == b':'
274                            || b == b'-'
275                            || b == b'_'
276                            || b == b' '
277                    })
278            })
279            .count()
280    }
281
282    #[inline]
283    fn first_letter_uppercase(&self, start_idx: &usize) -> bool {
284        self.pattern.bytes().nth(0).unwrap().is_ascii_uppercase()
285            && self
286                .choice
287                .bytes()
288                .nth(*start_idx)
289                .unwrap()
290                .is_ascii_uppercase()
291    }
292}
293
294pub trait Matching {
295    fn forward(&self, pattern_indices: &mut Vec<usize>);
296    fn reverse(&self, pattern_indices: &mut Vec<usize>);
297    fn char_equal(&self, a: &char, b: &char) -> bool;
298    fn byte_equal(&self, a: &u8, b: &u8) -> bool;
299}
300
301impl<'a> Matching for SimpleMatch<'a> {
302    #[inline]
303    fn forward(&self, pattern_indices: &mut Vec<usize>) {
304        if self.is_ascii {
305            let mut choice_iter = self.choice.bytes().enumerate();
306
307            for p_char in self.pattern.bytes() {
308                match choice_iter.find_map(|(idx, c_char)| {
309                    if self.byte_equal(&p_char, &c_char) {
310                        return Some(idx);
311                    }
312
313                    None
314                }) {
315                    Some(char_idx) => pattern_indices.push(char_idx),
316                    None => continue,
317                }
318            }
319        } else {
320            let mut choice_iter = self.choice.char_indices();
321
322            for p_char in self.pattern.chars() {
323                match choice_iter.find_map(|(idx, c_char)| {
324                    if self.char_equal(&p_char, &c_char) {
325                        return Some(idx);
326                    }
327
328                    None
329                }) {
330                    Some(char_idx) => pattern_indices.push(char_idx),
331                    None => continue,
332                }
333            }
334        }
335    }
336
337    #[inline]
338    fn reverse(&self, pattern_indices: &mut Vec<usize>) {
339        if self.is_ascii {
340            let mut choice_iter = self.choice.bytes().enumerate().rev();
341
342            for p_char in self.pattern.bytes().rev() {
343                match choice_iter.find_map(|(idx, c_char)| {
344                    if self.byte_equal(&p_char, &c_char) {
345                        return Some(idx);
346                    }
347
348                    None
349                }) {
350                    Some(char_idx) => pattern_indices.push(char_idx),
351                    None => continue,
352                }
353            }
354        } else {
355            let mut choice_iter = self.choice.char_indices().rev();
356
357            for p_char in self.pattern.chars().rev() {
358                match choice_iter.find_map(|(idx, c_char)| {
359                    if self.char_equal(&p_char, &c_char) {
360                        return Some(idx);
361                    }
362
363                    None
364                }) {
365                    Some(char_idx) => pattern_indices.push(char_idx),
366                    None => continue,
367                }
368            }
369        }
370
371        pattern_indices.reverse();
372    }
373
374    #[inline]
375    fn char_equal(&self, a: &char, b: &char) -> bool {
376        if !self.case_sensitive {
377            return a.to_lowercase().eq(b.to_lowercase());
378        }
379
380        a == b
381    }
382
383    #[inline]
384    fn byte_equal(&self, a: &u8, b: &u8) -> bool {
385        if !self.case_sensitive {
386            return a.eq_ignore_ascii_case(&b);
387        }
388
389        a == b
390    }
391}
392
393#[cfg(test)]
394mod tests {
395    use super::*;
396
397    #[test]
398    fn test_simple_reverse() {
399        let matcher = SimpleMatcher::default();
400        assert_eq!(
401            Some(vec![7, 8, 9, 10]),
402            matcher
403                .fuzzy_indices("bullsh shit\n", "shit")
404                .map(|inner| inner.1)
405        );
406    }
407
408    #[test]
409    fn test_simple_double_reverse() {
410        let matcher = SimpleMatcher::default();
411        assert_eq!(
412            Some(vec![10, 11, 12, 13]),
413            matcher
414                .fuzzy_indices("bullsh it shit\n", "shit")
415                .map(|inner| inner.1)
416        );
417    }
418
419    #[test]
420    fn test_simple_non_consecutive() {
421        let matcher = SimpleMatcher::default();
422        assert_eq!(None, matcher.fuzzy_indices("bsuhlilt\n", "shit"));
423    }
424}