fuzzy_matcher/
simple.rs

1use std::cell::RefCell;
2
3use crate::FuzzyMatcher;
4use crate::IndexType;
5use crate::ScoreType;
6
7const BASELINE: i64 = 0i64;
8
9thread_local! {
10    static FORWARD: RefCell<Vec<usize>> = RefCell::new(Vec::new());
11    static REVERSE: RefCell<Vec<usize>> = RefCell::new(Vec::new());
12}
13
14impl FuzzyMatcher for SimpleMatcher {
15    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
16        self.fuzzy(choice, pattern)
17    }
18
19    fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<ScoreType> {
20        self.fuzzy(choice, pattern).map(|(score, _)| score)
21    }
22}
23
24#[derive(Eq, PartialEq, Debug, Copy, Clone)]
25enum CaseMatching {
26    Respect,
27    Ignore,
28    Smart,
29}
30
31pub struct SimpleMatcher {
32    case: CaseMatching,
33}
34
35impl Default for SimpleMatcher {
36    fn default() -> Self {
37        SimpleMatcher {
38            case: CaseMatching::Smart,
39        }
40    }
41}
42
43impl SimpleMatcher {
44    fn fuzzy(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
45        let new_match = SimpleMatch::new(choice, pattern, self);
46        new_match.fuzzy()
47    }
48
49    pub fn ignore_case(mut self) -> Self {
50        self.case = CaseMatching::Ignore;
51        self
52    }
53
54    pub fn smart_case(mut self) -> Self {
55        self.case = CaseMatching::Smart;
56        self
57    }
58
59    pub fn respect_case(mut self) -> Self {
60        self.case = CaseMatching::Respect;
61        self
62    }
63
64    fn contains_upper(&self, string: &str) -> bool {
65        string.bytes().any(|b| b.is_ascii_uppercase())
66    }
67
68    fn is_case_sensitive(&self, pattern: &str) -> bool {
69        match self.case {
70            CaseMatching::Respect => true,
71            CaseMatching::Ignore => false,
72            CaseMatching::Smart => self.contains_upper(pattern),
73        }
74    }
75}
76
77struct SimpleMatch<'a> {
78    choice: &'a str,
79    pattern: &'a str,
80    choice_len: usize,
81    pattern_len: usize,
82    case_sensitive: bool,
83    is_ascii: bool,
84}
85
86impl<'a> SimpleMatch<'a> {
87    fn new(choice: &'a str, pattern: &'a str, matcher: &'a SimpleMatcher) -> Self {
88        let case_sensitive = matcher.is_case_sensitive(pattern);
89        let mut choice_len = choice.len();
90        let mut pattern_len = pattern.len();
91
92        let is_ascii = choice.is_ascii() && pattern.is_ascii();
93        if !is_ascii {
94            choice_len = choice.chars().count();
95            pattern_len = pattern.chars().count();
96        }
97
98        Self {
99            choice,
100            pattern,
101            choice_len,
102            pattern_len,
103            case_sensitive,
104            is_ascii,
105        }
106    }
107
108    fn fuzzy(&self) -> Option<(ScoreType, Vec<IndexType>)> {
109        if self.pattern_len == 0 {
110            return Some((0, Vec::new()));
111        }
112
113        if self.choice_len == 0 || self.pattern_len > self.choice_len {
114            return None;
115        }
116
117        let score = FORWARD.with_borrow_mut(|mut pattern_indices| {
118            pattern_indices.clear();
119
120            self.forward_matches(pattern_indices)?;
121
122            let forward_closeness = self.closeness(&pattern_indices);
123
124            if forward_closeness != 0 {
125                self.reverse_matches(&mut pattern_indices, forward_closeness)
126            }
127
128            if self.pattern_len > 3 && Self::none_consecutive(&pattern_indices) {
129                return None;
130            }
131
132            Some(self.score(&pattern_indices))
133        })?;
134
135        if score >= BASELINE {
136            return Some((score, FORWARD.replace(Vec::with_capacity(self.pattern_len))));
137        }
138
139        None
140    }
141
142    #[inline]
143    fn closeness(&self, matches: &[usize]) -> usize {
144        let matches_len = matches.len();
145
146        let start_idx = *matches.first().unwrap_or(&0);
147        let end_idx = *matches.last().unwrap_or(&0);
148
149        self.pattern_len.abs_diff(matches_len)
150            + self.pattern_len.abs_diff(end_idx.abs_diff(start_idx) + 1)
151    }
152
153    #[inline]
154    fn none_consecutive(matches: &[usize]) -> bool {
155        matches.iter().enumerate().all(|(idx, val)| {
156            let next_proposed = Some(val + &1);
157            let next_actual = matches.get(idx + 1);
158
159            next_actual != next_proposed.as_ref()
160        })
161    }
162
163    #[inline]
164    fn score(&self, matches: &[usize]) -> i64 {
165        let start_idx = matches.first().unwrap_or(&0);
166
167        let closeness_score = 1_048_576 - (self.closeness(matches) * 32_768);
168
169        let start_idx_bonus = if let Some((first_alpha_idx, _)) = self
170            .choice
171            .bytes()
172            .enumerate()
173            .filter(|(_idx, c_char)| !c_char.is_ascii_alphabetic())
174            .next()
175        {
176            if &first_alpha_idx == start_idx {
177                32_768
178            } else {
179                0
180            }
181        } else {
182            0
183        };
184
185        let first_letter_case_bonus = if self.first_letter_uppercase(start_idx) {
186            16_384
187        } else {
188            0
189        };
190
191        let word_boundary_bonus = self.word_boundary(matches) * 16_384;
192
193        let follows_special_char_bonus = self.follows_special_char(matches) * 4_096;
194
195        let len_neg = self.choice_len * 16;
196
197        (closeness_score
198            + start_idx_bonus
199            + follows_special_char_bonus
200            + word_boundary_bonus
201            + first_letter_case_bonus
202            - len_neg
203            - 131_072) as i64
204    }
205
206    #[inline]
207    fn forward_matches(&self, pattern_indices: &mut Vec<usize>) -> Option<()> {
208        self.forward(pattern_indices);
209
210        if pattern_indices.is_empty() {
211            return None;
212        }
213
214        if pattern_indices.len() + 2 <= self.pattern_len {
215            return None;
216        }
217
218        Some(())
219    }
220
221    #[inline]
222    fn reverse_matches(&self, matches: &mut Vec<usize>, forward_closeness: usize) {
223        let reverse_closeness = REVERSE.with_borrow_mut(|mut pattern_indices| {
224            pattern_indices.clear();
225
226            self.reverse(&mut pattern_indices);
227
228            self.closeness(&pattern_indices)
229        });
230
231        if reverse_closeness < forward_closeness {
232            *matches = REVERSE.replace(Vec::with_capacity(self.pattern_len));
233        }
234    }
235
236    #[inline]
237    fn word_boundary(&self, matches: &[usize]) -> usize {
238        matches
239            .iter()
240            .filter(|idx| {
241                if idx == &&0 {
242                    return true;
243                }
244
245                let previous = *idx - 1;
246
247                self.choice
248                    .bytes()
249                    .enumerate()
250                    .nth(previous)
251                    .map(|(idx, b)| self.choice.is_char_boundary(idx) && b == b'\t' || b == b' ')
252                    .unwrap_or(false)
253            })
254            .count()
255    }
256
257    #[inline]
258    fn follows_special_char(&self, matches: &[usize]) -> usize {
259        matches
260            .iter()
261            .map(|idx| {
262                let previous = idx - 1;
263
264                if previous <= 0 {
265                    return None;
266                }
267
268                self.choice
269                    .bytes()
270                    .enumerate()
271                    .nth(previous)
272                    .map(|(idx, b)| {
273                        self.choice.is_char_boundary(idx) && b == b'\t'
274                            || b == b'/'
275                            || b == b':'
276                            || b == b'-'
277                            || b == b'_'
278                            || b == b' '
279                    })
280            })
281            .count()
282    }
283
284    #[inline]
285    fn first_letter_uppercase(&self, start_idx: &usize) -> bool {
286        self.pattern.bytes().nth(0).unwrap().is_ascii_uppercase()
287            && self
288                .choice
289                .bytes()
290                .nth(*start_idx)
291                .unwrap()
292                .is_ascii_uppercase()
293    }
294}
295
296pub trait Matching {
297    fn forward(&self, pattern_indices: &mut Vec<usize>);
298    fn reverse(&self, pattern_indices: &mut Vec<usize>);
299    fn char_equal(&self, a: &char, b: &char) -> bool;
300    fn byte_equal(&self, a: &u8, b: &u8) -> bool;
301}
302
303impl<'a> Matching for SimpleMatch<'a> {
304    #[inline]
305    fn forward(&self, pattern_indices: &mut Vec<usize>) {
306        if self.is_ascii {
307            let mut choice_iter = self.choice.bytes().enumerate();
308
309            for p_char in self.pattern.bytes() {
310                match choice_iter.find_map(|(idx, c_char)| {
311                    if self.byte_equal(&p_char, &c_char) {
312                        return Some(idx);
313                    }
314
315                    None
316                }) {
317                    Some(char_idx) => pattern_indices.push(char_idx),
318                    None => continue,
319                }
320            }
321        } else {
322            let mut choice_iter = self.choice.char_indices();
323
324            for p_char in self.pattern.chars() {
325                match choice_iter.find_map(|(idx, c_char)| {
326                    if self.char_equal(&p_char, &c_char) {
327                        return Some(idx);
328                    }
329
330                    None
331                }) {
332                    Some(char_idx) => pattern_indices.push(char_idx),
333                    None => continue,
334                }
335            }
336        }
337    }
338
339    #[inline]
340    fn reverse(&self, pattern_indices: &mut Vec<usize>) {
341        if self.is_ascii {
342            let mut choice_iter = self.choice.bytes().enumerate().rev();
343
344            for p_char in self.pattern.bytes().rev() {
345                match choice_iter.find_map(|(idx, c_char)| {
346                    if self.byte_equal(&p_char, &c_char) {
347                        return Some(idx);
348                    }
349
350                    None
351                }) {
352                    Some(char_idx) => pattern_indices.push(char_idx),
353                    None => continue,
354                }
355            }
356        } else {
357            let mut choice_iter = self.choice.char_indices().rev();
358
359            for p_char in self.pattern.chars().rev() {
360                match choice_iter.find_map(|(idx, c_char)| {
361                    if self.char_equal(&p_char, &c_char) {
362                        return Some(idx);
363                    }
364
365                    None
366                }) {
367                    Some(char_idx) => pattern_indices.push(char_idx),
368                    None => continue,
369                }
370            }
371        }
372
373        pattern_indices.reverse();
374    }
375
376    #[inline]
377    fn char_equal(&self, a: &char, b: &char) -> bool {
378        if !self.case_sensitive {
379            return a.to_lowercase().eq(b.to_lowercase());
380        }
381
382        a == b
383    }
384
385    #[inline]
386    fn byte_equal(&self, a: &u8, b: &u8) -> bool {
387        if !self.case_sensitive {
388            return a.eq_ignore_ascii_case(&b);
389        }
390
391        a == b
392    }
393}
394
395#[cfg(test)]
396mod tests {
397    use super::*;
398
399    #[test]
400    fn test_simple_reverse() {
401        let matcher = SimpleMatcher::default();
402        assert_eq!(
403            Some(vec![7, 8, 9, 10]),
404            matcher
405                .fuzzy_indices("bullsh shit\n", "shit")
406                .map(|inner| inner.1)
407        );
408    }
409
410    #[test]
411    fn test_simple_double_reverse() {
412        let matcher = SimpleMatcher::default();
413        assert_eq!(
414            Some(vec![10, 11, 12, 13]),
415            matcher
416                .fuzzy_indices("bullsh it shit\n", "shit")
417                .map(|inner| inner.1)
418        );
419    }
420
421    #[test]
422    fn test_simple_non_consecutive() {
423        let matcher = SimpleMatcher::default();
424        assert_eq!(None, matcher.fuzzy_indices("bsuhlilt\n", "shit"));
425    }
426}