fuzzy_matcher/
skim.rs

1#![allow(deprecated)]
2
3use std::cell::RefCell;
4use std::cmp::max;
5use std::fmt::Formatter;
6
7use thread_local::CachedThreadLocal;
8
9use crate::skim::Movement::{Match, Skip};
10use crate::util::{char_equal, cheap_matches};
11///! The fuzzy matching algorithm used by skim
12///!
13///! # Example:
14///! ```edition2018
15///! use fuzzy_matcher::FuzzyMatcher;
16///! use fuzzy_matcher::skim::SkimMatcherV2;
17///!
18///! let matcher = SkimMatcherV2::default();
19///! assert_eq!(None, matcher.fuzzy_match("abc", "abx"));
20///! assert!(matcher.fuzzy_match("axbycz", "abc").is_some());
21///! assert!(matcher.fuzzy_match("axbycz", "xyz").is_some());
22///!
23///! let (score, indices) = matcher.fuzzy_indices("axbycz", "abc").unwrap();
24///! assert_eq!(indices, [0, 2, 4]);
25///! ```
26use crate::{FuzzyMatcher, IndexType, ScoreType};
27
28const BONUS_MATCHED: ScoreType = 4;
29const BONUS_CASE_MATCH: ScoreType = 4;
30const BONUS_UPPER_MATCH: ScoreType = 6;
31const BONUS_ADJACENCY: ScoreType = 10;
32const BONUS_SEPARATOR: ScoreType = 8;
33const BONUS_CAMEL: ScoreType = 8;
34const PENALTY_CASE_UNMATCHED: ScoreType = -1;
35const PENALTY_LEADING: ScoreType = -6;
36// penalty applied for every letter before the first match
37const PENALTY_MAX_LEADING: ScoreType = -18;
38// maxing penalty for leading letters
39const PENALTY_UNMATCHED: ScoreType = -2;
40
41#[deprecated(since = "0.3.5", note = "Please use SkimMatcherV2 instead")]
42pub struct SkimMatcher {}
43
44impl Default for SkimMatcher {
45    fn default() -> Self {
46        Self {}
47    }
48}
49
50/// The V1 matcher is based on ForrestTheWoods's post
51/// https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/
52///
53/// V1 algorithm is deprecated, checkout `FuzzyMatcherV2`
54impl FuzzyMatcher for SkimMatcher {
55    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
56        fuzzy_indices(choice, pattern)
57    }
58
59    fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<ScoreType> {
60        fuzzy_match(choice, pattern)
61    }
62}
63
64#[deprecated(since = "0.3.5", note = "Please use SkimMatcherV2 instead")]
65pub fn fuzzy_match(choice: &str, pattern: &str) -> Option<ScoreType> {
66    if pattern.is_empty() {
67        return Some(0);
68    }
69
70    let scores = build_graph(choice, pattern)?;
71
72    let last_row = &scores[scores.len() - 1];
73    let (_, &MatchingStatus { final_score, .. }) = last_row
74        .iter()
75        .enumerate()
76        .max_by_key(|&(_, x)| x.final_score)
77        .expect("fuzzy_indices failed to iterate over last_row");
78    Some(final_score)
79}
80
81#[deprecated(since = "0.3.5", note = "Please use SkimMatcherV2 instead")]
82pub fn fuzzy_indices(choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
83    if pattern.is_empty() {
84        return Some((0, Vec::new()));
85    }
86
87    let mut picked = vec![];
88    let scores = build_graph(choice, pattern)?;
89
90    let last_row = &scores[scores.len() - 1];
91    let (mut next_col, &MatchingStatus { final_score, .. }) = last_row
92        .iter()
93        .enumerate()
94        .max_by_key(|&(_, x)| x.final_score)
95        .expect("fuzzy_indices failed to iterate over last_row");
96    let mut pat_idx = scores.len() as i64 - 1;
97    while pat_idx >= 0 {
98        let status = scores[pat_idx as usize][next_col];
99        next_col = status.back_ref as usize;
100        picked.push(status.idx);
101        pat_idx -= 1;
102    }
103    picked.reverse();
104    Some((final_score, picked))
105}
106
107#[derive(Clone, Copy, Debug)]
108struct MatchingStatus {
109    pub idx: IndexType,
110    pub score: ScoreType,
111    pub final_score: ScoreType,
112    pub adj_num: IndexType,
113    pub back_ref: IndexType,
114}
115
116impl Default for MatchingStatus {
117    fn default() -> Self {
118        MatchingStatus {
119            idx: 0,
120            score: 0,
121            final_score: 0,
122            adj_num: 1,
123            back_ref: 0,
124        }
125    }
126}
127
128fn build_graph(choice: &str, pattern: &str) -> Option<Vec<Vec<MatchingStatus>>> {
129    let mut scores = vec![];
130
131    let mut match_start_idx = 0; // to ensure that the pushed char are able to match the pattern
132    let mut pat_prev_ch = '\0';
133
134    // initialize the match positions and inline scores
135    for (pat_idx, pat_ch) in pattern.chars().enumerate() {
136        let mut vec = vec![];
137        let mut choice_prev_ch = '\0';
138        for (idx, ch) in choice.chars().enumerate() {
139            if ch.to_ascii_lowercase() == pat_ch.to_ascii_lowercase() && idx >= match_start_idx {
140                let score = fuzzy_score(
141                    ch,
142                    idx as IndexType,
143                    choice_prev_ch,
144                    pat_ch,
145                    pat_idx as IndexType,
146                    pat_prev_ch,
147                );
148                vec.push(MatchingStatus {
149                    idx: idx as IndexType,
150                    score,
151                    final_score: score,
152                    adj_num: 1,
153                    back_ref: 0,
154                });
155            }
156            choice_prev_ch = ch;
157        }
158
159        if vec.is_empty() {
160            // not matched
161            return None;
162        }
163        match_start_idx = vec[0].idx as usize + 1;
164        scores.push(vec);
165        pat_prev_ch = pat_ch;
166    }
167
168    // calculate max scores considering adjacent characters
169    for pat_idx in 1..scores.len() {
170        let (first_half, last_half) = scores.split_at_mut(pat_idx);
171
172        let prev_row = &first_half[first_half.len() - 1];
173        let cur_row = &mut last_half[0];
174
175        for idx in 0..cur_row.len() {
176            let next = cur_row[idx];
177            let prev = if idx > 0 {
178                cur_row[idx - 1]
179            } else {
180                MatchingStatus::default()
181            };
182
183            let mut score_before_idx = prev.final_score - prev.score + next.score;
184            score_before_idx += PENALTY_UNMATCHED * ((next.idx - prev.idx) as ScoreType);
185            score_before_idx -= if prev.adj_num == 0 {
186                BONUS_ADJACENCY
187            } else {
188                0
189            };
190
191            let (back_ref, score, adj_num) = prev_row
192                .iter()
193                .enumerate()
194                .take_while(|&(_, &MatchingStatus { idx, .. })| idx < next.idx)
195                .skip_while(|&(_, &MatchingStatus { idx, .. })| idx < prev.idx)
196                .map(|(back_ref, cur)| {
197                    let adj_num = next.idx - cur.idx - 1;
198                    let mut final_score = cur.final_score + next.score;
199                    final_score += if adj_num == 0 {
200                        BONUS_ADJACENCY
201                    } else {
202                        PENALTY_UNMATCHED * adj_num as ScoreType
203                    };
204                    (back_ref, final_score, adj_num)
205                })
206                .max_by_key(|&(_, x, _)| x)
207                .unwrap_or((prev.back_ref as usize, score_before_idx, prev.adj_num));
208
209            cur_row[idx] = if idx > 0 && score < score_before_idx {
210                MatchingStatus {
211                    final_score: score_before_idx,
212                    back_ref: prev.back_ref,
213                    adj_num,
214                    ..next
215                }
216            } else {
217                MatchingStatus {
218                    final_score: score,
219                    back_ref: back_ref as IndexType,
220                    adj_num,
221                    ..next
222                }
223            };
224        }
225    }
226
227    Some(scores)
228}
229
230// judge how many scores the current index should get
231fn fuzzy_score(
232    choice_ch: char,
233    choice_idx: IndexType,
234    choice_prev_ch: char,
235    pat_ch: char,
236    pat_idx: IndexType,
237    _pat_prev_ch: char,
238) -> ScoreType {
239    let mut score = BONUS_MATCHED;
240
241    let choice_prev_ch_type = CharType::of(choice_prev_ch);
242    let choice_role = CharRole::of(choice_prev_ch, choice_ch);
243
244    if pat_ch == choice_ch {
245        if pat_ch.is_uppercase() {
246            score += BONUS_UPPER_MATCH;
247        } else {
248            score += BONUS_CASE_MATCH;
249        }
250    } else {
251        score += PENALTY_CASE_UNMATCHED;
252    }
253
254    // apply bonus for camelCases
255    if choice_role == CharRole::Head
256        || choice_role == CharRole::Break
257        || choice_role == CharRole::Camel
258    {
259        score += BONUS_CAMEL;
260    }
261
262    // apply bonus for matches after a separator
263    if choice_prev_ch_type == CharType::HardSep || choice_prev_ch_type == CharType::SoftSep {
264        score += BONUS_SEPARATOR;
265    }
266
267    if pat_idx == 0 {
268        score += max(
269            (choice_idx as ScoreType) * PENALTY_LEADING,
270            PENALTY_MAX_LEADING,
271        );
272    }
273
274    score
275}
276
277#[derive(Copy, Clone)]
278pub struct SkimScoreConfig {
279    pub score_match: i32,
280    pub gap_start: i32,
281    pub gap_extension: i32,
282
283    /// The first character in the typed pattern usually has more significance
284    /// than the rest so it's important that it appears at special positions where
285    /// bonus points are given. e.g. "to-go" vs. "ongoing" on "og" or on "ogo".
286    /// The amount of the extra bonus should be limited so that the gap penalty is
287    /// still respected.
288    pub bonus_first_char_multiplier: i32,
289
290    /// We prefer matches at the beginning of a word, but the bonus should not be
291    /// too great to prevent the longer acronym matches from always winning over
292    /// shorter fuzzy matches. The bonus point here was specifically chosen that
293    /// the bonus is cancelled when the gap between the acronyms grows over
294    /// 8 characters, which is approximately the average length of the words found
295    /// in web2 dictionary and my file system.
296    pub bonus_head: i32,
297
298    /// Just like bonus_head, but its breakage of word is not that strong, so it should
299    /// be slighter less then bonus_head
300    pub bonus_break: i32,
301
302    /// Edge-triggered bonus for matches in camelCase words.
303    /// Compared to word-boundary case, they don't accompany single-character gaps
304    /// (e.g. FooBar vs. foo-bar), so we deduct bonus point accordingly.
305    pub bonus_camel: i32,
306
307    /// Minimum bonus point given to characters in consecutive chunks.
308    /// Note that bonus points for consecutive matches shouldn't have needed if we
309    /// used fixed match score as in the original algorithm.
310    pub bonus_consecutive: i32,
311
312    /// Skim will match case-sensitively if the pattern contains ASCII upper case,
313    /// If case of case insensitive match, the penalty will be given to case mismatch
314    pub penalty_case_mismatch: i32,
315}
316
317impl Default for SkimScoreConfig {
318    fn default() -> Self {
319        let score_match = 16;
320        let gap_start = -3;
321        let gap_extension = -1;
322        let bonus_first_char_multiplier = 2;
323
324        Self {
325            score_match,
326            gap_start,
327            gap_extension,
328            bonus_first_char_multiplier,
329            bonus_head: score_match / 2,
330            bonus_break: score_match / 2 + gap_extension,
331            bonus_camel: score_match / 2 + 2 * gap_extension,
332            bonus_consecutive: -(gap_start + gap_extension),
333            penalty_case_mismatch: gap_extension * 2,
334        }
335    }
336}
337
338#[derive(Debug, Copy, Clone, PartialEq)]
339enum Movement {
340    Match,
341    Skip,
342}
343
344/// Inner state of the score matrix
345// Implementation detail: tried to pad to 16B
346// will store the m and p matrix together
347#[derive(Clone)]
348struct MatrixCell {
349    pub m_move: Movement,
350    pub m_score: i32,
351    pub p_move: Movement,
352    pub p_score: i32, // The max score of align pattern[..i] & choice[..j]
353
354    // temporary fields (make use the rest of the padding)
355    pub matched: bool,
356    pub bonus: i32,
357}
358
359const MATRIX_CELL_NEG_INFINITY: i32 = std::i16::MIN as i32;
360
361impl Default for MatrixCell {
362    fn default() -> Self {
363        Self {
364            m_move: Skip,
365            m_score: MATRIX_CELL_NEG_INFINITY,
366            p_move: Skip,
367            p_score: MATRIX_CELL_NEG_INFINITY,
368            matched: false,
369            bonus: 0,
370        }
371    }
372}
373
374impl MatrixCell {
375    pub fn reset(&mut self) {
376        self.m_move = Skip;
377        self.m_score = MATRIX_CELL_NEG_INFINITY;
378        self.p_move = Skip;
379        self.p_score = MATRIX_CELL_NEG_INFINITY;
380        self.bonus = 0;
381        self.matched = false;
382    }
383}
384
385/// Simulate a 1-D vector as 2-D matrix
386struct ScoreMatrix<'a> {
387    matrix: &'a mut [MatrixCell],
388    pub rows: usize,
389    pub cols: usize,
390}
391
392impl<'a> ScoreMatrix<'a> {
393    /// given a matrix, extend it to be (rows x cols) and fill in as init_val
394    pub fn new(matrix: &'a mut Vec<MatrixCell>, rows: usize, cols: usize) -> Self {
395        matrix.resize(rows * cols, MatrixCell::default());
396        ScoreMatrix { matrix, rows, cols }
397    }
398
399    #[inline]
400    fn get_index(&self, row: usize, col: usize) -> usize {
401        row * self.cols + col
402    }
403
404    fn get_row(&self, row: usize) -> &[MatrixCell] {
405        let start = row * self.cols;
406        &self.matrix[start..start + self.cols]
407    }
408}
409
410impl<'a> std::ops::Index<(usize, usize)> for ScoreMatrix<'a> {
411    type Output = MatrixCell;
412
413    fn index(&self, index: (usize, usize)) -> &Self::Output {
414        &self.matrix[self.get_index(index.0, index.1)]
415    }
416}
417
418impl<'a> std::ops::IndexMut<(usize, usize)> for ScoreMatrix<'a> {
419    fn index_mut(&mut self, index: (usize, usize)) -> &mut Self::Output {
420        &mut self.matrix[self.get_index(index.0, index.1)]
421    }
422}
423
424impl<'a> std::fmt::Debug for ScoreMatrix<'a> {
425    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
426        let _ = writeln!(f, "M score:");
427        for row in 0..self.rows {
428            for col in 0..self.cols {
429                let cell = &self[(row, col)];
430                write!(
431                    f,
432                    "{:4}/{}  ",
433                    if cell.m_score == MATRIX_CELL_NEG_INFINITY {
434                        -999
435                    } else {
436                        cell.m_score
437                    },
438                    match cell.m_move {
439                        Match => 'M',
440                        Skip => 'S',
441                    }
442                )?;
443            }
444            writeln!(f)?;
445        }
446
447        let _ = writeln!(f, "P score:");
448        for row in 0..self.rows {
449            for col in 0..self.cols {
450                let cell = &self[(row, col)];
451                write!(
452                    f,
453                    "{:4}/{}  ",
454                    if cell.p_score == MATRIX_CELL_NEG_INFINITY {
455                        -999
456                    } else {
457                        cell.p_score
458                    },
459                    match cell.p_move {
460                        Match => 'M',
461                        Skip => 'S',
462                    }
463                )?;
464            }
465            writeln!(f)?;
466        }
467
468        Ok(())
469    }
470}
471
472/// We categorize characters into types:
473///
474/// - Empty(E): the start of string
475/// - Upper(U): the ascii upper case
476/// - lower(L): the ascii lower case & other unicode characters
477/// - number(N): ascii number
478/// - hard separator(S): clearly separate the content: ` ` `/` `\` `|` `(` `) `[` `]` `{` `}`
479/// - soft separator(s): other ascii punctuation, e.g. `!` `"` `#` `$`, ...
480#[derive(Debug, PartialEq, Copy, Clone)]
481enum CharType {
482    Empty,
483    Upper,
484    Lower,
485    Number,
486    HardSep,
487    SoftSep,
488}
489
490impl CharType {
491    pub fn of(ch: char) -> Self {
492        match ch {
493            '\0' => CharType::Empty,
494            ' ' | '/' | '\\' | '|' | '(' | ')' | '[' | ']' | '{' | '}' => CharType::HardSep,
495            '!'..='\'' | '*'..='.' | ':'..='@' | '^'..='`' | '~' => CharType::SoftSep,
496            '0'..='9' => CharType::Number,
497            'A'..='Z' => CharType::Upper,
498            _ => CharType::Lower,
499        }
500    }
501}
502
503/// Ref: https://github.com/llvm-mirror/clang-tools-extra/blob/master/clangd/FuzzyMatch.cpp
504///
505///
506/// ```text
507/// +-----------+--------------+-------+
508/// | Example   | Chars | Type | Role  |
509/// +-----------+--------------+-------+
510/// | (f)oo     | ^fo   | Ell  | Head  |
511/// | (F)oo     | ^Fo   | EUl  | Head  |
512/// | Foo/(B)ar | /Ba   | SUl  | Head  |
513/// | Foo/(b)ar | /ba   | Sll  | Head  |
514/// | Foo.(B)ar | .Ba   | SUl  | Break |
515/// | Foo(B)ar  | oBa   | lUl  | Camel |
516/// | 123(B)ar  | 3Ba   | nUl  | Camel |
517/// | F(o)oBar  | Foo   | Ull  | Tail  |
518/// | H(T)TP    | HTT   | UUU  | Tail  |
519/// | others    |       |      | Tail  |
520/// +-----------+--------------+-------+
521#[derive(Debug, PartialEq, Copy, Clone)]
522enum CharRole {
523    Head,
524    Tail,
525    Camel,
526    Break,
527}
528
529impl CharRole {
530    pub fn of(prev: char, cur: char) -> Self {
531        Self::of_type(CharType::of(prev), CharType::of(cur))
532    }
533    pub fn of_type(prev: CharType, cur: CharType) -> Self {
534        match (prev, cur) {
535            (CharType::Empty, _) | (CharType::HardSep, _) => CharRole::Head,
536            (CharType::SoftSep, _) => CharRole::Break,
537            (CharType::Lower, CharType::Upper) | (CharType::Number, CharType::Upper) => {
538                CharRole::Camel
539            }
540            _ => CharRole::Tail,
541        }
542    }
543}
544
545#[derive(Eq, PartialEq, Debug, Copy, Clone)]
546enum CaseMatching {
547    Respect,
548    Ignore,
549    Smart,
550}
551
552/// Fuzzy matching is a sub problem is sequence alignment.
553/// Specifically what we'd like to implement is sequence alignment with affine gap penalty.
554/// Ref: https://www.cs.cmu.edu/~ckingsf/bioinfo-lectures/gaps.pdf
555///
556/// Given `pattern`(i) and `choice`(j), we'll maintain 2 score matrix:
557///
558/// ```text
559/// M[i][j] = match(i, j) + max(M[i-1][j-1] + consecutive, P[i-1][j-1])
560/// M[i][j] = -infinity if p[i][j] do not match
561///
562/// M[i][j] means the score of best alignment of p[..=i] and c[..=j] ending with match/mismatch e.g.:
563///
564/// c: [.........]b
565/// p: [.........]b
566///
567/// So that p[..=i-1] and c[..=j-1] could be any alignment
568///
569/// P[i][j] = max(M[i][j-k]-gap(k)) for k in 1..j
570///
571/// P[i][j] means the score of best alignment of p[..=i] and c[..=j] where c[j] is not matched.
572/// So that we need to search through all the previous matches, and calculate the gap.
573///
574///   (j-k)--.   j
575/// c: [....]bcdef
576/// p: [....]b----
577///          i
578/// ```
579///
580/// Note that the above is O(n^3) in the worst case. However the above algorithm uses a general gap
581/// penalty, but we use affine gap: `gap = gap_start + k * gap_extend` where:
582/// - u: the cost of starting of gap
583/// - v: the cost of extending a gap by one more space.
584///
585/// So that we could optimize the algorithm by:
586///
587/// ```text
588/// P[i][j] = max(gap_start + gap_extend + M[i][j-1], gap_extend + P[i][j-1])
589/// ```
590///
591/// Besides, since we are doing fuzzy matching, we'll prefer some pattern over others.
592/// So we'll calculate in-place bonus for each character. e.g. bonus for camel cases.
593///
594/// In summary:
595///
596/// ```text
597/// B[j] = in_place_bonus_of(j)
598/// M[i][j] = match(i, j) + max(M[i-1][j-1] + consecutive, P[i-1][j-1])
599/// M[i][j] = -infinity if p[i] and c[j] do not match
600/// P[i][j] = max(gap_start + gap_extend + M[i][j-1], gap_extend + P[i][j-1])
601/// ```
602pub struct SkimMatcherV2 {
603    debug: bool,
604
605    score_config: SkimScoreConfig,
606    element_limit: usize,
607    case: CaseMatching,
608    use_cache: bool,
609
610    m_cache: CachedThreadLocal<RefCell<Vec<MatrixCell>>>,
611    c_cache: CachedThreadLocal<RefCell<Vec<char>>>, // vector to store the characters of choice
612    p_cache: CachedThreadLocal<RefCell<Vec<char>>>, // vector to store the characters of pattern
613}
614
615impl Default for SkimMatcherV2 {
616    fn default() -> Self {
617        Self {
618            debug: false,
619            score_config: SkimScoreConfig::default(),
620            element_limit: 0,
621            case: CaseMatching::Smart,
622            use_cache: true,
623
624            m_cache: CachedThreadLocal::new(),
625            c_cache: CachedThreadLocal::new(),
626            p_cache: CachedThreadLocal::new(),
627        }
628    }
629}
630
631impl SkimMatcherV2 {
632    pub fn score_config(mut self, score_config: SkimScoreConfig) -> Self {
633        self.score_config = score_config;
634        self
635    }
636
637    pub fn element_limit(mut self, elements: usize) -> Self {
638        self.element_limit = elements;
639        self
640    }
641
642    pub fn ignore_case(mut self) -> Self {
643        self.case = CaseMatching::Ignore;
644        self
645    }
646
647    pub fn smart_case(mut self) -> Self {
648        self.case = CaseMatching::Smart;
649        self
650    }
651
652    pub fn respect_case(mut self) -> Self {
653        self.case = CaseMatching::Respect;
654        self
655    }
656
657    pub fn use_cache(mut self, use_cache: bool) -> Self {
658        self.use_cache = use_cache;
659        self
660    }
661
662    pub fn debug(mut self, debug: bool) -> Self {
663        self.debug = debug;
664        self
665    }
666
667    /// Build the score matrix using the algorithm described above
668    fn build_score_matrix(
669        &self,
670        m: &mut ScoreMatrix,
671        choice: &[char],
672        pattern: &[char],
673        first_match_indices: &[usize],
674        compressed: bool,
675        case_sensitive: bool,
676    ) {
677        let mut in_place_bonuses = vec![0; m.cols];
678
679        self.build_in_place_bonus(choice, &mut in_place_bonuses);
680
681        // need to reset M[row][first_match] & M[i][j-1]
682        m[(0, 0)].reset();
683        for i in 1..m.rows {
684            m[(i, first_match_indices[i - 1])].reset();
685        }
686
687        for j in 0..m.cols {
688            // p[0][j]: the score of best alignment of p[] and c[..=j] where c[j] is not matched
689            m[(0, j)].reset();
690            m[(0, j)].p_score = self.score_config.gap_extension;
691        }
692
693        // update the matrix;
694        for (i, &p_ch) in pattern.iter().enumerate() {
695            let row = self.adjust_row_idx(i + 1, compressed);
696            let row_prev = self.adjust_row_idx(i, compressed);
697            let to_skip = first_match_indices[i];
698
699            for (j, &c_ch) in choice[to_skip..].iter().enumerate() {
700                let col = to_skip + j + 1;
701                let col_prev = to_skip + j;
702                let idx_cur = m.get_index(row, col);
703                let idx_last = m.get_index(row, col_prev);
704                let idx_prev = m.get_index(row_prev, col_prev);
705
706                // update M matrix
707                // M[i][j] = match(i, j) + max(M[i-1][j-1], P[i-1][j-1])
708                if let Some(cur_match_score) =
709                    self.calculate_match_score(c_ch, p_ch, case_sensitive)
710                {
711                    let prev_match_score = m.matrix[idx_prev].m_score;
712                    let prev_skip_score = m.matrix[idx_prev].p_score;
713                    let prev_match_bonus = m.matrix[idx_last].bonus;
714                    let in_place_bonus = in_place_bonuses[col];
715
716                    let consecutive_bonus = max(
717                        prev_match_bonus,
718                        max(in_place_bonus, self.score_config.bonus_consecutive),
719                    );
720                    m.matrix[idx_last].bonus = consecutive_bonus;
721
722                    let score_match = prev_match_score + consecutive_bonus;
723                    let score_skip = prev_skip_score + in_place_bonus;
724
725                    if score_match >= score_skip {
726                        m.matrix[idx_cur].m_score = score_match + cur_match_score as i32;
727                        m.matrix[idx_cur].m_move = Movement::Match;
728                    } else {
729                        m.matrix[idx_cur].m_score = score_skip + cur_match_score as i32;
730                        m.matrix[idx_cur].m_move = Movement::Skip;
731                    }
732                } else {
733                    m.matrix[idx_cur].m_score = MATRIX_CELL_NEG_INFINITY;
734                    m.matrix[idx_cur].m_move = Movement::Skip;
735                    m.matrix[idx_cur].bonus = 0;
736                }
737
738                // update P matrix
739                // P[i][j] = max(gap_start + gap_extend + M[i][j-1], gap_extend + P[i][j-1])
740                let prev_match_score = self.score_config.gap_start
741                    + self.score_config.gap_extension
742                    + m.matrix[idx_last].m_score;
743                let prev_skip_score = self.score_config.gap_extension + m.matrix[idx_last].p_score;
744                if prev_match_score >= prev_skip_score {
745                    m.matrix[idx_cur].p_score = prev_match_score;
746                    m.matrix[idx_cur].p_move = Movement::Match;
747                } else {
748                    m.matrix[idx_cur].p_score = prev_skip_score;
749                    m.matrix[idx_cur].p_move = Movement::Skip;
750                }
751            }
752        }
753    }
754
755    /// check bonus for start of camel case, etc.
756    fn build_in_place_bonus(&self, choice: &[char], b: &mut [i32]) {
757        let mut prev_ch = '\0';
758        for (j, &c_ch) in choice.iter().enumerate() {
759            let prev_ch_type = CharType::of(prev_ch);
760            let ch_type = CharType::of(c_ch);
761            b[j + 1] = self.in_place_bonus(prev_ch_type, ch_type);
762            prev_ch = c_ch;
763        }
764
765        if b.len() > 1 {
766            b[1] *= self.score_config.bonus_first_char_multiplier;
767        }
768    }
769
770    /// In case we don't need to backtrack the matching indices, we could use only 2 rows for the
771    /// matrix, this function could be used to rotate accessing these two rows.
772    fn adjust_row_idx(&self, row_idx: usize, compressed: bool) -> usize {
773        if compressed {
774            row_idx & 1
775        } else {
776            row_idx
777        }
778    }
779
780    /// Calculate the matching score of the characters
781    /// return None if not matched.
782    fn calculate_match_score(&self, c: char, p: char, case_sensitive: bool) -> Option<u16> {
783        if !char_equal(c, p, case_sensitive) {
784            return None;
785        }
786
787        let score = self.score_config.score_match;
788        let mut bonus = 0;
789
790        // penalty on case mismatch
791        if !case_sensitive && p != c {
792            bonus += self.score_config.penalty_case_mismatch;
793        }
794
795        Some(max(0, score + bonus) as u16)
796    }
797
798    #[inline]
799    fn in_place_bonus(&self, prev_char_type: CharType, char_type: CharType) -> i32 {
800        match CharRole::of_type(prev_char_type, char_type) {
801            CharRole::Head => self.score_config.bonus_head,
802            CharRole::Camel => self.score_config.bonus_camel,
803            CharRole::Break => self.score_config.bonus_break,
804            CharRole::Tail => 0,
805        }
806    }
807
808    fn contains_upper(&self, string: &str) -> bool {
809        for ch in string.chars() {
810            if ch.is_ascii_uppercase() {
811                return true;
812            }
813        }
814
815        false
816    }
817
818    pub fn fuzzy(
819        &self,
820        choice: &str,
821        pattern: &str,
822        with_pos: bool,
823    ) -> Option<(ScoreType, Vec<IndexType>)> {
824        if pattern.is_empty() {
825            return Some((0, Vec::new()));
826        }
827
828        let case_sensitive = match self.case {
829            CaseMatching::Respect => true,
830            CaseMatching::Ignore => false,
831            CaseMatching::Smart => self.contains_upper(pattern),
832        };
833
834        let compressed = !with_pos;
835
836        // initialize the score matrix
837        let mut m = self
838            .m_cache
839            .get_or(|| RefCell::new(Vec::new()))
840            .borrow_mut();
841        let mut choice_chars = self
842            .c_cache
843            .get_or(|| RefCell::new(Vec::new()))
844            .borrow_mut();
845        let mut pattern_chars = self
846            .p_cache
847            .get_or(|| RefCell::new(Vec::new()))
848            .borrow_mut();
849
850        choice_chars.clear();
851        for char in choice.chars() {
852            choice_chars.push(char);
853        }
854
855        pattern_chars.clear();
856        for char in pattern.chars() {
857            pattern_chars.push(char);
858        }
859
860        let first_match_indices = cheap_matches(&choice_chars, &pattern_chars, case_sensitive)?;
861
862        let cols = choice_chars.len() + 1;
863        let num_char_pattern = pattern_chars.len();
864        let rows = if compressed { 2 } else { num_char_pattern + 1 };
865
866        if self.element_limit > 0 && self.element_limit < rows * cols {
867            return self.simple_match(
868                &choice_chars,
869                &pattern_chars,
870                &first_match_indices,
871                case_sensitive,
872                with_pos,
873            );
874        }
875
876        let mut m = ScoreMatrix::new(&mut m, rows, cols);
877        self.build_score_matrix(
878            &mut m,
879            &choice_chars,
880            &pattern_chars,
881            &first_match_indices,
882            compressed,
883            case_sensitive,
884        );
885        let first_col_of_last_row = first_match_indices[first_match_indices.len() - 1];
886        let last_row = m.get_row(self.adjust_row_idx(num_char_pattern, compressed));
887        let (pat_idx, &MatrixCell { m_score, .. }) = last_row[first_col_of_last_row..]
888            .iter()
889            .enumerate()
890            .max_by_key(|&(_, x)| x.m_score)
891            .map(|(idx, cell)| (idx + first_col_of_last_row, cell))
892            .expect("fuzzy_matcher failed to iterate over last_row");
893
894        let mut positions = if with_pos {
895            Vec::with_capacity(num_char_pattern)
896        } else {
897            Vec::new()
898        };
899        if with_pos {
900            let mut i = m.rows - 1;
901            let mut j = pat_idx;
902            let mut track_m = true;
903            let mut current_move = Match;
904            let first_col_first_row = first_match_indices[0];
905            while i > 0 && j > first_col_first_row {
906                if current_move == Match {
907                    positions.push((j - 1) as IndexType);
908                }
909
910                let cell = &m[(i, j)];
911                current_move = if track_m { cell.m_move } else { cell.p_move };
912                if track_m {
913                    i -= 1;
914                }
915
916                j -= 1;
917
918                track_m = match current_move {
919                    Match => true,
920                    Skip => false,
921                };
922            }
923            positions.reverse();
924        }
925
926        if self.debug {
927            println!("Matrix:\n{:?}", m);
928        }
929
930        if !self.use_cache {
931            // drop the allocated memory
932            self.m_cache.get().map(|cell| cell.replace(vec![]));
933            self.c_cache.get().map(|cell| cell.replace(vec![]));
934            self.p_cache.get().map(|cell| cell.replace(vec![]));
935        }
936
937        Some((m_score as ScoreType, positions))
938    }
939
940    pub fn simple_match(
941        &self,
942        choice: &[char],
943        pattern: &[char],
944        first_match_indices: &[usize],
945        case_sensitive: bool,
946        with_pos: bool,
947    ) -> Option<(ScoreType, Vec<IndexType>)> {
948        if pattern.len() <= 0 {
949            return Some((0, Vec::new()));
950        } else if pattern.len() == 1 {
951            let match_idx = first_match_indices[0];
952            let prev_ch = if match_idx > 0 {
953                choice[match_idx - 1]
954            } else {
955                '\0'
956            };
957            let prev_ch_type = CharType::of(prev_ch);
958            let ch_type = CharType::of(choice[match_idx]);
959            let in_place_bonus = self.in_place_bonus(prev_ch_type, ch_type);
960            return Some((in_place_bonus as ScoreType, vec![match_idx as IndexType]));
961        }
962
963        let mut start_idx = first_match_indices[0];
964        let end_idx = first_match_indices[first_match_indices.len() - 1];
965
966        let mut pattern_iter = pattern.iter().rev().peekable();
967        for (idx, &c) in choice[start_idx..=end_idx].iter().enumerate().rev() {
968            match pattern_iter.peek() {
969                Some(&&p) => {
970                    if char_equal(c, p, case_sensitive) {
971                        let _ = pattern_iter.next();
972                        start_idx = idx;
973                    }
974                }
975                None => break,
976            }
977        }
978
979        Some(self.calculate_score_with_pos(
980            choice,
981            pattern,
982            start_idx,
983            end_idx,
984            case_sensitive,
985            with_pos,
986        ))
987    }
988
989    fn calculate_score_with_pos(
990        &self,
991        choice: &[char],
992        pattern: &[char],
993        start_idx: usize,
994        end_idx: usize,
995        case_sensitive: bool,
996        with_pos: bool,
997    ) -> (ScoreType, Vec<IndexType>) {
998        let mut pos = Vec::new();
999
1000        let choice_iter = choice[start_idx..=end_idx].iter().enumerate();
1001        let mut pattern_iter = pattern.iter().enumerate().peekable();
1002
1003        // unfortunately we could not get the the character before the first character's(for performance)
1004        // so we tread them as NonWord
1005        let mut prev_ch = '\0';
1006
1007        let mut score: i32 = 0;
1008        let mut in_gap = false;
1009        let mut prev_match_bonus = 0;
1010
1011        for (c_idx, &c) in choice_iter {
1012            let op = pattern_iter.peek();
1013            if op.is_none() {
1014                break;
1015            }
1016
1017            let prev_ch_type = CharType::of(prev_ch);
1018            let ch_type = CharType::of(c);
1019            let in_place_bonus = self.in_place_bonus(prev_ch_type, ch_type);
1020
1021            let (_p_idx, &p) = *op.unwrap();
1022
1023            if let Some(match_score) = self.calculate_match_score(c, p, case_sensitive) {
1024                if with_pos {
1025                    pos.push((c_idx + start_idx) as IndexType);
1026                }
1027
1028                score += match_score as i32;
1029
1030                let consecutive_bonus = max(
1031                    prev_match_bonus,
1032                    max(in_place_bonus, self.score_config.bonus_consecutive),
1033                );
1034                prev_match_bonus = consecutive_bonus;
1035
1036                if !in_gap {
1037                    score += consecutive_bonus;
1038                }
1039
1040                in_gap = false;
1041                let _ = pattern_iter.next();
1042            } else {
1043                if !in_gap {
1044                    score += self.score_config.gap_start;
1045                }
1046
1047                score += self.score_config.gap_extension;
1048                in_gap = true;
1049                prev_match_bonus = 0;
1050            }
1051
1052            prev_ch = c;
1053        }
1054
1055        (score as ScoreType, pos)
1056    }
1057}
1058
1059impl FuzzyMatcher for SkimMatcherV2 {
1060    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
1061        self.fuzzy(choice, pattern, true)
1062    }
1063
1064    fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<ScoreType> {
1065        self.fuzzy(choice, pattern, false).map(|(score, _)| score)
1066    }
1067}
1068
1069#[cfg(test)]
1070mod tests {
1071    use crate::util::{assert_order, wrap_matches};
1072
1073    use super::*;
1074
1075    fn wrap_fuzzy_match(matcher: &dyn FuzzyMatcher, line: &str, pattern: &str) -> Option<String> {
1076        let (_score, indices) = matcher.fuzzy_indices(line, pattern)?;
1077        println!("score: {:?}, indices: {:?}", _score, indices);
1078        Some(wrap_matches(line, &indices))
1079    }
1080
1081    #[test]
1082    fn test_match_or_not() {
1083        let matcher = SkimMatcherV2::default();
1084        assert_eq!(Some(0), matcher.fuzzy_match("", ""));
1085        assert_eq!(Some(0), matcher.fuzzy_match("abcdefaghi", ""));
1086        assert_eq!(None, matcher.fuzzy_match("", "a"));
1087        assert_eq!(None, matcher.fuzzy_match("abcdefaghi", "中"));
1088        assert_eq!(None, matcher.fuzzy_match("abc", "abx"));
1089        assert!(matcher.fuzzy_match("axbycz", "abc").is_some());
1090        assert!(matcher.fuzzy_match("axbycz", "xyz").is_some());
1091
1092        assert_eq!(
1093            "[a]x[b]y[c]z",
1094            &wrap_fuzzy_match(&matcher, "axbycz", "abc").unwrap()
1095        );
1096        assert_eq!(
1097            "a[x]b[y]c[z]",
1098            &wrap_fuzzy_match(&matcher, "axbycz", "xyz").unwrap()
1099        );
1100        assert_eq!(
1101            "[H]ello, [世]界",
1102            &wrap_fuzzy_match(&matcher, "Hello, 世界", "H世").unwrap()
1103        );
1104    }
1105
1106    #[test]
1107    fn test_match_quality() {
1108        let matcher = SkimMatcherV2::default().ignore_case();
1109
1110        // initials
1111        assert_order(&matcher, "ab", &["ab", "aoo_boo", "acb"]);
1112        assert_order(&matcher, "CC", &["CamelCase", "camelCase", "camelcase"]);
1113        assert_order(&matcher, "cC", &["camelCase", "CamelCase", "camelcase"]);
1114        assert_order(
1115            &matcher,
1116            "cc",
1117            &[
1118                "camel case",
1119                "camelCase",
1120                "CamelCase",
1121                "camelcase",
1122                "camel ace",
1123            ],
1124        );
1125        assert_order(
1126            &matcher,
1127            "Da.Te",
1128            &["Data.Text", "Data.Text.Lazy", "Data.Aeson.Encoding.text"],
1129        );
1130        // prefix
1131        assert_order(&matcher, "is", &["isIEEE", "inSuf"]);
1132        // shorter
1133        assert_order(&matcher, "ma", &["map", "many", "maximum"]);
1134        assert_order(&matcher, "print", &["printf", "sprintf"]);
1135        // score(PRINT) = kMinScore
1136        assert_order(&matcher, "ast", &["ast", "AST", "INT_FAST16_MAX"]);
1137        // score(PRINT) > kMinScore
1138        assert_order(&matcher, "Int", &["int", "INT", "PRINT"]);
1139    }
1140
1141    fn simple_match(
1142        matcher: &SkimMatcherV2,
1143        choice: &str,
1144        pattern: &str,
1145        case_sensitive: bool,
1146        with_pos: bool,
1147    ) -> Option<(ScoreType, Vec<IndexType>)> {
1148        let choice: Vec<char> = choice.chars().collect();
1149        let pattern: Vec<char> = pattern.chars().collect();
1150        let first_match_indices = cheap_matches(&choice, &pattern, case_sensitive)?;
1151        matcher.simple_match(
1152            &choice,
1153            &pattern,
1154            &first_match_indices,
1155            case_sensitive,
1156            with_pos,
1157        )
1158    }
1159
1160    #[test]
1161    fn test_match_or_not_simple() {
1162        let matcher = SkimMatcherV2::default();
1163        assert_eq!(
1164            simple_match(&matcher, "axbycz", "xyz", false, true)
1165                .unwrap()
1166                .1,
1167            vec![1, 3, 5]
1168        );
1169
1170        assert_eq!(
1171            simple_match(&matcher, "", "", false, false),
1172            Some((0, vec![]))
1173        );
1174        assert_eq!(
1175            simple_match(&matcher, "abcdefaghi", "", false, false),
1176            Some((0, vec![]))
1177        );
1178        assert_eq!(simple_match(&matcher, "", "a", false, false), None);
1179        assert_eq!(
1180            simple_match(&matcher, "abcdefaghi", "中", false, false),
1181            None
1182        );
1183        assert_eq!(simple_match(&matcher, "abc", "abx", false, false), None);
1184        assert_eq!(
1185            simple_match(&matcher, "axbycz", "abc", false, true)
1186                .unwrap()
1187                .1,
1188            vec![0, 2, 4]
1189        );
1190        assert_eq!(
1191            simple_match(&matcher, "axbycz", "xyz", false, true)
1192                .unwrap()
1193                .1,
1194            vec![1, 3, 5]
1195        );
1196        assert_eq!(
1197            simple_match(&matcher, "Hello, 世界", "H世", false, true)
1198                .unwrap()
1199                .1,
1200            vec![0, 7]
1201        );
1202    }
1203
1204    #[test]
1205    fn test_match_or_not_v2() {
1206        let matcher = SkimMatcherV2::default().debug(true);
1207
1208        assert_eq!(matcher.fuzzy_match("", ""), Some(0));
1209        assert_eq!(matcher.fuzzy_match("abcdefaghi", ""), Some(0));
1210        assert_eq!(matcher.fuzzy_match("", "a"), None);
1211        assert_eq!(matcher.fuzzy_match("abcdefaghi", "中"), None);
1212        assert_eq!(matcher.fuzzy_match("abc", "abx"), None);
1213        assert!(matcher.fuzzy_match("axbycz", "abc").is_some());
1214        assert!(matcher.fuzzy_match("axbycz", "xyz").is_some());
1215
1216        assert_eq!(
1217            &wrap_fuzzy_match(&matcher, "axbycz", "abc").unwrap(),
1218            "[a]x[b]y[c]z"
1219        );
1220        assert_eq!(
1221            &wrap_fuzzy_match(&matcher, "axbycz", "xyz").unwrap(),
1222            "a[x]b[y]c[z]"
1223        );
1224        assert_eq!(
1225            &wrap_fuzzy_match(&matcher, "Hello, 世界", "H世").unwrap(),
1226            "[H]ello, [世]界"
1227        );
1228    }
1229
1230    #[test]
1231    fn test_case_option_v2() {
1232        let matcher = SkimMatcherV2::default().ignore_case();
1233        assert!(matcher.fuzzy_match("aBc", "abc").is_some());
1234        assert!(matcher.fuzzy_match("aBc", "aBc").is_some());
1235        assert!(matcher.fuzzy_match("aBc", "aBC").is_some());
1236
1237        let matcher = SkimMatcherV2::default().respect_case();
1238        assert!(matcher.fuzzy_match("aBc", "abc").is_none());
1239        assert!(matcher.fuzzy_match("aBc", "aBc").is_some());
1240        assert!(matcher.fuzzy_match("aBc", "aBC").is_none());
1241
1242        let matcher = SkimMatcherV2::default().smart_case();
1243        assert!(matcher.fuzzy_match("aBc", "abc").is_some());
1244        assert!(matcher.fuzzy_match("aBc", "aBc").is_some());
1245        assert!(matcher.fuzzy_match("aBc", "aBC").is_none());
1246    }
1247
1248    #[test]
1249    fn test_matcher_quality_v2() {
1250        let matcher = SkimMatcherV2::default();
1251        assert_order(&matcher, "ab", &["ab", "aoo_boo", "acb"]);
1252        assert_order(
1253            &matcher,
1254            "cc",
1255            &[
1256                "camel case",
1257                "camelCase",
1258                "CamelCase",
1259                "camelcase",
1260                "camel ace",
1261            ],
1262        );
1263        assert_order(
1264            &matcher,
1265            "Da.Te",
1266            &["Data.Text", "Data.Text.Lazy", "Data.Aeson.Encoding.Text"],
1267        );
1268        assert_order(&matcher, "is", &["isIEEE", "inSuf"]);
1269        assert_order(&matcher, "ma", &["map", "many", "maximum"]);
1270        assert_order(&matcher, "print", &["printf", "sprintf"]);
1271        assert_order(&matcher, "ast", &["ast", "AST", "INT_FAST16_MAX"]);
1272        assert_order(&matcher, "int", &["int", "INT", "PRINT"]);
1273    }
1274    
1275    #[test]
1276    fn test_reuse_should_not_affect_indices() {
1277        let matcher = SkimMatcherV2::default();
1278        let pattern = "139";
1279        for num in 0..10000 {
1280            let choice = num.to_string();
1281            if let Some((_score, indices)) = matcher.fuzzy_indices(&choice, pattern) {
1282                assert_eq!(indices.len(), 3);
1283            }
1284        }
1285    }
1286}