fuzzy_matcher/
skim.rs

1#![allow(deprecated)]
2
3use std::cell::RefCell;
4use std::cmp::max;
5use std::fmt::Formatter;
6
7use thread_local::ThreadLocal;
8
9use crate::skim::Movement::{Match, Skip};
10use crate::util::{char_equal, cheap_matches};
11///! The fuzzy matching algorithm used by skim
12///!
13///! # Example:
14///! ```edition2018
15///! use fuzzy_matcher::FuzzyMatcher;
16///! use fuzzy_matcher::skim::SkimMatcherV2;
17///!
18///! let matcher = SkimMatcherV2::default();
19///! assert_eq!(None, matcher.fuzzy_match("abc", "abx"));
20///! assert!(matcher.fuzzy_match("axbycz", "abc").is_some());
21///! assert!(matcher.fuzzy_match("axbycz", "xyz").is_some());
22///!
23///! let (score, indices) = matcher.fuzzy_indices("axbycz", "abc").unwrap();
24///! assert_eq!(indices, [0, 2, 4]);
25///! ```
26use crate::{FuzzyMatcher, IndexType, ScoreType};
27
28const BONUS_MATCHED: ScoreType = 4;
29const BONUS_CASE_MATCH: ScoreType = 4;
30const BONUS_UPPER_MATCH: ScoreType = 6;
31const BONUS_ADJACENCY: ScoreType = 10;
32const BONUS_SEPARATOR: ScoreType = 8;
33const BONUS_CAMEL: ScoreType = 8;
34const PENALTY_CASE_UNMATCHED: ScoreType = -1;
35const PENALTY_LEADING: ScoreType = -6;
36// penalty applied for every letter before the first match
37const PENALTY_MAX_LEADING: ScoreType = -18;
38// maxing penalty for leading letters
39const PENALTY_UNMATCHED: ScoreType = -2;
40
41#[deprecated(since = "0.3.5", note = "Please use SkimMatcherV2 instead")]
42#[derive(Debug, Default)]
43pub struct SkimMatcher {}
44
45/// The V1 matcher is based on ForrestTheWoods's post
46/// https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/
47///
48/// V1 algorithm is deprecated, checkout `FuzzyMatcherV2`
49impl FuzzyMatcher for SkimMatcher {
50    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
51        fuzzy_indices(choice, pattern)
52    }
53
54    fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<ScoreType> {
55        fuzzy_match(choice, pattern)
56    }
57}
58
59#[deprecated(since = "0.3.5", note = "Please use SkimMatcherV2 instead")]
60pub fn fuzzy_match(choice: &str, pattern: &str) -> Option<ScoreType> {
61    if pattern.is_empty() {
62        return Some(0);
63    }
64
65    let scores = build_graph(choice, pattern)?;
66
67    let last_row = &scores[scores.len() - 1];
68    let (_, &MatchingStatus { final_score, .. }) = last_row
69        .iter()
70        .enumerate()
71        .max_by_key(|&(_, x)| x.final_score)
72        .expect("fuzzy_indices failed to iterate over last_row");
73    Some(final_score)
74}
75
76#[deprecated(since = "0.3.5", note = "Please use SkimMatcherV2 instead")]
77pub fn fuzzy_indices(choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
78    if pattern.is_empty() {
79        return Some((0, Vec::new()));
80    }
81
82    let mut picked = vec![];
83    let scores = build_graph(choice, pattern)?;
84
85    let last_row = &scores[scores.len() - 1];
86    let (mut next_col, &MatchingStatus { final_score, .. }) = last_row
87        .iter()
88        .enumerate()
89        .max_by_key(|&(_, x)| x.final_score)
90        .expect("fuzzy_indices failed to iterate over last_row");
91    let mut pat_idx = scores.len() as i64 - 1;
92    while pat_idx >= 0 {
93        let status = scores[pat_idx as usize][next_col];
94        next_col = status.back_ref;
95        picked.push(status.idx);
96        pat_idx -= 1;
97    }
98    picked.reverse();
99    Some((final_score, picked))
100}
101
102#[derive(Clone, Copy, Debug)]
103struct MatchingStatus {
104    pub idx: IndexType,
105    pub score: ScoreType,
106    pub final_score: ScoreType,
107    pub adj_num: IndexType,
108    pub back_ref: IndexType,
109}
110
111impl Default for MatchingStatus {
112    fn default() -> Self {
113        MatchingStatus {
114            idx: 0,
115            score: 0,
116            final_score: 0,
117            adj_num: 1,
118            back_ref: 0,
119        }
120    }
121}
122
123fn build_graph(choice: &str, pattern: &str) -> Option<Vec<Vec<MatchingStatus>>> {
124    let mut scores = vec![];
125
126    let mut match_start_idx = 0; // to ensure that the pushed char are able to match the pattern
127    let mut pat_prev_ch = '\0';
128
129    // initialize the match positions and inline scores
130    for (pat_idx, pat_ch) in pattern.chars().enumerate() {
131        let mut vec = vec![];
132        let mut choice_prev_ch = '\0';
133        for (idx, ch) in choice.chars().enumerate() {
134            if ch.to_ascii_lowercase() == pat_ch.to_ascii_lowercase() && idx >= match_start_idx {
135                let score = fuzzy_score(
136                    ch,
137                    idx as IndexType,
138                    choice_prev_ch,
139                    pat_ch,
140                    pat_idx as IndexType,
141                    pat_prev_ch,
142                );
143                vec.push(MatchingStatus {
144                    idx: idx as IndexType,
145                    score,
146                    final_score: score,
147                    adj_num: 1,
148                    back_ref: 0,
149                });
150            }
151            choice_prev_ch = ch;
152        }
153
154        if vec.is_empty() {
155            // not matched
156            return None;
157        }
158        match_start_idx = vec[0].idx + 1;
159        scores.push(vec);
160        pat_prev_ch = pat_ch;
161    }
162
163    // calculate max scores considering adjacent characters
164    for pat_idx in 1..scores.len() {
165        let (first_half, last_half) = scores.split_at_mut(pat_idx);
166
167        let prev_row = &first_half[first_half.len() - 1];
168        let cur_row = &mut last_half[0];
169
170        for idx in 0..cur_row.len() {
171            let next = cur_row[idx];
172            let prev = if idx > 0 {
173                cur_row[idx - 1]
174            } else {
175                MatchingStatus::default()
176            };
177
178            let mut score_before_idx = prev.final_score - prev.score + next.score;
179            score_before_idx += PENALTY_UNMATCHED * ((next.idx - prev.idx) as ScoreType);
180            score_before_idx -= if prev.adj_num == 0 {
181                BONUS_ADJACENCY
182            } else {
183                0
184            };
185
186            let (back_ref, score, adj_num) = prev_row
187                .iter()
188                .enumerate()
189                .take_while(|&(_, &MatchingStatus { idx, .. })| idx < next.idx)
190                .skip_while(|&(_, &MatchingStatus { idx, .. })| idx < prev.idx)
191                .map(|(back_ref, cur)| {
192                    let adj_num = next.idx - cur.idx - 1;
193                    let mut final_score = cur.final_score + next.score;
194                    final_score += if adj_num == 0 {
195                        BONUS_ADJACENCY
196                    } else {
197                        PENALTY_UNMATCHED * adj_num as ScoreType
198                    };
199                    (back_ref, final_score, adj_num)
200                })
201                .max_by_key(|&(_, x, _)| x)
202                .unwrap_or((prev.back_ref, score_before_idx, prev.adj_num));
203
204            cur_row[idx] = if idx > 0 && score < score_before_idx {
205                MatchingStatus {
206                    final_score: score_before_idx,
207                    back_ref: prev.back_ref,
208                    adj_num,
209                    ..next
210                }
211            } else {
212                MatchingStatus {
213                    final_score: score,
214                    back_ref: back_ref as IndexType,
215                    adj_num,
216                    ..next
217                }
218            };
219        }
220    }
221
222    Some(scores)
223}
224
225// judge how many scores the current index should get
226fn fuzzy_score(
227    choice_ch: char,
228    choice_idx: IndexType,
229    choice_prev_ch: char,
230    pat_ch: char,
231    pat_idx: IndexType,
232    _pat_prev_ch: char,
233) -> ScoreType {
234    let mut score = BONUS_MATCHED;
235
236    let choice_prev_ch_type = CharType::of(choice_prev_ch);
237    let choice_role = CharRole::of(choice_prev_ch, choice_ch);
238
239    if pat_ch == choice_ch {
240        if pat_ch.is_uppercase() {
241            score += BONUS_UPPER_MATCH;
242        } else {
243            score += BONUS_CASE_MATCH;
244        }
245    } else {
246        score += PENALTY_CASE_UNMATCHED;
247    }
248
249    // apply bonus for camelCases
250    if choice_role == CharRole::Head
251        || choice_role == CharRole::Break
252        || choice_role == CharRole::Camel
253    {
254        score += BONUS_CAMEL;
255    }
256
257    // apply bonus for matches after a separator
258    if choice_prev_ch_type == CharType::HardSep || choice_prev_ch_type == CharType::SoftSep {
259        score += BONUS_SEPARATOR;
260    }
261
262    if pat_idx == 0 {
263        score += max(
264            (choice_idx as ScoreType) * PENALTY_LEADING,
265            PENALTY_MAX_LEADING,
266        );
267    }
268
269    score
270}
271
272#[derive(Copy, Clone, Debug)]
273pub struct SkimScoreConfig {
274    pub score_match: i32,
275    pub gap_start: i32,
276    pub gap_extension: i32,
277
278    /// The first character in the typed pattern usually has more significance
279    /// than the rest so it's important that it appears at special positions where
280    /// bonus points are given. e.g. "to-go" vs. "ongoing" on "og" or on "ogo".
281    /// The amount of the extra bonus should be limited so that the gap penalty is
282    /// still respected.
283    pub bonus_first_char_multiplier: i32,
284
285    /// We prefer matches at the beginning of a word, but the bonus should not be
286    /// too great to prevent the longer acronym matches from always winning over
287    /// shorter fuzzy matches. The bonus point here was specifically chosen that
288    /// the bonus is cancelled when the gap between the acronyms grows over
289    /// 8 characters, which is approximately the average length of the words found
290    /// in web2 dictionary and my file system.
291    pub bonus_head: i32,
292
293    /// Just like bonus_head, but its breakage of word is not that strong, so it should
294    /// be slighter less then bonus_head
295    pub bonus_break: i32,
296
297    /// Edge-triggered bonus for matches in camelCase words.
298    /// Compared to word-boundary case, they don't accompany single-character gaps
299    /// (e.g. FooBar vs. foo-bar), so we deduct bonus point accordingly.
300    pub bonus_camel: i32,
301
302    /// Minimum bonus point given to characters in consecutive chunks.
303    /// Note that bonus points for consecutive matches shouldn't have needed if we
304    /// used fixed match score as in the original algorithm.
305    pub bonus_consecutive: i32,
306
307    /// Skim will match case-sensitively if the pattern contains ASCII upper case,
308    /// If case of case insensitive match, the penalty will be given to case mismatch
309    pub penalty_case_mismatch: i32,
310}
311
312impl Default for SkimScoreConfig {
313    fn default() -> Self {
314        let score_match = 16;
315        let gap_start = -3;
316        let gap_extension = -1;
317        let bonus_first_char_multiplier = 2;
318
319        Self {
320            score_match,
321            gap_start,
322            gap_extension,
323            bonus_first_char_multiplier,
324            bonus_head: score_match / 2,
325            bonus_break: score_match / 2 + gap_extension,
326            bonus_camel: score_match / 2 + 2 * gap_extension,
327            bonus_consecutive: -(gap_start + gap_extension),
328            penalty_case_mismatch: gap_extension * 2,
329        }
330    }
331}
332
333#[derive(Debug, Copy, Clone, PartialEq)]
334enum Movement {
335    Match,
336    Skip,
337}
338
339/// Inner state of the score matrix
340// Implementation detail: tried to pad to 16B
341// will store the m and p matrix together
342#[derive(Clone, Debug)]
343struct MatrixCell {
344    pub m_move: Movement,
345    pub m_score: i32,
346    pub p_move: Movement,
347    pub p_score: i32, // The max score of align pattern[..i] & choice[..j]
348
349    // temporary fields (make use the rest of the padding)
350    pub matched: bool,
351    pub bonus: i32,
352}
353
354const MATRIX_CELL_NEG_INFINITY: i32 = std::i16::MIN as i32;
355
356impl Default for MatrixCell {
357    fn default() -> Self {
358        Self {
359            m_move: Skip,
360            m_score: MATRIX_CELL_NEG_INFINITY,
361            p_move: Skip,
362            p_score: MATRIX_CELL_NEG_INFINITY,
363            matched: false,
364            bonus: 0,
365        }
366    }
367}
368
369impl MatrixCell {
370    pub fn reset(&mut self) {
371        self.m_move = Skip;
372        self.m_score = MATRIX_CELL_NEG_INFINITY;
373        self.p_move = Skip;
374        self.p_score = MATRIX_CELL_NEG_INFINITY;
375        self.bonus = 0;
376        self.matched = false;
377    }
378}
379
380/// Simulate a 1-D vector as 2-D matrix
381struct ScoreMatrix<'a> {
382    matrix: &'a mut [MatrixCell],
383    pub rows: usize,
384    pub cols: usize,
385}
386
387impl<'a> ScoreMatrix<'a> {
388    /// given a matrix, extend it to be (rows x cols) and fill in as init_val
389    pub fn new(matrix: &'a mut Vec<MatrixCell>, rows: usize, cols: usize) -> Self {
390        matrix.resize(rows * cols, MatrixCell::default());
391        ScoreMatrix { matrix, rows, cols }
392    }
393
394    #[inline]
395    fn get_index(&self, row: usize, col: usize) -> usize {
396        row * self.cols + col
397    }
398
399    fn get_row(&self, row: usize) -> &[MatrixCell] {
400        let start = row * self.cols;
401        &self.matrix[start..start + self.cols]
402    }
403}
404
405impl<'a> std::ops::Index<(usize, usize)> for ScoreMatrix<'a> {
406    type Output = MatrixCell;
407
408    fn index(&self, index: (usize, usize)) -> &Self::Output {
409        &self.matrix[self.get_index(index.0, index.1)]
410    }
411}
412
413impl<'a> std::ops::IndexMut<(usize, usize)> for ScoreMatrix<'a> {
414    fn index_mut(&mut self, index: (usize, usize)) -> &mut Self::Output {
415        &mut self.matrix[self.get_index(index.0, index.1)]
416    }
417}
418
419impl<'a> std::fmt::Debug for ScoreMatrix<'a> {
420    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
421        let _ = writeln!(f, "M score:");
422        for row in 0..self.rows {
423            for col in 0..self.cols {
424                let cell = &self[(row, col)];
425                write!(
426                    f,
427                    "{:4}/{}  ",
428                    if cell.m_score == MATRIX_CELL_NEG_INFINITY {
429                        -999
430                    } else {
431                        cell.m_score
432                    },
433                    match cell.m_move {
434                        Match => 'M',
435                        Skip => 'S',
436                    }
437                )?;
438            }
439            writeln!(f)?;
440        }
441
442        let _ = writeln!(f, "P score:");
443        for row in 0..self.rows {
444            for col in 0..self.cols {
445                let cell = &self[(row, col)];
446                write!(
447                    f,
448                    "{:4}/{}  ",
449                    if cell.p_score == MATRIX_CELL_NEG_INFINITY {
450                        -999
451                    } else {
452                        cell.p_score
453                    },
454                    match cell.p_move {
455                        Match => 'M',
456                        Skip => 'S',
457                    }
458                )?;
459            }
460            writeln!(f)?;
461        }
462
463        Ok(())
464    }
465}
466
467/// We categorize characters into types:
468///
469/// - Empty(E): the start of string
470/// - Upper(U): the ascii upper case
471/// - lower(L): the ascii lower case & other unicode characters
472/// - number(N): ascii number
473/// - hard separator(S): clearly separate the content: ` ` `/` `\` `|` `(` `) `[` `]` `{` `}`
474/// - soft separator(s): other ascii punctuation, e.g. `!` `"` `#` `$`, ...
475#[derive(Debug, PartialEq, Copy, Clone)]
476enum CharType {
477    Empty,
478    Upper,
479    Lower,
480    Number,
481    HardSep,
482    SoftSep,
483}
484
485impl CharType {
486    pub fn of(ch: char) -> Self {
487        match ch {
488            '\0' => CharType::Empty,
489            ' ' | '/' | '\\' | '|' | '(' | ')' | '[' | ']' | '{' | '}' => CharType::HardSep,
490            '!'..='\'' | '*'..='.' | ':'..='@' | '^'..='`' | '~' => CharType::SoftSep,
491            '0'..='9' => CharType::Number,
492            'A'..='Z' => CharType::Upper,
493            _ => CharType::Lower,
494        }
495    }
496}
497
498/// Ref: https://github.com/llvm-mirror/clang-tools-extra/blob/master/clangd/FuzzyMatch.cpp
499///
500///
501/// ```text
502/// +-----------+--------------+-------+
503/// | Example   | Chars | Type | Role  |
504/// +-----------+--------------+-------+
505/// | (f)oo     | ^fo   | Ell  | Head  |
506/// | (F)oo     | ^Fo   | EUl  | Head  |
507/// | Foo/(B)ar | /Ba   | SUl  | Head  |
508/// | Foo/(b)ar | /ba   | Sll  | Head  |
509/// | Foo.(B)ar | .Ba   | SUl  | Break |
510/// | Foo(B)ar  | oBa   | lUl  | Camel |
511/// | 123(B)ar  | 3Ba   | nUl  | Camel |
512/// | F(o)oBar  | Foo   | Ull  | Tail  |
513/// | H(T)TP    | HTT   | UUU  | Tail  |
514/// | others    |       |      | Tail  |
515/// +-----------+--------------+-------+
516#[derive(Debug, PartialEq, Copy, Clone)]
517enum CharRole {
518    Head,
519    Tail,
520    Camel,
521    Break,
522}
523
524impl CharRole {
525    pub fn of(prev: char, cur: char) -> Self {
526        Self::of_type(CharType::of(prev), CharType::of(cur))
527    }
528    pub fn of_type(prev: CharType, cur: CharType) -> Self {
529        match (prev, cur) {
530            (CharType::Empty, _) | (CharType::HardSep, _) => CharRole::Head,
531            (CharType::SoftSep, _) => CharRole::Break,
532            (CharType::Lower, CharType::Upper) | (CharType::Number, CharType::Upper) => {
533                CharRole::Camel
534            }
535            _ => CharRole::Tail,
536        }
537    }
538}
539
540#[derive(Eq, PartialEq, Debug, Copy, Clone)]
541enum CaseMatching {
542    Respect,
543    Ignore,
544    Smart,
545}
546
547/// Fuzzy matching is a sub problem is sequence alignment.
548/// Specifically what we'd like to implement is sequence alignment with affine gap penalty.
549/// Ref: https://www.cs.cmu.edu/~ckingsf/bioinfo-lectures/gaps.pdf
550///
551/// Given `pattern`(i) and `choice`(j), we'll maintain 2 score matrix:
552///
553/// ```text
554/// M[i][j] = match(i, j) + max(M[i-1][j-1] + consecutive, P[i-1][j-1])
555/// M[i][j] = -infinity if p[i][j] do not match
556///
557/// M[i][j] means the score of best alignment of p[..=i] and c[..=j] ending with match/mismatch e.g.:
558///
559/// c: [.........]b
560/// p: [.........]b
561///
562/// So that p[..=i-1] and c[..=j-1] could be any alignment
563///
564/// P[i][j] = max(M[i][j-k]-gap(k)) for k in 1..j
565///
566/// P[i][j] means the score of best alignment of p[..=i] and c[..=j] where c[j] is not matched.
567/// So that we need to search through all the previous matches, and calculate the gap.
568///
569///   (j-k)--.   j
570/// c: [....]bcdef
571/// p: [....]b----
572///          i
573/// ```
574///
575/// Note that the above is O(n^3) in the worst case. However the above algorithm uses a general gap
576/// penalty, but we use affine gap: `gap = gap_start + k * gap_extend` where:
577/// - u: the cost of starting of gap
578/// - v: the cost of extending a gap by one more space.
579///
580/// So that we could optimize the algorithm by:
581///
582/// ```text
583/// P[i][j] = max(gap_start + gap_extend + M[i][j-1], gap_extend + P[i][j-1])
584/// ```
585///
586/// Besides, since we are doing fuzzy matching, we'll prefer some pattern over others.
587/// So we'll calculate in-place bonus for each character. e.g. bonus for camel cases.
588///
589/// In summary:
590///
591/// ```text
592/// B[j] = in_place_bonus_of(j)
593/// M[i][j] = match(i, j) + max(M[i-1][j-1] + consecutive, P[i-1][j-1])
594/// M[i][j] = -infinity if p[i] and c[j] do not match
595/// P[i][j] = max(gap_start + gap_extend + M[i][j-1], gap_extend + P[i][j-1])
596/// ```
597#[derive(Debug)]
598pub struct SkimMatcherV2 {
599    debug: bool,
600
601    score_config: SkimScoreConfig,
602    element_limit: usize,
603    case: CaseMatching,
604    use_cache: bool,
605
606    m_cache: ThreadLocal<RefCell<Vec<MatrixCell>>>,
607    c_cache: ThreadLocal<RefCell<Vec<char>>>, // vector to store the characters of choice
608    p_cache: ThreadLocal<RefCell<Vec<char>>>, // vector to store the characters of pattern
609}
610
611impl Default for SkimMatcherV2 {
612    fn default() -> Self {
613        Self {
614            debug: false,
615            score_config: SkimScoreConfig::default(),
616            element_limit: 0,
617            case: CaseMatching::Smart,
618            use_cache: true,
619
620            m_cache: ThreadLocal::new(),
621            c_cache: ThreadLocal::new(),
622            p_cache: ThreadLocal::new(),
623        }
624    }
625}
626
627impl SkimMatcherV2 {
628    pub fn score_config(mut self, score_config: SkimScoreConfig) -> Self {
629        self.score_config = score_config;
630        self
631    }
632
633    pub fn element_limit(mut self, elements: usize) -> Self {
634        self.element_limit = elements;
635        self
636    }
637
638    pub fn ignore_case(mut self) -> Self {
639        self.case = CaseMatching::Ignore;
640        self
641    }
642
643    pub fn smart_case(mut self) -> Self {
644        self.case = CaseMatching::Smart;
645        self
646    }
647
648    pub fn respect_case(mut self) -> Self {
649        self.case = CaseMatching::Respect;
650        self
651    }
652
653    pub fn use_cache(mut self, use_cache: bool) -> Self {
654        self.use_cache = use_cache;
655        self
656    }
657
658    pub fn debug(mut self, debug: bool) -> Self {
659        self.debug = debug;
660        self
661    }
662
663    /// Build the score matrix using the algorithm described above
664    fn build_score_matrix(
665        &self,
666        m: &mut ScoreMatrix,
667        choice: &[char],
668        pattern: &[char],
669        first_match_indices: &[usize],
670        compressed: bool,
671        case_sensitive: bool,
672    ) {
673        let mut in_place_bonuses = vec![0; m.cols];
674
675        self.build_in_place_bonus(choice, &mut in_place_bonuses);
676
677        // need to reset M[row][first_match] & M[i][j-1]
678        m[(0, 0)].reset();
679        for i in 1..m.rows {
680            m[(i, first_match_indices[i - 1])].reset();
681        }
682
683        for j in 0..m.cols {
684            // p[0][j]: the score of best alignment of p[] and c[..=j] where c[j] is not matched
685            m[(0, j)].reset();
686            m[(0, j)].p_score = self.score_config.gap_extension;
687        }
688
689        // update the matrix;
690        for (i, &p_ch) in pattern.iter().enumerate() {
691            let row = self.adjust_row_idx(i + 1, compressed);
692            let row_prev = self.adjust_row_idx(i, compressed);
693            let to_skip = first_match_indices[i];
694
695            for (j, &c_ch) in choice[to_skip..].iter().enumerate() {
696                let col = to_skip + j + 1;
697                let col_prev = to_skip + j;
698                let idx_cur = m.get_index(row, col);
699                let idx_last = m.get_index(row, col_prev);
700                let idx_prev = m.get_index(row_prev, col_prev);
701
702                // update M matrix
703                // M[i][j] = match(i, j) + max(M[i-1][j-1], P[i-1][j-1])
704                if let Some(cur_match_score) =
705                    self.calculate_match_score(c_ch, p_ch, case_sensitive)
706                {
707                    let prev_match_score = m.matrix[idx_prev].m_score;
708                    let prev_skip_score = m.matrix[idx_prev].p_score;
709                    let prev_match_bonus = m.matrix[idx_last].bonus;
710                    let in_place_bonus = in_place_bonuses[col];
711
712                    let consecutive_bonus = max(
713                        prev_match_bonus,
714                        max(in_place_bonus, self.score_config.bonus_consecutive),
715                    );
716                    m.matrix[idx_last].bonus = consecutive_bonus;
717
718                    let score_match = prev_match_score + consecutive_bonus;
719                    let score_skip = prev_skip_score + in_place_bonus;
720
721                    if score_match >= score_skip {
722                        m.matrix[idx_cur].m_score = score_match + cur_match_score as i32;
723                        m.matrix[idx_cur].m_move = Movement::Match;
724                    } else {
725                        m.matrix[idx_cur].m_score = score_skip + cur_match_score as i32;
726                        m.matrix[idx_cur].m_move = Movement::Skip;
727                    }
728                } else {
729                    m.matrix[idx_cur].m_score = MATRIX_CELL_NEG_INFINITY;
730                    m.matrix[idx_cur].m_move = Movement::Skip;
731                    m.matrix[idx_cur].bonus = 0;
732                }
733
734                // update P matrix
735                // P[i][j] = max(gap_start + gap_extend + M[i][j-1], gap_extend + P[i][j-1])
736                let prev_match_score = self.score_config.gap_start
737                    + self.score_config.gap_extension
738                    + m.matrix[idx_last].m_score;
739                let prev_skip_score = self.score_config.gap_extension + m.matrix[idx_last].p_score;
740                if prev_match_score >= prev_skip_score {
741                    m.matrix[idx_cur].p_score = prev_match_score;
742                    m.matrix[idx_cur].p_move = Movement::Match;
743                } else {
744                    m.matrix[idx_cur].p_score = prev_skip_score;
745                    m.matrix[idx_cur].p_move = Movement::Skip;
746                }
747            }
748        }
749    }
750
751    /// check bonus for start of camel case, etc.
752    fn build_in_place_bonus(&self, choice: &[char], b: &mut [i32]) {
753        let mut prev_ch = '\0';
754        for (j, &c_ch) in choice.iter().enumerate() {
755            let prev_ch_type = CharType::of(prev_ch);
756            let ch_type = CharType::of(c_ch);
757            b[j + 1] = self.in_place_bonus(prev_ch_type, ch_type);
758            prev_ch = c_ch;
759        }
760
761        if b.len() > 1 {
762            b[1] *= self.score_config.bonus_first_char_multiplier;
763        }
764    }
765
766    /// In case we don't need to backtrack the matching indices, we could use only 2 rows for the
767    /// matrix, this function could be used to rotate accessing these two rows.
768    fn adjust_row_idx(&self, row_idx: usize, compressed: bool) -> usize {
769        if compressed {
770            row_idx & 1
771        } else {
772            row_idx
773        }
774    }
775
776    /// Calculate the matching score of the characters
777    /// return None if not matched.
778    fn calculate_match_score(&self, c: char, p: char, case_sensitive: bool) -> Option<u16> {
779        if !char_equal(c, p, case_sensitive) {
780            return None;
781        }
782
783        let score = self.score_config.score_match;
784        let mut bonus = 0;
785
786        // penalty on case mismatch
787        if !case_sensitive && p != c {
788            bonus += self.score_config.penalty_case_mismatch;
789        }
790
791        Some(max(0, score + bonus) as u16)
792    }
793
794    #[inline]
795    fn in_place_bonus(&self, prev_char_type: CharType, char_type: CharType) -> i32 {
796        match CharRole::of_type(prev_char_type, char_type) {
797            CharRole::Head => self.score_config.bonus_head,
798            CharRole::Camel => self.score_config.bonus_camel,
799            CharRole::Break => self.score_config.bonus_break,
800            CharRole::Tail => 0,
801        }
802    }
803
804    fn contains_upper(&self, string: &str) -> bool {
805        for ch in string.chars() {
806            if ch.is_ascii_uppercase() {
807                return true;
808            }
809        }
810
811        false
812    }
813
814    pub fn fuzzy(
815        &self,
816        choice: &str,
817        pattern: &str,
818        with_pos: bool,
819    ) -> Option<(ScoreType, Vec<IndexType>)> {
820        if pattern.is_empty() {
821            return Some((0, Vec::new()));
822        }
823
824        let case_sensitive = match self.case {
825            CaseMatching::Respect => true,
826            CaseMatching::Ignore => false,
827            CaseMatching::Smart => self.contains_upper(pattern),
828        };
829
830        let compressed = !with_pos;
831
832        // initialize the score matrix
833        let mut m = self.m_cache.get_or_default().borrow_mut();
834        let mut choice_chars = self.c_cache.get_or_default().borrow_mut();
835        let mut pattern_chars = self.p_cache.get_or_default().borrow_mut();
836
837        *choice_chars = choice.chars().collect();
838
839        *pattern_chars = pattern.chars().collect();
840
841        let first_match_indices = cheap_matches(&choice_chars, &pattern_chars, case_sensitive)?;
842
843        let cols = choice_chars.len() + 1;
844        let num_char_pattern = pattern_chars.len();
845        let rows = if compressed { 2 } else { num_char_pattern + 1 };
846
847        if self.element_limit > 0 && self.element_limit < rows * cols {
848            return self.simple_match(
849                &choice_chars,
850                &pattern_chars,
851                &first_match_indices,
852                case_sensitive,
853                with_pos,
854            );
855        }
856
857        let mut m = ScoreMatrix::new(&mut m, rows, cols);
858        self.build_score_matrix(
859            &mut m,
860            &choice_chars,
861            &pattern_chars,
862            &first_match_indices,
863            compressed,
864            case_sensitive,
865        );
866        let first_col_of_last_row = first_match_indices[first_match_indices.len() - 1];
867        let last_row = m.get_row(self.adjust_row_idx(num_char_pattern, compressed));
868        let (pat_idx, &MatrixCell { m_score, .. }) = last_row[first_col_of_last_row..]
869            .iter()
870            .enumerate()
871            .max_by_key(|&(_, x)| x.m_score)
872            .map(|(idx, cell)| (idx + first_col_of_last_row, cell))
873            .expect("fuzzy_matcher failed to iterate over last_row");
874
875        let mut positions = if with_pos {
876            Vec::with_capacity(num_char_pattern)
877        } else {
878            Vec::new()
879        };
880        if with_pos {
881            let mut i = m.rows - 1;
882            let mut j = pat_idx;
883            let mut track_m = true;
884            let mut current_move = Match;
885            let first_col_first_row = first_match_indices[0];
886            while i > 0 && j > first_col_first_row {
887                if current_move == Match {
888                    positions.push((j - 1) as IndexType);
889                }
890
891                let cell = &m[(i, j)];
892                current_move = if track_m { cell.m_move } else { cell.p_move };
893                if track_m {
894                    i -= 1;
895                }
896
897                j -= 1;
898
899                track_m = match current_move {
900                    Match => true,
901                    Skip => false,
902                };
903            }
904            positions.reverse();
905        }
906
907        if self.debug {
908            println!("Matrix:\n{:?}", m);
909        }
910
911        if !self.use_cache {
912            // drop the allocated memory
913            self.m_cache.get().map(|cell| cell.take());
914            self.c_cache.get().map(|cell| cell.take());
915            self.p_cache.get().map(|cell| cell.take());
916        }
917
918        Some((m_score as ScoreType, positions))
919    }
920
921    pub fn simple_match(
922        &self,
923        choice: &[char],
924        pattern: &[char],
925        first_match_indices: &[usize],
926        case_sensitive: bool,
927        with_pos: bool,
928    ) -> Option<(ScoreType, Vec<IndexType>)> {
929        if pattern.is_empty() {
930            return Some((0, Vec::new()));
931        }
932
933        if pattern.len() == 1 {
934            let match_idx = first_match_indices[0];
935            let prev_ch = if match_idx > 0 {
936                choice[match_idx - 1]
937            } else {
938                '\0'
939            };
940            let prev_ch_type = CharType::of(prev_ch);
941            let ch_type = CharType::of(choice[match_idx]);
942            let in_place_bonus = self.in_place_bonus(prev_ch_type, ch_type);
943            return Some((in_place_bonus as ScoreType, vec![match_idx as IndexType]));
944        }
945
946        let start_idx = first_match_indices[0];
947        let end_idx = first_match_indices[first_match_indices.len() - 1];
948
949        Some(self.calculate_score_with_pos(
950            choice,
951            pattern,
952            start_idx,
953            end_idx,
954            case_sensitive,
955            with_pos,
956        ))
957    }
958
959    fn calculate_score_with_pos(
960        &self,
961        choice: &[char],
962        pattern: &[char],
963        start_idx: usize,
964        end_idx: usize,
965        case_sensitive: bool,
966        with_pos: bool,
967    ) -> (ScoreType, Vec<IndexType>) {
968        let mut pos = Vec::new();
969
970        let choice_iter = choice[start_idx..=end_idx].iter().enumerate();
971        let mut pattern_iter = pattern.iter().enumerate().peekable();
972
973        // unfortunately we could not get the the character before the first character's(for performance)
974        // so we tread them as NonWord
975        let mut prev_ch = '\0';
976
977        let mut score: i32 = 0;
978        let mut in_gap = false;
979        let mut prev_match_bonus = 0;
980
981        for (c_idx, &c) in choice_iter {
982            let op = pattern_iter.peek();
983            if op.is_none() {
984                break;
985            }
986
987            let prev_ch_type = CharType::of(prev_ch);
988            let ch_type = CharType::of(c);
989            let in_place_bonus = self.in_place_bonus(prev_ch_type, ch_type);
990
991            let (_p_idx, &p) = *op.unwrap();
992
993            if let Some(match_score) = self.calculate_match_score(c, p, case_sensitive) {
994                if with_pos {
995                    pos.push((c_idx + start_idx) as IndexType);
996                }
997
998                score += match_score as i32;
999
1000                let consecutive_bonus = max(
1001                    prev_match_bonus,
1002                    max(in_place_bonus, self.score_config.bonus_consecutive),
1003                );
1004                prev_match_bonus = consecutive_bonus;
1005
1006                if !in_gap {
1007                    score += consecutive_bonus;
1008                }
1009
1010                in_gap = false;
1011                let _ = pattern_iter.next();
1012            } else {
1013                if !in_gap {
1014                    score += self.score_config.gap_start;
1015                }
1016
1017                score += self.score_config.gap_extension;
1018                in_gap = true;
1019                prev_match_bonus = 0;
1020            }
1021
1022            prev_ch = c;
1023        }
1024
1025        (score as ScoreType, pos)
1026    }
1027}
1028
1029impl FuzzyMatcher for SkimMatcherV2 {
1030    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
1031        self.fuzzy(choice, pattern, true)
1032    }
1033
1034    fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<ScoreType> {
1035        self.fuzzy(choice, pattern, false).map(|(score, _)| score)
1036    }
1037}
1038
1039#[cfg(test)]
1040mod tests {
1041    use crate::util::{assert_order, wrap_matches};
1042
1043    use super::*;
1044
1045    fn wrap_fuzzy_match(matcher: &dyn FuzzyMatcher, line: &str, pattern: &str) -> Option<String> {
1046        let (_score, indices) = matcher.fuzzy_indices(line, pattern)?;
1047        println!("score: {:?}, indices: {:?}", _score, indices);
1048        Some(wrap_matches(line, &indices))
1049    }
1050
1051    #[test]
1052    fn test_match_or_not() {
1053        let matcher = SkimMatcherV2::default();
1054        assert_eq!(Some(0), matcher.fuzzy_match("", ""));
1055        assert_eq!(Some(0), matcher.fuzzy_match("abcdefaghi", ""));
1056        assert_eq!(None, matcher.fuzzy_match("", "a"));
1057        assert_eq!(None, matcher.fuzzy_match("abcdefaghi", "中"));
1058        assert_eq!(None, matcher.fuzzy_match("abc", "abx"));
1059        assert!(matcher.fuzzy_match("axbycz", "abc").is_some());
1060        assert!(matcher.fuzzy_match("axbycz", "xyz").is_some());
1061
1062        assert_eq!(
1063            "[a]x[b]y[c]z",
1064            &wrap_fuzzy_match(&matcher, "axbycz", "abc").unwrap()
1065        );
1066        assert_eq!(
1067            "a[x]b[y]c[z]",
1068            &wrap_fuzzy_match(&matcher, "axbycz", "xyz").unwrap()
1069        );
1070        assert_eq!(
1071            "[H]ello, [世]界",
1072            &wrap_fuzzy_match(&matcher, "Hello, 世界", "H世").unwrap()
1073        );
1074    }
1075
1076    #[test]
1077    fn test_match_quality() {
1078        let matcher = SkimMatcherV2::default().ignore_case();
1079
1080        // initials
1081        assert_order(&matcher, "ab", &["ab", "aoo_boo", "acb"]);
1082        assert_order(&matcher, "CC", &["CamelCase", "camelCase", "camelcase"]);
1083        assert_order(&matcher, "cC", &["camelCase", "CamelCase", "camelcase"]);
1084        assert_order(
1085            &matcher,
1086            "cc",
1087            &[
1088                "camel case",
1089                "camelCase",
1090                "CamelCase",
1091                "camelcase",
1092                "camel ace",
1093            ],
1094        );
1095        assert_order(
1096            &matcher,
1097            "Da.Te",
1098            &["Data.Text", "Data.Text.Lazy", "Data.Aeson.Encoding.text"],
1099        );
1100        // prefix
1101        assert_order(&matcher, "is", &["isIEEE", "inSuf"]);
1102        // shorter
1103        assert_order(&matcher, "ma", &["map", "many", "maximum"]);
1104        assert_order(&matcher, "print", &["printf", "sprintf"]);
1105        // score(PRINT) = kMinScore
1106        assert_order(&matcher, "ast", &["ast", "AST", "INT_FAST16_MAX"]);
1107        // score(PRINT) > kMinScore
1108        assert_order(&matcher, "Int", &["int", "INT", "PRINT"]);
1109    }
1110
1111    fn simple_match(
1112        matcher: &SkimMatcherV2,
1113        choice: &str,
1114        pattern: &str,
1115        case_sensitive: bool,
1116        with_pos: bool,
1117    ) -> Option<(ScoreType, Vec<IndexType>)> {
1118        let choice: Vec<char> = choice.chars().collect();
1119        let pattern: Vec<char> = pattern.chars().collect();
1120        let first_match_indices = cheap_matches(&choice, &pattern, case_sensitive)?;
1121        matcher.simple_match(
1122            &choice,
1123            &pattern,
1124            &first_match_indices,
1125            case_sensitive,
1126            with_pos,
1127        )
1128    }
1129
1130    #[test]
1131    fn test_match_or_not_simple() {
1132        let matcher = SkimMatcherV2::default();
1133        assert_eq!(
1134            simple_match(&matcher, "axbycz", "xyz", false, true)
1135                .unwrap()
1136                .1,
1137            vec![1, 3, 5]
1138        );
1139
1140        assert_eq!(
1141            simple_match(&matcher, "", "", false, false),
1142            Some((0, vec![]))
1143        );
1144        assert_eq!(
1145            simple_match(&matcher, "abcdefaghi", "", false, false),
1146            Some((0, vec![]))
1147        );
1148        assert_eq!(simple_match(&matcher, "", "a", false, false), None);
1149        assert_eq!(
1150            simple_match(&matcher, "abcdefaghi", "中", false, false),
1151            None
1152        );
1153        assert_eq!(simple_match(&matcher, "abc", "abx", false, false), None);
1154        assert_eq!(
1155            simple_match(&matcher, "axbycz", "abc", false, true)
1156                .unwrap()
1157                .1,
1158            vec![0, 2, 4]
1159        );
1160        assert_eq!(
1161            simple_match(&matcher, "axbycz", "xyz", false, true)
1162                .unwrap()
1163                .1,
1164            vec![1, 3, 5]
1165        );
1166        assert_eq!(
1167            simple_match(&matcher, "Hello, 世界", "H世", false, true)
1168                .unwrap()
1169                .1,
1170            vec![0, 7]
1171        );
1172    }
1173
1174    #[test]
1175    fn test_match_or_not_v2() {
1176        let matcher = SkimMatcherV2::default().debug(true);
1177
1178        assert_eq!(matcher.fuzzy_match("", ""), Some(0));
1179        assert_eq!(matcher.fuzzy_match("abcdefaghi", ""), Some(0));
1180        assert_eq!(matcher.fuzzy_match("", "a"), None);
1181        assert_eq!(matcher.fuzzy_match("abcdefaghi", "中"), None);
1182        assert_eq!(matcher.fuzzy_match("abc", "abx"), None);
1183        assert!(matcher.fuzzy_match("axbycz", "abc").is_some());
1184        assert!(matcher.fuzzy_match("axbycz", "xyz").is_some());
1185
1186        assert_eq!(
1187            &wrap_fuzzy_match(&matcher, "axbycz", "abc").unwrap(),
1188            "[a]x[b]y[c]z"
1189        );
1190        assert_eq!(
1191            &wrap_fuzzy_match(&matcher, "axbycz", "xyz").unwrap(),
1192            "a[x]b[y]c[z]"
1193        );
1194        assert_eq!(
1195            &wrap_fuzzy_match(&matcher, "Hello, 世界", "H世").unwrap(),
1196            "[H]ello, [世]界"
1197        );
1198    }
1199
1200    #[test]
1201    fn test_case_option_v2() {
1202        let matcher = SkimMatcherV2::default().ignore_case();
1203        assert!(matcher.fuzzy_match("aBc", "abc").is_some());
1204        assert!(matcher.fuzzy_match("aBc", "aBc").is_some());
1205        assert!(matcher.fuzzy_match("aBc", "aBC").is_some());
1206
1207        let matcher = SkimMatcherV2::default().respect_case();
1208        assert!(matcher.fuzzy_match("aBc", "abc").is_none());
1209        assert!(matcher.fuzzy_match("aBc", "aBc").is_some());
1210        assert!(matcher.fuzzy_match("aBc", "aBC").is_none());
1211
1212        let matcher = SkimMatcherV2::default().smart_case();
1213        assert!(matcher.fuzzy_match("aBc", "abc").is_some());
1214        assert!(matcher.fuzzy_match("aBc", "aBc").is_some());
1215        assert!(matcher.fuzzy_match("aBc", "aBC").is_none());
1216    }
1217
1218    #[test]
1219    fn test_matcher_quality_v2() {
1220        let matcher = SkimMatcherV2::default();
1221        assert_order(&matcher, "ab", &["ab", "aoo_boo", "acb"]);
1222        assert_order(
1223            &matcher,
1224            "cc",
1225            &[
1226                "camel case",
1227                "camelCase",
1228                "CamelCase",
1229                "camelcase",
1230                "camel ace",
1231            ],
1232        );
1233        assert_order(
1234            &matcher,
1235            "Da.Te",
1236            &["Data.Text", "Data.Text.Lazy", "Data.Aeson.Encoding.Text"],
1237        );
1238        assert_order(&matcher, "is", &["isIEEE", "inSuf"]);
1239        assert_order(&matcher, "ma", &["map", "many", "maximum"]);
1240        assert_order(&matcher, "print", &["printf", "sprintf"]);
1241        assert_order(&matcher, "ast", &["ast", "AST", "INT_FAST16_MAX"]);
1242        assert_order(&matcher, "int", &["int", "INT", "PRINT"]);
1243    }
1244
1245    #[test]
1246    fn test_reuse_should_not_affect_indices() {
1247        let matcher = SkimMatcherV2::default();
1248        let pattern = "139";
1249        for num in 0..10000 {
1250            let choice = num.to_string();
1251            if let Some((_score, indices)) = matcher.fuzzy_indices(&choice, pattern) {
1252                assert_eq!(indices.len(), 3);
1253            }
1254        }
1255    }
1256}