Skip to main content

fresh/input/
fuzzy.rs

1//! Fuzzy matching algorithm inspired by fzf
2//!
3//! Provides substring-style fuzzy matching where query characters must appear
4//! in order in the target string, but not necessarily consecutively.
5//! Matching is case-insensitive.
6
7/// Score bonus constants for match quality ranking
8mod score {
9    /// Bonus for consecutive character matches
10    pub const CONSECUTIVE: i32 = 16;
11    /// Bonus for matching at word boundary (after space, underscore, etc.)
12    pub const WORD_BOUNDARY: i32 = 32;
13    /// Bonus for matching at the start of the string
14    pub const START_OF_STRING: i32 = 48;
15    /// Bonus for matching a camelCase transition (lowercase -> uppercase)
16    pub const CAMEL_CASE: i32 = 24;
17    /// Penalty per gap between matched characters
18    pub const GAP_PENALTY: i32 = -3;
19    /// Penalty for starting a gap (first unmatched char after a match)
20    pub const GAP_START_PENALTY: i32 = -5;
21    /// Bonus for exact match (query matches entire target)
22    pub const EXACT_MATCH: i32 = 100;
23    /// Bonus for exact base name match (query matches filename without extension)
24    pub const EXACT_BASENAME_MATCH: i32 = 80;
25}
26
27/// Result of a fuzzy match, containing match status and quality score
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub struct FuzzyMatch {
30    /// Whether the query matched the target
31    pub matched: bool,
32    /// Quality score (higher is better). Only meaningful if matched is true.
33    pub score: i32,
34    /// Indices in the target string where query characters matched
35    pub match_positions: Vec<usize>,
36}
37
38impl FuzzyMatch {
39    /// Create a non-matching result
40    pub fn no_match() -> Self {
41        Self {
42            matched: false,
43            score: 0,
44            match_positions: Vec::new(),
45        }
46    }
47}
48
49impl Ord for FuzzyMatch {
50    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
51        // Non-matches are always worse than matches
52        match (self.matched, other.matched) {
53            (true, false) => std::cmp::Ordering::Greater,
54            (false, true) => std::cmp::Ordering::Less,
55            (false, false) => std::cmp::Ordering::Equal,
56            (true, true) => self.score.cmp(&other.score),
57        }
58    }
59}
60
61impl PartialOrd for FuzzyMatch {
62    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
63        Some(self.cmp(other))
64    }
65}
66
67/// Perform fzf-style fuzzy matching of a query against a target string.
68///
69/// Returns a `FuzzyMatch` containing:
70/// - `matched`: true if all query characters appear in order in the target
71/// - `score`: quality score based on match positions (consecutive matches, word boundaries, etc.)
72/// - `match_positions`: indices in target where each query character matched
73///
74/// The algorithm favors:
75/// - Consecutive character matches
76/// - Matches at word boundaries (after space, underscore, hyphen, or camelCase transitions)
77/// - Matches at the start of the string
78///
79/// # Examples
80/// ```
81/// use fresh::input::fuzzy::fuzzy_match;
82///
83/// // Exact substring match
84/// let result = fuzzy_match("save", "Save File");
85/// assert!(result.matched);
86///
87/// // Sparse match (fzf-style)
88/// let result = fuzzy_match("sf", "Save File");
89/// assert!(result.matched);
90///
91/// // Non-matching
92/// let result = fuzzy_match("xyz", "Save File");
93/// assert!(!result.matched);
94/// ```
95pub fn fuzzy_match(query: &str, target: &str) -> FuzzyMatch {
96    if query.is_empty() {
97        return FuzzyMatch {
98            matched: true,
99            score: 0,
100            match_positions: Vec::new(),
101        };
102    }
103
104    let query_lower: Vec<char> = query.to_lowercase().chars().collect();
105    let target_chars: Vec<char> = target.chars().collect();
106    let target_lower: Vec<char> = target.to_lowercase().chars().collect();
107
108    // Try to find the best matching positions using a greedy approach
109    // that considers bonuses at each step
110    let result = find_best_match(&query_lower, &target_chars, &target_lower);
111
112    if let Some((positions, mut final_score)) = result {
113        // Apply exact match bonuses to prioritize exact matches
114        let query_len = query_lower.len();
115        let target_len = target_lower.len();
116
117        // Exact match bonus: query matches entire target
118        if query_len == target_len {
119            final_score += score::EXACT_MATCH;
120        } else if target_len > query_len {
121            // Check if the query is a prefix match (all consecutive from start)
122            let is_prefix_match = positions.len() == query_len
123                && positions.iter().enumerate().all(|(i, &pos)| pos == i);
124
125            if is_prefix_match {
126                let next_char = target_chars[query_len];
127
128                // Highest priority: exact basename match (before extension)
129                // This handles "config" matching "config.rs" better than "config_manager.rs"
130                if next_char == '.' {
131                    final_score += score::EXACT_MATCH; // Full exact match bonus for extension case
132                }
133                // Second priority: match before word separator (hyphen, underscore, space)
134                // This handles "fresh" matching "fresh-editor" better than "freshness"
135                else if next_char == '-' || next_char == '_' || next_char == ' ' {
136                    final_score += score::EXACT_BASENAME_MATCH;
137                }
138            }
139        }
140
141        FuzzyMatch {
142            matched: true,
143            score: final_score,
144            match_positions: positions,
145        }
146    } else {
147        FuzzyMatch::no_match()
148    }
149}
150
151/// Find the best matching positions for query in target
152fn find_best_match(
153    query: &[char],
154    target_chars: &[char],
155    target_lower: &[char],
156) -> Option<(Vec<usize>, i32)> {
157    if query.is_empty() {
158        return Some((Vec::new(), 0));
159    }
160
161    // Use dynamic programming to find the best match
162    // For each query position and target position, track the best score achievable
163    let n = target_lower.len();
164    let m = query.len();
165
166    if n < m {
167        return None;
168    }
169
170    // First, check if a match is even possible (quick rejection)
171    {
172        let mut qi = 0;
173        for &tc in target_lower {
174            if qi < m && tc == query[qi] {
175                qi += 1;
176            }
177        }
178        if qi < m {
179            return None; // Not all query chars matched
180        }
181    }
182
183    // dp[qi] = (best_score, prev_match_pos) for matching query[0..qi]
184    // We'll track match positions separately
185    #[derive(Clone)]
186    struct State {
187        score: i32,
188        positions: Vec<usize>,
189        last_match_pos: Option<usize>,
190    }
191
192    let mut best_for_query_len: Vec<Option<State>> = vec![None; m + 1];
193    best_for_query_len[0] = Some(State {
194        score: 0,
195        positions: Vec::new(),
196        last_match_pos: None,
197    });
198
199    for ti in 0..n {
200        // Process in reverse to avoid using updated values in same iteration
201        for qi in (0..m).rev() {
202            if target_lower[ti] != query[qi] {
203                continue;
204            }
205
206            let prev_state = &best_for_query_len[qi];
207            if prev_state.is_none() {
208                continue;
209            }
210            let prev = prev_state.as_ref().unwrap();
211
212            // Check if this position is valid (must be after last match)
213            if let Some(last_pos) = prev.last_match_pos {
214                if ti <= last_pos {
215                    continue;
216                }
217            }
218
219            // Calculate score for matching query[qi] at target[ti]
220            let mut match_score = 0;
221
222            // Start of string bonus
223            if ti == 0 {
224                match_score += score::START_OF_STRING;
225            }
226
227            // Word boundary bonus
228            if ti > 0 {
229                let prev_char = target_chars[ti - 1];
230                if prev_char == ' '
231                    || prev_char == '_'
232                    || prev_char == '-'
233                    || prev_char == '/'
234                    || prev_char == '.'
235                {
236                    match_score += score::WORD_BOUNDARY;
237                } else if prev_char.is_lowercase() && target_chars[ti].is_uppercase() {
238                    match_score += score::CAMEL_CASE;
239                }
240            }
241
242            // Consecutive match bonus
243            if let Some(last_pos) = prev.last_match_pos {
244                if ti == last_pos + 1 {
245                    match_score += score::CONSECUTIVE;
246                } else {
247                    // Gap penalty
248                    let gap_size = ti - last_pos - 1;
249                    match_score += score::GAP_START_PENALTY;
250                    match_score += score::GAP_PENALTY * (gap_size as i32 - 1).max(0);
251                }
252            }
253
254            let new_score = prev.score + match_score;
255
256            let current = &best_for_query_len[qi + 1];
257            let should_update = match current {
258                None => true,
259                Some(curr) => new_score > curr.score,
260            };
261
262            if should_update {
263                let mut new_positions = prev.positions.clone();
264                new_positions.push(ti);
265                best_for_query_len[qi + 1] = Some(State {
266                    score: new_score,
267                    positions: new_positions,
268                    last_match_pos: Some(ti),
269                });
270            }
271        }
272    }
273
274    best_for_query_len[m]
275        .as_ref()
276        .map(|s| (s.positions.clone(), s.score))
277}
278
279/// Filter a list of items using fuzzy matching, returning sorted results
280///
281/// Items are sorted by match quality (best matches first).
282/// Non-matching items are excluded.
283pub fn fuzzy_filter<T, F>(query: &str, items: &[T], get_text: F) -> Vec<(usize, FuzzyMatch)>
284where
285    F: Fn(&T) -> &str,
286{
287    let mut results: Vec<(usize, FuzzyMatch)> = items
288        .iter()
289        .enumerate()
290        .map(|(idx, item)| (idx, fuzzy_match(query, get_text(item))))
291        .filter(|(_, m)| m.matched)
292        .collect();
293
294    // Sort by score descending (best matches first)
295    results.sort_by(|a, b| b.1.score.cmp(&a.1.score));
296
297    results
298}
299
300#[cfg(test)]
301mod tests {
302    use super::*;
303
304    #[test]
305    fn test_empty_query_matches_everything() {
306        let result = fuzzy_match("", "anything");
307        assert!(result.matched);
308        assert_eq!(result.score, 0);
309    }
310
311    #[test]
312    fn test_exact_match() {
313        let result = fuzzy_match("save", "save");
314        assert!(result.matched);
315        assert!(result.score > 0);
316    }
317
318    #[test]
319    fn test_case_insensitive() {
320        let result = fuzzy_match("SAVE", "save file");
321        assert!(result.matched);
322
323        let result = fuzzy_match("save", "SAVE FILE");
324        assert!(result.matched);
325    }
326
327    #[test]
328    fn test_substring_match() {
329        let result = fuzzy_match("file", "Save File");
330        assert!(result.matched);
331    }
332
333    #[test]
334    fn test_sparse_match() {
335        let result = fuzzy_match("sf", "Save File");
336        assert!(result.matched);
337        assert_eq!(result.match_positions.len(), 2);
338    }
339
340    #[test]
341    fn test_no_match() {
342        let result = fuzzy_match("xyz", "Save File");
343        assert!(!result.matched);
344    }
345
346    #[test]
347    fn test_query_longer_than_target() {
348        let result = fuzzy_match("very long query", "short");
349        assert!(!result.matched);
350    }
351
352    #[test]
353    fn test_consecutive_matches_score_higher() {
354        // Use examples without word boundary interference
355        let result_consecutive = fuzzy_match("ab", "xabc");
356        let result_sparse = fuzzy_match("ab", "xaxb");
357        assert!(result_consecutive.matched);
358        assert!(result_sparse.matched);
359        assert!(
360            result_consecutive.score > result_sparse.score,
361            "consecutive: {}, sparse: {}",
362            result_consecutive.score,
363            result_sparse.score
364        );
365    }
366
367    #[test]
368    fn test_word_boundary_scores_higher() {
369        let result_boundary = fuzzy_match("sf", "Save File");
370        let result_middle = fuzzy_match("af", "Save File");
371        assert!(result_boundary.matched);
372        assert!(result_middle.matched);
373        assert!(
374            result_boundary.score > result_middle.score,
375            "boundary: {}, middle: {}",
376            result_boundary.score,
377            result_middle.score
378        );
379    }
380
381    #[test]
382    fn test_start_of_string_scores_higher() {
383        let result_start = fuzzy_match("s", "Save File");
384        let result_middle = fuzzy_match("a", "Save File");
385        assert!(result_start.matched);
386        assert!(result_middle.matched);
387        assert!(
388            result_start.score > result_middle.score,
389            "start: {}, middle: {}",
390            result_start.score,
391            result_middle.score
392        );
393    }
394
395    #[test]
396    fn test_camel_case_boundary() {
397        let result = fuzzy_match("sf", "saveFile");
398        assert!(result.matched);
399        // 'F' is at a camelCase boundary
400        assert!(result.score > 0);
401    }
402
403    #[test]
404    fn test_fuzzy_filter() {
405        let items = vec!["Save File", "Open File", "Save As", "Quit"];
406        let results = fuzzy_filter("sf", &items, |s| s);
407
408        assert!(!results.is_empty());
409        // "Save File" should match
410        let matched_texts: Vec<&str> = results.iter().map(|(idx, _)| items[*idx]).collect();
411        assert!(matched_texts.contains(&"Save File"));
412    }
413
414    #[test]
415    fn test_match_positions_are_correct() {
416        let result = fuzzy_match("sf", "Save File");
417        assert!(result.matched);
418        assert_eq!(result.match_positions.len(), 2);
419        assert_eq!(result.match_positions[0], 0); // 'S' in "Save"
420        assert_eq!(result.match_positions[1], 5); // 'F' in "File"
421    }
422
423    #[test]
424    fn test_fuzzy_ordering() {
425        // Better match should have higher score
426        let match1 = FuzzyMatch {
427            matched: true,
428            score: 100,
429            match_positions: vec![],
430        };
431        let match2 = FuzzyMatch {
432            matched: true,
433            score: 50,
434            match_positions: vec![],
435        };
436        let no_match = FuzzyMatch::no_match();
437
438        assert!(match1 > match2);
439        assert!(match2 > no_match);
440        assert!(match1 > no_match);
441    }
442
443    #[test]
444    fn test_out_of_order_no_match() {
445        // Characters must appear in order
446        let result = fuzzy_match("fs", "Save File");
447        assert!(!result.matched);
448    }
449
450    #[test]
451    fn test_real_world_command_names() {
452        // Test with real command palette patterns
453        assert!(fuzzy_match("gtd", "Go to Definition").matched);
454        assert!(fuzzy_match("ofl", "Open File").matched);
455        assert!(fuzzy_match("sas", "Save As").matched);
456        assert!(fuzzy_match("fr", "Find and Replace").matched);
457    }
458
459    #[test]
460    fn test_tab_name_patterns() {
461        // Test with typical tab/file names
462        assert!(fuzzy_match("main", "src/main.rs").matched);
463        assert!(fuzzy_match("mod", "src/input/mod.rs").matched);
464        assert!(fuzzy_match("cmdreg", "command_registry.rs").matched);
465    }
466
467    #[test]
468    fn test_exact_match_scores_highest() {
469        // "fresh" should score higher against "fresh" than against "fresh-editor"
470        let exact = fuzzy_match("fresh", "fresh");
471        let longer = fuzzy_match("fresh", "fresh-editor");
472
473        assert!(exact.matched);
474        assert!(longer.matched);
475        assert!(
476            exact.score > longer.score,
477            "exact: {}, longer: {}",
478            exact.score,
479            longer.score
480        );
481    }
482
483    #[test]
484    fn test_exact_basename_match_scores_high() {
485        // "fresh" matching "fresh-editor" should score higher than "fresh" matching "freshness"
486        let basename_match = fuzzy_match("fresh", "fresh-editor");
487        let substring_match = fuzzy_match("fresh", "freshness");
488
489        assert!(basename_match.matched);
490        assert!(substring_match.matched);
491        assert!(
492            basename_match.score > substring_match.score,
493            "basename: {}, substring: {}",
494            basename_match.score,
495            substring_match.score
496        );
497    }
498
499    #[test]
500    fn test_exact_match_with_extension() {
501        // "config" should score higher against "config.rs" than "config_manager.rs"
502        let exact_base = fuzzy_match("config", "config.rs");
503        let longer_name = fuzzy_match("config", "config_manager.rs");
504
505        assert!(exact_base.matched);
506        assert!(longer_name.matched);
507        assert!(
508            exact_base.score > longer_name.score,
509            "exact_base: {}, longer: {}",
510            exact_base.score,
511            longer_name.score
512        );
513    }
514}