Skip to main content

fresh/input/
fuzzy.rs

1//! Fuzzy matching algorithm inspired by fzf
2//!
3//! Provides substring-style fuzzy matching where query characters must appear
4//! in order in the target string, but not necessarily consecutively.
5//! Matching is case-insensitive.
6
7/// Score bonus constants for match quality ranking
8mod score {
9    /// Bonus for consecutive character matches
10    pub const CONSECUTIVE: i32 = 16;
11    /// Bonus for matching at word boundary (after space, underscore, etc.)
12    pub const WORD_BOUNDARY: i32 = 32;
13    /// Bonus for matching at the start of the string
14    pub const START_OF_STRING: i32 = 48;
15    /// Bonus for matching a camelCase transition (lowercase -> uppercase)
16    pub const CAMEL_CASE: i32 = 24;
17    /// Penalty per gap between matched characters
18    pub const GAP_PENALTY: i32 = -3;
19    /// Penalty for starting a gap (first unmatched char after a match)
20    pub const GAP_START_PENALTY: i32 = -5;
21    /// Bonus for exact match (query matches entire target)
22    pub const EXACT_MATCH: i32 = 100;
23    /// Bonus for exact base name match (query matches filename without extension)
24    pub const EXACT_BASENAME_MATCH: i32 = 80;
25}
26
27/// Result of a fuzzy match, containing match status and quality score
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub struct FuzzyMatch {
30    /// Whether the query matched the target
31    pub matched: bool,
32    /// Quality score (higher is better). Only meaningful if matched is true.
33    pub score: i32,
34    /// Indices in the target string where query characters matched
35    pub match_positions: Vec<usize>,
36}
37
38impl FuzzyMatch {
39    /// Create a non-matching result
40    pub fn no_match() -> Self {
41        Self {
42            matched: false,
43            score: 0,
44            match_positions: Vec::new(),
45        }
46    }
47}
48
49impl Ord for FuzzyMatch {
50    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
51        // Non-matches are always worse than matches
52        match (self.matched, other.matched) {
53            (true, false) => std::cmp::Ordering::Greater,
54            (false, true) => std::cmp::Ordering::Less,
55            (false, false) => std::cmp::Ordering::Equal,
56            (true, true) => self.score.cmp(&other.score),
57        }
58    }
59}
60
61impl PartialOrd for FuzzyMatch {
62    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
63        Some(self.cmp(other))
64    }
65}
66
67/// Perform fzf-style fuzzy matching of a query against a target string.
68///
69/// Returns a `FuzzyMatch` containing:
70/// - `matched`: true if all query characters appear in order in the target
71/// - `score`: quality score based on match positions (consecutive matches, word boundaries, etc.)
72/// - `match_positions`: indices in target where each query character matched
73///
74/// The algorithm favors:
75/// - Consecutive character matches
76/// - Matches at word boundaries (after space, underscore, hyphen, or camelCase transitions)
77/// - Matches at the start of the string
78///
79/// Queries containing spaces are split into separate terms. Each term is matched
80/// independently and all terms must match for the overall match to succeed.
81///
82/// # Examples
83/// ```
84/// use fresh::input::fuzzy::fuzzy_match;
85///
86/// // Exact substring match
87/// let result = fuzzy_match("save", "Save File");
88/// assert!(result.matched);
89///
90/// // Sparse match (fzf-style)
91/// let result = fuzzy_match("sf", "Save File");
92/// assert!(result.matched);
93///
94/// // Non-matching
95/// let result = fuzzy_match("xyz", "Save File");
96/// assert!(!result.matched);
97///
98/// // Multi-term match (space-separated)
99/// let result = fuzzy_match("features groups-view", "/features/groups/groups-view.tsx");
100/// assert!(result.matched);
101/// ```
102pub fn fuzzy_match(query: &str, target: &str) -> FuzzyMatch {
103    if query.is_empty() {
104        return FuzzyMatch {
105            matched: true,
106            score: 0,
107            match_positions: Vec::new(),
108        };
109    }
110
111    // Split query into terms by whitespace
112    let terms: Vec<&str> = query.split_whitespace().collect();
113
114    // If query is only whitespace, treat as empty query
115    if terms.is_empty() {
116        return FuzzyMatch {
117            matched: true,
118            score: 0,
119            match_positions: Vec::new(),
120        };
121    }
122
123    // If there are multiple terms, match each independently
124    if terms.len() > 1 {
125        return fuzzy_match_multi_term(query, &terms, target);
126    }
127
128    // Single term - use the trimmed version
129    fuzzy_match_single_term(terms[0], target)
130}
131
132/// Match multiple space-separated terms against a target.
133/// All terms must match for the overall match to succeed.
134fn fuzzy_match_multi_term(original_query: &str, terms: &[&str], target: &str) -> FuzzyMatch {
135    let mut total_score = 0;
136    let mut all_positions = Vec::new();
137
138    for term in terms {
139        let result = fuzzy_match_single_term(term, target);
140        if !result.matched {
141            return FuzzyMatch::no_match();
142        }
143        total_score += result.score;
144        all_positions.extend(result.match_positions);
145    }
146
147    // Sort and deduplicate positions (terms may have overlapping matches)
148    all_positions.sort_unstable();
149    all_positions.dedup();
150
151    // Bonus: if the original query (with spaces) appears as a contiguous substring
152    // in the target, give a significant bonus to prefer exact/substring matches
153    // over scattered multi-term matches.
154    let query_lower = original_query.to_lowercase();
155    let target_lower = target.to_lowercase();
156    if let Some(start_pos) = target_lower.find(&query_lower) {
157        // The original query appears verbatim in the target - give exact match bonus
158        total_score += score::EXACT_MATCH;
159
160        // Rebuild positions to show the contiguous match instead of scattered matches
161        let query_chars: Vec<char> = original_query.chars().collect();
162        let target_chars: Vec<char> = target.chars().collect();
163
164        // Find the byte offset -> char index mapping for the start position
165        let char_start = target
166            .char_indices()
167            .position(|(byte_idx, _)| byte_idx == start_pos)
168            .unwrap_or(0);
169
170        all_positions = (char_start..char_start + query_chars.len())
171            .filter(|&i| i < target_chars.len())
172            .collect();
173    }
174
175    FuzzyMatch {
176        matched: true,
177        score: total_score,
178        match_positions: all_positions,
179    }
180}
181
182/// Match a single term (no spaces) against a target string.
183fn fuzzy_match_single_term(query: &str, target: &str) -> FuzzyMatch {
184    let query_lower: Vec<char> = query.to_lowercase().chars().collect();
185    let target_chars: Vec<char> = target.chars().collect();
186    let target_lower: Vec<char> = target.to_lowercase().chars().collect();
187
188    // Try to find the best matching positions using a greedy approach
189    // that considers bonuses at each step
190    let result = find_best_match(&query_lower, &target_chars, &target_lower);
191
192    if let Some((positions, mut final_score)) = result {
193        // Apply exact match bonuses to prioritize exact matches
194        let query_len = query_lower.len();
195        let target_len = target_lower.len();
196
197        // Exact match bonus: query matches entire target
198        if query_len == target_len {
199            final_score += score::EXACT_MATCH;
200        } else if target_len > query_len {
201            // Check if the query is a prefix match (all consecutive from start)
202            let is_prefix_match = positions.len() == query_len
203                && positions.iter().enumerate().all(|(i, &pos)| pos == i);
204
205            if is_prefix_match {
206                let next_char = target_chars[query_len];
207
208                // Highest priority: exact basename match (before extension)
209                // This handles "config" matching "config.rs" better than "config_manager.rs"
210                if next_char == '.' {
211                    final_score += score::EXACT_MATCH; // Full exact match bonus for extension case
212                }
213                // Second priority: match before word separator (hyphen, underscore, space)
214                // This handles "fresh" matching "fresh-editor" better than "freshness"
215                else if next_char == '-' || next_char == '_' || next_char == ' ' {
216                    final_score += score::EXACT_BASENAME_MATCH;
217                }
218            }
219        }
220
221        FuzzyMatch {
222            matched: true,
223            score: final_score,
224            match_positions: positions,
225        }
226    } else {
227        FuzzyMatch::no_match()
228    }
229}
230
231/// Find the best matching positions for query in target
232fn find_best_match(
233    query: &[char],
234    target_chars: &[char],
235    target_lower: &[char],
236) -> Option<(Vec<usize>, i32)> {
237    if query.is_empty() {
238        return Some((Vec::new(), 0));
239    }
240
241    // Use dynamic programming to find the best match
242    // For each query position and target position, track the best score achievable
243    let n = target_lower.len();
244    let m = query.len();
245
246    if n < m {
247        return None;
248    }
249
250    // First, check if a match is even possible (quick rejection)
251    {
252        let mut qi = 0;
253        for &tc in target_lower {
254            if qi < m && tc == query[qi] {
255                qi += 1;
256            }
257        }
258        if qi < m {
259            return None; // Not all query chars matched
260        }
261    }
262
263    // dp[qi] = (best_score, prev_match_pos) for matching query[0..qi]
264    // We'll track match positions separately
265    #[derive(Clone)]
266    struct State {
267        score: i32,
268        positions: Vec<usize>,
269        last_match_pos: Option<usize>,
270    }
271
272    let mut best_for_query_len: Vec<Option<State>> = vec![None; m + 1];
273    best_for_query_len[0] = Some(State {
274        score: 0,
275        positions: Vec::new(),
276        last_match_pos: None,
277    });
278
279    for ti in 0..n {
280        // Process in reverse to avoid using updated values in same iteration
281        for qi in (0..m).rev() {
282            if target_lower[ti] != query[qi] {
283                continue;
284            }
285
286            let prev_state = &best_for_query_len[qi];
287            if prev_state.is_none() {
288                continue;
289            }
290            let prev = prev_state.as_ref().unwrap();
291
292            // Check if this position is valid (must be after last match)
293            if let Some(last_pos) = prev.last_match_pos {
294                if ti <= last_pos {
295                    continue;
296                }
297            }
298
299            // Calculate score for matching query[qi] at target[ti]
300            let mut match_score = 0;
301
302            // Start of string bonus
303            if ti == 0 {
304                match_score += score::START_OF_STRING;
305            }
306
307            // Word boundary bonus
308            if ti > 0 {
309                let prev_char = target_chars[ti - 1];
310                if prev_char == ' '
311                    || prev_char == '_'
312                    || prev_char == '-'
313                    || prev_char == '/'
314                    || prev_char == '.'
315                {
316                    match_score += score::WORD_BOUNDARY;
317                } else if prev_char.is_lowercase() && target_chars[ti].is_uppercase() {
318                    match_score += score::CAMEL_CASE;
319                }
320            }
321
322            // Consecutive match bonus
323            if let Some(last_pos) = prev.last_match_pos {
324                if ti == last_pos + 1 {
325                    match_score += score::CONSECUTIVE;
326                } else {
327                    // Gap penalty
328                    let gap_size = ti - last_pos - 1;
329                    match_score += score::GAP_START_PENALTY;
330                    match_score += score::GAP_PENALTY * (gap_size as i32 - 1).max(0);
331                }
332            }
333
334            let new_score = prev.score + match_score;
335
336            let current = &best_for_query_len[qi + 1];
337            let should_update = match current {
338                None => true,
339                Some(curr) => new_score > curr.score,
340            };
341
342            if should_update {
343                let mut new_positions = prev.positions.clone();
344                new_positions.push(ti);
345                best_for_query_len[qi + 1] = Some(State {
346                    score: new_score,
347                    positions: new_positions,
348                    last_match_pos: Some(ti),
349                });
350            }
351        }
352    }
353
354    best_for_query_len[m]
355        .as_ref()
356        .map(|s| (s.positions.clone(), s.score))
357}
358
359/// Filter a list of items using fuzzy matching, returning sorted results
360///
361/// Items are sorted by match quality (best matches first).
362/// Non-matching items are excluded.
363pub fn fuzzy_filter<T, F>(query: &str, items: &[T], get_text: F) -> Vec<(usize, FuzzyMatch)>
364where
365    F: Fn(&T) -> &str,
366{
367    let mut results: Vec<(usize, FuzzyMatch)> = items
368        .iter()
369        .enumerate()
370        .map(|(idx, item)| (idx, fuzzy_match(query, get_text(item))))
371        .filter(|(_, m)| m.matched)
372        .collect();
373
374    // Sort by score descending (best matches first)
375    results.sort_by(|a, b| b.1.score.cmp(&a.1.score));
376
377    results
378}
379
380#[cfg(test)]
381mod tests {
382    use super::*;
383
384    #[test]
385    fn test_empty_query_matches_everything() {
386        let result = fuzzy_match("", "anything");
387        assert!(result.matched);
388        assert_eq!(result.score, 0);
389    }
390
391    #[test]
392    fn test_exact_match() {
393        let result = fuzzy_match("save", "save");
394        assert!(result.matched);
395        assert!(result.score > 0);
396    }
397
398    #[test]
399    fn test_case_insensitive() {
400        let result = fuzzy_match("SAVE", "save file");
401        assert!(result.matched);
402
403        let result = fuzzy_match("save", "SAVE FILE");
404        assert!(result.matched);
405    }
406
407    #[test]
408    fn test_substring_match() {
409        let result = fuzzy_match("file", "Save File");
410        assert!(result.matched);
411    }
412
413    #[test]
414    fn test_sparse_match() {
415        let result = fuzzy_match("sf", "Save File");
416        assert!(result.matched);
417        assert_eq!(result.match_positions.len(), 2);
418    }
419
420    #[test]
421    fn test_no_match() {
422        let result = fuzzy_match("xyz", "Save File");
423        assert!(!result.matched);
424    }
425
426    #[test]
427    fn test_query_longer_than_target() {
428        let result = fuzzy_match("very long query", "short");
429        assert!(!result.matched);
430    }
431
432    #[test]
433    fn test_consecutive_matches_score_higher() {
434        // Use examples without word boundary interference
435        let result_consecutive = fuzzy_match("ab", "xabc");
436        let result_sparse = fuzzy_match("ab", "xaxb");
437        assert!(result_consecutive.matched);
438        assert!(result_sparse.matched);
439        assert!(
440            result_consecutive.score > result_sparse.score,
441            "consecutive: {}, sparse: {}",
442            result_consecutive.score,
443            result_sparse.score
444        );
445    }
446
447    #[test]
448    fn test_word_boundary_scores_higher() {
449        let result_boundary = fuzzy_match("sf", "Save File");
450        let result_middle = fuzzy_match("af", "Save File");
451        assert!(result_boundary.matched);
452        assert!(result_middle.matched);
453        assert!(
454            result_boundary.score > result_middle.score,
455            "boundary: {}, middle: {}",
456            result_boundary.score,
457            result_middle.score
458        );
459    }
460
461    #[test]
462    fn test_start_of_string_scores_higher() {
463        let result_start = fuzzy_match("s", "Save File");
464        let result_middle = fuzzy_match("a", "Save File");
465        assert!(result_start.matched);
466        assert!(result_middle.matched);
467        assert!(
468            result_start.score > result_middle.score,
469            "start: {}, middle: {}",
470            result_start.score,
471            result_middle.score
472        );
473    }
474
475    #[test]
476    fn test_camel_case_boundary() {
477        let result = fuzzy_match("sf", "saveFile");
478        assert!(result.matched);
479        // 'F' is at a camelCase boundary
480        assert!(result.score > 0);
481    }
482
483    #[test]
484    fn test_fuzzy_filter() {
485        let items = vec!["Save File", "Open File", "Save As", "Quit"];
486        let results = fuzzy_filter("sf", &items, |s| s);
487
488        assert!(!results.is_empty());
489        // "Save File" should match
490        let matched_texts: Vec<&str> = results.iter().map(|(idx, _)| items[*idx]).collect();
491        assert!(matched_texts.contains(&"Save File"));
492    }
493
494    #[test]
495    fn test_match_positions_are_correct() {
496        let result = fuzzy_match("sf", "Save File");
497        assert!(result.matched);
498        assert_eq!(result.match_positions.len(), 2);
499        assert_eq!(result.match_positions[0], 0); // 'S' in "Save"
500        assert_eq!(result.match_positions[1], 5); // 'F' in "File"
501    }
502
503    #[test]
504    fn test_fuzzy_ordering() {
505        // Better match should have higher score
506        let match1 = FuzzyMatch {
507            matched: true,
508            score: 100,
509            match_positions: vec![],
510        };
511        let match2 = FuzzyMatch {
512            matched: true,
513            score: 50,
514            match_positions: vec![],
515        };
516        let no_match = FuzzyMatch::no_match();
517
518        assert!(match1 > match2);
519        assert!(match2 > no_match);
520        assert!(match1 > no_match);
521    }
522
523    #[test]
524    fn test_out_of_order_no_match() {
525        // Characters must appear in order
526        let result = fuzzy_match("fs", "Save File");
527        assert!(!result.matched);
528    }
529
530    #[test]
531    fn test_multi_term_query_with_spaces() {
532        // Each term should be matched independently
533        let result = fuzzy_match("features groups-view", "/features/groups/groups-view.tsx");
534        assert!(result.matched);
535    }
536
537    #[test]
538    fn test_multi_term_query_partial_match_fails() {
539        // If any term doesn't match, the whole query fails
540        let result = fuzzy_match("features nonexistent", "/features/groups/groups-view.tsx");
541        assert!(!result.matched);
542    }
543
544    #[test]
545    fn test_multi_term_query_all_must_match() {
546        // All terms must match
547        let result = fuzzy_match("src main rs", "src/main.rs");
548        assert!(result.matched);
549
550        let result = fuzzy_match("src xyz", "src/main.rs");
551        assert!(!result.matched);
552    }
553
554    #[test]
555    fn test_multi_term_combines_scores() {
556        // Multi-term match should combine scores from each term
557        let result = fuzzy_match("save file", "Save File");
558        assert!(result.matched);
559        assert!(result.score > 0);
560    }
561
562    #[test]
563    fn test_leading_trailing_spaces_ignored() {
564        // Leading/trailing whitespace should be ignored
565        let result = fuzzy_match("  save  ", "Save File");
566        assert!(result.matched);
567    }
568
569    #[test]
570    fn test_multiple_spaces_between_terms() {
571        // Multiple spaces between terms should be treated as single separator
572        let result = fuzzy_match("save   file", "Save File");
573        assert!(result.matched);
574    }
575
576    #[test]
577    fn test_real_world_command_names() {
578        // Test with real command palette patterns
579        assert!(fuzzy_match("gtd", "Go to Definition").matched);
580        assert!(fuzzy_match("ofl", "Open File").matched);
581        assert!(fuzzy_match("sas", "Save As").matched);
582        assert!(fuzzy_match("fr", "Find and Replace").matched);
583    }
584
585    #[test]
586    fn test_tab_name_patterns() {
587        // Test with typical tab/file names
588        assert!(fuzzy_match("main", "src/main.rs").matched);
589        assert!(fuzzy_match("mod", "src/input/mod.rs").matched);
590        assert!(fuzzy_match("cmdreg", "command_registry.rs").matched);
591    }
592
593    #[test]
594    fn test_exact_match_scores_highest() {
595        // "fresh" should score higher against "fresh" than against "fresh-editor"
596        let exact = fuzzy_match("fresh", "fresh");
597        let longer = fuzzy_match("fresh", "fresh-editor");
598
599        assert!(exact.matched);
600        assert!(longer.matched);
601        assert!(
602            exact.score > longer.score,
603            "exact: {}, longer: {}",
604            exact.score,
605            longer.score
606        );
607    }
608
609    #[test]
610    fn test_exact_basename_match_scores_high() {
611        // "fresh" matching "fresh-editor" should score higher than "fresh" matching "freshness"
612        let basename_match = fuzzy_match("fresh", "fresh-editor");
613        let substring_match = fuzzy_match("fresh", "freshness");
614
615        assert!(basename_match.matched);
616        assert!(substring_match.matched);
617        assert!(
618            basename_match.score > substring_match.score,
619            "basename: {}, substring: {}",
620            basename_match.score,
621            substring_match.score
622        );
623    }
624
625    #[test]
626    fn test_exact_match_with_extension() {
627        // "config" should score higher against "config.rs" than "config_manager.rs"
628        let exact_base = fuzzy_match("config", "config.rs");
629        let longer_name = fuzzy_match("config", "config_manager.rs");
630
631        assert!(exact_base.matched);
632        assert!(longer_name.matched);
633        assert!(
634            exact_base.score > longer_name.score,
635            "exact_base: {}, longer: {}",
636            exact_base.score,
637            longer_name.score
638        );
639    }
640
641    #[test]
642    fn test_multi_term_exact_target_scores_higher() {
643        // "Package: Packages" should score higher against "Package: Packages"
644        // than against "Package: Install from URL"
645        let exact = fuzzy_match("Package: Packages", "Package: Packages");
646        let partial = fuzzy_match("Package: Packages", "Package: Install from URL");
647
648        assert!(exact.matched, "exact should match");
649        assert!(partial.matched, "partial should match");
650        assert!(
651            exact.score > partial.score,
652            "exact target should score higher: exact={}, partial={}",
653            exact.score,
654            partial.score
655        );
656    }
657}