Skip to main content

fresh/input/fuzzy/
mod.rs

1//! Fuzzy matching algorithm inspired by fzf.
2//!
3//! Provides substring-style fuzzy matching where query characters must appear
4//! in order in the target string, but not necessarily consecutively.  Matching
5//! is case-insensitive.
6//!
7//! # Hot-path usage
8//!
9//! For a single keystroke matched against many candidates, build a
10//! [`FuzzyMatcher`] once and reuse it.  The matcher owns both the
11//! [`PreparedPattern`] *and* two reusable `Vec<char>` scratch buffers, so
12//! after the first call neither query preparation nor target-side scratch
13//! touches the allocator:
14//!
15//! ```ignore
16//! let mut matcher = FuzzyMatcher::new(user_input);
17//! for file in files {
18//!     let result = matcher.match_target(&file.path);
19//!     // ...
20//! }
21//! ```
22//!
23//! The convenience wrapper [`fuzzy_match`] rebuilds the matcher on every
24//! call — fine for one-shot use, wasteful in a loop.
25//!
26//! The legacy [`fuzzy_match_prepared`] entry point still exists for
27//! callers that want query amortisation without scratch amortisation; it
28//! internally clones the pattern into a throwaway [`FuzzyMatcher`].
29//!
30//! # Module layout
31//!
32//! - [`pattern`] owns [`PreparedPattern`] and the non-allocating subsequence
33//!   rejection used as the hot-path gate.
34//! - [`matcher`] owns the scoring DP (with an arena-based backpointer chain
35//!   instead of cloning position vectors) and the contiguous-substring
36//!   scorer that runs in parallel with it.
37
38mod matcher;
39mod pattern;
40
41pub use matcher::FuzzyMatcher;
42pub use pattern::PreparedPattern;
43
44/// Score bonus constants for match quality ranking.
45pub(crate) mod score {
46    /// Bonus for consecutive character matches
47    pub const CONSECUTIVE: i32 = 16;
48    /// Bonus for matching at word boundary (after space, underscore, etc.)
49    pub const WORD_BOUNDARY: i32 = 32;
50    /// Bonus for matching at the start of the string
51    pub const START_OF_STRING: i32 = 48;
52    /// Bonus for matching a camelCase transition (lowercase -> uppercase)
53    pub const CAMEL_CASE: i32 = 24;
54    /// Penalty per gap between matched characters
55    pub const GAP_PENALTY: i32 = -3;
56    /// Penalty for starting a gap (first unmatched char after a match)
57    pub const GAP_START_PENALTY: i32 = -5;
58    /// Bonus for exact match (query matches entire target)
59    pub const EXACT_MATCH: i32 = 100;
60    /// Bonus for exact base name match (query matches filename without extension)
61    pub const EXACT_BASENAME_MATCH: i32 = 80;
62    /// Bonus for a contiguous substring match (all query chars are consecutive
63    /// in the target but not necessarily from position 0). This ensures that
64    /// e.g. "results" in "results.json" ranks above scattered r-e-s-u-l-t-s.
65    pub const CONTIGUOUS_SUBSTRING: i32 = 64;
66}
67
68/// Result of a fuzzy match, containing match status and quality score
69#[derive(Debug, Clone, PartialEq, Eq)]
70pub struct FuzzyMatch {
71    /// Whether the query matched the target
72    pub matched: bool,
73    /// Quality score (higher is better). Only meaningful if matched is true.
74    pub score: i32,
75    /// Indices in the target string where query characters matched
76    pub match_positions: Vec<usize>,
77}
78
79impl FuzzyMatch {
80    /// Create a non-matching result
81    pub fn no_match() -> Self {
82        Self {
83            matched: false,
84            score: 0,
85            match_positions: Vec::new(),
86        }
87    }
88}
89
90impl Ord for FuzzyMatch {
91    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
92        // Non-matches are always worse than matches
93        match (self.matched, other.matched) {
94            (true, false) => std::cmp::Ordering::Greater,
95            (false, true) => std::cmp::Ordering::Less,
96            (false, false) => std::cmp::Ordering::Equal,
97            (true, true) => self.score.cmp(&other.score),
98        }
99    }
100}
101
102impl PartialOrd for FuzzyMatch {
103    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
104        Some(self.cmp(other))
105    }
106}
107
108/// Perform fzf-style fuzzy matching of a query against a target string.
109///
110/// Convenience wrapper that builds a [`PreparedPattern`] internally.  For
111/// hot paths matching one query against many targets, prefer building a
112/// [`PreparedPattern`] once and calling [`fuzzy_match_prepared`] per target.
113///
114/// Returns a `FuzzyMatch` containing:
115/// - `matched`: true if all query characters appear in order in the target
116/// - `score`: quality score based on match positions (consecutive matches, word boundaries, etc.)
117/// - `match_positions`: indices in target where each query character matched
118///
119/// The algorithm favors:
120/// - Consecutive character matches
121/// - Matches at word boundaries (after space, underscore, hyphen, or camelCase transitions)
122/// - Matches at the start of the string
123///
124/// Queries containing spaces are split into separate terms. Each term is matched
125/// independently and all terms must match for the overall match to succeed.
126///
127/// # Examples
128/// ```
129/// use fresh::input::fuzzy::fuzzy_match;
130///
131/// // Exact substring match
132/// let result = fuzzy_match("save", "Save File");
133/// assert!(result.matched);
134///
135/// // Sparse match (fzf-style)
136/// let result = fuzzy_match("sf", "Save File");
137/// assert!(result.matched);
138///
139/// // Non-matching
140/// let result = fuzzy_match("xyz", "Save File");
141/// assert!(!result.matched);
142///
143/// // Multi-term match (space-separated)
144/// let result = fuzzy_match("features groups-view", "/features/groups/groups-view.tsx");
145/// assert!(result.matched);
146/// ```
147pub fn fuzzy_match(query: &str, target: &str) -> FuzzyMatch {
148    let pattern = PreparedPattern::new(query);
149    fuzzy_match_prepared(&pattern, target)
150}
151
152/// Perform fuzzy matching using a pre-prepared pattern.
153///
154/// This is the hot-path entry point — build the [`PreparedPattern`] once
155/// and call this per target to amortise query-preparation work.
156pub fn fuzzy_match_prepared(pattern: &PreparedPattern, target: &str) -> FuzzyMatch {
157    matcher::match_prepared(pattern, target)
158}
159
160/// Filter a list of items using fuzzy matching, returning sorted results.
161///
162/// Items are sorted by match quality (best matches first).
163/// Non-matching items are excluded.
164pub fn fuzzy_filter<T, F>(query: &str, items: &[T], get_text: F) -> Vec<(usize, FuzzyMatch)>
165where
166    F: Fn(&T) -> &str,
167{
168    let pattern = PreparedPattern::new(query);
169    let mut results: Vec<(usize, FuzzyMatch)> = items
170        .iter()
171        .enumerate()
172        .map(|(idx, item)| (idx, fuzzy_match_prepared(&pattern, get_text(item))))
173        .filter(|(_, m)| m.matched)
174        .collect();
175
176    // Sort by score descending (best matches first)
177    results.sort_by(|a, b| b.1.score.cmp(&a.1.score));
178
179    results
180}
181
182#[cfg(test)]
183mod tests {
184    use super::*;
185
186    #[test]
187    fn test_empty_query_matches_everything() {
188        let result = fuzzy_match("", "anything");
189        assert!(result.matched);
190        assert_eq!(result.score, 0);
191    }
192
193    #[test]
194    fn test_exact_match() {
195        let result = fuzzy_match("save", "save");
196        assert!(result.matched);
197        assert!(result.score > 0);
198    }
199
200    #[test]
201    fn test_case_insensitive() {
202        let result = fuzzy_match("SAVE", "save file");
203        assert!(result.matched);
204
205        let result = fuzzy_match("save", "SAVE FILE");
206        assert!(result.matched);
207    }
208
209    #[test]
210    fn test_substring_match() {
211        let result = fuzzy_match("file", "Save File");
212        assert!(result.matched);
213    }
214
215    #[test]
216    fn test_sparse_match() {
217        let result = fuzzy_match("sf", "Save File");
218        assert!(result.matched);
219        assert_eq!(result.match_positions.len(), 2);
220    }
221
222    #[test]
223    fn test_no_match() {
224        let result = fuzzy_match("xyz", "Save File");
225        assert!(!result.matched);
226    }
227
228    #[test]
229    fn test_query_longer_than_target() {
230        let result = fuzzy_match("very long query", "short");
231        assert!(!result.matched);
232    }
233
234    #[test]
235    fn test_consecutive_matches_score_higher() {
236        // Use examples without word boundary interference
237        let result_consecutive = fuzzy_match("ab", "xabc");
238        let result_sparse = fuzzy_match("ab", "xaxb");
239        assert!(result_consecutive.matched);
240        assert!(result_sparse.matched);
241        assert!(
242            result_consecutive.score > result_sparse.score,
243            "consecutive: {}, sparse: {}",
244            result_consecutive.score,
245            result_sparse.score
246        );
247    }
248
249    #[test]
250    fn test_word_boundary_scores_higher() {
251        let result_boundary = fuzzy_match("sf", "Save File");
252        let result_middle = fuzzy_match("af", "Save File");
253        assert!(result_boundary.matched);
254        assert!(result_middle.matched);
255        assert!(
256            result_boundary.score > result_middle.score,
257            "boundary: {}, middle: {}",
258            result_boundary.score,
259            result_middle.score
260        );
261    }
262
263    #[test]
264    fn test_start_of_string_scores_higher() {
265        let result_start = fuzzy_match("s", "Save File");
266        let result_middle = fuzzy_match("a", "Save File");
267        assert!(result_start.matched);
268        assert!(result_middle.matched);
269        assert!(
270            result_start.score > result_middle.score,
271            "start: {}, middle: {}",
272            result_start.score,
273            result_middle.score
274        );
275    }
276
277    #[test]
278    fn test_camel_case_boundary() {
279        let result = fuzzy_match("sf", "saveFile");
280        assert!(result.matched);
281        // 'F' is at a camelCase boundary
282        assert!(result.score > 0);
283    }
284
285    #[test]
286    fn test_fuzzy_filter() {
287        let items = vec!["Save File", "Open File", "Save As", "Quit"];
288        let results = fuzzy_filter("sf", &items, |s| s);
289
290        assert!(!results.is_empty());
291        // "Save File" should match
292        let matched_texts: Vec<&str> = results.iter().map(|(idx, _)| items[*idx]).collect();
293        assert!(matched_texts.contains(&"Save File"));
294    }
295
296    #[test]
297    fn test_match_positions_are_correct() {
298        let result = fuzzy_match("sf", "Save File");
299        assert!(result.matched);
300        assert_eq!(result.match_positions.len(), 2);
301        assert_eq!(result.match_positions[0], 0); // 'S' in "Save"
302        assert_eq!(result.match_positions[1], 5); // 'F' in "File"
303    }
304
305    #[test]
306    fn test_fuzzy_ordering() {
307        // Better match should have higher score
308        let match1 = FuzzyMatch {
309            matched: true,
310            score: 100,
311            match_positions: vec![],
312        };
313        let match2 = FuzzyMatch {
314            matched: true,
315            score: 50,
316            match_positions: vec![],
317        };
318        let no_match = FuzzyMatch::no_match();
319
320        assert!(match1 > match2);
321        assert!(match2 > no_match);
322        assert!(match1 > no_match);
323    }
324
325    #[test]
326    fn test_out_of_order_no_match() {
327        // Characters must appear in order
328        let result = fuzzy_match("fs", "Save File");
329        assert!(!result.matched);
330    }
331
332    #[test]
333    fn test_multi_term_query_with_spaces() {
334        // Each term should be matched independently
335        let result = fuzzy_match("features groups-view", "/features/groups/groups-view.tsx");
336        assert!(result.matched);
337    }
338
339    #[test]
340    fn test_multi_term_query_partial_match_fails() {
341        // If any term doesn't match, the whole query fails
342        let result = fuzzy_match("features nonexistent", "/features/groups/groups-view.tsx");
343        assert!(!result.matched);
344    }
345
346    #[test]
347    fn test_multi_term_query_all_must_match() {
348        // All terms must match
349        let result = fuzzy_match("src main rs", "src/main.rs");
350        assert!(result.matched);
351
352        let result = fuzzy_match("src xyz", "src/main.rs");
353        assert!(!result.matched);
354    }
355
356    #[test]
357    fn test_multi_term_combines_scores() {
358        // Multi-term match should combine scores from each term
359        let result = fuzzy_match("save file", "Save File");
360        assert!(result.matched);
361        assert!(result.score > 0);
362    }
363
364    #[test]
365    fn test_leading_trailing_spaces_ignored() {
366        // Leading/trailing whitespace should be ignored
367        let result = fuzzy_match("  save  ", "Save File");
368        assert!(result.matched);
369    }
370
371    #[test]
372    fn test_multiple_spaces_between_terms() {
373        // Multiple spaces between terms should be treated as single separator
374        let result = fuzzy_match("save   file", "Save File");
375        assert!(result.matched);
376    }
377
378    #[test]
379    fn test_real_world_command_names() {
380        // Test with real command palette patterns
381        assert!(fuzzy_match("gtd", "Go to Definition").matched);
382        assert!(fuzzy_match("ofl", "Open File").matched);
383        assert!(fuzzy_match("sas", "Save As").matched);
384        assert!(fuzzy_match("fr", "Find and Replace").matched);
385    }
386
387    #[test]
388    fn test_tab_name_patterns() {
389        // Test with typical tab/file names
390        assert!(fuzzy_match("main", "src/main.rs").matched);
391        assert!(fuzzy_match("mod", "src/input/mod.rs").matched);
392        assert!(fuzzy_match("cmdreg", "command_registry.rs").matched);
393    }
394
395    #[test]
396    fn test_exact_match_scores_highest() {
397        // "fresh" should score higher against "fresh" than against "fresh-editor"
398        let exact = fuzzy_match("fresh", "fresh");
399        let longer = fuzzy_match("fresh", "fresh-editor");
400
401        assert!(exact.matched);
402        assert!(longer.matched);
403        assert!(
404            exact.score > longer.score,
405            "exact: {}, longer: {}",
406            exact.score,
407            longer.score
408        );
409    }
410
411    #[test]
412    fn test_exact_basename_match_scores_high() {
413        // "fresh" matching "fresh-editor" should score higher than "fresh" matching "freshness"
414        let basename_match = fuzzy_match("fresh", "fresh-editor");
415        let substring_match = fuzzy_match("fresh", "freshness");
416
417        assert!(basename_match.matched);
418        assert!(substring_match.matched);
419        assert!(
420            basename_match.score > substring_match.score,
421            "basename: {}, substring: {}",
422            basename_match.score,
423            substring_match.score
424        );
425    }
426
427    #[test]
428    fn test_exact_match_with_extension() {
429        // "config" should score higher against "config.rs" than "config_manager.rs"
430        let exact_base = fuzzy_match("config", "config.rs");
431        let longer_name = fuzzy_match("config", "config_manager.rs");
432
433        assert!(exact_base.matched);
434        assert!(longer_name.matched);
435        assert!(
436            exact_base.score > longer_name.score,
437            "exact_base: {}, longer: {}",
438            exact_base.score,
439            longer_name.score
440        );
441    }
442
443    #[test]
444    fn test_multi_term_exact_target_scores_higher() {
445        // "Package: Packages" should score higher against "Package: Packages"
446        // than against "Package: Install from URL"
447        let exact = fuzzy_match("Package: Packages", "Package: Packages");
448        let partial = fuzzy_match("Package: Packages", "Package: Install from URL");
449
450        assert!(exact.matched, "exact should match");
451        assert!(partial.matched, "partial should match");
452        assert!(
453            exact.score > partial.score,
454            "exact target should score higher: exact={}, partial={}",
455            exact.score,
456            partial.score
457        );
458    }
459
460    #[test]
461    fn test_contiguous_substring_beats_scattered() {
462        // "results" as a contiguous substring in the path should rank above
463        // scattered r-e-s-u-l-t-s across different path components
464        let contiguous = fuzzy_match("results", "repos/editor-benchmark/results.json");
465        let scattered = fuzzy_match("results", "repos/quicklsp/LSP_TEST_REPORT.md");
466
467        assert!(contiguous.matched);
468        assert!(scattered.matched);
469        assert!(
470            contiguous.score > scattered.score,
471            "contiguous ({}) should beat scattered ({})",
472            contiguous.score,
473            scattered.score
474        );
475    }
476
477    #[test]
478    fn test_multi_term_joined_by_path_separator_ranks_above_scattered() {
479        // When the user types "etc hosts" (two terms), a target that
480        // reconstructs the query with a common path separator between
481        // the terms (e.g. "/etc/hosts") must rank higher than a target
482        // where each term matches individually but scattered across
483        // unrelated path components.
484        let joined = fuzzy_match("etc hosts", "/etc/hosts");
485        let scattered = fuzzy_match("etc hosts", "some/etc/deeply/nested/host_tests/foo.rs");
486
487        assert!(joined.matched);
488        assert!(scattered.matched);
489        assert!(
490            joined.score > scattered.score,
491            "joined /etc/hosts ({}) should outrank scattered ({})",
492            joined.score,
493            scattered.score
494        );
495    }
496
497    #[test]
498    fn test_multi_term_joined_by_underscore_ranks_above_scattered() {
499        // Same idea with an underscore separator: "save file" → "save_file.rs".
500        let joined = fuzzy_match("save file", "src/utils/save_file.rs");
501        let scattered = fuzzy_match("save file", "src/storage/savepoint/filetree_handler.rs");
502
503        assert!(joined.matched);
504        assert!(scattered.matched);
505        assert!(
506            joined.score > scattered.score,
507            "joined save_file.rs ({}) should outrank scattered ({})",
508            joined.score,
509            scattered.score
510        );
511    }
512
513    #[test]
514    fn test_multi_term_joined_by_arbitrary_chars_ranks_above_scattered() {
515        // The tight-span bonus is not specific to path separators:
516        // "etc hosts" should rank a target like "etcmohosts" (two chars
517        // between the terms, no separator at all) above a target where
518        // the individual characters e-t-c-h-o-s-t-s are scattered with
519        // big gaps, even though both targets satisfy the per-term
520        // subsequence check.
521        let tight = fuzzy_match("etc hosts", "etcmohosts");
522        let scattered = fuzzy_match("etc hosts", "eblatblacblahblaoblasblatblas");
523
524        assert!(tight.matched);
525        assert!(scattered.matched);
526        assert!(
527            tight.score > scattered.score,
528            "etcmohosts ({}) should outrank scattered ({})",
529            tight.score,
530            scattered.score
531        );
532    }
533
534    #[test]
535    fn test_multi_term_camel_case_joined_ranks_above_scattered() {
536        // "save file" → "saveFile" (zero characters between, just a
537        // camelCase transition) should get the tight-span bonus too.
538        let camel = fuzzy_match("save file", "saveFile.rs");
539        let scattered = fuzzy_match("save file", "savepoint_filetree_handler.rs");
540
541        assert!(camel.matched);
542        assert!(scattered.matched);
543        assert!(
544            camel.score > scattered.score,
545            "saveFile.rs ({}) should outrank scattered ({})",
546            camel.score,
547            scattered.score
548        );
549    }
550
551    #[test]
552    fn test_amortized_apis_equivalent_to_oneshot() {
553        // Both amortized entry points (`fuzzy_match_prepared` borrowing a
554        // pre-built `PreparedPattern`, and `FuzzyMatcher` reusing scratch
555        // across calls) must produce identical results to the one-shot
556        // `fuzzy_match` wrapper for every (query, target) pair.
557        //
558        // The target list is arranged to cover:
559        //   - long target first, then shorter ones (scratch-growth /
560        //     stale-tail check — if `FuzzyMatcher` didn't clear its
561        //     scratch correctly, a subsequent shorter target would see
562        //     leftover chars and diverge from the one-shot).
563        //   - matches interleaved with rejections (rejection path must
564        //     not corrupt scratch for the next accepting call).
565        //   - an empty target (edge case).
566        let queries = ["main", "config", "results", "sf", "save file"];
567        let targets = [
568            "a/very/long/path/to/some/nested/src/main.rs", // warm scratch large
569            "src/main.rs",                                 // shorter, still matches "main"
570            "src/app/config.rs",                           // rejects "main", matches "config"
571            "repos/editor-benchmark/results.json",         // matches "results"
572            "Save File",                                   // matches "sf" / "save file"
573            "nomatchatall",                                // rejects most queries
574            "README.md",
575            "",
576        ];
577
578        for query in queries {
579            let pattern = PreparedPattern::new(query);
580            let mut matcher = FuzzyMatcher::new(query);
581            for target in targets {
582                let oneshot = fuzzy_match(query, target);
583                let prepared = fuzzy_match_prepared(&pattern, target);
584                let reused = matcher.match_target(target);
585                assert_eq!(
586                    oneshot, prepared,
587                    "fuzzy_match_prepared mismatch for query={:?} target={:?}",
588                    query, target
589                );
590                assert_eq!(
591                    oneshot, reused,
592                    "FuzzyMatcher reuse mismatch for query={:?} target={:?}",
593                    query, target
594                );
595            }
596        }
597    }
598}