Skip to main content

fresh/input/fuzzy/
mod.rs

1//! Fuzzy matching algorithm inspired by fzf.
2//!
3//! Provides substring-style fuzzy matching where query characters must appear
4//! in order in the target string, but not necessarily consecutively.  Matching
5//! is case-insensitive.
6//!
7//! # Hot-path usage
8//!
9//! For a single keystroke matched against many candidates, build a
10//! [`FuzzyMatcher`] once and reuse it.  The matcher owns both the
11//! [`PreparedPattern`] *and* two reusable `Vec<char>` scratch buffers, so
12//! after the first call neither query preparation nor target-side scratch
13//! touches the allocator:
14//!
15//! ```ignore
16//! let mut matcher = FuzzyMatcher::new(user_input);
17//! for file in files {
18//!     let result = matcher.match_target(&file.path);
19//!     // ...
20//! }
21//! ```
22//!
23//! The convenience wrapper [`fuzzy_match`] rebuilds the matcher on every
24//! call — fine for one-shot use, wasteful in a loop.
25//!
26//! The legacy [`fuzzy_match_prepared`] entry point still exists for
27//! callers that want query amortisation without scratch amortisation; it
28//! internally clones the pattern into a throwaway [`FuzzyMatcher`].
29//!
30//! # Module layout
31//!
32//! - [`pattern`] owns [`PreparedPattern`] and the non-allocating subsequence
33//!   rejection used as the hot-path gate.
34//! - [`matcher`] owns the scoring DP (with an arena-based backpointer chain
35//!   instead of cloning position vectors) and the contiguous-substring
36//!   scorer that runs in parallel with it.
37
38mod matcher;
39mod pattern;
40
41pub use matcher::FuzzyMatcher;
42pub use pattern::PreparedPattern;
43
44/// Score bonus constants for match quality ranking.
45pub(crate) mod score {
46    /// Bonus for consecutive character matches
47    pub const CONSECUTIVE: i32 = 16;
48    /// Bonus for matching at word boundary (after space, underscore, etc.)
49    pub const WORD_BOUNDARY: i32 = 32;
50    /// Bonus for matching at the start of the string
51    pub const START_OF_STRING: i32 = 48;
52    /// Bonus for matching a camelCase transition (lowercase -> uppercase)
53    pub const CAMEL_CASE: i32 = 24;
54    /// Penalty per gap between matched characters
55    pub const GAP_PENALTY: i32 = -3;
56    /// Penalty for starting a gap (first unmatched char after a match)
57    pub const GAP_START_PENALTY: i32 = -5;
58    /// Bonus for exact match (query matches entire target)
59    pub const EXACT_MATCH: i32 = 100;
60    /// Bonus for exact base name match (query matches filename without extension)
61    pub const EXACT_BASENAME_MATCH: i32 = 80;
62    /// Bonus for a contiguous substring match (all query chars are consecutive
63    /// in the target but not necessarily from position 0). This ensures that
64    /// e.g. "results" in "results.json" ranks above scattered r-e-s-u-l-t-s.
65    pub const CONTIGUOUS_SUBSTRING: i32 = 64;
66    /// Bonus for a contiguous match that begins at the start of the basename
67    /// (immediately after the last `/`, or position 0 if there is no `/`).
68    /// Stacks with the existing word-boundary bonus to give nested basename
69    /// prefix matches (e.g. "ts" -> ".../tsconfig.json") a decisive edge over
70    /// equally-contiguous matches that sit inside the basename (e.g. "ts" ->
71    /// ".../pkg.ts", where "ts" comes after the extension dot).
72    pub const BASENAME_PREFIX: i32 = 64;
73    /// Bonus for a contiguous match that begins at the start of an interior
74    /// path segment (immediately after some `/` other than the one preceding
75    /// the basename). Smaller than `BASENAME_PREFIX` because the basename is
76    /// the more common search target, but still enough to outrank arbitrary
77    /// mid-segment substring matches.
78    pub const PATH_SEGMENT_PREFIX: i32 = 32;
79}
80
81/// Result of a fuzzy match, containing match status and quality score
82#[derive(Debug, Clone, PartialEq, Eq)]
83pub struct FuzzyMatch {
84    /// Whether the query matched the target
85    pub matched: bool,
86    /// Quality score (higher is better). Only meaningful if matched is true.
87    pub score: i32,
88    /// Indices in the target string where query characters matched
89    pub match_positions: Vec<usize>,
90}
91
92impl FuzzyMatch {
93    /// Create a non-matching result
94    pub fn no_match() -> Self {
95        Self {
96            matched: false,
97            score: 0,
98            match_positions: Vec::new(),
99        }
100    }
101}
102
103impl Ord for FuzzyMatch {
104    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
105        // Non-matches are always worse than matches
106        match (self.matched, other.matched) {
107            (true, false) => std::cmp::Ordering::Greater,
108            (false, true) => std::cmp::Ordering::Less,
109            (false, false) => std::cmp::Ordering::Equal,
110            (true, true) => self.score.cmp(&other.score),
111        }
112    }
113}
114
115impl PartialOrd for FuzzyMatch {
116    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
117        Some(self.cmp(other))
118    }
119}
120
121/// Perform fzf-style fuzzy matching of a query against a target string.
122///
123/// Convenience wrapper that builds a [`PreparedPattern`] internally.  For
124/// hot paths matching one query against many targets, prefer building a
125/// [`PreparedPattern`] once and calling [`fuzzy_match_prepared`] per target.
126///
127/// Returns a `FuzzyMatch` containing:
128/// - `matched`: true if all query characters appear in order in the target
129/// - `score`: quality score based on match positions (consecutive matches, word boundaries, etc.)
130/// - `match_positions`: indices in target where each query character matched
131///
132/// The algorithm favors:
133/// - Consecutive character matches
134/// - Matches at word boundaries (after space, underscore, hyphen, or camelCase transitions)
135/// - Matches at the start of the string
136///
137/// Queries containing spaces are split into separate terms. Each term is matched
138/// independently and all terms must match for the overall match to succeed.
139///
140/// # Examples
141/// ```
142/// use fresh::input::fuzzy::fuzzy_match;
143///
144/// // Exact substring match
145/// let result = fuzzy_match("save", "Save File");
146/// assert!(result.matched);
147///
148/// // Sparse match (fzf-style)
149/// let result = fuzzy_match("sf", "Save File");
150/// assert!(result.matched);
151///
152/// // Non-matching
153/// let result = fuzzy_match("xyz", "Save File");
154/// assert!(!result.matched);
155///
156/// // Multi-term match (space-separated)
157/// let result = fuzzy_match("features groups-view", "/features/groups/groups-view.tsx");
158/// assert!(result.matched);
159/// ```
160pub fn fuzzy_match(query: &str, target: &str) -> FuzzyMatch {
161    let pattern = PreparedPattern::new(query);
162    fuzzy_match_prepared(&pattern, target)
163}
164
165/// Perform fuzzy matching using a pre-prepared pattern.
166///
167/// This is the hot-path entry point — build the [`PreparedPattern`] once
168/// and call this per target to amortise query-preparation work.
169pub fn fuzzy_match_prepared(pattern: &PreparedPattern, target: &str) -> FuzzyMatch {
170    matcher::match_prepared(pattern, target)
171}
172
173/// Filter a list of items using fuzzy matching, returning sorted results.
174///
175/// Items are sorted by match quality (best matches first).
176/// Non-matching items are excluded.
177pub fn fuzzy_filter<T, F>(query: &str, items: &[T], get_text: F) -> Vec<(usize, FuzzyMatch)>
178where
179    F: Fn(&T) -> &str,
180{
181    let pattern = PreparedPattern::new(query);
182    let mut results: Vec<(usize, FuzzyMatch)> = items
183        .iter()
184        .enumerate()
185        .map(|(idx, item)| (idx, fuzzy_match_prepared(&pattern, get_text(item))))
186        .filter(|(_, m)| m.matched)
187        .collect();
188
189    // Sort by score descending (best matches first)
190    results.sort_by(|a, b| b.1.score.cmp(&a.1.score));
191
192    results
193}
194
195#[cfg(test)]
196mod tests {
197    use super::*;
198
199    #[test]
200    fn test_empty_query_matches_everything() {
201        let result = fuzzy_match("", "anything");
202        assert!(result.matched);
203        assert_eq!(result.score, 0);
204    }
205
206    #[test]
207    fn test_exact_match() {
208        let result = fuzzy_match("save", "save");
209        assert!(result.matched);
210        assert!(result.score > 0);
211    }
212
213    #[test]
214    fn test_case_insensitive() {
215        let result = fuzzy_match("SAVE", "save file");
216        assert!(result.matched);
217
218        let result = fuzzy_match("save", "SAVE FILE");
219        assert!(result.matched);
220    }
221
222    #[test]
223    fn test_substring_match() {
224        let result = fuzzy_match("file", "Save File");
225        assert!(result.matched);
226    }
227
228    #[test]
229    fn test_sparse_match() {
230        let result = fuzzy_match("sf", "Save File");
231        assert!(result.matched);
232        assert_eq!(result.match_positions.len(), 2);
233    }
234
235    #[test]
236    fn test_no_match() {
237        let result = fuzzy_match("xyz", "Save File");
238        assert!(!result.matched);
239    }
240
241    #[test]
242    fn test_query_longer_than_target() {
243        let result = fuzzy_match("very long query", "short");
244        assert!(!result.matched);
245    }
246
247    #[test]
248    fn test_consecutive_matches_score_higher() {
249        // Use examples without word boundary interference
250        let result_consecutive = fuzzy_match("ab", "xabc");
251        let result_sparse = fuzzy_match("ab", "xaxb");
252        assert!(result_consecutive.matched);
253        assert!(result_sparse.matched);
254        assert!(
255            result_consecutive.score > result_sparse.score,
256            "consecutive: {}, sparse: {}",
257            result_consecutive.score,
258            result_sparse.score
259        );
260    }
261
262    #[test]
263    fn test_word_boundary_scores_higher() {
264        let result_boundary = fuzzy_match("sf", "Save File");
265        let result_middle = fuzzy_match("af", "Save File");
266        assert!(result_boundary.matched);
267        assert!(result_middle.matched);
268        assert!(
269            result_boundary.score > result_middle.score,
270            "boundary: {}, middle: {}",
271            result_boundary.score,
272            result_middle.score
273        );
274    }
275
276    #[test]
277    fn test_start_of_string_scores_higher() {
278        let result_start = fuzzy_match("s", "Save File");
279        let result_middle = fuzzy_match("a", "Save File");
280        assert!(result_start.matched);
281        assert!(result_middle.matched);
282        assert!(
283            result_start.score > result_middle.score,
284            "start: {}, middle: {}",
285            result_start.score,
286            result_middle.score
287        );
288    }
289
290    #[test]
291    fn test_camel_case_boundary() {
292        let result = fuzzy_match("sf", "saveFile");
293        assert!(result.matched);
294        // 'F' is at a camelCase boundary
295        assert!(result.score > 0);
296    }
297
298    #[test]
299    fn test_fuzzy_filter() {
300        let items = vec!["Save File", "Open File", "Save As", "Quit"];
301        let results = fuzzy_filter("sf", &items, |s| s);
302
303        assert!(!results.is_empty());
304        // "Save File" should match
305        let matched_texts: Vec<&str> = results.iter().map(|(idx, _)| items[*idx]).collect();
306        assert!(matched_texts.contains(&"Save File"));
307    }
308
309    #[test]
310    fn test_match_positions_are_correct() {
311        let result = fuzzy_match("sf", "Save File");
312        assert!(result.matched);
313        assert_eq!(result.match_positions.len(), 2);
314        assert_eq!(result.match_positions[0], 0); // 'S' in "Save"
315        assert_eq!(result.match_positions[1], 5); // 'F' in "File"
316    }
317
318    #[test]
319    fn test_fuzzy_ordering() {
320        // Better match should have higher score
321        let match1 = FuzzyMatch {
322            matched: true,
323            score: 100,
324            match_positions: vec![],
325        };
326        let match2 = FuzzyMatch {
327            matched: true,
328            score: 50,
329            match_positions: vec![],
330        };
331        let no_match = FuzzyMatch::no_match();
332
333        assert!(match1 > match2);
334        assert!(match2 > no_match);
335        assert!(match1 > no_match);
336    }
337
338    #[test]
339    fn test_out_of_order_no_match() {
340        // Characters must appear in order
341        let result = fuzzy_match("fs", "Save File");
342        assert!(!result.matched);
343    }
344
345    #[test]
346    fn test_multi_term_query_with_spaces() {
347        // Each term should be matched independently
348        let result = fuzzy_match("features groups-view", "/features/groups/groups-view.tsx");
349        assert!(result.matched);
350    }
351
352    #[test]
353    fn test_multi_term_query_partial_match_fails() {
354        // If any term doesn't match, the whole query fails
355        let result = fuzzy_match("features nonexistent", "/features/groups/groups-view.tsx");
356        assert!(!result.matched);
357    }
358
359    #[test]
360    fn test_multi_term_query_all_must_match() {
361        // All terms must match
362        let result = fuzzy_match("src main rs", "src/main.rs");
363        assert!(result.matched);
364
365        let result = fuzzy_match("src xyz", "src/main.rs");
366        assert!(!result.matched);
367    }
368
369    #[test]
370    fn test_multi_term_combines_scores() {
371        // Multi-term match should combine scores from each term
372        let result = fuzzy_match("save file", "Save File");
373        assert!(result.matched);
374        assert!(result.score > 0);
375    }
376
377    #[test]
378    fn test_leading_trailing_spaces_ignored() {
379        // Leading/trailing whitespace should be ignored
380        let result = fuzzy_match("  save  ", "Save File");
381        assert!(result.matched);
382    }
383
384    #[test]
385    fn test_multiple_spaces_between_terms() {
386        // Multiple spaces between terms should be treated as single separator
387        let result = fuzzy_match("save   file", "Save File");
388        assert!(result.matched);
389    }
390
391    #[test]
392    fn test_real_world_command_names() {
393        // Test with real command palette patterns
394        assert!(fuzzy_match("gtd", "Go to Definition").matched);
395        assert!(fuzzy_match("ofl", "Open File").matched);
396        assert!(fuzzy_match("sas", "Save As").matched);
397        assert!(fuzzy_match("fr", "Find and Replace").matched);
398    }
399
400    #[test]
401    fn test_tab_name_patterns() {
402        // Test with typical tab/file names
403        assert!(fuzzy_match("main", "src/main.rs").matched);
404        assert!(fuzzy_match("mod", "src/input/mod.rs").matched);
405        assert!(fuzzy_match("cmdreg", "command_registry.rs").matched);
406    }
407
408    #[test]
409    fn test_exact_match_scores_highest() {
410        // "fresh" should score higher against "fresh" than against "fresh-editor"
411        let exact = fuzzy_match("fresh", "fresh");
412        let longer = fuzzy_match("fresh", "fresh-editor");
413
414        assert!(exact.matched);
415        assert!(longer.matched);
416        assert!(
417            exact.score > longer.score,
418            "exact: {}, longer: {}",
419            exact.score,
420            longer.score
421        );
422    }
423
424    #[test]
425    fn test_exact_basename_match_scores_high() {
426        // "fresh" matching "fresh-editor" should score higher than "fresh" matching "freshness"
427        let basename_match = fuzzy_match("fresh", "fresh-editor");
428        let substring_match = fuzzy_match("fresh", "freshness");
429
430        assert!(basename_match.matched);
431        assert!(substring_match.matched);
432        assert!(
433            basename_match.score > substring_match.score,
434            "basename: {}, substring: {}",
435            basename_match.score,
436            substring_match.score
437        );
438    }
439
440    #[test]
441    fn test_exact_match_with_extension() {
442        // "config" should score higher against "config.rs" than "config_manager.rs"
443        let exact_base = fuzzy_match("config", "config.rs");
444        let longer_name = fuzzy_match("config", "config_manager.rs");
445
446        assert!(exact_base.matched);
447        assert!(longer_name.matched);
448        assert!(
449            exact_base.score > longer_name.score,
450            "exact_base: {}, longer: {}",
451            exact_base.score,
452            longer_name.score
453        );
454    }
455
456    #[test]
457    fn test_multi_term_exact_target_scores_higher() {
458        // "Package: Packages" should score higher against "Package: Packages"
459        // than against "Package: Install from URL"
460        let exact = fuzzy_match("Package: Packages", "Package: Packages");
461        let partial = fuzzy_match("Package: Packages", "Package: Install from URL");
462
463        assert!(exact.matched, "exact should match");
464        assert!(partial.matched, "partial should match");
465        assert!(
466            exact.score > partial.score,
467            "exact target should score higher: exact={}, partial={}",
468            exact.score,
469            partial.score
470        );
471    }
472
473    #[test]
474    fn test_basename_prefix_beats_intra_segment_match() {
475        // Typing "ts" should rank `tsconfig.json` (basename starts with the
476        // prefix) above unrelated files whose basename merely *contains*
477        // "ts" as a contiguous substring after the extension dot
478        // (e.g. `pkg.ts`, `finder.ts`).  Without the basename-prefix bonus,
479        // both score identically because each match earns the same
480        // word-boundary + contiguous bonuses.
481        let prefix = fuzzy_match("ts", "crates/fresh-editor/plugins/tsconfig.json");
482        for distractor in &[
483            "crates/fresh-editor/plugins/pkg.ts",
484            "crates/fresh-editor/plugins/lib/finder.ts",
485            "crates/fresh-editor/plugins/lib/index.ts",
486            "crates/fresh-editor/plugins/diagnostics_panel.ts",
487        ] {
488            let other = fuzzy_match("ts", distractor);
489            assert!(prefix.matched && other.matched);
490            assert!(
491                prefix.score > other.score,
492                "tsconfig.json ({}) must outrank {} ({})",
493                prefix.score,
494                distractor,
495                other.score
496            );
497        }
498    }
499
500    #[test]
501    fn test_directory_segment_prefix_beats_intra_segment_match() {
502        // A directory segment that starts with the prefix should also
503        // outrank a mid-segment match, even when it isn't the basename.
504        let dir_prefix = fuzzy_match("ts", "crates/ts-parser/src/lib.rs");
505        let intra = fuzzy_match("ts", "crates/fresh-editor/plugins/pkg.ts");
506        assert!(dir_prefix.matched && intra.matched);
507        assert!(
508            dir_prefix.score > intra.score,
509            "ts-parser/... ({}) must outrank pkg.ts ({})",
510            dir_prefix.score,
511            intra.score
512        );
513    }
514
515    #[test]
516    fn test_basename_prefix_outranks_directory_prefix() {
517        // When both a basename and an interior directory start with the
518        // same prefix, the basename should win — typing "ts" is far more
519        // often a search for a file named tsconfig.json than for a file
520        // *inside* a directory whose name happens to start with "ts".
521        let basename = fuzzy_match("ts", "crates/fresh-editor/plugins/tsconfig.json");
522        let dir = fuzzy_match("ts", "crates/ts-parser/src/lib.rs");
523        assert!(basename.matched && dir.matched);
524        assert!(
525            basename.score > dir.score,
526            "basename-prefix tsconfig.json ({}) must outrank dir-prefix ts-parser/... ({})",
527            basename.score,
528            dir.score
529        );
530    }
531
532    #[test]
533    fn test_contiguous_substring_beats_scattered() {
534        // "results" as a contiguous substring in the path should rank above
535        // scattered r-e-s-u-l-t-s across different path components
536        let contiguous = fuzzy_match("results", "repos/editor-benchmark/results.json");
537        let scattered = fuzzy_match("results", "repos/quicklsp/LSP_TEST_REPORT.md");
538
539        assert!(contiguous.matched);
540        assert!(scattered.matched);
541        assert!(
542            contiguous.score > scattered.score,
543            "contiguous ({}) should beat scattered ({})",
544            contiguous.score,
545            scattered.score
546        );
547    }
548
549    #[test]
550    fn test_multi_term_joined_by_path_separator_ranks_above_scattered() {
551        // When the user types "etc hosts" (two terms), a target that
552        // reconstructs the query with a common path separator between
553        // the terms (e.g. "/etc/hosts") must rank higher than a target
554        // where each term matches individually but scattered across
555        // unrelated path components.
556        let joined = fuzzy_match("etc hosts", "/etc/hosts");
557        let scattered = fuzzy_match("etc hosts", "some/etc/deeply/nested/host_tests/foo.rs");
558
559        assert!(joined.matched);
560        assert!(scattered.matched);
561        assert!(
562            joined.score > scattered.score,
563            "joined /etc/hosts ({}) should outrank scattered ({})",
564            joined.score,
565            scattered.score
566        );
567    }
568
569    #[test]
570    fn test_multi_term_joined_by_underscore_ranks_above_scattered() {
571        // Same idea with an underscore separator: "save file" → "save_file.rs".
572        let joined = fuzzy_match("save file", "src/utils/save_file.rs");
573        let scattered = fuzzy_match("save file", "src/storage/savepoint/filetree_handler.rs");
574
575        assert!(joined.matched);
576        assert!(scattered.matched);
577        assert!(
578            joined.score > scattered.score,
579            "joined save_file.rs ({}) should outrank scattered ({})",
580            joined.score,
581            scattered.score
582        );
583    }
584
585    #[test]
586    fn test_multi_term_joined_by_arbitrary_chars_ranks_above_scattered() {
587        // The tight-span bonus is not specific to path separators:
588        // "etc hosts" should rank a target like "etcmohosts" (two chars
589        // between the terms, no separator at all) above a target where
590        // the individual characters e-t-c-h-o-s-t-s are scattered with
591        // big gaps, even though both targets satisfy the per-term
592        // subsequence check.
593        let tight = fuzzy_match("etc hosts", "etcmohosts");
594        let scattered = fuzzy_match("etc hosts", "eblatblacblahblaoblasblatblas");
595
596        assert!(tight.matched);
597        assert!(scattered.matched);
598        assert!(
599            tight.score > scattered.score,
600            "etcmohosts ({}) should outrank scattered ({})",
601            tight.score,
602            scattered.score
603        );
604    }
605
606    #[test]
607    fn test_multi_term_camel_case_joined_ranks_above_scattered() {
608        // "save file" → "saveFile" (zero characters between, just a
609        // camelCase transition) should get the tight-span bonus too.
610        let camel = fuzzy_match("save file", "saveFile.rs");
611        let scattered = fuzzy_match("save file", "savepoint_filetree_handler.rs");
612
613        assert!(camel.matched);
614        assert!(scattered.matched);
615        assert!(
616            camel.score > scattered.score,
617            "saveFile.rs ({}) should outrank scattered ({})",
618            camel.score,
619            scattered.score
620        );
621    }
622
623    #[test]
624    fn test_amortized_apis_equivalent_to_oneshot() {
625        // Both amortized entry points (`fuzzy_match_prepared` borrowing a
626        // pre-built `PreparedPattern`, and `FuzzyMatcher` reusing scratch
627        // across calls) must produce identical results to the one-shot
628        // `fuzzy_match` wrapper for every (query, target) pair.
629        //
630        // The target list is arranged to cover:
631        //   - long target first, then shorter ones (scratch-growth /
632        //     stale-tail check — if `FuzzyMatcher` didn't clear its
633        //     scratch correctly, a subsequent shorter target would see
634        //     leftover chars and diverge from the one-shot).
635        //   - matches interleaved with rejections (rejection path must
636        //     not corrupt scratch for the next accepting call).
637        //   - an empty target (edge case).
638        let queries = ["main", "config", "results", "sf", "save file"];
639        let targets = [
640            "a/very/long/path/to/some/nested/src/main.rs", // warm scratch large
641            "src/main.rs",                                 // shorter, still matches "main"
642            "src/app/config.rs",                           // rejects "main", matches "config"
643            "repos/editor-benchmark/results.json",         // matches "results"
644            "Save File",                                   // matches "sf" / "save file"
645            "nomatchatall",                                // rejects most queries
646            "README.md",
647            "",
648        ];
649
650        for query in queries {
651            let pattern = PreparedPattern::new(query);
652            let mut matcher = FuzzyMatcher::new(query);
653            for target in targets {
654                let oneshot = fuzzy_match(query, target);
655                let prepared = fuzzy_match_prepared(&pattern, target);
656                let reused = matcher.match_target(target);
657                assert_eq!(
658                    oneshot, prepared,
659                    "fuzzy_match_prepared mismatch for query={:?} target={:?}",
660                    query, target
661                );
662                assert_eq!(
663                    oneshot, reused,
664                    "FuzzyMatcher reuse mismatch for query={:?} target={:?}",
665                    query, target
666                );
667            }
668        }
669    }
670}