Skip to main content

fresh/input/fuzzy/
pattern.rs

1//! Query preparation and fast subsequence rejection.
2//!
3//! A [`PreparedPattern`] owns the expensive bits of a query (lowercased
4//! chars, ASCII fast-path bytes, split terms) so they are computed once
5//! per keystroke instead of once per candidate target.  The subsequence
6//! helpers then reject unmatched targets without allocating anything on
7//! the target side, leaving the heavier DP in [`super::matcher`] to run
8//! only on candidates that could actually match.
9
10/// A query pre-processed for fuzzy matching.
11///
12/// Creating a `PreparedPattern` once per keystroke and reusing it across all
13/// candidate targets avoids re-lowercasing and re-allocating the query for
14/// every file in a large corpus.  This is the primary entry point for hot
15/// paths that match one query against thousands of targets (Quick Open).
16#[derive(Debug, Clone)]
17pub struct PreparedPattern {
18    /// Individual whitespace-separated terms, pre-lowercased and indexed.
19    pub(super) terms: Vec<PreparedTerm>,
20}
21
22/// A single search term pre-processed for fast matching.
23#[derive(Debug, Clone)]
24pub(super) struct PreparedTerm {
25    /// Lowercased query term as a Vec<char> (needed for DP indexing).
26    pub(super) lower_chars: Vec<char>,
27    /// Lowercased ASCII bytes — `Some` iff the term is all-ASCII, which
28    /// unlocks byte-level fast paths for rejection.
29    pub(super) ascii_lower: Option<Vec<u8>>,
30}
31
32impl PreparedTerm {
33    fn new(term: &str) -> Self {
34        let lower = term.to_lowercase();
35        let lower_chars: Vec<char> = lower.chars().collect();
36        let ascii_lower = if lower.is_ascii() {
37            Some(lower.into_bytes())
38        } else {
39            None
40        };
41        Self {
42            lower_chars,
43            ascii_lower,
44        }
45    }
46}
47
48impl PreparedPattern {
49    /// Prepare a query for repeated fuzzy matching.
50    pub fn new(query: &str) -> Self {
51        let terms: Vec<PreparedTerm> = query.split_whitespace().map(PreparedTerm::new).collect();
52        Self { terms }
53    }
54
55    /// Returns `true` if the prepared query has no terms (empty or whitespace-only).
56    pub fn is_empty(&self) -> bool {
57        self.terms.is_empty()
58    }
59}
60
61/// Non-allocating subsequence check: does every lowercased character of the
62/// prepared term appear, in order, somewhere in `target` (case-insensitive)?
63///
64/// This runs on the hot path for every (query, target) pair.  It uses an
65/// ASCII byte-level fast path when the term and target are both ASCII
66/// (the common case for file paths), otherwise it falls back to iterating
67/// `target.chars().flat_map(|c| c.to_lowercase())` without allocating.
68pub(super) fn is_subsequence_prepared(term: &PreparedTerm, target: &str) -> bool {
69    if let Some(ref ascii_q) = term.ascii_lower {
70        if target.is_ascii() {
71            return is_subsequence_ascii(ascii_q, target.as_bytes());
72        }
73    }
74    is_subsequence_chars(&term.lower_chars, target)
75}
76
77fn is_subsequence_ascii(query_lower: &[u8], target: &[u8]) -> bool {
78    if query_lower.is_empty() {
79        return true;
80    }
81    let mut qi = 0;
82    for &b in target {
83        let lower = b.to_ascii_lowercase();
84        if lower == query_lower[qi] {
85            qi += 1;
86            if qi == query_lower.len() {
87                return true;
88            }
89        }
90    }
91    false
92}
93
94fn is_subsequence_chars(query_lower: &[char], target: &str) -> bool {
95    if query_lower.is_empty() {
96        return true;
97    }
98    let mut qi = 0;
99    for lc in target.chars().flat_map(|c| c.to_lowercase()) {
100        if lc == query_lower[qi] {
101            qi += 1;
102            if qi == query_lower.len() {
103                return true;
104            }
105        }
106    }
107    false
108}
109
110#[cfg(test)]
111mod tests {
112    use super::*;
113
114    #[test]
115    fn prepared_pattern_splits_terms() {
116        let p = PreparedPattern::new("Save File");
117        assert_eq!(p.terms.len(), 2);
118        assert_eq!(p.terms[0].lower_chars, vec!['s', 'a', 'v', 'e']);
119        assert_eq!(p.terms[1].lower_chars, vec!['f', 'i', 'l', 'e']);
120    }
121
122    #[test]
123    fn prepared_pattern_ascii_detection() {
124        let p = PreparedPattern::new("hello");
125        assert!(p.terms[0].ascii_lower.is_some());
126
127        let p = PreparedPattern::new("héllo");
128        assert!(p.terms[0].ascii_lower.is_none());
129    }
130
131    #[test]
132    fn prepared_pattern_empty_when_whitespace_only() {
133        assert!(PreparedPattern::new("").is_empty());
134        assert!(PreparedPattern::new("   ").is_empty());
135        assert!(!PreparedPattern::new("x").is_empty());
136    }
137
138    #[test]
139    fn subsequence_ascii_accepts_interleaved_matches() {
140        let t = PreparedTerm::new("sf");
141        assert!(is_subsequence_prepared(&t, "Save File"));
142        assert!(is_subsequence_prepared(&t, "SAVE FILE"));
143        assert!(!is_subsequence_prepared(&t, "only s"));
144        assert!(!is_subsequence_prepared(&t, "fs"));
145    }
146
147    #[test]
148    fn subsequence_non_ascii_target_falls_back() {
149        let t = PreparedTerm::new("hl");
150        assert!(is_subsequence_prepared(&t, "héllo"));
151    }
152
153    #[test]
154    fn subsequence_non_ascii_query() {
155        let t = PreparedTerm::new("é");
156        assert!(is_subsequence_prepared(&t, "hÉllo"));
157        assert!(!is_subsequence_prepared(&t, "hello"));
158    }
159}