// redox_core/fuzzy.rs
use std::cmp::Ordering;
use std::ops::Range;
use std::path::Path;

/// A search query tokenized on whitespace, with every token lowercased.
#[derive(Debug, Clone)]
pub struct FuzzyQuery {
    // Lowercased whitespace-separated tokens; empty for a blank query.
    tokens: Vec<String>,
}

10impl FuzzyQuery {
11    pub fn new(query: &str) -> Self {
12        Self {
13            tokens: query
14                .split_whitespace()
15                .filter(|token| !token.is_empty())
16                .map(|token| token.to_lowercase())
17                .collect(),
18        }
19    }
20
21    pub fn is_empty(&self) -> bool {
22        self.tokens.is_empty()
23    }
24}
25
/// The result of matching a full query against a label.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FuzzyMatch {
    /// Merged, non-overlapping byte ranges of matched text, sorted by start.
    pub highlights: Vec<Range<usize>>,
    /// Byte offset of the earliest matched character (`usize::MAX` sentinel
    /// for an empty query).
    pub first_match: usize,
    /// Byte distance from the first highlight's start to the last one's end
    /// (`usize::MAX` when there are no highlights).
    pub match_span: usize,
    /// Skipped distance between matches. NOTE: inter-token gaps are measured
    /// in bytes while intra-token gaps are measured in characters.
    pub gap_count: usize,
    /// Number of query tokens that matched as one contiguous substring.
    pub contiguous_match_count: usize,
}

/// Ranking statistics for a matched path, ordered by
/// `compare_path_match_scores`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PathMatchScore {
    /// How many query tokens matched within the file name alone.
    pub filename_match_count: usize,
    /// Of those, how many matched the file name contiguously.
    pub filename_contiguous_match_count: usize,
    /// Tokens that matched the full path contiguously.
    pub contiguous_match_count: usize,
    /// Earliest match byte offset within the file name (`usize::MAX` if no
    /// token matched the file name).
    pub filename_first_match: usize,
    /// Earliest match byte offset within the whole path.
    pub path_first_match: usize,
    /// Byte span covered by all highlights of the whole-path match.
    pub match_span: usize,
    /// Skipped distance between matches of the whole-path match.
    pub gap_count: usize,
    /// Number of path components minus one.
    pub depth: usize,
    /// Byte length of the path string.
    pub len: usize,
}

/// Result of matching a single query token against a label.
#[derive(Debug, Clone)]
struct FuzzyTokenMatch {
    // Byte ranges of the matched characters: one range per character for
    // scattered matches, a single range for contiguous ones.
    highlights: Vec<Range<usize>>,
    // Byte offset of the first matched character (`usize::MAX` sentinel for
    // an empty token).
    first_match: usize,
    // Byte offset one past the last matched character (0 for an empty token).
    last_match_end: usize,
    // Characters skipped between consecutive matched characters
    // (`usize::MAX` sentinel for an empty token).
    gap_count: usize,
    // Whether the token matched as one contiguous substring.
    is_contiguous: bool,
}

57pub fn fuzzy_match_ranges(label: &str, query: &FuzzyQuery) -> Option<FuzzyMatch> {
58    if query.is_empty() {
59        return Some(FuzzyMatch {
60            highlights: Vec::new(),
61            first_match: usize::MAX,
62            match_span: usize::MAX,
63            gap_count: usize::MAX,
64            contiguous_match_count: 0,
65        });
66    }
67
68    let mut ranges = Vec::new();
69    let mut first_match = usize::MAX;
70    let mut last_match_end = 0;
71    let mut gap_count = 0usize;
72    let mut contiguous_match_count = 0usize;
73
74    for token in &query.tokens {
75        let token_match = fuzzy_match_token(label, token)?;
76        if first_match == usize::MAX {
77            first_match = token_match.first_match;
78        } else if token_match.first_match > last_match_end {
79            gap_count += token_match.first_match - last_match_end;
80        }
81        last_match_end = token_match.last_match_end;
82        gap_count += token_match.gap_count;
83        if token_match.is_contiguous {
84            contiguous_match_count += 1;
85        }
86        ranges.extend(token_match.highlights);
87    }
88
89    let highlights = merge_ranges(ranges);
90    let match_span = highlights
91        .first()
92        .zip(highlights.last())
93        .map(|(first, last)| last.end.saturating_sub(first.start))
94        .unwrap_or(usize::MAX);
95
96    Some(FuzzyMatch {
97        highlights,
98        first_match,
99        match_span,
100        gap_count,
101        contiguous_match_count,
102    })
103}
104
105pub fn path_match_score(label: &str, matched: &FuzzyMatch, query: &FuzzyQuery) -> PathMatchScore {
106    let file_name = Path::new(label)
107        .file_name()
108        .and_then(|name| name.to_str())
109        .unwrap_or(label);
110    let filename_stats = filename_match_stats(file_name, query);
111
112    PathMatchScore {
113        filename_match_count: filename_stats.match_count,
114        filename_contiguous_match_count: filename_stats.contiguous_match_count,
115        contiguous_match_count: matched.contiguous_match_count,
116        filename_first_match: filename_stats.first_match,
117        path_first_match: matched.first_match,
118        match_span: matched.match_span,
119        gap_count: matched.gap_count,
120        depth: path_depth(label),
121        len: label.len(),
122    }
123}
124
/// Number of parent components above the final path component.
fn path_depth(label: &str) -> usize {
    let components = Path::new(label).components().count();
    components.saturating_sub(1)
}

/// Aggregate of per-token matches restricted to the file name.
struct FilenameMatchStats {
    // Tokens that matched the file name at all.
    match_count: usize,
    // Tokens that matched the file name contiguously.
    contiguous_match_count: usize,
    // Earliest match byte offset across all tokens (`usize::MAX` if none
    // matched).
    first_match: usize,
}

135fn filename_match_stats(file_name: &str, query: &FuzzyQuery) -> FilenameMatchStats {
136    let mut match_count = 0usize;
137    let mut contiguous_match_count = 0usize;
138    let mut first_match = usize::MAX;
139
140    for token in &query.tokens {
141        let Some(matched) = fuzzy_match_token(file_name, token) else {
142            continue;
143        };
144        match_count += 1;
145        first_match = first_match.min(matched.first_match);
146        if matched.is_contiguous {
147            contiguous_match_count += 1;
148        }
149    }
150
151    FilenameMatchStats {
152        match_count,
153        contiguous_match_count,
154        first_match,
155    }
156}
157
158pub fn compare_path_match_scores(left: &PathMatchScore, right: &PathMatchScore) -> Ordering {
159    right
160        .filename_match_count
161        .cmp(&left.filename_match_count)
162        .then(
163            right
164                .filename_contiguous_match_count
165                .cmp(&left.filename_contiguous_match_count),
166        )
167        .then(
168            right
169                .contiguous_match_count
170                .cmp(&left.contiguous_match_count),
171        )
172        .then(left.filename_first_match.cmp(&right.filename_first_match))
173        .then(left.path_first_match.cmp(&right.path_first_match))
174        .then(left.match_span.cmp(&right.match_span))
175        .then(left.gap_count.cmp(&right.gap_count))
176        .then(left.depth.cmp(&right.depth))
177        .then(left.len.cmp(&right.len))
178}
179
180fn fuzzy_match_token(label: &str, token: &str) -> Option<FuzzyTokenMatch> {
181    if token.is_empty() {
182        return Some(FuzzyTokenMatch {
183            highlights: Vec::new(),
184            first_match: usize::MAX,
185            last_match_end: 0,
186            gap_count: usize::MAX,
187            is_contiguous: false,
188        });
189    }
190
191    let file_name_offset = file_name_byte_offset(label);
192    if let Some(range) = contiguous_match_range(&label[file_name_offset..], token) {
193        let range = range.start + file_name_offset..range.end + file_name_offset;
194        return Some(FuzzyTokenMatch {
195            first_match: range.start,
196            last_match_end: range.end,
197            highlights: vec![range],
198            gap_count: 0,
199            is_contiguous: true,
200        });
201    }
202
203    if let Some(range) = contiguous_match_range(label, token) {
204        return Some(FuzzyTokenMatch {
205            first_match: range.start,
206            last_match_end: range.end,
207            highlights: vec![range],
208            gap_count: 0,
209            is_contiguous: true,
210        });
211    }
212
213    let label_chars = label
214        .char_indices()
215        .map(|(byte_idx, ch)| (byte_idx, ch, ch.len_utf8(), ch.to_lowercase().to_string()))
216        .collect::<Vec<_>>();
217    let token_chars = token.chars().map(|ch| ch.to_string()).collect::<Vec<_>>();
218
219    let mut search_start = 0usize;
220    let mut highlights = Vec::with_capacity(token_chars.len());
221    let mut matched_char_indices = Vec::with_capacity(token_chars.len());
222
223    for token_ch in token_chars {
224        let next = label_chars
225            .iter()
226            .enumerate()
227            .skip(search_start)
228            .find(|(_, (_, _, _, label_ch))| label_ch == &token_ch)?;
229        let (char_idx, (byte_idx, _, byte_len, _)) = next;
230        highlights.push(*byte_idx..*byte_idx + *byte_len);
231        matched_char_indices.push(char_idx);
232        search_start = char_idx + 1;
233    }
234
235    let first_match = highlights
236        .first()
237        .map(|range| range.start)
238        .unwrap_or(usize::MAX);
239    let last_match_end = highlights.last().map(|range| range.end).unwrap_or(0);
240    let gap_count = matched_char_indices
241        .windows(2)
242        .map(|window| window[1].saturating_sub(window[0]).saturating_sub(1))
243        .sum();
244
245    Some(FuzzyTokenMatch {
246        highlights,
247        first_match,
248        last_match_end,
249        gap_count,
250        is_contiguous: false,
251    })
252}
253
/// Finds the first case-insensitive substring occurrence of `token` within
/// `label`, returned as a byte range. Returns `None` when `token` is empty,
/// longer than `label`, or absent.
fn contiguous_match_range(label: &str, token: &str) -> Option<Range<usize>> {
    // Lowercase both sides character by character; `to_lowercase` may expand
    // a char to several, hence String per character.
    let haystack: Vec<(usize, usize, String)> = label
        .char_indices()
        .map(|(byte_idx, ch)| (byte_idx, ch.len_utf8(), ch.to_lowercase().to_string()))
        .collect();
    let needle: Vec<String> = token
        .chars()
        .map(|ch| ch.to_lowercase().to_string())
        .collect();

    if needle.is_empty() || haystack.len() < needle.len() {
        return None;
    }

    for window in haystack.windows(needle.len()) {
        let hit = window
            .iter()
            .zip(&needle)
            .all(|((_, _, label_ch), token_ch)| label_ch == token_ch);
        if hit {
            let (start, _, _) = window[0];
            let (last_start, last_len, _) = window[window.len() - 1];
            return Some(start..last_start + last_len);
        }
    }
    None
}

/// Byte offset where the final path component begins within `label`, or 0
/// when no file name can be extracted.
fn file_name_byte_offset(label: &str) -> usize {
    match Path::new(label).file_name().and_then(|name| name.to_str()) {
        // The file name is a suffix of the label, so its rightmost
        // occurrence is its offset.
        Some(file_name) => label.rfind(file_name).unwrap_or(0),
        None => 0,
    }
}

/// Sorts byte ranges and coalesces any that touch or overlap into a single
/// range, preserving full coverage.
fn merge_ranges(mut ranges: Vec<Range<usize>>) -> Vec<Range<usize>> {
    if ranges.is_empty() {
        return ranges;
    }

    ranges.sort_by_key(|range| (range.start, range.end));
    let mut merged: Vec<Range<usize>> = Vec::with_capacity(ranges.len());
    for range in ranges {
        match merged.last_mut() {
            // Touching or overlapping the previous range: extend it in place.
            Some(previous) if range.start <= previous.end => {
                previous.end = previous.end.max(range.end);
            }
            _ => merged.push(range),
        }
    }
    merged
}

// Unit tests covering range merging, token matching, and path ranking.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn merge_ranges_combines_overlaps() {
        let merged = merge_ranges(vec![0..2, 1..4, 5..7, 6..9]);
        assert_eq!(merged, vec![0..4, 5..9]);
    }

    #[test]
    fn fuzzy_match_is_case_insensitive_and_non_contiguous() {
        // "SRMA" is lowercased by FuzzyQuery and matches "sr" + "ma" scattered.
        let query = FuzzyQuery::new("SRMA");
        let matched = fuzzy_match_ranges("src/main.rs", &query).expect("fuzzy match");

        assert_eq!(matched.highlights, vec![0..2, 4..6]);
    }

    #[test]
    fn fuzzy_match_treats_regex_metacharacters_literally() {
        // Matching is plain character comparison, not regex.
        let bracket_query = FuzzyQuery::new("[");
        let bracket_match =
            fuzzy_match_ranges("src/[draft].rs", &bracket_query).expect("bracket match");
        assert_eq!(bracket_match.highlights, vec![4..5]);

        let dot_query = FuzzyQuery::new(".");
        let dot_match = fuzzy_match_ranges("src/main.rs", &dot_query).expect("dot match");
        assert_eq!(dot_match.highlights, vec![8..9]);
    }

    #[test]
    fn fuzzy_match_prefers_contiguous_token_over_earliest_scattered_match() {
        // "tui" appears scattered earlier in the path but contiguously later;
        // the contiguous occurrence must win.
        let query = FuzzyQuery::new("tui");
        let label = "crates/redox-tui/src/app/state.rs";
        let matched = fuzzy_match_ranges(label, &query).expect("contiguous token match");
        let start = label.find("tui").expect("tui occurrence");

        assert_eq!(matched.highlights, vec![start..start + "tui".len()]);
        assert_eq!(matched.gap_count, 0);
    }

    #[test]
    fn fuzzy_match_prefers_leaf_contiguous_token() {
        // The file-name occurrence beats an earlier directory occurrence.
        let query = FuzzyQuery::new("state");
        let label = "stateful/src/app/state.rs";
        let matched = fuzzy_match_ranges(label, &query).expect("leaf token match");
        let start = label.rfind("state").expect("leaf state occurrence");

        assert_eq!(matched.highlights, vec![start..start + "state".len()]);
    }

    #[test]
    fn path_helpers_use_path_components() {
        assert_eq!(path_depth("src/main.rs"), 1);
        assert_eq!(file_name_byte_offset("src/main.rs"), "src/".len());
    }

    #[cfg(windows)]
    #[test]
    fn path_helpers_use_windows_path_components() {
        assert_eq!(path_depth(r"src\main.rs"), 1);
        assert_eq!(file_name_byte_offset(r"src\main.rs"), r"src\".len());
    }

    #[test]
    fn path_match_score_prefers_filename_matches() {
        let query = FuzzyQuery::new("main");
        let direct_match = fuzzy_match_ranges("src/main.rs", &query).expect("direct match");
        let indirect_match = fuzzy_match_ranges("main/src/lib.rs", &query).expect("indirect match");
        let direct = path_match_score("src/main.rs", &direct_match, &query);
        let indirect = path_match_score("main/src/lib.rs", &indirect_match, &query);

        // Less means "ranks first".
        assert_eq!(
            compare_path_match_scores(&direct, &indirect),
            Ordering::Less
        );
    }

    #[test]
    fn path_match_score_prefers_contiguous_path_matches_over_earlier_scattered_matches() {
        let query = FuzzyQuery::new("tui");
        let contiguous_match =
            fuzzy_match_ranges("crates/redox-tui/src/lib.rs", &query).expect("contiguous match");
        let scattered_match =
            fuzzy_match_ranges("toplevel/utility/index.rs", &query).expect("scattered match");
        let contiguous = path_match_score("crates/redox-tui/src/lib.rs", &contiguous_match, &query);
        let scattered = path_match_score("toplevel/utility/index.rs", &scattered_match, &query);

        assert_eq!(
            compare_path_match_scores(&contiguous, &scattered),
            Ordering::Less
        );
    }
}