norm/metrics/fzf/
parser.rs

1use core::mem::transmute;
2
3use super::query::{Condition, FzfQuery, Pattern};
4use crate::utils;
5
6/// The parser used to parse strings into [`FzfQuery`]s.
7///
8/// Queries can be parsed according to fzf's [extended-search mode][esm] via
9/// [`parse`][FzfParser::parse]. If this is not desired, use
10/// [`parse_not_extended`][FzfParser::parse_not_extended] instead.
11///
12/// [esm]: https://github.com/junegunn/fzf#search-syntax
13#[derive(Clone)]
14pub struct FzfParser {
15    /// TODO: docs
16    chars: Vec<char>,
17
18    /// TODO: docs
19    patterns: Vec<Pattern<'static>>,
20
21    /// TODO: docs
22    conditions: Vec<Condition<'static>>,
23}
24
25impl Default for FzfParser {
26    #[inline]
27    fn default() -> Self {
28        Self {
29            chars: vec![char::default(); 64],
30            patterns: vec![Pattern::default(); 64],
31            conditions: vec![Condition::default(); 64],
32        }
33    }
34}
35
36impl core::fmt::Debug for FzfParser {
37    #[inline]
38    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
39        f.debug_struct("FzfParser").finish_non_exhaustive()
40    }
41}
42
43impl FzfParser {
44    /// Creates a new `FzfParser`.
45    #[inline]
46    pub fn new() -> Self {
47        Self::default()
48    }
49
50    /// Parses the given query string according to fzf's
51    /// [extended-search mode][esm].
52    ///
53    /// In extended-search mode certain characters change how the query is
54    /// matched in candidates.
55    ///
56    /// In particular:
57    ///
58    /// | Pattern | Matches                                      |
59    /// | ------- | -------------------------------------------- |
60    /// | `foo`   | candidates that fuzzy-match `"foo"`          |
61    /// | `'foo`  | candidates that include `"foo"`              |
62    /// | `^foo`  | candidates that start with `"foo"`           |
63    /// | `foo$`  | candidates that end with `"foo"`             |
64    /// | `!foo`  | candidates that **don't** include `"foo"`    |
65    /// | `!^foo` | candidates that **don't** start with `"foo"` |
66    /// | `!foo$` | candidates that **don't**  end with `"foo"`  |
67    ///
68    /// It's also possible to query for multiple patterns by separating them
69    /// with spaces or with the pipe character `"|"`. A space acts as a logical
70    /// AND operator, while a pipe character acts as a logical OR operator.
71    ///
72    /// For example, the query `"^main .c$ | .rs$"` would only match candidates
73    /// that start with `"main"` and end with either `".c"` or `".rs"`.
74    /// Spaces can be escaped with a backslash if they're part of a pattern,
75    /// e.g. `"foo\ baz"` will match `"foo bar baz"` but not `"baz foo"`.
76    ///
77    /// Note that like in fzf, but unlike in logical expressions, the pipe
78    /// character (OR) has a higher precedence than the space character (AND),
79    /// so that `"foo bar | baz"` gets parsed as `"foo && (bar || baz)"`, and
80    /// **not** as `"(foo && bar) || baz"`;
81    ///
82    /// If you want to treat all the characters in the query as fuzzy-matching,
83    /// use [`parse_not_extended`][FzfParser::parse_not_extended] instead.
84    ///
85    /// [esm]: https://github.com/junegunn/fzf#search-syntax
86    #[inline]
87    pub fn parse<'a>(&'a mut self, query: &str) -> FzfQuery<'a> {
88        let max_chars = query.len();
89
90        if self.chars.len() < max_chars {
91            self.chars.resize(max_chars, char::default());
92        }
93
94        // The theoretical maximum number of conditions that could be included
95        // in the query.
96        //
97        // The actual number of conditions (which we'll only know after
98        // parsing) matches this maximum on space-separated queries of
99        // multiple ascii characters, e.g. `a b c d`.
100        let max_conditions = query.len() / 2 + 1;
101
102        if self.conditions.len() < max_conditions {
103            self.conditions.resize(max_conditions, Condition::default());
104        }
105
106        if self.patterns.len() < max_conditions {
107            self.patterns.resize(max_conditions, Pattern::default());
108        }
109
110        let patterns: &'a mut [Pattern<'static>] =
111            self.patterns.as_mut_slice();
112
113        // SAFETY: todo.
114        let patterns = unsafe {
115            transmute::<&'a mut [Pattern<'static>], &'a mut [Pattern<'a>]>(
116                patterns,
117            )
118        };
119
120        let mut num_conditions = 0;
121
122        for condition in
123            Patterns::new(patterns, &mut self.chars, query).map(Condition::new)
124        {
125            // SAFETY: todo
126            let condition = unsafe {
127                transmute::<Condition, Condition<'static>>(condition)
128            };
129
130            self.conditions[num_conditions] = condition;
131
132            num_conditions += 1;
133        }
134
135        FzfQuery::new_extended(&self.conditions[..num_conditions])
136    }
137
138    /// Parses the given query string without using fzf's extended-search mode.
139    ///
140    /// All the characters in the query string are used for fuzzy-matching,
141    /// with no special meaning attached to any of them. This is equivalent to
142    /// calling `fzf` with the `--no-extended` flag.
143    ///
144    /// If you want to apply fzf's extended-search mode to the query, parse it
145    /// with [`parse`][FzfParser::parse] instead.
146    ///
147    /// # Examples
148    ///
149    /// ```rust
150    /// # use norm::fzf::{FzfParser, FzfV2};
151    /// # use norm::Metric;
152    /// let mut fzf = FzfV2::new();
153    /// let mut parser = FzfParser::new();
154    /// let mut ranges = Vec::new();
155    ///
156    /// let query = parser.parse_not_extended("^bar | baz$");
157    ///
158    /// let distance =
159    ///     fzf.distance_and_ranges(query, "^foo bar | baz $ foo", &mut ranges);
160    ///
161    /// // We expect a match since the characters in the query fuzzy-match the
162    /// // candidate.
163    /// //
164    /// // If we parsed the query by calling `parse` there wouldn't have been a
165    /// // match since the candidate doesn't start with `"bar"` nor does it end
166    /// // with `"baz"`.
167    /// assert!(distance.is_some());
168    ///
169    /// assert_eq!(ranges, [0..1, 5..14, 15..16]);
170    /// ```
171    #[inline]
172    pub fn parse_not_extended<'a>(&'a mut self, query: &str) -> FzfQuery<'a> {
173        let mut char_len = 0;
174
175        for ch in query.chars() {
176            self.chars[char_len] = ch;
177            char_len += 1;
178        }
179
180        FzfQuery::new_not_extended(&self.chars[..char_len])
181    }
182}
183
184const OR_BLOCK_SEPARATOR: &[char] = &['|'];
185
186/// TODO: docs
187struct Patterns<'buf, 's> {
188    /// TODO: docs
189    buf: &'buf mut [Pattern<'buf>],
190
191    /// TODO: docs
192    allocated: usize,
193
194    /// TODO: docs
195    words: Words<'buf, 's>,
196
197    /// TODO: docs
198    next: Option<Pattern<'buf>>,
199}
200
201impl<'buf, 's> Patterns<'buf, 's> {
202    #[inline]
203    fn alloc(&mut self, pattern: Pattern<'buf>) {
204        self.buf[self.allocated] = pattern;
205        self.allocated += 1;
206    }
207
208    #[inline]
209    fn new(
210        patterns_buf: &'buf mut [Pattern<'buf>],
211        char_buf: &'buf mut [char],
212        s: &'s str,
213    ) -> Self {
214        Self {
215            buf: patterns_buf,
216            allocated: 0,
217            words: Words::new(char_buf, s),
218            next: None,
219        }
220    }
221}
222
223impl<'buf, 's> Iterator for Patterns<'buf, 's> {
224    type Item = &'buf [Pattern<'buf>];
225
226    #[inline]
227    fn next(&mut self) -> Option<Self::Item> {
228        let prev_allocated = self.allocated;
229
230        // Whether we're expecting the next word yielded by `self.words` to be
231        // a "|". This is set to true after getting a word, and set to false
232        // after a "|".
233        let mut looking_for_or;
234
235        if let Some(first_pattern) = self.next.take() {
236            self.alloc(first_pattern);
237            looking_for_or = true;
238        } else {
239            looking_for_or = false;
240        }
241
242        loop {
243            let Some(word) = self.words.next() else {
244                break;
245            };
246
247            let word_is_condition = word != OR_BLOCK_SEPARATOR;
248
249            if word_is_condition {
250                let Some(word) = Pattern::parse(word) else { continue };
251
252                if looking_for_or {
253                    self.next = Some(word);
254                    break;
255                } else {
256                    self.alloc(word);
257                    looking_for_or = true;
258                    continue;
259                }
260            }
261
262            looking_for_or = false;
263        }
264
265        if self.allocated == prev_allocated {
266            return None;
267        }
268
269        let patterns = &self.buf[prev_allocated..self.allocated];
270
271        // SAFETY: todo
272        let patterns =
273            unsafe { transmute::<&[Pattern], &'buf [Pattern]>(patterns) };
274
275        Some(patterns)
276    }
277}
278
279/// An iterator over the words of a string.
280///
281/// Here, a "word" is simply a string of consecutive non-ascii-space
282/// characters. Escaped spaces are treated as non-space characters.
283///
284/// # Examples
285///
286/// ```rust
287/// # use norm::fzf::words;
288/// let mut words = words("foo 'bar' \"baz\"");
289/// assert_eq!(words.next().as_deref(), Some("foo"));
290/// assert_eq!(words.next().as_deref(), Some("'bar'"));
291/// assert_eq!(words.next().as_deref(), Some("\"baz\""));
292/// assert_eq!(words.next(), None);
293/// ```
294///
295/// ```rust
296/// # use norm::fzf::words;
297/// let mut words = words("foo\\ bar baz");
298/// assert_eq!(words.next().as_deref(), Some("foo bar"));
299/// assert_eq!(words.next().as_deref(), Some("baz"));
300/// assert_eq!(words.next(), None);
301/// ```
302///
303/// ```rust
304/// # use norm::fzf::words;
305/// let mut words = words("foo \\ bar");
306/// assert_eq!(words.next().as_deref(), Some("foo"));
307/// assert_eq!(words.next().as_deref(), Some(" bar"));
308/// assert_eq!(words.next(), None);
309/// ```
310#[doc(hidden)]
311pub struct Words<'buf, 'sentence> {
312    /// TODO: docs
313    buf: &'buf mut [char],
314
315    /// TODO: docs
316    allocated: usize,
317
318    /// TODO: docs
319    s: &'sentence str,
320}
321
322impl<'buf, 'sentence> Words<'buf, 'sentence> {
323    /// TODO: docs
324    #[inline]
325    fn alloc(&mut self, s: &str) {
326        for ch in s.chars() {
327            self.buf[self.allocated] = ch;
328            self.allocated += 1;
329        }
330    }
331
332    /// TODO: docs
333    #[inline]
334    fn new(buf: &'buf mut [char], s: &'sentence str) -> Self {
335        Self { buf, s: utils::strip_leading_spaces(s), allocated: 0 }
336    }
337}
338
339impl<'buf> Iterator for Words<'buf, '_> {
340    type Item = &'buf [char];
341
342    #[inline(always)]
343    fn next(&mut self) -> Option<Self::Item> {
344        if self.s.is_empty() {
345            return None;
346        }
347
348        let prev_allocated = self.allocated;
349
350        let mut word_byte_end = 0;
351
352        let mut s = self.s;
353
354        loop {
355            match memchr::memchr(b' ', s.as_bytes()) {
356                Some(0) => break,
357
358                Some(offset) if s.as_bytes()[offset - 1] == b'\\' => {
359                    // Push everything up to (but not including) the escape.
360                    self.alloc(&s[..offset - 1]);
361
362                    // ..skip the escape..
363
364                    // ..and push the space.
365                    self.alloc(" ");
366
367                    s = &s[offset + 1..];
368
369                    word_byte_end += offset + 1;
370                },
371
372                Some(offset) => {
373                    let s = &s[..offset];
374                    self.alloc(s);
375                    word_byte_end += s.len();
376                    break;
377                },
378
379                None => {
380                    self.alloc(s);
381                    word_byte_end += s.len();
382                    break;
383                },
384            }
385        }
386
387        self.s = utils::strip_leading_spaces(&self.s[word_byte_end..]);
388
389        let word = &self.buf[prev_allocated..self.allocated];
390
391        // SAFETY: todo
392        let word = unsafe { transmute::<&[char], &'buf [char]>(word) };
393
394        Some(word)
395    }
396}
397
398/// TODO: docs
399#[cfg(feature = "__tests")]
400#[doc(hidden)]
401pub fn parse(s: &str) -> FzfQuery<'static> {
402    let parser = Box::leak(Box::new(FzfParser::new()));
403    parser.parse(s)
404}
405
406#[cfg(test)]
407mod parse_tests {
408    use super::super::query::*;
409    use super::*;
410
411    #[test]
412    fn parse_query_empty() {
413        assert!(parse("").is_empty());
414    }
415
416    #[test]
417    fn parse_query_single_fuzzy() {
418        let query = parse("foo");
419
420        let SearchMode::NotExtended(pattern) = query.search_mode else {
421            panic!();
422        };
423
424        assert_eq!(pattern.into_string(), "foo");
425
426        assert_eq!(pattern.match_type, MatchType::Fuzzy);
427    }
428
429    #[test]
430    fn parse_query_upstream_extended() {
431        let query = parse(
432            "aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$ | ^iii$ ^xxx | 'yyy | \
433             zzz$ | !ZZZ |",
434        );
435
436        let SearchMode::Extended(conditions) = query.search_mode else {
437            panic!();
438        };
439
440        assert_eq!(conditions.len(), 9);
441
442        let pattern = conditions[0].or_patterns()[0];
443        assert_eq!(pattern.match_type, MatchType::Fuzzy);
444        assert!(!pattern.is_inverse);
445
446        let pattern = conditions[1].or_patterns()[0];
447        assert_eq!(pattern.match_type, MatchType::Exact);
448        assert!(!pattern.is_inverse);
449
450        let pattern = conditions[2].or_patterns()[0];
451        assert_eq!(pattern.match_type, MatchType::PrefixExact);
452        assert!(!pattern.is_inverse);
453
454        let pattern = conditions[3].or_patterns()[0];
455        assert_eq!(pattern.match_type, MatchType::SuffixExact);
456        assert!(!pattern.is_inverse);
457
458        let pattern = conditions[4].or_patterns()[0];
459        assert_eq!(pattern.match_type, MatchType::Exact);
460        assert!(pattern.is_inverse);
461
462        let pattern = conditions[5].or_patterns()[0];
463        assert_eq!(pattern.match_type, MatchType::Fuzzy);
464        assert!(pattern.is_inverse);
465
466        let pattern = conditions[6].or_patterns()[0];
467        assert_eq!(pattern.match_type, MatchType::PrefixExact);
468        assert!(pattern.is_inverse);
469
470        let pattern = conditions[7].or_patterns()[0];
471        assert_eq!(pattern.match_type, MatchType::SuffixExact);
472        assert!(pattern.is_inverse);
473
474        let pattern = conditions[7].or_patterns()[1];
475        assert_eq!(pattern.match_type, MatchType::EqualExact);
476        assert!(!pattern.is_inverse);
477
478        let pattern = conditions[8].or_patterns()[0];
479        assert_eq!(pattern.match_type, MatchType::PrefixExact);
480        assert!(!pattern.is_inverse);
481
482        let pattern = conditions[8].or_patterns()[1];
483        assert_eq!(pattern.match_type, MatchType::Exact);
484        assert!(!pattern.is_inverse);
485
486        let pattern = conditions[8].or_patterns()[2];
487        assert_eq!(pattern.match_type, MatchType::SuffixExact);
488        assert!(!pattern.is_inverse);
489
490        let pattern = conditions[8].or_patterns()[3];
491        assert_eq!(pattern.match_type, MatchType::Exact);
492        assert!(pattern.is_inverse);
493    }
494}
495
496#[cfg(test)]
497mod patterns_tests {
498    use super::*;
499
500    fn patterns(
501        s: &str,
502    ) -> impl Iterator<Item = &'static [Pattern<'static>]> + '_ {
503        let patterns_buf = vec![Pattern::default(); s.len() / 2 + 1].leak();
504        let char_buf = vec![char::default(); s.len()].leak();
505        Patterns::new(patterns_buf, char_buf, s)
506    }
507
508    fn pattern(s: &str) -> Pattern<'static> {
509        Pattern::parse(s.chars().collect::<Vec<_>>().leak()).unwrap()
510    }
511
512    #[test]
513    fn patterns_empty() {
514        let mut blocks = patterns("");
515        assert!(blocks.next().is_none());
516    }
517
518    #[test]
519    fn patterns_single() {
520        let mut blocks = patterns("foo");
521        assert_eq!(blocks.next().unwrap(), [pattern("foo")]);
522        assert_eq!(blocks.next(), None);
523    }
524
525    #[test]
526    fn patterns_multiple_ors() {
527        let mut blocks = patterns("foo | bar | baz");
528        assert_eq!(
529            blocks.next().unwrap(),
530            [pattern("foo"), pattern("bar"), pattern("baz")]
531        );
532        assert_eq!(blocks.next(), None);
533    }
534
535    #[test]
536    fn patterns_multiple_ands() {
537        let mut blocks = patterns("foo bar baz");
538        assert_eq!(blocks.next().unwrap(), [pattern("foo")]);
539        assert_eq!(blocks.next().unwrap(), [pattern("bar")]);
540        assert_eq!(blocks.next().unwrap(), [pattern("baz")]);
541        assert_eq!(blocks.next(), None);
542    }
543
544    #[test]
545    fn patterns_empty_between_ors() {
546        let mut blocks = patterns("foo | | bar");
547        assert_eq!(blocks.next().unwrap(), [pattern("foo"), pattern("bar")]);
548        assert_eq!(blocks.next(), None);
549    }
550
551    #[test]
552    fn patterns_multiple_ors_multiple_ands() {
553        let mut blocks = patterns("foo | bar baz qux | quux | corge");
554        assert_eq!(blocks.next().unwrap(), [pattern("foo"), pattern("bar")]);
555        assert_eq!(blocks.next().unwrap(), [pattern("baz")]);
556        assert_eq!(
557            blocks.next().unwrap(),
558            [pattern("qux"), pattern("quux"), pattern("corge")]
559        );
560        assert_eq!(blocks.next(), None);
561    }
562}
563
564#[cfg(feature = "__tests")]
565#[doc(hidden)]
566pub fn words(s: &str) -> impl Iterator<Item = String> {
567    let mut buf = Vec::new();
568
569    buf.resize(s.len(), char::default());
570
571    Words::new(&mut buf, s)
572        .map(String::from_iter)
573        .collect::<Vec<_>>()
574        .into_iter()
575}
576
577#[cfg(test)]
578mod word_tests {
579    use super::*;
580
581    #[test]
582    fn words_empty() {
583        let mut words = words("");
584        assert!(words.next().is_none());
585    }
586
587    #[test]
588    fn words_single() {
589        let mut words = words("foo");
590        assert_eq!(words.next().as_deref(), Some("foo"));
591        assert_eq!(words.next(), None);
592    }
593
594    #[test]
595    fn words_escaped_escape_escaped_space() {
596        let mut words = words("\\\\ ");
597        assert_eq!(words.next().as_deref(), Some("\\ "));
598        assert_eq!(words.next(), None);
599    }
600
601    #[test]
602    fn words_multiple() {
603        let mut words = words("foo bar");
604        assert_eq!(words.next().as_deref(), Some("foo"));
605        assert_eq!(words.next().as_deref(), Some("bar"));
606        assert_eq!(words.next(), None);
607    }
608
609    #[test]
610    fn words_multiple_leading_trailing_spaces() {
611        let mut words = words("   foo bar   ");
612        assert_eq!(words.next().as_deref(), Some("foo"));
613        assert_eq!(words.next().as_deref(), Some("bar"));
614        assert_eq!(words.next(), None);
615    }
616
617    #[test]
618    fn words_multiple_escaped_spaces() {
619        let mut words = words("foo\\ bar\\ baz");
620        assert_eq!(words.next().as_deref(), Some("foo bar baz"));
621        assert_eq!(words.next(), None);
622    }
623
624    #[test]
625    fn words_multiple_standalone_escaped_spaces() {
626        let mut words = words(" \\  foo \\ bar \\  ");
627        assert_eq!(words.next().as_deref(), Some(" "));
628        assert_eq!(words.next().as_deref(), Some("foo"));
629        assert_eq!(words.next().as_deref(), Some(" bar"));
630        assert_eq!(words.next().as_deref(), Some(" "));
631        assert_eq!(words.next(), None);
632    }
633
634    #[test]
635    fn words_single_escaped_spaces() {
636        let mut words = words("\\ ");
637        assert_eq!(words.next().as_deref(), Some(" "));
638        assert_eq!(words.next(), None);
639    }
640
641    #[test]
642    fn words_consecutive_escaped_spaces() {
643        let mut words = words(" \\ \\ \\  ");
644        assert_eq!(words.next().as_deref(), Some("   "));
645        assert_eq!(words.next(), None);
646    }
647}