intelli_shell/utils/
fuzzy.rs

1/// Represents a component of a fuzzy search query.
2///
3/// A query is parsed into a sequence of `FuzzyMatch` items.
4///
5/// These items are implicitly ANDed together when performing a search, unless they are part of an `Or` variant.
6#[derive(PartialEq, Eq, Debug)]
7pub enum FuzzyMatch<'a> {
8    /// A single search term with a specific matching strategy
9    Term(FuzzyTerm<'a>),
10    /// A collection of terms where at least one must match (logical OR)
11    Or(Vec<FuzzyTerm<'a>>),
12}
13
14/// Represents an individual search term and its associated matching semantics
15#[derive(PartialEq, Eq, Debug)]
16pub struct FuzzyTerm<'a> {
17    /// The kind of fuzzy matching to apply to this term
18    pub kind: FuzzyTermKind,
19    /// The string slice representing the term to match
20    pub term: &'a str,
21}
22
23/// Defines the different kinds of matching strategies for a [`FuzzyTerm`].
24///
25/// The specific syntax used in the query string determines the `FuzzyTermKind`.
26#[derive(Clone, Copy, PartialEq, Eq, Default, Debug)]
27pub enum FuzzyTermKind {
28    /// Chars must be found in the same order, but not necessarily consecutively
29    #[default]
30    Fuzzy,
31    /// The search input must contain the term exactly
32    Exact,
33    /// The term must appear as a whole word, respecting word boundaries
34    ExactBoundary,
35    /// The search input must start with this term
36    PrefixExact,
37    /// The search input must end with this term
38    SuffixExact,
39    /// The search input must *not* contain this term exactly
40    InverseExact,
41    /// The search input must *not* start with this term
42    InversePrefixExact,
43    /// The search input must *not* end with this term
44    InverseSuffixExact,
45}
46
47/// Parses a fuzzy search query string into a vector of [`FuzzyMatch`] items.
48///
49/// The query is split by spaces. Terms are implicitly ANDed.
50///
51/// A `|` token acts as an OR operator, grouping the term before it with the terms after it until the next non-OR term
52/// or end of query.
53///
54/// # Examples
55///
56/// ```rust
57/// # use intelli_shell::utils::{FuzzyMatch, FuzzyTerm, FuzzyTermKind, parse_fuzzy_query};
58/// assert_eq!(
59///     parse_fuzzy_query("foo bar"),
60///     vec![
61///         FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "foo" }),
62///         FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "bar" }),
63///     ]
64/// );
65///
66/// assert_eq!(
67///     parse_fuzzy_query("foo | bar"),
68///     vec![FuzzyMatch::Or(vec![
69///         FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "foo" },
70///         FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "bar" },
71///     ])],
72/// );
73///
74/// assert_eq!(
75///     parse_fuzzy_query("^core go$ | rb$ | py$"),
76///     vec![
77///         FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::PrefixExact, term: "core" }),
78///         FuzzyMatch::Or(vec![
79///             FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "go" },
80///             FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "rb" },
81///             FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "py" },
82///         ]),
83///     ]
84/// );
85pub fn parse_fuzzy_query(query: &str) -> Vec<FuzzyMatch<'_>> {
86    let mut matches: Vec<FuzzyMatch<'_>> = Vec::new();
87    let mut current_or_group: Vec<FuzzyTerm<'_>> = Vec::new();
88    // Indicates if the last processed token was a regular term (true) or a `|` operator / start of query (false)
89    let mut last_token_was_term = false;
90
91    for token_str in query.split_whitespace() {
92        if token_str == "|" {
93            // This token is an OR operator
94            if last_token_was_term {
95                // The `|` operator applies to the previous valid term
96                if current_or_group.is_empty() {
97                    // The previous item was a single term; start a new OR group with it
98                    if let Some(FuzzyMatch::Term(term)) = matches.pop() {
99                        current_or_group.push(term);
100                    } else {
101                        last_token_was_term = false;
102                        continue;
103                    }
104                }
105                // If current_or_group was not empty, this `|` continues an existing OR sequence
106                last_token_was_term = false;
107            } else {
108                // Consecutive `|` tokens or `|` after an ignored (empty) term or at query start
109                last_token_was_term = false;
110            }
111        } else {
112            // This token is a potential search term
113            if let Some(parsed_term) = parse_individual_term(token_str) {
114                // A valid, non-empty term was parsed
115                if !last_token_was_term && !current_or_group.is_empty() {
116                    // Previous significant token was `|`, and we are in an active OR group. Add this term to it.
117                    current_or_group.push(parsed_term);
118                } else {
119                    // This term starts a new single item, or follows another single item
120                    // Finalize any pending OR group before adding this new term as a single
121                    if !current_or_group.is_empty() {
122                        matches.push(FuzzyMatch::Or(std::mem::take(&mut current_or_group)));
123                    }
124                    matches.push(FuzzyMatch::Term(parsed_term));
125                }
126                last_token_was_term = true;
127            } else {
128                last_token_was_term = false;
129            }
130        }
131    }
132
133    // After iterating through all tokens, if an OR group is still pending, finalize it.
134    if !current_or_group.is_empty() {
135        matches.push(FuzzyMatch::Or(current_or_group));
136    }
137
138    matches
139}
140
141/// Parses an individual token string from the query into a [`FuzzyTerm`].
142///
143/// It identifies special characters (`'`, `^`, `$`, `!`) to determine the [`FuzzyTermKind`] and extracts the actual
144/// term string.
145///
146/// Returns `None` if the resulting term string is empty after stripping special characters.
147fn parse_individual_term(token_str: &str) -> Option<FuzzyTerm<'_>> {
148    // Check for most specific patterns first
149    let fuzzy = if let Some(term) = token_str.strip_prefix("!^") {
150        // Handles "!^term"
151        FuzzyTerm {
152            kind: FuzzyTermKind::InversePrefixExact,
153            term,
154        }
155    } else if token_str.starts_with('!') && token_str.ends_with('$') {
156        // Handles "!term$"
157        FuzzyTerm {
158            kind: FuzzyTermKind::InverseSuffixExact,
159            term: &token_str[1..(token_str.len() - 1)],
160        }
161    } else if token_str.starts_with('\'') && token_str.ends_with('\'') {
162        // Handles "'term'"
163        FuzzyTerm {
164            kind: FuzzyTermKind::ExactBoundary,
165            term: &token_str[1..(token_str.len() - 1)],
166        }
167    } else if let Some(term) = token_str.strip_prefix('\'') {
168        // Handles "'term"
169        FuzzyTerm {
170            kind: FuzzyTermKind::Exact,
171            term,
172        }
173    } else if let Some(term) = token_str.strip_prefix('^') {
174        // Handles "^term"
175        FuzzyTerm {
176            kind: FuzzyTermKind::PrefixExact,
177            term,
178        }
179    } else if let Some(term) = token_str.strip_suffix('$') {
180        // Handles "term$"
181        FuzzyTerm {
182            kind: FuzzyTermKind::SuffixExact,
183            term,
184        }
185    } else if let Some(term) = token_str.strip_prefix('!') {
186        // Handles "!term"
187        FuzzyTerm {
188            kind: FuzzyTermKind::InverseExact,
189            term,
190        }
191    } else {
192        // Default: Fuzzy match for "term"
193        FuzzyTerm {
194            kind: FuzzyTermKind::Fuzzy,
195            term: token_str,
196        }
197    };
198    // Skip empty terms
199    if fuzzy.term.is_empty() { None } else { Some(fuzzy) }
200}