intelli_shell/utils/
fuzzy.rs

1/// Represents a component of a fuzzy search query.
2///
3/// A query is parsed into a sequence of `FuzzyMatch` items.
4///
5/// These items are implicitly ANDed together when performing a search, unless they are part of an `Or` variant.
6#[derive(PartialEq, Eq)]
7#[cfg_attr(debug_assertions, derive(Debug))]
8pub enum FuzzyMatch<'a> {
9    /// A single search term with a specific matching strategy
10    Term(FuzzyTerm<'a>),
11    /// A collection of terms where at least one must match (logical OR)
12    Or(Vec<FuzzyTerm<'a>>),
13}
14
15/// Represents an individual search term and its associated matching semantics
16#[derive(PartialEq, Eq)]
17#[cfg_attr(debug_assertions, derive(Debug))]
18pub struct FuzzyTerm<'a> {
19    /// The kind of fuzzy matching to apply to this term
20    pub kind: FuzzyTermKind,
21    /// The string slice representing the term to match
22    pub term: &'a str,
23}
24
25/// Defines the different kinds of matching strategies for a [`FuzzyTerm`].
26///
27/// The specific syntax used in the query string determines the `FuzzyTermKind`.
28#[derive(Clone, Copy, PartialEq, Eq, Default)]
29#[cfg_attr(debug_assertions, derive(Debug))]
30pub enum FuzzyTermKind {
31    /// Chars must be found in the same order, but not necessarily consecutively
32    #[default]
33    Fuzzy,
34    /// The search input must contain the term exactly
35    Exact,
36    /// The term must appear as a whole word, respecting word boundaries
37    ExactBoundary,
38    /// The search input must start with this term
39    PrefixExact,
40    /// The search input must end with this term
41    SuffixExact,
42    /// The search input must *not* contain this term exactly
43    InverseExact,
44    /// The search input must *not* start with this term
45    InversePrefixExact,
46    /// The search input must *not* end with this term
47    InverseSuffixExact,
48}
49
50/// Parses a fuzzy search query string into a vector of [`FuzzyMatch`] items.
51///
52/// The query is split by spaces. Terms are implicitly ANDed.
53///
54/// A `|` token acts as an OR operator, grouping the term before it with the terms after it until the next non-OR term
55/// or end of query.
56///
57/// # Examples
58///
59/// ```rust
60/// # use intelli_shell::utils::{FuzzyMatch, FuzzyTerm, FuzzyTermKind, parse_fuzzy_query};
61/// assert_eq!(
62///     parse_fuzzy_query("foo bar"),
63///     vec![
64///         FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "foo" }),
65///         FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "bar" }),
66///     ]
67/// );
68///
69/// assert_eq!(
70///     parse_fuzzy_query("foo | bar"),
71///     vec![FuzzyMatch::Or(vec![
72///         FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "foo" },
73///         FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "bar" },
74///     ])],
75/// );
76///
77/// assert_eq!(
78///     parse_fuzzy_query("^core go$ | rb$ | py$"),
79///     vec![
80///         FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::PrefixExact, term: "core" }),
81///         FuzzyMatch::Or(vec![
82///             FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "go" },
83///             FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "rb" },
84///             FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "py" },
85///         ]),
86///     ]
87/// );
88pub fn parse_fuzzy_query(query: &str) -> Vec<FuzzyMatch<'_>> {
89    let mut matches: Vec<FuzzyMatch<'_>> = Vec::new();
90    let mut current_or_group: Vec<FuzzyTerm<'_>> = Vec::new();
91    // Indicates if the last processed token was a regular term (true) or a `|` operator / start of query (false)
92    let mut last_token_was_term = false;
93
94    for token_str in query.split_whitespace() {
95        if token_str == "|" {
96            // This token is an OR operator
97            if last_token_was_term {
98                // The `|` operator applies to the previous valid term
99                if current_or_group.is_empty() {
100                    // The previous item was a single term; start a new OR group with it
101                    if let Some(FuzzyMatch::Term(term)) = matches.pop() {
102                        current_or_group.push(term);
103                    } else {
104                        last_token_was_term = false;
105                        continue;
106                    }
107                }
108                // If current_or_group was not empty, this `|` continues an existing OR sequence
109                last_token_was_term = false;
110            } else {
111                // Consecutive `|` tokens or `|` after an ignored (empty) term or at query start
112                last_token_was_term = false;
113            }
114        } else {
115            // This token is a potential search term
116            if let Some(parsed_term) = parse_individual_term(token_str) {
117                // A valid, non-empty term was parsed
118                if !last_token_was_term && !current_or_group.is_empty() {
119                    // Previous significant token was `|`, and we are in an active OR group. Add this term to it.
120                    current_or_group.push(parsed_term);
121                } else {
122                    // This term starts a new single item, or follows another single item
123                    // Finalize any pending OR group before adding this new term as a single
124                    if !current_or_group.is_empty() {
125                        matches.push(FuzzyMatch::Or(std::mem::take(&mut current_or_group)));
126                    }
127                    matches.push(FuzzyMatch::Term(parsed_term));
128                }
129                last_token_was_term = true;
130            } else {
131                last_token_was_term = false;
132            }
133        }
134    }
135
136    // After iterating through all tokens, if an OR group is still pending, finalize it.
137    if !current_or_group.is_empty() {
138        matches.push(FuzzyMatch::Or(current_or_group));
139    }
140
141    matches
142}
143
144/// Parses an individual token string from the query into a [`FuzzyTerm`].
145///
146/// It identifies special characters (`'`, `^`, `$`, `!`) to determine the [`FuzzyTermKind`] and extracts the actual
147/// term string.
148///
149/// Returns `None` if the resulting term string is empty after stripping special characters.
150fn parse_individual_term(token_str: &str) -> Option<FuzzyTerm<'_>> {
151    // Check for most specific patterns first
152    let fuzzy = if let Some(term) = token_str.strip_prefix("!^") {
153        // Handles "!^term"
154        FuzzyTerm {
155            kind: FuzzyTermKind::InversePrefixExact,
156            term,
157        }
158    } else if token_str.starts_with('!') && token_str.ends_with('$') {
159        // Handles "!term$"
160        FuzzyTerm {
161            kind: FuzzyTermKind::InverseSuffixExact,
162            term: &token_str[1..(token_str.len() - 1)],
163        }
164    } else if token_str.starts_with('\'') && token_str.ends_with('\'') {
165        // Handles "'term'"
166        FuzzyTerm {
167            kind: FuzzyTermKind::ExactBoundary,
168            term: &token_str[1..(token_str.len() - 1)],
169        }
170    } else if let Some(term) = token_str.strip_prefix('\'') {
171        // Handles "'term"
172        FuzzyTerm {
173            kind: FuzzyTermKind::Exact,
174            term,
175        }
176    } else if let Some(term) = token_str.strip_prefix('^') {
177        // Handles "^term"
178        FuzzyTerm {
179            kind: FuzzyTermKind::PrefixExact,
180            term,
181        }
182    } else if let Some(term) = token_str.strip_suffix('$') {
183        // Handles "term$"
184        FuzzyTerm {
185            kind: FuzzyTermKind::SuffixExact,
186            term,
187        }
188    } else if let Some(term) = token_str.strip_prefix('!') {
189        // Handles "!term"
190        FuzzyTerm {
191            kind: FuzzyTermKind::InverseExact,
192            term,
193        }
194    } else {
195        // Default: Fuzzy match for "term"
196        FuzzyTerm {
197            kind: FuzzyTermKind::Fuzzy,
198            term: token_str,
199        }
200    };
201    // Skip empty terms
202    if fuzzy.term.is_empty() { None } else { Some(fuzzy) }
203}