intelli_shell/utils/fuzzy.rs
1/// Represents a component of a fuzzy search query.
2///
3/// A query is parsed into a sequence of `FuzzyMatch` items.
4///
5/// These items are implicitly ANDed together when performing a search, unless they are part of an `Or` variant.
6#[derive(PartialEq, Eq)]
7#[cfg_attr(debug_assertions, derive(Debug))]
8pub enum FuzzyMatch<'a> {
9 /// A single search term with a specific matching strategy
10 Term(FuzzyTerm<'a>),
11 /// A collection of terms where at least one must match (logical OR)
12 Or(Vec<FuzzyTerm<'a>>),
13}
14
15/// Represents an individual search term and its associated matching semantics
16#[derive(PartialEq, Eq)]
17#[cfg_attr(debug_assertions, derive(Debug))]
18pub struct FuzzyTerm<'a> {
19 /// The kind of fuzzy matching to apply to this term
20 pub kind: FuzzyTermKind,
21 /// The string slice representing the term to match
22 pub term: &'a str,
23}
24
25/// Defines the different kinds of matching strategies for a [`FuzzyTerm`].
26///
27/// The specific syntax used in the query string determines the `FuzzyTermKind`.
28#[derive(Clone, Copy, PartialEq, Eq, Default)]
29#[cfg_attr(debug_assertions, derive(Debug))]
30pub enum FuzzyTermKind {
31 /// Chars must be found in the same order, but not necessarily consecutively
32 #[default]
33 Fuzzy,
34 /// The search input must contain the term exactly
35 Exact,
36 /// The term must appear as a whole word, respecting word boundaries
37 ExactBoundary,
38 /// The search input must start with this term
39 PrefixExact,
40 /// The search input must end with this term
41 SuffixExact,
42 /// The search input must *not* contain this term exactly
43 InverseExact,
44 /// The search input must *not* start with this term
45 InversePrefixExact,
46 /// The search input must *not* end with this term
47 InverseSuffixExact,
48}
49
50/// Parses a fuzzy search query string into a vector of [`FuzzyMatch`] items.
51///
52/// The query is split by spaces. Terms are implicitly ANDed.
53///
54/// A `|` token acts as an OR operator, grouping the term before it with the terms after it until the next non-OR term
55/// or end of query.
56///
57/// # Examples
58///
59/// ```rust
60/// # use intelli_shell::utils::{FuzzyMatch, FuzzyTerm, FuzzyTermKind, parse_fuzzy_query};
61/// assert_eq!(
62/// parse_fuzzy_query("foo bar"),
63/// vec![
64/// FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "foo" }),
65/// FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "bar" }),
66/// ]
67/// );
68///
69/// assert_eq!(
70/// parse_fuzzy_query("foo | bar"),
71/// vec![FuzzyMatch::Or(vec![
72/// FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "foo" },
73/// FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "bar" },
74/// ])],
75/// );
76///
77/// assert_eq!(
78/// parse_fuzzy_query("^core go$ | rb$ | py$"),
79/// vec![
80/// FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::PrefixExact, term: "core" }),
81/// FuzzyMatch::Or(vec![
82/// FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "go" },
83/// FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "rb" },
84/// FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "py" },
85/// ]),
86/// ]
87/// );
88pub fn parse_fuzzy_query(query: &str) -> Vec<FuzzyMatch<'_>> {
89 let mut matches: Vec<FuzzyMatch<'_>> = Vec::new();
90 let mut current_or_group: Vec<FuzzyTerm<'_>> = Vec::new();
91 // Indicates if the last processed token was a regular term (true) or a `|` operator / start of query (false)
92 let mut last_token_was_term = false;
93
94 for token_str in query.split_whitespace() {
95 if token_str == "|" {
96 // This token is an OR operator
97 if last_token_was_term {
98 // The `|` operator applies to the previous valid term
99 if current_or_group.is_empty() {
100 // The previous item was a single term; start a new OR group with it
101 if let Some(FuzzyMatch::Term(term)) = matches.pop() {
102 current_or_group.push(term);
103 } else {
104 last_token_was_term = false;
105 continue;
106 }
107 }
108 // If current_or_group was not empty, this `|` continues an existing OR sequence
109 last_token_was_term = false;
110 } else {
111 // Consecutive `|` tokens or `|` after an ignored (empty) term or at query start
112 last_token_was_term = false;
113 }
114 } else {
115 // This token is a potential search term
116 if let Some(parsed_term) = parse_individual_term(token_str) {
117 // A valid, non-empty term was parsed
118 if !last_token_was_term && !current_or_group.is_empty() {
119 // Previous significant token was `|`, and we are in an active OR group. Add this term to it.
120 current_or_group.push(parsed_term);
121 } else {
122 // This term starts a new single item, or follows another single item
123 // Finalize any pending OR group before adding this new term as a single
124 if !current_or_group.is_empty() {
125 matches.push(FuzzyMatch::Or(std::mem::take(&mut current_or_group)));
126 }
127 matches.push(FuzzyMatch::Term(parsed_term));
128 }
129 last_token_was_term = true;
130 } else {
131 last_token_was_term = false;
132 }
133 }
134 }
135
136 // After iterating through all tokens, if an OR group is still pending, finalize it.
137 if !current_or_group.is_empty() {
138 matches.push(FuzzyMatch::Or(current_or_group));
139 }
140
141 matches
142}
143
144/// Parses an individual token string from the query into a [`FuzzyTerm`].
145///
146/// It identifies special characters (`'`, `^`, `$`, `!`) to determine the [`FuzzyTermKind`] and extracts the actual
147/// term string.
148///
149/// Returns `None` if the resulting term string is empty after stripping special characters.
150fn parse_individual_term(token_str: &str) -> Option<FuzzyTerm<'_>> {
151 // Check for most specific patterns first
152 let fuzzy = if let Some(term) = token_str.strip_prefix("!^") {
153 // Handles "!^term"
154 FuzzyTerm {
155 kind: FuzzyTermKind::InversePrefixExact,
156 term,
157 }
158 } else if token_str.starts_with('!') && token_str.ends_with('$') {
159 // Handles "!term$"
160 FuzzyTerm {
161 kind: FuzzyTermKind::InverseSuffixExact,
162 term: &token_str[1..(token_str.len() - 1)],
163 }
164 } else if token_str.starts_with('\'') && token_str.ends_with('\'') {
165 // Handles "'term'"
166 FuzzyTerm {
167 kind: FuzzyTermKind::ExactBoundary,
168 term: &token_str[1..(token_str.len() - 1)],
169 }
170 } else if let Some(term) = token_str.strip_prefix('\'') {
171 // Handles "'term"
172 FuzzyTerm {
173 kind: FuzzyTermKind::Exact,
174 term,
175 }
176 } else if let Some(term) = token_str.strip_prefix('^') {
177 // Handles "^term"
178 FuzzyTerm {
179 kind: FuzzyTermKind::PrefixExact,
180 term,
181 }
182 } else if let Some(term) = token_str.strip_suffix('$') {
183 // Handles "term$"
184 FuzzyTerm {
185 kind: FuzzyTermKind::SuffixExact,
186 term,
187 }
188 } else if let Some(term) = token_str.strip_prefix('!') {
189 // Handles "!term"
190 FuzzyTerm {
191 kind: FuzzyTermKind::InverseExact,
192 term,
193 }
194 } else {
195 // Default: Fuzzy match for "term"
196 FuzzyTerm {
197 kind: FuzzyTermKind::Fuzzy,
198 term: token_str,
199 }
200 };
201 // Skip empty terms
202 if fuzzy.term.is_empty() { None } else { Some(fuzzy) }
203}