intelli_shell/utils/fuzzy.rs
1/// Represents a component of a fuzzy search query.
2///
3/// A query is parsed into a sequence of `FuzzyMatch` items.
4///
5/// These items are implicitly ANDed together when performing a search, unless they are part of an `Or` variant.
6#[derive(PartialEq, Eq, Debug)]
7pub enum FuzzyMatch<'a> {
8 /// A single search term with a specific matching strategy
9 Term(FuzzyTerm<'a>),
10 /// A collection of terms where at least one must match (logical OR)
11 Or(Vec<FuzzyTerm<'a>>),
12}
13
14/// Represents an individual search term and its associated matching semantics
15#[derive(PartialEq, Eq, Debug)]
16pub struct FuzzyTerm<'a> {
17 /// The kind of fuzzy matching to apply to this term
18 pub kind: FuzzyTermKind,
19 /// The string slice representing the term to match
20 pub term: &'a str,
21}
22
23/// Defines the different kinds of matching strategies for a [`FuzzyTerm`].
24///
25/// The specific syntax used in the query string determines the `FuzzyTermKind`.
26#[derive(Clone, Copy, PartialEq, Eq, Default, Debug)]
27pub enum FuzzyTermKind {
28 /// Chars must be found in the same order, but not necessarily consecutively
29 #[default]
30 Fuzzy,
31 /// The search input must contain the term exactly
32 Exact,
33 /// The term must appear as a whole word, respecting word boundaries
34 ExactBoundary,
35 /// The search input must start with this term
36 PrefixExact,
37 /// The search input must end with this term
38 SuffixExact,
39 /// The search input must *not* contain this term exactly
40 InverseExact,
41 /// The search input must *not* start with this term
42 InversePrefixExact,
43 /// The search input must *not* end with this term
44 InverseSuffixExact,
45}
46
47/// Parses a fuzzy search query string into a vector of [`FuzzyMatch`] items.
48///
49/// The query is split by spaces. Terms are implicitly ANDed.
50///
51/// A `|` token acts as an OR operator, grouping the term before it with the terms after it until the next non-OR term
52/// or end of query.
53///
54/// # Examples
55///
56/// ```rust
57/// # use intelli_shell::utils::{FuzzyMatch, FuzzyTerm, FuzzyTermKind, parse_fuzzy_query};
58/// assert_eq!(
59/// parse_fuzzy_query("foo bar"),
60/// vec![
61/// FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "foo" }),
62/// FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "bar" }),
63/// ]
64/// );
65///
66/// assert_eq!(
67/// parse_fuzzy_query("foo | bar"),
68/// vec![FuzzyMatch::Or(vec![
69/// FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "foo" },
70/// FuzzyTerm { kind: FuzzyTermKind::Fuzzy, term: "bar" },
71/// ])],
72/// );
73///
74/// assert_eq!(
75/// parse_fuzzy_query("^core go$ | rb$ | py$"),
76/// vec![
77/// FuzzyMatch::Term(FuzzyTerm { kind: FuzzyTermKind::PrefixExact, term: "core" }),
78/// FuzzyMatch::Or(vec![
79/// FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "go" },
80/// FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "rb" },
81/// FuzzyTerm { kind: FuzzyTermKind::SuffixExact, term: "py" },
82/// ]),
83/// ]
84/// );
85pub fn parse_fuzzy_query(query: &str) -> Vec<FuzzyMatch<'_>> {
86 let mut matches: Vec<FuzzyMatch<'_>> = Vec::new();
87 let mut current_or_group: Vec<FuzzyTerm<'_>> = Vec::new();
88 // Indicates if the last processed token was a regular term (true) or a `|` operator / start of query (false)
89 let mut last_token_was_term = false;
90
91 for token_str in query.split_whitespace() {
92 if token_str == "|" {
93 // This token is an OR operator
94 if last_token_was_term {
95 // The `|` operator applies to the previous valid term
96 if current_or_group.is_empty() {
97 // The previous item was a single term; start a new OR group with it
98 if let Some(FuzzyMatch::Term(term)) = matches.pop() {
99 current_or_group.push(term);
100 } else {
101 last_token_was_term = false;
102 continue;
103 }
104 }
105 // If current_or_group was not empty, this `|` continues an existing OR sequence
106 last_token_was_term = false;
107 } else {
108 // Consecutive `|` tokens or `|` after an ignored (empty) term or at query start
109 last_token_was_term = false;
110 }
111 } else {
112 // This token is a potential search term
113 if let Some(parsed_term) = parse_individual_term(token_str) {
114 // A valid, non-empty term was parsed
115 if !last_token_was_term && !current_or_group.is_empty() {
116 // Previous significant token was `|`, and we are in an active OR group. Add this term to it.
117 current_or_group.push(parsed_term);
118 } else {
119 // This term starts a new single item, or follows another single item
120 // Finalize any pending OR group before adding this new term as a single
121 if !current_or_group.is_empty() {
122 matches.push(FuzzyMatch::Or(std::mem::take(&mut current_or_group)));
123 }
124 matches.push(FuzzyMatch::Term(parsed_term));
125 }
126 last_token_was_term = true;
127 } else {
128 last_token_was_term = false;
129 }
130 }
131 }
132
133 // After iterating through all tokens, if an OR group is still pending, finalize it.
134 if !current_or_group.is_empty() {
135 matches.push(FuzzyMatch::Or(current_or_group));
136 }
137
138 matches
139}
140
141/// Parses an individual token string from the query into a [`FuzzyTerm`].
142///
143/// It identifies special characters (`'`, `^`, `$`, `!`) to determine the [`FuzzyTermKind`] and extracts the actual
144/// term string.
145///
146/// Returns `None` if the resulting term string is empty after stripping special characters.
147fn parse_individual_term(token_str: &str) -> Option<FuzzyTerm<'_>> {
148 // Check for most specific patterns first
149 let fuzzy = if let Some(term) = token_str.strip_prefix("!^") {
150 // Handles "!^term"
151 FuzzyTerm {
152 kind: FuzzyTermKind::InversePrefixExact,
153 term,
154 }
155 } else if token_str.starts_with('!') && token_str.ends_with('$') {
156 // Handles "!term$"
157 FuzzyTerm {
158 kind: FuzzyTermKind::InverseSuffixExact,
159 term: &token_str[1..(token_str.len() - 1)],
160 }
161 } else if token_str.starts_with('\'') && token_str.ends_with('\'') {
162 // Handles "'term'"
163 FuzzyTerm {
164 kind: FuzzyTermKind::ExactBoundary,
165 term: &token_str[1..(token_str.len() - 1)],
166 }
167 } else if let Some(term) = token_str.strip_prefix('\'') {
168 // Handles "'term"
169 FuzzyTerm {
170 kind: FuzzyTermKind::Exact,
171 term,
172 }
173 } else if let Some(term) = token_str.strip_prefix('^') {
174 // Handles "^term"
175 FuzzyTerm {
176 kind: FuzzyTermKind::PrefixExact,
177 term,
178 }
179 } else if let Some(term) = token_str.strip_suffix('$') {
180 // Handles "term$"
181 FuzzyTerm {
182 kind: FuzzyTermKind::SuffixExact,
183 term,
184 }
185 } else if let Some(term) = token_str.strip_prefix('!') {
186 // Handles "!term"
187 FuzzyTerm {
188 kind: FuzzyTermKind::InverseExact,
189 term,
190 }
191 } else {
192 // Default: Fuzzy match for "term"
193 FuzzyTerm {
194 kind: FuzzyTermKind::Fuzzy,
195 term: token_str,
196 }
197 };
198 // Skip empty terms
199 if fuzzy.term.is_empty() { None } else { Some(fuzzy) }
200}