/// Parsed representation of a user-entered text search.
///
/// Values are produced by [`TextQuery::parse`] and can be turned into query
/// text with `render_text_query_fts5`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TextQuery {
    /// No searchable content; this is what blank input parses to.
    Empty,
    /// A single bare (unquoted) word, kept verbatim.
    Term(String),
    /// The text found between a pair of double quotes, without the quotes.
    Phrase(String),
    /// Negation of the wrapped query.
    Not(Box<TextQuery>),
    /// Conjunction: every child must match.
    And(Vec<TextQuery>),
    /// Disjunction: at least one child must match.
    Or(Vec<TextQuery>),
}
22
/// Lexical unit produced by `tokenize` before any operator handling.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// A whitespace-delimited run of characters (may be an operator keyword).
    Word(String),
    /// The contents of a closed `"..."` pair, without the surrounding quotes.
    Phrase(String),
}
28
29impl TextQuery {
30 #[must_use]
36 pub fn parse(raw: &str) -> Self {
37 let tokens = tokenize(raw);
38 if tokens.is_empty() {
39 return Self::Empty;
40 }
41
42 let mut groups = Vec::new();
43 let mut current = Vec::new();
44 let mut index = 0;
45
46 while index < tokens.len() {
47 if is_or_token(&tokens[index]) {
48 let can_split = !current.is_empty() && can_start_or_clause(&tokens, index + 1);
49 if can_split {
50 groups.push(normalize_and(current));
51 current = Vec::new();
52 } else {
53 current.push(Self::Term("OR".to_owned()));
54 }
55 index += 1;
56 continue;
57 }
58
59 let (node, next) =
60 parse_atom_or_literal(&tokens, index, can_negate_from_current(¤t));
61 current.push(node);
62 index = next;
63 }
64
65 if !current.is_empty() {
66 groups.push(normalize_and(current));
67 }
68
69 match groups.len() {
70 0 => Self::Empty,
71 1 => groups.into_iter().next().unwrap_or(Self::Empty),
72 _ => Self::Or(groups),
73 }
74 }
75}
76
77#[must_use]
83pub fn render_text_query_fts5(query: &TextQuery) -> String {
84 render_with_grouping(query, false)
85}
86
87fn render_with_grouping(query: &TextQuery, parenthesize: bool) -> String {
88 match query {
89 TextQuery::Empty => String::new(),
90 TextQuery::Term(term) | TextQuery::Phrase(term) => quote_fts5_literal(term),
91 TextQuery::Not(child) => {
92 let rendered = render_with_grouping(child, true);
93 format!("NOT {rendered}")
94 }
95 TextQuery::And(children) => {
96 let rendered = children
97 .iter()
98 .map(|child| render_with_grouping(child, matches!(child, TextQuery::Or(_))))
99 .collect::<Vec<_>>()
100 .join(" ");
101 if parenthesize && children.len() > 1 {
102 format!("({rendered})")
103 } else {
104 rendered
105 }
106 }
107 TextQuery::Or(children) => {
108 let rendered = children
109 .iter()
110 .map(|child| render_with_grouping(child, matches!(child, TextQuery::And(_))))
111 .collect::<Vec<_>>()
112 .join(" OR ");
113 if parenthesize && children.len() > 1 {
114 format!("({rendered})")
115 } else {
116 rendered
117 }
118 }
119 }
120}
121
/// Wraps `raw` in double quotes, doubling every embedded `"` so the result
/// is a single quoted string literal.
fn quote_fts5_literal(raw: &str) -> String {
    // Two extra bytes for the surrounding quotes; embedded quotes may grow it.
    let mut quoted = String::with_capacity(raw.len() + 2);
    quoted.push('"');
    for ch in raw.chars() {
        if ch == '"' {
            quoted.push('"');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
126
127fn tokenize(raw: &str) -> Vec<Token> {
128 let mut tokens = Vec::new();
129 let chars: Vec<char> = raw.chars().collect();
130 let mut index = 0;
131
132 while index < chars.len() {
133 while index < chars.len() && chars[index].is_whitespace() {
134 index += 1;
135 }
136 if index >= chars.len() {
137 break;
138 }
139
140 if chars[index] == '"' {
141 let start = index + 1;
142 let mut end = start;
143 while end < chars.len() && chars[end] != '"' {
144 end += 1;
145 }
146 if end < chars.len() {
147 let phrase: String = chars[start..end].iter().collect();
148 tokens.push(Token::Phrase(phrase));
149 index = end + 1;
150 continue;
151 }
152 }
153
154 let start = index;
155 while index < chars.len() && !chars[index].is_whitespace() {
156 index += 1;
157 }
158 let word: String = chars[start..index].iter().collect();
159 tokens.push(Token::Word(word));
160 }
161
162 tokens
163}
164
165fn is_or_token(token: &Token) -> bool {
166 matches!(token, Token::Word(word) if word == "OR")
167}
168
169fn can_start_or_clause(tokens: &[Token], index: usize) -> bool {
170 match tokens.get(index) {
171 Some(Token::Phrase(_)) => true,
172 Some(Token::Word(word)) => word != "OR" && word != "NOT",
173 None => false,
174 }
175}
176
177fn can_negate_from_current(current: &[TextQuery]) -> bool {
178 match current.last() {
179 Some(TextQuery::Phrase(_)) => true,
180 Some(TextQuery::Term(term)) => term != "OR" && term != "AND" && term != "NOT",
181 _ => false,
182 }
183}
184
185fn parse_atom_or_literal(tokens: &[Token], index: usize, can_negate: bool) -> (TextQuery, usize) {
186 match tokens.get(index) {
187 Some(Token::Phrase(phrase)) => (TextQuery::Phrase(phrase.clone()), index + 1),
188 Some(Token::Word(word)) if word == "NOT" => {
189 if can_negate {
190 match tokens.get(index + 1) {
191 Some(Token::Phrase(phrase)) => (
192 TextQuery::Not(Box::new(TextQuery::Phrase(phrase.clone()))),
193 index + 2,
194 ),
195 Some(Token::Word(next)) if next != "OR" && next != "NOT" => (
196 TextQuery::Not(Box::new(TextQuery::Term(next.clone()))),
197 index + 2,
198 ),
199 _ => (TextQuery::Term("NOT".to_owned()), index + 1),
200 }
201 } else {
202 (TextQuery::Term("NOT".to_owned()), index + 1)
203 }
204 }
205 Some(Token::Word(word)) => (TextQuery::Term(word.clone()), index + 1),
206 None => (TextQuery::Empty, index),
207 }
208}
209
210fn normalize_and(mut nodes: Vec<TextQuery>) -> TextQuery {
211 match nodes.len() {
212 0 => TextQuery::Empty,
213 1 => nodes.pop().unwrap_or(TextQuery::Empty),
214 _ => TextQuery::And(nodes),
215 }
216}
217
#[cfg(test)]
mod tests {
    use super::{TextQuery, render_text_query_fts5};

    /// Shorthand for a bare-word leaf.
    fn term(text: &str) -> TextQuery {
        TextQuery::Term(text.to_owned())
    }

    /// Shorthand for a quoted-phrase leaf.
    fn phrase(text: &str) -> TextQuery {
        TextQuery::Phrase(text.to_owned())
    }

    /// Shorthand for a negated bare-word leaf.
    fn not_term(text: &str) -> TextQuery {
        TextQuery::Not(Box::new(term(text)))
    }

    /// Shorthand for an `And` made only of bare-word leaves, in order.
    fn all_terms(words: &[&str]) -> TextQuery {
        TextQuery::And(words.iter().copied().map(term).collect())
    }

    /// Parses `raw` and renders the result in one step.
    fn parse_and_render(raw: &str) -> String {
        render_text_query_fts5(&TextQuery::parse(raw))
    }

    // ---- parsing ----

    #[test]
    fn parse_empty_query() {
        for blank in ["", " "] {
            assert_eq!(TextQuery::parse(blank), TextQuery::Empty);
        }
    }

    #[test]
    fn parse_plain_terms_as_implicit_and() {
        let parsed = TextQuery::parse("budget meeting");
        assert_eq!(parsed, all_terms(&["budget", "meeting"]));
    }

    #[test]
    fn parse_phrase() {
        let parsed = TextQuery::parse("\"release notes\"");
        assert_eq!(parsed, phrase("release notes"));
    }

    #[test]
    fn parse_or_operator() {
        let parsed = TextQuery::parse("ship OR docs");
        assert_eq!(parsed, TextQuery::Or(vec![term("ship"), term("docs")]));
    }

    #[test]
    fn parse_not_operator() {
        let parsed = TextQuery::parse("ship NOT blocked");
        assert_eq!(
            parsed,
            TextQuery::And(vec![term("ship"), not_term("blocked")])
        );
    }

    #[test]
    fn parse_leading_not_as_literal() {
        let parsed = TextQuery::parse("NOT blocked");
        assert_eq!(parsed, all_terms(&["NOT", "blocked"]));
    }

    #[test]
    fn parse_not_after_or_as_literal() {
        let parsed = TextQuery::parse("ship OR NOT blocked");
        assert_eq!(parsed, all_terms(&["ship", "OR", "NOT", "blocked"]));
    }

    #[test]
    fn parse_lowercase_or_as_literal() {
        let parsed = TextQuery::parse("ship or docs");
        assert_eq!(parsed, all_terms(&["ship", "or", "docs"]));
    }

    #[test]
    fn parse_lowercase_not_as_literal() {
        let parsed = TextQuery::parse("not a ship");
        assert_eq!(parsed, all_terms(&["not", "a", "ship"]));
    }

    #[test]
    fn parse_trailing_or_as_literal() {
        let parsed = TextQuery::parse("ship OR");
        assert_eq!(parsed, all_terms(&["ship", "OR"]));
    }

    #[test]
    fn parse_apostrophe_as_literal_term() {
        let parsed = TextQuery::parse("User's name");
        assert_eq!(parsed, all_terms(&["User's", "name"]));
    }

    #[test]
    fn parse_unsupported_column_filter_as_literal() {
        assert_eq!(TextQuery::parse("col:value"), term("col:value"));
    }

    #[test]
    fn parse_unsupported_prefix_as_literal() {
        assert_eq!(TextQuery::parse("prefix*"), term("prefix*"));
    }

    #[test]
    fn parse_near_as_literal() {
        let parsed = TextQuery::parse("a NEAR b");
        assert_eq!(parsed, all_terms(&["a", "NEAR", "b"]));
    }

    #[test]
    fn parse_explicit_and_as_literal() {
        let parsed = TextQuery::parse("cats AND dogs OR fish");
        let expected = TextQuery::Or(vec![
            all_terms(&["cats", "AND", "dogs"]),
            term("fish"),
        ]);
        assert_eq!(parsed, expected);
    }

    // ---- rendering ----

    #[test]
    fn render_term_query() {
        assert_eq!(render_text_query_fts5(&term("budget")), "\"budget\"");
    }

    #[test]
    fn render_phrase_query() {
        assert_eq!(
            render_text_query_fts5(&phrase("release notes")),
            "\"release notes\""
        );
    }

    #[test]
    fn render_or_query() {
        let query = TextQuery::Or(vec![term("ship"), term("docs")]);
        assert_eq!(render_text_query_fts5(&query), "\"ship\" OR \"docs\"");
    }

    #[test]
    fn render_not_query() {
        let query = TextQuery::And(vec![term("ship"), not_term("blocked")]);
        assert_eq!(render_text_query_fts5(&query), "\"ship\" NOT \"blocked\"");
    }

    #[test]
    fn render_escapes_embedded_quotes() {
        assert_eq!(
            render_text_query_fts5(&term("say \"hello\"")),
            "\"say \"\"hello\"\"\""
        );
    }

    #[test]
    fn render_leading_not_literalized_parse_safely() {
        assert_eq!(parse_and_render("NOT blocked"), "\"NOT\" \"blocked\"");
    }

    #[test]
    fn render_lowercase_not_as_literal_terms() {
        assert_eq!(parse_and_render("not a ship"), "\"not\" \"a\" \"ship\"");
    }
}