1#[derive(Clone, Debug, PartialEq, Eq)]
8pub enum TextQuery {
9 Empty,
11 Term(String),
13 Phrase(String),
15 Not(Box<TextQuery>),
17 And(Vec<TextQuery>),
19 Or(Vec<TextQuery>),
21}
22
23#[derive(Clone, Debug, PartialEq, Eq)]
24enum Token {
25 Word(String),
26 Phrase(String),
27}
28
29impl TextQuery {
30 #[must_use]
36 pub fn parse(raw: &str) -> Self {
37 let tokens = tokenize(raw);
38 if tokens.is_empty() {
39 return Self::Empty;
40 }
41
42 let mut groups = Vec::new();
43 let mut current = Vec::new();
44 let mut index = 0;
45
46 while index < tokens.len() {
47 if is_or_token(&tokens[index]) {
48 let can_split = !current.is_empty() && can_start_or_clause(&tokens, index + 1);
49 if can_split {
50 groups.push(normalize_and(current));
51 current = Vec::new();
52 } else {
53 current.push(Self::Term("OR".to_owned()));
54 }
55 index += 1;
56 continue;
57 }
58
59 let (node, next) =
60 parse_atom_or_literal(&tokens, index, can_negate_from_current(¤t));
61 current.push(node);
62 index = next;
63 }
64
65 if !current.is_empty() {
66 groups.push(normalize_and(current));
67 }
68
69 match groups.len() {
70 0 => Self::Empty,
71 1 => groups.into_iter().next().unwrap_or(Self::Empty),
72 _ => Self::Or(groups),
73 }
74 }
75}
76
77#[must_use]
83pub fn render_text_query_fts5(query: &TextQuery) -> String {
84 render_with_grouping(query, false)
85}
86
87fn render_with_grouping(query: &TextQuery, parenthesize: bool) -> String {
88 match query {
89 TextQuery::Empty => String::new(),
90 TextQuery::Term(term) | TextQuery::Phrase(term) => quote_fts5_literal(term),
91 TextQuery::Not(child) => {
92 let rendered = render_with_grouping(child, true);
93 format!("NOT {rendered}")
94 }
95 TextQuery::And(children) => {
96 let rendered = children
97 .iter()
98 .map(|child| render_with_grouping(child, matches!(child, TextQuery::Or(_))))
99 .collect::<Vec<_>>()
100 .join(" ");
101 if parenthesize && children.len() > 1 {
102 format!("({rendered})")
103 } else {
104 rendered
105 }
106 }
107 TextQuery::Or(children) => {
108 let rendered = children
109 .iter()
110 .map(|child| render_with_grouping(child, matches!(child, TextQuery::And(_))))
111 .collect::<Vec<_>>()
112 .join(" OR ");
113 if parenthesize && children.len() > 1 {
114 format!("({rendered})")
115 } else {
116 rendered
117 }
118 }
119 }
120}
121
122fn quote_fts5_literal(raw: &str) -> String {
123 let escaped = raw.replace('"', "\"\"");
124 format!("\"{escaped}\"")
125}
126
127fn tokenize(raw: &str) -> Vec<Token> {
128 let mut tokens = Vec::new();
129 let chars: Vec<char> = raw.chars().collect();
130 let mut index = 0;
131
132 while index < chars.len() {
133 while index < chars.len() && chars[index].is_whitespace() {
134 index += 1;
135 }
136 if index >= chars.len() {
137 break;
138 }
139
140 if chars[index] == '"' {
141 let start = index + 1;
142 let mut end = start;
143 while end < chars.len() && chars[end] != '"' {
144 end += 1;
145 }
146 if end < chars.len() {
147 let phrase: String = chars[start..end].iter().collect();
148 tokens.push(Token::Phrase(phrase));
149 index = end + 1;
150 continue;
151 }
152 }
153
154 let start = index;
155 while index < chars.len() && !chars[index].is_whitespace() {
156 index += 1;
157 }
158 let word: String = chars[start..index].iter().collect();
159 tokens.push(Token::Word(word));
160 }
161
162 tokens
163}
164
165fn is_or_token(token: &Token) -> bool {
166 matches!(token, Token::Word(word) if word == "OR")
167}
168
169fn can_start_or_clause(tokens: &[Token], index: usize) -> bool {
170 match tokens.get(index) {
171 Some(Token::Phrase(_)) => true,
172 Some(Token::Word(word)) => word != "OR" && word != "NOT",
173 None => false,
174 }
175}
176
177fn can_negate_from_current(current: &[TextQuery]) -> bool {
178 match current.last() {
179 Some(TextQuery::Phrase(_)) => true,
180 Some(TextQuery::Term(term)) => term != "OR" && term != "AND" && term != "NOT",
181 _ => false,
182 }
183}
184
185fn parse_atom_or_literal(tokens: &[Token], index: usize, can_negate: bool) -> (TextQuery, usize) {
186 match tokens.get(index) {
187 Some(Token::Phrase(phrase)) => (TextQuery::Phrase(phrase.clone()), index + 1),
188 Some(Token::Word(word)) if word == "NOT" => {
189 if can_negate {
190 match tokens.get(index + 1) {
191 Some(Token::Phrase(phrase)) => (
192 TextQuery::Not(Box::new(TextQuery::Phrase(phrase.clone()))),
193 index + 2,
194 ),
195 Some(Token::Word(next)) if next != "OR" && next != "NOT" => (
196 TextQuery::Not(Box::new(TextQuery::Term(next.clone()))),
197 index + 2,
198 ),
199 _ => (TextQuery::Term("NOT".to_owned()), index + 1),
200 }
201 } else {
202 (TextQuery::Term("NOT".to_owned()), index + 1)
203 }
204 }
205 Some(Token::Word(word)) => (TextQuery::Term(word.clone()), index + 1),
206 None => (TextQuery::Empty, index),
207 }
208}
209
210fn normalize_and(mut nodes: Vec<TextQuery>) -> TextQuery {
211 match nodes.len() {
212 0 => TextQuery::Empty,
213 1 => nodes.pop().unwrap_or(TextQuery::Empty),
214 _ => TextQuery::And(nodes),
215 }
216}
217
218#[cfg(test)]
219mod tests {
251 use super::{TextQuery, render_text_query_fts5};
252
253 #[test]
257 fn parse_empty_query() {
258 assert_eq!(TextQuery::parse(""), TextQuery::Empty);
259 assert_eq!(TextQuery::parse(" "), TextQuery::Empty);
260 assert_eq!(TextQuery::parse("\t\n \t"), TextQuery::Empty);
261 }
262
263 #[test]
264 fn parse_plain_terms_as_implicit_and() {
265 assert_eq!(
266 TextQuery::parse("budget meeting"),
267 TextQuery::And(vec![
268 TextQuery::Term("budget".into()),
269 TextQuery::Term("meeting".into()),
270 ])
271 );
272 }
273
274 #[test]
275 fn parse_phrase() {
276 assert_eq!(
277 TextQuery::parse("\"release notes\""),
278 TextQuery::Phrase("release notes".into())
279 );
280 }
281
282 #[test]
283 fn parse_or_operator() {
284 assert_eq!(
285 TextQuery::parse("ship OR docs"),
286 TextQuery::Or(vec![
287 TextQuery::Term("ship".into()),
288 TextQuery::Term("docs".into()),
289 ])
290 );
291 }
292
293 #[test]
294 fn parse_not_operator() {
295 assert_eq!(
296 TextQuery::parse("ship NOT blocked"),
297 TextQuery::And(vec![
298 TextQuery::Term("ship".into()),
299 TextQuery::Not(Box::new(TextQuery::Term("blocked".into()))),
300 ])
301 );
302 }
303
304 #[test]
308 fn parse_leading_not_as_literal() {
309 assert_eq!(
310 TextQuery::parse("NOT blocked"),
311 TextQuery::And(vec![
312 TextQuery::Term("NOT".into()),
313 TextQuery::Term("blocked".into()),
314 ])
315 );
316 }
317
318 #[test]
323 fn parse_not_after_or_as_literal() {
324 assert_eq!(
325 TextQuery::parse("ship OR NOT blocked"),
326 TextQuery::And(vec![
327 TextQuery::Term("ship".into()),
328 TextQuery::Term("OR".into()),
329 TextQuery::Term("NOT".into()),
330 TextQuery::Term("blocked".into()),
331 ])
332 );
333 }
334
335 #[test]
338 fn parse_lowercase_or_as_literal() {
339 assert_eq!(
340 TextQuery::parse("ship or docs"),
341 TextQuery::And(vec![
342 TextQuery::Term("ship".into()),
343 TextQuery::Term("or".into()),
344 TextQuery::Term("docs".into()),
345 ])
346 );
347 }
348
349 #[test]
352 fn parse_lowercase_not_as_literal() {
353 assert_eq!(
354 TextQuery::parse("not a ship"),
355 TextQuery::And(vec![
356 TextQuery::Term("not".into()),
357 TextQuery::Term("a".into()),
358 TextQuery::Term("ship".into()),
359 ])
360 );
361 }
362
363 #[test]
364 fn parse_trailing_or_as_literal() {
365 assert_eq!(
366 TextQuery::parse("ship OR"),
367 TextQuery::And(vec![
368 TextQuery::Term("ship".into()),
369 TextQuery::Term("OR".into()),
370 ])
371 );
372 }
373
374 #[test]
375 fn parse_apostrophe_as_literal_term() {
376 assert_eq!(
377 TextQuery::parse("User's name"),
378 TextQuery::And(vec![
379 TextQuery::Term("User's".into()),
380 TextQuery::Term("name".into()),
381 ])
382 );
383 }
384
385 #[test]
388 fn parse_unsupported_column_filter_as_literal() {
389 assert_eq!(
390 TextQuery::parse("col:value"),
391 TextQuery::Term("col:value".into())
392 );
393 }
394
395 #[test]
398 fn parse_unsupported_prefix_as_literal() {
399 assert_eq!(
400 TextQuery::parse("prefix*"),
401 TextQuery::Term("prefix*".into())
402 );
403 }
404
405 #[test]
408 fn parse_near_as_literal() {
409 assert_eq!(
410 TextQuery::parse("a NEAR b"),
411 TextQuery::And(vec![
412 TextQuery::Term("a".into()),
413 TextQuery::Term("NEAR".into()),
414 TextQuery::Term("b".into()),
415 ])
416 );
417 }
418
419 #[test]
423 fn parse_explicit_and_as_literal() {
424 assert_eq!(
425 TextQuery::parse("cats AND dogs OR fish"),
426 TextQuery::Or(vec![
427 TextQuery::And(vec![
428 TextQuery::Term("cats".into()),
429 TextQuery::Term("AND".into()),
430 TextQuery::Term("dogs".into()),
431 ]),
432 TextQuery::Term("fish".into()),
433 ])
434 );
435 }
436
437 #[test]
443 fn parse_phrase_with_or_operator() {
444 assert_eq!(
445 TextQuery::parse("\"release notes\" OR changelog"),
446 TextQuery::Or(vec![
447 TextQuery::Phrase("release notes".into()),
448 TextQuery::Term("changelog".into()),
449 ])
450 );
451 assert_eq!(
452 TextQuery::parse("ship OR \"release notes\""),
453 TextQuery::Or(vec![
454 TextQuery::Term("ship".into()),
455 TextQuery::Phrase("release notes".into()),
456 ])
457 );
458 }
459
460 #[test]
465 fn parse_phrase_with_not_operator() {
466 assert_eq!(
467 TextQuery::parse("ship NOT \"release notes\""),
468 TextQuery::And(vec![
469 TextQuery::Term("ship".into()),
470 TextQuery::Not(Box::new(TextQuery::Phrase("release notes".into()))),
471 ])
472 );
473 assert_eq!(
474 render_text_query_fts5(&TextQuery::parse("ship NOT \"release notes\"")),
475 "\"ship\" NOT \"release notes\""
476 );
477 }
478
479 #[test]
480 fn render_term_query() {
481 assert_eq!(
482 render_text_query_fts5(&TextQuery::Term("budget".into())),
483 "\"budget\""
484 );
485 }
486
487 #[test]
488 fn render_phrase_query() {
489 assert_eq!(
490 render_text_query_fts5(&TextQuery::Phrase("release notes".into())),
491 "\"release notes\""
492 );
493 }
494
495 #[test]
496 fn render_or_query() {
497 assert_eq!(
498 render_text_query_fts5(&TextQuery::Or(vec![
499 TextQuery::Term("ship".into()),
500 TextQuery::Term("docs".into()),
501 ])),
502 "\"ship\" OR \"docs\""
503 );
504 }
505
506 #[test]
507 fn render_not_query() {
508 assert_eq!(
509 render_text_query_fts5(&TextQuery::And(vec![
510 TextQuery::Term("ship".into()),
511 TextQuery::Not(Box::new(TextQuery::Term("blocked".into()))),
512 ])),
513 "\"ship\" NOT \"blocked\""
514 );
515 }
516
517 #[test]
518 fn render_escapes_embedded_quotes() {
519 assert_eq!(
520 render_text_query_fts5(&TextQuery::Term("say \"hello\"".into())),
521 "\"say \"\"hello\"\"\""
522 );
523 }
524
525 #[test]
529 fn render_leading_not_literalized_parse_safely() {
530 assert_eq!(
531 render_text_query_fts5(&TextQuery::parse("NOT blocked")),
532 "\"NOT\" \"blocked\""
533 );
534 }
535
536 #[test]
539 fn render_lowercase_not_as_literal_terms() {
540 assert_eq!(
541 render_text_query_fts5(&TextQuery::parse("not a ship")),
542 "\"not\" \"a\" \"ship\""
543 );
544 }
545}