1use std::collections::HashMap;
2
3use aho_corasick::{AhoCorasickBuilder, AhoCorasickKind};
4use regex::{RegexBuilder, RegexSetBuilder};
5
6use crate::parser::{Expression, Match, MatchType, Search};
7use crate::tokeniser::BoolSym;
8
9pub struct Optimisations {
11 pub coalesce: bool,
13 pub matrix: bool,
15 pub rewrite: bool,
17 pub shake: bool,
19}
20
21impl Default for Optimisations {
22 fn default() -> Self {
23 Self {
24 coalesce: true,
25 matrix: true,
26 rewrite: true,
27 shake: true,
28 }
29 }
30}
31
32pub fn coalesce(expression: Expression, identifiers: &HashMap<String, Expression>) -> Expression {
33 match expression {
34 Expression::BooleanGroup(symbol, expressions) => {
35 let mut scratch = vec![];
36 for expression in expressions {
37 scratch.push(coalesce(expression, identifiers));
38 }
39 Expression::BooleanGroup(symbol, scratch)
40 }
41 Expression::BooleanExpression(left, symbol, right) => {
42 let left = coalesce(*left, identifiers);
43 let right = coalesce(*right, identifiers);
44 Expression::BooleanExpression(Box::new(left), symbol, Box::new(right))
45 }
46 Expression::Identifier(i) => identifiers
47 .get(&i)
48 .expect("could not get identifier")
49 .clone(),
50 Expression::Match(symbol, expression) => {
51 Expression::Match(symbol, Box::new(coalesce(*expression, identifiers)))
52 }
53 Expression::Negate(expression) => {
54 Expression::Negate(Box::new(coalesce(*expression, identifiers)))
55 }
56 Expression::Nested(field, expression) => {
57 Expression::Nested(field, Box::new(coalesce(*expression, identifiers)))
58 }
59 Expression::Boolean(_)
60 | Expression::Cast(_, _)
61 | Expression::Field(_)
62 | Expression::Float(_)
63 | Expression::Integer(_)
64 | Expression::Matrix(_, _)
65 | Expression::Null
66 | Expression::Search(_, _, _) => expression,
67 }
68}
69
70pub fn matrix(expression: Expression) -> Expression {
71 match expression {
72 Expression::BooleanGroup(BoolSym::And, expressions) => {
73 let mut scratch = vec![];
74 for expression in expressions {
75 scratch.push(matrix(expression));
76 }
77 Expression::BooleanGroup(BoolSym::And, scratch)
78 }
79 Expression::BooleanGroup(BoolSym::Or, expressions) => {
80 let mut scratch = vec![];
82 for expression in expressions {
83 scratch.push(matrix(expression));
84 }
85
86 let mut fields: HashMap<String, u32> = HashMap::new();
87 for expression in &scratch {
88 match expression {
89 Expression::BooleanGroup(BoolSym::And, ref expressions) => {
90 let mut valid = true;
91 for expression in expressions {
92 match expression {
93 Expression::BooleanExpression(left, _, right) => {
94 match (&**left, &**right) {
95 (Expression::Cast(_, _), Expression::Boolean(_))
96 | (Expression::Cast(_, _), Expression::Float(_))
97 | (Expression::Cast(_, _), Expression::Integer(_))
98 | (Expression::Cast(_, _), Expression::Null)
99 | (Expression::Field(_), Expression::Boolean(_))
100 | (Expression::Field(_), Expression::Float(_))
101 | (Expression::Field(_), Expression::Integer(_))
102 | (Expression::Field(_), Expression::Null) => {}
103 (_, _) => {
104 valid = false;
105 break;
106 }
107 }
108 }
109 Expression::Nested(_, _) | Expression::Search(_, _, _) => {}
110 _ => {
111 valid = false;
112 break;
113 }
114 }
115 }
116 if valid {
117 for expression in expressions {
118 match expression {
119 Expression::BooleanExpression(left, _, right) => {
120 match (&**left, &**right) {
121 (
122 Expression::Cast(field, _),
123 Expression::Boolean(_),
124 )
125 | (Expression::Cast(field, _), Expression::Float(_))
126 | (
127 Expression::Cast(field, _),
128 Expression::Integer(_),
129 )
130 | (Expression::Cast(field, _), Expression::Null)
131 | (Expression::Field(field), Expression::Boolean(_))
132 | (Expression::Field(field), Expression::Float(_))
133 | (Expression::Field(field), Expression::Integer(_))
134 | (Expression::Field(field), Expression::Null) => {
135 let count =
136 fields.entry(field.clone()).or_insert(0);
137 *count += 1;
138 }
139 (_, _) => {}
140 }
141 }
142 Expression::Nested(field, _)
143 | Expression::Search(_, field, _) => {
144 let count = fields.entry(field.clone()).or_insert(0);
145 *count += 1;
146 }
147 _ => {}
148 }
149 }
150 }
151 }
152 Expression::BooleanExpression(left, _, _) => match **left {
153 Expression::Cast(ref field, _) | Expression::Field(ref field) => {
154 let count = fields.entry(field.clone()).or_insert(0);
155 *count += 1;
156 }
157 _ => {}
158 },
159 Expression::Nested(ref field, _) | Expression::Search(_, ref field, _) => {
160 let count = fields.entry(field.clone()).or_insert(0);
161 *count += 1;
162 }
163 _ => {}
164 }
165 }
166
167 let mut matrix = false;
168 for (_, count) in &fields {
169 if *count > 1 && *count < 256 {
170 matrix = true;
171 }
172 }
173
174 if matrix {
175 let mut columns: Vec<(String, u32)> =
176 fields.into_iter().map(|(k, v)| (k, v)).collect();
177 columns.sort_by(|x, y| x.1.cmp(&y.1));
178 let columns: Vec<String> = columns.into_iter().map(|(c, _)| c).collect();
179 let mut rows = vec![];
180 let mut rest = vec![];
181 for expression in scratch {
182 match expression {
183 Expression::BooleanGroup(BoolSym::And, expressions) => {
184 let mut lookup = HashMap::new();
185 let mut valid = true;
186 for expression in &expressions {
187 match expression {
188 Expression::BooleanExpression(left, _, _) => match **left {
189 Expression::Cast(ref field, _)
190 | Expression::Field(ref field) => {
191 if lookup.contains_key(field) {
192 valid = false;
193 break;
194 }
195 lookup.insert(field.clone(), expression.clone());
196 }
197 _ => {}
198 },
199 Expression::Nested(ref field, _)
200 | Expression::Search(_, ref field, _) => {
201 if lookup.contains_key(field) {
202 valid = false;
203 break;
204 }
205 lookup.insert(field.clone(), expression.clone());
206 }
207 _ => {
208 valid = false;
209 break;
210 }
211 }
212 }
213 if valid {
214 let mut row = vec![];
215 for (i, column) in columns.iter().enumerate() {
216 if let Some(expression) = lookup.remove(column) {
217 let key = std::char::from_u32(i as u32)
218 .expect("could not make key");
219 match expression {
220 Expression::BooleanExpression(left, symbol, right) => {
221 match *left {
222 Expression::Cast(_, kind) => {
223 row.push(Some(
224 Expression::BooleanExpression(
225 Box::new(Expression::Cast(
226 key.to_string(),
227 kind.clone(),
228 )),
229 symbol,
230 right.clone(),
231 ),
232 ));
233 }
234 Expression::Field(_) => {
235 row.push(Some(
236 Expression::BooleanExpression(
237 Box::new(Expression::Field(
238 key.to_string(),
239 )),
240 symbol,
241 right.clone(),
242 ),
243 ));
244 }
245 _ => {}
246 }
247 }
248 Expression::Nested(_, expression) => {
249 row.push(Some(Expression::Nested(
250 key.to_string(),
251 expression,
252 )));
253 }
254 Expression::Search(search, _, cast) => {
255 row.push(Some(Expression::Search(
256 search,
257 key.to_string(),
258 cast,
259 )));
260 }
261 _ => {}
262 }
263 } else {
264 row.push(None);
265 }
266 }
267 rows.push(row);
268 } else {
269 rest.push(Expression::BooleanGroup(BoolSym::And, expressions));
270 }
271 }
272 Expression::BooleanExpression(left, symbol, right) => {
273 match (*left, &*right) {
274 (Expression::Cast(field, kind), Expression::Boolean(_))
275 | (Expression::Cast(field, kind), Expression::Float(_))
276 | (Expression::Cast(field, kind), Expression::Integer(_))
277 | (Expression::Cast(field, kind), Expression::Null) => {
278 let mut row = vec![];
279 for (i, column) in columns.iter().enumerate() {
280 if column == &field {
281 let key = std::char::from_u32(i as u32)
282 .expect("could not make key");
283 row.push(Some(Expression::BooleanExpression(
284 Box::new(Expression::Cast(
285 key.to_string(),
286 kind.clone(),
287 )),
288 symbol,
289 right.clone(),
290 )));
291 } else {
292 row.push(None);
293 }
294 }
295 rows.push(row);
296 }
297 (Expression::Field(field), Expression::Boolean(_))
298 | (Expression::Field(field), Expression::Float(_))
299 | (Expression::Field(field), Expression::Integer(_))
300 | (Expression::Field(field), Expression::Null) => {
301 let mut row = vec![];
302 for (i, column) in columns.iter().enumerate() {
303 if column == &field {
304 let key = std::char::from_u32(i as u32)
305 .expect("could not make key");
306 row.push(Some(Expression::BooleanExpression(
307 Box::new(Expression::Field(key.to_string())),
308 symbol,
309 right.clone(),
310 )));
311 } else {
312 row.push(None);
313 }
314 }
315 rows.push(row);
316 }
317 (left, _) => {
318 rest.push(Expression::BooleanExpression(
319 Box::new(left),
320 symbol,
321 right,
322 ));
323 }
324 }
325 }
326 Expression::Nested(field, expression) => {
327 let mut row = vec![];
328 for (i, column) in columns.iter().enumerate() {
329 if column == &field {
330 let key =
331 std::char::from_u32(i as u32).expect("could not make key");
332 row.push(Some(Expression::Nested(
333 key.to_string(),
334 expression.clone(),
335 )));
336 } else {
337 row.push(None);
338 }
339 }
340 rows.push(row);
341 }
342 Expression::Search(search, field, cast) => {
343 let mut row = vec![];
344 for (i, column) in columns.iter().enumerate() {
345 if column == &field {
346 let key =
347 std::char::from_u32(i as u32).expect("could not make key");
348 row.push(Some(Expression::Search(
349 search.clone(),
350 key.to_string(),
351 cast,
352 )));
353 } else {
354 row.push(None);
355 }
356 }
357 rows.push(row);
358 }
359 _ => rest.push(expression),
360 }
361 }
362
363 let mut expressions = vec![];
364 if !rows.is_empty() {
365 expressions.push(Expression::Matrix(columns, rows));
366 }
367 expressions.extend(rest);
368 if expressions.len() == 1 {
369 expressions
370 .into_iter()
371 .next()
372 .expect("could not get expression")
373 } else {
374 Expression::BooleanGroup(BoolSym::Or, expressions)
375 }
376 } else {
377 Expression::BooleanGroup(BoolSym::Or, scratch)
378 }
379 }
380 Expression::BooleanExpression(left, symbol, right) => {
381 let left = matrix(*left);
382 let right = matrix(*right);
383 Expression::BooleanExpression(Box::new(left), symbol, Box::new(right))
384 }
385 Expression::Match(kind, expression) => match *expression {
386 Expression::BooleanGroup(symbol, expressions) => {
387 let mut scratch = vec![];
388 for expression in expressions {
389 scratch.push(shake_1(expression));
390 }
391 Expression::Match(kind, Box::new(Expression::BooleanGroup(symbol, scratch)))
392 }
393 expression => Expression::Match(kind, Box::new(shake_1(expression))),
394 },
395 Expression::Negate(expression) => Expression::Negate(Box::new(matrix(*expression))),
396 Expression::Nested(field, expression) => {
397 Expression::Nested(field, Box::new(matrix(*expression)))
398 }
399 Expression::Boolean(_)
400 | Expression::BooleanGroup(_, _)
401 | Expression::Cast(_, _)
402 | Expression::Field(_)
403 | Expression::Float(_)
404 | Expression::Identifier(_)
405 | Expression::Integer(_)
406 | Expression::Matrix(_, _)
407 | Expression::Null
408 | Expression::Search(_, _, _) => expression,
409 }
410}
411
412fn rewrite_search(search: Search) -> Search {
413 match search {
414 Search::Regex(regex, insensitive) => {
415 let mut pattern = regex.as_str().to_owned();
416 if let Some(tail) = pattern.strip_prefix(".*") {
417 pattern = tail.to_owned();
418 }
419 if let Some(head) = pattern.strip_suffix(".*") {
420 pattern = head.to_owned();
421 }
422 Search::Regex(
423 RegexBuilder::new(&pattern)
424 .case_insensitive(insensitive)
425 .build()
426 .expect("could not build regex"),
427 insensitive,
428 )
429 }
430 Search::RegexSet(regex, insensitive) => {
431 let mut patterns = vec![];
432 for pattern in regex.patterns() {
433 let mut pattern = pattern.to_owned();
434 if let Some(tail) = pattern.strip_prefix(".*") {
435 pattern = tail.to_owned();
436 }
437 if let Some(head) = pattern.strip_suffix(".*") {
438 pattern = head.to_owned();
439 }
440 patterns.push(pattern);
441 }
442 Search::RegexSet(
443 RegexSetBuilder::new(patterns)
444 .case_insensitive(insensitive)
445 .build()
446 .expect("could not build regex"),
447 insensitive,
448 )
449 }
450 _ => search,
451 }
452}
453
454pub fn rewrite(expression: Expression) -> Expression {
455 match expression {
456 Expression::BooleanGroup(symbol, expressions) => {
457 let mut scratch = vec![];
458 for expression in expressions {
459 let rewriten = rewrite(expression);
460 scratch.push(rewriten);
461 }
462 Expression::BooleanGroup(symbol, scratch)
463 }
464 Expression::BooleanExpression(left, symbol, right) => {
465 let left = rewrite(*left);
466 let right = rewrite(*right);
467 Expression::BooleanExpression(Box::new(left), symbol, Box::new(right))
468 }
469 Expression::Match(symbol, expression) => {
470 Expression::Match(symbol, Box::new(rewrite(*expression)))
471 }
472 Expression::Negate(expression) => Expression::Negate(Box::new(rewrite(*expression))),
473 Expression::Nested(field, expression) => {
474 Expression::Nested(field, Box::new(rewrite(*expression)))
475 }
476 Expression::Search(search, f, c) => Expression::Search(rewrite_search(search), f, c),
477 Expression::Boolean(_)
478 | Expression::Cast(_, _)
479 | Expression::Field(_)
480 | Expression::Float(_)
481 | Expression::Identifier(_)
482 | Expression::Integer(_)
483 | Expression::Matrix(_, _)
484 | Expression::Null => expression,
485 }
486}
487
488pub fn shake(expression: Expression) -> Expression {
489 let expression = shake_0(expression);
493 let expression = shake_1(expression);
494 expression
495}
496
497fn shake_0(expression: Expression) -> Expression {
498 match expression {
499 Expression::BooleanGroup(symbol, expressions) => {
500 let length = expressions.len();
501 let expressions = match symbol {
502 BoolSym::And => {
503 let mut scratch = vec![];
506 for expression in expressions {
507 let shaken = shake_0(expression);
508 scratch.push(shaken);
509 }
510 scratch
511 }
512 BoolSym::Or => {
513 let mut scratch = vec![];
514 for expression in expressions {
515 let shaken = shake_0(expression);
516 scratch.push(shaken);
517 }
518 scratch
519 }
520 _ => unreachable!(),
521 };
522 if expressions.len() != length {
523 shake_0(Expression::BooleanGroup(symbol, expressions))
524 } else if expressions.len() == 1 {
525 expressions
526 .into_iter()
527 .next()
528 .expect("could not get expression")
529 } else {
530 Expression::BooleanGroup(symbol, expressions)
531 }
532 }
533 Expression::BooleanExpression(left, symbol, right) => {
534 let left = shake_0(*left);
535 let right = shake_0(*right);
536 match (left, symbol, right) {
537 (
538 Expression::BooleanGroup(BoolSym::And, mut left),
539 BoolSym::And,
540 Expression::BooleanGroup(BoolSym::And, right),
541 ) => {
542 left.extend(right);
543 shake_0(Expression::BooleanGroup(BoolSym::And, left))
544 }
545 (Expression::BooleanGroup(BoolSym::And, mut left), BoolSym::And, right) => {
546 left.push(right);
547 shake_0(Expression::BooleanGroup(BoolSym::And, left))
548 }
549 (left, BoolSym::And, Expression::BooleanGroup(BoolSym::And, right)) => {
550 let mut left = vec![left];
551 left.extend(right);
552 shake_0(Expression::BooleanGroup(BoolSym::And, left))
553 }
554 (
555 Expression::BooleanGroup(BoolSym::Or, mut left),
556 BoolSym::Or,
557 Expression::BooleanGroup(BoolSym::Or, right),
558 ) => {
559 left.extend(right);
560 shake_0(Expression::BooleanGroup(BoolSym::Or, left))
561 }
562 (Expression::BooleanGroup(BoolSym::Or, mut left), BoolSym::Or, right) => {
563 left.push(right);
564 shake_0(Expression::BooleanGroup(BoolSym::Or, left))
565 }
566 (left, BoolSym::Or, Expression::BooleanGroup(BoolSym::Or, right)) => {
567 let mut left = vec![left];
568 left.extend(right);
569 shake_0(Expression::BooleanGroup(BoolSym::Or, left))
570 }
571 (Expression::BooleanExpression(x, BoolSym::And, y), BoolSym::And, z) => {
572 shake_0(Expression::BooleanGroup(BoolSym::And, vec![*x, *y, z]))
573 }
574 (x, BoolSym::And, Expression::BooleanExpression(y, BoolSym::And, z)) => {
575 shake_0(Expression::BooleanGroup(BoolSym::And, vec![x, *y, *z]))
576 }
577 (Expression::BooleanExpression(x, BoolSym::Or, y), BoolSym::Or, z) => {
578 shake_0(Expression::BooleanGroup(BoolSym::Or, vec![*x, *y, z]))
579 }
580 (x, BoolSym::Or, Expression::BooleanExpression(y, BoolSym::Or, z)) => {
581 shake_0(Expression::BooleanGroup(BoolSym::Or, vec![x, *y, *z]))
582 }
583 (left, _, right) => {
592 Expression::BooleanExpression(Box::new(left), symbol, Box::new(right))
593 }
594 }
595 }
596 Expression::Match(m, expression) => {
597 let expression = shake_0(*expression);
598 Expression::Match(m, Box::new(expression))
599 }
600 Expression::Negate(expression) => {
601 let expression = shake_0(*expression);
602 match expression {
603 Expression::Negate(inner) => shake_0(*inner),
604 _ => Expression::Negate(Box::new(expression)),
605 }
606 }
607 Expression::Nested(field, expression) => {
608 Expression::Nested(field, Box::new(shake_0(*expression)))
609 }
610 Expression::Boolean(_)
611 | Expression::Cast(_, _)
612 | Expression::Field(_)
613 | Expression::Float(_)
614 | Expression::Identifier(_)
615 | Expression::Integer(_)
616 | Expression::Matrix(_, _)
617 | Expression::Null
618 | Expression::Search(_, _, _) => expression,
619 }
620}
621
622fn shake_1(expression: Expression) -> Expression {
623 match expression {
624 Expression::BooleanGroup(BoolSym::And, expressions) => {
628 let length = expressions.len();
629
630 let mut nested = HashMap::new();
631
632 let mut scratch = vec![];
633
634 for expression in expressions {
635 let shaken = shake_1(expression);
636 match shaken {
637 Expression::Nested(field, expression) => {
638 let expressions = nested.entry(field).or_insert(vec![]);
639 (*expressions).push(*expression);
640 }
641 shaken => scratch.push(shaken),
642 };
643 }
644 for (field, expressions) in nested {
645 let shaken = if expressions.len() == 1 {
646 shake_1(
647 expressions
648 .into_iter()
649 .next()
650 .expect("could not get expression"),
651 )
652 } else {
653 shake_1(Expression::Match(
654 Match::All,
655 Box::new(Expression::BooleanGroup(BoolSym::Or, expressions)),
656 ))
657 };
658 scratch.push(Expression::Nested(field, Box::new(shaken)));
659 }
660 if scratch.len() != length {
661 shake_1(Expression::BooleanGroup(BoolSym::And, scratch))
662 } else if scratch.len() == 1 {
663 scratch
664 .into_iter()
665 .next()
666 .expect("could not get expression")
667 } else {
668 Expression::BooleanGroup(BoolSym::And, scratch)
669 }
670 }
671 Expression::BooleanGroup(BoolSym::Or, expressions) => {
672 let length = expressions.len();
673 let expressions = {
674 let mut needles = HashMap::new();
675 let mut nested = HashMap::new();
676 let mut patterns = HashMap::new();
677
678 let mut any = vec![];
681 let mut exact = vec![];
682 let mut starts_with = vec![];
683 let mut ends_with = vec![];
684 let mut contains = vec![];
685 let mut aho = vec![];
686 let mut regex = vec![];
687 let mut regex_set = vec![];
688 let mut rest = vec![];
689
690 for expression in expressions {
691 let shaken = shake_1(expression);
692
693 match shaken {
694 Expression::Nested(field, expression) => {
695 let expressions = nested.entry(field).or_insert(vec![]);
696 (*expressions).push(*expression);
697 }
698 Expression::Search(
699 Search::AhoCorasick(_, contexts, insensitive),
700 field,
701 cast,
702 ) => {
703 let expressions =
704 needles.entry((field, cast, insensitive)).or_insert(vec![]);
705 for context in contexts {
706 let value = context.value().to_owned();
707 (*expressions).push((context.clone(), value));
708 }
709 }
710 Expression::Search(Search::Contains(value), field, cast) => {
711 let expressions = needles.entry((field, cast, false)).or_insert(vec![]);
712 (*expressions).push((MatchType::Contains(value.clone()), value));
713 }
714 Expression::Search(Search::EndsWith(value), field, cast) => {
715 let expressions = needles.entry((field, cast, false)).or_insert(vec![]);
716 (*expressions).push((MatchType::EndsWith(value.clone()), value));
717 }
718 Expression::Search(Search::Exact(value), field, cast) => {
719 let expressions = needles.entry((field, cast, false)).or_insert(vec![]);
720 (*expressions).push((MatchType::Exact(value.clone()), value));
721 }
722 Expression::Search(Search::StartsWith(value), field, cast) => {
723 let expressions = needles.entry((field, cast, false)).or_insert(vec![]);
724 (*expressions).push((MatchType::StartsWith(value.clone()), value));
725 }
726 Expression::Search(Search::Any, _, _) => {
727 any.push(shaken);
728 }
729 Expression::Search(Search::Regex(r, insensitive), field, cast) => {
730 let patterns =
731 patterns.entry((field, cast, insensitive)).or_insert(vec![]);
732 (*patterns).push(r.as_str().to_owned());
733 }
734 Expression::Search(Search::RegexSet(r, insensitive), field, cast) => {
735 let patterns =
736 patterns.entry((field, cast, insensitive)).or_insert(vec![]);
737 for pattern in r.patterns() {
738 (*patterns).push(pattern.to_owned());
739 }
740 }
741 _ => rest.push(shaken),
742 }
743 }
744
745 for ((field, cast, insensitive), searches) in needles {
746 if !insensitive && searches.len() == 1 {
747 let search = searches.into_iter().next().expect("could not get search");
748 match search.0 {
749 MatchType::Contains(v) => {
750 contains.push(Expression::Search(Search::Contains(v), field, cast));
751 }
752 MatchType::EndsWith(v) => {
753 ends_with.push(Expression::Search(
754 Search::EndsWith(v),
755 field,
756 cast,
757 ));
758 }
759 MatchType::Exact(v) => {
760 exact.push(Expression::Search(Search::Exact(v), field, cast));
761 }
762 MatchType::StartsWith(v) => {
763 starts_with.push(Expression::Search(
764 Search::StartsWith(v),
765 field,
766 cast,
767 ));
768 }
769 };
770 } else {
771 let (context, needles): (Vec<_>, Vec<_>) = searches.into_iter().unzip();
772 let expression = Expression::Search(
773 Search::AhoCorasick(
774 Box::new(
775 AhoCorasickBuilder::new()
776 .ascii_case_insensitive(insensitive)
777 .kind(Some(AhoCorasickKind::DFA))
778 .build(needles)
779 .expect("failed to build dfa"),
780 ),
781 context,
782 insensitive,
783 ),
784 field,
785 cast,
786 );
787 aho.push(expression);
788 };
789 }
790
791 for (field, expressions) in nested {
792 let shaken = if expressions.len() == 1 {
793 shake_1(
794 expressions
795 .into_iter()
796 .next()
797 .expect("could not get expression"),
798 )
799 } else {
800 shake_1(Expression::BooleanGroup(BoolSym::Or, expressions))
801 };
802 rest.push(Expression::Nested(field, Box::new(shaken)));
803 }
804
805 for ((field, cast, insensitive), patterns) in patterns {
806 if patterns.len() == 1 {
807 let pattern = patterns.into_iter().next().expect("could not get pattern");
808 let expression = Expression::Search(
809 Search::Regex(
810 RegexBuilder::new(&pattern)
811 .case_insensitive(insensitive)
812 .build()
813 .expect("could not build regex"),
814 insensitive,
815 ),
816 field,
817 cast,
818 );
819 regex.push(expression);
820 } else {
821 let expression = Expression::Search(
822 Search::RegexSet(
823 RegexSetBuilder::new(patterns)
824 .case_insensitive(insensitive)
825 .build()
826 .expect("could not build regex set"),
827 insensitive,
828 ),
829 field,
830 cast,
831 );
832 regex_set.push(expression);
833 }
834 }
835
836 let mut scratch = vec![];
837 scratch.extend(any);
838 exact.sort_by(|x, y| match (x, y) {
839 (
840 Expression::Search(Search::Exact(a), _, _),
841 Expression::Search(Search::Exact(b), _, _),
842 ) => a.len().cmp(&b.len()),
843 _ => std::cmp::Ordering::Equal,
844 });
845 scratch.extend(exact);
846 starts_with.sort_by(|x, y| match (x, y) {
847 (
848 Expression::Search(Search::StartsWith(a), _, _),
849 Expression::Search(Search::StartsWith(b), _, _),
850 ) => a.len().cmp(&b.len()),
851 _ => std::cmp::Ordering::Equal,
852 });
853 scratch.extend(starts_with);
854 ends_with.sort_by(|x, y| match (x, y) {
855 (
856 Expression::Search(Search::EndsWith(a), _, _),
857 Expression::Search(Search::EndsWith(b), _, _),
858 ) => a.len().cmp(&b.len()),
859 _ => std::cmp::Ordering::Equal,
860 });
861 scratch.extend(ends_with);
862 contains.sort_by(|x, y| match (x, y) {
863 (
864 Expression::Search(Search::Contains(a), _, _),
865 Expression::Search(Search::Contains(b), _, _),
866 ) => a.len().cmp(&b.len()),
867 _ => std::cmp::Ordering::Equal,
868 });
869 scratch.extend(contains);
870 aho.sort_by(|x, y| match (x, y) {
871 (
872 Expression::Search(Search::AhoCorasick(_, a, case0), _, _),
873 Expression::Search(Search::AhoCorasick(_, b, case1), _, _),
874 ) => (b.len(), case1).cmp(&(a.len(), case0)),
875 _ => std::cmp::Ordering::Equal,
876 });
877 scratch.extend(aho);
878 regex.sort_by(|x, y| match (x, y) {
879 (
880 Expression::Search(Search::Regex(reg0, case0), _, _),
881 Expression::Search(Search::Regex(reg1, case1), _, _),
882 ) => (reg0.as_str(), case0).cmp(&(reg1.as_str(), case1)),
883 _ => std::cmp::Ordering::Equal,
884 });
885 scratch.extend(regex);
886 regex_set.sort_by(|x, y| match (x, y) {
887 (
888 Expression::Search(Search::RegexSet(set0, case0), _, _),
889 Expression::Search(Search::RegexSet(set1, case1), _, _),
890 ) => (set0.patterns(), case0).cmp(&(set1.patterns(), case1)),
891 _ => std::cmp::Ordering::Equal,
892 });
893 scratch.extend(regex_set);
894 scratch.extend(rest);
895 scratch
896 };
897 if expressions.len() != length {
898 shake_1(Expression::BooleanGroup(BoolSym::Or, expressions))
899 } else if expressions.len() == 1 {
900 expressions
901 .into_iter()
902 .next()
903 .expect("could not get expression")
904 } else {
905 Expression::BooleanGroup(BoolSym::Or, expressions)
906 }
907 }
908 Expression::BooleanGroup(symbol, expressions) => {
909 let mut scratch = vec![];
910 for expression in expressions {
911 scratch.push(shake_1(expression));
912 }
913 Expression::BooleanGroup(symbol, scratch)
914 }
915 Expression::BooleanExpression(left, symbol, right) => {
916 let left = shake_1(*left);
917 let right = shake_1(*right);
918 Expression::BooleanExpression(Box::new(left), symbol, Box::new(right))
919 }
920 Expression::Match(kind, expression) => match *expression {
921 Expression::BooleanGroup(symbol, expressions) => {
922 let mut scratch = vec![];
923 for expression in expressions {
924 scratch.push(shake_1(expression));
925 }
926 Expression::Match(kind, Box::new(Expression::BooleanGroup(symbol, scratch)))
927 }
928 expression => Expression::Match(kind, Box::new(shake_1(expression))),
929 },
930 Expression::Negate(expression) => Expression::Negate(Box::new(shake_1(*expression))),
931 Expression::Nested(field, expression) => {
932 Expression::Nested(field, Box::new(shake_1(*expression)))
933 }
934 Expression::Boolean(_)
935 | Expression::Cast(_, _)
936 | Expression::Field(_)
937 | Expression::Float(_)
938 | Expression::Identifier(_)
939 | Expression::Integer(_)
940 | Expression::Matrix(_, _)
941 | Expression::Null
942 | Expression::Search(_, _, _) => expression,
943 }
944}
945
946#[cfg(test)]
947mod tests {
948 use super::*;
949
950 #[test]
951 fn coalesce_basic() {
952 let mut identifiers = HashMap::new();
953 identifiers.insert(
954 "A".to_owned(),
955 Expression::BooleanExpression(
956 Box::new(Expression::Field("count".to_owned())),
957 BoolSym::Equal,
958 Box::new(Expression::Integer(1)),
959 ),
960 );
961 let expression = Expression::Identifier("A".to_owned());
962
963 let coalesced = coalesce(expression, &identifiers);
964
965 let expected = Expression::BooleanExpression(
966 Box::new(Expression::Field("count".to_owned())),
967 BoolSym::Equal,
968 Box::new(Expression::Integer(1)),
969 );
970
971 assert_eq!(coalesced, expected);
972 }
973
974 #[test]
994 fn shake_ands() {
995 let expression = Expression::BooleanExpression(
996 Box::new(Expression::Null),
997 BoolSym::And,
998 Box::new(Expression::Null),
999 );
1000 let shaken = shake(expression);
1001
1002 let expected = Expression::BooleanExpression(
1003 Box::new(Expression::Null),
1004 BoolSym::And,
1005 Box::new(Expression::Null),
1006 );
1007
1008 assert_eq!(shaken, expected);
1009
1010 let expression = Expression::BooleanExpression(
1011 Box::new(Expression::Null),
1012 BoolSym::And,
1013 Box::new(Expression::BooleanExpression(
1014 Box::new(Expression::Null),
1015 BoolSym::And,
1016 Box::new(Expression::Null),
1017 )),
1018 );
1019 let shaken = shake(expression);
1020
1021 let expected = Expression::BooleanGroup(
1022 BoolSym::And,
1023 vec![Expression::Null, Expression::Null, Expression::Null],
1024 );
1025
1026 assert_eq!(shaken, expected);
1027 }
1028
1029 #[test]
1030 fn shake_ors() {
1031 let expression = Expression::BooleanExpression(
1032 Box::new(Expression::Null),
1033 BoolSym::Or,
1034 Box::new(Expression::Null),
1035 );
1036 let shaken = shake(expression);
1037
1038 let expected = Expression::BooleanExpression(
1039 Box::new(Expression::Null),
1040 BoolSym::Or,
1041 Box::new(Expression::Null),
1042 );
1043
1044 assert_eq!(shaken, expected);
1045
1046 let expression = Expression::BooleanExpression(
1047 Box::new(Expression::Null),
1048 BoolSym::Or,
1049 Box::new(Expression::BooleanExpression(
1050 Box::new(Expression::Null),
1051 BoolSym::Or,
1052 Box::new(Expression::Null),
1053 )),
1054 );
1055 let shaken = shake(expression);
1056
1057 let expected = Expression::BooleanGroup(
1058 BoolSym::Or,
1059 vec![Expression::Null, Expression::Null, Expression::Null],
1060 );
1061
1062 assert_eq!(shaken, expected);
1063 }
1064
1065 #[test]
1066 fn shake_group_of_nested() {
1067 let expression = Expression::BooleanGroup(
1068 BoolSym::Or,
1069 vec![
1070 Expression::Nested(
1071 "ids".to_owned(),
1072 Box::new(Expression::Search(
1073 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bca".to_owned()),
1074 "id".to_owned(),
1075 false,
1076 )),
1077 ),
1078 Expression::Nested(
1079 "ids".to_owned(),
1080 Box::new(Expression::Search(
1081 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcb".to_owned()),
1082 "id".to_owned(),
1083 false,
1084 )),
1085 ),
1086 Expression::Nested(
1087 "ids".to_owned(),
1088 Box::new(Expression::Search(
1089 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcc".to_owned()),
1090 "id".to_owned(),
1091 false,
1092 )),
1093 ),
1094 ],
1095 );
1096 let shaken = shake(expression);
1097
1098 let expected = Expression::Nested(
1099 "ids".to_owned(),
1100 Box::new(Expression::Search(
1101 Search::AhoCorasick(
1102 Box::new(
1103 AhoCorasickBuilder::new()
1104 .kind(Some(AhoCorasickKind::DFA))
1105 .build(vec![
1106 "e2ec14cb-299e-4adf-bb09-04a6a8417bca",
1107 "e2ec14cb-299e-4adf-bb09-04a6a8417bcb",
1108 "e2ec14cb-299e-4adf-bb09-04a6a8417bcc",
1109 ])
1110 .expect("failed to build dfa"),
1111 ),
1112 vec![
1113 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bca".to_owned()),
1114 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcb".to_owned()),
1115 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcc".to_owned()),
1116 ],
1117 false,
1118 ),
1119 "id".to_owned(),
1120 false,
1121 )),
1122 );
1123
1124 assert_eq!(shaken, expected);
1125 }
1126
1127 #[test]
1128 fn shake_group_or() {
1129 let expression = Expression::BooleanGroup(
1131 BoolSym::Or,
1132 vec![
1133 Expression::Search(
1134 Search::AhoCorasick(
1135 Box::new(
1136 AhoCorasickBuilder::new()
1137 .kind(Some(AhoCorasickKind::DFA))
1138 .ascii_case_insensitive(false)
1139 .build(vec![
1140 "Quick".to_owned(),
1141 "Brown".to_owned(),
1142 "Fox".to_owned(),
1143 ])
1144 .expect("failed to build dfa"),
1145 ),
1146 vec![
1147 MatchType::Contains("Quick".to_owned()),
1148 MatchType::Exact("Brown".to_owned()),
1149 MatchType::EndsWith("Fox".to_owned()),
1150 ],
1151 false,
1152 ),
1153 "name".to_owned(),
1154 false,
1155 ),
1156 Expression::Search(
1157 Search::AhoCorasick(
1158 Box::new(
1159 AhoCorasickBuilder::new()
1160 .kind(Some(AhoCorasickKind::DFA))
1161 .ascii_case_insensitive(true)
1162 .build(vec![
1163 "quick".to_owned(),
1164 "brown".to_owned(),
1165 "fox".to_owned(),
1166 ])
1167 .expect("failed to build dfa"),
1168 ),
1169 vec![
1170 MatchType::Contains("quick".to_owned()),
1171 MatchType::Exact("brown".to_owned()),
1172 MatchType::EndsWith("fox".to_owned()),
1173 ],
1174 true,
1175 ),
1176 "name".to_owned(),
1177 false,
1178 ),
1179 Expression::Search(Search::Any, "name".to_owned(), false),
1180 Expression::Search(Search::Contains("afoo".to_owned()), "a".to_owned(), false),
1181 Expression::Search(Search::Contains("foo".to_owned()), "name".to_owned(), false),
1182 Expression::Search(Search::EndsWith("bbar".to_owned()), "b".to_owned(), false),
1183 Expression::Search(Search::EndsWith("bar".to_owned()), "name".to_owned(), false),
1184 Expression::Search(Search::Exact("cbaz".to_owned()), "c".to_owned(), false),
1185 Expression::Search(Search::Exact("baz".to_owned()), "name".to_owned(), false),
1186 Expression::Search(
1187 Search::Regex(
1188 RegexBuilder::new("foo")
1189 .case_insensitive(false)
1190 .build()
1191 .unwrap(),
1192 false,
1193 ),
1194 "name".to_owned(),
1195 false,
1196 ),
1197 Expression::Search(
1198 Search::Regex(
1199 RegexBuilder::new("bar")
1200 .case_insensitive(true)
1201 .build()
1202 .unwrap(),
1203 true,
1204 ),
1205 "name".to_owned(),
1206 false,
1207 ),
1208 Expression::Search(
1209 Search::RegexSet(
1210 RegexSetBuilder::new(vec!["lorem"])
1211 .case_insensitive(false)
1212 .build()
1213 .unwrap(),
1214 false,
1215 ),
1216 "name".to_owned(),
1217 false,
1218 ),
1219 Expression::Search(
1220 Search::RegexSet(
1221 RegexSetBuilder::new(vec!["ipsum"])
1222 .case_insensitive(true)
1223 .build()
1224 .unwrap(),
1225 true,
1226 ),
1227 "name".to_owned(),
1228 false,
1229 ),
1230 Expression::Search(
1231 Search::StartsWith("dfoobar".to_owned()),
1232 "d".to_owned(),
1233 false,
1234 ),
1235 Expression::Search(
1236 Search::StartsWith("foobar".to_owned()),
1237 "name".to_owned(),
1238 false,
1239 ),
1240 ],
1241 );
1242 let shaken = shake(expression);
1243
1244 let expected = Expression::BooleanGroup(
1245 BoolSym::Or,
1246 vec![
1247 Expression::Search(Search::Any, "name".to_owned(), false),
1248 Expression::Search(Search::Exact("cbaz".to_owned()), "c".to_owned(), false),
1249 Expression::Search(
1250 Search::StartsWith("dfoobar".to_owned()),
1251 "d".to_owned(),
1252 false,
1253 ),
1254 Expression::Search(Search::EndsWith("bbar".to_owned()), "b".to_owned(), false),
1255 Expression::Search(Search::Contains("afoo".to_owned()), "a".to_owned(), false),
1256 Expression::Search(
1257 Search::AhoCorasick(
1258 Box::new(
1259 AhoCorasickBuilder::new()
1260 .kind(Some(AhoCorasickKind::DFA))
1261 .ascii_case_insensitive(false)
1262 .build(vec![
1263 "Quick".to_owned(),
1264 "Brown".to_owned(),
1265 "Fox".to_owned(),
1266 "foo".to_owned(),
1267 "bar".to_owned(),
1268 "baz".to_owned(),
1269 "foobar".to_owned(),
1270 ])
1271 .expect("failed to build dfa"),
1272 ),
1273 vec![
1274 MatchType::Contains("Quick".to_owned()),
1275 MatchType::Exact("Brown".to_owned()),
1276 MatchType::EndsWith("Fox".to_owned()),
1277 MatchType::Contains("foo".to_owned()),
1278 MatchType::EndsWith("bar".to_owned()),
1279 MatchType::Exact("baz".to_owned()),
1280 MatchType::StartsWith("foobar".to_owned()),
1281 ],
1282 false,
1283 ),
1284 "name".to_owned(),
1285 false,
1286 ),
1287 Expression::Search(
1288 Search::AhoCorasick(
1289 Box::new(
1290 AhoCorasickBuilder::new()
1291 .kind(Some(AhoCorasickKind::DFA))
1292 .ascii_case_insensitive(true)
1293 .build(vec![
1294 "quick".to_owned(),
1295 "brown".to_owned(),
1296 "fox".to_owned(),
1297 ])
1298 .expect("failed to build dfa"),
1299 ),
1300 vec![
1301 MatchType::Contains("quick".to_owned()),
1302 MatchType::Exact("brown".to_owned()),
1303 MatchType::EndsWith("fox".to_owned()),
1304 ],
1305 true,
1306 ),
1307 "name".to_owned(),
1308 false,
1309 ),
1310 Expression::Search(
1311 Search::RegexSet(
1312 RegexSetBuilder::new(vec!["bar", "ipsum"])
1313 .case_insensitive(true)
1314 .build()
1315 .unwrap(),
1316 true,
1317 ),
1318 "name".to_owned(),
1319 false,
1320 ),
1321 Expression::Search(
1322 Search::RegexSet(
1323 RegexSetBuilder::new(vec!["foo", "lorem"])
1324 .case_insensitive(false)
1325 .build()
1326 .unwrap(),
1327 false,
1328 ),
1329 "name".to_owned(),
1330 false,
1331 ),
1332 ],
1333 );
1334
1335 assert_eq!(shaken, expected);
1336 }
1337
1338 #[test]
1339 fn shake_group_or_1() {
1340 let expression = Expression::BooleanGroup(BoolSym::Or, vec![Expression::Null]);
1342 let shaken = shake(expression);
1343
1344 let expected = Expression::Null;
1345
1346 assert_eq!(shaken, expected);
1347 }
1348
1349 #[test]
1350 fn shake_nested() {
1351 let expression = Expression::Nested(
1352 "ids".to_owned(),
1353 Box::new(Expression::BooleanGroup(
1354 BoolSym::Or,
1355 vec![
1356 Expression::Search(
1357 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bca".to_owned()),
1358 "id".to_owned(),
1359 false,
1360 ),
1361 Expression::Search(
1362 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcb".to_owned()),
1363 "id".to_owned(),
1364 false,
1365 ),
1366 Expression::Search(
1367 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcc".to_owned()),
1368 "id".to_owned(),
1369 false,
1370 ),
1371 ],
1372 )),
1373 );
1374 let shaken = shake(expression);
1375
1376 let expected = Expression::Nested(
1377 "ids".to_owned(),
1378 Box::new(Expression::Search(
1379 Search::AhoCorasick(
1380 Box::new(
1381 AhoCorasickBuilder::new()
1382 .kind(Some(AhoCorasickKind::DFA))
1383 .build(vec![
1384 "e2ec14cb-299e-4adf-bb09-04a6a8417bca",
1385 "e2ec14cb-299e-4adf-bb09-04a6a8417bcb",
1386 "e2ec14cb-299e-4adf-bb09-04a6a8417bcc",
1387 ])
1388 .expect("failed to build dfa"),
1389 ),
1390 vec![
1391 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bca".to_owned()),
1392 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcb".to_owned()),
1393 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcc".to_owned()),
1394 ],
1395 false,
1396 ),
1397 "id".to_owned(),
1398 false,
1399 )),
1400 );
1401
1402 assert_eq!(shaken, expected);
1403 }
1404
1405 #[test]
1406 fn shake_match() {
1407 let expression = Expression::Match(
1408 Match::All,
1409 Box::new(Expression::BooleanGroup(
1410 BoolSym::Or,
1411 vec![Expression::Null, Expression::Null],
1412 )),
1413 );
1414 let shaken = shake(expression);
1415
1416 let expected = Expression::Match(
1417 Match::All,
1418 Box::new(Expression::BooleanGroup(
1419 BoolSym::Or,
1420 vec![Expression::Null, Expression::Null],
1421 )),
1422 );
1423
1424 assert_eq!(shaken, expected);
1425 }
1426
1427 #[test]
1428 fn shake_negate() {
1429 let expression =
1430 Expression::Negate(Box::new(Expression::Negate(Box::new(Expression::Null))));
1431 let shaken = shake(expression);
1432
1433 let expected = Expression::Null;
1434
1435 assert_eq!(shaken, expected);
1436 }
1437
1438 #[test]
1439 fn rewrite_regex() {
1440 let expression = Expression::Search(
1441 Search::Regex(RegexBuilder::new(".*foo.*").build().unwrap(), false),
1442 "name".to_owned(),
1443 false,
1444 );
1445 let rewriten = rewrite(expression);
1446
1447 let expected = Expression::Search(
1448 Search::Regex(RegexBuilder::new("foo").build().unwrap(), false),
1449 "name".to_owned(),
1450 false,
1451 );
1452
1453 assert_eq!(rewriten, expected);
1454
1455 let expression = Expression::Search(
1456 Search::RegexSet(
1457 RegexSetBuilder::new(vec![".*foo.*"]).build().unwrap(),
1458 false,
1459 ),
1460 "name".to_owned(),
1461 false,
1462 );
1463 let rewriten = rewrite(expression);
1464
1465 let expected = Expression::Search(
1466 Search::RegexSet(RegexSetBuilder::new(vec!["foo"]).build().unwrap(), false),
1467 "name".to_owned(),
1468 false,
1469 );
1470
1471 assert_eq!(rewriten, expected);
1472 }
1473
1474 #[test]
1475 fn rewrite_rule_0() {
1476 let expression = Expression::BooleanExpression(
1477 Box::new(Expression::BooleanExpression(
1478 Box::new(Expression::Match(
1479 Match::All,
1480 Box::new(Expression::Search(
1481 Search::Exact("a".to_owned()),
1482 "a".to_owned(),
1483 false,
1484 )),
1485 )),
1486 BoolSym::Or,
1487 Box::new(Expression::Null),
1488 )),
1489 BoolSym::And,
1490 Box::new(Expression::Negate(Box::new(Expression::Null))),
1491 );
1492 let shaken = shake(expression);
1493
1494 let expected = Expression::BooleanExpression(
1495 Box::new(Expression::BooleanExpression(
1496 Box::new(Expression::Match(
1497 Match::All,
1498 Box::new(Expression::Search(
1499 Search::Exact("a".to_owned()),
1500 "a".to_owned(),
1501 false,
1502 )),
1503 )),
1504 BoolSym::Or,
1505 Box::new(Expression::Null),
1506 )),
1507 BoolSym::And,
1508 Box::new(Expression::Negate(Box::new(Expression::Null))),
1509 );
1510
1511 assert_eq!(shaken, expected);
1512 }
1513
1514 #[test]
1515 fn rewrite_rule_edge_0() {
1516 let expression = Expression::Negate(Box::new(Expression::BooleanGroup(
1518 BoolSym::Or,
1519 vec![Expression::BooleanGroup(
1520 BoolSym::And,
1521 vec![
1522 Expression::Search(Search::Exact("a".to_owned()), "a".to_owned(), false),
1523 Expression::Search(Search::Exact("b".to_owned()), "b".to_owned(), false),
1524 ],
1525 )],
1526 )));
1527 let shaken = shake(expression);
1528
1529 let expected = Expression::Negate(Box::new(Expression::BooleanGroup(
1530 BoolSym::And,
1531 vec![
1532 Expression::Search(Search::Exact("a".to_owned()), "a".to_owned(), false),
1533 Expression::Search(Search::Exact("b".to_owned()), "b".to_owned(), false),
1534 ],
1535 )));
1536
1537 assert_eq!(shaken, expected);
1538 }
1539}