1use std::collections::HashMap;
2
3use aho_corasick::{AhoCorasickBuilder, AhoCorasickKind};
4use regex::{RegexBuilder, RegexSetBuilder};
5
6use crate::parser::{Expression, Match, MatchType, Search};
7use crate::tokeniser::BoolSym;
8
9pub struct Optimisations {
11 pub coalesce: bool,
13 pub matrix: bool,
15 pub rewrite: bool,
17 pub shake: bool,
19}
20
21impl Default for Optimisations {
22 fn default() -> Self {
23 Self {
24 coalesce: true,
25 matrix: true,
26 rewrite: true,
27 shake: true,
28 }
29 }
30}
31
32pub fn coalesce(expression: Expression, identifiers: &HashMap<String, Expression>) -> Expression {
33 match expression {
34 Expression::BooleanGroup(symbol, expressions) => {
35 let mut scratch = vec![];
36 for expression in expressions {
37 scratch.push(coalesce(expression, identifiers));
38 }
39 Expression::BooleanGroup(symbol, scratch)
40 }
41 Expression::BooleanExpression(left, symbol, right) => {
42 let left = coalesce(*left, identifiers);
43 let right = coalesce(*right, identifiers);
44 Expression::BooleanExpression(Box::new(left), symbol, Box::new(right))
45 }
46 Expression::Identifier(i) => identifiers
47 .get(&i)
48 .expect("could not get identifier")
49 .clone(),
50 Expression::Match(symbol, expression) => {
51 Expression::Match(symbol, Box::new(coalesce(*expression, identifiers)))
52 }
53 Expression::Negate(expression) => {
54 Expression::Negate(Box::new(coalesce(*expression, identifiers)))
55 }
56 Expression::Nested(field, expression) => {
57 Expression::Nested(field, Box::new(coalesce(*expression, identifiers)))
58 }
59 Expression::Boolean(_)
60 | Expression::Cast(_, _)
61 | Expression::Field(_)
62 | Expression::Float(_)
63 | Expression::Integer(_)
64 | Expression::Matrix(_, _)
65 | Expression::Null
66 | Expression::Search(_, _, _) => expression,
67 }
68}
69
70pub fn matrix(expression: Expression) -> Expression {
71 match expression {
72 Expression::BooleanGroup(BoolSym::And, expressions) => {
73 let mut scratch = vec![];
74 for expression in expressions {
75 scratch.push(matrix(expression));
76 }
77 Expression::BooleanGroup(BoolSym::And, scratch)
78 }
79 Expression::BooleanGroup(BoolSym::Or, expressions) => {
80 let mut scratch = vec![];
82 for expression in expressions {
83 scratch.push(matrix(expression));
84 }
85
86 let mut fields: HashMap<String, u32> = HashMap::new();
87 for expression in &scratch {
88 match expression {
89 Expression::BooleanGroup(BoolSym::And, expressions) => {
90 let mut valid = true;
91 for expression in expressions {
92 match expression {
93 Expression::BooleanExpression(left, _, right) => {
94 match (&**left, &**right) {
95 (Expression::Cast(_, _), Expression::Boolean(_))
96 | (Expression::Cast(_, _), Expression::Float(_))
97 | (Expression::Cast(_, _), Expression::Integer(_))
98 | (Expression::Cast(_, _), Expression::Null)
99 | (Expression::Field(_), Expression::Boolean(_))
100 | (Expression::Field(_), Expression::Float(_))
101 | (Expression::Field(_), Expression::Integer(_))
102 | (Expression::Field(_), Expression::Null) => {}
103 (_, _) => {
104 valid = false;
105 break;
106 }
107 }
108 }
109 Expression::Nested(_, _) | Expression::Search(_, _, _) => {}
110 _ => {
111 valid = false;
112 break;
113 }
114 }
115 }
116 if valid {
117 for expression in expressions {
118 match expression {
119 Expression::BooleanExpression(left, _, right) => {
120 match (&**left, &**right) {
121 (
122 Expression::Cast(field, _),
123 Expression::Boolean(_),
124 )
125 | (Expression::Cast(field, _), Expression::Float(_))
126 | (
127 Expression::Cast(field, _),
128 Expression::Integer(_),
129 )
130 | (Expression::Cast(field, _), Expression::Null)
131 | (Expression::Field(field), Expression::Boolean(_))
132 | (Expression::Field(field), Expression::Float(_))
133 | (Expression::Field(field), Expression::Integer(_))
134 | (Expression::Field(field), Expression::Null) => {
135 let count =
136 fields.entry(field.clone()).or_insert(0);
137 *count += 1;
138 }
139 (_, _) => {}
140 }
141 }
142 Expression::Nested(field, _)
143 | Expression::Search(_, field, _) => {
144 let count = fields.entry(field.clone()).or_insert(0);
145 *count += 1;
146 }
147 _ => {}
148 }
149 }
150 }
151 }
152 Expression::BooleanExpression(left, _, _) => match **left {
153 Expression::Cast(ref field, _) | Expression::Field(ref field) => {
154 let count = fields.entry(field.clone()).or_insert(0);
155 *count += 1;
156 }
157 _ => {}
158 },
159 Expression::Nested(field, _) | Expression::Search(_, field, _) => {
160 let count = fields.entry(field.clone()).or_insert(0);
161 *count += 1;
162 }
163 _ => {}
164 }
165 }
166
167 let mut matrix = false;
168 for count in fields.values() {
169 if *count > 1 && *count < 256 {
170 matrix = true;
171 }
172 }
173
174 if matrix {
175 let mut columns: Vec<(String, u32)> = fields.into_iter().collect();
176 columns.sort_by(|x, y| x.1.cmp(&y.1));
177 let columns: Vec<String> = columns.into_iter().map(|(c, _)| c).collect();
178 let mut rows = vec![];
179 let mut rest = vec![];
180 for expression in scratch {
181 match expression {
182 Expression::BooleanGroup(BoolSym::And, expressions) => {
183 let mut lookup = HashMap::new();
184 let mut valid = true;
185 for expression in &expressions {
186 match expression {
187 Expression::BooleanExpression(left, _, _) => match **left {
188 Expression::Cast(ref field, _)
189 | Expression::Field(ref field) => {
190 if lookup.contains_key(field) {
191 valid = false;
192 break;
193 }
194 lookup.insert(field.clone(), expression.clone());
195 }
196 _ => {}
197 },
198 Expression::Nested(field, _)
199 | Expression::Search(_, field, _) => {
200 if lookup.contains_key(field) {
201 valid = false;
202 break;
203 }
204 lookup.insert(field.clone(), expression.clone());
205 }
206 _ => {
207 valid = false;
208 break;
209 }
210 }
211 }
212 if valid {
213 let mut row = vec![];
214 for (i, column) in columns.iter().enumerate() {
215 if let Some(expression) = lookup.remove(column) {
216 let key = std::char::from_u32(i as u32)
217 .expect("could not make key");
218 match expression {
219 Expression::BooleanExpression(left, symbol, right) => {
220 match *left {
221 Expression::Cast(_, kind) => {
222 row.push(Some(
223 Expression::BooleanExpression(
224 Box::new(Expression::Cast(
225 key.to_string(),
226 kind.clone(),
227 )),
228 symbol,
229 right.clone(),
230 ),
231 ));
232 }
233 Expression::Field(_) => {
234 row.push(Some(
235 Expression::BooleanExpression(
236 Box::new(Expression::Field(
237 key.to_string(),
238 )),
239 symbol,
240 right.clone(),
241 ),
242 ));
243 }
244 _ => {}
245 }
246 }
247 Expression::Nested(_, expression) => {
248 row.push(Some(Expression::Nested(
249 key.to_string(),
250 expression,
251 )));
252 }
253 Expression::Search(search, _, cast) => {
254 row.push(Some(Expression::Search(
255 search,
256 key.to_string(),
257 cast,
258 )));
259 }
260 _ => {}
261 }
262 } else {
263 row.push(None);
264 }
265 }
266 rows.push(row);
267 } else {
268 rest.push(Expression::BooleanGroup(BoolSym::And, expressions));
269 }
270 }
271 Expression::BooleanExpression(left, symbol, right) => {
272 match (*left, &*right) {
273 (Expression::Cast(field, kind), Expression::Boolean(_))
274 | (Expression::Cast(field, kind), Expression::Float(_))
275 | (Expression::Cast(field, kind), Expression::Integer(_))
276 | (Expression::Cast(field, kind), Expression::Null) => {
277 let mut row = vec![];
278 for (i, column) in columns.iter().enumerate() {
279 if column == &field {
280 let key = std::char::from_u32(i as u32)
281 .expect("could not make key");
282 row.push(Some(Expression::BooleanExpression(
283 Box::new(Expression::Cast(
284 key.to_string(),
285 kind.clone(),
286 )),
287 symbol,
288 right.clone(),
289 )));
290 } else {
291 row.push(None);
292 }
293 }
294 rows.push(row);
295 }
296 (Expression::Field(field), Expression::Boolean(_))
297 | (Expression::Field(field), Expression::Float(_))
298 | (Expression::Field(field), Expression::Integer(_))
299 | (Expression::Field(field), Expression::Null) => {
300 let mut row = vec![];
301 for (i, column) in columns.iter().enumerate() {
302 if column == &field {
303 let key = std::char::from_u32(i as u32)
304 .expect("could not make key");
305 row.push(Some(Expression::BooleanExpression(
306 Box::new(Expression::Field(key.to_string())),
307 symbol,
308 right.clone(),
309 )));
310 } else {
311 row.push(None);
312 }
313 }
314 rows.push(row);
315 }
316 (left, _) => {
317 rest.push(Expression::BooleanExpression(
318 Box::new(left),
319 symbol,
320 right,
321 ));
322 }
323 }
324 }
325 Expression::Nested(field, expression) => {
326 let mut row = vec![];
327 for (i, column) in columns.iter().enumerate() {
328 if column == &field {
329 let key =
330 std::char::from_u32(i as u32).expect("could not make key");
331 row.push(Some(Expression::Nested(
332 key.to_string(),
333 expression.clone(),
334 )));
335 } else {
336 row.push(None);
337 }
338 }
339 rows.push(row);
340 }
341 Expression::Search(search, field, cast) => {
342 let mut row = vec![];
343 for (i, column) in columns.iter().enumerate() {
344 if column == &field {
345 let key =
346 std::char::from_u32(i as u32).expect("could not make key");
347 row.push(Some(Expression::Search(
348 search.clone(),
349 key.to_string(),
350 cast,
351 )));
352 } else {
353 row.push(None);
354 }
355 }
356 rows.push(row);
357 }
358 _ => rest.push(expression),
359 }
360 }
361
362 let mut expressions = vec![];
363 if !rows.is_empty() {
364 expressions.push(Expression::Matrix(columns, rows));
365 }
366 expressions.extend(rest);
367 if expressions.len() == 1 {
368 expressions
369 .into_iter()
370 .next()
371 .expect("could not get expression")
372 } else {
373 Expression::BooleanGroup(BoolSym::Or, expressions)
374 }
375 } else {
376 Expression::BooleanGroup(BoolSym::Or, scratch)
377 }
378 }
379 Expression::BooleanExpression(left, symbol, right) => {
380 let left = matrix(*left);
381 let right = matrix(*right);
382 Expression::BooleanExpression(Box::new(left), symbol, Box::new(right))
383 }
384 Expression::Match(kind, expression) => match *expression {
385 Expression::BooleanGroup(symbol, expressions) => {
386 let mut scratch = vec![];
387 for expression in expressions {
388 scratch.push(shake_1(expression));
389 }
390 Expression::Match(kind, Box::new(Expression::BooleanGroup(symbol, scratch)))
391 }
392 expression => Expression::Match(kind, Box::new(shake_1(expression))),
393 },
394 Expression::Negate(expression) => Expression::Negate(Box::new(matrix(*expression))),
395 Expression::Nested(field, expression) => {
396 Expression::Nested(field, Box::new(matrix(*expression)))
397 }
398 Expression::Boolean(_)
399 | Expression::BooleanGroup(_, _)
400 | Expression::Cast(_, _)
401 | Expression::Field(_)
402 | Expression::Float(_)
403 | Expression::Identifier(_)
404 | Expression::Integer(_)
405 | Expression::Matrix(_, _)
406 | Expression::Null
407 | Expression::Search(_, _, _) => expression,
408 }
409}
410
411fn rewrite_search(search: Search) -> Search {
412 match search {
413 Search::Regex(regex, insensitive) => {
414 let mut pattern = regex.as_str().to_owned();
415 if let Some(tail) = pattern.strip_prefix(".*") {
416 pattern = tail.to_owned();
417 }
418 if let Some(head) = pattern.strip_suffix(".*") {
419 pattern = head.to_owned();
420 }
421 Search::Regex(
422 RegexBuilder::new(&pattern)
423 .case_insensitive(insensitive)
424 .build()
425 .expect("could not build regex"),
426 insensitive,
427 )
428 }
429 Search::RegexSet(regex, insensitive) => {
430 let mut patterns = vec![];
431 for pattern in regex.patterns() {
432 let mut pattern = pattern.to_owned();
433 if let Some(tail) = pattern.strip_prefix(".*") {
434 pattern = tail.to_owned();
435 }
436 if let Some(head) = pattern.strip_suffix(".*") {
437 pattern = head.to_owned();
438 }
439 patterns.push(pattern);
440 }
441 Search::RegexSet(
442 RegexSetBuilder::new(patterns)
443 .case_insensitive(insensitive)
444 .build()
445 .expect("could not build regex"),
446 insensitive,
447 )
448 }
449 _ => search,
450 }
451}
452
453pub fn rewrite(expression: Expression) -> Expression {
454 match expression {
455 Expression::BooleanGroup(symbol, expressions) => {
456 let mut scratch = vec![];
457 for expression in expressions {
458 let rewriten = rewrite(expression);
459 scratch.push(rewriten);
460 }
461 Expression::BooleanGroup(symbol, scratch)
462 }
463 Expression::BooleanExpression(left, symbol, right) => {
464 let left = rewrite(*left);
465 let right = rewrite(*right);
466 Expression::BooleanExpression(Box::new(left), symbol, Box::new(right))
467 }
468 Expression::Match(symbol, expression) => {
469 Expression::Match(symbol, Box::new(rewrite(*expression)))
470 }
471 Expression::Negate(expression) => Expression::Negate(Box::new(rewrite(*expression))),
472 Expression::Nested(field, expression) => {
473 Expression::Nested(field, Box::new(rewrite(*expression)))
474 }
475 Expression::Search(search, f, c) => Expression::Search(rewrite_search(search), f, c),
476 Expression::Boolean(_)
477 | Expression::Cast(_, _)
478 | Expression::Field(_)
479 | Expression::Float(_)
480 | Expression::Identifier(_)
481 | Expression::Integer(_)
482 | Expression::Matrix(_, _)
483 | Expression::Null => expression,
484 }
485}
486
487pub fn shake(expression: Expression) -> Expression {
488 let expression = shake_0(expression);
492 shake_1(expression)
493}
494
495fn shake_0(expression: Expression) -> Expression {
496 match expression {
497 Expression::BooleanGroup(symbol, expressions) => {
498 let length = expressions.len();
499 let expressions = match symbol {
500 BoolSym::And => {
501 let mut scratch = vec![];
504 for expression in expressions {
505 let shaken = shake_0(expression);
506 scratch.push(shaken);
507 }
508 scratch
509 }
510 BoolSym::Or => {
511 let mut scratch = vec![];
512 for expression in expressions {
513 let shaken = shake_0(expression);
514 scratch.push(shaken);
515 }
516 scratch
517 }
518 _ => unreachable!(),
519 };
520 if expressions.len() != length {
521 shake_0(Expression::BooleanGroup(symbol, expressions))
522 } else if expressions.len() == 1 {
523 expressions
524 .into_iter()
525 .next()
526 .expect("could not get expression")
527 } else {
528 Expression::BooleanGroup(symbol, expressions)
529 }
530 }
531 Expression::BooleanExpression(left, symbol, right) => {
532 let left = shake_0(*left);
533 let right = shake_0(*right);
534 match (left, symbol, right) {
535 (
536 Expression::BooleanGroup(BoolSym::And, mut left),
537 BoolSym::And,
538 Expression::BooleanGroup(BoolSym::And, right),
539 ) => {
540 left.extend(right);
541 shake_0(Expression::BooleanGroup(BoolSym::And, left))
542 }
543 (Expression::BooleanGroup(BoolSym::And, mut left), BoolSym::And, right) => {
544 left.push(right);
545 shake_0(Expression::BooleanGroup(BoolSym::And, left))
546 }
547 (left, BoolSym::And, Expression::BooleanGroup(BoolSym::And, right)) => {
548 let mut left = vec![left];
549 left.extend(right);
550 shake_0(Expression::BooleanGroup(BoolSym::And, left))
551 }
552 (
553 Expression::BooleanGroup(BoolSym::Or, mut left),
554 BoolSym::Or,
555 Expression::BooleanGroup(BoolSym::Or, right),
556 ) => {
557 left.extend(right);
558 shake_0(Expression::BooleanGroup(BoolSym::Or, left))
559 }
560 (Expression::BooleanGroup(BoolSym::Or, mut left), BoolSym::Or, right) => {
561 left.push(right);
562 shake_0(Expression::BooleanGroup(BoolSym::Or, left))
563 }
564 (left, BoolSym::Or, Expression::BooleanGroup(BoolSym::Or, right)) => {
565 let mut left = vec![left];
566 left.extend(right);
567 shake_0(Expression::BooleanGroup(BoolSym::Or, left))
568 }
569 (Expression::BooleanExpression(x, BoolSym::And, y), BoolSym::And, z) => {
570 shake_0(Expression::BooleanGroup(BoolSym::And, vec![*x, *y, z]))
571 }
572 (x, BoolSym::And, Expression::BooleanExpression(y, BoolSym::And, z)) => {
573 shake_0(Expression::BooleanGroup(BoolSym::And, vec![x, *y, *z]))
574 }
575 (Expression::BooleanExpression(x, BoolSym::Or, y), BoolSym::Or, z) => {
576 shake_0(Expression::BooleanGroup(BoolSym::Or, vec![*x, *y, z]))
577 }
578 (x, BoolSym::Or, Expression::BooleanExpression(y, BoolSym::Or, z)) => {
579 shake_0(Expression::BooleanGroup(BoolSym::Or, vec![x, *y, *z]))
580 }
581 (left, _, right) => {
590 Expression::BooleanExpression(Box::new(left), symbol, Box::new(right))
591 }
592 }
593 }
594 Expression::Match(m, expression) => {
595 let expression = shake_0(*expression);
596 Expression::Match(m, Box::new(expression))
597 }
598 Expression::Negate(expression) => {
599 let expression = shake_0(*expression);
600 match expression {
601 Expression::Negate(inner) => shake_0(*inner),
602 _ => Expression::Negate(Box::new(expression)),
603 }
604 }
605 Expression::Nested(field, expression) => {
606 Expression::Nested(field, Box::new(shake_0(*expression)))
607 }
608 Expression::Boolean(_)
609 | Expression::Cast(_, _)
610 | Expression::Field(_)
611 | Expression::Float(_)
612 | Expression::Identifier(_)
613 | Expression::Integer(_)
614 | Expression::Matrix(_, _)
615 | Expression::Null
616 | Expression::Search(_, _, _) => expression,
617 }
618}
619
620fn shake_1(expression: Expression) -> Expression {
621 match expression {
622 Expression::BooleanGroup(BoolSym::And, expressions) => {
626 let length = expressions.len();
627
628 let mut nested = HashMap::new();
629
630 let mut scratch = vec![];
631
632 for expression in expressions {
633 let shaken = shake_1(expression);
634 match shaken {
635 Expression::Nested(field, expression) => {
636 let expressions = nested.entry(field).or_insert(vec![]);
637 (*expressions).push(*expression);
638 }
639 shaken => scratch.push(shaken),
640 };
641 }
642 for (field, expressions) in nested {
643 let shaken = if expressions.len() == 1 {
644 shake_1(
645 expressions
646 .into_iter()
647 .next()
648 .expect("could not get expression"),
649 )
650 } else {
651 shake_1(Expression::Match(
652 Match::All,
653 Box::new(Expression::BooleanGroup(BoolSym::Or, expressions)),
654 ))
655 };
656 scratch.push(Expression::Nested(field, Box::new(shaken)));
657 }
658 if scratch.len() != length {
659 shake_1(Expression::BooleanGroup(BoolSym::And, scratch))
660 } else if scratch.len() == 1 {
661 scratch
662 .into_iter()
663 .next()
664 .expect("could not get expression")
665 } else {
666 Expression::BooleanGroup(BoolSym::And, scratch)
667 }
668 }
669 Expression::BooleanGroup(BoolSym::Or, expressions) => {
670 let length = expressions.len();
671 let expressions = {
672 let mut needles = HashMap::new();
673 let mut nested = HashMap::new();
674 let mut patterns = HashMap::new();
675
676 let mut any = vec![];
679 let mut exact = vec![];
680 let mut starts_with = vec![];
681 let mut ends_with = vec![];
682 let mut contains = vec![];
683 let mut aho = vec![];
684 let mut regex = vec![];
685 let mut regex_set = vec![];
686 let mut rest = vec![];
687
688 for expression in expressions {
689 let shaken = shake_1(expression);
690
691 match shaken {
692 Expression::Nested(field, expression) => {
693 let expressions = nested.entry(field).or_insert(vec![]);
694 (*expressions).push(*expression);
695 }
696 Expression::Search(
697 Search::AhoCorasick(_, contexts, insensitive),
698 field,
699 cast,
700 ) => {
701 let expressions =
702 needles.entry((field, cast, insensitive)).or_insert(vec![]);
703 for context in contexts {
704 let value = context.value().to_owned();
705 (*expressions).push((context.clone(), value));
706 }
707 }
708 Expression::Search(Search::Contains(value), field, cast) => {
709 let expressions = needles.entry((field, cast, false)).or_insert(vec![]);
710 (*expressions).push((MatchType::Contains(value.clone()), value));
711 }
712 Expression::Search(Search::EndsWith(value), field, cast) => {
713 let expressions = needles.entry((field, cast, false)).or_insert(vec![]);
714 (*expressions).push((MatchType::EndsWith(value.clone()), value));
715 }
716 Expression::Search(Search::Exact(value), field, cast) => {
717 let expressions = needles.entry((field, cast, false)).or_insert(vec![]);
718 (*expressions).push((MatchType::Exact(value.clone()), value));
719 }
720 Expression::Search(Search::StartsWith(value), field, cast) => {
721 let expressions = needles.entry((field, cast, false)).or_insert(vec![]);
722 (*expressions).push((MatchType::StartsWith(value.clone()), value));
723 }
724 Expression::Search(Search::Any, _, _) => {
725 any.push(shaken);
726 }
727 Expression::Search(Search::Regex(r, insensitive), field, cast) => {
728 let patterns =
729 patterns.entry((field, cast, insensitive)).or_insert(vec![]);
730 (*patterns).push(r.as_str().to_owned());
731 }
732 Expression::Search(Search::RegexSet(r, insensitive), field, cast) => {
733 let patterns =
734 patterns.entry((field, cast, insensitive)).or_insert(vec![]);
735 for pattern in r.patterns() {
736 (*patterns).push(pattern.to_owned());
737 }
738 }
739 _ => rest.push(shaken),
740 }
741 }
742
743 for ((field, cast, insensitive), searches) in needles {
744 if !insensitive && searches.len() == 1 {
745 let search = searches.into_iter().next().expect("could not get search");
746 match search.0 {
747 MatchType::Contains(v) => {
748 contains.push(Expression::Search(Search::Contains(v), field, cast));
749 }
750 MatchType::EndsWith(v) => {
751 ends_with.push(Expression::Search(
752 Search::EndsWith(v),
753 field,
754 cast,
755 ));
756 }
757 MatchType::Exact(v) => {
758 exact.push(Expression::Search(Search::Exact(v), field, cast));
759 }
760 MatchType::StartsWith(v) => {
761 starts_with.push(Expression::Search(
762 Search::StartsWith(v),
763 field,
764 cast,
765 ));
766 }
767 };
768 } else {
769 let (context, needles): (Vec<_>, Vec<_>) = searches.into_iter().unzip();
770 let expression = Expression::Search(
771 Search::AhoCorasick(
772 Box::new(
773 AhoCorasickBuilder::new()
774 .ascii_case_insensitive(insensitive)
775 .kind(Some(AhoCorasickKind::DFA))
776 .build(needles)
777 .expect("failed to build dfa"),
778 ),
779 context,
780 insensitive,
781 ),
782 field,
783 cast,
784 );
785 aho.push(expression);
786 };
787 }
788
789 for (field, expressions) in nested {
790 let shaken = if expressions.len() == 1 {
791 shake_1(
792 expressions
793 .into_iter()
794 .next()
795 .expect("could not get expression"),
796 )
797 } else {
798 shake_1(Expression::BooleanGroup(BoolSym::Or, expressions))
799 };
800 rest.push(Expression::Nested(field, Box::new(shaken)));
801 }
802
803 for ((field, cast, insensitive), patterns) in patterns {
804 if patterns.len() == 1 {
805 let pattern = patterns.into_iter().next().expect("could not get pattern");
806 let expression = Expression::Search(
807 Search::Regex(
808 RegexBuilder::new(&pattern)
809 .case_insensitive(insensitive)
810 .build()
811 .expect("could not build regex"),
812 insensitive,
813 ),
814 field,
815 cast,
816 );
817 regex.push(expression);
818 } else {
819 let expression = Expression::Search(
820 Search::RegexSet(
821 RegexSetBuilder::new(patterns)
822 .case_insensitive(insensitive)
823 .build()
824 .expect("could not build regex set"),
825 insensitive,
826 ),
827 field,
828 cast,
829 );
830 regex_set.push(expression);
831 }
832 }
833
834 let mut scratch = vec![];
835 scratch.extend(any);
836 exact.sort_by(|x, y| match (x, y) {
837 (
838 Expression::Search(Search::Exact(a), _, _),
839 Expression::Search(Search::Exact(b), _, _),
840 ) => a.len().cmp(&b.len()),
841 _ => std::cmp::Ordering::Equal,
842 });
843 scratch.extend(exact);
844 starts_with.sort_by(|x, y| match (x, y) {
845 (
846 Expression::Search(Search::StartsWith(a), _, _),
847 Expression::Search(Search::StartsWith(b), _, _),
848 ) => a.len().cmp(&b.len()),
849 _ => std::cmp::Ordering::Equal,
850 });
851 scratch.extend(starts_with);
852 ends_with.sort_by(|x, y| match (x, y) {
853 (
854 Expression::Search(Search::EndsWith(a), _, _),
855 Expression::Search(Search::EndsWith(b), _, _),
856 ) => a.len().cmp(&b.len()),
857 _ => std::cmp::Ordering::Equal,
858 });
859 scratch.extend(ends_with);
860 contains.sort_by(|x, y| match (x, y) {
861 (
862 Expression::Search(Search::Contains(a), _, _),
863 Expression::Search(Search::Contains(b), _, _),
864 ) => a.len().cmp(&b.len()),
865 _ => std::cmp::Ordering::Equal,
866 });
867 scratch.extend(contains);
868 aho.sort_by(|x, y| match (x, y) {
869 (
870 Expression::Search(Search::AhoCorasick(_, a, case0), _, _),
871 Expression::Search(Search::AhoCorasick(_, b, case1), _, _),
872 ) => (b.len(), case1).cmp(&(a.len(), case0)),
873 _ => std::cmp::Ordering::Equal,
874 });
875 scratch.extend(aho);
876 regex.sort_by(|x, y| match (x, y) {
877 (
878 Expression::Search(Search::Regex(reg0, case0), _, _),
879 Expression::Search(Search::Regex(reg1, case1), _, _),
880 ) => (reg0.as_str(), case0).cmp(&(reg1.as_str(), case1)),
881 _ => std::cmp::Ordering::Equal,
882 });
883 scratch.extend(regex);
884 regex_set.sort_by(|x, y| match (x, y) {
885 (
886 Expression::Search(Search::RegexSet(set0, case0), _, _),
887 Expression::Search(Search::RegexSet(set1, case1), _, _),
888 ) => (set0.patterns(), case0).cmp(&(set1.patterns(), case1)),
889 _ => std::cmp::Ordering::Equal,
890 });
891 scratch.extend(regex_set);
892 scratch.extend(rest);
893 scratch
894 };
895 if expressions.len() != length {
896 shake_1(Expression::BooleanGroup(BoolSym::Or, expressions))
897 } else if expressions.len() == 1 {
898 expressions
899 .into_iter()
900 .next()
901 .expect("could not get expression")
902 } else {
903 Expression::BooleanGroup(BoolSym::Or, expressions)
904 }
905 }
906 Expression::BooleanGroup(symbol, expressions) => {
907 let mut scratch = vec![];
908 for expression in expressions {
909 scratch.push(shake_1(expression));
910 }
911 Expression::BooleanGroup(symbol, scratch)
912 }
913 Expression::BooleanExpression(left, symbol, right) => {
914 let left = shake_1(*left);
915 let right = shake_1(*right);
916 Expression::BooleanExpression(Box::new(left), symbol, Box::new(right))
917 }
918 Expression::Match(kind, expression) => match *expression {
919 Expression::BooleanGroup(symbol, expressions) => {
920 let mut scratch = vec![];
921 for expression in expressions {
922 scratch.push(shake_1(expression));
923 }
924 Expression::Match(kind, Box::new(Expression::BooleanGroup(symbol, scratch)))
925 }
926 expression => Expression::Match(kind, Box::new(shake_1(expression))),
927 },
928 Expression::Negate(expression) => Expression::Negate(Box::new(shake_1(*expression))),
929 Expression::Nested(field, expression) => {
930 Expression::Nested(field, Box::new(shake_1(*expression)))
931 }
932 Expression::Boolean(_)
933 | Expression::Cast(_, _)
934 | Expression::Field(_)
935 | Expression::Float(_)
936 | Expression::Identifier(_)
937 | Expression::Integer(_)
938 | Expression::Matrix(_, _)
939 | Expression::Null
940 | Expression::Search(_, _, _) => expression,
941 }
942}
943
944#[cfg(test)]
945mod tests {
946 use super::*;
947
948 #[test]
949 fn coalesce_basic() {
950 let mut identifiers = HashMap::new();
951 identifiers.insert(
952 "A".to_owned(),
953 Expression::BooleanExpression(
954 Box::new(Expression::Field("count".to_owned())),
955 BoolSym::Equal,
956 Box::new(Expression::Integer(1)),
957 ),
958 );
959 let expression = Expression::Identifier("A".to_owned());
960
961 let coalesced = coalesce(expression, &identifiers);
962
963 let expected = Expression::BooleanExpression(
964 Box::new(Expression::Field("count".to_owned())),
965 BoolSym::Equal,
966 Box::new(Expression::Integer(1)),
967 );
968
969 assert_eq!(coalesced, expected);
970 }
971
972 #[test]
992 fn shake_ands() {
993 let expression = Expression::BooleanExpression(
994 Box::new(Expression::Null),
995 BoolSym::And,
996 Box::new(Expression::Null),
997 );
998 let shaken = shake(expression);
999
1000 let expected = Expression::BooleanExpression(
1001 Box::new(Expression::Null),
1002 BoolSym::And,
1003 Box::new(Expression::Null),
1004 );
1005
1006 assert_eq!(shaken, expected);
1007
1008 let expression = Expression::BooleanExpression(
1009 Box::new(Expression::Null),
1010 BoolSym::And,
1011 Box::new(Expression::BooleanExpression(
1012 Box::new(Expression::Null),
1013 BoolSym::And,
1014 Box::new(Expression::Null),
1015 )),
1016 );
1017 let shaken = shake(expression);
1018
1019 let expected = Expression::BooleanGroup(
1020 BoolSym::And,
1021 vec![Expression::Null, Expression::Null, Expression::Null],
1022 );
1023
1024 assert_eq!(shaken, expected);
1025 }
1026
1027 #[test]
1028 fn shake_ors() {
1029 let expression = Expression::BooleanExpression(
1030 Box::new(Expression::Null),
1031 BoolSym::Or,
1032 Box::new(Expression::Null),
1033 );
1034 let shaken = shake(expression);
1035
1036 let expected = Expression::BooleanExpression(
1037 Box::new(Expression::Null),
1038 BoolSym::Or,
1039 Box::new(Expression::Null),
1040 );
1041
1042 assert_eq!(shaken, expected);
1043
1044 let expression = Expression::BooleanExpression(
1045 Box::new(Expression::Null),
1046 BoolSym::Or,
1047 Box::new(Expression::BooleanExpression(
1048 Box::new(Expression::Null),
1049 BoolSym::Or,
1050 Box::new(Expression::Null),
1051 )),
1052 );
1053 let shaken = shake(expression);
1054
1055 let expected = Expression::BooleanGroup(
1056 BoolSym::Or,
1057 vec![Expression::Null, Expression::Null, Expression::Null],
1058 );
1059
1060 assert_eq!(shaken, expected);
1061 }
1062
1063 #[test]
1064 fn shake_group_of_nested() {
1065 let expression = Expression::BooleanGroup(
1066 BoolSym::Or,
1067 vec![
1068 Expression::Nested(
1069 "ids".to_owned(),
1070 Box::new(Expression::Search(
1071 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bca".to_owned()),
1072 "id".to_owned(),
1073 false,
1074 )),
1075 ),
1076 Expression::Nested(
1077 "ids".to_owned(),
1078 Box::new(Expression::Search(
1079 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcb".to_owned()),
1080 "id".to_owned(),
1081 false,
1082 )),
1083 ),
1084 Expression::Nested(
1085 "ids".to_owned(),
1086 Box::new(Expression::Search(
1087 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcc".to_owned()),
1088 "id".to_owned(),
1089 false,
1090 )),
1091 ),
1092 ],
1093 );
1094 let shaken = shake(expression);
1095
1096 let expected = Expression::Nested(
1097 "ids".to_owned(),
1098 Box::new(Expression::Search(
1099 Search::AhoCorasick(
1100 Box::new(
1101 AhoCorasickBuilder::new()
1102 .kind(Some(AhoCorasickKind::DFA))
1103 .build(vec![
1104 "e2ec14cb-299e-4adf-bb09-04a6a8417bca",
1105 "e2ec14cb-299e-4adf-bb09-04a6a8417bcb",
1106 "e2ec14cb-299e-4adf-bb09-04a6a8417bcc",
1107 ])
1108 .expect("failed to build dfa"),
1109 ),
1110 vec![
1111 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bca".to_owned()),
1112 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcb".to_owned()),
1113 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcc".to_owned()),
1114 ],
1115 false,
1116 ),
1117 "id".to_owned(),
1118 false,
1119 )),
1120 );
1121
1122 assert_eq!(shaken, expected);
1123 }
1124
1125 #[test]
1126 fn shake_group_or() {
1127 let expression = Expression::BooleanGroup(
1129 BoolSym::Or,
1130 vec![
1131 Expression::Search(
1132 Search::AhoCorasick(
1133 Box::new(
1134 AhoCorasickBuilder::new()
1135 .kind(Some(AhoCorasickKind::DFA))
1136 .ascii_case_insensitive(false)
1137 .build(vec![
1138 "Quick".to_owned(),
1139 "Brown".to_owned(),
1140 "Fox".to_owned(),
1141 ])
1142 .expect("failed to build dfa"),
1143 ),
1144 vec![
1145 MatchType::Contains("Quick".to_owned()),
1146 MatchType::Exact("Brown".to_owned()),
1147 MatchType::EndsWith("Fox".to_owned()),
1148 ],
1149 false,
1150 ),
1151 "name".to_owned(),
1152 false,
1153 ),
1154 Expression::Search(
1155 Search::AhoCorasick(
1156 Box::new(
1157 AhoCorasickBuilder::new()
1158 .kind(Some(AhoCorasickKind::DFA))
1159 .ascii_case_insensitive(true)
1160 .build(vec![
1161 "quick".to_owned(),
1162 "brown".to_owned(),
1163 "fox".to_owned(),
1164 ])
1165 .expect("failed to build dfa"),
1166 ),
1167 vec![
1168 MatchType::Contains("quick".to_owned()),
1169 MatchType::Exact("brown".to_owned()),
1170 MatchType::EndsWith("fox".to_owned()),
1171 ],
1172 true,
1173 ),
1174 "name".to_owned(),
1175 false,
1176 ),
1177 Expression::Search(Search::Any, "name".to_owned(), false),
1178 Expression::Search(Search::Contains("afoo".to_owned()), "a".to_owned(), false),
1179 Expression::Search(Search::Contains("foo".to_owned()), "name".to_owned(), false),
1180 Expression::Search(Search::EndsWith("bbar".to_owned()), "b".to_owned(), false),
1181 Expression::Search(Search::EndsWith("bar".to_owned()), "name".to_owned(), false),
1182 Expression::Search(Search::Exact("cbaz".to_owned()), "c".to_owned(), false),
1183 Expression::Search(Search::Exact("baz".to_owned()), "name".to_owned(), false),
1184 Expression::Search(
1185 Search::Regex(
1186 RegexBuilder::new("foo")
1187 .case_insensitive(false)
1188 .build()
1189 .unwrap(),
1190 false,
1191 ),
1192 "name".to_owned(),
1193 false,
1194 ),
1195 Expression::Search(
1196 Search::Regex(
1197 RegexBuilder::new("bar")
1198 .case_insensitive(true)
1199 .build()
1200 .unwrap(),
1201 true,
1202 ),
1203 "name".to_owned(),
1204 false,
1205 ),
1206 Expression::Search(
1207 Search::RegexSet(
1208 RegexSetBuilder::new(vec!["lorem"])
1209 .case_insensitive(false)
1210 .build()
1211 .unwrap(),
1212 false,
1213 ),
1214 "name".to_owned(),
1215 false,
1216 ),
1217 Expression::Search(
1218 Search::RegexSet(
1219 RegexSetBuilder::new(vec!["ipsum"])
1220 .case_insensitive(true)
1221 .build()
1222 .unwrap(),
1223 true,
1224 ),
1225 "name".to_owned(),
1226 false,
1227 ),
1228 Expression::Search(
1229 Search::StartsWith("dfoobar".to_owned()),
1230 "d".to_owned(),
1231 false,
1232 ),
1233 Expression::Search(
1234 Search::StartsWith("foobar".to_owned()),
1235 "name".to_owned(),
1236 false,
1237 ),
1238 ],
1239 );
1240 let shaken = shake(expression);
1241
1242 let expected = Expression::BooleanGroup(
1243 BoolSym::Or,
1244 vec![
1245 Expression::Search(Search::Any, "name".to_owned(), false),
1246 Expression::Search(Search::Exact("cbaz".to_owned()), "c".to_owned(), false),
1247 Expression::Search(
1248 Search::StartsWith("dfoobar".to_owned()),
1249 "d".to_owned(),
1250 false,
1251 ),
1252 Expression::Search(Search::EndsWith("bbar".to_owned()), "b".to_owned(), false),
1253 Expression::Search(Search::Contains("afoo".to_owned()), "a".to_owned(), false),
1254 Expression::Search(
1255 Search::AhoCorasick(
1256 Box::new(
1257 AhoCorasickBuilder::new()
1258 .kind(Some(AhoCorasickKind::DFA))
1259 .ascii_case_insensitive(false)
1260 .build(vec![
1261 "Quick".to_owned(),
1262 "Brown".to_owned(),
1263 "Fox".to_owned(),
1264 "foo".to_owned(),
1265 "bar".to_owned(),
1266 "baz".to_owned(),
1267 "foobar".to_owned(),
1268 ])
1269 .expect("failed to build dfa"),
1270 ),
1271 vec![
1272 MatchType::Contains("Quick".to_owned()),
1273 MatchType::Exact("Brown".to_owned()),
1274 MatchType::EndsWith("Fox".to_owned()),
1275 MatchType::Contains("foo".to_owned()),
1276 MatchType::EndsWith("bar".to_owned()),
1277 MatchType::Exact("baz".to_owned()),
1278 MatchType::StartsWith("foobar".to_owned()),
1279 ],
1280 false,
1281 ),
1282 "name".to_owned(),
1283 false,
1284 ),
1285 Expression::Search(
1286 Search::AhoCorasick(
1287 Box::new(
1288 AhoCorasickBuilder::new()
1289 .kind(Some(AhoCorasickKind::DFA))
1290 .ascii_case_insensitive(true)
1291 .build(vec![
1292 "quick".to_owned(),
1293 "brown".to_owned(),
1294 "fox".to_owned(),
1295 ])
1296 .expect("failed to build dfa"),
1297 ),
1298 vec![
1299 MatchType::Contains("quick".to_owned()),
1300 MatchType::Exact("brown".to_owned()),
1301 MatchType::EndsWith("fox".to_owned()),
1302 ],
1303 true,
1304 ),
1305 "name".to_owned(),
1306 false,
1307 ),
1308 Expression::Search(
1309 Search::RegexSet(
1310 RegexSetBuilder::new(vec!["bar", "ipsum"])
1311 .case_insensitive(true)
1312 .build()
1313 .unwrap(),
1314 true,
1315 ),
1316 "name".to_owned(),
1317 false,
1318 ),
1319 Expression::Search(
1320 Search::RegexSet(
1321 RegexSetBuilder::new(vec!["foo", "lorem"])
1322 .case_insensitive(false)
1323 .build()
1324 .unwrap(),
1325 false,
1326 ),
1327 "name".to_owned(),
1328 false,
1329 ),
1330 ],
1331 );
1332
1333 assert_eq!(shaken, expected);
1334 }
1335
1336 #[test]
1337 fn shake_group_or_1() {
1338 let expression = Expression::BooleanGroup(BoolSym::Or, vec![Expression::Null]);
1340 let shaken = shake(expression);
1341
1342 let expected = Expression::Null;
1343
1344 assert_eq!(shaken, expected);
1345 }
1346
1347 #[test]
1348 fn shake_nested() {
1349 let expression = Expression::Nested(
1350 "ids".to_owned(),
1351 Box::new(Expression::BooleanGroup(
1352 BoolSym::Or,
1353 vec![
1354 Expression::Search(
1355 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bca".to_owned()),
1356 "id".to_owned(),
1357 false,
1358 ),
1359 Expression::Search(
1360 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcb".to_owned()),
1361 "id".to_owned(),
1362 false,
1363 ),
1364 Expression::Search(
1365 Search::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcc".to_owned()),
1366 "id".to_owned(),
1367 false,
1368 ),
1369 ],
1370 )),
1371 );
1372 let shaken = shake(expression);
1373
1374 let expected = Expression::Nested(
1375 "ids".to_owned(),
1376 Box::new(Expression::Search(
1377 Search::AhoCorasick(
1378 Box::new(
1379 AhoCorasickBuilder::new()
1380 .kind(Some(AhoCorasickKind::DFA))
1381 .build(vec![
1382 "e2ec14cb-299e-4adf-bb09-04a6a8417bca",
1383 "e2ec14cb-299e-4adf-bb09-04a6a8417bcb",
1384 "e2ec14cb-299e-4adf-bb09-04a6a8417bcc",
1385 ])
1386 .expect("failed to build dfa"),
1387 ),
1388 vec![
1389 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bca".to_owned()),
1390 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcb".to_owned()),
1391 MatchType::Exact("e2ec14cb-299e-4adf-bb09-04a6a8417bcc".to_owned()),
1392 ],
1393 false,
1394 ),
1395 "id".to_owned(),
1396 false,
1397 )),
1398 );
1399
1400 assert_eq!(shaken, expected);
1401 }
1402
1403 #[test]
1404 fn shake_match() {
1405 let expression = Expression::Match(
1406 Match::All,
1407 Box::new(Expression::BooleanGroup(
1408 BoolSym::Or,
1409 vec![Expression::Null, Expression::Null],
1410 )),
1411 );
1412 let shaken = shake(expression);
1413
1414 let expected = Expression::Match(
1415 Match::All,
1416 Box::new(Expression::BooleanGroup(
1417 BoolSym::Or,
1418 vec![Expression::Null, Expression::Null],
1419 )),
1420 );
1421
1422 assert_eq!(shaken, expected);
1423 }
1424
1425 #[test]
1426 fn shake_negate() {
1427 let expression =
1428 Expression::Negate(Box::new(Expression::Negate(Box::new(Expression::Null))));
1429 let shaken = shake(expression);
1430
1431 let expected = Expression::Null;
1432
1433 assert_eq!(shaken, expected);
1434 }
1435
1436 #[test]
1437 fn rewrite_regex() {
1438 let expression = Expression::Search(
1439 Search::Regex(RegexBuilder::new(".*foo.*").build().unwrap(), false),
1440 "name".to_owned(),
1441 false,
1442 );
1443 let rewriten = rewrite(expression);
1444
1445 let expected = Expression::Search(
1446 Search::Regex(RegexBuilder::new("foo").build().unwrap(), false),
1447 "name".to_owned(),
1448 false,
1449 );
1450
1451 assert_eq!(rewriten, expected);
1452
1453 let expression = Expression::Search(
1454 Search::RegexSet(
1455 RegexSetBuilder::new(vec![".*foo.*"]).build().unwrap(),
1456 false,
1457 ),
1458 "name".to_owned(),
1459 false,
1460 );
1461 let rewriten = rewrite(expression);
1462
1463 let expected = Expression::Search(
1464 Search::RegexSet(RegexSetBuilder::new(vec!["foo"]).build().unwrap(), false),
1465 "name".to_owned(),
1466 false,
1467 );
1468
1469 assert_eq!(rewriten, expected);
1470 }
1471
1472 #[test]
1473 fn rewrite_rule_0() {
1474 let expression = Expression::BooleanExpression(
1475 Box::new(Expression::BooleanExpression(
1476 Box::new(Expression::Match(
1477 Match::All,
1478 Box::new(Expression::Search(
1479 Search::Exact("a".to_owned()),
1480 "a".to_owned(),
1481 false,
1482 )),
1483 )),
1484 BoolSym::Or,
1485 Box::new(Expression::Null),
1486 )),
1487 BoolSym::And,
1488 Box::new(Expression::Negate(Box::new(Expression::Null))),
1489 );
1490 let shaken = shake(expression);
1491
1492 let expected = Expression::BooleanExpression(
1493 Box::new(Expression::BooleanExpression(
1494 Box::new(Expression::Match(
1495 Match::All,
1496 Box::new(Expression::Search(
1497 Search::Exact("a".to_owned()),
1498 "a".to_owned(),
1499 false,
1500 )),
1501 )),
1502 BoolSym::Or,
1503 Box::new(Expression::Null),
1504 )),
1505 BoolSym::And,
1506 Box::new(Expression::Negate(Box::new(Expression::Null))),
1507 );
1508
1509 assert_eq!(shaken, expected);
1510 }
1511
1512 #[test]
1513 fn rewrite_rule_edge_0() {
1514 let expression = Expression::Negate(Box::new(Expression::BooleanGroup(
1516 BoolSym::Or,
1517 vec![Expression::BooleanGroup(
1518 BoolSym::And,
1519 vec![
1520 Expression::Search(Search::Exact("a".to_owned()), "a".to_owned(), false),
1521 Expression::Search(Search::Exact("b".to_owned()), "b".to_owned(), false),
1522 ],
1523 )],
1524 )));
1525 let shaken = shake(expression);
1526
1527 let expected = Expression::Negate(Box::new(Expression::BooleanGroup(
1528 BoolSym::And,
1529 vec![
1530 Expression::Search(Search::Exact("a".to_owned()), "a".to_owned(), false),
1531 Expression::Search(Search::Exact("b".to_owned()), "b".to_owned(), false),
1532 ],
1533 )));
1534
1535 assert_eq!(shaken, expected);
1536 }
1537}