1use arrow::{
21 array::{new_null_array, AsArray},
22 datatypes::{DataType, Field, Schema},
23 record_batch::RecordBatch,
24};
25use std::borrow::Cow;
26use std::collections::HashSet;
27use std::ops::Not;
28use std::sync::Arc;
29
30use datafusion_common::{
31 cast::{as_large_list_array, as_list_array},
32 tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter},
33};
34use datafusion_common::{internal_err, DFSchema, DataFusionError, Result, ScalarValue};
35use datafusion_expr::{
36 and, binary::BinaryTypeCoercer, lit, or, BinaryExpr, Case, ColumnarValue, Expr, Like,
37 Operator, Volatility,
38};
39use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval};
40use datafusion_expr::{
41 expr::{InList, InSubquery},
42 utils::{iter_conjunction, iter_conjunction_owned},
43};
44use datafusion_expr::{simplify::ExprSimplifyResult, Cast, TryCast};
45use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
46
47use super::inlist_simplifier::ShortenInListSimplifier;
48use super::utils::*;
49use crate::analyzer::type_coercion::TypeCoercionRewriter;
50use crate::simplify_expressions::guarantees::GuaranteeRewriter;
51use crate::simplify_expressions::regex::simplify_regex_expr;
52use crate::simplify_expressions::unwrap_cast::{
53 is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary,
54 is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist,
55 unwrap_cast_in_comparison_for_binary,
56};
57use crate::simplify_expressions::SimplifyInfo;
58use datafusion_expr::expr::FieldMetadata;
59use datafusion_expr_common::casts::try_cast_literal_to_type;
60use indexmap::IndexSet;
61use regex::Regex;
62
/// Public entry point for simplifying [`Expr`] trees.
///
/// Construct with [`ExprSimplifier::new`], optionally tune with the `with_*`
/// builder methods, then call [`ExprSimplifier::simplify`].
pub struct ExprSimplifier<S> {
    /// Source of expression information; borrowed by the internal
    /// `Simplifier` and asked for `execution_props()` when building the
    /// constant evaluator.
    info: S,
    /// `(expression, interval)` pairs handed to the `GuaranteeRewriter`
    /// on every simplification cycle. Empty by default.
    guarantees: Vec<(Expr, NullableInterval)>,
    /// When `true` (the default), the expression is first rewritten by
    /// `Canonicalizer` before the simplification loop starts.
    canonicalize: bool,
    /// Upper bound on the number of simplification cycles per call.
    max_simplifier_cycles: u32,
}
109
/// Threshold related to inlining `IN` lists.
// NOTE(review): not referenced in this part of the file; presumably the
// maximum list length that gets expanded inline — confirm at the use site.
pub const THRESHOLD_INLINE_INLIST: usize = 3;
/// Default value of `ExprSimplifier::max_simplifier_cycles`, as installed by
/// `ExprSimplifier::new`.
pub const DEFAULT_MAX_SIMPLIFIER_CYCLES: u32 = 3;
112
113impl<S: SimplifyInfo> ExprSimplifier<S> {
114 pub fn new(info: S) -> Self {
120 Self {
121 info,
122 guarantees: vec![],
123 canonicalize: true,
124 max_simplifier_cycles: DEFAULT_MAX_SIMPLIFIER_CYCLES,
125 }
126 }
127
128 pub fn simplify(&self, expr: Expr) -> Result<Expr> {
190 Ok(self.simplify_with_cycle_count_transformed(expr)?.0.data)
191 }
192
193 #[deprecated(
201 since = "48.0.0",
202 note = "Use `simplify_with_cycle_count_transformed` instead"
203 )]
204 #[allow(unused_mut)]
205 pub fn simplify_with_cycle_count(&self, mut expr: Expr) -> Result<(Expr, u32)> {
206 let (transformed, cycle_count) =
207 self.simplify_with_cycle_count_transformed(expr)?;
208 Ok((transformed.data, cycle_count))
209 }
210
211 pub fn simplify_with_cycle_count_transformed(
225 &self,
226 mut expr: Expr,
227 ) -> Result<(Transformed<Expr>, u32)> {
228 let mut simplifier = Simplifier::new(&self.info);
229 let mut const_evaluator = ConstEvaluator::try_new(self.info.execution_props())?;
230 let mut shorten_in_list_simplifier = ShortenInListSimplifier::new();
231 let mut guarantee_rewriter = GuaranteeRewriter::new(&self.guarantees);
232
233 if self.canonicalize {
234 expr = expr.rewrite(&mut Canonicalizer::new()).data()?
235 }
236
237 let mut num_cycles = 0;
241 let mut has_transformed = false;
242 loop {
243 let Transformed {
244 data, transformed, ..
245 } = expr
246 .rewrite(&mut const_evaluator)?
247 .transform_data(|expr| expr.rewrite(&mut simplifier))?
248 .transform_data(|expr| expr.rewrite(&mut guarantee_rewriter))?;
249 expr = data;
250 num_cycles += 1;
251 has_transformed = has_transformed || transformed;
253 if !transformed || num_cycles >= self.max_simplifier_cycles {
254 break;
255 }
256 }
257 expr = expr.rewrite(&mut shorten_in_list_simplifier).data()?;
259 Ok((
260 Transformed::new_transformed(expr, has_transformed),
261 num_cycles,
262 ))
263 }
264
265 pub fn coerce(&self, expr: Expr, schema: &DFSchema) -> Result<Expr> {
271 let mut expr_rewrite = TypeCoercionRewriter { schema };
272 expr.rewrite(&mut expr_rewrite).data()
273 }
274
275 pub fn with_guarantees(mut self, guarantees: Vec<(Expr, NullableInterval)>) -> Self {
330 self.guarantees = guarantees;
331 self
332 }
333
334 pub fn with_canonicalize(mut self, canonicalize: bool) -> Self {
384 self.canonicalize = canonicalize;
385 self
386 }
387
388 pub fn with_max_cycles(mut self, max_simplifier_cycles: u32) -> Self {
441 self.max_simplifier_cycles = max_simplifier_cycles;
442 self
443 }
444}
445
/// Stateless rewriter that puts binary expressions into a canonical operand
/// order (see its `TreeNodeRewriter` implementation).
struct Canonicalizer {}

impl Canonicalizer {
    /// Builds a new canonicalizer; there is no state to initialise.
    fn new() -> Self {
        Canonicalizer {}
    }
}
459
460impl TreeNodeRewriter for Canonicalizer {
461 type Node = Expr;
462
463 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
464 let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr else {
465 return Ok(Transformed::no(expr));
466 };
467 match (left.as_ref(), right.as_ref(), op.swap()) {
468 (Expr::Column(left_col), Expr::Column(right_col), Some(swapped_op))
470 if right_col > left_col =>
471 {
472 Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
473 left: right,
474 op: swapped_op,
475 right: left,
476 })))
477 }
478 (Expr::Literal(_a, _), Expr::Column(_b), Some(swapped_op)) => {
480 Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
481 left: right,
482 op: swapped_op,
483 right: left,
484 })))
485 }
486 _ => Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
487 left,
488 op,
489 right,
490 }))),
491 }
492 }
493}
494
/// Tree rewriter that replaces sub-expressions which can be evaluated without
/// any real input with the literal they evaluate to.
#[allow(rustdoc::private_intra_doc_links)]
struct ConstEvaluator<'a> {
    /// Stack with one flag per node currently being visited: pushed as `true`
    /// in `f_down`, cleared for a node and its ancestors when the node cannot
    /// be evaluated, and popped in `f_up`.
    can_evaluate: Vec<bool>,

    /// Execution properties passed to `create_physical_expr`.
    execution_props: &'a ExecutionProps,
    /// Schema with a single nullable `Null` placeholder column, used to plan
    /// the physical expression.
    input_schema: DFSchema,
    /// One-row batch matching `input_schema`, used as the evaluation input.
    input_batch: RecordBatch,
}
519
/// Outcome of `ConstEvaluator::evaluate_to_scalar`.
#[allow(dead_code)]
enum ConstSimplifyResult {
    /// The expression was evaluated to this scalar (with optional field
    /// metadata taken from the physical expression's return field).
    Simplified(ScalarValue, Option<FieldMetadata>),
    /// The expression already was a literal; its value and metadata are
    /// handed back unchanged.
    NotSimplified(ScalarValue, Option<FieldMetadata>),
    /// Planning or evaluation failed (or produced an unsupported result);
    /// carries the error and the original expression.
    SimplifyRuntimeError(DataFusionError, Expr),
}
530
531impl TreeNodeRewriter for ConstEvaluator<'_> {
532 type Node = Expr;
533
534 fn f_down(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
535 self.can_evaluate.push(true);
537
538 if !Self::can_evaluate(&expr) {
543 let parent_iter = self.can_evaluate.iter_mut().rev();
545 for p in parent_iter {
546 if !*p {
547 break;
550 }
551 *p = false;
552 }
553 }
554
555 Ok(Transformed::no(expr))
559 }
560
561 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
562 match self.can_evaluate.pop() {
563 Some(true) => match self.evaluate_to_scalar(expr) {
568 ConstSimplifyResult::Simplified(s, m) => {
569 Ok(Transformed::yes(Expr::Literal(s, m)))
570 }
571 ConstSimplifyResult::NotSimplified(s, m) => {
572 Ok(Transformed::no(Expr::Literal(s, m)))
573 }
574 ConstSimplifyResult::SimplifyRuntimeError(_, expr) => {
575 Ok(Transformed::yes(expr))
576 }
577 },
578 Some(false) => Ok(Transformed::no(expr)),
579 _ => internal_err!("Failed to pop can_evaluate"),
580 }
581 }
582}
583
584impl<'a> ConstEvaluator<'a> {
585 pub fn try_new(execution_props: &'a ExecutionProps) -> Result<Self> {
589 static DUMMY_COL_NAME: &str = ".";
592 let schema = Arc::new(Schema::new(vec![Field::new(
593 DUMMY_COL_NAME,
594 DataType::Null,
595 true,
596 )]));
597 let input_schema = DFSchema::try_from(Arc::clone(&schema))?;
598 let col = new_null_array(&DataType::Null, 1);
600 let input_batch = RecordBatch::try_new(schema, vec![col])?;
601
602 Ok(Self {
603 can_evaluate: vec![],
604 execution_props,
605 input_schema,
606 input_batch,
607 })
608 }
609
610 fn volatility_ok(volatility: Volatility) -> bool {
612 match volatility {
613 Volatility::Immutable => true,
614 Volatility::Stable => true,
616 Volatility::Volatile => false,
617 }
618 }
619
620 fn can_evaluate(expr: &Expr) -> bool {
623 match expr {
629 #[expect(deprecated)]
631 Expr::AggregateFunction { .. }
632 | Expr::ScalarVariable(_, _)
633 | Expr::Column(_)
634 | Expr::OuterReferenceColumn(_, _)
635 | Expr::Exists { .. }
636 | Expr::InSubquery(_)
637 | Expr::ScalarSubquery(_)
638 | Expr::WindowFunction { .. }
639 | Expr::GroupingSet(_)
640 | Expr::Wildcard { .. }
641 | Expr::Placeholder(_) => false,
642 Expr::ScalarFunction(ScalarFunction { func, .. }) => {
643 Self::volatility_ok(func.signature().volatility)
644 }
645 Expr::Literal(_, _)
646 | Expr::Alias(..)
647 | Expr::Unnest(_)
648 | Expr::BinaryExpr { .. }
649 | Expr::Not(_)
650 | Expr::IsNotNull(_)
651 | Expr::IsNull(_)
652 | Expr::IsTrue(_)
653 | Expr::IsFalse(_)
654 | Expr::IsUnknown(_)
655 | Expr::IsNotTrue(_)
656 | Expr::IsNotFalse(_)
657 | Expr::IsNotUnknown(_)
658 | Expr::Negative(_)
659 | Expr::Between { .. }
660 | Expr::Like { .. }
661 | Expr::SimilarTo { .. }
662 | Expr::Case(_)
663 | Expr::Cast { .. }
664 | Expr::TryCast { .. }
665 | Expr::InList { .. } => true,
666 }
667 }
668
669 pub(crate) fn evaluate_to_scalar(&mut self, expr: Expr) -> ConstSimplifyResult {
671 if let Expr::Literal(s, m) = expr {
672 return ConstSimplifyResult::NotSimplified(s, m);
673 }
674
675 let phys_expr =
676 match create_physical_expr(&expr, &self.input_schema, self.execution_props) {
677 Ok(e) => e,
678 Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
679 };
680 let metadata = phys_expr
681 .return_field(self.input_batch.schema_ref())
682 .ok()
683 .and_then(|f| {
684 let m = f.metadata();
685 match m.is_empty() {
686 true => None,
687 false => Some(FieldMetadata::from(m)),
688 }
689 });
690 let col_val = match phys_expr.evaluate(&self.input_batch) {
691 Ok(v) => v,
692 Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
693 };
694 match col_val {
695 ColumnarValue::Array(a) => {
696 if a.len() != 1 {
697 ConstSimplifyResult::SimplifyRuntimeError(
698 DataFusionError::Execution(format!("Could not evaluate the expression, found a result of length {}", a.len())),
699 expr,
700 )
701 } else if as_list_array(&a).is_ok() {
702 ConstSimplifyResult::Simplified(
703 ScalarValue::List(a.as_list::<i32>().to_owned().into()),
704 metadata,
705 )
706 } else if as_large_list_array(&a).is_ok() {
707 ConstSimplifyResult::Simplified(
708 ScalarValue::LargeList(a.as_list::<i64>().to_owned().into()),
709 metadata,
710 )
711 } else {
712 match ScalarValue::try_from_array(&a, 0) {
714 Ok(s) => {
715 if matches!(&s, ScalarValue::Map(_)) {
717 ConstSimplifyResult::SimplifyRuntimeError(
718 DataFusionError::NotImplemented("Const evaluate for Map type is still not supported".to_string()),
719 expr,
720 )
721 } else {
722 ConstSimplifyResult::Simplified(s, metadata)
723 }
724 }
725 Err(err) => ConstSimplifyResult::SimplifyRuntimeError(err, expr),
726 }
727 }
728 }
729 ColumnarValue::Scalar(s) => {
730 if matches!(&s, ScalarValue::Map(_)) {
732 ConstSimplifyResult::SimplifyRuntimeError(
733 DataFusionError::NotImplemented(
734 "Const evaluate for Map type is still not supported"
735 .to_string(),
736 ),
737 expr,
738 )
739 } else {
740 ConstSimplifyResult::Simplified(s, metadata)
741 }
742 }
743 }
744 }
745}
746
/// Rewriter applying algebraic simplification rules to expressions; it only
/// borrows the information provider it consults.
struct Simplifier<'a, S> {
    /// Borrowed provider of type/nullability information.
    info: &'a S,
}

impl<'a, S> Simplifier<'a, S> {
    /// Wraps a borrowed `info` provider.
    pub fn new(info: &'a S) -> Self {
        Simplifier { info }
    }
}
765
766impl<S: SimplifyInfo> TreeNodeRewriter for Simplifier<'_, S> {
767 type Node = Expr;
768
769 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
771 use datafusion_expr::Operator::{
772 And, BitwiseAnd, BitwiseOr, BitwiseShiftLeft, BitwiseShiftRight, BitwiseXor,
773 Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch,
774 RegexNotIMatch, RegexNotMatch,
775 };
776
777 let info = self.info;
778 Ok(match expr {
779 ref expr @ Expr::BinaryExpr(BinaryExpr {
783 ref left,
784 ref op,
785 ref right,
786 }) if op.returns_null_on_null()
787 && (is_null(left.as_ref()) || is_null(right.as_ref())) =>
788 {
789 Transformed::yes(Expr::Literal(
790 ScalarValue::try_new_null(&info.get_data_type(expr)?)?,
791 None,
792 ))
793 }
794
795 Expr::BinaryExpr(BinaryExpr {
797 left,
798 op: And | Or,
799 right,
800 }) if is_null(&left) && is_null(&right) => Transformed::yes(lit_bool_null()),
801
802 Expr::BinaryExpr(BinaryExpr {
810 left,
811 op: Eq,
812 right,
813 }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
814 Transformed::yes(match as_bool_lit(&left)? {
815 Some(true) => *right,
816 Some(false) => Expr::Not(right),
817 None => lit_bool_null(),
818 })
819 }
820 Expr::BinaryExpr(BinaryExpr {
824 left,
825 op: Eq,
826 right,
827 }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
828 Transformed::yes(match as_bool_lit(&right)? {
829 Some(true) => *left,
830 Some(false) => Expr::Not(left),
831 None => lit_bool_null(),
832 })
833 }
834 Expr::BinaryExpr(BinaryExpr {
839 left,
840 op: Eq,
841 right,
842 }) if (left == right) & !left.is_volatile() => {
843 Transformed::yes(match !info.nullable(&left)? {
844 true => lit(true),
845 false => Expr::BinaryExpr(BinaryExpr {
846 left: Box::new(Expr::IsNotNull(left)),
847 op: Or,
848 right: Box::new(lit_bool_null()),
849 }),
850 })
851 }
852
853 Expr::BinaryExpr(BinaryExpr {
860 left,
861 op: NotEq,
862 right,
863 }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
864 Transformed::yes(match as_bool_lit(&left)? {
865 Some(true) => Expr::Not(right),
866 Some(false) => *right,
867 None => lit_bool_null(),
868 })
869 }
870 Expr::BinaryExpr(BinaryExpr {
874 left,
875 op: NotEq,
876 right,
877 }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
878 Transformed::yes(match as_bool_lit(&right)? {
879 Some(true) => Expr::Not(left),
880 Some(false) => *left,
881 None => lit_bool_null(),
882 })
883 }
884
885 Expr::BinaryExpr(BinaryExpr {
891 left,
892 op: Or,
893 right: _,
894 }) if is_true(&left) => Transformed::yes(*left),
895 Expr::BinaryExpr(BinaryExpr {
897 left,
898 op: Or,
899 right,
900 }) if is_false(&left) => Transformed::yes(*right),
901 Expr::BinaryExpr(BinaryExpr {
903 left: _,
904 op: Or,
905 right,
906 }) if is_true(&right) => Transformed::yes(*right),
907 Expr::BinaryExpr(BinaryExpr {
909 left,
910 op: Or,
911 right,
912 }) if is_false(&right) => Transformed::yes(*left),
913 Expr::BinaryExpr(BinaryExpr {
915 left,
916 op: Or,
917 right,
918 }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
919 Transformed::yes(lit(true))
920 }
921 Expr::BinaryExpr(BinaryExpr {
923 left,
924 op: Or,
925 right,
926 }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
927 Transformed::yes(lit(true))
928 }
929 Expr::BinaryExpr(BinaryExpr {
931 left,
932 op: Or,
933 right,
934 }) if expr_contains(&left, &right, Or) => Transformed::yes(*left),
935 Expr::BinaryExpr(BinaryExpr {
937 left,
938 op: Or,
939 right,
940 }) if expr_contains(&right, &left, Or) => Transformed::yes(*right),
941 Expr::BinaryExpr(BinaryExpr {
943 left,
944 op: Or,
945 right,
946 }) if is_op_with(And, &right, &left) => Transformed::yes(*left),
947 Expr::BinaryExpr(BinaryExpr {
949 left,
950 op: Or,
951 right,
952 }) if is_op_with(And, &left, &right) => Transformed::yes(*right),
953 Expr::BinaryExpr(BinaryExpr {
956 left,
957 op: Or,
958 right,
959 }) if has_common_conjunction(&left, &right) => {
960 let lhs: IndexSet<Expr> = iter_conjunction_owned(*left).collect();
961 let (common, rhs): (Vec<_>, Vec<_>) = iter_conjunction_owned(*right)
962 .partition(|e| lhs.contains(e) && !e.is_volatile());
963
964 let new_rhs = rhs.into_iter().reduce(and);
965 let new_lhs = lhs.into_iter().filter(|e| !common.contains(e)).reduce(and);
966 let common_conjunction = common.into_iter().reduce(and).unwrap();
967
968 let new_expr = match (new_lhs, new_rhs) {
969 (Some(lhs), Some(rhs)) => and(common_conjunction, or(lhs, rhs)),
970 (_, _) => common_conjunction,
971 };
972 Transformed::yes(new_expr)
973 }
974
975 Expr::BinaryExpr(BinaryExpr {
981 left,
982 op: And,
983 right,
984 }) if is_true(&left) => Transformed::yes(*right),
985 Expr::BinaryExpr(BinaryExpr {
987 left,
988 op: And,
989 right: _,
990 }) if is_false(&left) => Transformed::yes(*left),
991 Expr::BinaryExpr(BinaryExpr {
993 left,
994 op: And,
995 right,
996 }) if is_true(&right) => Transformed::yes(*left),
997 Expr::BinaryExpr(BinaryExpr {
999 left: _,
1000 op: And,
1001 right,
1002 }) if is_false(&right) => Transformed::yes(*right),
1003 Expr::BinaryExpr(BinaryExpr {
1005 left,
1006 op: And,
1007 right,
1008 }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
1009 Transformed::yes(lit(false))
1010 }
1011 Expr::BinaryExpr(BinaryExpr {
1013 left,
1014 op: And,
1015 right,
1016 }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
1017 Transformed::yes(lit(false))
1018 }
1019 Expr::BinaryExpr(BinaryExpr {
1021 left,
1022 op: And,
1023 right,
1024 }) if expr_contains(&left, &right, And) => Transformed::yes(*left),
1025 Expr::BinaryExpr(BinaryExpr {
1027 left,
1028 op: And,
1029 right,
1030 }) if expr_contains(&right, &left, And) => Transformed::yes(*right),
1031 Expr::BinaryExpr(BinaryExpr {
1033 left,
1034 op: And,
1035 right,
1036 }) if is_op_with(Or, &right, &left) => Transformed::yes(*left),
1037 Expr::BinaryExpr(BinaryExpr {
1039 left,
1040 op: And,
1041 right,
1042 }) if is_op_with(Or, &left, &right) => Transformed::yes(*right),
1043 Expr::BinaryExpr(BinaryExpr {
1045 left,
1046 op: And,
1047 right,
1048 }) if can_reduce_to_equal_statement(&left, &right) => {
1049 if let Expr::BinaryExpr(BinaryExpr {
1050 left: left_left,
1051 right: left_right,
1052 ..
1053 }) = *left
1054 {
1055 Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1056 left: left_left,
1057 op: Eq,
1058 right: left_right,
1059 }))
1060 } else {
1061 return internal_err!("can_reduce_to_equal_statement should only be called with a BinaryExpr");
1062 }
1063 }
1064
1065 Expr::BinaryExpr(BinaryExpr {
1071 left,
1072 op: Multiply,
1073 right,
1074 }) if is_one(&right) => {
1075 simplify_right_is_one_case(info, left, &Multiply, &right)?
1076 }
1077 Expr::BinaryExpr(BinaryExpr {
1079 left,
1080 op: Multiply,
1081 right,
1082 }) if is_one(&left) => {
1083 simplify_right_is_one_case(info, right, &Multiply, &left)?
1085 }
1086
1087 Expr::BinaryExpr(BinaryExpr {
1089 left,
1090 op: Multiply,
1091 right,
1092 }) if !info.nullable(&left)?
1093 && !info.get_data_type(&left)?.is_floating()
1094 && is_zero(&right) =>
1095 {
1096 Transformed::yes(*right)
1097 }
1098 Expr::BinaryExpr(BinaryExpr {
1100 left,
1101 op: Multiply,
1102 right,
1103 }) if !info.nullable(&right)?
1104 && !info.get_data_type(&right)?.is_floating()
1105 && is_zero(&left) =>
1106 {
1107 Transformed::yes(*left)
1108 }
1109
1110 Expr::BinaryExpr(BinaryExpr {
1116 left,
1117 op: Divide,
1118 right,
1119 }) if is_one(&right) => {
1120 simplify_right_is_one_case(info, left, &Divide, &right)?
1121 }
1122
1123 Expr::BinaryExpr(BinaryExpr {
1129 left,
1130 op: Modulo,
1131 right,
1132 }) if !info.nullable(&left)?
1133 && !info.get_data_type(&left)?.is_floating()
1134 && is_one(&right) =>
1135 {
1136 Transformed::yes(Expr::Literal(
1137 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1138 None,
1139 ))
1140 }
1141
1142 Expr::BinaryExpr(BinaryExpr {
1148 left,
1149 op: BitwiseAnd,
1150 right,
1151 }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*right),
1152
1153 Expr::BinaryExpr(BinaryExpr {
1155 left,
1156 op: BitwiseAnd,
1157 right,
1158 }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*left),
1159
1160 Expr::BinaryExpr(BinaryExpr {
1162 left,
1163 op: BitwiseAnd,
1164 right,
1165 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1166 Transformed::yes(Expr::Literal(
1167 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1168 None,
1169 ))
1170 }
1171
1172 Expr::BinaryExpr(BinaryExpr {
1174 left,
1175 op: BitwiseAnd,
1176 right,
1177 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1178 Transformed::yes(Expr::Literal(
1179 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1180 None,
1181 ))
1182 }
1183
1184 Expr::BinaryExpr(BinaryExpr {
1186 left,
1187 op: BitwiseAnd,
1188 right,
1189 }) if expr_contains(&left, &right, BitwiseAnd) => Transformed::yes(*left),
1190
1191 Expr::BinaryExpr(BinaryExpr {
1193 left,
1194 op: BitwiseAnd,
1195 right,
1196 }) if expr_contains(&right, &left, BitwiseAnd) => Transformed::yes(*right),
1197
1198 Expr::BinaryExpr(BinaryExpr {
1200 left,
1201 op: BitwiseAnd,
1202 right,
1203 }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => {
1204 Transformed::yes(*left)
1205 }
1206
1207 Expr::BinaryExpr(BinaryExpr {
1209 left,
1210 op: BitwiseAnd,
1211 right,
1212 }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => {
1213 Transformed::yes(*right)
1214 }
1215
1216 Expr::BinaryExpr(BinaryExpr {
1222 left,
1223 op: BitwiseOr,
1224 right,
1225 }) if is_zero(&right) => Transformed::yes(*left),
1226
1227 Expr::BinaryExpr(BinaryExpr {
1229 left,
1230 op: BitwiseOr,
1231 right,
1232 }) if is_zero(&left) => Transformed::yes(*right),
1233
1234 Expr::BinaryExpr(BinaryExpr {
1236 left,
1237 op: BitwiseOr,
1238 right,
1239 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1240 Transformed::yes(Expr::Literal(
1241 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1242 None,
1243 ))
1244 }
1245
1246 Expr::BinaryExpr(BinaryExpr {
1248 left,
1249 op: BitwiseOr,
1250 right,
1251 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1252 Transformed::yes(Expr::Literal(
1253 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1254 None,
1255 ))
1256 }
1257
1258 Expr::BinaryExpr(BinaryExpr {
1260 left,
1261 op: BitwiseOr,
1262 right,
1263 }) if expr_contains(&left, &right, BitwiseOr) => Transformed::yes(*left),
1264
1265 Expr::BinaryExpr(BinaryExpr {
1267 left,
1268 op: BitwiseOr,
1269 right,
1270 }) if expr_contains(&right, &left, BitwiseOr) => Transformed::yes(*right),
1271
1272 Expr::BinaryExpr(BinaryExpr {
1274 left,
1275 op: BitwiseOr,
1276 right,
1277 }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => {
1278 Transformed::yes(*left)
1279 }
1280
1281 Expr::BinaryExpr(BinaryExpr {
1283 left,
1284 op: BitwiseOr,
1285 right,
1286 }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => {
1287 Transformed::yes(*right)
1288 }
1289
1290 Expr::BinaryExpr(BinaryExpr {
1296 left,
1297 op: BitwiseXor,
1298 right,
1299 }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*left),
1300
1301 Expr::BinaryExpr(BinaryExpr {
1303 left,
1304 op: BitwiseXor,
1305 right,
1306 }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*right),
1307
1308 Expr::BinaryExpr(BinaryExpr {
1310 left,
1311 op: BitwiseXor,
1312 right,
1313 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1314 Transformed::yes(Expr::Literal(
1315 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1316 None,
1317 ))
1318 }
1319
1320 Expr::BinaryExpr(BinaryExpr {
1322 left,
1323 op: BitwiseXor,
1324 right,
1325 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1326 Transformed::yes(Expr::Literal(
1327 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1328 None,
1329 ))
1330 }
1331
1332 Expr::BinaryExpr(BinaryExpr {
1334 left,
1335 op: BitwiseXor,
1336 right,
1337 }) if expr_contains(&left, &right, BitwiseXor) => {
1338 let expr = delete_xor_in_complex_expr(&left, &right, false);
1339 Transformed::yes(if expr == *right {
1340 Expr::Literal(
1341 ScalarValue::new_zero(&info.get_data_type(&right)?)?,
1342 None,
1343 )
1344 } else {
1345 expr
1346 })
1347 }
1348
1349 Expr::BinaryExpr(BinaryExpr {
1351 left,
1352 op: BitwiseXor,
1353 right,
1354 }) if expr_contains(&right, &left, BitwiseXor) => {
1355 let expr = delete_xor_in_complex_expr(&right, &left, true);
1356 Transformed::yes(if expr == *left {
1357 Expr::Literal(
1358 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1359 None,
1360 )
1361 } else {
1362 expr
1363 })
1364 }
1365
1366 Expr::BinaryExpr(BinaryExpr {
1372 left,
1373 op: BitwiseShiftRight,
1374 right,
1375 }) if is_zero(&right) => Transformed::yes(*left),
1376
1377 Expr::BinaryExpr(BinaryExpr {
1383 left,
1384 op: BitwiseShiftLeft,
1385 right,
1386 }) if is_zero(&right) => Transformed::yes(*left),
1387
1388 Expr::Not(inner) => Transformed::yes(negate_clause(*inner)),
1392
1393 Expr::Negative(inner) => Transformed::yes(distribute_negation(*inner)),
1397
1398 Expr::Case(Case {
1414 expr: None,
1415 when_then_expr,
1416 else_expr,
1417 }) if !when_then_expr.is_empty()
1418 && when_then_expr.len() < 3 && info.is_boolean_type(&when_then_expr[0].1)? =>
1420 {
1421 let mut filter_expr = lit(false);
1423 let mut out_expr = lit(false);
1425
1426 for (when, then) in when_then_expr {
1427 let when = is_exactly_true(*when, info)?;
1428 let case_expr =
1429 when.clone().and(filter_expr.clone().not()).and(*then);
1430
1431 out_expr = out_expr.or(case_expr);
1432 filter_expr = filter_expr.or(when);
1433 }
1434
1435 let else_expr = else_expr.map(|b| *b).unwrap_or_else(lit_bool_null);
1436 let case_expr = filter_expr.not().and(else_expr);
1437 out_expr = out_expr.or(case_expr);
1438
1439 out_expr.rewrite(self)?
1441 }
1442 Expr::ScalarFunction(ScalarFunction { func: udf, args }) => {
1443 match udf.simplify(args, info)? {
1444 ExprSimplifyResult::Original(args) => {
1445 Transformed::no(Expr::ScalarFunction(ScalarFunction {
1446 func: udf,
1447 args,
1448 }))
1449 }
1450 ExprSimplifyResult::Simplified(expr) => Transformed::yes(expr),
1451 }
1452 }
1453
1454 Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction {
1455 ref func,
1456 ..
1457 }) => match (func.simplify(), expr) {
1458 (Some(simplify_function), Expr::AggregateFunction(af)) => {
1459 Transformed::yes(simplify_function(af, info)?)
1460 }
1461 (_, expr) => Transformed::no(expr),
1462 },
1463
1464 Expr::WindowFunction(ref window_fun) => match (window_fun.simplify(), expr) {
1465 (Some(simplify_function), Expr::WindowFunction(wf)) => {
1466 Transformed::yes(simplify_function(*wf, info)?)
1467 }
1468 (_, expr) => Transformed::no(expr),
1469 },
1470
1471 Expr::Between(between) => Transformed::yes(if between.negated {
1478 let l = *between.expr.clone();
1479 let r = *between.expr;
1480 or(l.lt(*between.low), r.gt(*between.high))
1481 } else {
1482 and(
1483 between.expr.clone().gt_eq(*between.low),
1484 between.expr.lt_eq(*between.high),
1485 )
1486 }),
1487
1488 Expr::BinaryExpr(BinaryExpr {
1492 left,
1493 op: op @ (RegexMatch | RegexNotMatch | RegexIMatch | RegexNotIMatch),
1494 right,
1495 }) => Transformed::yes(simplify_regex_expr(left, op, right)?),
1496
1497 Expr::Like(like) => {
1499 let escape_char = like.escape_char.unwrap_or('\\');
1501 match as_string_scalar(&like.pattern) {
1502 Some((data_type, pattern_str)) => {
1503 match pattern_str {
1504 None => return Ok(Transformed::yes(lit_bool_null())),
1505 Some(pattern_str) if pattern_str == "%" => {
1506 let result_for_non_null = lit(!like.negated);
1513 Transformed::yes(if !info.nullable(&like.expr)? {
1514 result_for_non_null
1515 } else {
1516 Expr::Case(Case {
1517 expr: Some(Box::new(Expr::IsNotNull(like.expr))),
1518 when_then_expr: vec![(
1519 Box::new(lit(true)),
1520 Box::new(result_for_non_null),
1521 )],
1522 else_expr: None,
1523 })
1524 })
1525 }
1526 Some(pattern_str)
1527 if pattern_str.contains("%%")
1528 && !pattern_str.contains(escape_char) =>
1529 {
1530 let simplified_pattern = Regex::new("%%+")
1533 .unwrap()
1534 .replace_all(pattern_str, "%")
1535 .to_string();
1536 Transformed::yes(Expr::Like(Like {
1537 pattern: Box::new(to_string_scalar(
1538 data_type,
1539 Some(simplified_pattern),
1540 )),
1541 ..like
1542 }))
1543 }
1544 Some(pattern_str)
1545 if !like.case_insensitive
1546 && !pattern_str
1547 .contains(['%', '_', escape_char].as_ref()) =>
1548 {
1549 Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1552 left: like.expr.clone(),
1553 op: if like.negated { NotEq } else { Eq },
1554 right: like.pattern.clone(),
1555 }))
1556 }
1557
1558 Some(_pattern_str) => Transformed::no(Expr::Like(like)),
1559 }
1560 }
1561 None => Transformed::no(Expr::Like(like)),
1562 }
1563 }
1564
1565 Expr::IsNotNull(expr) | Expr::IsNotUnknown(expr)
1567 if !info.nullable(&expr)? =>
1568 {
1569 Transformed::yes(lit(true))
1570 }
1571
1572 Expr::IsNull(expr) | Expr::IsUnknown(expr) if !info.nullable(&expr)? => {
1574 Transformed::yes(lit(false))
1575 }
1576
1577 Expr::InList(InList {
1580 expr: _,
1581 list,
1582 negated,
1583 }) if list.is_empty() => Transformed::yes(lit(negated)),
1584
1585 Expr::InList(InList {
1588 expr,
1589 list,
1590 negated: _,
1591 }) if is_null(expr.as_ref()) && !list.is_empty() => {
1592 Transformed::yes(lit_bool_null())
1593 }
1594
1595 Expr::InList(InList {
1597 expr,
1598 mut list,
1599 negated,
1600 }) if list.len() == 1
1601 && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
1602 {
1603 let Expr::ScalarSubquery(subquery) = list.remove(0) else {
1604 unreachable!()
1605 };
1606
1607 Transformed::yes(Expr::InSubquery(InSubquery::new(
1608 expr, subquery, negated,
1609 )))
1610 }
1611
1612 Expr::BinaryExpr(BinaryExpr {
1616 left,
1617 op: Or,
1618 right,
1619 }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
1620 let lhs = to_inlist(*left).unwrap();
1621 let rhs = to_inlist(*right).unwrap();
1622 let mut seen: HashSet<Expr> = HashSet::new();
1623 let list = lhs
1624 .list
1625 .into_iter()
1626 .chain(rhs.list)
1627 .filter(|e| seen.insert(e.to_owned()))
1628 .collect::<Vec<_>>();
1629
1630 let merged_inlist = InList {
1631 expr: lhs.expr,
1632 list,
1633 negated: false,
1634 };
1635
1636 Transformed::yes(Expr::InList(merged_inlist))
1637 }
1638
1639 Expr::BinaryExpr(BinaryExpr {
1656 left,
1657 op: And,
1658 right,
1659 }) if are_inlist_and_eq_and_match_neg(
1660 left.as_ref(),
1661 right.as_ref(),
1662 false,
1663 false,
1664 ) =>
1665 {
1666 match (*left, *right) {
1667 (Expr::InList(l1), Expr::InList(l2)) => {
1668 return inlist_intersection(l1, &l2, false).map(Transformed::yes);
1669 }
1670 _ => unreachable!(),
1672 }
1673 }
1674
1675 Expr::BinaryExpr(BinaryExpr {
1676 left,
1677 op: And,
1678 right,
1679 }) if are_inlist_and_eq_and_match_neg(
1680 left.as_ref(),
1681 right.as_ref(),
1682 true,
1683 true,
1684 ) =>
1685 {
1686 match (*left, *right) {
1687 (Expr::InList(l1), Expr::InList(l2)) => {
1688 return inlist_union(l1, l2, true).map(Transformed::yes);
1689 }
1690 _ => unreachable!(),
1692 }
1693 }
1694
1695 Expr::BinaryExpr(BinaryExpr {
1696 left,
1697 op: And,
1698 right,
1699 }) if are_inlist_and_eq_and_match_neg(
1700 left.as_ref(),
1701 right.as_ref(),
1702 false,
1703 true,
1704 ) =>
1705 {
1706 match (*left, *right) {
1707 (Expr::InList(l1), Expr::InList(l2)) => {
1708 return inlist_except(l1, &l2).map(Transformed::yes);
1709 }
1710 _ => unreachable!(),
1712 }
1713 }
1714
1715 Expr::BinaryExpr(BinaryExpr {
1716 left,
1717 op: And,
1718 right,
1719 }) if are_inlist_and_eq_and_match_neg(
1720 left.as_ref(),
1721 right.as_ref(),
1722 true,
1723 false,
1724 ) =>
1725 {
1726 match (*left, *right) {
1727 (Expr::InList(l1), Expr::InList(l2)) => {
1728 return inlist_except(l2, &l1).map(Transformed::yes);
1729 }
1730 _ => unreachable!(),
1732 }
1733 }
1734
1735 Expr::BinaryExpr(BinaryExpr {
1736 left,
1737 op: Or,
1738 right,
1739 }) if are_inlist_and_eq_and_match_neg(
1740 left.as_ref(),
1741 right.as_ref(),
1742 true,
1743 true,
1744 ) =>
1745 {
1746 match (*left, *right) {
1747 (Expr::InList(l1), Expr::InList(l2)) => {
1748 return inlist_intersection(l1, &l2, true).map(Transformed::yes);
1749 }
1750 _ => unreachable!(),
1752 }
1753 }
1754
1755 Expr::BinaryExpr(BinaryExpr { left, op, right })
1762 if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1763 info, &left, op, &right,
1764 ) && op.supports_propagation() =>
1765 {
1766 unwrap_cast_in_comparison_for_binary(info, *left, *right, op)?
1767 }
1768 Expr::BinaryExpr(BinaryExpr { left, op, right })
1772 if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1773 info, &right, op, &left,
1774 ) && op.supports_propagation()
1775 && op.swap().is_some() =>
1776 {
1777 unwrap_cast_in_comparison_for_binary(
1778 info,
1779 *right,
1780 *left,
1781 op.swap().unwrap(),
1782 )?
1783 }
1784 Expr::InList(InList {
1787 expr: mut left,
1788 list,
1789 negated,
1790 }) if is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist(
1791 info, &left, &list,
1792 ) =>
1793 {
1794 let (Expr::TryCast(TryCast {
1795 expr: left_expr, ..
1796 })
1797 | Expr::Cast(Cast {
1798 expr: left_expr, ..
1799 })) = left.as_mut()
1800 else {
1801 return internal_err!("Expect cast expr, but got {:?}", left)?;
1802 };
1803
1804 let expr_type = info.get_data_type(left_expr)?;
1805 let right_exprs = list
1806 .into_iter()
1807 .map(|right| {
1808 match right {
1809 Expr::Literal(right_lit_value, _) => {
1810 let Some(value) = try_cast_literal_to_type(&right_lit_value, &expr_type) else {
1813 internal_err!(
1814 "Can't cast the list expr {:?} to type {:?}",
1815 right_lit_value, &expr_type
1816 )?
1817 };
1818 Ok(lit(value))
1819 }
1820 other_expr => internal_err!(
1821 "Only support literal expr to optimize, but the expr is {:?}",
1822 &other_expr
1823 ),
1824 }
1825 })
1826 .collect::<Result<Vec<_>>>()?;
1827
1828 Transformed::yes(Expr::InList(InList {
1829 expr: std::mem::take(left_expr),
1830 list: right_exprs,
1831 negated,
1832 }))
1833 }
1834
1835 expr => Transformed::no(expr),
1837 })
1838 }
1839}
1840
1841fn as_string_scalar(expr: &Expr) -> Option<(DataType, &Option<String>)> {
1842 match expr {
1843 Expr::Literal(ScalarValue::Utf8(s), _) => Some((DataType::Utf8, s)),
1844 Expr::Literal(ScalarValue::LargeUtf8(s), _) => Some((DataType::LargeUtf8, s)),
1845 Expr::Literal(ScalarValue::Utf8View(s), _) => Some((DataType::Utf8View, s)),
1846 _ => None,
1847 }
1848}
1849
1850fn to_string_scalar(data_type: DataType, value: Option<String>) -> Expr {
1851 match data_type {
1852 DataType::Utf8 => Expr::Literal(ScalarValue::Utf8(value), None),
1853 DataType::LargeUtf8 => Expr::Literal(ScalarValue::LargeUtf8(value), None),
1854 DataType::Utf8View => Expr::Literal(ScalarValue::Utf8View(value), None),
1855 _ => unreachable!(),
1856 }
1857}
1858
1859fn has_common_conjunction(lhs: &Expr, rhs: &Expr) -> bool {
1860 let lhs_set: HashSet<&Expr> = iter_conjunction(lhs).collect();
1861 iter_conjunction(rhs).any(|e| lhs_set.contains(&e) && !e.is_volatile())
1862}
1863
1864fn are_inlist_and_eq_and_match_neg(
1866 left: &Expr,
1867 right: &Expr,
1868 is_left_neg: bool,
1869 is_right_neg: bool,
1870) -> bool {
1871 match (left, right) {
1872 (Expr::InList(l), Expr::InList(r)) => {
1873 l.expr == r.expr && l.negated == is_left_neg && r.negated == is_right_neg
1874 }
1875 _ => false,
1876 }
1877}
1878
1879fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
1881 let left = as_inlist(left);
1882 let right = as_inlist(right);
1883 if let (Some(lhs), Some(rhs)) = (left, right) {
1884 matches!(lhs.expr.as_ref(), Expr::Column(_))
1885 && matches!(rhs.expr.as_ref(), Expr::Column(_))
1886 && lhs.expr == rhs.expr
1887 && !lhs.negated
1888 && !rhs.negated
1889 } else {
1890 false
1891 }
1892}
1893
1894fn as_inlist(expr: &'_ Expr) -> Option<Cow<'_, InList>> {
1896 match expr {
1897 Expr::InList(inlist) => Some(Cow::Borrowed(inlist)),
1898 Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::Eq => {
1899 match (left.as_ref(), right.as_ref()) {
1900 (Expr::Column(_), Expr::Literal(_, _)) => Some(Cow::Owned(InList {
1901 expr: left.clone(),
1902 list: vec![*right.clone()],
1903 negated: false,
1904 })),
1905 (Expr::Literal(_, _), Expr::Column(_)) => Some(Cow::Owned(InList {
1906 expr: right.clone(),
1907 list: vec![*left.clone()],
1908 negated: false,
1909 })),
1910 _ => None,
1911 }
1912 }
1913 _ => None,
1914 }
1915}
1916
1917fn to_inlist(expr: Expr) -> Option<InList> {
1918 match expr {
1919 Expr::InList(inlist) => Some(inlist),
1920 Expr::BinaryExpr(BinaryExpr {
1921 left,
1922 op: Operator::Eq,
1923 right,
1924 }) => match (left.as_ref(), right.as_ref()) {
1925 (Expr::Column(_), Expr::Literal(_, _)) => Some(InList {
1926 expr: left,
1927 list: vec![*right],
1928 negated: false,
1929 }),
1930 (Expr::Literal(_, _), Expr::Column(_)) => Some(InList {
1931 expr: right,
1932 list: vec![*left],
1933 negated: false,
1934 }),
1935 _ => None,
1936 },
1937 _ => None,
1938 }
1939}
1940
1941fn inlist_union(mut l1: InList, l2: InList, negated: bool) -> Result<Expr> {
1944 let l1_items: HashSet<_> = l1.list.iter().collect();
1946
1947 let keep_l2: Vec<_> = l2
1949 .list
1950 .into_iter()
1951 .filter_map(|e| if l1_items.contains(&e) { None } else { Some(e) })
1952 .collect();
1953
1954 l1.list.extend(keep_l2);
1955 l1.negated = negated;
1956 Ok(Expr::InList(l1))
1957}
1958
1959fn inlist_intersection(mut l1: InList, l2: &InList, negated: bool) -> Result<Expr> {
1962 let l2_items = l2.list.iter().collect::<HashSet<_>>();
1963
1964 l1.list.retain(|e| l2_items.contains(e));
1966
1967 if l1.list.is_empty() {
1970 return Ok(lit(negated));
1971 }
1972 Ok(Expr::InList(l1))
1973}
1974
1975fn inlist_except(mut l1: InList, l2: &InList) -> Result<Expr> {
1978 let l2_items = l2.list.iter().collect::<HashSet<_>>();
1979
1980 l1.list.retain(|e| !l2_items.contains(e));
1982
1983 if l1.list.is_empty() {
1984 return Ok(lit(false));
1985 }
1986 Ok(Expr::InList(l1))
1987}
1988
1989fn is_exactly_true(expr: Expr, info: &impl SimplifyInfo) -> Result<Expr> {
1991 if !info.nullable(&expr)? {
1992 Ok(expr)
1993 } else {
1994 Ok(Expr::BinaryExpr(BinaryExpr {
1995 left: Box::new(expr),
1996 op: Operator::IsNotDistinctFrom,
1997 right: Box::new(lit(true)),
1998 }))
1999 }
2000}
2001
2002fn simplify_right_is_one_case<S: SimplifyInfo>(
2007 info: &S,
2008 left: Box<Expr>,
2009 op: &Operator,
2010 right: &Expr,
2011) -> Result<Transformed<Expr>> {
2012 let left_type = info.get_data_type(&left)?;
2014 let right_type = info.get_data_type(right)?;
2015 match BinaryTypeCoercer::new(&left_type, op, &right_type).get_result_type() {
2016 Ok(result_type) => {
2017 if left_type != result_type {
2019 Ok(Transformed::yes(Expr::Cast(Cast::new(left, result_type))))
2020 } else {
2021 Ok(Transformed::yes(*left))
2022 }
2023 }
2024 Err(_) => Ok(Transformed::yes(*left)),
2025 }
2026}
2027
2028#[cfg(test)]
2029mod tests {
2030 use super::*;
2031 use crate::simplify_expressions::SimplifyContext;
2032 use crate::test::test_table_scan_with_name;
2033 use arrow::datatypes::FieldRef;
2034 use datafusion_common::{assert_contains, DFSchemaRef, ToDFSchema};
2035 use datafusion_expr::{
2036 expr::WindowFunction,
2037 function::{
2038 AccumulatorArgs, AggregateFunctionSimplification,
2039 WindowFunctionSimplification,
2040 },
2041 interval_arithmetic::Interval,
2042 *,
2043 };
2044 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
2045 use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
2046 use std::hash::Hash;
2047 use std::{
2048 collections::HashMap,
2049 ops::{BitAnd, BitOr, BitXor},
2050 sync::Arc,
2051 };
2052
2053 #[test]
2057 fn api_basic() {
2058 let props = ExecutionProps::new();
2059 let simplifier =
2060 ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema()));
2061
2062 let expr = lit(1) + lit(2);
2063 let expected = lit(3);
2064 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2065 }
2066
2067 #[test]
2068 fn basic_coercion() {
2069 let schema = test_schema();
2070 let props = ExecutionProps::new();
2071 let simplifier = ExprSimplifier::new(
2072 SimplifyContext::new(&props).with_schema(Arc::clone(&schema)),
2073 );
2074
2075 let expr = (lit(1i64) + lit(2i32)).lt(col("i"));
2078 let expected = lit(3i64).lt(col("i"));
2080
2081 let expr = simplifier.coerce(expr, &schema).unwrap();
2082
2083 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2084 }
2085
2086 fn test_schema() -> DFSchemaRef {
2087 Schema::new(vec![
2088 Field::new("i", DataType::Int64, false),
2089 Field::new("b", DataType::Boolean, true),
2090 ])
2091 .to_dfschema_ref()
2092 .unwrap()
2093 }
2094
2095 #[test]
2096 fn simplify_and_constant_prop() {
2097 let props = ExecutionProps::new();
2098 let simplifier =
2099 ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema()));
2100
2101 let expr = (col("i") * (lit(1) - lit(1))).gt(lit(0));
2104 let expected = lit(false);
2105 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2106 }
2107
2108 #[test]
2109 fn simplify_and_constant_prop_with_case() {
2110 let props = ExecutionProps::new();
2111 let simplifier =
2112 ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema()));
2113
2114 let expr = when(col("i").gt(lit(5)).and(lit(false)), col("i").gt(lit(5)))
2122 .when(col("i").lt(lit(5)).and(lit(true)), col("i").lt(lit(5)))
2123 .otherwise(lit(false))
2124 .unwrap();
2125 let expected = col("i").lt(lit(5));
2126 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2127 }
2128
2129 #[test]
2134 fn test_simplify_canonicalize() {
2135 {
2136 let expr = lit(1).lt(col("c2")).and(col("c2").gt(lit(1)));
2137 let expected = col("c2").gt(lit(1));
2138 assert_eq!(simplify(expr), expected);
2139 }
2140 {
2141 let expr = col("c1").lt(col("c2")).and(col("c2").gt(col("c1")));
2142 let expected = col("c2").gt(col("c1"));
2143 assert_eq!(simplify(expr), expected);
2144 }
2145 {
2146 let expr = col("c1")
2147 .eq(lit(1))
2148 .and(lit(1).eq(col("c1")))
2149 .and(col("c1").eq(lit(3)));
2150 let expected = col("c1").eq(lit(1)).and(col("c1").eq(lit(3)));
2151 assert_eq!(simplify(expr), expected);
2152 }
2153 {
2154 let expr = col("c1")
2155 .eq(col("c2"))
2156 .and(col("c1").gt(lit(5)))
2157 .and(col("c2").eq(col("c1")));
2158 let expected = col("c2").eq(col("c1")).and(col("c1").gt(lit(5)));
2159 assert_eq!(simplify(expr), expected);
2160 }
2161 {
2162 let expr = col("c1")
2163 .eq(lit(1))
2164 .and(col("c2").gt(lit(3)).or(lit(3).lt(col("c2"))));
2165 let expected = col("c1").eq(lit(1)).and(col("c2").gt(lit(3)));
2166 assert_eq!(simplify(expr), expected);
2167 }
2168 {
2169 let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2170 let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2171 assert_eq!(simplify(expr), expected);
2172 }
2173 {
2174 let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2175 let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2176 assert_eq!(simplify(expr), expected);
2177 }
2178 {
2179 let expr = col("c1").gt(col("c2")).and(col("c1").gt(col("c2")));
2180 let expected = col("c2").lt(col("c1"));
2181 assert_eq!(simplify(expr), expected);
2182 }
2183 }
2184
2185 #[test]
2186 fn test_simplify_eq_not_self() {
2187 let expr_a = col("c2").eq(col("c2"));
2190 let expected_a = col("c2").is_not_null().or(lit_bool_null());
2191
2192 let expr_b = col("c2_non_null").eq(col("c2_non_null"));
2194 let expected_b = lit(true);
2195
2196 assert_eq!(simplify(expr_a), expected_a);
2197 assert_eq!(simplify(expr_b), expected_b);
2198 }
2199
2200 #[test]
2201 fn test_simplify_or_true() {
2202 let expr_a = col("c2").or(lit(true));
2203 let expr_b = lit(true).or(col("c2"));
2204 let expected = lit(true);
2205
2206 assert_eq!(simplify(expr_a), expected);
2207 assert_eq!(simplify(expr_b), expected);
2208 }
2209
2210 #[test]
2211 fn test_simplify_or_false() {
2212 let expr_a = lit(false).or(col("c2"));
2213 let expr_b = col("c2").or(lit(false));
2214 let expected = col("c2");
2215
2216 assert_eq!(simplify(expr_a), expected);
2217 assert_eq!(simplify(expr_b), expected);
2218 }
2219
2220 #[test]
2221 fn test_simplify_or_same() {
2222 let expr = col("c2").or(col("c2"));
2223 let expected = col("c2");
2224
2225 assert_eq!(simplify(expr), expected);
2226 }
2227
2228 #[test]
2229 fn test_simplify_or_not_self() {
2230 let expr_a = col("c2_non_null").or(col("c2_non_null").not());
2233 let expr_b = col("c2_non_null").not().or(col("c2_non_null"));
2234 let expected = lit(true);
2235
2236 assert_eq!(simplify(expr_a), expected);
2237 assert_eq!(simplify(expr_b), expected);
2238 }
2239
2240 #[test]
2241 fn test_simplify_and_false() {
2242 let expr_a = lit(false).and(col("c2"));
2243 let expr_b = col("c2").and(lit(false));
2244 let expected = lit(false);
2245
2246 assert_eq!(simplify(expr_a), expected);
2247 assert_eq!(simplify(expr_b), expected);
2248 }
2249
2250 #[test]
2251 fn test_simplify_and_same() {
2252 let expr = col("c2").and(col("c2"));
2253 let expected = col("c2");
2254
2255 assert_eq!(simplify(expr), expected);
2256 }
2257
2258 #[test]
2259 fn test_simplify_and_true() {
2260 let expr_a = lit(true).and(col("c2"));
2261 let expr_b = col("c2").and(lit(true));
2262 let expected = col("c2");
2263
2264 assert_eq!(simplify(expr_a), expected);
2265 assert_eq!(simplify(expr_b), expected);
2266 }
2267
2268 #[test]
2269 fn test_simplify_and_not_self() {
2270 let expr_a = col("c2_non_null").and(col("c2_non_null").not());
2273 let expr_b = col("c2_non_null").not().and(col("c2_non_null"));
2274 let expected = lit(false);
2275
2276 assert_eq!(simplify(expr_a), expected);
2277 assert_eq!(simplify(expr_b), expected);
2278 }
2279
2280 #[test]
2281 fn test_simplify_multiply_by_one() {
2282 let expr_a = col("c2") * lit(1);
2283 let expr_b = lit(1) * col("c2");
2284 let expected = col("c2");
2285
2286 assert_eq!(simplify(expr_a), expected);
2287 assert_eq!(simplify(expr_b), expected);
2288
2289 let expr = col("c2") * lit(ScalarValue::Decimal128(Some(10000000000), 38, 10));
2290 assert_eq!(simplify(expr), expected);
2291
2292 let expr = lit(ScalarValue::Decimal128(Some(10000000000), 31, 10)) * col("c2");
2293 assert_eq!(simplify(expr), expected);
2294 }
2295
2296 #[test]
2297 fn test_simplify_multiply_by_null() {
2298 let null = lit(ScalarValue::Int64(None));
2299 {
2301 let expr = col("c3") * null.clone();
2302 assert_eq!(simplify(expr), null);
2303 }
2304 {
2306 let expr = null.clone() * col("c3");
2307 assert_eq!(simplify(expr), null);
2308 }
2309 }
2310
2311 #[test]
2312 fn test_simplify_multiply_by_zero() {
2313 {
2315 let expr_a = col("c2") * lit(0);
2316 let expr_b = lit(0) * col("c2");
2317
2318 assert_eq!(simplify(expr_a.clone()), expr_a);
2319 assert_eq!(simplify(expr_b.clone()), expr_b);
2320 }
2321 {
2323 let expr = lit(0) * col("c2_non_null");
2324 assert_eq!(simplify(expr), lit(0));
2325 }
2326 {
2328 let expr = col("c2_non_null") * lit(0);
2329 assert_eq!(simplify(expr), lit(0));
2330 }
2331 {
2333 let expr = col("c2_non_null") * lit(ScalarValue::Decimal128(Some(0), 31, 10));
2334 assert_eq!(
2335 simplify(expr),
2336 lit(ScalarValue::Decimal128(Some(0), 31, 10))
2337 );
2338 let expr = binary_expr(
2339 lit(ScalarValue::Decimal128(Some(0), 31, 10)),
2340 Operator::Multiply,
2341 col("c2_non_null"),
2342 );
2343 assert_eq!(
2344 simplify(expr),
2345 lit(ScalarValue::Decimal128(Some(0), 31, 10))
2346 );
2347 }
2348 }
2349
2350 #[test]
2351 fn test_simplify_divide_by_one() {
2352 let expr = binary_expr(col("c2"), Operator::Divide, lit(1));
2353 let expected = col("c2");
2354 assert_eq!(simplify(expr), expected);
2355 let expr = col("c2") / lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2356 assert_eq!(simplify(expr), expected);
2357 }
2358
2359 #[test]
2360 fn test_simplify_divide_null() {
2361 let null = lit(ScalarValue::Int64(None));
2363 {
2364 let expr = col("c3") / null.clone();
2365 assert_eq!(simplify(expr), null);
2366 }
2367 {
2369 let expr = null.clone() / col("c3");
2370 assert_eq!(simplify(expr), null);
2371 }
2372 }
2373
2374 #[test]
2375 fn test_simplify_divide_by_same() {
2376 let expr = col("c2") / col("c2");
2377 let expected = expr.clone();
2379
2380 assert_eq!(simplify(expr), expected);
2381 }
2382
2383 #[test]
2384 fn test_simplify_modulo_by_null() {
2385 let null = lit(ScalarValue::Int64(None));
2386 {
2388 let expr = col("c3") % null.clone();
2389 assert_eq!(simplify(expr), null);
2390 }
2391 {
2393 let expr = null.clone() % col("c3");
2394 assert_eq!(simplify(expr), null);
2395 }
2396 }
2397
2398 #[test]
2399 fn test_simplify_modulo_by_one() {
2400 let expr = col("c2") % lit(1);
2401 let expected = expr.clone();
2403
2404 assert_eq!(simplify(expr), expected);
2405 }
2406
2407 #[test]
2408 fn test_simplify_divide_zero_by_zero() {
2409 let expr = lit(0) / lit(0);
2412 let expected = expr.clone();
2413
2414 assert_eq!(simplify(expr), expected);
2415 }
2416
2417 #[test]
2418 fn test_simplify_divide_by_zero() {
2419 let expr = col("c2_non_null") / lit(0);
2422 let expected = expr.clone();
2423
2424 assert_eq!(simplify(expr), expected);
2425 }
2426
2427 #[test]
2428 fn test_simplify_modulo_by_one_non_null() {
2429 let expr = col("c3_non_null") % lit(1);
2430 let expected = lit(0_i64);
2431 assert_eq!(simplify(expr), expected);
2432 let expr =
2433 col("c3_non_null") % lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2434 assert_eq!(simplify(expr), expected);
2435 }
2436
2437 #[test]
2438 fn test_simplify_bitwise_xor_by_null() {
2439 let null = lit(ScalarValue::Int64(None));
2440 {
2442 let expr = col("c3") ^ null.clone();
2443 assert_eq!(simplify(expr), null);
2444 }
2445 {
2447 let expr = null.clone() ^ col("c3");
2448 assert_eq!(simplify(expr), null);
2449 }
2450 }
2451
2452 #[test]
2453 fn test_simplify_bitwise_shift_right_by_null() {
2454 let null = lit(ScalarValue::Int64(None));
2455 {
2457 let expr = col("c3") >> null.clone();
2458 assert_eq!(simplify(expr), null);
2459 }
2460 {
2462 let expr = null.clone() >> col("c3");
2463 assert_eq!(simplify(expr), null);
2464 }
2465 }
2466
2467 #[test]
2468 fn test_simplify_bitwise_shift_left_by_null() {
2469 let null = lit(ScalarValue::Int64(None));
2470 {
2472 let expr = col("c3") << null.clone();
2473 assert_eq!(simplify(expr), null);
2474 }
2475 {
2477 let expr = null.clone() << col("c3");
2478 assert_eq!(simplify(expr), null);
2479 }
2480 }
2481
2482 #[test]
2483 fn test_simplify_bitwise_and_by_zero() {
2484 {
2486 let expr = col("c2_non_null") & lit(0);
2487 assert_eq!(simplify(expr), lit(0));
2488 }
2489 {
2491 let expr = lit(0) & col("c2_non_null");
2492 assert_eq!(simplify(expr), lit(0));
2493 }
2494 }
2495
2496 #[test]
2497 fn test_simplify_bitwise_or_by_zero() {
2498 {
2500 let expr = col("c2_non_null") | lit(0);
2501 assert_eq!(simplify(expr), col("c2_non_null"));
2502 }
2503 {
2505 let expr = lit(0) | col("c2_non_null");
2506 assert_eq!(simplify(expr), col("c2_non_null"));
2507 }
2508 }
2509
2510 #[test]
2511 fn test_simplify_bitwise_xor_by_zero() {
2512 {
2514 let expr = col("c2_non_null") ^ lit(0);
2515 assert_eq!(simplify(expr), col("c2_non_null"));
2516 }
2517 {
2519 let expr = lit(0) ^ col("c2_non_null");
2520 assert_eq!(simplify(expr), col("c2_non_null"));
2521 }
2522 }
2523
2524 #[test]
2525 fn test_simplify_bitwise_bitwise_shift_right_by_zero() {
2526 {
2528 let expr = col("c2_non_null") >> lit(0);
2529 assert_eq!(simplify(expr), col("c2_non_null"));
2530 }
2531 }
2532
2533 #[test]
2534 fn test_simplify_bitwise_bitwise_shift_left_by_zero() {
2535 {
2537 let expr = col("c2_non_null") << lit(0);
2538 assert_eq!(simplify(expr), col("c2_non_null"));
2539 }
2540 }
2541
2542 #[test]
2543 fn test_simplify_bitwise_and_by_null() {
2544 let null = Expr::Literal(ScalarValue::Int64(None), None);
2545 {
2547 let expr = col("c3") & null.clone();
2548 assert_eq!(simplify(expr), null);
2549 }
2550 {
2552 let expr = null.clone() & col("c3");
2553 assert_eq!(simplify(expr), null);
2554 }
2555 }
2556
2557 #[test]
2558 fn test_simplify_composed_bitwise_and() {
2559 let expr = bitwise_and(
2562 bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2563 col("c2").gt(lit(5)),
2564 );
2565 let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2566
2567 assert_eq!(simplify(expr), expected);
2568
2569 let expr = bitwise_and(
2572 col("c2").gt(lit(5)),
2573 bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2574 );
2575 let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2576 assert_eq!(simplify(expr), expected);
2577 }
2578
2579 #[test]
2580 fn test_simplify_composed_bitwise_or() {
2581 let expr = bitwise_or(
2584 bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2585 col("c2").gt(lit(5)),
2586 );
2587 let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2588
2589 assert_eq!(simplify(expr), expected);
2590
2591 let expr = bitwise_or(
2594 col("c2").gt(lit(5)),
2595 bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2596 );
2597 let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2598
2599 assert_eq!(simplify(expr), expected);
2600 }
2601
2602 #[test]
2603 fn test_simplify_composed_bitwise_xor() {
2604 let expr = bitwise_xor(
2608 col("c2"),
2609 bitwise_xor(
2610 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2611 bitwise_and(col("c1"), col("c2")),
2612 ),
2613 );
2614
2615 let expected = bitwise_xor(
2616 bitwise_or(col("c2"), col("c1")),
2617 bitwise_and(col("c1"), col("c2")),
2618 );
2619
2620 assert_eq!(simplify(expr), expected);
2621
2622 let expr = bitwise_xor(
2626 col("c2"),
2627 bitwise_xor(
2628 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2629 bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
2630 ),
2631 );
2632
2633 let expected = bitwise_xor(
2634 col("c2"),
2635 bitwise_xor(
2636 bitwise_or(col("c2"), col("c1")),
2637 bitwise_and(col("c1"), col("c2")),
2638 ),
2639 );
2640
2641 assert_eq!(simplify(expr), expected);
2642
2643 let expr = bitwise_xor(
2647 bitwise_xor(
2648 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2649 bitwise_and(col("c1"), col("c2")),
2650 ),
2651 col("c2"),
2652 );
2653
2654 let expected = bitwise_xor(
2655 bitwise_or(col("c2"), col("c1")),
2656 bitwise_and(col("c1"), col("c2")),
2657 );
2658
2659 assert_eq!(simplify(expr), expected);
2660
2661 let expr = bitwise_xor(
2665 bitwise_xor(
2666 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2667 bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
2668 ),
2669 col("c2"),
2670 );
2671
2672 let expected = bitwise_xor(
2673 bitwise_xor(
2674 bitwise_or(col("c2"), col("c1")),
2675 bitwise_and(col("c1"), col("c2")),
2676 ),
2677 col("c2"),
2678 );
2679
2680 assert_eq!(simplify(expr), expected);
2681 }
2682
2683 #[test]
2684 fn test_simplify_negated_bitwise_and() {
2685 let expr = (-col("c4_non_null")) & col("c4_non_null");
2687 let expected = lit(0u32);
2688
2689 assert_eq!(simplify(expr), expected);
2690 let expr = col("c4_non_null") & (-col("c4_non_null"));
2692 let expected = lit(0u32);
2693
2694 assert_eq!(simplify(expr), expected);
2695
2696 let expr = (-col("c3_non_null")) & col("c3_non_null");
2698 let expected = lit(0i64);
2699
2700 assert_eq!(simplify(expr), expected);
2701 let expr = col("c3_non_null") & (-col("c3_non_null"));
2703 let expected = lit(0i64);
2704
2705 assert_eq!(simplify(expr), expected);
2706 }
2707
2708 #[test]
2709 fn test_simplify_negated_bitwise_or() {
2710 let expr = (-col("c4_non_null")) | col("c4_non_null");
2712 let expected = lit(-1i32);
2713
2714 assert_eq!(simplify(expr), expected);
2715
2716 let expr = col("c4_non_null") | (-col("c4_non_null"));
2718 let expected = lit(-1i32);
2719
2720 assert_eq!(simplify(expr), expected);
2721
2722 let expr = (-col("c3_non_null")) | col("c3_non_null");
2724 let expected = lit(-1i64);
2725
2726 assert_eq!(simplify(expr), expected);
2727
2728 let expr = col("c3_non_null") | (-col("c3_non_null"));
2730 let expected = lit(-1i64);
2731
2732 assert_eq!(simplify(expr), expected);
2733 }
2734
2735 #[test]
2736 fn test_simplify_negated_bitwise_xor() {
2737 let expr = (-col("c4_non_null")) ^ col("c4_non_null");
2739 let expected = lit(-1i32);
2740
2741 assert_eq!(simplify(expr), expected);
2742
2743 let expr = col("c4_non_null") ^ (-col("c4_non_null"));
2745 let expected = lit(-1i32);
2746
2747 assert_eq!(simplify(expr), expected);
2748
2749 let expr = (-col("c3_non_null")) ^ col("c3_non_null");
2751 let expected = lit(-1i64);
2752
2753 assert_eq!(simplify(expr), expected);
2754
2755 let expr = col("c3_non_null") ^ (-col("c3_non_null"));
2757 let expected = lit(-1i64);
2758
2759 assert_eq!(simplify(expr), expected);
2760 }
2761
2762 #[test]
2763 fn test_simplify_bitwise_and_or() {
2764 let expr = bitwise_and(
2766 col("c2_non_null").lt(lit(3)),
2767 bitwise_or(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
2768 );
2769 let expected = col("c2_non_null").lt(lit(3));
2770
2771 assert_eq!(simplify(expr), expected);
2772 }
2773
2774 #[test]
2775 fn test_simplify_bitwise_or_and() {
2776 let expr = bitwise_or(
2778 col("c2_non_null").lt(lit(3)),
2779 bitwise_and(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
2780 );
2781 let expected = col("c2_non_null").lt(lit(3));
2782
2783 assert_eq!(simplify(expr), expected);
2784 }
2785
2786 #[test]
2787 fn test_simplify_simple_bitwise_and() {
2788 let expr = (col("c2").gt(lit(5))).bitand(col("c2").gt(lit(5)));
2790 let expected = col("c2").gt(lit(5));
2791
2792 assert_eq!(simplify(expr), expected);
2793 }
2794
2795 #[test]
2796 fn test_simplify_simple_bitwise_or() {
2797 let expr = (col("c2").gt(lit(5))).bitor(col("c2").gt(lit(5)));
2799 let expected = col("c2").gt(lit(5));
2800
2801 assert_eq!(simplify(expr), expected);
2802 }
2803
2804 #[test]
2805 fn test_simplify_simple_bitwise_xor() {
2806 let expr = (col("c4")).bitxor(col("c4"));
2808 let expected = lit(0u32);
2809
2810 assert_eq!(simplify(expr), expected);
2811
2812 let expr = col("c3").bitxor(col("c3"));
2814 let expected = lit(0i64);
2815
2816 assert_eq!(simplify(expr), expected);
2817 }
2818
2819 #[test]
2820 fn test_simplify_modulo_by_zero_non_null() {
2821 let expr = col("c2_non_null") % lit(0);
2824 let expected = expr.clone();
2825
2826 assert_eq!(simplify(expr), expected);
2827 }
2828
2829 #[test]
2830 fn test_simplify_simple_and() {
2831 let expr = (col("c2").gt(lit(5))).and(col("c2").gt(lit(5)));
2833 let expected = col("c2").gt(lit(5));
2834
2835 assert_eq!(simplify(expr), expected);
2836 }
2837
2838 #[test]
2839 fn test_simplify_composed_and() {
2840 let expr = and(
2842 and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2843 col("c2").gt(lit(5)),
2844 );
2845 let expected = and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2846
2847 assert_eq!(simplify(expr), expected);
2848 }
2849
2850 #[test]
2851 fn test_simplify_negated_and() {
2852 let expr = and(col("c2").gt(lit(5)), Expr::not(col("c2").gt(lit(5))));
2854 let expected = col("c2").gt(lit(5)).and(col("c2").lt_eq(lit(5)));
2855
2856 assert_eq!(simplify(expr), expected);
2857 }
2858
2859 #[test]
2860 fn test_simplify_or_and() {
2861 let l = col("c2").gt(lit(5));
2862 let r = and(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
2863
2864 let expr = or(l.clone(), r.clone());
2866
2867 let expected = l.clone();
2868 assert_eq!(simplify(expr), expected);
2869
2870 let expr = or(r, l);
2872 assert_eq!(simplify(expr), expected);
2873 }
2874
2875 #[test]
2876 fn test_simplify_or_and_non_null() {
2877 let l = col("c2_non_null").gt(lit(5));
2878 let r = and(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
2879
2880 let expr = or(l.clone(), r.clone());
2882
2883 let expected = col("c2_non_null").gt(lit(5));
2885
2886 assert_eq!(simplify(expr), expected);
2887
2888 let expr = or(l, r);
2890
2891 assert_eq!(simplify(expr), expected);
2892 }
2893
2894 #[test]
2895 fn test_simplify_and_or() {
2896 let l = col("c2").gt(lit(5));
2897 let r = or(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
2898
2899 let expr = and(l.clone(), r.clone());
2901
2902 let expected = l.clone();
2903 assert_eq!(simplify(expr), expected);
2904
2905 let expr = and(r, l);
2907 assert_eq!(simplify(expr), expected);
2908 }
2909
2910 #[test]
2911 fn test_simplify_and_or_non_null() {
2912 let l = col("c2_non_null").gt(lit(5));
2913 let r = or(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
2914
2915 let expr = and(l.clone(), r.clone());
2917
2918 let expected = col("c2_non_null").gt(lit(5));
2920
2921 assert_eq!(simplify(expr), expected);
2922
2923 let expr = and(l, r);
2925
2926 assert_eq!(simplify(expr), expected);
2927 }
2928
2929 #[test]
2930 fn test_simplify_by_de_morgan_laws() {
2931 let expr = and(col("c3"), col("c4")).not();
2934 let expected = or(col("c3").not(), col("c4").not());
2935 assert_eq!(simplify(expr), expected);
2936 let expr = or(col("c3"), col("c4")).not();
2938 let expected = and(col("c3").not(), col("c4").not());
2939 assert_eq!(simplify(expr), expected);
2940 let expr = col("c3").not().not();
2942 let expected = col("c3");
2943 assert_eq!(simplify(expr), expected);
2944
2945 let expr = -bitwise_and(col("c3"), col("c4"));
2948 let expected = bitwise_or(-col("c3"), -col("c4"));
2949 assert_eq!(simplify(expr), expected);
2950 let expr = -bitwise_or(col("c3"), col("c4"));
2952 let expected = bitwise_and(-col("c3"), -col("c4"));
2953 assert_eq!(simplify(expr), expected);
2954 let expr = -(-col("c3"));
2956 let expected = col("c3");
2957 assert_eq!(simplify(expr), expected);
2958 }
2959
2960 #[test]
2961 fn test_simplify_null_and_false() {
2962 let expr = and(lit_bool_null(), lit(false));
2963 let expr_eq = lit(false);
2964
2965 assert_eq!(simplify(expr), expr_eq);
2966 }
2967
2968 #[test]
2969 fn test_simplify_divide_null_by_null() {
2970 let null = lit(ScalarValue::Int32(None));
2971 let expr_plus = null.clone() / null.clone();
2972 let expr_eq = null;
2973
2974 assert_eq!(simplify(expr_plus), expr_eq);
2975 }
2976
2977 #[test]
2978 fn test_simplify_simplify_arithmetic_expr() {
2979 let expr_plus = lit(1) + lit(1);
2980
2981 assert_eq!(simplify(expr_plus), lit(2));
2982 }
2983
2984 #[test]
2985 fn test_simplify_simplify_eq_expr() {
2986 let expr_eq = binary_expr(lit(1), Operator::Eq, lit(1));
2987
2988 assert_eq!(simplify(expr_eq), lit(true));
2989 }
2990
2991 #[test]
2992 fn test_simplify_regex() {
2993 assert_contains!(
2995 try_simplify(regex_match(col("c1"), lit("foo{")))
2996 .unwrap_err()
2997 .to_string(),
2998 "regex parse error"
2999 );
3000
3001 assert_no_change(regex_match(col("c1"), lit("foo.*")));
3003 assert_no_change(regex_match(col("c1"), lit("(foo)")));
3004 assert_no_change(regex_match(col("c1"), lit("%")));
3005 assert_no_change(regex_match(col("c1"), lit("_")));
3006 assert_no_change(regex_match(col("c1"), lit("f%o")));
3007 assert_no_change(regex_match(col("c1"), lit("^f%o")));
3008 assert_no_change(regex_match(col("c1"), lit("f_o")));
3009
3010 assert_change(
3012 regex_match(col("c1"), lit("")),
3013 if_not_null(col("c1"), true),
3014 );
3015 assert_change(
3016 regex_not_match(col("c1"), lit("")),
3017 if_not_null(col("c1"), false),
3018 );
3019 assert_change(
3020 regex_imatch(col("c1"), lit("")),
3021 if_not_null(col("c1"), true),
3022 );
3023 assert_change(
3024 regex_not_imatch(col("c1"), lit("")),
3025 if_not_null(col("c1"), false),
3026 );
3027
3028 assert_change(regex_match(col("c1"), lit("x")), col("c1").like(lit("%x%")));
3030
3031 assert_change(
3033 regex_match(col("c1"), lit("foo")),
3034 col("c1").like(lit("%foo%")),
3035 );
3036
3037 assert_change(regex_match(col("c1"), lit("^$")), col("c1").eq(lit("")));
3039 assert_change(
3040 regex_not_match(col("c1"), lit("^$")),
3041 col("c1").not_eq(lit("")),
3042 );
3043 assert_change(
3044 regex_match(col("c1"), lit("^foo$")),
3045 col("c1").eq(lit("foo")),
3046 );
3047 assert_change(
3048 regex_not_match(col("c1"), lit("^foo$")),
3049 col("c1").not_eq(lit("foo")),
3050 );
3051
3052 assert_change(
3054 regex_match(col("c1"), lit("^(foo|bar)$")),
3055 col("c1").eq(lit("foo")).or(col("c1").eq(lit("bar"))),
3056 );
3057 assert_change(
3058 regex_not_match(col("c1"), lit("^(foo|bar)$")),
3059 col("c1")
3060 .not_eq(lit("foo"))
3061 .and(col("c1").not_eq(lit("bar"))),
3062 );
3063 assert_change(
3064 regex_match(col("c1"), lit("^(foo)$")),
3065 col("c1").eq(lit("foo")),
3066 );
3067 assert_change(
3068 regex_match(col("c1"), lit("^(foo|bar|baz)$")),
3069 ((col("c1").eq(lit("foo"))).or(col("c1").eq(lit("bar"))))
3070 .or(col("c1").eq(lit("baz"))),
3071 );
3072 assert_change(
3073 regex_match(col("c1"), lit("^(foo|bar|baz|qux)$")),
3074 col("c1")
3075 .in_list(vec![lit("foo"), lit("bar"), lit("baz"), lit("qux")], false),
3076 );
3077 assert_change(
3078 regex_match(col("c1"), lit("^(fo_o)$")),
3079 col("c1").eq(lit("fo_o")),
3080 );
3081 assert_change(
3082 regex_match(col("c1"), lit("^(fo_o)$")),
3083 col("c1").eq(lit("fo_o")),
3084 );
3085 assert_change(
3086 regex_match(col("c1"), lit("^(fo_o|ba_r)$")),
3087 col("c1").eq(lit("fo_o")).or(col("c1").eq(lit("ba_r"))),
3088 );
3089 assert_change(
3090 regex_not_match(col("c1"), lit("^(fo_o|ba_r)$")),
3091 col("c1")
3092 .not_eq(lit("fo_o"))
3093 .and(col("c1").not_eq(lit("ba_r"))),
3094 );
3095 assert_change(
3096 regex_match(col("c1"), lit("^(fo_o|ba_r|ba_z)$")),
3097 ((col("c1").eq(lit("fo_o"))).or(col("c1").eq(lit("ba_r"))))
3098 .or(col("c1").eq(lit("ba_z"))),
3099 );
3100 assert_change(
3101 regex_match(col("c1"), lit("^(fo_o|ba_r|baz|qu_x)$")),
3102 col("c1").in_list(
3103 vec![lit("fo_o"), lit("ba_r"), lit("baz"), lit("qu_x")],
3104 false,
3105 ),
3106 );
3107
3108 assert_no_change(regex_match(col("c1"), lit("(foo|bar)")));
3110 assert_no_change(regex_match(col("c1"), lit("(foo|bar)*")));
3111 assert_no_change(regex_match(col("c1"), lit("(fo_o|b_ar)")));
3112 assert_no_change(regex_match(col("c1"), lit("(foo|ba_r)*")));
3113 assert_no_change(regex_match(col("c1"), lit("(fo_o|ba_r)*")));
3114 assert_no_change(regex_match(col("c1"), lit("^(foo|bar)*")));
3115 assert_no_change(regex_match(col("c1"), lit("^(foo)(bar)$")));
3116 assert_no_change(regex_match(col("c1"), lit("^")));
3117 assert_no_change(regex_match(col("c1"), lit("$")));
3118 assert_no_change(regex_match(col("c1"), lit("$^")));
3119 assert_no_change(regex_match(col("c1"), lit("$foo^")));
3120
3121 assert_change(
3123 regex_match(col("c1"), lit("^foo")),
3124 col("c1").like(lit("foo%")),
3125 );
3126 assert_change(
3127 regex_match(col("c1"), lit("foo$")),
3128 col("c1").like(lit("%foo")),
3129 );
3130 assert_change(
3131 regex_match(col("c1"), lit("^foo|bar$")),
3132 col("c1").like(lit("foo%")).or(col("c1").like(lit("%bar"))),
3133 );
3134
3135 assert_change(
3137 regex_match(col("c1"), lit("foo|bar|baz")),
3138 col("c1")
3139 .like(lit("%foo%"))
3140 .or(col("c1").like(lit("%bar%")))
3141 .or(col("c1").like(lit("%baz%"))),
3142 );
3143 assert_change(
3144 regex_match(col("c1"), lit("foo|x|baz")),
3145 col("c1")
3146 .like(lit("%foo%"))
3147 .or(col("c1").like(lit("%x%")))
3148 .or(col("c1").like(lit("%baz%"))),
3149 );
3150 assert_change(
3151 regex_not_match(col("c1"), lit("foo|bar|baz")),
3152 col("c1")
3153 .not_like(lit("%foo%"))
3154 .and(col("c1").not_like(lit("%bar%")))
3155 .and(col("c1").not_like(lit("%baz%"))),
3156 );
3157 assert_change(
3159 regex_match(col("c1"), lit("foo|^x$|baz")),
3160 col("c1")
3161 .like(lit("%foo%"))
3162 .or(col("c1").eq(lit("x")))
3163 .or(col("c1").like(lit("%baz%"))),
3164 );
3165 assert_change(
3166 regex_not_match(col("c1"), lit("foo|^bar$|baz")),
3167 col("c1")
3168 .not_like(lit("%foo%"))
3169 .and(col("c1").not_eq(lit("bar")))
3170 .and(col("c1").not_like(lit("%baz%"))),
3171 );
3172 assert_no_change(regex_match(col("c1"), lit("foo|bar|baz|blarg|bozo|etc")));
3174 }
3175
3176 #[track_caller]
3177 fn assert_no_change(expr: Expr) {
3178 let optimized = simplify(expr.clone());
3179 assert_eq!(expr, optimized);
3180 }
3181
3182 #[track_caller]
3183 fn assert_change(expr: Expr, expected: Expr) {
3184 let optimized = simplify(expr);
3185 assert_eq!(optimized, expected);
3186 }
3187
3188 fn regex_match(left: Expr, right: Expr) -> Expr {
3189 Expr::BinaryExpr(BinaryExpr {
3190 left: Box::new(left),
3191 op: Operator::RegexMatch,
3192 right: Box::new(right),
3193 })
3194 }
3195
3196 fn regex_not_match(left: Expr, right: Expr) -> Expr {
3197 Expr::BinaryExpr(BinaryExpr {
3198 left: Box::new(left),
3199 op: Operator::RegexNotMatch,
3200 right: Box::new(right),
3201 })
3202 }
3203
3204 fn regex_imatch(left: Expr, right: Expr) -> Expr {
3205 Expr::BinaryExpr(BinaryExpr {
3206 left: Box::new(left),
3207 op: Operator::RegexIMatch,
3208 right: Box::new(right),
3209 })
3210 }
3211
3212 fn regex_not_imatch(left: Expr, right: Expr) -> Expr {
3213 Expr::BinaryExpr(BinaryExpr {
3214 left: Box::new(left),
3215 op: Operator::RegexNotIMatch,
3216 right: Box::new(right),
3217 })
3218 }
3219
3220 fn try_simplify(expr: Expr) -> Result<Expr> {
3225 let schema = expr_test_schema();
3226 let execution_props = ExecutionProps::new();
3227 let simplifier = ExprSimplifier::new(
3228 SimplifyContext::new(&execution_props).with_schema(schema),
3229 );
3230 simplifier.simplify(expr)
3231 }
3232
3233 fn coerce(expr: Expr) -> Expr {
3234 let schema = expr_test_schema();
3235 let execution_props = ExecutionProps::new();
3236 let simplifier = ExprSimplifier::new(
3237 SimplifyContext::new(&execution_props).with_schema(Arc::clone(&schema)),
3238 );
3239 simplifier.coerce(expr, schema.as_ref()).unwrap()
3240 }
3241
3242 fn simplify(expr: Expr) -> Expr {
3243 try_simplify(expr).unwrap()
3244 }
3245
3246 fn try_simplify_with_cycle_count(expr: Expr) -> Result<(Expr, u32)> {
3247 let schema = expr_test_schema();
3248 let execution_props = ExecutionProps::new();
3249 let simplifier = ExprSimplifier::new(
3250 SimplifyContext::new(&execution_props).with_schema(schema),
3251 );
3252 let (expr, count) = simplifier.simplify_with_cycle_count_transformed(expr)?;
3253 Ok((expr.data, count))
3254 }
3255
3256 fn simplify_with_cycle_count(expr: Expr) -> (Expr, u32) {
3257 try_simplify_with_cycle_count(expr).unwrap()
3258 }
3259
3260 fn simplify_with_guarantee(
3261 expr: Expr,
3262 guarantees: Vec<(Expr, NullableInterval)>,
3263 ) -> Expr {
3264 let schema = expr_test_schema();
3265 let execution_props = ExecutionProps::new();
3266 let simplifier = ExprSimplifier::new(
3267 SimplifyContext::new(&execution_props).with_schema(schema),
3268 )
3269 .with_guarantees(guarantees);
3270 simplifier.simplify(expr).unwrap()
3271 }
3272
3273 fn expr_test_schema() -> DFSchemaRef {
3274 Arc::new(
3275 DFSchema::from_unqualified_fields(
3276 vec![
3277 Field::new("c1", DataType::Utf8, true),
3278 Field::new("c2", DataType::Boolean, true),
3279 Field::new("c3", DataType::Int64, true),
3280 Field::new("c4", DataType::UInt32, true),
3281 Field::new("c1_non_null", DataType::Utf8, false),
3282 Field::new("c2_non_null", DataType::Boolean, false),
3283 Field::new("c3_non_null", DataType::Int64, false),
3284 Field::new("c4_non_null", DataType::UInt32, false),
3285 Field::new("c5", DataType::FixedSizeBinary(3), true),
3286 ]
3287 .into(),
3288 HashMap::new(),
3289 )
3290 .unwrap(),
3291 )
3292 }
3293
3294 #[test]
3295 fn simplify_expr_null_comparison() {
3296 assert_eq!(
3298 simplify(lit(true).eq(lit(ScalarValue::Boolean(None)))),
3299 lit(ScalarValue::Boolean(None)),
3300 );
3301
3302 assert_eq!(
3304 simplify(
3305 lit(ScalarValue::Boolean(None)).not_eq(lit(ScalarValue::Boolean(None)))
3306 ),
3307 lit(ScalarValue::Boolean(None)),
3308 );
3309
3310 assert_eq!(
3312 simplify(col("c2").not_eq(lit(ScalarValue::Boolean(None)))),
3313 lit(ScalarValue::Boolean(None)),
3314 );
3315
3316 assert_eq!(
3318 simplify(lit(ScalarValue::Boolean(None)).eq(col("c2"))),
3319 lit(ScalarValue::Boolean(None)),
3320 );
3321 }
3322
3323 #[test]
3324 fn simplify_expr_is_not_null() {
3325 assert_eq!(
3326 simplify(Expr::IsNotNull(Box::new(col("c1")))),
3327 Expr::IsNotNull(Box::new(col("c1")))
3328 );
3329
3330 assert_eq!(
3332 simplify(Expr::IsNotNull(Box::new(col("c1_non_null")))),
3333 lit(true)
3334 );
3335 }
3336
3337 #[test]
3338 fn simplify_expr_is_null() {
3339 assert_eq!(
3340 simplify(Expr::IsNull(Box::new(col("c1")))),
3341 Expr::IsNull(Box::new(col("c1")))
3342 );
3343
3344 assert_eq!(
3346 simplify(Expr::IsNull(Box::new(col("c1_non_null")))),
3347 lit(false)
3348 );
3349 }
3350
3351 #[test]
3352 fn simplify_expr_is_unknown() {
3353 assert_eq!(simplify(col("c2").is_unknown()), col("c2").is_unknown(),);
3354
3355 assert_eq!(simplify(col("c2_non_null").is_unknown()), lit(false));
3357 }
3358
3359 #[test]
3360 fn simplify_expr_is_not_known() {
3361 assert_eq!(
3362 simplify(col("c2").is_not_unknown()),
3363 col("c2").is_not_unknown()
3364 );
3365
3366 assert_eq!(simplify(col("c2_non_null").is_not_unknown()), lit(true));
3368 }
3369
3370 #[test]
3371 fn simplify_expr_eq() {
3372 let schema = expr_test_schema();
3373 assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3374
3375 assert_eq!(simplify(lit(true).eq(lit(true))), lit(true));
3377
3378 assert_eq!(simplify(lit(true).eq(lit(false))), lit(false),);
3380
3381 assert_eq!(simplify(col("c2").eq(lit(true))), col("c2"));
3383
3384 assert_eq!(simplify(col("c2").eq(lit(false))), col("c2").not(),);
3386 }
3387
3388 #[test]
3389 fn simplify_expr_eq_skip_nonboolean_type() {
3390 let schema = expr_test_schema();
3391
3392 assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3398
3399 assert_eq!(simplify(col("c1").eq(lit("foo"))), col("c1").eq(lit("foo")),);
3401 }
3402
3403 #[test]
3404 fn simplify_expr_not_eq() {
3405 let schema = expr_test_schema();
3406
3407 assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3408
3409 assert_eq!(simplify(col("c2").not_eq(lit(true))), col("c2").not(),);
3411
3412 assert_eq!(simplify(col("c2").not_eq(lit(false))), col("c2"),);
3414
3415 assert_eq!(simplify(lit(true).not_eq(lit(true))), lit(false),);
3417
3418 assert_eq!(simplify(lit(true).not_eq(lit(false))), lit(true),);
3419 }
3420
3421 #[test]
3422 fn simplify_expr_not_eq_skip_nonboolean_type() {
3423 let schema = expr_test_schema();
3424
3425 assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3429
3430 assert_eq!(
3431 simplify(col("c1").not_eq(lit("foo"))),
3432 col("c1").not_eq(lit("foo")),
3433 );
3434 }
3435
3436 #[test]
3437 fn simplify_expr_case_when_then_else() {
3438 assert_eq!(
3444 simplify(Expr::Case(Case::new(
3445 None,
3446 vec![(
3447 Box::new(col("c2_non_null").not_eq(lit(false))),
3448 Box::new(lit("ok").eq(lit("not_ok"))),
3449 )],
3450 Some(Box::new(col("c2_non_null").eq(lit(true)))),
3451 ))),
3452 lit(false) );
3454
3455 assert_eq!(
3464 simplify(simplify(Expr::Case(Case::new(
3465 None,
3466 vec![(
3467 Box::new(col("c2_non_null").not_eq(lit(false))),
3468 Box::new(lit("ok").eq(lit("ok"))),
3469 )],
3470 Some(Box::new(col("c2_non_null").eq(lit(true)))),
3471 )))),
3472 col("c2_non_null")
3473 );
3474
3475 assert_eq!(
3482 simplify(simplify(Expr::Case(Case::new(
3483 None,
3484 vec![(Box::new(col("c2").is_null()), Box::new(lit(true)),)],
3485 Some(Box::new(col("c2"))),
3486 )))),
3487 col("c2")
3488 .is_null()
3489 .or(col("c2").is_not_null().and(col("c2")))
3490 );
3491
3492 assert_eq!(
3500 simplify(simplify(Expr::Case(Case::new(
3501 None,
3502 vec![
3503 (Box::new(col("c1_non_null")), Box::new(lit(true)),),
3504 (Box::new(col("c2_non_null")), Box::new(lit(false)),),
3505 ],
3506 Some(Box::new(lit(true))),
3507 )))),
3508 col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
3509 );
3510
3511 assert_eq!(
3519 simplify(simplify(Expr::Case(Case::new(
3520 None,
3521 vec![
3522 (Box::new(col("c1_non_null")), Box::new(lit(true)),),
3523 (Box::new(col("c2_non_null")), Box::new(lit(false)),),
3524 ],
3525 Some(Box::new(lit(true))),
3526 )))),
3527 col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
3528 );
3529
3530 assert_eq!(
3532 simplify(simplify(Expr::Case(Case::new(
3533 None,
3534 vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
3535 None,
3536 )))),
3537 not_distinct_from(col("c3").gt(lit(0_i64)), lit(true)).or(distinct_from(
3538 col("c3").gt(lit(0_i64)),
3539 lit(true)
3540 )
3541 .and(lit_bool_null()))
3542 );
3543
3544 assert_eq!(
3546 simplify(simplify(Expr::Case(Case::new(
3547 None,
3548 vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
3549 Some(Box::new(lit(false))),
3550 )))),
3551 not_distinct_from(col("c3").gt(lit(0_i64)), lit(true))
3552 );
3553 }
3554
3555 fn distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
3556 Expr::BinaryExpr(BinaryExpr {
3557 left: Box::new(left.into()),
3558 op: Operator::IsDistinctFrom,
3559 right: Box::new(right.into()),
3560 })
3561 }
3562
3563 fn not_distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
3564 Expr::BinaryExpr(BinaryExpr {
3565 left: Box::new(left.into()),
3566 op: Operator::IsNotDistinctFrom,
3567 right: Box::new(right.into()),
3568 })
3569 }
3570
3571 #[test]
3572 fn simplify_expr_bool_or() {
3573 assert_eq!(simplify(col("c2").or(lit(true))), lit(true),);
3575
3576 assert_eq!(simplify(col("c2").or(lit(false))), col("c2"),);
3578
3579 assert_eq!(simplify(lit(true).or(lit_bool_null())), lit(true),);
3581
3582 assert_eq!(simplify(lit_bool_null().or(lit(true))), lit(true),);
3584
3585 assert_eq!(simplify(lit(false).or(lit_bool_null())), lit_bool_null(),);
3587
3588 assert_eq!(simplify(lit_bool_null().or(lit(false))), lit_bool_null(),);
3590
3591 let expr = col("c1").between(lit(0), lit(10));
3595 let expr = expr.or(lit_bool_null());
3596 let result = simplify(expr);
3597
3598 let expected_expr = or(
3599 and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
3600 lit_bool_null(),
3601 );
3602 assert_eq!(expected_expr, result);
3603 }
3604
3605 #[test]
3606 fn simplify_inlist() {
3607 assert_eq!(simplify(in_list(col("c1"), vec![], false)), lit(false));
3608 assert_eq!(simplify(in_list(col("c1"), vec![], true)), lit(true));
3609
3610 assert_eq!(
3612 simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], false)),
3613 lit_bool_null()
3614 );
3615
3616 assert_eq!(
3618 simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], true)),
3619 lit_bool_null()
3620 );
3621
3622 assert_eq!(
3623 simplify(in_list(col("c1"), vec![lit(1)], false)),
3624 col("c1").eq(lit(1))
3625 );
3626 assert_eq!(
3627 simplify(in_list(col("c1"), vec![lit(1)], true)),
3628 col("c1").not_eq(lit(1))
3629 );
3630
3631 assert_eq!(
3634 simplify(in_list(col("c1") * lit(10), vec![lit(2)], false)),
3635 (col("c1") * lit(10)).eq(lit(2))
3636 );
3637
3638 assert_eq!(
3639 simplify(in_list(col("c1"), vec![lit(1), lit(2)], false)),
3640 col("c1").eq(lit(1)).or(col("c1").eq(lit(2)))
3641 );
3642 assert_eq!(
3643 simplify(in_list(col("c1"), vec![lit(1), lit(2)], true)),
3644 col("c1").not_eq(lit(1)).and(col("c1").not_eq(lit(2)))
3645 );
3646
3647 let subquery = Arc::new(test_table_scan_with_name("test").unwrap());
3648 assert_eq!(
3649 simplify(in_list(
3650 col("c1"),
3651 vec![scalar_subquery(Arc::clone(&subquery))],
3652 false
3653 )),
3654 in_subquery(col("c1"), Arc::clone(&subquery))
3655 );
3656 assert_eq!(
3657 simplify(in_list(
3658 col("c1"),
3659 vec![scalar_subquery(Arc::clone(&subquery))],
3660 true
3661 )),
3662 not_in_subquery(col("c1"), subquery)
3663 );
3664
3665 let subquery1 =
3666 scalar_subquery(Arc::new(test_table_scan_with_name("test1").unwrap()));
3667 let subquery2 =
3668 scalar_subquery(Arc::new(test_table_scan_with_name("test2").unwrap()));
3669
3670 assert_eq!(
3672 simplify(in_list(
3673 col("c1"),
3674 vec![subquery1.clone(), subquery2.clone()],
3675 true
3676 )),
3677 col("c1")
3678 .not_eq(subquery1.clone())
3679 .and(col("c1").not_eq(subquery2.clone()))
3680 );
3681
3682 assert_eq!(
3684 simplify(in_list(
3685 col("c1"),
3686 vec![subquery1.clone(), subquery2.clone()],
3687 false
3688 )),
3689 col("c1").eq(subquery1).or(col("c1").eq(subquery2))
3690 );
3691
3692 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
3694 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false),
3695 );
3696 assert_eq!(simplify(expr), lit(false));
3697
3698 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
3700 in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], false),
3701 );
3702 assert_eq!(simplify(expr), col("c1").eq(lit(4)));
3703
3704 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
3706 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
3707 );
3708 assert_eq!(simplify(expr), lit(true));
3709
3710 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
3712 in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
3713 );
3714 assert_eq!(simplify(expr), col("c1").not_eq(lit(4)));
3715
3716 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
3718 in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
3719 );
3720 assert_eq!(
3721 simplify(expr),
3722 in_list(
3723 col("c1"),
3724 vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6), lit(7)],
3725 true
3726 )
3727 );
3728
3729 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).or(
3731 in_list(col("c1"), vec![lit(2), lit(3), lit(4), lit(5)], false),
3732 );
3733 assert_eq!(
3734 simplify(expr),
3735 in_list(
3736 col("c1"),
3737 vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
3738 false
3739 )
3740 );
3741
3742 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3)], false).and(in_list(
3744 col("c1"),
3745 vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
3746 true,
3747 ));
3748 assert_eq!(simplify(expr), lit(false));
3749
3750 let expr =
3752 in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(in_list(
3753 col("c1"),
3754 vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
3755 false,
3756 ));
3757 assert_eq!(simplify(expr), col("c1").eq(lit(5)));
3758
3759 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
3761 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
3762 );
3763 assert_eq!(
3764 simplify(expr),
3765 in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false)
3766 );
3767
3768 let expr = in_list(
3771 col("c1"),
3772 vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6)],
3773 false,
3774 )
3775 .and(in_list(
3776 col("c1"),
3777 vec![lit(1), lit(3), lit(5), lit(6)],
3778 false,
3779 ))
3780 .and(in_list(col("c1"), vec![lit(3), lit(6)], false));
3781 assert_eq!(
3782 simplify(expr),
3783 col("c1").eq(lit(3)).or(col("c1").eq(lit(6)))
3784 );
3785
3786 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
3788 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false)
3789 .and(in_list(
3790 col("c1"),
3791 vec![lit(3), lit(4), lit(5), lit(6)],
3792 true,
3793 ))
3794 .and(in_list(col("c1"), vec![lit(8), lit(9), lit(10)], false)),
3795 );
3796 assert_eq!(simplify(expr), col("c1").eq(lit(8)));
3797
3798 let expr =
3801 in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(col("c1")
3802 .not_eq(lit(5))
3803 .or(in_list(
3804 col("c1"),
3805 vec![lit(6), lit(7), lit(8), lit(9)],
3806 true,
3807 )));
3808 assert_eq!(simplify(expr.clone()), expr);
3812 }
3813
3814 #[test]
3815 fn simplify_null_in_empty_inlist() {
3816 let expr = in_list(lit_bool_null(), vec![], false);
3818 assert_eq!(simplify(expr), lit(false));
3819
3820 let expr = in_list(lit_bool_null(), vec![], true);
3822 assert_eq!(simplify(expr), lit(true));
3823
3824 let null_null = || Expr::Literal(ScalarValue::Null, None);
3826 let expr = in_list(null_null(), vec![], false);
3827 assert_eq!(simplify(expr), lit(false));
3828
3829 let expr = in_list(null_null(), vec![], true);
3831 assert_eq!(simplify(expr), lit(true));
3832 }
3833
3834 #[test]
3835 fn just_simplifier_simplify_null_in_empty_inlist() {
3836 let simplify = |expr: Expr| -> Expr {
3837 let schema = expr_test_schema();
3838 let execution_props = ExecutionProps::new();
3839 let info = SimplifyContext::new(&execution_props).with_schema(schema);
3840 let simplifier = &mut Simplifier::new(&info);
3841 expr.rewrite(simplifier)
3842 .expect("Failed to simplify expression")
3843 .data
3844 };
3845
3846 let expr = in_list(lit_bool_null(), vec![], false);
3848 assert_eq!(simplify(expr), lit(false));
3849
3850 let expr = in_list(lit_bool_null(), vec![], true);
3852 assert_eq!(simplify(expr), lit(true));
3853
3854 let null_null = || Expr::Literal(ScalarValue::Null, None);
3856 let expr = in_list(null_null(), vec![], false);
3857 assert_eq!(simplify(expr), lit(false));
3858
3859 let expr = in_list(null_null(), vec![], true);
3861 assert_eq!(simplify(expr), lit(true));
3862 }
3863
3864 #[test]
3865 fn simplify_large_or() {
3866 let expr = (0..5)
3867 .map(|i| col("c1").eq(lit(i)))
3868 .fold(lit(false), |acc, e| acc.or(e));
3869 assert_eq!(
3870 simplify(expr),
3871 in_list(col("c1"), (0..5).map(lit).collect(), false),
3872 );
3873 }
3874
3875 #[test]
3876 fn simplify_expr_bool_and() {
3877 assert_eq!(simplify(col("c2").and(lit(true))), col("c2"),);
3879 assert_eq!(simplify(col("c2").and(lit(false))), lit(false),);
3881
3882 assert_eq!(simplify(lit(true).and(lit_bool_null())), lit_bool_null(),);
3884
3885 assert_eq!(simplify(lit_bool_null().and(lit(true))), lit_bool_null(),);
3887
3888 assert_eq!(simplify(lit(false).and(lit_bool_null())), lit(false),);
3890
3891 assert_eq!(simplify(lit_bool_null().and(lit(false))), lit(false),);
3893
3894 let expr = col("c1").between(lit(0), lit(10));
3898 let expr = expr.and(lit_bool_null());
3899 let result = simplify(expr);
3900
3901 let expected_expr = and(
3902 and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
3903 lit_bool_null(),
3904 );
3905 assert_eq!(expected_expr, result);
3906 }
3907
3908 #[test]
3909 fn simplify_expr_between() {
3910 let expr = col("c2").between(lit(3), lit(4));
3912 assert_eq!(
3913 simplify(expr),
3914 and(col("c2").gt_eq(lit(3)), col("c2").lt_eq(lit(4)))
3915 );
3916
3917 let expr = col("c2").not_between(lit(3), lit(4));
3919 assert_eq!(
3920 simplify(expr),
3921 or(col("c2").lt(lit(3)), col("c2").gt(lit(4)))
3922 );
3923 }
3924
3925 #[test]
3926 fn test_like_and_ilike() {
3927 let null = lit(ScalarValue::Utf8(None));
3928
3929 let expr = col("c1").like(null.clone());
3931 assert_eq!(simplify(expr), lit_bool_null());
3932
3933 let expr = col("c1").not_like(null.clone());
3934 assert_eq!(simplify(expr), lit_bool_null());
3935
3936 let expr = col("c1").ilike(null.clone());
3937 assert_eq!(simplify(expr), lit_bool_null());
3938
3939 let expr = col("c1").not_ilike(null.clone());
3940 assert_eq!(simplify(expr), lit_bool_null());
3941
3942 let expr = col("c1").like(lit("%"));
3944 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
3945
3946 let expr = col("c1").not_like(lit("%"));
3947 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
3948
3949 let expr = col("c1").ilike(lit("%"));
3950 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
3951
3952 let expr = col("c1").not_ilike(lit("%"));
3953 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
3954
3955 let expr = col("c1").like(lit("%%"));
3957 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
3958
3959 let expr = col("c1").not_like(lit("%%"));
3960 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
3961
3962 let expr = col("c1").ilike(lit("%%"));
3963 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
3964
3965 let expr = col("c1").not_ilike(lit("%%"));
3966 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
3967
3968 let expr = col("c1_non_null").like(lit("%"));
3970 assert_eq!(simplify(expr), lit(true));
3971
3972 let expr = col("c1_non_null").not_like(lit("%"));
3973 assert_eq!(simplify(expr), lit(false));
3974
3975 let expr = col("c1_non_null").ilike(lit("%"));
3976 assert_eq!(simplify(expr), lit(true));
3977
3978 let expr = col("c1_non_null").not_ilike(lit("%"));
3979 assert_eq!(simplify(expr), lit(false));
3980
3981 let expr = col("c1_non_null").like(lit("%%"));
3983 assert_eq!(simplify(expr), lit(true));
3984
3985 let expr = col("c1_non_null").not_like(lit("%%"));
3986 assert_eq!(simplify(expr), lit(false));
3987
3988 let expr = col("c1_non_null").ilike(lit("%%"));
3989 assert_eq!(simplify(expr), lit(true));
3990
3991 let expr = col("c1_non_null").not_ilike(lit("%%"));
3992 assert_eq!(simplify(expr), lit(false));
3993
3994 let expr = null.clone().like(lit("%"));
3996 assert_eq!(simplify(expr), lit_bool_null());
3997
3998 let expr = null.clone().not_like(lit("%"));
3999 assert_eq!(simplify(expr), lit_bool_null());
4000
4001 let expr = null.clone().ilike(lit("%"));
4002 assert_eq!(simplify(expr), lit_bool_null());
4003
4004 let expr = null.clone().not_ilike(lit("%"));
4005 assert_eq!(simplify(expr), lit_bool_null());
4006
4007 let expr = null.clone().like(lit("%%"));
4009 assert_eq!(simplify(expr), lit_bool_null());
4010
4011 let expr = null.clone().not_like(lit("%%"));
4012 assert_eq!(simplify(expr), lit_bool_null());
4013
4014 let expr = null.clone().ilike(lit("%%"));
4015 assert_eq!(simplify(expr), lit_bool_null());
4016
4017 let expr = null.clone().not_ilike(lit("%%"));
4018 assert_eq!(simplify(expr), lit_bool_null());
4019
4020 let expr = null.clone().like(lit("a%"));
4022 assert_eq!(simplify(expr), lit_bool_null());
4023
4024 let expr = null.clone().not_like(lit("a%"));
4025 assert_eq!(simplify(expr), lit_bool_null());
4026
4027 let expr = null.clone().ilike(lit("a%"));
4028 assert_eq!(simplify(expr), lit_bool_null());
4029
4030 let expr = null.clone().not_ilike(lit("a%"));
4031 assert_eq!(simplify(expr), lit_bool_null());
4032
4033 let expr = col("c1").like(lit("a"));
4035 assert_eq!(simplify(expr), col("c1").eq(lit("a")));
4036 let expr = col("c1").not_like(lit("a"));
4037 assert_eq!(simplify(expr), col("c1").not_eq(lit("a")));
4038 let expr = col("c1").like(lit("a_"));
4039 assert_eq!(simplify(expr), col("c1").like(lit("a_")));
4040 let expr = col("c1").not_like(lit("a_"));
4041 assert_eq!(simplify(expr), col("c1").not_like(lit("a_")));
4042
4043 let expr = col("c1").ilike(lit("a"));
4044 assert_eq!(simplify(expr), col("c1").ilike(lit("a")));
4045 let expr = col("c1").not_ilike(lit("a"));
4046 assert_eq!(simplify(expr), col("c1").not_ilike(lit("a")));
4047 }
4048
4049 #[test]
4050 fn test_simplify_with_guarantee() {
4051 let expr_x = col("c3").gt(lit(3_i64));
4053 let expr_y = (col("c4") + lit(2_u32)).lt(lit(10_u32));
4054 let expr_z = col("c1").in_list(vec![lit("a"), lit("b")], true);
4055 let expr = expr_x.clone().and(expr_y.or(expr_z));
4056
4057 let guarantees = vec![
4059 (col("c3"), NullableInterval::from(ScalarValue::Int64(None))),
4060 (col("c4"), NullableInterval::from(ScalarValue::UInt32(None))),
4061 (col("c1"), NullableInterval::from(ScalarValue::Utf8(None))),
4062 ];
4063
4064 let output = simplify_with_guarantee(expr.clone(), guarantees);
4065 assert_eq!(output, lit_bool_null());
4066
4067 let guarantees = vec![
4069 (
4070 col("c3"),
4071 NullableInterval::NotNull {
4072 values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4073 },
4074 ),
4075 (
4076 col("c4"),
4077 NullableInterval::from(ScalarValue::UInt32(Some(9))),
4078 ),
4079 (col("c1"), NullableInterval::from(ScalarValue::from("a"))),
4080 ];
4081 let output = simplify_with_guarantee(expr.clone(), guarantees);
4082 assert_eq!(output, lit(false));
4083
4084 let guarantees = vec![
4086 (
4087 col("c3"),
4088 NullableInterval::MaybeNull {
4089 values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4090 },
4091 ),
4092 (
4093 col("c4"),
4094 NullableInterval::MaybeNull {
4095 values: Interval::make(Some(9_u32), Some(9_u32)).unwrap(),
4096 },
4097 ),
4098 (
4099 col("c1"),
4100 NullableInterval::NotNull {
4101 values: Interval::try_new(
4102 ScalarValue::from("d"),
4103 ScalarValue::from("f"),
4104 )
4105 .unwrap(),
4106 },
4107 ),
4108 ];
4109 let output = simplify_with_guarantee(expr.clone(), guarantees);
4110 assert_eq!(&output, &expr_x);
4111
4112 let guarantees = vec![
4114 (
4115 col("c3"),
4116 NullableInterval::from(ScalarValue::Int64(Some(9))),
4117 ),
4118 (
4119 col("c4"),
4120 NullableInterval::from(ScalarValue::UInt32(Some(3))),
4121 ),
4122 ];
4123 let output = simplify_with_guarantee(expr.clone(), guarantees);
4124 assert_eq!(output, lit(true));
4125
4126 let guarantees = vec![(
4128 col("c4"),
4129 NullableInterval::from(ScalarValue::UInt32(Some(3))),
4130 )];
4131 let output = simplify_with_guarantee(expr, guarantees);
4132 assert_eq!(&output, &expr_x);
4133 }
4134
4135 #[test]
4136 fn test_expression_partial_simplify_1() {
4137 let expr = (lit(1) + lit(2)) + (lit(4) / lit(0));
4139 let expected = (lit(3)) + (lit(4) / lit(0));
4140
4141 assert_eq!(simplify(expr), expected);
4142 }
4143
4144 #[test]
4145 fn test_expression_partial_simplify_2() {
4146 let expr = (lit(1).gt(lit(2))).and(lit(4) / lit(0));
4148 let expected = lit(false);
4149
4150 assert_eq!(simplify(expr), expected);
4151 }
4152
4153 #[test]
4154 fn test_simplify_cycles() {
4155 let expr = lit(true);
4157 let expected = lit(true);
4158 let (expr, num_iter) = simplify_with_cycle_count(expr);
4159 assert_eq!(expr, expected);
4160 assert_eq!(num_iter, 1);
4161
4162 let expr = lit(true).not_eq(lit_bool_null()).or(lit(5).gt(lit(10)));
4164 let expected = lit_bool_null();
4165 let (expr, num_iter) = simplify_with_cycle_count(expr);
4166 assert_eq!(expr, expected);
4167 assert_eq!(num_iter, 2);
4168
4169 let expr = (((col("c4") - lit(10)) + lit(10)) * lit(100)) / lit(100);
4172 let expected = expr.clone();
4173 let (expr, num_iter) = simplify_with_cycle_count(expr);
4174 assert_eq!(expr, expected);
4175 assert_eq!(num_iter, 1);
4176
4177 let expr = col("c4")
4179 .lt(lit(1))
4180 .or(col("c3").lt(lit(2)))
4181 .and(col("c3_non_null").lt(lit(3)))
4182 .and(lit(false));
4183 let expected = lit(false);
4184 let (expr, num_iter) = simplify_with_cycle_count(expr);
4185 assert_eq!(expr, expected);
4186 assert_eq!(num_iter, 2);
4187 }
4188
4189 fn boolean_test_schema() -> DFSchemaRef {
4190 Schema::new(vec![
4191 Field::new("A", DataType::Boolean, false),
4192 Field::new("B", DataType::Boolean, false),
4193 Field::new("C", DataType::Boolean, false),
4194 Field::new("D", DataType::Boolean, false),
4195 ])
4196 .to_dfschema_ref()
4197 .unwrap()
4198 }
4199
4200 #[test]
4201 fn simplify_common_factor_conjunction_in_disjunction() {
4202 let props = ExecutionProps::new();
4203 let schema = boolean_test_schema();
4204 let simplifier =
4205 ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema));
4206
4207 let a = || col("A");
4208 let b = || col("B");
4209 let c = || col("C");
4210 let d = || col("D");
4211
4212 let expr = a().and(b()).or(a().and(c()));
4214 let expected = a().and(b().or(c()));
4215
4216 assert_eq!(expected, simplifier.simplify(expr).unwrap());
4217
4218 let expr = a().and(b()).or(a().and(c())).or(a().and(d()));
4220 let expected = a().and(b().or(c()).or(d()));
4221 assert_eq!(expected, simplifier.simplify(expr).unwrap());
4222
4223 let expr = a().or(b().and(c().and(a())));
4225 let expected = a();
4226 assert_eq!(expected, simplifier.simplify(expr).unwrap());
4227 }
4228
4229 #[test]
4230 fn test_simplify_udaf() {
4231 let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_with_simplify());
4232 let aggregate_function_expr =
4233 Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4234 udaf.into(),
4235 vec![],
4236 false,
4237 None,
4238 vec![],
4239 None,
4240 ));
4241
4242 let expected = col("result_column");
4243 assert_eq!(simplify(aggregate_function_expr), expected);
4244
4245 let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_without_simplify());
4246 let aggregate_function_expr =
4247 Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4248 udaf.into(),
4249 vec![],
4250 false,
4251 None,
4252 vec![],
4253 None,
4254 ));
4255
4256 let expected = aggregate_function_expr.clone();
4257 assert_eq!(simplify(aggregate_function_expr), expected);
4258 }
4259
    /// Mock aggregate UDF used by the simplifier tests; its `AggregateUDFImpl`
    /// implementation only provides `name` and `simplify`, the remaining
    /// methods are `unimplemented!`.
    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
    struct SimplifyMockUdaf {
        /// When `true`, `simplify()` returns a rewrite closure; otherwise `None`.
        simplify: bool,
    }
4266
4267 impl SimplifyMockUdaf {
4268 fn new_with_simplify() -> Self {
4270 Self { simplify: true }
4271 }
4272 fn new_without_simplify() -> Self {
4274 Self { simplify: false }
4275 }
4276 }
4277
4278 impl AggregateUDFImpl for SimplifyMockUdaf {
4279 fn as_any(&self) -> &dyn std::any::Any {
4280 self
4281 }
4282
4283 fn name(&self) -> &str {
4284 "mock_simplify"
4285 }
4286
4287 fn signature(&self) -> &Signature {
4288 unimplemented!()
4289 }
4290
4291 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4292 unimplemented!("not needed for tests")
4293 }
4294
4295 fn accumulator(
4296 &self,
4297 _acc_args: AccumulatorArgs,
4298 ) -> Result<Box<dyn Accumulator>> {
4299 unimplemented!("not needed for tests")
4300 }
4301
4302 fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
4303 unimplemented!("not needed for testing")
4304 }
4305
4306 fn create_groups_accumulator(
4307 &self,
4308 _args: AccumulatorArgs,
4309 ) -> Result<Box<dyn GroupsAccumulator>> {
4310 unimplemented!("not needed for testing")
4311 }
4312
4313 fn simplify(&self) -> Option<AggregateFunctionSimplification> {
4314 if self.simplify {
4315 Some(Box::new(|_, _| Ok(col("result_column"))))
4316 } else {
4317 None
4318 }
4319 }
4320 }
4321
/// Checks that the simplifier applies a UDWF's own `simplify` hook when one is
/// provided, and leaves the window expression untouched when it is not.
#[test]
fn test_simplify_udwf() {
    // Wraps a mock UDWF into an argument-less window function expression.
    let window_expr_for = |mock: SimplifyMockUdwf| -> Expr {
        let definition = WindowFunctionDefinition::WindowUDF(Arc::new(
            WindowUDF::new_from_impl(mock),
        ));
        Expr::from(WindowFunction::new(definition, vec![]))
    };

    // With a simplify hook: the whole expression is replaced by the hook's output.
    let with_hook = window_expr_for(SimplifyMockUdwf::new_with_simplify());
    assert_eq!(simplify(with_hook), col("result_column"));

    // Without a simplify hook: the expression comes back unchanged.
    let without_hook = window_expr_for(SimplifyMockUdwf::new_without_simplify());
    let unchanged = without_hook.clone();
    assert_eq!(simplify(without_hook), unchanged);
}
4340
/// Mock window UDF used by the simplifier tests.
///
/// The `simplify` flag selects whether the mock exposes a simplification
/// hook (`true`) or reports that it has none (`false`).
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct SimplifyMockUdwf {
    /// When `true`, the trait-level `simplify` method returns a rewrite closure.
    simplify: bool,
}
4347
4348 impl SimplifyMockUdwf {
4349 fn new_with_simplify() -> Self {
4351 Self { simplify: true }
4352 }
4353 fn new_without_simplify() -> Self {
4355 Self { simplify: false }
4356 }
4357 }
4358
4359 impl WindowUDFImpl for SimplifyMockUdwf {
4360 fn as_any(&self) -> &dyn std::any::Any {
4361 self
4362 }
4363
4364 fn name(&self) -> &str {
4365 "mock_simplify"
4366 }
4367
4368 fn signature(&self) -> &Signature {
4369 unimplemented!()
4370 }
4371
4372 fn simplify(&self) -> Option<WindowFunctionSimplification> {
4373 if self.simplify {
4374 Some(Box::new(|_, _| Ok(col("result_column"))))
4375 } else {
4376 None
4377 }
4378 }
4379
4380 fn partition_evaluator(
4381 &self,
4382 _partition_evaluator_args: PartitionEvaluatorArgs,
4383 ) -> Result<Box<dyn PartitionEvaluator>> {
4384 unimplemented!("not needed for tests")
4385 }
4386
4387 fn field(&self, _field_args: WindowUDFFieldArgs) -> Result<FieldRef> {
4388 unimplemented!("not needed for tests")
4389 }
4390 }
4391 #[derive(Debug, PartialEq, Eq, Hash)]
4392 struct VolatileUdf {
4393 signature: Signature,
4394 }
4395
4396 impl VolatileUdf {
4397 pub fn new() -> Self {
4398 Self {
4399 signature: Signature::exact(vec![], Volatility::Volatile),
4400 }
4401 }
4402 }
4403 impl ScalarUDFImpl for VolatileUdf {
4404 fn as_any(&self) -> &dyn std::any::Any {
4405 self
4406 }
4407
4408 fn name(&self) -> &str {
4409 "VolatileUdf"
4410 }
4411
4412 fn signature(&self) -> &Signature {
4413 &self.signature
4414 }
4415
4416 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4417 Ok(DataType::Int16)
4418 }
4419
4420 fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
4421 panic!("dummy - not implemented")
4422 }
4423 }
4424
/// Exercises OR/AND simplification when one of the operands is a call to a
/// volatile UDF.
#[test]
fn test_optimize_volatile_conditions() {
    let volatile_udf = Arc::new(ScalarUDF::new_from_impl(VolatileUdf::new()));
    let rand = Expr::ScalarFunction(ScalarFunction::new_udf(volatile_udf, vec![]));

    // Fresh copies of the two predicates every case is built from.
    let rand_is_zero = || rand.clone().eq(lit(0));
    let column_is_two = || col("column1").eq(lit(2));

    // `rand = 0 OR (column1 = 2 AND rand = 0)` must come back unchanged.
    let untouched = rand_is_zero().or(column_is_two().and(rand_is_zero()));
    assert_eq!(simplify(untouched.clone()), untouched);

    // `column1 = 2 OR (column1 = 2 AND rand = 0)` collapses to `column1 = 2`.
    let absorbed = column_is_two().or(column_is_two().and(rand_is_zero()));
    assert_eq!(simplify(absorbed), column_is_two());

    // `(column1 = 2 AND rand = 0) OR (column1 = 2 AND rand = 0)` only factors
    // out the column predicate; both volatile comparisons are kept.
    let left = column_is_two().and(rand_is_zero());
    let right = column_is_two().and(rand_is_zero());
    let factored = column_is_two().and(rand_is_zero().or(rand_is_zero()));
    assert_eq!(simplify(left.or(right)), factored);
}
4461
/// Compares column `c5` against a three-byte literal and checks the expression
/// after type coercion and again after simplification.
#[test]
fn simplify_fixed_size_binary_eq_lit() {
    let bytes: &[u8] = &[1, 2, 3];

    // Step 1: coercion is expected to wrap the column in a cast to `Binary`.
    let coerced = coerce(col("c5").eq(lit(bytes)));
    let schema = expr_test_schema();
    let column_cast_to_binary = col("c5")
        .cast_to(&DataType::Binary, schema.as_ref())
        .unwrap();
    assert_eq!(coerced, column_cast_to_binary.eq(lit(bytes)));

    // Step 2: simplification is expected to drop the cast and compare the bare
    // column against a `FixedSizeBinary(3)` literal instead.
    let fixed_size_literal = Expr::Literal(
        ScalarValue::FixedSizeBinary(3, Some(bytes.to_vec())),
        None,
    );
    assert_eq!(simplify(coerced), col("c5").eq(fixed_size_literal));
}
4489
4490 fn if_not_null(expr: Expr, then: bool) -> Expr {
4491 Expr::Case(Case {
4492 expr: Some(expr.is_not_null().into()),
4493 when_then_expr: vec![(lit(true).into(), lit(then).into())],
4494 else_expr: None,
4495 })
4496 }
4497}