1use arrow::{
21 array::{Array, AsArray, new_null_array},
22 datatypes::{DataType, Field, Schema},
23 record_batch::RecordBatch,
24};
25use std::borrow::Cow;
26use std::collections::HashSet;
27use std::ops::Not;
28use std::sync::Arc;
29
30use datafusion_common::config::ConfigOptions;
31use datafusion_common::nested_struct::has_one_of_more_common_fields;
32use datafusion_common::{
33 DFSchema, DataFusionError, Result, ScalarValue, exec_datafusion_err, internal_err,
34};
35use datafusion_common::{
36 HashMap,
37 cast::{as_large_list_array, as_list_array},
38 metadata::FieldMetadata,
39 tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter},
40};
41use datafusion_expr::{
42 BinaryExpr, Case, ColumnarValue, Expr, ExprSchemable, Like, Operator, Volatility,
43 and, binary::BinaryTypeCoercer, lit, or, preimage::PreimageResult,
44};
45use datafusion_expr::{Cast, TryCast, simplify::ExprSimplifyResult};
46use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval};
47use datafusion_expr::{
48 expr::{InList, InSubquery},
49 utils::{iter_conjunction, iter_conjunction_owned},
50};
51use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
52
53use super::inlist_simplifier::ShortenInListSimplifier;
54use super::utils::*;
55use crate::simplify_expressions::SimplifyContext;
56use crate::simplify_expressions::regex::simplify_regex_expr;
57use crate::simplify_expressions::unwrap_cast::{
58 is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary,
59 is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist,
60 unwrap_cast_in_comparison_for_binary,
61};
62use crate::{
63 analyzer::type_coercion::TypeCoercionRewriter,
64 simplify_expressions::udf_preimage::rewrite_with_preimage,
65};
66use datafusion_expr::expr_rewriter::rewrite_with_guarantees_map;
67use datafusion_expr_common::casts::try_cast_literal_to_type;
68use indexmap::IndexSet;
69use regex::Regex;
70
71pub struct ExprSimplifier {
103 info: SimplifyContext,
104 guarantees: Vec<(Expr, NullableInterval)>,
107 canonicalize: bool,
110 max_simplifier_cycles: u32,
112}
113
114pub const THRESHOLD_INLINE_INLIST: usize = 3;
115pub const DEFAULT_MAX_SIMPLIFIER_CYCLES: u32 = 3;
116
117impl ExprSimplifier {
118 pub fn new(info: SimplifyContext) -> Self {
123 Self {
124 info,
125 guarantees: vec![],
126 canonicalize: true,
127 max_simplifier_cycles: DEFAULT_MAX_SIMPLIFIER_CYCLES,
128 }
129 }
130
131 pub fn simplify(&self, expr: Expr) -> Result<Expr> {
174 Ok(self.simplify_with_cycle_count_transformed(expr)?.0.data)
175 }
176
177 #[deprecated(
184 since = "48.0.0",
185 note = "Use `simplify_with_cycle_count_transformed` instead"
186 )]
187 #[expect(unused_mut)]
188 pub fn simplify_with_cycle_count(&self, mut expr: Expr) -> Result<(Expr, u32)> {
189 let (transformed, cycle_count) =
190 self.simplify_with_cycle_count_transformed(expr)?;
191 Ok((transformed.data, cycle_count))
192 }
193
194 pub fn simplify_with_cycle_count_transformed(
207 &self,
208 mut expr: Expr,
209 ) -> Result<(Transformed<Expr>, u32)> {
210 let mut simplifier = Simplifier::new(&self.info);
211 let config_options = Some(Arc::clone(self.info.config_options()));
212 let mut const_evaluator = ConstEvaluator::try_new(config_options)?;
213 let mut shorten_in_list_simplifier = ShortenInListSimplifier::new();
214 let guarantees_map: HashMap<&Expr, &NullableInterval> =
215 self.guarantees.iter().map(|(k, v)| (k, v)).collect();
216
217 if self.canonicalize {
218 expr = expr.rewrite(&mut Canonicalizer::new()).data()?
219 }
220
221 let mut num_cycles = 0;
225 let mut has_transformed = false;
226 loop {
227 let Transformed {
228 data, transformed, ..
229 } = expr
230 .rewrite(&mut const_evaluator)?
231 .transform_data(|expr| expr.rewrite(&mut simplifier))?
232 .transform_data(|expr| {
233 rewrite_with_guarantees_map(expr, &guarantees_map)
234 })?;
235 expr = data;
236 num_cycles += 1;
237 has_transformed = has_transformed || transformed;
239 if !transformed || num_cycles >= self.max_simplifier_cycles {
240 break;
241 }
242 }
243 expr = expr.rewrite(&mut shorten_in_list_simplifier).data()?;
245 Ok((
246 Transformed::new_transformed(expr, has_transformed),
247 num_cycles,
248 ))
249 }
250
251 pub fn coerce(&self, expr: Expr, schema: &DFSchema) -> Result<Expr> {
257 let mut expr_rewrite = TypeCoercionRewriter { schema };
258 expr.rewrite(&mut expr_rewrite).data()
259 }
260
261 pub fn with_guarantees(mut self, guarantees: Vec<(Expr, NullableInterval)>) -> Self {
317 self.guarantees = guarantees;
318 self
319 }
320
321 pub fn with_canonicalize(mut self, canonicalize: bool) -> Self {
369 self.canonicalize = canonicalize;
370 self
371 }
372
373 pub fn with_max_cycles(mut self, max_simplifier_cycles: u32) -> Self {
422 self.max_simplifier_cycles = max_simplifier_cycles;
423 self
424 }
425}
426
427struct Canonicalizer {}
434
435impl Canonicalizer {
436 fn new() -> Self {
437 Self {}
438 }
439}
440
441impl TreeNodeRewriter for Canonicalizer {
442 type Node = Expr;
443
444 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
445 let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr else {
446 return Ok(Transformed::no(expr));
447 };
448 match (left.as_ref(), right.as_ref(), op.swap()) {
449 (Expr::Column(left_col), Expr::Column(right_col), Some(swapped_op))
451 if right_col > left_col =>
452 {
453 Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
454 left: right,
455 op: swapped_op,
456 right: left,
457 })))
458 }
459 (Expr::Literal(_a, _), Expr::Column(_b), Some(swapped_op)) => {
461 Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
462 left: right,
463 op: swapped_op,
464 right: left,
465 })))
466 }
467 _ => Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
468 left,
469 op,
470 right,
471 }))),
472 }
473 }
474}
475
476struct ConstEvaluator {
481 can_evaluate: Vec<bool>,
494 execution_props: ExecutionProps,
501 input_schema: DFSchema,
502 input_batch: RecordBatch,
503}
504
505enum ConstSimplifyResult {
507 Simplified(ScalarValue, Option<FieldMetadata>),
509 NotSimplified(ScalarValue, Option<FieldMetadata>),
511 SimplifyRuntimeError(DataFusionError, Expr),
513}
514
515impl TreeNodeRewriter for ConstEvaluator {
516 type Node = Expr;
517
518 fn f_down(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
519 self.can_evaluate.push(true);
521
522 if !Self::can_evaluate(&expr) {
527 let parent_iter = self.can_evaluate.iter_mut().rev();
529 for p in parent_iter {
530 if !*p {
531 break;
534 }
535 *p = false;
536 }
537 }
538
539 Ok(Transformed::no(expr))
543 }
544
545 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
546 match self.can_evaluate.pop() {
547 Some(true) => match self.evaluate_to_scalar(expr) {
552 ConstSimplifyResult::Simplified(s, m) => {
553 Ok(Transformed::yes(Expr::Literal(s, m)))
554 }
555 ConstSimplifyResult::NotSimplified(s, m) => {
556 Ok(Transformed::no(Expr::Literal(s, m)))
557 }
558 ConstSimplifyResult::SimplifyRuntimeError(err, expr) => {
559 if let Expr::Cast(Cast { ref expr, .. })
562 | Expr::TryCast(TryCast { ref expr, .. }) = expr
563 && matches!(expr.as_ref(), Expr::Literal(_, _))
564 {
565 return Err(err);
566 }
567 Ok(Transformed::yes(expr))
570 }
571 },
572 Some(false) => Ok(Transformed::no(expr)),
573 _ => internal_err!("Failed to pop can_evaluate"),
574 }
575 }
576}
577
578impl ConstEvaluator {
579 pub fn try_new(config_options: Option<Arc<ConfigOptions>>) -> Result<Self> {
589 static DUMMY_COL_NAME: &str = ".";
592 let schema = Arc::new(Schema::new(vec![Field::new(
593 DUMMY_COL_NAME,
594 DataType::Null,
595 true,
596 )]));
597 let input_schema = DFSchema::try_from(Arc::clone(&schema))?;
598 let col = new_null_array(&DataType::Null, 1);
600 let input_batch = RecordBatch::try_new(schema, vec![col])?;
601
602 let mut execution_props = ExecutionProps::new();
603 execution_props.config_options = config_options;
604
605 Ok(Self {
606 can_evaluate: vec![],
607 execution_props,
608 input_schema,
609 input_batch,
610 })
611 }
612
613 fn volatility_ok(volatility: Volatility) -> bool {
615 match volatility {
616 Volatility::Immutable => true,
617 Volatility::Stable => true,
619 Volatility::Volatile => false,
620 }
621 }
622
623 fn can_evaluate(expr: &Expr) -> bool {
626 match expr {
632 #[expect(deprecated)]
634 Expr::AggregateFunction { .. }
635 | Expr::ScalarVariable(_, _)
636 | Expr::Column(_)
637 | Expr::OuterReferenceColumn(_, _)
638 | Expr::Exists { .. }
639 | Expr::InSubquery(_)
640 | Expr::SetComparison(_)
641 | Expr::ScalarSubquery(_)
642 | Expr::WindowFunction { .. }
643 | Expr::GroupingSet(_)
644 | Expr::Wildcard { .. }
645 | Expr::Placeholder(_) => false,
646 Expr::ScalarFunction(ScalarFunction { func, .. }) => {
647 Self::volatility_ok(func.signature().volatility)
648 }
649 Expr::Cast(Cast { expr, data_type })
650 | Expr::TryCast(TryCast { expr, data_type }) => {
651 if let (
652 Ok(DataType::Struct(source_fields)),
653 DataType::Struct(target_fields),
654 ) = (expr.get_type(&DFSchema::empty()), data_type)
655 {
656 if source_fields.len() != target_fields.len() {
658 return false;
659 }
660
661 if !has_one_of_more_common_fields(&source_fields, target_fields) {
663 return false;
664 }
665
666 if let Expr::Literal(ScalarValue::Struct(struct_array), _) =
670 expr.as_ref()
671 && struct_array.len() == 0
672 {
673 return false;
674 }
675 }
676 true
677 }
678 Expr::Literal(_, _)
679 | Expr::Alias(..)
680 | Expr::Unnest(_)
681 | Expr::BinaryExpr { .. }
682 | Expr::Not(_)
683 | Expr::IsNotNull(_)
684 | Expr::IsNull(_)
685 | Expr::IsTrue(_)
686 | Expr::IsFalse(_)
687 | Expr::IsUnknown(_)
688 | Expr::IsNotTrue(_)
689 | Expr::IsNotFalse(_)
690 | Expr::IsNotUnknown(_)
691 | Expr::Negative(_)
692 | Expr::Between { .. }
693 | Expr::Like { .. }
694 | Expr::SimilarTo { .. }
695 | Expr::Case(_)
696 | Expr::InList { .. } => true,
697 }
698 }
699
700 pub(crate) fn evaluate_to_scalar(&mut self, expr: Expr) -> ConstSimplifyResult {
702 if let Expr::Literal(s, m) = expr {
703 return ConstSimplifyResult::NotSimplified(s, m);
704 }
705
706 let phys_expr = match create_physical_expr(
707 &expr,
708 &self.input_schema,
709 &self.execution_props,
710 ) {
711 Ok(e) => e,
712 Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
713 };
714 let metadata = phys_expr
715 .return_field(self.input_batch.schema_ref())
716 .ok()
717 .and_then(|f| {
718 let m = f.metadata();
719 match m.is_empty() {
720 true => None,
721 false => Some(FieldMetadata::from(m)),
722 }
723 });
724 let col_val = match phys_expr.evaluate(&self.input_batch) {
725 Ok(v) => v,
726 Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
727 };
728 match col_val {
729 ColumnarValue::Array(a) => {
730 if a.len() != 1 {
731 ConstSimplifyResult::SimplifyRuntimeError(
732 exec_datafusion_err!(
733 "Could not evaluate the expression, found a result of length {}",
734 a.len()
735 ),
736 expr,
737 )
738 } else if as_list_array(&a).is_ok() {
739 ConstSimplifyResult::Simplified(
740 ScalarValue::List(a.as_list::<i32>().to_owned().into()),
741 metadata,
742 )
743 } else if as_large_list_array(&a).is_ok() {
744 ConstSimplifyResult::Simplified(
745 ScalarValue::LargeList(a.as_list::<i64>().to_owned().into()),
746 metadata,
747 )
748 } else {
749 match ScalarValue::try_from_array(&a, 0) {
751 Ok(s) => ConstSimplifyResult::Simplified(s, metadata),
752 Err(err) => ConstSimplifyResult::SimplifyRuntimeError(err, expr),
753 }
754 }
755 }
756 ColumnarValue::Scalar(s) => ConstSimplifyResult::Simplified(s, metadata),
757 }
758 }
759}
760
761struct Simplifier<'a> {
771 info: &'a SimplifyContext,
772}
773
774impl<'a> Simplifier<'a> {
775 pub fn new(info: &'a SimplifyContext) -> Self {
776 Self { info }
777 }
778}
779
780impl TreeNodeRewriter for Simplifier<'_> {
781 type Node = Expr;
782
783 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
785 use datafusion_expr::Operator::{
786 And, BitwiseAnd, BitwiseOr, BitwiseShiftLeft, BitwiseShiftRight, BitwiseXor,
787 Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch,
788 RegexNotIMatch, RegexNotMatch,
789 };
790
791 let info = self.info;
792 Ok(match expr {
793 ref expr @ Expr::BinaryExpr(BinaryExpr {
797 ref left,
798 ref op,
799 ref right,
800 }) if op.returns_null_on_null()
801 && (is_null(left.as_ref()) || is_null(right.as_ref())) =>
802 {
803 Transformed::yes(Expr::Literal(
804 ScalarValue::try_new_null(&info.get_data_type(expr)?)?,
805 None,
806 ))
807 }
808
809 Expr::BinaryExpr(BinaryExpr {
811 left,
812 op: And | Or,
813 right,
814 }) if is_null(&left) && is_null(&right) => Transformed::yes(lit_bool_null()),
815
816 Expr::BinaryExpr(BinaryExpr {
824 left,
825 op: Eq,
826 right,
827 }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
828 Transformed::yes(match as_bool_lit(&left)? {
829 Some(true) => *right,
830 Some(false) => Expr::Not(right),
831 None => lit_bool_null(),
832 })
833 }
834 Expr::BinaryExpr(BinaryExpr {
838 left,
839 op: Eq,
840 right,
841 }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
842 Transformed::yes(match as_bool_lit(&right)? {
843 Some(true) => *left,
844 Some(false) => Expr::Not(left),
845 None => lit_bool_null(),
846 })
847 }
848 Expr::BinaryExpr(BinaryExpr {
853 left,
854 op: Eq,
855 right,
856 }) if (left == right) & !left.is_volatile() => {
857 Transformed::yes(match !info.nullable(&left)? {
858 true => lit(true),
859 false => Expr::BinaryExpr(BinaryExpr {
860 left: Box::new(Expr::IsNotNull(left)),
861 op: Or,
862 right: Box::new(lit_bool_null()),
863 }),
864 })
865 }
866
867 Expr::BinaryExpr(BinaryExpr {
874 left,
875 op: NotEq,
876 right,
877 }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
878 Transformed::yes(match as_bool_lit(&left)? {
879 Some(true) => Expr::Not(right),
880 Some(false) => *right,
881 None => lit_bool_null(),
882 })
883 }
884 Expr::BinaryExpr(BinaryExpr {
888 left,
889 op: NotEq,
890 right,
891 }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
892 Transformed::yes(match as_bool_lit(&right)? {
893 Some(true) => Expr::Not(left),
894 Some(false) => *left,
895 None => lit_bool_null(),
896 })
897 }
898
899 Expr::BinaryExpr(BinaryExpr {
905 left,
906 op: Or,
907 right: _,
908 }) if is_true(&left) => Transformed::yes(*left),
909 Expr::BinaryExpr(BinaryExpr {
911 left,
912 op: Or,
913 right,
914 }) if is_false(&left) => Transformed::yes(*right),
915 Expr::BinaryExpr(BinaryExpr {
917 left: _,
918 op: Or,
919 right,
920 }) if is_true(&right) => Transformed::yes(*right),
921 Expr::BinaryExpr(BinaryExpr {
923 left,
924 op: Or,
925 right,
926 }) if is_false(&right) => Transformed::yes(*left),
927 Expr::BinaryExpr(BinaryExpr {
929 left,
930 op: Or,
931 right,
932 }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
933 Transformed::yes(lit(true))
934 }
935 Expr::BinaryExpr(BinaryExpr {
937 left,
938 op: Or,
939 right,
940 }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
941 Transformed::yes(lit(true))
942 }
943 Expr::BinaryExpr(BinaryExpr {
945 left,
946 op: Or,
947 right,
948 }) if expr_contains(&left, &right, Or) => Transformed::yes(*left),
949 Expr::BinaryExpr(BinaryExpr {
951 left,
952 op: Or,
953 right,
954 }) if expr_contains(&right, &left, Or) => Transformed::yes(*right),
955 Expr::BinaryExpr(BinaryExpr {
957 left,
958 op: Or,
959 right,
960 }) if is_op_with(And, &right, &left) => Transformed::yes(*left),
961 Expr::BinaryExpr(BinaryExpr {
963 left,
964 op: Or,
965 right,
966 }) if is_op_with(And, &left, &right) => Transformed::yes(*right),
967 Expr::BinaryExpr(BinaryExpr {
970 left,
971 op: Or,
972 right,
973 }) if has_common_conjunction(&left, &right) => {
974 let lhs: IndexSet<Expr> = iter_conjunction_owned(*left).collect();
975 let (common, rhs): (Vec<_>, Vec<_>) = iter_conjunction_owned(*right)
976 .partition(|e| lhs.contains(e) && !e.is_volatile());
977
978 let new_rhs = rhs.into_iter().reduce(and);
979 let new_lhs = lhs.into_iter().filter(|e| !common.contains(e)).reduce(and);
980 let common_conjunction = common.into_iter().reduce(and).unwrap();
981
982 let new_expr = match (new_lhs, new_rhs) {
983 (Some(lhs), Some(rhs)) => and(common_conjunction, or(lhs, rhs)),
984 (_, _) => common_conjunction,
985 };
986 Transformed::yes(new_expr)
987 }
988
989 Expr::BinaryExpr(BinaryExpr {
995 left,
996 op: And,
997 right,
998 }) if is_true(&left) => Transformed::yes(*right),
999 Expr::BinaryExpr(BinaryExpr {
1001 left,
1002 op: And,
1003 right: _,
1004 }) if is_false(&left) => Transformed::yes(*left),
1005 Expr::BinaryExpr(BinaryExpr {
1007 left,
1008 op: And,
1009 right,
1010 }) if is_true(&right) => Transformed::yes(*left),
1011 Expr::BinaryExpr(BinaryExpr {
1013 left: _,
1014 op: And,
1015 right,
1016 }) if is_false(&right) => Transformed::yes(*right),
1017 Expr::BinaryExpr(BinaryExpr {
1019 left,
1020 op: And,
1021 right,
1022 }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
1023 Transformed::yes(lit(false))
1024 }
1025 Expr::BinaryExpr(BinaryExpr {
1027 left,
1028 op: And,
1029 right,
1030 }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
1031 Transformed::yes(lit(false))
1032 }
1033 Expr::BinaryExpr(BinaryExpr {
1035 left,
1036 op: And,
1037 right,
1038 }) if expr_contains(&left, &right, And) => Transformed::yes(*left),
1039 Expr::BinaryExpr(BinaryExpr {
1041 left,
1042 op: And,
1043 right,
1044 }) if expr_contains(&right, &left, And) => Transformed::yes(*right),
1045 Expr::BinaryExpr(BinaryExpr {
1047 left,
1048 op: And,
1049 right,
1050 }) if is_op_with(Or, &right, &left) => Transformed::yes(*left),
1051 Expr::BinaryExpr(BinaryExpr {
1053 left,
1054 op: And,
1055 right,
1056 }) if is_op_with(Or, &left, &right) => Transformed::yes(*right),
1057 Expr::BinaryExpr(BinaryExpr {
1059 left,
1060 op: And,
1061 right,
1062 }) if can_reduce_to_equal_statement(&left, &right) => {
1063 if let Expr::BinaryExpr(BinaryExpr {
1064 left: left_left,
1065 right: left_right,
1066 ..
1067 }) = *left
1068 {
1069 Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1070 left: left_left,
1071 op: Eq,
1072 right: left_right,
1073 }))
1074 } else {
1075 return internal_err!(
1076 "can_reduce_to_equal_statement should only be called with a BinaryExpr"
1077 );
1078 }
1079 }
1080 Expr::BinaryExpr(BinaryExpr {
1082 left,
1083 op: And,
1084 right,
1085 }) if is_eq_and_ne_with_different_literal(&left, &right) => {
1086 Transformed::yes(*left)
1087 }
1088 Expr::BinaryExpr(BinaryExpr {
1090 left,
1091 op: And,
1092 right,
1093 }) if is_eq_and_ne_with_different_literal(&right, &left) => {
1094 Transformed::yes(*right)
1095 }
1096
1097 Expr::BinaryExpr(BinaryExpr {
1103 left,
1104 op: Multiply,
1105 right,
1106 }) if is_one(&right) => {
1107 simplify_right_is_one_case(info, left, &Multiply, &right)?
1108 }
1109 Expr::BinaryExpr(BinaryExpr {
1111 left,
1112 op: Multiply,
1113 right,
1114 }) if is_one(&left) => {
1115 simplify_right_is_one_case(info, right, &Multiply, &left)?
1117 }
1118
1119 Expr::BinaryExpr(BinaryExpr {
1121 left,
1122 op: Multiply,
1123 right,
1124 }) if !info.nullable(&left)?
1125 && !info.get_data_type(&left)?.is_floating()
1126 && is_zero(&right) =>
1127 {
1128 Transformed::yes(*right)
1129 }
1130 Expr::BinaryExpr(BinaryExpr {
1132 left,
1133 op: Multiply,
1134 right,
1135 }) if !info.nullable(&right)?
1136 && !info.get_data_type(&right)?.is_floating()
1137 && is_zero(&left) =>
1138 {
1139 Transformed::yes(*left)
1140 }
1141
1142 Expr::BinaryExpr(BinaryExpr {
1148 left,
1149 op: Divide,
1150 right,
1151 }) if is_one(&right) => {
1152 simplify_right_is_one_case(info, left, &Divide, &right)?
1153 }
1154
1155 Expr::BinaryExpr(BinaryExpr {
1161 left,
1162 op: Modulo,
1163 right,
1164 }) if !info.nullable(&left)?
1165 && !info.get_data_type(&left)?.is_floating()
1166 && is_one(&right) =>
1167 {
1168 Transformed::yes(Expr::Literal(
1169 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1170 None,
1171 ))
1172 }
1173
1174 Expr::BinaryExpr(BinaryExpr {
1180 left,
1181 op: BitwiseAnd,
1182 right,
1183 }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*right),
1184
1185 Expr::BinaryExpr(BinaryExpr {
1187 left,
1188 op: BitwiseAnd,
1189 right,
1190 }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*left),
1191
1192 Expr::BinaryExpr(BinaryExpr {
1194 left,
1195 op: BitwiseAnd,
1196 right,
1197 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1198 Transformed::yes(Expr::Literal(
1199 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1200 None,
1201 ))
1202 }
1203
1204 Expr::BinaryExpr(BinaryExpr {
1206 left,
1207 op: BitwiseAnd,
1208 right,
1209 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1210 Transformed::yes(Expr::Literal(
1211 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1212 None,
1213 ))
1214 }
1215
1216 Expr::BinaryExpr(BinaryExpr {
1218 left,
1219 op: BitwiseAnd,
1220 right,
1221 }) if expr_contains(&left, &right, BitwiseAnd) => Transformed::yes(*left),
1222
1223 Expr::BinaryExpr(BinaryExpr {
1225 left,
1226 op: BitwiseAnd,
1227 right,
1228 }) if expr_contains(&right, &left, BitwiseAnd) => Transformed::yes(*right),
1229
1230 Expr::BinaryExpr(BinaryExpr {
1232 left,
1233 op: BitwiseAnd,
1234 right,
1235 }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => {
1236 Transformed::yes(*left)
1237 }
1238
1239 Expr::BinaryExpr(BinaryExpr {
1241 left,
1242 op: BitwiseAnd,
1243 right,
1244 }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => {
1245 Transformed::yes(*right)
1246 }
1247
1248 Expr::BinaryExpr(BinaryExpr {
1254 left,
1255 op: BitwiseOr,
1256 right,
1257 }) if is_zero(&right) => Transformed::yes(*left),
1258
1259 Expr::BinaryExpr(BinaryExpr {
1261 left,
1262 op: BitwiseOr,
1263 right,
1264 }) if is_zero(&left) => Transformed::yes(*right),
1265
1266 Expr::BinaryExpr(BinaryExpr {
1268 left,
1269 op: BitwiseOr,
1270 right,
1271 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1272 Transformed::yes(Expr::Literal(
1273 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1274 None,
1275 ))
1276 }
1277
1278 Expr::BinaryExpr(BinaryExpr {
1280 left,
1281 op: BitwiseOr,
1282 right,
1283 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1284 Transformed::yes(Expr::Literal(
1285 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1286 None,
1287 ))
1288 }
1289
1290 Expr::BinaryExpr(BinaryExpr {
1292 left,
1293 op: BitwiseOr,
1294 right,
1295 }) if expr_contains(&left, &right, BitwiseOr) => Transformed::yes(*left),
1296
1297 Expr::BinaryExpr(BinaryExpr {
1299 left,
1300 op: BitwiseOr,
1301 right,
1302 }) if expr_contains(&right, &left, BitwiseOr) => Transformed::yes(*right),
1303
1304 Expr::BinaryExpr(BinaryExpr {
1306 left,
1307 op: BitwiseOr,
1308 right,
1309 }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => {
1310 Transformed::yes(*left)
1311 }
1312
1313 Expr::BinaryExpr(BinaryExpr {
1315 left,
1316 op: BitwiseOr,
1317 right,
1318 }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => {
1319 Transformed::yes(*right)
1320 }
1321
1322 Expr::BinaryExpr(BinaryExpr {
1328 left,
1329 op: BitwiseXor,
1330 right,
1331 }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*left),
1332
1333 Expr::BinaryExpr(BinaryExpr {
1335 left,
1336 op: BitwiseXor,
1337 right,
1338 }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*right),
1339
1340 Expr::BinaryExpr(BinaryExpr {
1342 left,
1343 op: BitwiseXor,
1344 right,
1345 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1346 Transformed::yes(Expr::Literal(
1347 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1348 None,
1349 ))
1350 }
1351
1352 Expr::BinaryExpr(BinaryExpr {
1354 left,
1355 op: BitwiseXor,
1356 right,
1357 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1358 Transformed::yes(Expr::Literal(
1359 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1360 None,
1361 ))
1362 }
1363
1364 Expr::BinaryExpr(BinaryExpr {
1366 left,
1367 op: BitwiseXor,
1368 right,
1369 }) if expr_contains(&left, &right, BitwiseXor) => {
1370 let expr = delete_xor_in_complex_expr(&left, &right, false);
1371 Transformed::yes(if expr == *right {
1372 Expr::Literal(
1373 ScalarValue::new_zero(&info.get_data_type(&right)?)?,
1374 None,
1375 )
1376 } else {
1377 expr
1378 })
1379 }
1380
1381 Expr::BinaryExpr(BinaryExpr {
1383 left,
1384 op: BitwiseXor,
1385 right,
1386 }) if expr_contains(&right, &left, BitwiseXor) => {
1387 let expr = delete_xor_in_complex_expr(&right, &left, true);
1388 Transformed::yes(if expr == *left {
1389 Expr::Literal(
1390 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1391 None,
1392 )
1393 } else {
1394 expr
1395 })
1396 }
1397
1398 Expr::BinaryExpr(BinaryExpr {
1404 left,
1405 op: BitwiseShiftRight,
1406 right,
1407 }) if is_zero(&right) => Transformed::yes(*left),
1408
1409 Expr::BinaryExpr(BinaryExpr {
1415 left,
1416 op: BitwiseShiftLeft,
1417 right,
1418 }) if is_zero(&right) => Transformed::yes(*left),
1419
1420 Expr::Not(inner) => Transformed::yes(negate_clause(*inner)),
1424
1425 Expr::Negative(inner) => Transformed::yes(distribute_negation(*inner)),
1429
1430 Expr::BinaryExpr(BinaryExpr {
1438 left,
1439 op: op @ (Eq | NotEq),
1440 right,
1441 }) if is_case_with_literal_outputs(&left) && is_lit(&right) => {
1442 let case = into_case(*left)?;
1443 Transformed::yes(Expr::Case(Case {
1444 expr: None,
1445 when_then_expr: case
1446 .when_then_expr
1447 .into_iter()
1448 .map(|(when, then)| {
1449 (
1450 when,
1451 Box::new(Expr::BinaryExpr(BinaryExpr {
1452 left: then,
1453 op,
1454 right: right.clone(),
1455 })),
1456 )
1457 })
1458 .collect(),
1459 else_expr: case.else_expr.map(|els| {
1460 Box::new(Expr::BinaryExpr(BinaryExpr {
1461 left: els,
1462 op,
1463 right,
1464 }))
1465 }),
1466 }))
1467 }
1468
1469 Expr::Case(Case {
1475 expr: None,
1476 when_then_expr,
1477 mut else_expr,
1478 }) if when_then_expr
1479 .iter()
1480 .any(|(when, _)| is_true(when.as_ref()) || is_false(when.as_ref())) =>
1481 {
1482 let out_type = info.get_data_type(&when_then_expr[0].1)?;
1483 let mut new_when_then_expr = Vec::with_capacity(when_then_expr.len());
1484
1485 for (when, then) in when_then_expr.into_iter() {
1486 if is_true(when.as_ref()) {
1487 else_expr = Some(then);
1490 break;
1491 } else if !is_false(when.as_ref()) {
1492 new_when_then_expr.push((when, then));
1493 }
1494 }
1496
1497 if new_when_then_expr.is_empty() {
1499 if let Some(else_expr) = else_expr {
1501 return Ok(Transformed::yes(*else_expr));
1502 } else {
1504 let null =
1505 Expr::Literal(ScalarValue::try_new_null(&out_type)?, None);
1506 return Ok(Transformed::yes(null));
1507 }
1508 }
1509
1510 Transformed::yes(Expr::Case(Case {
1511 expr: None,
1512 when_then_expr: new_when_then_expr,
1513 else_expr,
1514 }))
1515 }
1516
1517 Expr::Case(Case {
1529 expr: None,
1530 when_then_expr,
1531 else_expr,
1532 }) if !when_then_expr.is_empty()
1533 && (when_then_expr.len() < 3 || (when_then_expr.iter().all(|(_, then)| is_bool_lit(then))
1537 && when_then_expr.iter().filter(|(_, then)| is_true(then)).count() < 3))
1538 && info.is_boolean_type(&when_then_expr[0].1)? =>
1539 {
1540 let mut filter_expr = lit(false);
1542 let mut out_expr = lit(false);
1544
1545 for (when, then) in when_then_expr {
1546 let when = is_exactly_true(*when, info)?;
1547 let case_expr =
1548 when.clone().and(filter_expr.clone().not()).and(*then);
1549
1550 out_expr = out_expr.or(case_expr);
1551 filter_expr = filter_expr.or(when);
1552 }
1553
1554 let else_expr = else_expr.map(|b| *b).unwrap_or_else(lit_bool_null);
1555 let case_expr = filter_expr.not().and(else_expr);
1556 out_expr = out_expr.or(case_expr);
1557
1558 out_expr.rewrite(self)?
1560 }
1561 Expr::Case(Case {
1582 expr: None,
1583 when_then_expr,
1584 else_expr,
1585 }) if !when_then_expr.is_empty()
1586 && when_then_expr
1587 .iter()
1588 .all(|(_, then)| is_bool_lit(then)) && when_then_expr
1591 .iter()
1592 .filter(|(_, then)| is_false(then))
1593 .count()
1594 < 3
1595 && else_expr.as_deref().is_none_or(is_bool_lit) =>
1596 {
1597 Transformed::yes(
1598 Expr::Case(Case {
1599 expr: None,
1600 when_then_expr: when_then_expr
1601 .into_iter()
1602 .map(|(when, then)| (when, Box::new(Expr::Not(then))))
1603 .collect(),
1604 else_expr: else_expr
1605 .map(|else_expr| Box::new(Expr::Not(else_expr))),
1606 })
1607 .not(),
1608 )
1609 }
1610 Expr::ScalarFunction(ScalarFunction { func: udf, args }) => {
1611 match udf.simplify(args, info)? {
1612 ExprSimplifyResult::Original(args) => {
1613 Transformed::no(Expr::ScalarFunction(ScalarFunction {
1614 func: udf,
1615 args,
1616 }))
1617 }
1618 ExprSimplifyResult::Simplified(expr) => Transformed::yes(expr),
1619 }
1620 }
1621
1622 Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction {
1623 ref func,
1624 ..
1625 }) => match (func.simplify(), expr) {
1626 (Some(simplify_function), Expr::AggregateFunction(af)) => {
1627 Transformed::yes(simplify_function(af, info)?)
1628 }
1629 (_, expr) => Transformed::no(expr),
1630 },
1631
1632 Expr::WindowFunction(ref window_fun) => match (window_fun.simplify(), expr) {
1633 (Some(simplify_function), Expr::WindowFunction(wf)) => {
1634 Transformed::yes(simplify_function(*wf, info)?)
1635 }
1636 (_, expr) => Transformed::no(expr),
1637 },
1638
1639 Expr::Between(between) => Transformed::yes(if between.negated {
1646 let l = *between.expr.clone();
1647 let r = *between.expr;
1648 or(l.lt(*between.low), r.gt(*between.high))
1649 } else {
1650 and(
1651 between.expr.clone().gt_eq(*between.low),
1652 between.expr.lt_eq(*between.high),
1653 )
1654 }),
1655
1656 Expr::BinaryExpr(BinaryExpr {
1660 left,
1661 op: op @ (RegexMatch | RegexNotMatch | RegexIMatch | RegexNotIMatch),
1662 right,
1663 }) => Transformed::yes(simplify_regex_expr(left, op, right)?),
1664
1665 Expr::Like(like) => {
1667 let escape_char = like.escape_char.unwrap_or('\\');
1669 match as_string_scalar(&like.pattern) {
1670 Some((data_type, pattern_str)) => {
1671 match pattern_str {
1672 None => return Ok(Transformed::yes(lit_bool_null())),
1673 Some(pattern_str) if pattern_str == "%" => {
1674 let result_for_non_null = lit(!like.negated);
1681 Transformed::yes(if !info.nullable(&like.expr)? {
1682 result_for_non_null
1683 } else {
1684 Expr::Case(Case {
1685 expr: Some(Box::new(Expr::IsNotNull(like.expr))),
1686 when_then_expr: vec![(
1687 Box::new(lit(true)),
1688 Box::new(result_for_non_null),
1689 )],
1690 else_expr: None,
1691 })
1692 })
1693 }
1694 Some(pattern_str)
1695 if pattern_str.contains("%%")
1696 && !pattern_str.contains(escape_char) =>
1697 {
1698 let simplified_pattern = Regex::new("%%+")
1701 .unwrap()
1702 .replace_all(pattern_str, "%")
1703 .to_string();
1704 Transformed::yes(Expr::Like(Like {
1705 pattern: Box::new(to_string_scalar(
1706 &data_type,
1707 Some(simplified_pattern),
1708 )),
1709 ..like
1710 }))
1711 }
1712 Some(pattern_str)
1713 if !like.case_insensitive
1714 && !pattern_str
1715 .contains(['%', '_', escape_char].as_ref()) =>
1716 {
1717 Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1720 left: like.expr.clone(),
1721 op: if like.negated { NotEq } else { Eq },
1722 right: like.pattern.clone(),
1723 }))
1724 }
1725
1726 Some(_pattern_str) => Transformed::no(Expr::Like(like)),
1727 }
1728 }
1729 None => Transformed::no(Expr::Like(like)),
1730 }
1731 }
1732
1733 Expr::IsNotNull(expr) | Expr::IsNotUnknown(expr)
1735 if !info.nullable(&expr)? =>
1736 {
1737 Transformed::yes(lit(true))
1738 }
1739
1740 Expr::IsNull(expr) | Expr::IsUnknown(expr) if !info.nullable(&expr)? => {
1742 Transformed::yes(lit(false))
1743 }
1744
1745 Expr::InList(InList {
1748 expr: _,
1749 list,
1750 negated,
1751 }) if list.is_empty() => Transformed::yes(lit(negated)),
1752
1753 Expr::InList(InList {
1756 expr,
1757 list,
1758 negated: _,
1759 }) if is_null(expr.as_ref()) && !list.is_empty() => {
1760 Transformed::yes(lit_bool_null())
1761 }
1762
1763 Expr::InList(InList {
1765 expr,
1766 mut list,
1767 negated,
1768 }) if list.len() == 1
1769 && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
1770 {
1771 let Expr::ScalarSubquery(subquery) = list.remove(0) else {
1772 unreachable!()
1773 };
1774
1775 Transformed::yes(Expr::InSubquery(InSubquery::new(
1776 expr, subquery, negated,
1777 )))
1778 }
1779
1780 Expr::BinaryExpr(BinaryExpr {
1784 left,
1785 op: Or,
1786 right,
1787 }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
1788 let lhs = to_inlist(*left).unwrap();
1789 let rhs = to_inlist(*right).unwrap();
1790 let mut seen: HashSet<Expr> = HashSet::new();
1791 let list = lhs
1792 .list
1793 .into_iter()
1794 .chain(rhs.list)
1795 .filter(|e| seen.insert(e.to_owned()))
1796 .collect::<Vec<_>>();
1797
1798 let merged_inlist = InList {
1799 expr: lhs.expr,
1800 list,
1801 negated: false,
1802 };
1803
1804 Transformed::yes(Expr::InList(merged_inlist))
1805 }
1806
1807 Expr::BinaryExpr(BinaryExpr {
1824 left,
1825 op: And,
1826 right,
1827 }) if are_inlist_and_eq_and_match_neg(
1828 left.as_ref(),
1829 right.as_ref(),
1830 false,
1831 false,
1832 ) =>
1833 {
1834 match (*left, *right) {
1835 (Expr::InList(l1), Expr::InList(l2)) => {
1836 return inlist_intersection(l1, &l2, false).map(Transformed::yes);
1837 }
1838 _ => unreachable!(),
1840 }
1841 }
1842
1843 Expr::BinaryExpr(BinaryExpr {
1844 left,
1845 op: And,
1846 right,
1847 }) if are_inlist_and_eq_and_match_neg(
1848 left.as_ref(),
1849 right.as_ref(),
1850 true,
1851 true,
1852 ) =>
1853 {
1854 match (*left, *right) {
1855 (Expr::InList(l1), Expr::InList(l2)) => {
1856 return inlist_union(l1, l2, true).map(Transformed::yes);
1857 }
1858 _ => unreachable!(),
1860 }
1861 }
1862
1863 Expr::BinaryExpr(BinaryExpr {
1864 left,
1865 op: And,
1866 right,
1867 }) if are_inlist_and_eq_and_match_neg(
1868 left.as_ref(),
1869 right.as_ref(),
1870 false,
1871 true,
1872 ) =>
1873 {
1874 match (*left, *right) {
1875 (Expr::InList(l1), Expr::InList(l2)) => {
1876 return inlist_except(l1, &l2).map(Transformed::yes);
1877 }
1878 _ => unreachable!(),
1880 }
1881 }
1882
1883 Expr::BinaryExpr(BinaryExpr {
1884 left,
1885 op: And,
1886 right,
1887 }) if are_inlist_and_eq_and_match_neg(
1888 left.as_ref(),
1889 right.as_ref(),
1890 true,
1891 false,
1892 ) =>
1893 {
1894 match (*left, *right) {
1895 (Expr::InList(l1), Expr::InList(l2)) => {
1896 return inlist_except(l2, &l1).map(Transformed::yes);
1897 }
1898 _ => unreachable!(),
1900 }
1901 }
1902
1903 Expr::BinaryExpr(BinaryExpr {
1904 left,
1905 op: Or,
1906 right,
1907 }) if are_inlist_and_eq_and_match_neg(
1908 left.as_ref(),
1909 right.as_ref(),
1910 true,
1911 true,
1912 ) =>
1913 {
1914 match (*left, *right) {
1915 (Expr::InList(l1), Expr::InList(l2)) => {
1916 return inlist_intersection(l1, &l2, true).map(Transformed::yes);
1917 }
1918 _ => unreachable!(),
1920 }
1921 }
1922
1923 Expr::BinaryExpr(BinaryExpr { left, op, right })
1930 if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1931 info, &left, op, &right,
1932 ) && op.supports_propagation() =>
1933 {
1934 unwrap_cast_in_comparison_for_binary(info, *left, *right, op)?
1935 }
1936 Expr::BinaryExpr(BinaryExpr { left, op, right })
1940 if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1941 info, &right, op, &left,
1942 ) && op.supports_propagation()
1943 && op.swap().is_some() =>
1944 {
1945 unwrap_cast_in_comparison_for_binary(
1946 info,
1947 *right,
1948 *left,
1949 op.swap().unwrap(),
1950 )?
1951 }
1952 Expr::InList(InList {
1955 expr: mut left,
1956 list,
1957 negated,
1958 }) if is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist(
1959 info, &left, &list,
1960 ) =>
1961 {
1962 let (Expr::TryCast(TryCast {
1963 expr: left_expr, ..
1964 })
1965 | Expr::Cast(Cast {
1966 expr: left_expr, ..
1967 })) = left.as_mut()
1968 else {
1969 return internal_err!("Expect cast expr, but got {:?}", left)?;
1970 };
1971
1972 let expr_type = info.get_data_type(left_expr)?;
1973 let right_exprs = list
1974 .into_iter()
1975 .map(|right| {
1976 match right {
1977 Expr::Literal(right_lit_value, _) => {
1978 let Some(value) = try_cast_literal_to_type(&right_lit_value, &expr_type) else {
1981 internal_err!(
1982 "Can't cast the list expr {:?} to type {}",
1983 right_lit_value, &expr_type
1984 )?
1985 };
1986 Ok(lit(value))
1987 }
1988 other_expr => internal_err!(
1989 "Only support literal expr to optimize, but the expr is {:?}",
1990 &other_expr
1991 ),
1992 }
1993 })
1994 .collect::<Result<Vec<_>>>()?;
1995
1996 Transformed::yes(Expr::InList(InList {
1997 expr: std::mem::take(left_expr),
1998 list: right_exprs,
1999 negated,
2000 }))
2001 }
2002
2003 Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2012 use datafusion_expr::Operator::*;
2013 let is_preimage_op = matches!(
2014 op,
2015 Eq | NotEq
2016 | Lt
2017 | LtEq
2018 | Gt
2019 | GtEq
2020 | IsDistinctFrom
2021 | IsNotDistinctFrom
2022 );
2023 if !is_preimage_op || is_null(&right) {
2024 return Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
2025 left,
2026 op,
2027 right,
2028 })));
2029 }
2030
2031 if let PreimageResult::Range { interval, expr } =
2032 get_preimage(left.as_ref(), right.as_ref(), info)?
2033 {
2034 rewrite_with_preimage(*interval, op, expr)?
2035 } else if let Some(swapped) = op.swap() {
2036 if let PreimageResult::Range { interval, expr } =
2037 get_preimage(right.as_ref(), left.as_ref(), info)?
2038 {
2039 rewrite_with_preimage(*interval, swapped, expr)?
2040 } else {
2041 Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, right }))
2042 }
2043 } else {
2044 Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, right }))
2045 }
2046 }
2047 Expr::InList(InList {
2050 expr,
2051 list,
2052 negated,
2053 }) => {
2054 if list.len() > THRESHOLD_INLINE_INLIST || list.iter().any(is_null) {
2055 return Ok(Transformed::no(Expr::InList(InList {
2056 expr,
2057 list,
2058 negated,
2059 })));
2060 }
2061
2062 let (op, combiner): (Operator, fn(Expr, Expr) -> Expr) =
2063 if negated { (NotEq, and) } else { (Eq, or) };
2064
2065 let mut rewritten: Option<Expr> = None;
2066 for item in &list {
2067 let PreimageResult::Range { interval, expr } =
2068 get_preimage(expr.as_ref(), item, info)?
2069 else {
2070 return Ok(Transformed::no(Expr::InList(InList {
2071 expr,
2072 list,
2073 negated,
2074 })));
2075 };
2076
2077 let range_expr = rewrite_with_preimage(*interval, op, expr)?.data;
2078 rewritten = Some(match rewritten {
2079 None => range_expr,
2080 Some(acc) => combiner(acc, range_expr),
2081 });
2082 }
2083
2084 if let Some(rewritten) = rewritten {
2085 Transformed::yes(rewritten)
2086 } else {
2087 Transformed::no(Expr::InList(InList {
2088 expr,
2089 list,
2090 negated,
2091 }))
2092 }
2093 }
2094
2095 expr => Transformed::no(expr),
2097 })
2098 }
2099}
2100
2101fn get_preimage(
2102 left_expr: &Expr,
2103 right_expr: &Expr,
2104 info: &SimplifyContext,
2105) -> Result<PreimageResult> {
2106 let Expr::ScalarFunction(ScalarFunction { func, args }) = left_expr else {
2107 return Ok(PreimageResult::None);
2108 };
2109 if !is_literal_or_literal_cast(right_expr) {
2110 return Ok(PreimageResult::None);
2111 }
2112 if func.signature().volatility != Volatility::Immutable {
2113 return Ok(PreimageResult::None);
2114 }
2115 func.preimage(args, right_expr, info)
2116}
2117
2118fn is_literal_or_literal_cast(expr: &Expr) -> bool {
2119 match expr {
2120 Expr::Literal(_, _) => true,
2121 Expr::Cast(Cast { expr, .. }) => matches!(expr.as_ref(), Expr::Literal(_, _)),
2122 Expr::TryCast(TryCast { expr, .. }) => {
2123 matches!(expr.as_ref(), Expr::Literal(_, _))
2124 }
2125 _ => false,
2126 }
2127}
2128
2129fn as_string_scalar(expr: &Expr) -> Option<(DataType, &Option<String>)> {
2130 match expr {
2131 Expr::Literal(ScalarValue::Utf8(s), _) => Some((DataType::Utf8, s)),
2132 Expr::Literal(ScalarValue::LargeUtf8(s), _) => Some((DataType::LargeUtf8, s)),
2133 Expr::Literal(ScalarValue::Utf8View(s), _) => Some((DataType::Utf8View, s)),
2134 _ => None,
2135 }
2136}
2137
2138fn to_string_scalar(data_type: &DataType, value: Option<String>) -> Expr {
2139 match data_type {
2140 DataType::Utf8 => Expr::Literal(ScalarValue::Utf8(value), None),
2141 DataType::LargeUtf8 => Expr::Literal(ScalarValue::LargeUtf8(value), None),
2142 DataType::Utf8View => Expr::Literal(ScalarValue::Utf8View(value), None),
2143 _ => unreachable!(),
2144 }
2145}
2146
2147fn has_common_conjunction(lhs: &Expr, rhs: &Expr) -> bool {
2148 let lhs_set: HashSet<&Expr> = iter_conjunction(lhs).collect();
2149 iter_conjunction(rhs).any(|e| lhs_set.contains(&e) && !e.is_volatile())
2150}
2151
2152fn are_inlist_and_eq_and_match_neg(
2154 left: &Expr,
2155 right: &Expr,
2156 is_left_neg: bool,
2157 is_right_neg: bool,
2158) -> bool {
2159 match (left, right) {
2160 (Expr::InList(l), Expr::InList(r)) => {
2161 l.expr == r.expr && l.negated == is_left_neg && r.negated == is_right_neg
2162 }
2163 _ => false,
2164 }
2165}
2166
2167fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
2169 let left = as_inlist(left);
2170 let right = as_inlist(right);
2171 if let (Some(lhs), Some(rhs)) = (left, right) {
2172 matches!(lhs.expr.as_ref(), Expr::Column(_))
2173 && matches!(rhs.expr.as_ref(), Expr::Column(_))
2174 && lhs.expr == rhs.expr
2175 && !lhs.negated
2176 && !rhs.negated
2177 } else {
2178 false
2179 }
2180}
2181
2182fn as_inlist(expr: &'_ Expr) -> Option<Cow<'_, InList>> {
2184 match expr {
2185 Expr::InList(inlist) => Some(Cow::Borrowed(inlist)),
2186 Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::Eq => {
2187 match (left.as_ref(), right.as_ref()) {
2188 (Expr::Column(_), Expr::Literal(_, _)) => Some(Cow::Owned(InList {
2189 expr: left.clone(),
2190 list: vec![*right.clone()],
2191 negated: false,
2192 })),
2193 (Expr::Literal(_, _), Expr::Column(_)) => Some(Cow::Owned(InList {
2194 expr: right.clone(),
2195 list: vec![*left.clone()],
2196 negated: false,
2197 })),
2198 _ => None,
2199 }
2200 }
2201 _ => None,
2202 }
2203}
2204
2205fn to_inlist(expr: Expr) -> Option<InList> {
2206 match expr {
2207 Expr::InList(inlist) => Some(inlist),
2208 Expr::BinaryExpr(BinaryExpr {
2209 left,
2210 op: Operator::Eq,
2211 right,
2212 }) => match (left.as_ref(), right.as_ref()) {
2213 (Expr::Column(_), Expr::Literal(_, _)) => Some(InList {
2214 expr: left,
2215 list: vec![*right],
2216 negated: false,
2217 }),
2218 (Expr::Literal(_, _), Expr::Column(_)) => Some(InList {
2219 expr: right,
2220 list: vec![*left],
2221 negated: false,
2222 }),
2223 _ => None,
2224 },
2225 _ => None,
2226 }
2227}
2228
2229fn inlist_union(mut l1: InList, l2: InList, negated: bool) -> Result<Expr> {
2232 let l1_items: HashSet<_> = l1.list.iter().collect();
2234
2235 let keep_l2: Vec<_> = l2
2237 .list
2238 .into_iter()
2239 .filter_map(|e| if l1_items.contains(&e) { None } else { Some(e) })
2240 .collect();
2241
2242 l1.list.extend(keep_l2);
2243 l1.negated = negated;
2244 Ok(Expr::InList(l1))
2245}
2246
2247fn inlist_intersection(mut l1: InList, l2: &InList, negated: bool) -> Result<Expr> {
2250 let l2_items = l2.list.iter().collect::<HashSet<_>>();
2251
2252 l1.list.retain(|e| l2_items.contains(e));
2254
2255 if l1.list.is_empty() {
2258 return Ok(lit(negated));
2259 }
2260 Ok(Expr::InList(l1))
2261}
2262
2263fn inlist_except(mut l1: InList, l2: &InList) -> Result<Expr> {
2266 let l2_items = l2.list.iter().collect::<HashSet<_>>();
2267
2268 l1.list.retain(|e| !l2_items.contains(e));
2270
2271 if l1.list.is_empty() {
2272 return Ok(lit(false));
2273 }
2274 Ok(Expr::InList(l1))
2275}
2276
2277fn is_exactly_true(expr: Expr, info: &SimplifyContext) -> Result<Expr> {
2279 if !info.nullable(&expr)? {
2280 Ok(expr)
2281 } else {
2282 Ok(Expr::BinaryExpr(BinaryExpr {
2283 left: Box::new(expr),
2284 op: Operator::IsNotDistinctFrom,
2285 right: Box::new(lit(true)),
2286 }))
2287 }
2288}
2289
2290fn simplify_right_is_one_case(
2295 info: &SimplifyContext,
2296 left: Box<Expr>,
2297 op: &Operator,
2298 right: &Expr,
2299) -> Result<Transformed<Expr>> {
2300 let left_type = info.get_data_type(&left)?;
2302 let right_type = info.get_data_type(right)?;
2303 match BinaryTypeCoercer::new(&left_type, op, &right_type).get_result_type() {
2304 Ok(result_type) => {
2305 if left_type != result_type {
2307 Ok(Transformed::yes(Expr::Cast(Cast::new(left, result_type))))
2308 } else {
2309 Ok(Transformed::yes(*left))
2310 }
2311 }
2312 Err(_) => Ok(Transformed::yes(*left)),
2313 }
2314}
2315
2316#[cfg(test)]
2317mod tests {
2318 use super::*;
2319 use crate::simplify_expressions::SimplifyContext;
2320 use crate::test::test_table_scan_with_name;
2321 use arrow::{
2322 array::{Int32Array, StructArray},
2323 datatypes::{FieldRef, Fields},
2324 };
2325 use datafusion_common::{DFSchemaRef, ToDFSchema, assert_contains};
2326 use datafusion_expr::{
2327 expr::WindowFunction,
2328 function::{
2329 AccumulatorArgs, AggregateFunctionSimplification,
2330 WindowFunctionSimplification,
2331 },
2332 interval_arithmetic::Interval,
2333 *,
2334 };
2335 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
2336 use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
2337 use datafusion_physical_expr::PhysicalExpr;
2338 use std::hash::Hash;
2339 use std::sync::LazyLock;
2340 use std::{
2341 collections::HashMap,
2342 ops::{BitAnd, BitOr, BitXor},
2343 sync::Arc,
2344 };
2345
2346 #[test]
2350 fn api_basic() {
2351 let simplifier =
2352 ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
2353
2354 let expr = lit(1) + lit(2);
2355 let expected = lit(3);
2356 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2357 }
2358
2359 #[test]
2360 fn basic_coercion() {
2361 let schema = test_schema();
2362 let simplifier = ExprSimplifier::new(
2363 SimplifyContext::default().with_schema(Arc::clone(&schema)),
2364 );
2365
2366 let expr = (lit(1i64) + lit(2i32)).lt(col("i"));
2369 let expected = lit(3i64).lt(col("i"));
2371
2372 let expr = simplifier.coerce(expr, &schema).unwrap();
2373
2374 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2375 }
2376
2377 fn test_schema() -> DFSchemaRef {
2378 static TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
2379 Schema::new(vec![
2380 Field::new("i", DataType::Int64, false),
2381 Field::new("b", DataType::Boolean, true),
2382 ])
2383 .to_dfschema_ref()
2384 .unwrap()
2385 });
2386 Arc::clone(&TEST_SCHEMA)
2387 }
2388
2389 #[test]
2390 fn simplify_and_constant_prop() {
2391 let simplifier =
2392 ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
2393
2394 let expr = (col("i") * (lit(1) - lit(1))).gt(lit(0));
2397 let expected = lit(false);
2398 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2399 }
2400
2401 #[test]
2402 fn simplify_and_constant_prop_with_case() {
2403 let simplifier =
2404 ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
2405
2406 let expr = when(col("i").gt(lit(5)).and(lit(false)), col("i").gt(lit(5)))
2414 .when(col("i").lt(lit(5)).and(lit(true)), col("i").lt(lit(5)))
2415 .otherwise(lit(false))
2416 .unwrap();
2417 let expected = col("i").lt(lit(5));
2418 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2419 }
2420
2421 #[test]
2426 fn test_simplify_canonicalize() {
2427 {
2428 let expr = lit(1).lt(col("c2")).and(col("c2").gt(lit(1)));
2429 let expected = col("c2").gt(lit(1));
2430 assert_eq!(simplify(expr), expected);
2431 }
2432 {
2433 let expr = col("c1").lt(col("c2")).and(col("c2").gt(col("c1")));
2434 let expected = col("c2").gt(col("c1"));
2435 assert_eq!(simplify(expr), expected);
2436 }
2437 {
2438 let expr = col("c1")
2439 .eq(lit(1))
2440 .and(lit(1).eq(col("c1")))
2441 .and(col("c1").eq(lit(3)));
2442 let expected = col("c1").eq(lit(1)).and(col("c1").eq(lit(3)));
2443 assert_eq!(simplify(expr), expected);
2444 }
2445 {
2446 let expr = col("c1")
2447 .eq(col("c2"))
2448 .and(col("c1").gt(lit(5)))
2449 .and(col("c2").eq(col("c1")));
2450 let expected = col("c2").eq(col("c1")).and(col("c1").gt(lit(5)));
2451 assert_eq!(simplify(expr), expected);
2452 }
2453 {
2454 let expr = col("c1")
2455 .eq(lit(1))
2456 .and(col("c2").gt(lit(3)).or(lit(3).lt(col("c2"))));
2457 let expected = col("c1").eq(lit(1)).and(col("c2").gt(lit(3)));
2458 assert_eq!(simplify(expr), expected);
2459 }
2460 {
2461 let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2462 let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2463 assert_eq!(simplify(expr), expected);
2464 }
2465 {
2466 let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2467 let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2468 assert_eq!(simplify(expr), expected);
2469 }
2470 {
2471 let expr = col("c1").gt(col("c2")).and(col("c1").gt(col("c2")));
2472 let expected = col("c2").lt(col("c1"));
2473 assert_eq!(simplify(expr), expected);
2474 }
2475 }
2476
2477 #[test]
2478 fn test_simplify_eq_not_self() {
2479 let expr_a = col("c2").eq(col("c2"));
2482 let expected_a = col("c2").is_not_null().or(lit_bool_null());
2483
2484 let expr_b = col("c2_non_null").eq(col("c2_non_null"));
2486 let expected_b = lit(true);
2487
2488 assert_eq!(simplify(expr_a), expected_a);
2489 assert_eq!(simplify(expr_b), expected_b);
2490 }
2491
2492 #[test]
2493 fn test_simplify_or_true() {
2494 let expr_a = col("c2").or(lit(true));
2495 let expr_b = lit(true).or(col("c2"));
2496 let expected = lit(true);
2497
2498 assert_eq!(simplify(expr_a), expected);
2499 assert_eq!(simplify(expr_b), expected);
2500 }
2501
2502 #[test]
2503 fn test_simplify_or_false() {
2504 let expr_a = lit(false).or(col("c2"));
2505 let expr_b = col("c2").or(lit(false));
2506 let expected = col("c2");
2507
2508 assert_eq!(simplify(expr_a), expected);
2509 assert_eq!(simplify(expr_b), expected);
2510 }
2511
2512 #[test]
2513 fn test_simplify_or_same() {
2514 let expr = col("c2").or(col("c2"));
2515 let expected = col("c2");
2516
2517 assert_eq!(simplify(expr), expected);
2518 }
2519
2520 #[test]
2521 fn test_simplify_or_not_self() {
2522 let expr_a = col("c2_non_null").or(col("c2_non_null").not());
2525 let expr_b = col("c2_non_null").not().or(col("c2_non_null"));
2526 let expected = lit(true);
2527
2528 assert_eq!(simplify(expr_a), expected);
2529 assert_eq!(simplify(expr_b), expected);
2530 }
2531
2532 #[test]
2533 fn test_simplify_and_false() {
2534 let expr_a = lit(false).and(col("c2"));
2535 let expr_b = col("c2").and(lit(false));
2536 let expected = lit(false);
2537
2538 assert_eq!(simplify(expr_a), expected);
2539 assert_eq!(simplify(expr_b), expected);
2540 }
2541
2542 #[test]
2543 fn test_simplify_and_same() {
2544 let expr = col("c2").and(col("c2"));
2545 let expected = col("c2");
2546
2547 assert_eq!(simplify(expr), expected);
2548 }
2549
2550 #[test]
2551 fn test_simplify_and_true() {
2552 let expr_a = lit(true).and(col("c2"));
2553 let expr_b = col("c2").and(lit(true));
2554 let expected = col("c2");
2555
2556 assert_eq!(simplify(expr_a), expected);
2557 assert_eq!(simplify(expr_b), expected);
2558 }
2559
2560 #[test]
2561 fn test_simplify_and_not_self() {
2562 let expr_a = col("c2_non_null").and(col("c2_non_null").not());
2565 let expr_b = col("c2_non_null").not().and(col("c2_non_null"));
2566 let expected = lit(false);
2567
2568 assert_eq!(simplify(expr_a), expected);
2569 assert_eq!(simplify(expr_b), expected);
2570 }
2571
2572 #[test]
2573 fn test_simplify_eq_and_neq_with_different_literals() {
2574 let expr = col("c2").eq(lit(1)).and(col("c2").not_eq(lit(0)));
2576 let expected = col("c2").eq(lit(1));
2577 assert_eq!(simplify(expr), expected);
2578
2579 let expr = col("c2").not_eq(lit(0)).and(col("c2").eq(lit(1)));
2581 let expected = col("c2").eq(lit(1));
2582 assert_eq!(simplify(expr), expected);
2583
2584 let expr = col("c2").eq(lit(1)).and(col("c2").not_eq(lit(1)));
2587 let result = simplify(expr.clone());
2589 assert_eq!(result, expr);
2591 }
2592
2593 #[test]
2594 fn test_simplify_multiply_by_one() {
2595 let expr_a = col("c2") * lit(1);
2596 let expr_b = lit(1) * col("c2");
2597 let expected = col("c2");
2598
2599 assert_eq!(simplify(expr_a), expected);
2600 assert_eq!(simplify(expr_b), expected);
2601
2602 let expr = col("c2") * lit(ScalarValue::Decimal128(Some(10000000000), 38, 10));
2603 assert_eq!(simplify(expr), expected);
2604
2605 let expr = lit(ScalarValue::Decimal128(Some(10000000000), 31, 10)) * col("c2");
2606 assert_eq!(simplify(expr), expected);
2607 }
2608
2609 #[test]
2610 fn test_simplify_multiply_by_null() {
2611 let null = lit(ScalarValue::Int64(None));
2612 {
2614 let expr = col("c3") * null.clone();
2615 assert_eq!(simplify(expr), null);
2616 }
2617 {
2619 let expr = null.clone() * col("c3");
2620 assert_eq!(simplify(expr), null);
2621 }
2622 }
2623
2624 #[test]
2625 fn test_simplify_multiply_by_zero() {
2626 {
2628 let expr_a = col("c2") * lit(0);
2629 let expr_b = lit(0) * col("c2");
2630
2631 assert_eq!(simplify(expr_a.clone()), expr_a);
2632 assert_eq!(simplify(expr_b.clone()), expr_b);
2633 }
2634 {
2636 let expr = lit(0) * col("c2_non_null");
2637 assert_eq!(simplify(expr), lit(0));
2638 }
2639 {
2641 let expr = col("c2_non_null") * lit(0);
2642 assert_eq!(simplify(expr), lit(0));
2643 }
2644 {
2646 let expr = col("c2_non_null") * lit(ScalarValue::Decimal128(Some(0), 31, 10));
2647 assert_eq!(
2648 simplify(expr),
2649 lit(ScalarValue::Decimal128(Some(0), 31, 10))
2650 );
2651 let expr = binary_expr(
2652 lit(ScalarValue::Decimal128(Some(0), 31, 10)),
2653 Operator::Multiply,
2654 col("c2_non_null"),
2655 );
2656 assert_eq!(
2657 simplify(expr),
2658 lit(ScalarValue::Decimal128(Some(0), 31, 10))
2659 );
2660 }
2661 }
2662
2663 #[test]
2664 fn test_simplify_divide_by_one() {
2665 let expr = binary_expr(col("c2"), Operator::Divide, lit(1));
2666 let expected = col("c2");
2667 assert_eq!(simplify(expr), expected);
2668 let expr = col("c2") / lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2669 assert_eq!(simplify(expr), expected);
2670 }
2671
2672 #[test]
2673 fn test_simplify_divide_null() {
2674 let null = lit(ScalarValue::Int64(None));
2676 {
2677 let expr = col("c3") / null.clone();
2678 assert_eq!(simplify(expr), null);
2679 }
2680 {
2682 let expr = null.clone() / col("c3");
2683 assert_eq!(simplify(expr), null);
2684 }
2685 }
2686
2687 #[test]
2688 fn test_simplify_divide_by_same() {
2689 let expr = col("c2") / col("c2");
2690 let expected = expr.clone();
2692
2693 assert_eq!(simplify(expr), expected);
2694 }
2695
2696 #[test]
2697 fn test_simplify_modulo_by_null() {
2698 let null = lit(ScalarValue::Int64(None));
2699 {
2701 let expr = col("c3") % null.clone();
2702 assert_eq!(simplify(expr), null);
2703 }
2704 {
2706 let expr = null.clone() % col("c3");
2707 assert_eq!(simplify(expr), null);
2708 }
2709 }
2710
2711 #[test]
2712 fn test_simplify_modulo_by_one() {
2713 let expr = col("c2") % lit(1);
2714 let expected = expr.clone();
2716
2717 assert_eq!(simplify(expr), expected);
2718 }
2719
2720 #[test]
2721 fn test_simplify_divide_zero_by_zero() {
2722 let expr = lit(0) / lit(0);
2725 let expected = expr.clone();
2726
2727 assert_eq!(simplify(expr), expected);
2728 }
2729
2730 #[test]
2731 fn test_simplify_divide_by_zero() {
2732 let expr = col("c2_non_null") / lit(0);
2735 let expected = expr.clone();
2736
2737 assert_eq!(simplify(expr), expected);
2738 }
2739
2740 #[test]
2741 fn test_simplify_modulo_by_one_non_null() {
2742 let expr = col("c3_non_null") % lit(1);
2743 let expected = lit(0_i64);
2744 assert_eq!(simplify(expr), expected);
2745 let expr =
2746 col("c3_non_null") % lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2747 assert_eq!(simplify(expr), expected);
2748 }
2749
2750 #[test]
2751 fn test_simplify_bitwise_xor_by_null() {
2752 let null = lit(ScalarValue::Int64(None));
2753 {
2755 let expr = col("c3") ^ null.clone();
2756 assert_eq!(simplify(expr), null);
2757 }
2758 {
2760 let expr = null.clone() ^ col("c3");
2761 assert_eq!(simplify(expr), null);
2762 }
2763 }
2764
2765 #[test]
2766 fn test_simplify_bitwise_shift_right_by_null() {
2767 let null = lit(ScalarValue::Int64(None));
2768 {
2770 let expr = col("c3") >> null.clone();
2771 assert_eq!(simplify(expr), null);
2772 }
2773 {
2775 let expr = null.clone() >> col("c3");
2776 assert_eq!(simplify(expr), null);
2777 }
2778 }
2779
2780 #[test]
2781 fn test_simplify_bitwise_shift_left_by_null() {
2782 let null = lit(ScalarValue::Int64(None));
2783 {
2785 let expr = col("c3") << null.clone();
2786 assert_eq!(simplify(expr), null);
2787 }
2788 {
2790 let expr = null.clone() << col("c3");
2791 assert_eq!(simplify(expr), null);
2792 }
2793 }
2794
2795 #[test]
2796 fn test_simplify_bitwise_and_by_zero() {
2797 {
2799 let expr = col("c2_non_null") & lit(0);
2800 assert_eq!(simplify(expr), lit(0));
2801 }
2802 {
2804 let expr = lit(0) & col("c2_non_null");
2805 assert_eq!(simplify(expr), lit(0));
2806 }
2807 }
2808
2809 #[test]
2810 fn test_simplify_bitwise_or_by_zero() {
2811 {
2813 let expr = col("c2_non_null") | lit(0);
2814 assert_eq!(simplify(expr), col("c2_non_null"));
2815 }
2816 {
2818 let expr = lit(0) | col("c2_non_null");
2819 assert_eq!(simplify(expr), col("c2_non_null"));
2820 }
2821 }
2822
2823 #[test]
2824 fn test_simplify_bitwise_xor_by_zero() {
2825 {
2827 let expr = col("c2_non_null") ^ lit(0);
2828 assert_eq!(simplify(expr), col("c2_non_null"));
2829 }
2830 {
2832 let expr = lit(0) ^ col("c2_non_null");
2833 assert_eq!(simplify(expr), col("c2_non_null"));
2834 }
2835 }
2836
2837 #[test]
2838 fn test_simplify_bitwise_bitwise_shift_right_by_zero() {
2839 {
2841 let expr = col("c2_non_null") >> lit(0);
2842 assert_eq!(simplify(expr), col("c2_non_null"));
2843 }
2844 }
2845
2846 #[test]
2847 fn test_simplify_bitwise_bitwise_shift_left_by_zero() {
2848 {
2850 let expr = col("c2_non_null") << lit(0);
2851 assert_eq!(simplify(expr), col("c2_non_null"));
2852 }
2853 }
2854
2855 #[test]
2856 fn test_simplify_bitwise_and_by_null() {
2857 let null = Expr::Literal(ScalarValue::Int64(None), None);
2858 {
2860 let expr = col("c3") & null.clone();
2861 assert_eq!(simplify(expr), null);
2862 }
2863 {
2865 let expr = null.clone() & col("c3");
2866 assert_eq!(simplify(expr), null);
2867 }
2868 }
2869
2870 #[test]
2871 fn test_simplify_composed_bitwise_and() {
2872 let expr = bitwise_and(
2875 bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2876 col("c2").gt(lit(5)),
2877 );
2878 let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2879
2880 assert_eq!(simplify(expr), expected);
2881
2882 let expr = bitwise_and(
2885 col("c2").gt(lit(5)),
2886 bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2887 );
2888 let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2889 assert_eq!(simplify(expr), expected);
2890 }
2891
2892 #[test]
2893 fn test_simplify_composed_bitwise_or() {
2894 let expr = bitwise_or(
2897 bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2898 col("c2").gt(lit(5)),
2899 );
2900 let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2901
2902 assert_eq!(simplify(expr), expected);
2903
2904 let expr = bitwise_or(
2907 col("c2").gt(lit(5)),
2908 bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2909 );
2910 let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2911
2912 assert_eq!(simplify(expr), expected);
2913 }
2914
2915 #[test]
2916 fn test_simplify_composed_bitwise_xor() {
2917 let expr = bitwise_xor(
2921 col("c2"),
2922 bitwise_xor(
2923 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2924 bitwise_and(col("c1"), col("c2")),
2925 ),
2926 );
2927
2928 let expected = bitwise_xor(
2929 bitwise_or(col("c2"), col("c1")),
2930 bitwise_and(col("c1"), col("c2")),
2931 );
2932
2933 assert_eq!(simplify(expr), expected);
2934
2935 let expr = bitwise_xor(
2939 col("c2"),
2940 bitwise_xor(
2941 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2942 bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
2943 ),
2944 );
2945
2946 let expected = bitwise_xor(
2947 col("c2"),
2948 bitwise_xor(
2949 bitwise_or(col("c2"), col("c1")),
2950 bitwise_and(col("c1"), col("c2")),
2951 ),
2952 );
2953
2954 assert_eq!(simplify(expr), expected);
2955
2956 let expr = bitwise_xor(
2960 bitwise_xor(
2961 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2962 bitwise_and(col("c1"), col("c2")),
2963 ),
2964 col("c2"),
2965 );
2966
2967 let expected = bitwise_xor(
2968 bitwise_or(col("c2"), col("c1")),
2969 bitwise_and(col("c1"), col("c2")),
2970 );
2971
2972 assert_eq!(simplify(expr), expected);
2973
2974 let expr = bitwise_xor(
2978 bitwise_xor(
2979 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2980 bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
2981 ),
2982 col("c2"),
2983 );
2984
2985 let expected = bitwise_xor(
2986 bitwise_xor(
2987 bitwise_or(col("c2"), col("c1")),
2988 bitwise_and(col("c1"), col("c2")),
2989 ),
2990 col("c2"),
2991 );
2992
2993 assert_eq!(simplify(expr), expected);
2994 }
2995
2996 #[test]
2997 fn test_simplify_negated_bitwise_and() {
2998 let expr = (-col("c4_non_null")) & col("c4_non_null");
3000 let expected = lit(0u32);
3001
3002 assert_eq!(simplify(expr), expected);
3003 let expr = col("c4_non_null") & (-col("c4_non_null"));
3005 let expected = lit(0u32);
3006
3007 assert_eq!(simplify(expr), expected);
3008
3009 let expr = (-col("c3_non_null")) & col("c3_non_null");
3011 let expected = lit(0i64);
3012
3013 assert_eq!(simplify(expr), expected);
3014 let expr = col("c3_non_null") & (-col("c3_non_null"));
3016 let expected = lit(0i64);
3017
3018 assert_eq!(simplify(expr), expected);
3019 }
3020
3021 #[test]
3022 fn test_simplify_negated_bitwise_or() {
3023 let expr = (-col("c4_non_null")) | col("c4_non_null");
3025 let expected = lit(-1i32);
3026
3027 assert_eq!(simplify(expr), expected);
3028
3029 let expr = col("c4_non_null") | (-col("c4_non_null"));
3031 let expected = lit(-1i32);
3032
3033 assert_eq!(simplify(expr), expected);
3034
3035 let expr = (-col("c3_non_null")) | col("c3_non_null");
3037 let expected = lit(-1i64);
3038
3039 assert_eq!(simplify(expr), expected);
3040
3041 let expr = col("c3_non_null") | (-col("c3_non_null"));
3043 let expected = lit(-1i64);
3044
3045 assert_eq!(simplify(expr), expected);
3046 }
3047
3048 #[test]
3049 fn test_simplify_negated_bitwise_xor() {
3050 let expr = (-col("c4_non_null")) ^ col("c4_non_null");
3052 let expected = lit(-1i32);
3053
3054 assert_eq!(simplify(expr), expected);
3055
3056 let expr = col("c4_non_null") ^ (-col("c4_non_null"));
3058 let expected = lit(-1i32);
3059
3060 assert_eq!(simplify(expr), expected);
3061
3062 let expr = (-col("c3_non_null")) ^ col("c3_non_null");
3064 let expected = lit(-1i64);
3065
3066 assert_eq!(simplify(expr), expected);
3067
3068 let expr = col("c3_non_null") ^ (-col("c3_non_null"));
3070 let expected = lit(-1i64);
3071
3072 assert_eq!(simplify(expr), expected);
3073 }
3074
3075 #[test]
3076 fn test_simplify_bitwise_and_or() {
3077 let expr = bitwise_and(
3079 col("c2_non_null").lt(lit(3)),
3080 bitwise_or(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
3081 );
3082 let expected = col("c2_non_null").lt(lit(3));
3083
3084 assert_eq!(simplify(expr), expected);
3085 }
3086
3087 #[test]
3088 fn test_simplify_bitwise_or_and() {
3089 let expr = bitwise_or(
3091 col("c2_non_null").lt(lit(3)),
3092 bitwise_and(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
3093 );
3094 let expected = col("c2_non_null").lt(lit(3));
3095
3096 assert_eq!(simplify(expr), expected);
3097 }
3098
3099 #[test]
3100 fn test_simplify_simple_bitwise_and() {
3101 let expr = (col("c2").gt(lit(5))).bitand(col("c2").gt(lit(5)));
3103 let expected = col("c2").gt(lit(5));
3104
3105 assert_eq!(simplify(expr), expected);
3106 }
3107
3108 #[test]
3109 fn test_simplify_simple_bitwise_or() {
3110 let expr = (col("c2").gt(lit(5))).bitor(col("c2").gt(lit(5)));
3112 let expected = col("c2").gt(lit(5));
3113
3114 assert_eq!(simplify(expr), expected);
3115 }
3116
3117 #[test]
3118 fn test_simplify_simple_bitwise_xor() {
3119 let expr = (col("c4")).bitxor(col("c4"));
3121 let expected = lit(0u32);
3122
3123 assert_eq!(simplify(expr), expected);
3124
3125 let expr = col("c3").bitxor(col("c3"));
3127 let expected = lit(0i64);
3128
3129 assert_eq!(simplify(expr), expected);
3130 }
3131
3132 #[test]
3133 fn test_simplify_modulo_by_zero_non_null() {
3134 let expr = col("c2_non_null") % lit(0);
3137 let expected = expr.clone();
3138
3139 assert_eq!(simplify(expr), expected);
3140 }
3141
3142 #[test]
3143 fn test_simplify_simple_and() {
3144 let expr = (col("c2").gt(lit(5))).and(col("c2").gt(lit(5)));
3146 let expected = col("c2").gt(lit(5));
3147
3148 assert_eq!(simplify(expr), expected);
3149 }
3150
3151 #[test]
3152 fn test_simplify_composed_and() {
3153 let expr = and(
3155 and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
3156 col("c2").gt(lit(5)),
3157 );
3158 let expected = and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
3159
3160 assert_eq!(simplify(expr), expected);
3161 }
3162
3163 #[test]
3164 fn test_simplify_negated_and() {
3165 let expr = and(col("c2").gt(lit(5)), Expr::not(col("c2").gt(lit(5))));
3167 let expected = col("c2").gt(lit(5)).and(col("c2").lt_eq(lit(5)));
3168
3169 assert_eq!(simplify(expr), expected);
3170 }
3171
3172 #[test]
3173 fn test_simplify_or_and() {
3174 let l = col("c2").gt(lit(5));
3175 let r = and(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
3176
3177 let expr = or(l.clone(), r.clone());
3179
3180 let expected = l.clone();
3181 assert_eq!(simplify(expr), expected);
3182
3183 let expr = or(r, l);
3185 assert_eq!(simplify(expr), expected);
3186 }
3187
3188 #[test]
3189 fn test_simplify_or_and_non_null() {
3190 let l = col("c2_non_null").gt(lit(5));
3191 let r = and(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
3192
3193 let expr = or(l.clone(), r.clone());
3195
3196 let expected = col("c2_non_null").gt(lit(5));
3198
3199 assert_eq!(simplify(expr), expected);
3200
3201 let expr = or(l, r);
3203
3204 assert_eq!(simplify(expr), expected);
3205 }
3206
3207 #[test]
3208 fn test_simplify_and_or() {
3209 let l = col("c2").gt(lit(5));
3210 let r = or(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
3211
3212 let expr = and(l.clone(), r.clone());
3214
3215 let expected = l.clone();
3216 assert_eq!(simplify(expr), expected);
3217
3218 let expr = and(r, l);
3220 assert_eq!(simplify(expr), expected);
3221 }
3222
3223 #[test]
3224 fn test_simplify_and_or_non_null() {
3225 let l = col("c2_non_null").gt(lit(5));
3226 let r = or(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
3227
3228 let expr = and(l.clone(), r.clone());
3230
3231 let expected = col("c2_non_null").gt(lit(5));
3233
3234 assert_eq!(simplify(expr), expected);
3235
3236 let expr = and(l, r);
3238
3239 assert_eq!(simplify(expr), expected);
3240 }
3241
3242 #[test]
3243 fn test_simplify_by_de_morgan_laws() {
3244 let expr = and(col("c3"), col("c4")).not();
3247 let expected = or(col("c3").not(), col("c4").not());
3248 assert_eq!(simplify(expr), expected);
3249 let expr = or(col("c3"), col("c4")).not();
3251 let expected = and(col("c3").not(), col("c4").not());
3252 assert_eq!(simplify(expr), expected);
3253 let expr = col("c3").not().not();
3255 let expected = col("c3");
3256 assert_eq!(simplify(expr), expected);
3257
3258 let expr = -bitwise_and(col("c3"), col("c4"));
3261 let expected = bitwise_or(-col("c3"), -col("c4"));
3262 assert_eq!(simplify(expr), expected);
3263 let expr = -bitwise_or(col("c3"), col("c4"));
3265 let expected = bitwise_and(-col("c3"), -col("c4"));
3266 assert_eq!(simplify(expr), expected);
3267 let expr = -(-col("c3"));
3269 let expected = col("c3");
3270 assert_eq!(simplify(expr), expected);
3271 }
3272
3273 #[test]
3274 fn test_simplify_null_and_false() {
3275 let expr = and(lit_bool_null(), lit(false));
3276 let expr_eq = lit(false);
3277
3278 assert_eq!(simplify(expr), expr_eq);
3279 }
3280
3281 #[test]
3282 fn test_simplify_divide_null_by_null() {
3283 let null = lit(ScalarValue::Int32(None));
3284 let expr_plus = null.clone() / null.clone();
3285 let expr_eq = null;
3286
3287 assert_eq!(simplify(expr_plus), expr_eq);
3288 }
3289
3290 #[test]
3291 fn test_simplify_simplify_arithmetic_expr() {
3292 let expr_plus = lit(1) + lit(1);
3293
3294 assert_eq!(simplify(expr_plus), lit(2));
3295 }
3296
3297 #[test]
3298 fn test_simplify_simplify_eq_expr() {
3299 let expr_eq = binary_expr(lit(1), Operator::Eq, lit(1));
3300
3301 assert_eq!(simplify(expr_eq), lit(true));
3302 }
3303
3304 #[test]
3305 fn test_simplify_regex() {
3306 assert_contains!(
3308 try_simplify(regex_match(col("c1"), lit("foo{")))
3309 .unwrap_err()
3310 .to_string(),
3311 "regex parse error"
3312 );
3313
3314 assert_no_change(regex_match(col("c1"), lit("foo.*")));
3316 assert_no_change(regex_match(col("c1"), lit("(foo)")));
3317 assert_no_change(regex_match(col("c1"), lit("%")));
3318 assert_no_change(regex_match(col("c1"), lit("_")));
3319 assert_no_change(regex_match(col("c1"), lit("f%o")));
3320 assert_no_change(regex_match(col("c1"), lit("^f%o")));
3321 assert_no_change(regex_match(col("c1"), lit("f_o")));
3322
3323 assert_change(
3325 regex_match(col("c1"), lit("")),
3326 if_not_null(col("c1"), true),
3327 );
3328 assert_change(
3329 regex_not_match(col("c1"), lit("")),
3330 if_not_null(col("c1"), false),
3331 );
3332 assert_change(
3333 regex_imatch(col("c1"), lit("")),
3334 if_not_null(col("c1"), true),
3335 );
3336 assert_change(
3337 regex_not_imatch(col("c1"), lit("")),
3338 if_not_null(col("c1"), false),
3339 );
3340
3341 assert_change(regex_match(col("c1"), lit("x")), col("c1").like(lit("%x%")));
3343
3344 assert_change(
3346 regex_match(col("c1"), lit("foo")),
3347 col("c1").like(lit("%foo%")),
3348 );
3349
3350 assert_change(regex_match(col("c1"), lit("^$")), col("c1").eq(lit("")));
3352 assert_change(
3353 regex_not_match(col("c1"), lit("^$")),
3354 col("c1").not_eq(lit("")),
3355 );
3356 assert_change(
3357 regex_match(col("c1"), lit("^foo$")),
3358 col("c1").eq(lit("foo")),
3359 );
3360 assert_change(
3361 regex_not_match(col("c1"), lit("^foo$")),
3362 col("c1").not_eq(lit("foo")),
3363 );
3364
3365 assert_change(
3367 regex_match(col("c1"), lit("^(foo|bar)$")),
3368 col("c1").eq(lit("foo")).or(col("c1").eq(lit("bar"))),
3369 );
3370 assert_change(
3371 regex_not_match(col("c1"), lit("^(foo|bar)$")),
3372 col("c1")
3373 .not_eq(lit("foo"))
3374 .and(col("c1").not_eq(lit("bar"))),
3375 );
3376 assert_change(
3377 regex_match(col("c1"), lit("^(foo)$")),
3378 col("c1").eq(lit("foo")),
3379 );
3380 assert_change(
3381 regex_match(col("c1"), lit("^(foo|bar|baz)$")),
3382 ((col("c1").eq(lit("foo"))).or(col("c1").eq(lit("bar"))))
3383 .or(col("c1").eq(lit("baz"))),
3384 );
3385 assert_change(
3386 regex_match(col("c1"), lit("^(foo|bar|baz|qux)$")),
3387 col("c1")
3388 .in_list(vec![lit("foo"), lit("bar"), lit("baz"), lit("qux")], false),
3389 );
3390 assert_change(
3391 regex_match(col("c1"), lit("^(fo_o)$")),
3392 col("c1").eq(lit("fo_o")),
3393 );
3394 assert_change(
3395 regex_match(col("c1"), lit("^(fo_o)$")),
3396 col("c1").eq(lit("fo_o")),
3397 );
3398 assert_change(
3399 regex_match(col("c1"), lit("^(fo_o|ba_r)$")),
3400 col("c1").eq(lit("fo_o")).or(col("c1").eq(lit("ba_r"))),
3401 );
3402 assert_change(
3403 regex_not_match(col("c1"), lit("^(fo_o|ba_r)$")),
3404 col("c1")
3405 .not_eq(lit("fo_o"))
3406 .and(col("c1").not_eq(lit("ba_r"))),
3407 );
3408 assert_change(
3409 regex_match(col("c1"), lit("^(fo_o|ba_r|ba_z)$")),
3410 ((col("c1").eq(lit("fo_o"))).or(col("c1").eq(lit("ba_r"))))
3411 .or(col("c1").eq(lit("ba_z"))),
3412 );
3413 assert_change(
3414 regex_match(col("c1"), lit("^(fo_o|ba_r|baz|qu_x)$")),
3415 col("c1").in_list(
3416 vec![lit("fo_o"), lit("ba_r"), lit("baz"), lit("qu_x")],
3417 false,
3418 ),
3419 );
3420
3421 assert_no_change(regex_match(col("c1"), lit("(foo|bar)")));
3423 assert_no_change(regex_match(col("c1"), lit("(foo|bar)*")));
3424 assert_no_change(regex_match(col("c1"), lit("(fo_o|b_ar)")));
3425 assert_no_change(regex_match(col("c1"), lit("(foo|ba_r)*")));
3426 assert_no_change(regex_match(col("c1"), lit("(fo_o|ba_r)*")));
3427 assert_no_change(regex_match(col("c1"), lit("^(foo|bar)*")));
3428 assert_no_change(regex_match(col("c1"), lit("^(foo)(bar)$")));
3429 assert_no_change(regex_match(col("c1"), lit("^")));
3430 assert_no_change(regex_match(col("c1"), lit("$")));
3431 assert_no_change(regex_match(col("c1"), lit("$^")));
3432 assert_no_change(regex_match(col("c1"), lit("$foo^")));
3433
3434 assert_change(
3436 regex_match(col("c1"), lit("^foo")),
3437 col("c1").like(lit("foo%")),
3438 );
3439 assert_change(
3440 regex_match(col("c1"), lit("foo$")),
3441 col("c1").like(lit("%foo")),
3442 );
3443 assert_change(
3444 regex_match(col("c1"), lit("^foo|bar$")),
3445 col("c1").like(lit("foo%")).or(col("c1").like(lit("%bar"))),
3446 );
3447
3448 assert_change(
3450 regex_match(col("c1"), lit("foo|bar|baz")),
3451 col("c1")
3452 .like(lit("%foo%"))
3453 .or(col("c1").like(lit("%bar%")))
3454 .or(col("c1").like(lit("%baz%"))),
3455 );
3456 assert_change(
3457 regex_match(col("c1"), lit("foo|x|baz")),
3458 col("c1")
3459 .like(lit("%foo%"))
3460 .or(col("c1").like(lit("%x%")))
3461 .or(col("c1").like(lit("%baz%"))),
3462 );
3463 assert_change(
3464 regex_not_match(col("c1"), lit("foo|bar|baz")),
3465 col("c1")
3466 .not_like(lit("%foo%"))
3467 .and(col("c1").not_like(lit("%bar%")))
3468 .and(col("c1").not_like(lit("%baz%"))),
3469 );
3470 assert_change(
3472 regex_match(col("c1"), lit("foo|^x$|baz")),
3473 col("c1")
3474 .like(lit("%foo%"))
3475 .or(col("c1").eq(lit("x")))
3476 .or(col("c1").like(lit("%baz%"))),
3477 );
3478 assert_change(
3479 regex_not_match(col("c1"), lit("foo|^bar$|baz")),
3480 col("c1")
3481 .not_like(lit("%foo%"))
3482 .and(col("c1").not_eq(lit("bar")))
3483 .and(col("c1").not_like(lit("%baz%"))),
3484 );
3485 assert_no_change(regex_match(col("c1"), lit("foo|bar|baz|blarg|bozo|etc")));
3487 }
3488
3489 #[track_caller]
3490 fn assert_no_change(expr: Expr) {
3491 let optimized = simplify(expr.clone());
3492 assert_eq!(expr, optimized);
3493 }
3494
3495 #[track_caller]
3496 fn assert_change(expr: Expr, expected: Expr) {
3497 let optimized = simplify(expr);
3498 assert_eq!(optimized, expected);
3499 }
3500
3501 fn regex_match(left: Expr, right: Expr) -> Expr {
3502 Expr::BinaryExpr(BinaryExpr {
3503 left: Box::new(left),
3504 op: Operator::RegexMatch,
3505 right: Box::new(right),
3506 })
3507 }
3508
3509 fn regex_not_match(left: Expr, right: Expr) -> Expr {
3510 Expr::BinaryExpr(BinaryExpr {
3511 left: Box::new(left),
3512 op: Operator::RegexNotMatch,
3513 right: Box::new(right),
3514 })
3515 }
3516
3517 fn regex_imatch(left: Expr, right: Expr) -> Expr {
3518 Expr::BinaryExpr(BinaryExpr {
3519 left: Box::new(left),
3520 op: Operator::RegexIMatch,
3521 right: Box::new(right),
3522 })
3523 }
3524
3525 fn regex_not_imatch(left: Expr, right: Expr) -> Expr {
3526 Expr::BinaryExpr(BinaryExpr {
3527 left: Box::new(left),
3528 op: Operator::RegexNotIMatch,
3529 right: Box::new(right),
3530 })
3531 }
3532
3533 fn try_simplify(expr: Expr) -> Result<Expr> {
3538 let schema = expr_test_schema();
3539 let simplifier =
3540 ExprSimplifier::new(SimplifyContext::default().with_schema(schema));
3541 simplifier.simplify(expr)
3542 }
3543
3544 fn coerce(expr: Expr) -> Expr {
3545 let schema = expr_test_schema();
3546 let simplifier = ExprSimplifier::new(
3547 SimplifyContext::default().with_schema(Arc::clone(&schema)),
3548 );
3549 simplifier.coerce(expr, schema.as_ref()).unwrap()
3550 }
3551
3552 fn simplify(expr: Expr) -> Expr {
3553 try_simplify(expr).unwrap()
3554 }
3555
3556 fn try_simplify_with_cycle_count(expr: Expr) -> Result<(Expr, u32)> {
3557 let schema = expr_test_schema();
3558 let simplifier =
3559 ExprSimplifier::new(SimplifyContext::default().with_schema(schema));
3560 let (expr, count) = simplifier.simplify_with_cycle_count_transformed(expr)?;
3561 Ok((expr.data, count))
3562 }
3563
3564 fn simplify_with_cycle_count(expr: Expr) -> (Expr, u32) {
3565 try_simplify_with_cycle_count(expr).unwrap()
3566 }
3567
3568 fn simplify_with_guarantee(
3569 expr: Expr,
3570 guarantees: Vec<(Expr, NullableInterval)>,
3571 ) -> Expr {
3572 let schema = expr_test_schema();
3573 let simplifier =
3574 ExprSimplifier::new(SimplifyContext::default().with_schema(schema))
3575 .with_guarantees(guarantees);
3576 simplifier.simplify(expr).unwrap()
3577 }
3578
3579 fn expr_test_schema() -> DFSchemaRef {
3580 static EXPR_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
3581 Arc::new(
3582 DFSchema::from_unqualified_fields(
3583 vec![
3584 Field::new("c1", DataType::Utf8, true),
3585 Field::new("c2", DataType::Boolean, true),
3586 Field::new("c3", DataType::Int64, true),
3587 Field::new("c4", DataType::UInt32, true),
3588 Field::new("c1_non_null", DataType::Utf8, false),
3589 Field::new("c2_non_null", DataType::Boolean, false),
3590 Field::new("c3_non_null", DataType::Int64, false),
3591 Field::new("c4_non_null", DataType::UInt32, false),
3592 Field::new("c5", DataType::FixedSizeBinary(3), true),
3593 ]
3594 .into(),
3595 HashMap::new(),
3596 )
3597 .unwrap(),
3598 )
3599 });
3600 Arc::clone(&EXPR_TEST_SCHEMA)
3601 }
3602
3603 #[test]
3604 fn simplify_expr_null_comparison() {
3605 assert_eq!(
3607 simplify(lit(true).eq(lit(ScalarValue::Boolean(None)))),
3608 lit(ScalarValue::Boolean(None)),
3609 );
3610
3611 assert_eq!(
3613 simplify(
3614 lit(ScalarValue::Boolean(None)).not_eq(lit(ScalarValue::Boolean(None)))
3615 ),
3616 lit(ScalarValue::Boolean(None)),
3617 );
3618
3619 assert_eq!(
3621 simplify(col("c2").not_eq(lit(ScalarValue::Boolean(None)))),
3622 lit(ScalarValue::Boolean(None)),
3623 );
3624
3625 assert_eq!(
3627 simplify(lit(ScalarValue::Boolean(None)).eq(col("c2"))),
3628 lit(ScalarValue::Boolean(None)),
3629 );
3630 }
3631
3632 #[test]
3633 fn simplify_expr_is_not_null() {
3634 assert_eq!(
3635 simplify(Expr::IsNotNull(Box::new(col("c1")))),
3636 Expr::IsNotNull(Box::new(col("c1")))
3637 );
3638
3639 assert_eq!(
3641 simplify(Expr::IsNotNull(Box::new(col("c1_non_null")))),
3642 lit(true)
3643 );
3644 }
3645
3646 #[test]
3647 fn simplify_expr_is_null() {
3648 assert_eq!(
3649 simplify(Expr::IsNull(Box::new(col("c1")))),
3650 Expr::IsNull(Box::new(col("c1")))
3651 );
3652
3653 assert_eq!(
3655 simplify(Expr::IsNull(Box::new(col("c1_non_null")))),
3656 lit(false)
3657 );
3658 }
3659
3660 #[test]
3661 fn simplify_expr_is_unknown() {
3662 assert_eq!(simplify(col("c2").is_unknown()), col("c2").is_unknown(),);
3663
3664 assert_eq!(simplify(col("c2_non_null").is_unknown()), lit(false));
3666 }
3667
3668 #[test]
3669 fn simplify_expr_is_not_known() {
3670 assert_eq!(
3671 simplify(col("c2").is_not_unknown()),
3672 col("c2").is_not_unknown()
3673 );
3674
3675 assert_eq!(simplify(col("c2_non_null").is_not_unknown()), lit(true));
3677 }
3678
3679 #[test]
3680 fn simplify_expr_eq() {
3681 let schema = expr_test_schema();
3682 assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3683
3684 assert_eq!(simplify(lit(true).eq(lit(true))), lit(true));
3686
3687 assert_eq!(simplify(lit(true).eq(lit(false))), lit(false),);
3689
3690 assert_eq!(simplify(col("c2").eq(lit(true))), col("c2"));
3692
3693 assert_eq!(simplify(col("c2").eq(lit(false))), col("c2").not(),);
3695 }
3696
3697 #[test]
3698 fn simplify_expr_eq_skip_nonboolean_type() {
3699 let schema = expr_test_schema();
3700
3701 assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3707
3708 assert_eq!(simplify(col("c1").eq(lit("foo"))), col("c1").eq(lit("foo")),);
3710 }
3711
3712 #[test]
3713 fn simplify_expr_not_eq() {
3714 let schema = expr_test_schema();
3715
3716 assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3717
3718 assert_eq!(simplify(col("c2").not_eq(lit(true))), col("c2").not(),);
3720
3721 assert_eq!(simplify(col("c2").not_eq(lit(false))), col("c2"),);
3723
3724 assert_eq!(simplify(lit(true).not_eq(lit(true))), lit(false),);
3726
3727 assert_eq!(simplify(lit(true).not_eq(lit(false))), lit(true),);
3728 }
3729
3730 #[test]
3731 fn simplify_expr_not_eq_skip_nonboolean_type() {
3732 let schema = expr_test_schema();
3733
3734 assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3738
3739 assert_eq!(
3740 simplify(col("c1").not_eq(lit("foo"))),
3741 col("c1").not_eq(lit("foo")),
3742 );
3743 }
3744
3745 #[test]
3746 fn simplify_literal_case_equality() {
3747 let simple_case = Expr::Case(Case::new(
3749 None,
3750 vec![(
3751 Box::new(col("c2_non_null").not_eq(lit(false))),
3752 Box::new(lit("ok")),
3753 )],
3754 Some(Box::new(lit("not_ok"))),
3755 ));
3756
3757 assert_eq!(
3765 simplify(binary_expr(simple_case.clone(), Operator::Eq, lit("ok"),)),
3766 col("c2_non_null"),
3767 );
3768
3769 assert_eq!(
3777 simplify(binary_expr(simple_case, Operator::NotEq, lit("ok"),)),
3778 not(col("c2_non_null")),
3779 );
3780
3781 let complex_case = Expr::Case(Case::new(
3782 None,
3783 vec![
3784 (
3785 Box::new(col("c1").eq(lit("inboxed"))),
3786 Box::new(lit("pending")),
3787 ),
3788 (
3789 Box::new(col("c1").eq(lit("scheduled"))),
3790 Box::new(lit("pending")),
3791 ),
3792 (
3793 Box::new(col("c1").eq(lit("completed"))),
3794 Box::new(lit("completed")),
3795 ),
3796 (
3797 Box::new(col("c1").eq(lit("paused"))),
3798 Box::new(lit("paused")),
3799 ),
3800 (Box::new(col("c2")), Box::new(lit("running"))),
3801 (
3802 Box::new(col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0)))),
3803 Box::new(lit("backing-off")),
3804 ),
3805 ],
3806 Some(Box::new(lit("ready"))),
3807 ));
3808
3809 assert_eq!(
3810 simplify(binary_expr(
3811 complex_case.clone(),
3812 Operator::Eq,
3813 lit("completed"),
3814 )),
3815 not_distinct_from(col("c1").eq(lit("completed")), lit(true)).and(
3816 distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3817 .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3818 )
3819 );
3820
3821 assert_eq!(
3822 simplify(binary_expr(
3823 complex_case.clone(),
3824 Operator::NotEq,
3825 lit("completed"),
3826 )),
3827 distinct_from(col("c1").eq(lit("completed")), lit(true))
3828 .or(not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3829 .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true))))
3830 );
3831
3832 assert_eq!(
3833 simplify(binary_expr(
3834 complex_case.clone(),
3835 Operator::Eq,
3836 lit("running"),
3837 )),
3838 not_distinct_from(col("c2"), lit(true)).and(
3839 distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3840 .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3841 .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
3842 .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
3843 )
3844 );
3845
3846 assert_eq!(
3847 simplify(binary_expr(
3848 complex_case.clone(),
3849 Operator::Eq,
3850 lit("ready"),
3851 )),
3852 distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3853 .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3854 .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
3855 .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
3856 .and(distinct_from(col("c2"), lit(true)))
3857 .and(distinct_from(
3858 col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
3859 lit(true)
3860 ))
3861 );
3862
3863 assert_eq!(
3864 simplify(binary_expr(
3865 complex_case.clone(),
3866 Operator::NotEq,
3867 lit("ready"),
3868 )),
3869 not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3870 .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3871 .or(not_distinct_from(col("c1").eq(lit("completed")), lit(true)))
3872 .or(not_distinct_from(col("c1").eq(lit("paused")), lit(true)))
3873 .or(not_distinct_from(col("c2"), lit(true)))
3874 .or(not_distinct_from(
3875 col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
3876 lit(true)
3877 ))
3878 );
3879 }
3880
3881 #[test]
3882 fn simplify_expr_case_when_then_else() {
3883 assert_eq!(
3889 simplify(Expr::Case(Case::new(
3890 None,
3891 vec![(
3892 Box::new(col("c2_non_null").not_eq(lit(false))),
3893 Box::new(lit("ok").eq(lit("not_ok"))),
3894 )],
3895 Some(Box::new(col("c2_non_null").eq(lit(true)))),
3896 ))),
3897 lit(false) );
3899
3900 assert_eq!(
3909 simplify(simplify(Expr::Case(Case::new(
3910 None,
3911 vec![(
3912 Box::new(col("c2_non_null").not_eq(lit(false))),
3913 Box::new(lit("ok").eq(lit("ok"))),
3914 )],
3915 Some(Box::new(col("c2_non_null").eq(lit(true)))),
3916 )))),
3917 col("c2_non_null")
3918 );
3919
3920 assert_eq!(
3927 simplify(simplify(Expr::Case(Case::new(
3928 None,
3929 vec![(Box::new(col("c2").is_null()), Box::new(lit(true)),)],
3930 Some(Box::new(col("c2"))),
3931 )))),
3932 col("c2")
3933 .is_null()
3934 .or(col("c2").is_not_null().and(col("c2")))
3935 );
3936
3937 assert_eq!(
3945 simplify(simplify(Expr::Case(Case::new(
3946 None,
3947 vec![
3948 (Box::new(col("c1_non_null")), Box::new(lit(true)),),
3949 (Box::new(col("c2_non_null")), Box::new(lit(false)),),
3950 ],
3951 Some(Box::new(lit(true))),
3952 )))),
3953 col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
3954 );
3955
3956 assert_eq!(
3964 simplify(simplify(Expr::Case(Case::new(
3965 None,
3966 vec![
3967 (Box::new(col("c1_non_null")), Box::new(lit(true)),),
3968 (Box::new(col("c2_non_null")), Box::new(lit(false)),),
3969 ],
3970 Some(Box::new(lit(true))),
3971 )))),
3972 col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
3973 );
3974
3975 assert_eq!(
3977 simplify(simplify(Expr::Case(Case::new(
3978 None,
3979 vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
3980 None,
3981 )))),
3982 not_distinct_from(col("c3").gt(lit(0_i64)), lit(true)).or(distinct_from(
3983 col("c3").gt(lit(0_i64)),
3984 lit(true)
3985 )
3986 .and(lit_bool_null()))
3987 );
3988
3989 assert_eq!(
3991 simplify(simplify(Expr::Case(Case::new(
3992 None,
3993 vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
3994 Some(Box::new(lit(false))),
3995 )))),
3996 not_distinct_from(col("c3").gt(lit(0_i64)), lit(true))
3997 );
3998 }
3999
4000 #[test]
4001 fn simplify_expr_case_when_first_true() {
4002 assert_eq!(
4004 simplify(Expr::Case(Case::new(
4005 None,
4006 vec![(Box::new(lit(true)), Box::new(lit(1)),)],
4007 Some(Box::new(col("c1"))),
4008 ))),
4009 lit(1)
4010 );
4011
4012 assert_eq!(
4014 simplify(Expr::Case(Case::new(
4015 None,
4016 vec![(Box::new(lit(true)), Box::new(lit("a")),)],
4017 Some(Box::new(lit("b"))),
4018 ))),
4019 lit("a")
4020 );
4021
4022 assert_eq!(
4024 simplify(Expr::Case(Case::new(
4025 None,
4026 vec![
4027 (Box::new(lit(true)), Box::new(lit("a"))),
4028 (Box::new(lit("x").gt(lit(5))), Box::new(lit("b"))),
4029 ],
4030 Some(Box::new(lit("c"))),
4031 ))),
4032 lit("a")
4033 );
4034
4035 assert_eq!(
4037 simplify(Expr::Case(Case::new(
4038 None,
4039 vec![(Box::new(lit(true)), Box::new(lit("a")),)],
4040 None,
4041 ))),
4042 lit("a")
4043 );
4044
4045 let expr = Expr::Case(Case::new(
4047 None,
4048 vec![(Box::new(col("c2")), Box::new(lit(1)))],
4049 Some(Box::new(lit(2))),
4050 ));
4051 assert_eq!(simplify(expr.clone()), expr);
4052
4053 let expr = Expr::Case(Case::new(
4055 None,
4056 vec![(Box::new(lit(false)), Box::new(lit(1)))],
4057 Some(Box::new(lit(2))),
4058 ));
4059 assert_ne!(simplify(expr), lit(1));
4060
4061 let expr = Expr::Case(Case::new(
4063 None,
4064 vec![(Box::new(col("c1").gt(lit(5))), Box::new(lit(1)))],
4065 Some(Box::new(lit(2))),
4066 ));
4067 assert_eq!(simplify(expr.clone()), expr);
4068 }
4069
4070 #[test]
4071 fn simplify_expr_case_when_any_true() {
4072 assert_eq!(
4074 simplify(Expr::Case(Case::new(
4075 None,
4076 vec![
4077 (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4078 (Box::new(lit(true)), Box::new(lit("b"))),
4079 ],
4080 Some(Box::new(lit("c"))),
4081 ))),
4082 Expr::Case(Case::new(
4083 None,
4084 vec![(Box::new(col("c3").gt(lit(0))), Box::new(lit("a")))],
4085 Some(Box::new(lit("b"))),
4086 ))
4087 );
4088
4089 assert_eq!(
4092 simplify(Expr::Case(Case::new(
4093 None,
4094 vec![
4095 (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4096 (Box::new(col("c4").lt(lit(0))), Box::new(lit("b"))),
4097 (Box::new(lit(true)), Box::new(lit("c"))),
4098 (Box::new(col("c3").eq(lit(0))), Box::new(lit("d"))),
4099 ],
4100 Some(Box::new(lit("e"))),
4101 ))),
4102 Expr::Case(Case::new(
4103 None,
4104 vec![
4105 (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4106 (Box::new(col("c4").lt(lit(0))), Box::new(lit("b"))),
4107 ],
4108 Some(Box::new(lit("c"))),
4109 ))
4110 );
4111
4112 assert_eq!(
4115 simplify(Expr::Case(Case::new(
4116 None,
4117 vec![
4118 (Box::new(col("c3").gt(lit(0))), Box::new(lit(1))),
4119 (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4120 (Box::new(lit(true)), Box::new(lit(3))),
4121 ],
4122 None,
4123 ))),
4124 Expr::Case(Case::new(
4125 None,
4126 vec![
4127 (Box::new(col("c3").gt(lit(0))), Box::new(lit(1))),
4128 (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4129 ],
4130 Some(Box::new(lit(3))),
4131 ))
4132 );
4133
4134 let expr = Expr::Case(Case::new(
4136 None,
4137 vec![
4138 (Box::new(col("c3").gt(lit(0))), Box::new(col("c3"))),
4139 (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4140 ],
4141 Some(Box::new(lit(3))),
4142 ));
4143 assert_eq!(simplify(expr.clone()), expr);
4144 }
4145
4146 #[test]
4147 fn simplify_expr_case_when_any_false() {
4148 assert_eq!(
4150 simplify(Expr::Case(Case::new(
4151 None,
4152 vec![(Box::new(lit(false)), Box::new(lit("a")))],
4153 None,
4154 ))),
4155 Expr::Literal(ScalarValue::Utf8(None), None)
4156 );
4157
4158 assert_eq!(
4160 simplify(Expr::Case(Case::new(
4161 None,
4162 vec![(Box::new(lit(false)), Box::new(lit(2)))],
4163 Some(Box::new(lit(1))),
4164 ))),
4165 lit(1),
4166 );
4167
4168 assert_eq!(
4170 simplify(Expr::Case(Case::new(
4171 None,
4172 vec![
4173 (Box::new(col("c3").lt(lit(10))), Box::new(lit("b"))),
4174 (Box::new(lit(false)), Box::new(col("c3"))),
4175 ],
4176 Some(Box::new(col("c4"))),
4177 ))),
4178 Expr::Case(Case::new(
4179 None,
4180 vec![(Box::new(col("c3").lt(lit(10))), Box::new(lit("b")))],
4181 Some(Box::new(col("c4"))),
4182 ))
4183 );
4184
4185 let expr = Expr::Case(Case::new(
4187 None,
4188 vec![(Box::new(col("c3").eq(lit(4))), Box::new(lit(1)))],
4189 Some(Box::new(lit(2))),
4190 ));
4191 assert_eq!(simplify(expr.clone()), expr);
4192 }
4193
4194 fn distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4195 Expr::BinaryExpr(BinaryExpr {
4196 left: Box::new(left.into()),
4197 op: Operator::IsDistinctFrom,
4198 right: Box::new(right.into()),
4199 })
4200 }
4201
4202 fn not_distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4203 Expr::BinaryExpr(BinaryExpr {
4204 left: Box::new(left.into()),
4205 op: Operator::IsNotDistinctFrom,
4206 right: Box::new(right.into()),
4207 })
4208 }
4209
4210 #[test]
4211 fn simplify_expr_bool_or() {
4212 assert_eq!(simplify(col("c2").or(lit(true))), lit(true),);
4214
4215 assert_eq!(simplify(col("c2").or(lit(false))), col("c2"),);
4217
4218 assert_eq!(simplify(lit(true).or(lit_bool_null())), lit(true),);
4220
4221 assert_eq!(simplify(lit_bool_null().or(lit(true))), lit(true),);
4223
4224 assert_eq!(simplify(lit(false).or(lit_bool_null())), lit_bool_null(),);
4226
4227 assert_eq!(simplify(lit_bool_null().or(lit(false))), lit_bool_null(),);
4229
4230 let expr = col("c1").between(lit(0), lit(10));
4234 let expr = expr.or(lit_bool_null());
4235 let result = simplify(expr);
4236
4237 let expected_expr = or(
4238 and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
4239 lit_bool_null(),
4240 );
4241 assert_eq!(expected_expr, result);
4242 }
4243
4244 #[test]
4245 fn simplify_inlist() {
4246 assert_eq!(simplify(in_list(col("c1"), vec![], false)), lit(false));
4247 assert_eq!(simplify(in_list(col("c1"), vec![], true)), lit(true));
4248
4249 assert_eq!(
4251 simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], false)),
4252 lit_bool_null()
4253 );
4254
4255 assert_eq!(
4257 simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], true)),
4258 lit_bool_null()
4259 );
4260
4261 assert_eq!(
4262 simplify(in_list(col("c1"), vec![lit(1)], false)),
4263 col("c1").eq(lit(1))
4264 );
4265 assert_eq!(
4266 simplify(in_list(col("c1"), vec![lit(1)], true)),
4267 col("c1").not_eq(lit(1))
4268 );
4269
4270 assert_eq!(
4273 simplify(in_list(col("c1") * lit(10), vec![lit(2)], false)),
4274 (col("c1") * lit(10)).eq(lit(2))
4275 );
4276
4277 assert_eq!(
4278 simplify(in_list(col("c1"), vec![lit(1), lit(2)], false)),
4279 col("c1").eq(lit(1)).or(col("c1").eq(lit(2)))
4280 );
4281 assert_eq!(
4282 simplify(in_list(col("c1"), vec![lit(1), lit(2)], true)),
4283 col("c1").not_eq(lit(1)).and(col("c1").not_eq(lit(2)))
4284 );
4285
4286 let subquery = Arc::new(test_table_scan_with_name("test").unwrap());
4287 assert_eq!(
4288 simplify(in_list(
4289 col("c1"),
4290 vec![scalar_subquery(Arc::clone(&subquery))],
4291 false
4292 )),
4293 in_subquery(col("c1"), Arc::clone(&subquery))
4294 );
4295 assert_eq!(
4296 simplify(in_list(
4297 col("c1"),
4298 vec![scalar_subquery(Arc::clone(&subquery))],
4299 true
4300 )),
4301 not_in_subquery(col("c1"), subquery)
4302 );
4303
4304 let subquery1 =
4305 scalar_subquery(Arc::new(test_table_scan_with_name("test1").unwrap()));
4306 let subquery2 =
4307 scalar_subquery(Arc::new(test_table_scan_with_name("test2").unwrap()));
4308
4309 assert_eq!(
4311 simplify(in_list(
4312 col("c1"),
4313 vec![subquery1.clone(), subquery2.clone()],
4314 true
4315 )),
4316 col("c1")
4317 .not_eq(subquery1.clone())
4318 .and(col("c1").not_eq(subquery2.clone()))
4319 );
4320
4321 assert_eq!(
4323 simplify(in_list(
4324 col("c1"),
4325 vec![subquery1.clone(), subquery2.clone()],
4326 false
4327 )),
4328 col("c1").eq(subquery1).or(col("c1").eq(subquery2))
4329 );
4330
4331 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4333 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false),
4334 );
4335 assert_eq!(simplify(expr), lit(false));
4336
4337 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4339 in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], false),
4340 );
4341 assert_eq!(simplify(expr), col("c1").eq(lit(4)));
4342
4343 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
4345 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
4346 );
4347 assert_eq!(simplify(expr), lit(true));
4348
4349 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
4351 in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
4352 );
4353 assert_eq!(simplify(expr), col("c1").not_eq(lit(4)));
4354
4355 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
4357 in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
4358 );
4359 assert_eq!(
4360 simplify(expr),
4361 in_list(
4362 col("c1"),
4363 vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6), lit(7)],
4364 true
4365 )
4366 );
4367
4368 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).or(
4370 in_list(col("c1"), vec![lit(2), lit(3), lit(4), lit(5)], false),
4371 );
4372 assert_eq!(
4373 simplify(expr),
4374 in_list(
4375 col("c1"),
4376 vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4377 false
4378 )
4379 );
4380
4381 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3)], false).and(in_list(
4383 col("c1"),
4384 vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4385 true,
4386 ));
4387 assert_eq!(simplify(expr), lit(false));
4388
4389 let expr =
4391 in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(in_list(
4392 col("c1"),
4393 vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4394 false,
4395 ));
4396 assert_eq!(simplify(expr), col("c1").eq(lit(5)));
4397
4398 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4400 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
4401 );
4402 assert_eq!(
4403 simplify(expr),
4404 in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false)
4405 );
4406
4407 let expr = in_list(
4410 col("c1"),
4411 vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6)],
4412 false,
4413 )
4414 .and(in_list(
4415 col("c1"),
4416 vec![lit(1), lit(3), lit(5), lit(6)],
4417 false,
4418 ))
4419 .and(in_list(col("c1"), vec![lit(3), lit(6)], false));
4420 assert_eq!(
4421 simplify(expr),
4422 col("c1").eq(lit(3)).or(col("c1").eq(lit(6)))
4423 );
4424
4425 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
4427 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false)
4428 .and(in_list(
4429 col("c1"),
4430 vec![lit(3), lit(4), lit(5), lit(6)],
4431 true,
4432 ))
4433 .and(in_list(col("c1"), vec![lit(8), lit(9), lit(10)], false)),
4434 );
4435 assert_eq!(simplify(expr), col("c1").eq(lit(8)));
4436
4437 let expr =
4440 in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(col("c1")
4441 .not_eq(lit(5))
4442 .or(in_list(
4443 col("c1"),
4444 vec![lit(6), lit(7), lit(8), lit(9)],
4445 true,
4446 )));
4447 assert_eq!(simplify(expr.clone()), expr);
4451 }
4452
4453 #[test]
4454 fn simplify_null_in_empty_inlist() {
4455 let expr = in_list(lit_bool_null(), vec![], false);
4457 assert_eq!(simplify(expr), lit(false));
4458
4459 let expr = in_list(lit_bool_null(), vec![], true);
4461 assert_eq!(simplify(expr), lit(true));
4462
4463 let null_null = || Expr::Literal(ScalarValue::Null, None);
4465 let expr = in_list(null_null(), vec![], false);
4466 assert_eq!(simplify(expr), lit(false));
4467
4468 let expr = in_list(null_null(), vec![], true);
4470 assert_eq!(simplify(expr), lit(true));
4471 }
4472
4473 #[test]
4474 fn just_simplifier_simplify_null_in_empty_inlist() {
4475 let simplify = |expr: Expr| -> Expr {
4476 let schema = expr_test_schema();
4477 let info = SimplifyContext::default().with_schema(schema);
4478 let simplifier = &mut Simplifier::new(&info);
4479 expr.rewrite(simplifier)
4480 .expect("Failed to simplify expression")
4481 .data
4482 };
4483
4484 let expr = in_list(lit_bool_null(), vec![], false);
4486 assert_eq!(simplify(expr), lit(false));
4487
4488 let expr = in_list(lit_bool_null(), vec![], true);
4490 assert_eq!(simplify(expr), lit(true));
4491
4492 let null_null = || Expr::Literal(ScalarValue::Null, None);
4494 let expr = in_list(null_null(), vec![], false);
4495 assert_eq!(simplify(expr), lit(false));
4496
4497 let expr = in_list(null_null(), vec![], true);
4499 assert_eq!(simplify(expr), lit(true));
4500 }
4501
4502 #[test]
4503 fn simplify_large_or() {
4504 let expr = (0..5)
4505 .map(|i| col("c1").eq(lit(i)))
4506 .fold(lit(false), |acc, e| acc.or(e));
4507 assert_eq!(
4508 simplify(expr),
4509 in_list(col("c1"), (0..5).map(lit).collect(), false),
4510 );
4511 }
4512
4513 #[test]
4514 fn simplify_expr_bool_and() {
4515 assert_eq!(simplify(col("c2").and(lit(true))), col("c2"),);
4517 assert_eq!(simplify(col("c2").and(lit(false))), lit(false),);
4519
4520 assert_eq!(simplify(lit(true).and(lit_bool_null())), lit_bool_null(),);
4522
4523 assert_eq!(simplify(lit_bool_null().and(lit(true))), lit_bool_null(),);
4525
4526 assert_eq!(simplify(lit(false).and(lit_bool_null())), lit(false),);
4528
4529 assert_eq!(simplify(lit_bool_null().and(lit(false))), lit(false),);
4531
4532 let expr = col("c1").between(lit(0), lit(10));
4536 let expr = expr.and(lit_bool_null());
4537 let result = simplify(expr);
4538
4539 let expected_expr = and(
4540 and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
4541 lit_bool_null(),
4542 );
4543 assert_eq!(expected_expr, result);
4544 }
4545
4546 #[test]
4547 fn simplify_expr_between() {
4548 let expr = col("c2").between(lit(3), lit(4));
4550 assert_eq!(
4551 simplify(expr),
4552 and(col("c2").gt_eq(lit(3)), col("c2").lt_eq(lit(4)))
4553 );
4554
4555 let expr = col("c2").not_between(lit(3), lit(4));
4557 assert_eq!(
4558 simplify(expr),
4559 or(col("c2").lt(lit(3)), col("c2").gt(lit(4)))
4560 );
4561 }
4562
4563 #[test]
4564 fn test_like_and_ilike() {
4565 let null = lit(ScalarValue::Utf8(None));
4566
4567 let expr = col("c1").like(null.clone());
4569 assert_eq!(simplify(expr), lit_bool_null());
4570
4571 let expr = col("c1").not_like(null.clone());
4572 assert_eq!(simplify(expr), lit_bool_null());
4573
4574 let expr = col("c1").ilike(null.clone());
4575 assert_eq!(simplify(expr), lit_bool_null());
4576
4577 let expr = col("c1").not_ilike(null.clone());
4578 assert_eq!(simplify(expr), lit_bool_null());
4579
4580 let expr = col("c1").like(lit("%"));
4582 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4583
4584 let expr = col("c1").not_like(lit("%"));
4585 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4586
4587 let expr = col("c1").ilike(lit("%"));
4588 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4589
4590 let expr = col("c1").not_ilike(lit("%"));
4591 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4592
4593 let expr = col("c1").like(lit("%%"));
4595 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4596
4597 let expr = col("c1").not_like(lit("%%"));
4598 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4599
4600 let expr = col("c1").ilike(lit("%%"));
4601 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4602
4603 let expr = col("c1").not_ilike(lit("%%"));
4604 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4605
4606 let expr = col("c1_non_null").like(lit("%"));
4608 assert_eq!(simplify(expr), lit(true));
4609
4610 let expr = col("c1_non_null").not_like(lit("%"));
4611 assert_eq!(simplify(expr), lit(false));
4612
4613 let expr = col("c1_non_null").ilike(lit("%"));
4614 assert_eq!(simplify(expr), lit(true));
4615
4616 let expr = col("c1_non_null").not_ilike(lit("%"));
4617 assert_eq!(simplify(expr), lit(false));
4618
4619 let expr = col("c1_non_null").like(lit("%%"));
4621 assert_eq!(simplify(expr), lit(true));
4622
4623 let expr = col("c1_non_null").not_like(lit("%%"));
4624 assert_eq!(simplify(expr), lit(false));
4625
4626 let expr = col("c1_non_null").ilike(lit("%%"));
4627 assert_eq!(simplify(expr), lit(true));
4628
4629 let expr = col("c1_non_null").not_ilike(lit("%%"));
4630 assert_eq!(simplify(expr), lit(false));
4631
4632 let expr = null.clone().like(lit("%"));
4634 assert_eq!(simplify(expr), lit_bool_null());
4635
4636 let expr = null.clone().not_like(lit("%"));
4637 assert_eq!(simplify(expr), lit_bool_null());
4638
4639 let expr = null.clone().ilike(lit("%"));
4640 assert_eq!(simplify(expr), lit_bool_null());
4641
4642 let expr = null.clone().not_ilike(lit("%"));
4643 assert_eq!(simplify(expr), lit_bool_null());
4644
4645 let expr = null.clone().like(lit("%%"));
4647 assert_eq!(simplify(expr), lit_bool_null());
4648
4649 let expr = null.clone().not_like(lit("%%"));
4650 assert_eq!(simplify(expr), lit_bool_null());
4651
4652 let expr = null.clone().ilike(lit("%%"));
4653 assert_eq!(simplify(expr), lit_bool_null());
4654
4655 let expr = null.clone().not_ilike(lit("%%"));
4656 assert_eq!(simplify(expr), lit_bool_null());
4657
4658 let expr = null.clone().like(lit("a%"));
4660 assert_eq!(simplify(expr), lit_bool_null());
4661
4662 let expr = null.clone().not_like(lit("a%"));
4663 assert_eq!(simplify(expr), lit_bool_null());
4664
4665 let expr = null.clone().ilike(lit("a%"));
4666 assert_eq!(simplify(expr), lit_bool_null());
4667
4668 let expr = null.clone().not_ilike(lit("a%"));
4669 assert_eq!(simplify(expr), lit_bool_null());
4670
4671 let expr = col("c1").like(lit("a"));
4673 assert_eq!(simplify(expr), col("c1").eq(lit("a")));
4674 let expr = col("c1").not_like(lit("a"));
4675 assert_eq!(simplify(expr), col("c1").not_eq(lit("a")));
4676 let expr = col("c1").like(lit("a_"));
4677 assert_eq!(simplify(expr), col("c1").like(lit("a_")));
4678 let expr = col("c1").not_like(lit("a_"));
4679 assert_eq!(simplify(expr), col("c1").not_like(lit("a_")));
4680
4681 let expr = col("c1").ilike(lit("a"));
4682 assert_eq!(simplify(expr), col("c1").ilike(lit("a")));
4683 let expr = col("c1").not_ilike(lit("a"));
4684 assert_eq!(simplify(expr), col("c1").not_ilike(lit("a")));
4685 }
4686
4687 #[test]
4688 fn test_simplify_with_guarantee() {
4689 let expr_x = col("c3").gt(lit(3_i64));
4691 let expr_y = (col("c4") + lit(2_u32)).lt(lit(10_u32));
4692 let expr_z = col("c1").in_list(vec![lit("a"), lit("b")], true);
4693 let expr = expr_x.clone().and(expr_y.or(expr_z));
4694
4695 let guarantees = vec![
4697 (col("c3"), NullableInterval::from(ScalarValue::Int64(None))),
4698 (col("c4"), NullableInterval::from(ScalarValue::UInt32(None))),
4699 (col("c1"), NullableInterval::from(ScalarValue::Utf8(None))),
4700 ];
4701
4702 let output = simplify_with_guarantee(expr.clone(), guarantees);
4703 assert_eq!(output, lit_bool_null());
4704
4705 let guarantees = vec![
4707 (
4708 col("c3"),
4709 NullableInterval::NotNull {
4710 values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4711 },
4712 ),
4713 (
4714 col("c4"),
4715 NullableInterval::from(ScalarValue::UInt32(Some(9))),
4716 ),
4717 (col("c1"), NullableInterval::from(ScalarValue::from("a"))),
4718 ];
4719 let output = simplify_with_guarantee(expr.clone(), guarantees);
4720 assert_eq!(output, lit(false));
4721
4722 let guarantees = vec![
4724 (
4725 col("c3"),
4726 NullableInterval::MaybeNull {
4727 values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4728 },
4729 ),
4730 (
4731 col("c4"),
4732 NullableInterval::MaybeNull {
4733 values: Interval::make(Some(9_u32), Some(9_u32)).unwrap(),
4734 },
4735 ),
4736 (
4737 col("c1"),
4738 NullableInterval::NotNull {
4739 values: Interval::try_new(
4740 ScalarValue::from("d"),
4741 ScalarValue::from("f"),
4742 )
4743 .unwrap(),
4744 },
4745 ),
4746 ];
4747 let output = simplify_with_guarantee(expr.clone(), guarantees);
4748 assert_eq!(&output, &expr_x);
4749
4750 let guarantees = vec![
4752 (
4753 col("c3"),
4754 NullableInterval::from(ScalarValue::Int64(Some(9))),
4755 ),
4756 (
4757 col("c4"),
4758 NullableInterval::from(ScalarValue::UInt32(Some(3))),
4759 ),
4760 ];
4761 let output = simplify_with_guarantee(expr.clone(), guarantees);
4762 assert_eq!(output, lit(true));
4763
4764 let guarantees = vec![(
4766 col("c4"),
4767 NullableInterval::from(ScalarValue::UInt32(Some(3))),
4768 )];
4769 let output = simplify_with_guarantee(expr, guarantees);
4770 assert_eq!(&output, &expr_x);
4771 }
4772
4773 #[test]
4774 fn test_expression_partial_simplify_1() {
4775 let expr = (lit(1) + lit(2)) + (lit(4) / lit(0));
4777 let expected = (lit(3)) + (lit(4) / lit(0));
4778
4779 assert_eq!(simplify(expr), expected);
4780 }
4781
4782 #[test]
4783 fn test_expression_partial_simplify_2() {
4784 let expr = (lit(1).gt(lit(2))).and(lit(4) / lit(0));
4786 let expected = lit(false);
4787
4788 assert_eq!(simplify(expr), expected);
4789 }
4790
4791 #[test]
4792 fn test_simplify_cycles() {
4793 let expr = lit(true);
4795 let expected = lit(true);
4796 let (expr, num_iter) = simplify_with_cycle_count(expr);
4797 assert_eq!(expr, expected);
4798 assert_eq!(num_iter, 1);
4799
4800 let expr = lit(true).not_eq(lit_bool_null()).or(lit(5).gt(lit(10)));
4802 let expected = lit_bool_null();
4803 let (expr, num_iter) = simplify_with_cycle_count(expr);
4804 assert_eq!(expr, expected);
4805 assert_eq!(num_iter, 2);
4806
4807 let expr = (((col("c4") - lit(10)) + lit(10)) * lit(100)) / lit(100);
4810 let expected = expr.clone();
4811 let (expr, num_iter) = simplify_with_cycle_count(expr);
4812 assert_eq!(expr, expected);
4813 assert_eq!(num_iter, 1);
4814
4815 let expr = col("c4")
4817 .lt(lit(1))
4818 .or(col("c3").lt(lit(2)))
4819 .and(col("c3_non_null").lt(lit(3)))
4820 .and(lit(false));
4821 let expected = lit(false);
4822 let (expr, num_iter) = simplify_with_cycle_count(expr);
4823 assert_eq!(expr, expected);
4824 assert_eq!(num_iter, 2);
4825 }
4826
4827 fn boolean_test_schema() -> DFSchemaRef {
4828 static BOOLEAN_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
4829 Schema::new(vec![
4830 Field::new("A", DataType::Boolean, false),
4831 Field::new("B", DataType::Boolean, false),
4832 Field::new("C", DataType::Boolean, false),
4833 Field::new("D", DataType::Boolean, false),
4834 ])
4835 .to_dfschema_ref()
4836 .unwrap()
4837 });
4838 Arc::clone(&BOOLEAN_TEST_SCHEMA)
4839 }
4840
4841 #[test]
4842 fn simplify_common_factor_conjunction_in_disjunction() {
4843 let schema = boolean_test_schema();
4844 let simplifier =
4845 ExprSimplifier::new(SimplifyContext::default().with_schema(schema));
4846
4847 let a = || col("A");
4848 let b = || col("B");
4849 let c = || col("C");
4850 let d = || col("D");
4851
4852 let expr = a().and(b()).or(a().and(c()));
4854 let expected = a().and(b().or(c()));
4855
4856 assert_eq!(expected, simplifier.simplify(expr).unwrap());
4857
4858 let expr = a().and(b()).or(a().and(c())).or(a().and(d()));
4860 let expected = a().and(b().or(c()).or(d()));
4861 assert_eq!(expected, simplifier.simplify(expr).unwrap());
4862
4863 let expr = a().or(b().and(c().and(a())));
4865 let expected = a();
4866 assert_eq!(expected, simplifier.simplify(expr).unwrap());
4867 }
4868
4869 #[test]
4870 fn test_simplify_udaf() {
4871 let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_with_simplify());
4872 let aggregate_function_expr =
4873 Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4874 udaf.into(),
4875 vec![],
4876 false,
4877 None,
4878 vec![],
4879 None,
4880 ));
4881
4882 let expected = col("result_column");
4883 assert_eq!(simplify(aggregate_function_expr), expected);
4884
4885 let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_without_simplify());
4886 let aggregate_function_expr =
4887 Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4888 udaf.into(),
4889 vec![],
4890 false,
4891 None,
4892 vec![],
4893 None,
4894 ));
4895
4896 let expected = aggregate_function_expr.clone();
4897 assert_eq!(simplify(aggregate_function_expr), expected);
4898 }
4899
4900 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
4903 struct SimplifyMockUdaf {
4904 simplify: bool,
4905 }
4906
4907 impl SimplifyMockUdaf {
4908 fn new_with_simplify() -> Self {
4910 Self { simplify: true }
4911 }
4912 fn new_without_simplify() -> Self {
4914 Self { simplify: false }
4915 }
4916 }
4917
4918 impl AggregateUDFImpl for SimplifyMockUdaf {
4919 fn as_any(&self) -> &dyn std::any::Any {
4920 self
4921 }
4922
4923 fn name(&self) -> &str {
4924 "mock_simplify"
4925 }
4926
4927 fn signature(&self) -> &Signature {
4928 unimplemented!()
4929 }
4930
4931 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4932 unimplemented!("not needed for tests")
4933 }
4934
4935 fn accumulator(
4936 &self,
4937 _acc_args: AccumulatorArgs,
4938 ) -> Result<Box<dyn Accumulator>> {
4939 unimplemented!("not needed for tests")
4940 }
4941
4942 fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
4943 unimplemented!("not needed for testing")
4944 }
4945
4946 fn create_groups_accumulator(
4947 &self,
4948 _args: AccumulatorArgs,
4949 ) -> Result<Box<dyn GroupsAccumulator>> {
4950 unimplemented!("not needed for testing")
4951 }
4952
4953 fn simplify(&self) -> Option<AggregateFunctionSimplification> {
4954 if self.simplify {
4955 Some(Box::new(|_, _| Ok(col("result_column"))))
4956 } else {
4957 None
4958 }
4959 }
4960 }
4961
4962 #[test]
4963 fn test_simplify_udwf() {
4964 let udwf = WindowFunctionDefinition::WindowUDF(
4965 WindowUDF::new_from_impl(SimplifyMockUdwf::new_with_simplify()).into(),
4966 );
4967 let window_function_expr = Expr::from(WindowFunction::new(udwf, vec![]));
4968
4969 let expected = col("result_column");
4970 assert_eq!(simplify(window_function_expr), expected);
4971
4972 let udwf = WindowFunctionDefinition::WindowUDF(
4973 WindowUDF::new_from_impl(SimplifyMockUdwf::new_without_simplify()).into(),
4974 );
4975 let window_function_expr = Expr::from(WindowFunction::new(udwf, vec![]));
4976
4977 let expected = window_function_expr.clone();
4978 assert_eq!(simplify(window_function_expr), expected);
4979 }
4980
4981 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
4984 struct SimplifyMockUdwf {
4985 simplify: bool,
4986 }
4987
4988 impl SimplifyMockUdwf {
4989 fn new_with_simplify() -> Self {
4991 Self { simplify: true }
4992 }
4993 fn new_without_simplify() -> Self {
4995 Self { simplify: false }
4996 }
4997 }
4998
4999 impl WindowUDFImpl for SimplifyMockUdwf {
5000 fn as_any(&self) -> &dyn std::any::Any {
5001 self
5002 }
5003
5004 fn name(&self) -> &str {
5005 "mock_simplify"
5006 }
5007
5008 fn signature(&self) -> &Signature {
5009 unimplemented!()
5010 }
5011
5012 fn simplify(&self) -> Option<WindowFunctionSimplification> {
5013 if self.simplify {
5014 Some(Box::new(|_, _| Ok(col("result_column"))))
5015 } else {
5016 None
5017 }
5018 }
5019
5020 fn partition_evaluator(
5021 &self,
5022 _partition_evaluator_args: PartitionEvaluatorArgs,
5023 ) -> Result<Box<dyn PartitionEvaluator>> {
5024 unimplemented!("not needed for tests")
5025 }
5026
5027 fn field(&self, _field_args: WindowUDFFieldArgs) -> Result<FieldRef> {
5028 unimplemented!("not needed for tests")
5029 }
5030
5031 fn limit_effect(&self, _args: &[Arc<dyn PhysicalExpr>]) -> LimitEffect {
5032 LimitEffect::Unknown
5033 }
5034 }
5035 #[derive(Debug, PartialEq, Eq, Hash)]
5036 struct VolatileUdf {
5037 signature: Signature,
5038 }
5039
5040 impl VolatileUdf {
5041 pub fn new() -> Self {
5042 Self {
5043 signature: Signature::exact(vec![], Volatility::Volatile),
5044 }
5045 }
5046 }
5047 impl ScalarUDFImpl for VolatileUdf {
5048 fn as_any(&self) -> &dyn std::any::Any {
5049 self
5050 }
5051
5052 fn name(&self) -> &str {
5053 "VolatileUdf"
5054 }
5055
5056 fn signature(&self) -> &Signature {
5057 &self.signature
5058 }
5059
5060 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
5061 Ok(DataType::Int16)
5062 }
5063
5064 fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
5065 panic!("dummy - not implemented")
5066 }
5067 }
5068
5069 #[test]
5070 fn test_optimize_volatile_conditions() {
5071 let fun = Arc::new(ScalarUDF::new_from_impl(VolatileUdf::new()));
5072 let rand = Expr::ScalarFunction(ScalarFunction::new_udf(fun, vec![]));
5073 {
5074 let expr = rand
5075 .clone()
5076 .eq(lit(0))
5077 .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0))));
5078
5079 assert_eq!(simplify(expr.clone()), expr);
5080 }
5081
5082 {
5083 let expr = col("column1")
5084 .eq(lit(2))
5085 .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0))));
5086
5087 assert_eq!(simplify(expr), col("column1").eq(lit(2)));
5088 }
5089
5090 {
5091 let expr = (col("column1").eq(lit(2)).and(rand.clone().eq(lit(0)))).or(col(
5092 "column1",
5093 )
5094 .eq(lit(2))
5095 .and(rand.clone().eq(lit(0))));
5096
5097 assert_eq!(
5098 simplify(expr),
5099 col("column1")
5100 .eq(lit(2))
5101 .and((rand.clone().eq(lit(0))).or(rand.clone().eq(lit(0))))
5102 );
5103 }
5104 }
5105
5106 #[test]
5107 fn simplify_fixed_size_binary_eq_lit() {
5108 let bytes = [1u8, 2, 3].as_slice();
5109
5110 let expr = col("c5").eq(lit(bytes));
5112
5113 let coerced = coerce(expr.clone());
5115 let schema = expr_test_schema();
5116 assert_eq!(
5117 coerced,
5118 col("c5")
5119 .cast_to(&DataType::Binary, schema.as_ref())
5120 .unwrap()
5121 .eq(lit(bytes))
5122 );
5123
5124 assert_eq!(
5126 simplify(coerced),
5127 col("c5").eq(Expr::Literal(
5128 ScalarValue::FixedSizeBinary(3, Some(bytes.to_vec()),),
5129 None
5130 ))
5131 );
5132 }
5133
5134 #[test]
5135 fn simplify_cast_literal() {
5136 let expr = Expr::Cast(Cast::new(Box::new(lit(123i32)), DataType::Int64));
5140 let expected = lit(123i64);
5141 assert_eq!(simplify(expr), expected);
5142
5143 let expr = Expr::Cast(Cast::new(
5146 Box::new(lit(1761630189642i64)),
5147 DataType::Timestamp(
5148 arrow::datatypes::TimeUnit::Nanosecond,
5149 Some("+00:00".into()),
5150 ),
5151 ));
5152 let result = simplify(expr);
5154 match result {
5155 Expr::Literal(ScalarValue::TimestampNanosecond(Some(val), tz), _) => {
5156 assert_eq!(val, 1761630189642i64);
5157 assert_eq!(tz.as_deref(), Some("+00:00"));
5158 }
5159 other => panic!("Expected TimestampNanosecond literal, got: {other:?}"),
5160 }
5161
5162 let expr = Expr::Cast(Cast::new(
5166 Box::new(lit("1761630189642")),
5167 DataType::Timestamp(
5168 arrow::datatypes::TimeUnit::Nanosecond,
5169 Some("+00:00".into()),
5170 ),
5171 ));
5172
5173 let schema = test_schema();
5175 let simplifier =
5176 ExprSimplifier::new(SimplifyContext::default().with_schema(schema));
5177 let result = simplifier.simplify(expr);
5178 assert!(result.is_err(), "Expected error for invalid cast");
5179 let err_msg = result.unwrap_err().to_string();
5180 assert_contains!(err_msg, "Error parsing timestamp");
5181 }
5182
5183 fn if_not_null(expr: Expr, then: bool) -> Expr {
5184 Expr::Case(Case {
5185 expr: Some(expr.is_not_null().into()),
5186 when_then_expr: vec![(lit(true).into(), lit(then).into())],
5187 else_expr: None,
5188 })
5189 }
5190
5191 fn make_struct_cast_expr(source_fields: Fields, target_fields: Fields) -> Expr {
5197 let arrays: Vec<Arc<dyn Array>> = vec![
5199 Arc::new(Int32Array::from(vec![Some(1)])),
5200 Arc::new(Int32Array::from(vec![Some(2)])),
5201 ];
5202 let struct_array = StructArray::try_new(source_fields, arrays, None).unwrap();
5203
5204 Expr::Cast(Cast::new(
5205 Box::new(Expr::Literal(
5206 ScalarValue::Struct(Arc::new(struct_array)),
5207 None,
5208 )),
5209 DataType::Struct(target_fields),
5210 ))
5211 }
5212
5213 #[test]
5214 fn test_struct_cast_different_field_counts_not_foldable() {
5215 let source_fields = Fields::from(vec![
5219 Arc::new(Field::new("a", DataType::Int32, true)),
5220 Arc::new(Field::new("b", DataType::Int32, true)),
5221 ]);
5222
5223 let target_fields = Fields::from(vec![
5224 Arc::new(Field::new("x", DataType::Int32, true)),
5225 Arc::new(Field::new("y", DataType::Int32, true)),
5226 Arc::new(Field::new("z", DataType::Int32, true)),
5227 ]);
5228
5229 let expr = make_struct_cast_expr(source_fields, target_fields);
5230
5231 let simplifier =
5232 ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
5233
5234 let result = simplifier.simplify(expr.clone()).unwrap();
5236 assert_eq!(
5238 result, expr,
5239 "Struct cast with different field counts should remain unchanged (no const-folding)"
5240 );
5241 }
5242
5243 #[test]
5244 fn test_struct_cast_same_field_count_foldable() {
5245 let source_fields = Fields::from(vec![
5248 Arc::new(Field::new("a", DataType::Int32, true)),
5249 Arc::new(Field::new("b", DataType::Int32, true)),
5250 ]);
5251
5252 let target_fields = Fields::from(vec![
5253 Arc::new(Field::new("a", DataType::Int32, true)),
5254 Arc::new(Field::new("b", DataType::Int32, true)),
5255 ]);
5256
5257 let expr = make_struct_cast_expr(source_fields, target_fields);
5258
5259 let simplifier =
5260 ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
5261
5262 let result = simplifier.simplify(expr.clone()).unwrap();
5264 assert!(matches!(result, Expr::Literal(_, _)));
5266 assert_ne!(
5268 result, expr,
5269 "Struct cast with same field count should be simplified (not identical to input)"
5270 );
5271 }
5272
5273 #[test]
5274 fn test_struct_cast_different_names_same_count() {
5275 let source_fields = Fields::from(vec![
5279 Arc::new(Field::new("a", DataType::Int32, true)),
5280 Arc::new(Field::new("b", DataType::Int32, true)),
5281 ]);
5282
5283 let target_fields = Fields::from(vec![
5284 Arc::new(Field::new("x", DataType::Int32, true)),
5285 Arc::new(Field::new("y", DataType::Int32, true)),
5286 ]);
5287
5288 let expr = make_struct_cast_expr(source_fields, target_fields);
5289
5290 let simplifier =
5291 ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
5292
5293 let result = simplifier.simplify(expr.clone()).unwrap();
5295 assert_eq!(
5296 result, expr,
5297 "Struct cast with different names but same field count should not be simplified"
5298 );
5299 }
5300
5301 #[test]
5302 fn test_struct_cast_empty_array_not_foldable() {
5303 let source_fields = Fields::from(vec![
5308 Arc::new(Field::new("a", DataType::Int32, true)),
5309 Arc::new(Field::new("b", DataType::Int32, true)),
5310 ]);
5311
5312 let target_fields = Fields::from(vec![
5313 Arc::new(Field::new("a", DataType::Int32, true)),
5314 Arc::new(Field::new("b", DataType::Int32, true)),
5315 ]);
5316
5317 let arrays: Vec<Arc<dyn Array>> = vec![
5319 Arc::new(Int32Array::new(vec![].into(), None)),
5320 Arc::new(Int32Array::new(vec![].into(), None)),
5321 ];
5322 let struct_array = StructArray::try_new(source_fields, arrays, None).unwrap();
5323
5324 let expr = Expr::Cast(Cast::new(
5325 Box::new(Expr::Literal(
5326 ScalarValue::Struct(Arc::new(struct_array)),
5327 None,
5328 )),
5329 DataType::Struct(target_fields),
5330 ));
5331
5332 let simplifier =
5333 ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
5334
5335 let result = simplifier.simplify(expr.clone()).unwrap();
5337 assert_eq!(
5338 result, expr,
5339 "Struct cast with empty (0-row) array should remain unchanged"
5340 );
5341 }
5342}