1use arrow::{
21 array::{Array, AsArray, new_null_array},
22 datatypes::{DataType, Field, Schema},
23 record_batch::RecordBatch,
24};
25use std::borrow::Cow;
26use std::collections::HashSet;
27use std::ops::Not;
28use std::sync::Arc;
29use std::sync::LazyLock;
30
31use datafusion_common::config::ConfigOptions;
32use datafusion_common::nested_struct::has_one_of_more_common_fields;
33use datafusion_common::{
34 DFSchema, DataFusionError, Result, ScalarValue, exec_datafusion_err, internal_err,
35};
36use datafusion_common::{
37 HashMap,
38 cast::{as_large_list_array, as_list_array},
39 metadata::FieldMetadata,
40 tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter},
41};
42use datafusion_expr::expr::HigherOrderFunction;
43use datafusion_expr::{
44 BinaryExpr, Case, ColumnarValue, Expr, ExprSchemable, Like, Operator, Volatility,
45 and, binary::BinaryTypeCoercer, lit, or, preimage::PreimageResult,
46};
47use datafusion_expr::{Cast, TryCast, simplify::ExprSimplifyResult};
48use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval};
49use datafusion_expr::{
50 expr::{InList, InSubquery},
51 utils::{iter_conjunction, iter_conjunction_owned},
52};
53use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
54
55use super::inlist_simplifier::ShortenInListSimplifier;
56use super::utils::*;
57use crate::simplify_expressions::SimplifyContext;
58use crate::simplify_expressions::regex::simplify_regex_expr;
59use crate::simplify_expressions::unwrap_cast::{
60 is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary,
61 is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist,
62 unwrap_cast_in_comparison_for_binary,
63};
64use crate::{
65 analyzer::type_coercion::TypeCoercionRewriter,
66 simplify_expressions::udf_preimage::rewrite_with_preimage,
67};
68use datafusion_expr::expr_rewriter::rewrite_with_guarantees_map;
69use datafusion_expr_common::casts::try_cast_literal_to_type;
70use indexmap::IndexSet;
71use regex::Regex;
72
73pub struct ExprSimplifier {
105 info: SimplifyContext,
106 guarantees: Vec<(Expr, NullableInterval)>,
109 canonicalize: bool,
112 max_simplifier_cycles: u32,
114}
115
116pub const THRESHOLD_INLINE_INLIST: usize = 3;
117pub const DEFAULT_MAX_SIMPLIFIER_CYCLES: u32 = 3;
118
119impl ExprSimplifier {
120 pub fn new(info: SimplifyContext) -> Self {
125 Self {
126 info,
127 guarantees: vec![],
128 canonicalize: true,
129 max_simplifier_cycles: DEFAULT_MAX_SIMPLIFIER_CYCLES,
130 }
131 }
132
133 pub fn simplify(&self, expr: Expr) -> Result<Expr> {
176 Ok(self.simplify_with_cycle_count_transformed(expr)?.0.data)
177 }
178
179 #[deprecated(
186 since = "48.0.0",
187 note = "Use `simplify_with_cycle_count_transformed` instead"
188 )]
189 #[expect(unused_mut)]
190 pub fn simplify_with_cycle_count(&self, mut expr: Expr) -> Result<(Expr, u32)> {
191 let (transformed, cycle_count) =
192 self.simplify_with_cycle_count_transformed(expr)?;
193 Ok((transformed.data, cycle_count))
194 }
195
196 pub fn simplify_with_cycle_count_transformed(
209 &self,
210 mut expr: Expr,
211 ) -> Result<(Transformed<Expr>, u32)> {
212 let mut simplifier = Simplifier::new(&self.info);
213 let config_options = Some(Arc::clone(self.info.config_options()));
214 let mut const_evaluator = ConstEvaluator::try_new(config_options)?;
215 let mut shorten_in_list_simplifier = ShortenInListSimplifier::new();
216 let guarantees_map: HashMap<&Expr, &NullableInterval> =
217 self.guarantees.iter().map(|(k, v)| (k, v)).collect();
218
219 if self.canonicalize {
220 expr = expr.rewrite(&mut Canonicalizer::new()).data()?
221 }
222
223 let mut num_cycles = 0;
227 let mut has_transformed = false;
228 loop {
229 let Transformed {
230 data, transformed, ..
231 } = expr
232 .rewrite(&mut const_evaluator)?
233 .transform_data(|expr| expr.rewrite(&mut simplifier))?
234 .transform_data(|expr| {
235 rewrite_with_guarantees_map(expr, &guarantees_map)
236 })?;
237 expr = data;
238 num_cycles += 1;
239 has_transformed = has_transformed || transformed;
241 if !transformed || num_cycles >= self.max_simplifier_cycles {
242 break;
243 }
244 }
245 expr = expr.rewrite(&mut shorten_in_list_simplifier).data()?;
247 Ok((
248 Transformed::new_transformed(expr, has_transformed),
249 num_cycles,
250 ))
251 }
252
253 pub fn coerce(&self, expr: Expr, schema: &DFSchema) -> Result<Expr> {
259 let mut expr_rewrite = TypeCoercionRewriter { schema };
260 expr.rewrite(&mut expr_rewrite).data()
261 }
262
263 pub fn with_guarantees(mut self, guarantees: Vec<(Expr, NullableInterval)>) -> Self {
319 self.guarantees = guarantees;
320 self
321 }
322
323 pub fn with_canonicalize(mut self, canonicalize: bool) -> Self {
371 self.canonicalize = canonicalize;
372 self
373 }
374
375 pub fn with_max_cycles(mut self, max_simplifier_cycles: u32) -> Self {
424 self.max_simplifier_cycles = max_simplifier_cycles;
425 self
426 }
427}
428
429struct Canonicalizer {}
436
437impl Canonicalizer {
438 fn new() -> Self {
439 Self {}
440 }
441}
442
443impl TreeNodeRewriter for Canonicalizer {
444 type Node = Expr;
445
446 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
447 let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr else {
448 return Ok(Transformed::no(expr));
449 };
450 match (left.as_ref(), right.as_ref(), op.swap()) {
451 (Expr::Column(left_col), Expr::Column(right_col), Some(swapped_op))
453 if right_col > left_col =>
454 {
455 Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
456 left: right,
457 op: swapped_op,
458 right: left,
459 })))
460 }
461 (Expr::Literal(_a, _), Expr::Column(_b), Some(swapped_op)) => {
463 Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
464 left: right,
465 op: swapped_op,
466 right: left,
467 })))
468 }
469 _ => Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
470 left,
471 op,
472 right,
473 }))),
474 }
475 }
476}
477
478struct ConstEvaluator {
483 can_evaluate: Vec<bool>,
496 execution_props: ExecutionProps,
503}
504
505enum ConstSimplifyResult {
507 Simplified(ScalarValue, Option<FieldMetadata>),
509 NotSimplified(ScalarValue, Option<FieldMetadata>),
511 SimplifyRuntimeError(DataFusionError, Expr),
513}
514
515impl TreeNodeRewriter for ConstEvaluator {
516 type Node = Expr;
517
518 fn f_down(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
519 self.can_evaluate.push(true);
521
522 if !Self::can_evaluate(&expr) {
527 let parent_iter = self.can_evaluate.iter_mut().rev();
529 for p in parent_iter {
530 if !*p {
531 break;
534 }
535 *p = false;
536 }
537 }
538
539 Ok(Transformed::no(expr))
543 }
544
545 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
546 match self.can_evaluate.pop() {
547 Some(true) => match self.evaluate_to_scalar(expr) {
552 ConstSimplifyResult::Simplified(s, m) => {
553 Ok(Transformed::yes(Expr::Literal(s, m)))
554 }
555 ConstSimplifyResult::NotSimplified(s, m) => {
556 Ok(Transformed::no(Expr::Literal(s, m)))
557 }
558 ConstSimplifyResult::SimplifyRuntimeError(err, expr) => {
559 if let Expr::Cast(Cast { ref expr, .. })
562 | Expr::TryCast(TryCast { ref expr, .. }) = expr
563 && matches!(expr.as_ref(), Expr::Literal(_, _))
564 {
565 return Err(err);
566 }
567 Ok(Transformed::yes(expr))
570 }
571 },
572 Some(false) => Ok(Transformed::no(expr)),
573 _ => internal_err!("Failed to pop can_evaluate"),
574 }
575 }
576}
577
578static DUMMY_SCHEMA: LazyLock<Arc<Schema>> =
579 LazyLock::new(|| Arc::new(Schema::new(vec![Field::new(".", DataType::Null, true)])));
580
581static DUMMY_DF_SCHEMA: LazyLock<DFSchema> =
582 LazyLock::new(|| DFSchema::try_from(Arc::clone(&*DUMMY_SCHEMA)).unwrap());
583
584static DUMMY_BATCH: LazyLock<RecordBatch> = LazyLock::new(|| {
585 let col = new_null_array(&DataType::Null, 1);
587 RecordBatch::try_new(DUMMY_SCHEMA.clone(), vec![col]).unwrap()
588});
589
590impl ConstEvaluator {
591 pub fn try_new(config_options: Option<Arc<ConfigOptions>>) -> Result<Self> {
601 let mut execution_props = ExecutionProps::new();
605 execution_props.config_options = config_options;
606
607 Ok(Self {
608 can_evaluate: vec![],
609 execution_props,
610 })
611 }
612
613 fn volatility_ok(volatility: Volatility) -> bool {
615 match volatility {
616 Volatility::Immutable => true,
617 Volatility::Stable => true,
619 Volatility::Volatile => false,
620 }
621 }
622
623 fn can_evaluate(expr: &Expr) -> bool {
626 match expr {
632 #[expect(deprecated)]
634 Expr::AggregateFunction { .. }
635 | Expr::ScalarVariable(_, _)
636 | Expr::Column(_)
637 | Expr::OuterReferenceColumn(_, _)
638 | Expr::Exists { .. }
639 | Expr::InSubquery(_)
640 | Expr::SetComparison(_)
641 | Expr::ScalarSubquery(_)
642 | Expr::WindowFunction { .. }
643 | Expr::GroupingSet(_)
644 | Expr::Wildcard { .. }
645 | Expr::Placeholder(_) => false,
646 Expr::ScalarFunction(ScalarFunction { func, .. }) => {
647 Self::volatility_ok(func.signature().volatility)
648 }
649 Expr::HigherOrderFunction(HigherOrderFunction { func, .. }) => {
650 Self::volatility_ok(func.signature().volatility)
651 }
652 Expr::Cast(Cast { expr, field }) | Expr::TryCast(TryCast { expr, field }) => {
653 if let (
654 Ok(DataType::Struct(source_fields)),
655 DataType::Struct(target_fields),
656 ) = (expr.get_type(&DFSchema::empty()), field.data_type())
657 {
658 if source_fields.len() != target_fields.len() {
660 return false;
661 }
662
663 if !has_one_of_more_common_fields(&source_fields, target_fields) {
665 return false;
666 }
667
668 if let Expr::Literal(ScalarValue::Struct(struct_array), _) =
672 expr.as_ref()
673 && struct_array.len() == 0
674 {
675 return false;
676 }
677 }
678 true
679 }
680 Expr::Literal(_, _)
681 | Expr::Alias(..)
682 | Expr::Unnest(_)
683 | Expr::BinaryExpr { .. }
684 | Expr::Not(_)
685 | Expr::IsNotNull(_)
686 | Expr::IsNull(_)
687 | Expr::IsTrue(_)
688 | Expr::IsFalse(_)
689 | Expr::IsUnknown(_)
690 | Expr::IsNotTrue(_)
691 | Expr::IsNotFalse(_)
692 | Expr::IsNotUnknown(_)
693 | Expr::Negative(_)
694 | Expr::Between { .. }
695 | Expr::Like { .. }
696 | Expr::SimilarTo { .. }
697 | Expr::Case(_)
698 | Expr::InList { .. }
699 | Expr::Lambda(_)
700 | Expr::LambdaVariable(_) => true,
701 }
702 }
703
704 pub(crate) fn evaluate_to_scalar(&mut self, expr: Expr) -> ConstSimplifyResult {
706 if let Expr::Literal(s, m) = expr {
707 return ConstSimplifyResult::NotSimplified(s, m);
708 }
709
710 let phys_expr =
711 match create_physical_expr(&expr, &DUMMY_DF_SCHEMA, &self.execution_props) {
712 Ok(e) => e,
713 Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
714 };
715 let metadata = phys_expr
716 .return_field(DUMMY_BATCH.schema_ref())
717 .ok()
718 .and_then(|f| {
719 let m = f.metadata();
720 match m.is_empty() {
721 true => None,
722 false => Some(FieldMetadata::from(m)),
723 }
724 });
725 let col_val = match phys_expr.evaluate(&DUMMY_BATCH) {
726 Ok(v) => v,
727 Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
728 };
729 match col_val {
730 ColumnarValue::Array(a) => {
731 if a.len() != 1 {
732 ConstSimplifyResult::SimplifyRuntimeError(
733 exec_datafusion_err!(
734 "Could not evaluate the expression, found a result of length {}",
735 a.len()
736 ),
737 expr,
738 )
739 } else if as_list_array(&a).is_ok() {
740 ConstSimplifyResult::Simplified(
741 ScalarValue::List(a.as_list::<i32>().to_owned().into()),
742 metadata,
743 )
744 } else if as_large_list_array(&a).is_ok() {
745 ConstSimplifyResult::Simplified(
746 ScalarValue::LargeList(a.as_list::<i64>().to_owned().into()),
747 metadata,
748 )
749 } else {
750 match ScalarValue::try_from_array(&a, 0) {
752 Ok(s) => ConstSimplifyResult::Simplified(s, metadata),
753 Err(err) => ConstSimplifyResult::SimplifyRuntimeError(err, expr),
754 }
755 }
756 }
757 ColumnarValue::Scalar(s) => ConstSimplifyResult::Simplified(s, metadata),
758 }
759 }
760}
761
762struct Simplifier<'a> {
772 info: &'a SimplifyContext,
773}
774
775impl<'a> Simplifier<'a> {
776 pub fn new(info: &'a SimplifyContext) -> Self {
777 Self { info }
778 }
779}
780
781impl TreeNodeRewriter for Simplifier<'_> {
782 type Node = Expr;
783
784 fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
786 use datafusion_expr::Operator::{
787 And, BitwiseAnd, BitwiseOr, BitwiseShiftLeft, BitwiseShiftRight, BitwiseXor,
788 Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch,
789 RegexNotIMatch, RegexNotMatch,
790 };
791
792 let info = self.info;
793 Ok(match expr {
794 ref expr @ Expr::BinaryExpr(BinaryExpr {
798 ref left,
799 ref op,
800 ref right,
801 }) if op.returns_null_on_null()
802 && (is_null(left.as_ref()) || is_null(right.as_ref())) =>
803 {
804 Transformed::yes(Expr::Literal(
805 ScalarValue::try_new_null(&info.get_data_type(expr)?)?,
806 None,
807 ))
808 }
809
810 Expr::BinaryExpr(BinaryExpr {
812 left,
813 op: And | Or,
814 right,
815 }) if is_null(&left) && is_null(&right) => Transformed::yes(lit_bool_null()),
816
817 Expr::BinaryExpr(BinaryExpr {
825 left,
826 op: Eq,
827 right,
828 }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
829 Transformed::yes(match as_bool_lit(&left)? {
830 Some(true) => *right,
831 Some(false) => Expr::Not(right),
832 None => lit_bool_null(),
833 })
834 }
835 Expr::BinaryExpr(BinaryExpr {
839 left,
840 op: Eq,
841 right,
842 }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
843 Transformed::yes(match as_bool_lit(&right)? {
844 Some(true) => *left,
845 Some(false) => Expr::Not(left),
846 None => lit_bool_null(),
847 })
848 }
849 Expr::BinaryExpr(BinaryExpr {
854 left,
855 op: Eq,
856 right,
857 }) if (left == right) & !left.is_volatile() => {
858 Transformed::yes(match !info.nullable(&left)? {
859 true => lit(true),
860 false => Expr::BinaryExpr(BinaryExpr {
861 left: Box::new(Expr::IsNotNull(left)),
862 op: Or,
863 right: Box::new(lit_bool_null()),
864 }),
865 })
866 }
867
868 Expr::BinaryExpr(BinaryExpr {
875 left,
876 op: NotEq,
877 right,
878 }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
879 Transformed::yes(match as_bool_lit(&left)? {
880 Some(true) => Expr::Not(right),
881 Some(false) => *right,
882 None => lit_bool_null(),
883 })
884 }
885 Expr::BinaryExpr(BinaryExpr {
889 left,
890 op: NotEq,
891 right,
892 }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
893 Transformed::yes(match as_bool_lit(&right)? {
894 Some(true) => Expr::Not(left),
895 Some(false) => *left,
896 None => lit_bool_null(),
897 })
898 }
899
900 Expr::BinaryExpr(BinaryExpr {
906 left,
907 op: Or,
908 right: _,
909 }) if is_true(&left) => Transformed::yes(*left),
910 Expr::BinaryExpr(BinaryExpr {
912 left,
913 op: Or,
914 right,
915 }) if is_false(&left) => Transformed::yes(*right),
916 Expr::BinaryExpr(BinaryExpr {
918 left: _,
919 op: Or,
920 right,
921 }) if is_true(&right) => Transformed::yes(*right),
922 Expr::BinaryExpr(BinaryExpr {
924 left,
925 op: Or,
926 right,
927 }) if is_false(&right) => Transformed::yes(*left),
928 Expr::BinaryExpr(BinaryExpr {
930 left,
931 op: Or,
932 right,
933 }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
934 Transformed::yes(lit(true))
935 }
936 Expr::BinaryExpr(BinaryExpr {
938 left,
939 op: Or,
940 right,
941 }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
942 Transformed::yes(lit(true))
943 }
944 Expr::BinaryExpr(BinaryExpr {
946 left,
947 op: Or,
948 right,
949 }) if expr_contains(&left, &right, Or) => Transformed::yes(*left),
950 Expr::BinaryExpr(BinaryExpr {
952 left,
953 op: Or,
954 right,
955 }) if expr_contains(&right, &left, Or) => Transformed::yes(*right),
956 Expr::BinaryExpr(BinaryExpr {
958 left,
959 op: Or,
960 right,
961 }) if is_op_with(And, &right, &left) => Transformed::yes(*left),
962 Expr::BinaryExpr(BinaryExpr {
964 left,
965 op: Or,
966 right,
967 }) if is_op_with(And, &left, &right) => Transformed::yes(*right),
968 Expr::BinaryExpr(BinaryExpr {
971 left,
972 op: Or,
973 right,
974 }) if has_common_conjunction(&left, &right) => {
975 let lhs: IndexSet<Expr> = iter_conjunction_owned(*left).collect();
976 let (common, rhs): (Vec<_>, Vec<_>) = iter_conjunction_owned(*right)
977 .partition(|e| lhs.contains(e) && !e.is_volatile());
978
979 let new_rhs = rhs.into_iter().reduce(and);
980 let new_lhs = lhs.into_iter().filter(|e| !common.contains(e)).reduce(and);
981 let common_conjunction = common.into_iter().reduce(and).unwrap();
982
983 let new_expr = match (new_lhs, new_rhs) {
984 (Some(lhs), Some(rhs)) => and(common_conjunction, or(lhs, rhs)),
985 (_, _) => common_conjunction,
986 };
987 Transformed::yes(new_expr)
988 }
989
990 Expr::BinaryExpr(BinaryExpr {
996 left,
997 op: And,
998 right,
999 }) if is_true(&left) => Transformed::yes(*right),
1000 Expr::BinaryExpr(BinaryExpr {
1002 left,
1003 op: And,
1004 right: _,
1005 }) if is_false(&left) => Transformed::yes(*left),
1006 Expr::BinaryExpr(BinaryExpr {
1008 left,
1009 op: And,
1010 right,
1011 }) if is_true(&right) => Transformed::yes(*left),
1012 Expr::BinaryExpr(BinaryExpr {
1014 left: _,
1015 op: And,
1016 right,
1017 }) if is_false(&right) => Transformed::yes(*right),
1018 Expr::BinaryExpr(BinaryExpr {
1020 left,
1021 op: And,
1022 right,
1023 }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
1024 Transformed::yes(lit(false))
1025 }
1026 Expr::BinaryExpr(BinaryExpr {
1028 left,
1029 op: And,
1030 right,
1031 }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
1032 Transformed::yes(lit(false))
1033 }
1034 Expr::BinaryExpr(BinaryExpr {
1036 left,
1037 op: And,
1038 right,
1039 }) if expr_contains(&left, &right, And) => Transformed::yes(*left),
1040 Expr::BinaryExpr(BinaryExpr {
1042 left,
1043 op: And,
1044 right,
1045 }) if expr_contains(&right, &left, And) => Transformed::yes(*right),
1046 Expr::BinaryExpr(BinaryExpr {
1048 left,
1049 op: And,
1050 right,
1051 }) if is_op_with(Or, &right, &left) => Transformed::yes(*left),
1052 Expr::BinaryExpr(BinaryExpr {
1054 left,
1055 op: And,
1056 right,
1057 }) if is_op_with(Or, &left, &right) => Transformed::yes(*right),
1058 Expr::BinaryExpr(BinaryExpr {
1060 left,
1061 op: And,
1062 right,
1063 }) if can_reduce_to_equal_statement(&left, &right) => {
1064 if let Expr::BinaryExpr(BinaryExpr {
1065 left: left_left,
1066 right: left_right,
1067 ..
1068 }) = *left
1069 {
1070 Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1071 left: left_left,
1072 op: Eq,
1073 right: left_right,
1074 }))
1075 } else {
1076 return internal_err!(
1077 "can_reduce_to_equal_statement should only be called with a BinaryExpr"
1078 );
1079 }
1080 }
1081 Expr::BinaryExpr(BinaryExpr {
1083 left,
1084 op: And,
1085 right,
1086 }) if is_eq_and_ne_with_different_literal(&left, &right) => {
1087 Transformed::yes(*left)
1088 }
1089 Expr::BinaryExpr(BinaryExpr {
1091 left,
1092 op: And,
1093 right,
1094 }) if is_eq_and_ne_with_different_literal(&right, &left) => {
1095 Transformed::yes(*right)
1096 }
1097
1098 Expr::BinaryExpr(BinaryExpr {
1104 left,
1105 op: Multiply,
1106 right,
1107 }) if is_one(&right) => {
1108 simplify_right_is_one_case(info, left, &Multiply, &right)?
1109 }
1110 Expr::BinaryExpr(BinaryExpr {
1112 left,
1113 op: Multiply,
1114 right,
1115 }) if is_one(&left) => {
1116 simplify_right_is_one_case(info, right, &Multiply, &left)?
1118 }
1119
1120 Expr::BinaryExpr(BinaryExpr {
1122 left,
1123 op: Multiply,
1124 right,
1125 }) if !info.nullable(&left)?
1126 && !info.get_data_type(&left)?.is_floating()
1127 && is_zero(&right) =>
1128 {
1129 Transformed::yes(*right)
1130 }
1131 Expr::BinaryExpr(BinaryExpr {
1133 left,
1134 op: Multiply,
1135 right,
1136 }) if !info.nullable(&right)?
1137 && !info.get_data_type(&right)?.is_floating()
1138 && is_zero(&left) =>
1139 {
1140 Transformed::yes(*left)
1141 }
1142
1143 Expr::BinaryExpr(BinaryExpr {
1149 left,
1150 op: Divide,
1151 right,
1152 }) if is_one(&right) => {
1153 simplify_right_is_one_case(info, left, &Divide, &right)?
1154 }
1155
1156 Expr::BinaryExpr(BinaryExpr {
1162 left,
1163 op: Modulo,
1164 right,
1165 }) if !info.nullable(&left)?
1166 && !info.get_data_type(&left)?.is_floating()
1167 && is_one(&right) =>
1168 {
1169 Transformed::yes(Expr::Literal(
1170 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1171 None,
1172 ))
1173 }
1174
1175 Expr::BinaryExpr(BinaryExpr {
1181 left,
1182 op: BitwiseAnd,
1183 right,
1184 }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*right),
1185
1186 Expr::BinaryExpr(BinaryExpr {
1188 left,
1189 op: BitwiseAnd,
1190 right,
1191 }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*left),
1192
1193 Expr::BinaryExpr(BinaryExpr {
1195 left,
1196 op: BitwiseAnd,
1197 right,
1198 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1199 Transformed::yes(Expr::Literal(
1200 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1201 None,
1202 ))
1203 }
1204
1205 Expr::BinaryExpr(BinaryExpr {
1207 left,
1208 op: BitwiseAnd,
1209 right,
1210 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1211 Transformed::yes(Expr::Literal(
1212 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1213 None,
1214 ))
1215 }
1216
1217 Expr::BinaryExpr(BinaryExpr {
1219 left,
1220 op: BitwiseAnd,
1221 right,
1222 }) if expr_contains(&left, &right, BitwiseAnd) => Transformed::yes(*left),
1223
1224 Expr::BinaryExpr(BinaryExpr {
1226 left,
1227 op: BitwiseAnd,
1228 right,
1229 }) if expr_contains(&right, &left, BitwiseAnd) => Transformed::yes(*right),
1230
1231 Expr::BinaryExpr(BinaryExpr {
1233 left,
1234 op: BitwiseAnd,
1235 right,
1236 }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => {
1237 Transformed::yes(*left)
1238 }
1239
1240 Expr::BinaryExpr(BinaryExpr {
1242 left,
1243 op: BitwiseAnd,
1244 right,
1245 }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => {
1246 Transformed::yes(*right)
1247 }
1248
1249 Expr::BinaryExpr(BinaryExpr {
1255 left,
1256 op: BitwiseOr,
1257 right,
1258 }) if is_zero(&right) => Transformed::yes(*left),
1259
1260 Expr::BinaryExpr(BinaryExpr {
1262 left,
1263 op: BitwiseOr,
1264 right,
1265 }) if is_zero(&left) => Transformed::yes(*right),
1266
1267 Expr::BinaryExpr(BinaryExpr {
1269 left,
1270 op: BitwiseOr,
1271 right,
1272 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1273 Transformed::yes(Expr::Literal(
1274 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1275 None,
1276 ))
1277 }
1278
1279 Expr::BinaryExpr(BinaryExpr {
1281 left,
1282 op: BitwiseOr,
1283 right,
1284 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1285 Transformed::yes(Expr::Literal(
1286 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1287 None,
1288 ))
1289 }
1290
1291 Expr::BinaryExpr(BinaryExpr {
1293 left,
1294 op: BitwiseOr,
1295 right,
1296 }) if expr_contains(&left, &right, BitwiseOr) => Transformed::yes(*left),
1297
1298 Expr::BinaryExpr(BinaryExpr {
1300 left,
1301 op: BitwiseOr,
1302 right,
1303 }) if expr_contains(&right, &left, BitwiseOr) => Transformed::yes(*right),
1304
1305 Expr::BinaryExpr(BinaryExpr {
1307 left,
1308 op: BitwiseOr,
1309 right,
1310 }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => {
1311 Transformed::yes(*left)
1312 }
1313
1314 Expr::BinaryExpr(BinaryExpr {
1316 left,
1317 op: BitwiseOr,
1318 right,
1319 }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => {
1320 Transformed::yes(*right)
1321 }
1322
1323 Expr::BinaryExpr(BinaryExpr {
1329 left,
1330 op: BitwiseXor,
1331 right,
1332 }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*left),
1333
1334 Expr::BinaryExpr(BinaryExpr {
1336 left,
1337 op: BitwiseXor,
1338 right,
1339 }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*right),
1340
1341 Expr::BinaryExpr(BinaryExpr {
1343 left,
1344 op: BitwiseXor,
1345 right,
1346 }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1347 Transformed::yes(Expr::Literal(
1348 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1349 None,
1350 ))
1351 }
1352
1353 Expr::BinaryExpr(BinaryExpr {
1355 left,
1356 op: BitwiseXor,
1357 right,
1358 }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1359 Transformed::yes(Expr::Literal(
1360 ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1361 None,
1362 ))
1363 }
1364
1365 Expr::BinaryExpr(BinaryExpr {
1367 left,
1368 op: BitwiseXor,
1369 right,
1370 }) if expr_contains(&left, &right, BitwiseXor) => {
1371 let expr = delete_xor_in_complex_expr(&left, &right, false);
1372 Transformed::yes(if expr == *right {
1373 Expr::Literal(
1374 ScalarValue::new_zero(&info.get_data_type(&right)?)?,
1375 None,
1376 )
1377 } else {
1378 expr
1379 })
1380 }
1381
1382 Expr::BinaryExpr(BinaryExpr {
1384 left,
1385 op: BitwiseXor,
1386 right,
1387 }) if expr_contains(&right, &left, BitwiseXor) => {
1388 let expr = delete_xor_in_complex_expr(&right, &left, true);
1389 Transformed::yes(if expr == *left {
1390 Expr::Literal(
1391 ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1392 None,
1393 )
1394 } else {
1395 expr
1396 })
1397 }
1398
1399 Expr::BinaryExpr(BinaryExpr {
1405 left,
1406 op: BitwiseShiftRight,
1407 right,
1408 }) if is_zero(&right) => Transformed::yes(*left),
1409
1410 Expr::BinaryExpr(BinaryExpr {
1416 left,
1417 op: BitwiseShiftLeft,
1418 right,
1419 }) if is_zero(&right) => Transformed::yes(*left),
1420
1421 Expr::Not(inner) => Transformed::yes(negate_clause(*inner)),
1425
1426 Expr::Negative(inner) => Transformed::yes(distribute_negation(*inner)),
1430
1431 Expr::BinaryExpr(BinaryExpr {
1439 left,
1440 op: op @ (Eq | NotEq),
1441 right,
1442 }) if is_case_with_literal_outputs(&left) && is_lit(&right) => {
1443 let case = into_case(*left)?;
1444 Transformed::yes(Expr::Case(Case {
1445 expr: None,
1446 when_then_expr: case
1447 .when_then_expr
1448 .into_iter()
1449 .map(|(when, then)| {
1450 (
1451 when,
1452 Box::new(Expr::BinaryExpr(BinaryExpr {
1453 left: then,
1454 op,
1455 right: right.clone(),
1456 })),
1457 )
1458 })
1459 .collect(),
1460 else_expr: case.else_expr.map(|els| {
1461 Box::new(Expr::BinaryExpr(BinaryExpr {
1462 left: els,
1463 op,
1464 right,
1465 }))
1466 }),
1467 }))
1468 }
1469
1470 Expr::Case(Case {
1476 expr: None,
1477 when_then_expr,
1478 mut else_expr,
1479 }) if when_then_expr
1480 .iter()
1481 .any(|(when, _)| is_true(when.as_ref()) || is_false(when.as_ref())) =>
1482 {
1483 let out_type = info.get_data_type(&when_then_expr[0].1)?;
1484 let mut new_when_then_expr = Vec::with_capacity(when_then_expr.len());
1485
1486 for (when, then) in when_then_expr.into_iter() {
1487 if is_true(when.as_ref()) {
1488 else_expr = Some(then);
1491 break;
1492 } else if !is_false(when.as_ref()) {
1493 new_when_then_expr.push((when, then));
1494 }
1495 }
1497
1498 if new_when_then_expr.is_empty() {
1500 if let Some(else_expr) = else_expr {
1502 return Ok(Transformed::yes(*else_expr));
1503 } else {
1505 let null =
1506 Expr::Literal(ScalarValue::try_new_null(&out_type)?, None);
1507 return Ok(Transformed::yes(null));
1508 }
1509 }
1510
1511 Transformed::yes(Expr::Case(Case {
1512 expr: None,
1513 when_then_expr: new_when_then_expr,
1514 else_expr,
1515 }))
1516 }
1517
1518 Expr::Case(Case {
1530 expr: None,
1531 when_then_expr,
1532 else_expr,
1533 }) if !when_then_expr.is_empty()
1534 && (when_then_expr.len() < 3 || (when_then_expr.iter().all(|(_, then)| is_bool_lit(then))
1538 && when_then_expr.iter().filter(|(_, then)| is_true(then)).count() < 3))
1539 && info.is_boolean_type(&when_then_expr[0].1)? =>
1540 {
1541 let mut filter_expr = lit(false);
1543 let mut out_expr = lit(false);
1545
1546 for (when, then) in when_then_expr {
1547 let when = is_exactly_true(*when, info)?;
1548 let case_expr =
1549 when.clone().and(filter_expr.clone().not()).and(*then);
1550
1551 out_expr = out_expr.or(case_expr);
1552 filter_expr = filter_expr.or(when);
1553 }
1554
1555 let else_expr = else_expr.map(|b| *b).unwrap_or_else(lit_bool_null);
1556 let case_expr = filter_expr.not().and(else_expr);
1557 out_expr = out_expr.or(case_expr);
1558
1559 out_expr.rewrite(self)?
1561 }
1562 Expr::Case(Case {
1583 expr: None,
1584 when_then_expr,
1585 else_expr,
1586 }) if !when_then_expr.is_empty()
1587 && when_then_expr
1588 .iter()
1589 .all(|(_, then)| is_bool_lit(then)) && when_then_expr
1592 .iter()
1593 .filter(|(_, then)| is_false(then))
1594 .count()
1595 < 3
1596 && else_expr.as_deref().is_none_or(is_bool_lit) =>
1597 {
1598 Transformed::yes(
1599 Expr::Case(Case {
1600 expr: None,
1601 when_then_expr: when_then_expr
1602 .into_iter()
1603 .map(|(when, then)| (when, Box::new(Expr::Not(then))))
1604 .collect(),
1605 else_expr: else_expr
1606 .map(|else_expr| Box::new(Expr::Not(else_expr))),
1607 })
1608 .not(),
1609 )
1610 }
1611 Expr::ScalarFunction(ScalarFunction { func: udf, args }) => {
1612 match udf.simplify(args, info)? {
1613 ExprSimplifyResult::Original(args) => {
1614 Transformed::no(Expr::ScalarFunction(ScalarFunction {
1615 func: udf,
1616 args,
1617 }))
1618 }
1619 ExprSimplifyResult::Simplified(expr) => Transformed::yes(expr),
1620 }
1621 }
1622
1623 Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction {
1624 ref func,
1625 ..
1626 }) => match (func.simplify(), expr) {
1627 (Some(simplify_function), Expr::AggregateFunction(af)) => {
1628 Transformed::yes(simplify_function(af, info)?)
1629 }
1630 (_, expr) => Transformed::no(expr),
1631 },
1632
1633 Expr::WindowFunction(ref window_fun) => match (window_fun.simplify(), expr) {
1634 (Some(simplify_function), Expr::WindowFunction(wf)) => {
1635 Transformed::yes(simplify_function(*wf, info)?)
1636 }
1637 (_, expr) => Transformed::no(expr),
1638 },
1639
1640 Expr::Between(between) => Transformed::yes(if between.negated {
1647 let l = *between.expr.clone();
1648 let r = *between.expr;
1649 or(l.lt(*between.low), r.gt(*between.high))
1650 } else {
1651 and(
1652 between.expr.clone().gt_eq(*between.low),
1653 between.expr.lt_eq(*between.high),
1654 )
1655 }),
1656
1657 Expr::BinaryExpr(BinaryExpr {
1661 left,
1662 op: op @ (RegexMatch | RegexNotMatch | RegexIMatch | RegexNotIMatch),
1663 right,
1664 }) => simplify_regex_expr(left, op, right)?,
1665
1666 Expr::Like(like) => {
1668 let escape_char = like.escape_char.unwrap_or('\\');
1670
1671 match StringScalar::try_from_expr(&like.pattern) {
1672 Some(string_scalar) => {
1673 let pattern_str = string_scalar.as_str();
1674 match pattern_str {
1675 None => return Ok(Transformed::yes(lit_bool_null())),
1676 Some("%") => {
1677 let result_for_non_null = lit(!like.negated);
1684 Transformed::yes(if !info.nullable(&like.expr)? {
1685 result_for_non_null
1686 } else {
1687 Expr::Case(Case {
1688 expr: Some(Box::new(Expr::IsNotNull(like.expr))),
1689 when_then_expr: vec![(
1690 Box::new(lit(true)),
1691 Box::new(result_for_non_null),
1692 )],
1693 else_expr: None,
1694 })
1695 })
1696 }
1697 Some(pattern_str)
1698 if pattern_str.contains("%%")
1699 && !pattern_str.contains(escape_char) =>
1700 {
1701 static LIKE_REGEX: LazyLock<Regex> =
1705 LazyLock::new(|| Regex::new("%%+").unwrap());
1706 let simplified_pattern =
1707 LIKE_REGEX.replace_all(pattern_str, "%").to_string();
1708 Transformed::yes(Expr::Like(Like {
1709 pattern: Box::new(
1710 string_scalar.to_expr(&simplified_pattern),
1711 ),
1712 ..like
1713 }))
1714 }
1715 Some(pattern_str)
1716 if !like.case_insensitive
1717 && !pattern_str
1718 .contains(['%', '_', escape_char].as_ref()) =>
1719 {
1720 Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1723 left: like.expr.clone(),
1724 op: if like.negated { NotEq } else { Eq },
1725 right: like.pattern.clone(),
1726 }))
1727 }
1728
1729 Some(_pattern_str) => Transformed::no(Expr::Like(like)),
1730 }
1731 }
1732 None => Transformed::no(Expr::Like(like)),
1733 }
1734 }
1735
1736 Expr::IsNotNull(expr) | Expr::IsNotUnknown(expr)
1738 if !info.nullable(&expr)? =>
1739 {
1740 Transformed::yes(lit(true))
1741 }
1742
1743 Expr::IsNull(expr) | Expr::IsUnknown(expr) if !info.nullable(&expr)? => {
1745 Transformed::yes(lit(false))
1746 }
1747
1748 Expr::InList(InList {
1751 expr: _,
1752 list,
1753 negated,
1754 }) if list.is_empty() => Transformed::yes(lit(negated)),
1755
1756 Expr::InList(InList {
1759 expr,
1760 list,
1761 negated: _,
1762 }) if is_null(expr.as_ref()) && !list.is_empty() => {
1763 Transformed::yes(lit_bool_null())
1764 }
1765
1766 Expr::InList(InList {
1768 expr,
1769 mut list,
1770 negated,
1771 }) if list.len() == 1
1772 && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
1773 {
1774 let Expr::ScalarSubquery(subquery) = list.remove(0) else {
1775 unreachable!()
1776 };
1777
1778 Transformed::yes(Expr::InSubquery(InSubquery::new(
1779 expr, subquery, negated,
1780 )))
1781 }
1782
1783 Expr::BinaryExpr(BinaryExpr {
1787 left,
1788 op: Or,
1789 right,
1790 }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
1791 let lhs = to_inlist(*left).unwrap();
1792 let rhs = to_inlist(*right).unwrap();
1793 #[allow(clippy::allow_attributes, clippy::mutable_key_type)]
1794 let mut seen: HashSet<Expr> = HashSet::new();
1796 let list = lhs
1797 .list
1798 .into_iter()
1799 .chain(rhs.list)
1800 .filter(|e| seen.insert(e.to_owned()))
1801 .collect::<Vec<_>>();
1802
1803 let merged_inlist = InList {
1804 expr: lhs.expr,
1805 list,
1806 negated: false,
1807 };
1808
1809 Transformed::yes(Expr::InList(merged_inlist))
1810 }
1811
1812 Expr::BinaryExpr(BinaryExpr {
1829 left,
1830 op: And,
1831 right,
1832 }) if are_inlist_and_eq_and_match_neg(
1833 left.as_ref(),
1834 right.as_ref(),
1835 false,
1836 false,
1837 ) =>
1838 {
1839 match (*left, *right) {
1840 (Expr::InList(l1), Expr::InList(l2)) => {
1841 return inlist_intersection(l1, &l2, false).map(Transformed::yes);
1842 }
1843 _ => unreachable!(),
1845 }
1846 }
1847
1848 Expr::BinaryExpr(BinaryExpr {
1849 left,
1850 op: And,
1851 right,
1852 }) if are_inlist_and_eq_and_match_neg(
1853 left.as_ref(),
1854 right.as_ref(),
1855 true,
1856 true,
1857 ) =>
1858 {
1859 match (*left, *right) {
1860 (Expr::InList(l1), Expr::InList(l2)) => {
1861 return inlist_union(l1, l2, true).map(Transformed::yes);
1862 }
1863 _ => unreachable!(),
1865 }
1866 }
1867
1868 Expr::BinaryExpr(BinaryExpr {
1869 left,
1870 op: And,
1871 right,
1872 }) if are_inlist_and_eq_and_match_neg(
1873 left.as_ref(),
1874 right.as_ref(),
1875 false,
1876 true,
1877 ) =>
1878 {
1879 match (*left, *right) {
1880 (Expr::InList(l1), Expr::InList(l2)) => {
1881 return inlist_except(l1, &l2).map(Transformed::yes);
1882 }
1883 _ => unreachable!(),
1885 }
1886 }
1887
1888 Expr::BinaryExpr(BinaryExpr {
1889 left,
1890 op: And,
1891 right,
1892 }) if are_inlist_and_eq_and_match_neg(
1893 left.as_ref(),
1894 right.as_ref(),
1895 true,
1896 false,
1897 ) =>
1898 {
1899 match (*left, *right) {
1900 (Expr::InList(l1), Expr::InList(l2)) => {
1901 return inlist_except(l2, &l1).map(Transformed::yes);
1902 }
1903 _ => unreachable!(),
1905 }
1906 }
1907
1908 Expr::BinaryExpr(BinaryExpr {
1909 left,
1910 op: Or,
1911 right,
1912 }) if are_inlist_and_eq_and_match_neg(
1913 left.as_ref(),
1914 right.as_ref(),
1915 true,
1916 true,
1917 ) =>
1918 {
1919 match (*left, *right) {
1920 (Expr::InList(l1), Expr::InList(l2)) => {
1921 return inlist_intersection(l1, &l2, true).map(Transformed::yes);
1922 }
1923 _ => unreachable!(),
1925 }
1926 }
1927
1928 Expr::BinaryExpr(BinaryExpr { left, op, right })
1935 if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1936 info, &left, op, &right,
1937 ) && op.supports_propagation() =>
1938 {
1939 unwrap_cast_in_comparison_for_binary(info, *left, *right, op)?
1940 }
1941 Expr::BinaryExpr(BinaryExpr { left, op, right })
1945 if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1946 info, &right, op, &left,
1947 ) && op.supports_propagation()
1948 && op.swap().is_some() =>
1949 {
1950 unwrap_cast_in_comparison_for_binary(
1951 info,
1952 *right,
1953 *left,
1954 op.swap().unwrap(),
1955 )?
1956 }
1957 Expr::InList(InList {
1960 expr: mut left,
1961 list,
1962 negated,
1963 }) if is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist(
1964 info, &left, &list,
1965 ) =>
1966 {
1967 let (Expr::TryCast(TryCast {
1968 expr: left_expr, ..
1969 })
1970 | Expr::Cast(Cast {
1971 expr: left_expr, ..
1972 })) = left.as_mut()
1973 else {
1974 return internal_err!("Expect cast expr, but got {:?}", left)?;
1975 };
1976
1977 let expr_type = info.get_data_type(left_expr)?;
1978 let right_exprs = list
1979 .into_iter()
1980 .map(|right| {
1981 match right {
1982 Expr::Literal(right_lit_value, _) => {
1983 let Some(value) = try_cast_literal_to_type(&right_lit_value, &expr_type) else {
1986 internal_err!(
1987 "Can't cast the list expr {:?} to type {}",
1988 right_lit_value, &expr_type
1989 )?
1990 };
1991 Ok(lit(value))
1992 }
1993 other_expr => internal_err!(
1994 "Only support literal expr to optimize, but the expr is {:?}",
1995 &other_expr
1996 ),
1997 }
1998 })
1999 .collect::<Result<Vec<_>>>()?;
2000
2001 Transformed::yes(Expr::InList(InList {
2002 expr: std::mem::take(left_expr),
2003 list: right_exprs,
2004 negated,
2005 }))
2006 }
2007
2008 Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2017 use datafusion_expr::Operator::*;
2018 let is_preimage_op = matches!(
2019 op,
2020 Eq | NotEq
2021 | Lt
2022 | LtEq
2023 | Gt
2024 | GtEq
2025 | IsDistinctFrom
2026 | IsNotDistinctFrom
2027 );
2028 if !is_preimage_op || is_null(&right) {
2029 return Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
2030 left,
2031 op,
2032 right,
2033 })));
2034 }
2035
2036 if let PreimageResult::Range { interval, expr } =
2037 get_preimage(left.as_ref(), right.as_ref(), info)?
2038 {
2039 rewrite_with_preimage(*interval, op, expr)?
2040 } else if let Some(swapped) = op.swap() {
2041 if let PreimageResult::Range { interval, expr } =
2042 get_preimage(right.as_ref(), left.as_ref(), info)?
2043 {
2044 rewrite_with_preimage(*interval, swapped, expr)?
2045 } else {
2046 Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, right }))
2047 }
2048 } else {
2049 Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, right }))
2050 }
2051 }
2052 Expr::InList(InList {
2055 expr,
2056 list,
2057 negated,
2058 }) => {
2059 if list.len() > THRESHOLD_INLINE_INLIST || list.iter().any(is_null) {
2060 return Ok(Transformed::no(Expr::InList(InList {
2061 expr,
2062 list,
2063 negated,
2064 })));
2065 }
2066
2067 let (op, combiner): (Operator, fn(Expr, Expr) -> Expr) =
2068 if negated { (NotEq, and) } else { (Eq, or) };
2069
2070 let mut rewritten: Option<Expr> = None;
2071 for item in &list {
2072 let PreimageResult::Range { interval, expr } =
2073 get_preimage(expr.as_ref(), item, info)?
2074 else {
2075 return Ok(Transformed::no(Expr::InList(InList {
2076 expr,
2077 list,
2078 negated,
2079 })));
2080 };
2081
2082 let range_expr = rewrite_with_preimage(*interval, op, expr)?.data;
2083 rewritten = Some(match rewritten {
2084 None => range_expr,
2085 Some(acc) => combiner(acc, range_expr),
2086 });
2087 }
2088
2089 if let Some(rewritten) = rewritten {
2090 Transformed::yes(rewritten)
2091 } else {
2092 Transformed::no(Expr::InList(InList {
2093 expr,
2094 list,
2095 negated,
2096 }))
2097 }
2098 }
2099
2100 expr => Transformed::no(expr),
2102 })
2103 }
2104}
2105
2106fn get_preimage(
2107 left_expr: &Expr,
2108 right_expr: &Expr,
2109 info: &SimplifyContext,
2110) -> Result<PreimageResult> {
2111 let Expr::ScalarFunction(ScalarFunction { func, args }) = left_expr else {
2112 return Ok(PreimageResult::None);
2113 };
2114 if !is_literal_or_literal_cast(right_expr) {
2115 return Ok(PreimageResult::None);
2116 }
2117 if func.signature().volatility != Volatility::Immutable {
2118 return Ok(PreimageResult::None);
2119 }
2120 func.preimage(args, right_expr, info)
2121}
2122
2123fn is_literal_or_literal_cast(expr: &Expr) -> bool {
2124 match expr {
2125 Expr::Literal(_, _) => true,
2126 Expr::Cast(Cast { expr, .. }) => matches!(expr.as_ref(), Expr::Literal(_, _)),
2127 Expr::TryCast(TryCast { expr, .. }) => {
2128 matches!(expr.as_ref(), Expr::Literal(_, _))
2129 }
2130 _ => false,
2131 }
2132}
2133
2134pub(crate) enum StringScalar<'a> {
2136 Utf8(&'a ScalarValue),
2137 LargeUtf8(&'a ScalarValue),
2138 Utf8View(&'a ScalarValue),
2139}
2140
2141impl<'a> StringScalar<'a> {
2142 pub(crate) fn try_from_expr(expr: &'a Expr) -> Option<Self> {
2145 match expr {
2146 Expr::Literal(scalar, _) => Self::try_from_scalar(scalar),
2147 _ => None,
2148 }
2149 }
2150
2151 fn try_from_scalar(scalar: &'a ScalarValue) -> Option<Self> {
2154 match scalar {
2155 ScalarValue::Utf8(_) => Some(Self::Utf8(scalar)),
2156 ScalarValue::LargeUtf8(_) => Some(Self::LargeUtf8(scalar)),
2157 ScalarValue::Utf8View(_) => Some(Self::Utf8View(scalar)),
2158 _ => None,
2159 }
2160 }
2161
2162 pub(crate) fn as_str(&self) -> Option<&'a str> {
2164 match self {
2165 Self::Utf8(scalar) | Self::LargeUtf8(scalar) | Self::Utf8View(scalar) => {
2166 scalar.try_as_str().flatten()
2167 }
2168 }
2169 }
2170
2171 pub(crate) fn to_expr(&self, val: &str) -> Expr {
2173 match self {
2174 Self::Utf8(_) => Expr::Literal(ScalarValue::Utf8(Some(val.to_owned())), None),
2175 Self::LargeUtf8(_) => {
2176 Expr::Literal(ScalarValue::LargeUtf8(Some(val.to_owned())), None)
2177 }
2178 Self::Utf8View(_) => {
2179 Expr::Literal(ScalarValue::Utf8View(Some(val.to_owned())), None)
2180 }
2181 }
2182 }
2183}
2184
2185#[allow(clippy::allow_attributes, clippy::mutable_key_type)] fn has_common_conjunction(lhs: &Expr, rhs: &Expr) -> bool {
2187 let lhs_set: HashSet<&Expr> = iter_conjunction(lhs).collect();
2188 iter_conjunction(rhs).any(|e| lhs_set.contains(&e) && !e.is_volatile())
2189}
2190
2191fn are_inlist_and_eq_and_match_neg(
2193 left: &Expr,
2194 right: &Expr,
2195 is_left_neg: bool,
2196 is_right_neg: bool,
2197) -> bool {
2198 match (left, right) {
2199 (Expr::InList(l), Expr::InList(r)) => {
2200 l.expr == r.expr && l.negated == is_left_neg && r.negated == is_right_neg
2201 }
2202 _ => false,
2203 }
2204}
2205
2206fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
2208 let left = as_inlist(left);
2209 let right = as_inlist(right);
2210 if let (Some(lhs), Some(rhs)) = (left, right) {
2211 matches!(lhs.expr.as_ref(), Expr::Column(_))
2212 && matches!(rhs.expr.as_ref(), Expr::Column(_))
2213 && lhs.expr == rhs.expr
2214 && !lhs.negated
2215 && !rhs.negated
2216 } else {
2217 false
2218 }
2219}
2220
2221fn as_inlist(expr: &'_ Expr) -> Option<Cow<'_, InList>> {
2223 match expr {
2224 Expr::InList(inlist) => Some(Cow::Borrowed(inlist)),
2225 Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::Eq => {
2226 match (left.as_ref(), right.as_ref()) {
2227 (Expr::Column(_), Expr::Literal(_, _)) => Some(Cow::Owned(InList {
2228 expr: left.clone(),
2229 list: vec![*right.clone()],
2230 negated: false,
2231 })),
2232 (Expr::Literal(_, _), Expr::Column(_)) => Some(Cow::Owned(InList {
2233 expr: right.clone(),
2234 list: vec![*left.clone()],
2235 negated: false,
2236 })),
2237 _ => None,
2238 }
2239 }
2240 _ => None,
2241 }
2242}
2243
2244fn to_inlist(expr: Expr) -> Option<InList> {
2245 match expr {
2246 Expr::InList(inlist) => Some(inlist),
2247 Expr::BinaryExpr(BinaryExpr {
2248 left,
2249 op: Operator::Eq,
2250 right,
2251 }) => match (left.as_ref(), right.as_ref()) {
2252 (Expr::Column(_), Expr::Literal(_, _)) => Some(InList {
2253 expr: left,
2254 list: vec![*right],
2255 negated: false,
2256 }),
2257 (Expr::Literal(_, _), Expr::Column(_)) => Some(InList {
2258 expr: right,
2259 list: vec![*left],
2260 negated: false,
2261 }),
2262 _ => None,
2263 },
2264 _ => None,
2265 }
2266}
2267
2268#[allow(clippy::allow_attributes, clippy::mutable_key_type)] fn inlist_union(mut l1: InList, l2: InList, negated: bool) -> Result<Expr> {
2272 let l1_items: HashSet<_> = l1.list.iter().collect();
2274
2275 let keep_l2: Vec<_> = l2
2277 .list
2278 .into_iter()
2279 .filter_map(|e| if l1_items.contains(&e) { None } else { Some(e) })
2280 .collect();
2281
2282 l1.list.extend(keep_l2);
2283 l1.negated = negated;
2284 Ok(Expr::InList(l1))
2285}
2286
2287#[allow(clippy::allow_attributes, clippy::mutable_key_type)] fn inlist_intersection(mut l1: InList, l2: &InList, negated: bool) -> Result<Expr> {
2291 let l2_items = l2.list.iter().collect::<HashSet<_>>();
2292
2293 l1.list.retain(|e| l2_items.contains(e));
2295
2296 if l1.list.is_empty() {
2299 return Ok(lit(negated));
2300 }
2301 Ok(Expr::InList(l1))
2302}
2303
2304#[allow(clippy::allow_attributes, clippy::mutable_key_type)] fn inlist_except(mut l1: InList, l2: &InList) -> Result<Expr> {
2308 let l2_items = l2.list.iter().collect::<HashSet<_>>();
2309
2310 l1.list.retain(|e| !l2_items.contains(e));
2312
2313 if l1.list.is_empty() {
2314 return Ok(lit(false));
2315 }
2316 Ok(Expr::InList(l1))
2317}
2318
2319fn is_exactly_true(expr: Expr, info: &SimplifyContext) -> Result<Expr> {
2321 if !info.nullable(&expr)? {
2322 Ok(expr)
2323 } else {
2324 Ok(Expr::BinaryExpr(BinaryExpr {
2325 left: Box::new(expr),
2326 op: Operator::IsNotDistinctFrom,
2327 right: Box::new(lit(true)),
2328 }))
2329 }
2330}
2331
2332fn simplify_right_is_one_case(
2337 info: &SimplifyContext,
2338 left: Box<Expr>,
2339 op: &Operator,
2340 right: &Expr,
2341) -> Result<Transformed<Expr>> {
2342 let left_type = info.get_data_type(&left)?;
2344 let right_type = info.get_data_type(right)?;
2345 match BinaryTypeCoercer::new(&left_type, op, &right_type).get_result_type() {
2346 Ok(result_type) => {
2347 if left_type != result_type {
2349 Ok(Transformed::yes(Expr::Cast(Cast::new(left, result_type))))
2350 } else {
2351 Ok(Transformed::yes(*left))
2352 }
2353 }
2354 Err(_) => Ok(Transformed::yes(*left)),
2355 }
2356}
2357
2358#[cfg(test)]
2359mod tests {
2360 use super::*;
2361 use crate::test::test_table_scan_with_name;
2362 use arrow::{
2363 array::{Int32Array, StructArray},
2364 datatypes::{FieldRef, Fields},
2365 };
2366 use datafusion_common::{DFSchemaRef, ToDFSchema, assert_contains};
2367 use datafusion_expr::{
2368 expr::WindowFunction,
2369 function::{
2370 AccumulatorArgs, AggregateFunctionSimplification,
2371 WindowFunctionSimplification,
2372 },
2373 interval_arithmetic::Interval,
2374 *,
2375 };
2376 use datafusion_functions_window_common::field::WindowUDFFieldArgs;
2377 use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
2378 use datafusion_physical_expr::PhysicalExpr;
2379 use std::hash::Hash;
2380 use std::sync::LazyLock;
2381 use std::{
2382 collections::HashMap,
2383 ops::{BitAnd, BitOr, BitXor},
2384 sync::Arc,
2385 };
2386
2387 #[test]
2391 fn api_basic() {
2392 let simplifier = ExprSimplifier::new(
2393 SimplifyContext::builder()
2394 .with_schema(test_schema())
2395 .build(),
2396 );
2397
2398 let expr = lit(1) + lit(2);
2399 let expected = lit(3);
2400 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2401 }
2402
2403 #[test]
2404 fn basic_coercion() {
2405 let schema = test_schema();
2406 let simplifier = ExprSimplifier::new(
2407 SimplifyContext::builder()
2408 .with_schema(Arc::clone(&schema))
2409 .build(),
2410 );
2411
2412 let expr = (lit(1i64) + lit(2i32)).lt(col("i"));
2415 let expected = lit(3i64).lt(col("i"));
2417
2418 let expr = simplifier.coerce(expr, &schema).unwrap();
2419
2420 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2421 }
2422
2423 fn test_schema() -> DFSchemaRef {
2424 static TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
2425 Schema::new(vec![
2426 Field::new("i", DataType::Int64, false),
2427 Field::new("b", DataType::Boolean, true),
2428 ])
2429 .to_dfschema_ref()
2430 .unwrap()
2431 });
2432 Arc::clone(&TEST_SCHEMA)
2433 }
2434
2435 #[test]
2436 fn simplify_and_constant_prop() {
2437 let simplifier = ExprSimplifier::new(
2438 SimplifyContext::builder()
2439 .with_schema(test_schema())
2440 .build(),
2441 );
2442
2443 let expr = (col("i") * (lit(1) - lit(1))).gt(lit(0));
2446 let expected = lit(false);
2447 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2448 }
2449
2450 #[test]
2451 fn simplify_and_constant_prop_with_case() {
2452 let simplifier = ExprSimplifier::new(
2453 SimplifyContext::builder()
2454 .with_schema(test_schema())
2455 .build(),
2456 );
2457
2458 let expr = when(col("i").gt(lit(5)).and(lit(false)), col("i").gt(lit(5)))
2466 .when(col("i").lt(lit(5)).and(lit(true)), col("i").lt(lit(5)))
2467 .otherwise(lit(false))
2468 .unwrap();
2469 let expected = col("i").lt(lit(5));
2470 assert_eq!(expected, simplifier.simplify(expr).unwrap());
2471 }
2472
2473 #[test]
2478 fn test_simplify_canonicalize() {
2479 {
2480 let expr = lit(1).lt(col("c2")).and(col("c2").gt(lit(1)));
2481 let expected = col("c2").gt(lit(1));
2482 assert_eq!(simplify(expr), expected);
2483 }
2484 {
2485 let expr = col("c1").lt(col("c2")).and(col("c2").gt(col("c1")));
2486 let expected = col("c2").gt(col("c1"));
2487 assert_eq!(simplify(expr), expected);
2488 }
2489 {
2490 let expr = col("c1")
2491 .eq(lit(1))
2492 .and(lit(1).eq(col("c1")))
2493 .and(col("c1").eq(lit(3)));
2494 let expected = col("c1").eq(lit(1)).and(col("c1").eq(lit(3)));
2495 assert_eq!(simplify(expr), expected);
2496 }
2497 {
2498 let expr = col("c1")
2499 .eq(col("c2"))
2500 .and(col("c1").gt(lit(5)))
2501 .and(col("c2").eq(col("c1")));
2502 let expected = col("c2").eq(col("c1")).and(col("c1").gt(lit(5)));
2503 assert_eq!(simplify(expr), expected);
2504 }
2505 {
2506 let expr = col("c1")
2507 .eq(lit(1))
2508 .and(col("c2").gt(lit(3)).or(lit(3).lt(col("c2"))));
2509 let expected = col("c1").eq(lit(1)).and(col("c2").gt(lit(3)));
2510 assert_eq!(simplify(expr), expected);
2511 }
2512 {
2513 let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2514 let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2515 assert_eq!(simplify(expr), expected);
2516 }
2517 {
2518 let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2519 let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2520 assert_eq!(simplify(expr), expected);
2521 }
2522 {
2523 let expr = col("c1").gt(col("c2")).and(col("c1").gt(col("c2")));
2524 let expected = col("c2").lt(col("c1"));
2525 assert_eq!(simplify(expr), expected);
2526 }
2527 }
2528
2529 #[test]
2530 fn test_simplify_eq_not_self() {
2531 let expr_a = col("c2").eq(col("c2"));
2534 let expected_a = col("c2").is_not_null().or(lit_bool_null());
2535
2536 let expr_b = col("c2_non_null").eq(col("c2_non_null"));
2538 let expected_b = lit(true);
2539
2540 assert_eq!(simplify(expr_a), expected_a);
2541 assert_eq!(simplify(expr_b), expected_b);
2542 }
2543
2544 #[test]
2545 fn test_simplify_or_true() {
2546 let expr_a = col("c2").or(lit(true));
2547 let expr_b = lit(true).or(col("c2"));
2548 let expected = lit(true);
2549
2550 assert_eq!(simplify(expr_a), expected);
2551 assert_eq!(simplify(expr_b), expected);
2552 }
2553
2554 #[test]
2555 fn test_simplify_or_false() {
2556 let expr_a = lit(false).or(col("c2"));
2557 let expr_b = col("c2").or(lit(false));
2558 let expected = col("c2");
2559
2560 assert_eq!(simplify(expr_a), expected);
2561 assert_eq!(simplify(expr_b), expected);
2562 }
2563
2564 #[test]
2565 fn test_simplify_or_same() {
2566 let expr = col("c2").or(col("c2"));
2567 let expected = col("c2");
2568
2569 assert_eq!(simplify(expr), expected);
2570 }
2571
2572 #[test]
2573 fn test_simplify_or_not_self() {
2574 let expr_a = col("c2_non_null").or(col("c2_non_null").not());
2577 let expr_b = col("c2_non_null").not().or(col("c2_non_null"));
2578 let expected = lit(true);
2579
2580 assert_eq!(simplify(expr_a), expected);
2581 assert_eq!(simplify(expr_b), expected);
2582 }
2583
2584 #[test]
2585 fn test_simplify_and_false() {
2586 let expr_a = lit(false).and(col("c2"));
2587 let expr_b = col("c2").and(lit(false));
2588 let expected = lit(false);
2589
2590 assert_eq!(simplify(expr_a), expected);
2591 assert_eq!(simplify(expr_b), expected);
2592 }
2593
2594 #[test]
2595 fn test_simplify_and_same() {
2596 let expr = col("c2").and(col("c2"));
2597 let expected = col("c2");
2598
2599 assert_eq!(simplify(expr), expected);
2600 }
2601
2602 #[test]
2603 fn test_simplify_and_true() {
2604 let expr_a = lit(true).and(col("c2"));
2605 let expr_b = col("c2").and(lit(true));
2606 let expected = col("c2");
2607
2608 assert_eq!(simplify(expr_a), expected);
2609 assert_eq!(simplify(expr_b), expected);
2610 }
2611
2612 #[test]
2613 fn test_simplify_and_not_self() {
2614 let expr_a = col("c2_non_null").and(col("c2_non_null").not());
2617 let expr_b = col("c2_non_null").not().and(col("c2_non_null"));
2618 let expected = lit(false);
2619
2620 assert_eq!(simplify(expr_a), expected);
2621 assert_eq!(simplify(expr_b), expected);
2622 }
2623
2624 #[test]
2625 fn test_simplify_eq_and_neq_with_different_literals() {
2626 let expr = col("c2").eq(lit(1)).and(col("c2").not_eq(lit(0)));
2628 let expected = col("c2").eq(lit(1));
2629 assert_eq!(simplify(expr), expected);
2630
2631 let expr = col("c2").not_eq(lit(0)).and(col("c2").eq(lit(1)));
2633 let expected = col("c2").eq(lit(1));
2634 assert_eq!(simplify(expr), expected);
2635
2636 let expr = col("c2").eq(lit(1)).and(col("c2").not_eq(lit(1)));
2639 let result = simplify(expr.clone());
2641 assert_eq!(result, expr);
2643 }
2644
2645 #[test]
2646 fn test_simplify_multiply_by_one() {
2647 let expr_a = col("c2") * lit(1);
2648 let expr_b = lit(1) * col("c2");
2649 let expected = col("c2");
2650
2651 assert_eq!(simplify(expr_a), expected);
2652 assert_eq!(simplify(expr_b), expected);
2653
2654 let expr = col("c2") * lit(ScalarValue::Decimal128(Some(10000000000), 38, 10));
2655 assert_eq!(simplify(expr), expected);
2656
2657 let expr = lit(ScalarValue::Decimal128(Some(10000000000), 31, 10)) * col("c2");
2658 assert_eq!(simplify(expr), expected);
2659 }
2660
2661 #[test]
2662 fn test_simplify_multiply_by_null() {
2663 let null = lit(ScalarValue::Int64(None));
2664 {
2666 let expr = col("c3") * null.clone();
2667 assert_eq!(simplify(expr), null);
2668 }
2669 {
2671 let expr = null.clone() * col("c3");
2672 assert_eq!(simplify(expr), null);
2673 }
2674 }
2675
2676 #[test]
2677 fn test_simplify_multiply_by_zero() {
2678 {
2680 let expr_a = col("c2") * lit(0);
2681 let expr_b = lit(0) * col("c2");
2682
2683 assert_eq!(simplify(expr_a.clone()), expr_a);
2684 assert_eq!(simplify(expr_b.clone()), expr_b);
2685 }
2686 {
2688 let expr = lit(0) * col("c2_non_null");
2689 assert_eq!(simplify(expr), lit(0));
2690 }
2691 {
2693 let expr = col("c2_non_null") * lit(0);
2694 assert_eq!(simplify(expr), lit(0));
2695 }
2696 {
2698 let expr = col("c2_non_null") * lit(ScalarValue::Decimal128(Some(0), 31, 10));
2699 assert_eq!(
2700 simplify(expr),
2701 lit(ScalarValue::Decimal128(Some(0), 31, 10))
2702 );
2703 let expr = binary_expr(
2704 lit(ScalarValue::Decimal128(Some(0), 31, 10)),
2705 Operator::Multiply,
2706 col("c2_non_null"),
2707 );
2708 assert_eq!(
2709 simplify(expr),
2710 lit(ScalarValue::Decimal128(Some(0), 31, 10))
2711 );
2712 }
2713 }
2714
2715 #[test]
2716 fn test_simplify_divide_by_one() {
2717 let expr = binary_expr(col("c2"), Operator::Divide, lit(1));
2718 let expected = col("c2");
2719 assert_eq!(simplify(expr), expected);
2720 let expr = col("c2") / lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2721 assert_eq!(simplify(expr), expected);
2722 }
2723
2724 #[test]
2725 fn test_simplify_divide_null() {
2726 let null = lit(ScalarValue::Int64(None));
2728 {
2729 let expr = col("c3") / null.clone();
2730 assert_eq!(simplify(expr), null);
2731 }
2732 {
2734 let expr = null.clone() / col("c3");
2735 assert_eq!(simplify(expr), null);
2736 }
2737 }
2738
2739 #[test]
2740 fn test_simplify_divide_by_same() {
2741 let expr = col("c2") / col("c2");
2742 let expected = expr.clone();
2744
2745 assert_eq!(simplify(expr), expected);
2746 }
2747
2748 #[test]
2749 fn test_simplify_modulo_by_null() {
2750 let null = lit(ScalarValue::Int64(None));
2751 {
2753 let expr = col("c3") % null.clone();
2754 assert_eq!(simplify(expr), null);
2755 }
2756 {
2758 let expr = null.clone() % col("c3");
2759 assert_eq!(simplify(expr), null);
2760 }
2761 }
2762
2763 #[test]
2764 fn test_simplify_modulo_by_one() {
2765 let expr = col("c2") % lit(1);
2766 let expected = expr.clone();
2768
2769 assert_eq!(simplify(expr), expected);
2770 }
2771
2772 #[test]
2773 fn test_simplify_divide_zero_by_zero() {
2774 let expr = lit(0) / lit(0);
2777 let expected = expr.clone();
2778
2779 assert_eq!(simplify(expr), expected);
2780 }
2781
2782 #[test]
2783 fn test_simplify_divide_by_zero() {
2784 let expr = col("c2_non_null") / lit(0);
2787 let expected = expr.clone();
2788
2789 assert_eq!(simplify(expr), expected);
2790 }
2791
2792 #[test]
2793 fn test_simplify_modulo_by_one_non_null() {
2794 let expr = col("c3_non_null") % lit(1);
2795 let expected = lit(0_i64);
2796 assert_eq!(simplify(expr), expected);
2797 let expr =
2798 col("c3_non_null") % lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2799 assert_eq!(simplify(expr), expected);
2800 }
2801
2802 #[test]
2803 fn test_simplify_bitwise_xor_by_null() {
2804 let null = lit(ScalarValue::Int64(None));
2805 {
2807 let expr = col("c3") ^ null.clone();
2808 assert_eq!(simplify(expr), null);
2809 }
2810 {
2812 let expr = null.clone() ^ col("c3");
2813 assert_eq!(simplify(expr), null);
2814 }
2815 }
2816
2817 #[test]
2818 fn test_simplify_bitwise_shift_right_by_null() {
2819 let null = lit(ScalarValue::Int64(None));
2820 {
2822 let expr = col("c3") >> null.clone();
2823 assert_eq!(simplify(expr), null);
2824 }
2825 {
2827 let expr = null.clone() >> col("c3");
2828 assert_eq!(simplify(expr), null);
2829 }
2830 }
2831
2832 #[test]
2833 fn test_simplify_bitwise_shift_left_by_null() {
2834 let null = lit(ScalarValue::Int64(None));
2835 {
2837 let expr = col("c3") << null.clone();
2838 assert_eq!(simplify(expr), null);
2839 }
2840 {
2842 let expr = null.clone() << col("c3");
2843 assert_eq!(simplify(expr), null);
2844 }
2845 }
2846
2847 #[test]
2848 fn test_simplify_bitwise_and_by_zero() {
2849 {
2851 let expr = col("c2_non_null") & lit(0);
2852 assert_eq!(simplify(expr), lit(0));
2853 }
2854 {
2856 let expr = lit(0) & col("c2_non_null");
2857 assert_eq!(simplify(expr), lit(0));
2858 }
2859 }
2860
2861 #[test]
2862 fn test_simplify_bitwise_or_by_zero() {
2863 {
2865 let expr = col("c2_non_null") | lit(0);
2866 assert_eq!(simplify(expr), col("c2_non_null"));
2867 }
2868 {
2870 let expr = lit(0) | col("c2_non_null");
2871 assert_eq!(simplify(expr), col("c2_non_null"));
2872 }
2873 }
2874
2875 #[test]
2876 fn test_simplify_bitwise_xor_by_zero() {
2877 {
2879 let expr = col("c2_non_null") ^ lit(0);
2880 assert_eq!(simplify(expr), col("c2_non_null"));
2881 }
2882 {
2884 let expr = lit(0) ^ col("c2_non_null");
2885 assert_eq!(simplify(expr), col("c2_non_null"));
2886 }
2887 }
2888
2889 #[test]
2890 fn test_simplify_bitwise_bitwise_shift_right_by_zero() {
2891 {
2893 let expr = col("c2_non_null") >> lit(0);
2894 assert_eq!(simplify(expr), col("c2_non_null"));
2895 }
2896 }
2897
2898 #[test]
2899 fn test_simplify_bitwise_bitwise_shift_left_by_zero() {
2900 {
2902 let expr = col("c2_non_null") << lit(0);
2903 assert_eq!(simplify(expr), col("c2_non_null"));
2904 }
2905 }
2906
2907 #[test]
2908 fn test_simplify_bitwise_and_by_null() {
2909 let null = Expr::Literal(ScalarValue::Int64(None), None);
2910 {
2912 let expr = col("c3") & null.clone();
2913 assert_eq!(simplify(expr), null);
2914 }
2915 {
2917 let expr = null.clone() & col("c3");
2918 assert_eq!(simplify(expr), null);
2919 }
2920 }
2921
2922 #[test]
2923 fn test_simplify_composed_bitwise_and() {
2924 let expr = bitwise_and(
2927 bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2928 col("c2").gt(lit(5)),
2929 );
2930 let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2931
2932 assert_eq!(simplify(expr), expected);
2933
2934 let expr = bitwise_and(
2937 col("c2").gt(lit(5)),
2938 bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2939 );
2940 let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2941 assert_eq!(simplify(expr), expected);
2942 }
2943
2944 #[test]
2945 fn test_simplify_composed_bitwise_or() {
2946 let expr = bitwise_or(
2949 bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2950 col("c2").gt(lit(5)),
2951 );
2952 let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2953
2954 assert_eq!(simplify(expr), expected);
2955
2956 let expr = bitwise_or(
2959 col("c2").gt(lit(5)),
2960 bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2961 );
2962 let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2963
2964 assert_eq!(simplify(expr), expected);
2965 }
2966
2967 #[test]
2968 fn test_simplify_composed_bitwise_xor() {
2969 let expr = bitwise_xor(
2973 col("c2"),
2974 bitwise_xor(
2975 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2976 bitwise_and(col("c1"), col("c2")),
2977 ),
2978 );
2979
2980 let expected = bitwise_xor(
2981 bitwise_or(col("c2"), col("c1")),
2982 bitwise_and(col("c1"), col("c2")),
2983 );
2984
2985 assert_eq!(simplify(expr), expected);
2986
2987 let expr = bitwise_xor(
2991 col("c2"),
2992 bitwise_xor(
2993 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2994 bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
2995 ),
2996 );
2997
2998 let expected = bitwise_xor(
2999 col("c2"),
3000 bitwise_xor(
3001 bitwise_or(col("c2"), col("c1")),
3002 bitwise_and(col("c1"), col("c2")),
3003 ),
3004 );
3005
3006 assert_eq!(simplify(expr), expected);
3007
3008 let expr = bitwise_xor(
3012 bitwise_xor(
3013 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
3014 bitwise_and(col("c1"), col("c2")),
3015 ),
3016 col("c2"),
3017 );
3018
3019 let expected = bitwise_xor(
3020 bitwise_or(col("c2"), col("c1")),
3021 bitwise_and(col("c1"), col("c2")),
3022 );
3023
3024 assert_eq!(simplify(expr), expected);
3025
3026 let expr = bitwise_xor(
3030 bitwise_xor(
3031 bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
3032 bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
3033 ),
3034 col("c2"),
3035 );
3036
3037 let expected = bitwise_xor(
3038 bitwise_xor(
3039 bitwise_or(col("c2"), col("c1")),
3040 bitwise_and(col("c1"), col("c2")),
3041 ),
3042 col("c2"),
3043 );
3044
3045 assert_eq!(simplify(expr), expected);
3046 }
3047
3048 #[test]
3049 fn test_simplify_negated_bitwise_and() {
3050 let expr = (-col("c4_non_null")) & col("c4_non_null");
3052 let expected = lit(0u32);
3053
3054 assert_eq!(simplify(expr), expected);
3055 let expr = col("c4_non_null") & (-col("c4_non_null"));
3057 let expected = lit(0u32);
3058
3059 assert_eq!(simplify(expr), expected);
3060
3061 let expr = (-col("c3_non_null")) & col("c3_non_null");
3063 let expected = lit(0i64);
3064
3065 assert_eq!(simplify(expr), expected);
3066 let expr = col("c3_non_null") & (-col("c3_non_null"));
3068 let expected = lit(0i64);
3069
3070 assert_eq!(simplify(expr), expected);
3071 }
3072
3073 #[test]
3074 fn test_simplify_negated_bitwise_or() {
3075 let expr = (-col("c4_non_null")) | col("c4_non_null");
3077 let expected = lit(-1i32);
3078
3079 assert_eq!(simplify(expr), expected);
3080
3081 let expr = col("c4_non_null") | (-col("c4_non_null"));
3083 let expected = lit(-1i32);
3084
3085 assert_eq!(simplify(expr), expected);
3086
3087 let expr = (-col("c3_non_null")) | col("c3_non_null");
3089 let expected = lit(-1i64);
3090
3091 assert_eq!(simplify(expr), expected);
3092
3093 let expr = col("c3_non_null") | (-col("c3_non_null"));
3095 let expected = lit(-1i64);
3096
3097 assert_eq!(simplify(expr), expected);
3098 }
3099
3100 #[test]
3101 fn test_simplify_negated_bitwise_xor() {
3102 let expr = (-col("c4_non_null")) ^ col("c4_non_null");
3104 let expected = lit(-1i32);
3105
3106 assert_eq!(simplify(expr), expected);
3107
3108 let expr = col("c4_non_null") ^ (-col("c4_non_null"));
3110 let expected = lit(-1i32);
3111
3112 assert_eq!(simplify(expr), expected);
3113
3114 let expr = (-col("c3_non_null")) ^ col("c3_non_null");
3116 let expected = lit(-1i64);
3117
3118 assert_eq!(simplify(expr), expected);
3119
3120 let expr = col("c3_non_null") ^ (-col("c3_non_null"));
3122 let expected = lit(-1i64);
3123
3124 assert_eq!(simplify(expr), expected);
3125 }
3126
3127 #[test]
3128 fn test_simplify_bitwise_and_or() {
3129 let expr = bitwise_and(
3131 col("c2_non_null").lt(lit(3)),
3132 bitwise_or(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
3133 );
3134 let expected = col("c2_non_null").lt(lit(3));
3135
3136 assert_eq!(simplify(expr), expected);
3137 }
3138
3139 #[test]
3140 fn test_simplify_bitwise_or_and() {
3141 let expr = bitwise_or(
3143 col("c2_non_null").lt(lit(3)),
3144 bitwise_and(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
3145 );
3146 let expected = col("c2_non_null").lt(lit(3));
3147
3148 assert_eq!(simplify(expr), expected);
3149 }
3150
3151 #[test]
3152 fn test_simplify_simple_bitwise_and() {
3153 let expr = (col("c2").gt(lit(5))).bitand(col("c2").gt(lit(5)));
3155 let expected = col("c2").gt(lit(5));
3156
3157 assert_eq!(simplify(expr), expected);
3158 }
3159
3160 #[test]
3161 fn test_simplify_simple_bitwise_or() {
3162 let expr = (col("c2").gt(lit(5))).bitor(col("c2").gt(lit(5)));
3164 let expected = col("c2").gt(lit(5));
3165
3166 assert_eq!(simplify(expr), expected);
3167 }
3168
3169 #[test]
3170 fn test_simplify_simple_bitwise_xor() {
3171 let expr = (col("c4")).bitxor(col("c4"));
3173 let expected = lit(0u32);
3174
3175 assert_eq!(simplify(expr), expected);
3176
3177 let expr = col("c3").bitxor(col("c3"));
3179 let expected = lit(0i64);
3180
3181 assert_eq!(simplify(expr), expected);
3182 }
3183
3184 #[test]
3185 fn test_simplify_modulo_by_zero_non_null() {
3186 let expr = col("c2_non_null") % lit(0);
3189 let expected = expr.clone();
3190
3191 assert_eq!(simplify(expr), expected);
3192 }
3193
3194 #[test]
3195 fn test_simplify_simple_and() {
3196 let expr = (col("c2").gt(lit(5))).and(col("c2").gt(lit(5)));
3198 let expected = col("c2").gt(lit(5));
3199
3200 assert_eq!(simplify(expr), expected);
3201 }
3202
3203 #[test]
3204 fn test_simplify_composed_and() {
3205 let expr = and(
3207 and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
3208 col("c2").gt(lit(5)),
3209 );
3210 let expected = and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
3211
3212 assert_eq!(simplify(expr), expected);
3213 }
3214
3215 #[test]
3216 fn test_simplify_negated_and() {
3217 let expr = and(col("c2").gt(lit(5)), Expr::not(col("c2").gt(lit(5))));
3219 let expected = col("c2").gt(lit(5)).and(col("c2").lt_eq(lit(5)));
3220
3221 assert_eq!(simplify(expr), expected);
3222 }
3223
3224 #[test]
3225 fn test_simplify_or_and() {
3226 let l = col("c2").gt(lit(5));
3227 let r = and(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
3228
3229 let expr = or(l.clone(), r.clone());
3231
3232 let expected = l.clone();
3233 assert_eq!(simplify(expr), expected);
3234
3235 let expr = or(r, l);
3237 assert_eq!(simplify(expr), expected);
3238 }
3239
3240 #[test]
3241 fn test_simplify_or_and_non_null() {
3242 let l = col("c2_non_null").gt(lit(5));
3243 let r = and(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
3244
3245 let expr = or(l.clone(), r.clone());
3247
3248 let expected = col("c2_non_null").gt(lit(5));
3250
3251 assert_eq!(simplify(expr), expected);
3252
3253 let expr = or(l, r);
3255
3256 assert_eq!(simplify(expr), expected);
3257 }
3258
3259 #[test]
3260 fn test_simplify_and_or() {
3261 let l = col("c2").gt(lit(5));
3262 let r = or(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
3263
3264 let expr = and(l.clone(), r.clone());
3266
3267 let expected = l.clone();
3268 assert_eq!(simplify(expr), expected);
3269
3270 let expr = and(r, l);
3272 assert_eq!(simplify(expr), expected);
3273 }
3274
3275 #[test]
3276 fn test_simplify_and_or_non_null() {
3277 let l = col("c2_non_null").gt(lit(5));
3278 let r = or(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
3279
3280 let expr = and(l.clone(), r.clone());
3282
3283 let expected = col("c2_non_null").gt(lit(5));
3285
3286 assert_eq!(simplify(expr), expected);
3287
3288 let expr = and(l, r);
3290
3291 assert_eq!(simplify(expr), expected);
3292 }
3293
3294 #[test]
3295 fn test_simplify_by_de_morgan_laws() {
3296 let expr = and(col("c3"), col("c4")).not();
3299 let expected = or(col("c3").not(), col("c4").not());
3300 assert_eq!(simplify(expr), expected);
3301 let expr = or(col("c3"), col("c4")).not();
3303 let expected = and(col("c3").not(), col("c4").not());
3304 assert_eq!(simplify(expr), expected);
3305 let expr = col("c3").not().not();
3307 let expected = col("c3");
3308 assert_eq!(simplify(expr), expected);
3309
3310 let expr = -bitwise_and(col("c3"), col("c4"));
3313 let expected = bitwise_or(-col("c3"), -col("c4"));
3314 assert_eq!(simplify(expr), expected);
3315 let expr = -bitwise_or(col("c3"), col("c4"));
3317 let expected = bitwise_and(-col("c3"), -col("c4"));
3318 assert_eq!(simplify(expr), expected);
3319 let expr = -(-col("c3"));
3321 let expected = col("c3");
3322 assert_eq!(simplify(expr), expected);
3323 }
3324
3325 #[test]
3326 fn test_simplify_null_and_false() {
3327 let expr = and(lit_bool_null(), lit(false));
3328 let expr_eq = lit(false);
3329
3330 assert_eq!(simplify(expr), expr_eq);
3331 }
3332
3333 #[test]
3334 fn test_simplify_divide_null_by_null() {
3335 let null = lit(ScalarValue::Int32(None));
3336 let expr_plus = null.clone() / null.clone();
3337 let expr_eq = null;
3338
3339 assert_eq!(simplify(expr_plus), expr_eq);
3340 }
3341
3342 #[test]
3343 fn test_simplify_simplify_arithmetic_expr() {
3344 let expr_plus = lit(1) + lit(1);
3345
3346 assert_eq!(simplify(expr_plus), lit(2));
3347 }
3348
3349 #[test]
3350 fn test_simplify_simplify_eq_expr() {
3351 let expr_eq = binary_expr(lit(1), Operator::Eq, lit(1));
3352
3353 assert_eq!(simplify(expr_eq), lit(true));
3354 }
3355
3356 #[test]
3357 fn test_simplify_regex() {
3358 assert_contains!(
3360 try_simplify(regex_match(col("c1"), lit("foo{")))
3361 .unwrap_err()
3362 .to_string(),
3363 "regex parse error"
3364 );
3365
3366 assert_no_change(regex_match(col("c1"), lit("foo.*")));
3368 assert_no_change(regex_match(col("c1"), lit("(foo)")));
3369 assert_no_change(regex_match(col("c1"), lit("%")));
3370 assert_no_change(regex_match(col("c1"), lit("_")));
3371 assert_no_change(regex_match(col("c1"), lit("f%o")));
3372 assert_no_change(regex_match(col("c1"), lit("^f%o")));
3373 assert_no_change(regex_match(col("c1"), lit("f_o")));
3374
3375 assert_change(
3377 regex_match(col("c1"), lit("")),
3378 if_not_null(col("c1"), true),
3379 );
3380 assert_change(
3381 regex_not_match(col("c1"), lit("")),
3382 if_not_null(col("c1"), false),
3383 );
3384 assert_change(
3385 regex_imatch(col("c1"), lit("")),
3386 if_not_null(col("c1"), true),
3387 );
3388 assert_change(
3389 regex_not_imatch(col("c1"), lit("")),
3390 if_not_null(col("c1"), false),
3391 );
3392
3393 assert_change(regex_match(col("c1"), lit("x")), col("c1").like(lit("%x%")));
3395
3396 assert_change(
3398 regex_match(col("c1"), lit("foo")),
3399 col("c1").like(lit("%foo%")),
3400 );
3401
3402 assert_change(regex_match(col("c1"), lit("^$")), col("c1").eq(lit("")));
3404 assert_change(
3405 regex_not_match(col("c1"), lit("^$")),
3406 col("c1").not_eq(lit("")),
3407 );
3408 assert_change(
3409 regex_match(col("c1"), lit("^foo$")),
3410 col("c1").eq(lit("foo")),
3411 );
3412 assert_change(
3413 regex_not_match(col("c1"), lit("^foo$")),
3414 col("c1").not_eq(lit("foo")),
3415 );
3416
3417 assert_change(
3419 regex_match(col("c1"), lit("^(foo|bar)$")),
3420 col("c1").eq(lit("foo")).or(col("c1").eq(lit("bar"))),
3421 );
3422 assert_change(
3423 regex_not_match(col("c1"), lit("^(foo|bar)$")),
3424 col("c1")
3425 .not_eq(lit("foo"))
3426 .and(col("c1").not_eq(lit("bar"))),
3427 );
3428 assert_change(
3429 regex_match(col("c1"), lit("^(foo)$")),
3430 col("c1").eq(lit("foo")),
3431 );
3432 assert_change(
3433 regex_match(col("c1"), lit("^(foo|bar|baz)$")),
3434 ((col("c1").eq(lit("foo"))).or(col("c1").eq(lit("bar"))))
3435 .or(col("c1").eq(lit("baz"))),
3436 );
3437 assert_change(
3438 regex_match(col("c1"), lit("^(foo|bar|baz|qux)$")),
3439 col("c1")
3440 .in_list(vec![lit("foo"), lit("bar"), lit("baz"), lit("qux")], false),
3441 );
3442 assert_change(
3443 regex_match(col("c1"), lit("^(fo_o)$")),
3444 col("c1").eq(lit("fo_o")),
3445 );
3446 assert_change(
3447 regex_match(col("c1"), lit("^(fo_o)$")),
3448 col("c1").eq(lit("fo_o")),
3449 );
3450 assert_change(
3451 regex_match(col("c1"), lit("^(fo_o|ba_r)$")),
3452 col("c1").eq(lit("fo_o")).or(col("c1").eq(lit("ba_r"))),
3453 );
3454 assert_change(
3455 regex_not_match(col("c1"), lit("^(fo_o|ba_r)$")),
3456 col("c1")
3457 .not_eq(lit("fo_o"))
3458 .and(col("c1").not_eq(lit("ba_r"))),
3459 );
3460 assert_change(
3461 regex_match(col("c1"), lit("^(fo_o|ba_r|ba_z)$")),
3462 ((col("c1").eq(lit("fo_o"))).or(col("c1").eq(lit("ba_r"))))
3463 .or(col("c1").eq(lit("ba_z"))),
3464 );
3465 assert_change(
3466 regex_match(col("c1"), lit("^(fo_o|ba_r|baz|qu_x)$")),
3467 col("c1").in_list(
3468 vec![lit("fo_o"), lit("ba_r"), lit("baz"), lit("qu_x")],
3469 false,
3470 ),
3471 );
3472
3473 assert_no_change(regex_match(col("c1"), lit("(foo|bar)")));
3475 assert_no_change(regex_match(col("c1"), lit("(foo|bar)*")));
3476 assert_no_change(regex_match(col("c1"), lit("(fo_o|b_ar)")));
3477 assert_no_change(regex_match(col("c1"), lit("(foo|ba_r)*")));
3478 assert_no_change(regex_match(col("c1"), lit("(fo_o|ba_r)*")));
3479 assert_no_change(regex_match(col("c1"), lit("^(foo|bar)*")));
3480 assert_no_change(regex_match(col("c1"), lit("^(foo)(bar)$")));
3481 assert_no_change(regex_match(col("c1"), lit("^")));
3482 assert_no_change(regex_match(col("c1"), lit("$")));
3483 assert_no_change(regex_match(col("c1"), lit("$^")));
3484 assert_no_change(regex_match(col("c1"), lit("$foo^")));
3485
3486 assert_change(
3488 regex_match(col("c1"), lit("^foo")),
3489 col("c1").like(lit("foo%")),
3490 );
3491 assert_change(
3492 regex_match(col("c1"), lit("foo$")),
3493 col("c1").like(lit("%foo")),
3494 );
3495 assert_change(
3496 regex_match(col("c1"), lit("^foo|bar$")),
3497 col("c1").like(lit("foo%")).or(col("c1").like(lit("%bar"))),
3498 );
3499
3500 assert_change(
3502 regex_match(col("c1"), lit("foo|bar|baz")),
3503 col("c1")
3504 .like(lit("%foo%"))
3505 .or(col("c1").like(lit("%bar%")))
3506 .or(col("c1").like(lit("%baz%"))),
3507 );
3508 assert_change(
3509 regex_match(col("c1"), lit("foo|x|baz")),
3510 col("c1")
3511 .like(lit("%foo%"))
3512 .or(col("c1").like(lit("%x%")))
3513 .or(col("c1").like(lit("%baz%"))),
3514 );
3515 assert_change(
3516 regex_not_match(col("c1"), lit("foo|bar|baz")),
3517 col("c1")
3518 .not_like(lit("%foo%"))
3519 .and(col("c1").not_like(lit("%bar%")))
3520 .and(col("c1").not_like(lit("%baz%"))),
3521 );
3522 assert_change(
3524 regex_match(col("c1"), lit("foo|^x$|baz")),
3525 col("c1")
3526 .like(lit("%foo%"))
3527 .or(col("c1").eq(lit("x")))
3528 .or(col("c1").like(lit("%baz%"))),
3529 );
3530 assert_change(
3531 regex_not_match(col("c1"), lit("foo|^bar$|baz")),
3532 col("c1")
3533 .not_like(lit("%foo%"))
3534 .and(col("c1").not_eq(lit("bar")))
3535 .and(col("c1").not_like(lit("%baz%"))),
3536 );
3537 assert_no_change(regex_match(col("c1"), lit("foo|bar|baz|blarg|bozo|etc")));
3539 }
3540
3541 #[track_caller]
3542 fn assert_no_change(expr: Expr) {
3543 let optimized = simplify(expr.clone());
3544 assert_eq!(expr, optimized);
3545 }
3546
3547 #[track_caller]
3548 fn assert_change(expr: Expr, expected: Expr) {
3549 let optimized = simplify(expr);
3550 assert_eq!(optimized, expected);
3551 }
3552
3553 fn regex_match(left: Expr, right: Expr) -> Expr {
3554 Expr::BinaryExpr(BinaryExpr {
3555 left: Box::new(left),
3556 op: Operator::RegexMatch,
3557 right: Box::new(right),
3558 })
3559 }
3560
3561 fn regex_not_match(left: Expr, right: Expr) -> Expr {
3562 Expr::BinaryExpr(BinaryExpr {
3563 left: Box::new(left),
3564 op: Operator::RegexNotMatch,
3565 right: Box::new(right),
3566 })
3567 }
3568
3569 fn regex_imatch(left: Expr, right: Expr) -> Expr {
3570 Expr::BinaryExpr(BinaryExpr {
3571 left: Box::new(left),
3572 op: Operator::RegexIMatch,
3573 right: Box::new(right),
3574 })
3575 }
3576
3577 fn regex_not_imatch(left: Expr, right: Expr) -> Expr {
3578 Expr::BinaryExpr(BinaryExpr {
3579 left: Box::new(left),
3580 op: Operator::RegexNotIMatch,
3581 right: Box::new(right),
3582 })
3583 }
3584
3585 fn try_simplify(expr: Expr) -> Result<Expr> {
3590 let schema = expr_test_schema();
3591 let simplifier =
3592 ExprSimplifier::new(SimplifyContext::builder().with_schema(schema).build());
3593 simplifier.simplify(expr)
3594 }
3595
3596 fn coerce(expr: Expr) -> Expr {
3597 let schema = expr_test_schema();
3598 let simplifier = ExprSimplifier::new(
3599 SimplifyContext::builder()
3600 .with_schema(Arc::clone(&schema))
3601 .build(),
3602 );
3603 simplifier.coerce(expr, schema.as_ref()).unwrap()
3604 }
3605
3606 fn simplify(expr: Expr) -> Expr {
3607 try_simplify(expr).unwrap()
3608 }
3609
3610 fn try_simplify_with_cycle_count(expr: Expr) -> Result<(Expr, u32)> {
3611 let schema = expr_test_schema();
3612 let simplifier =
3613 ExprSimplifier::new(SimplifyContext::builder().with_schema(schema).build());
3614 let (expr, count) = simplifier.simplify_with_cycle_count_transformed(expr)?;
3615 Ok((expr.data, count))
3616 }
3617
3618 fn simplify_with_cycle_count(expr: Expr) -> (Expr, u32) {
3619 try_simplify_with_cycle_count(expr).unwrap()
3620 }
3621
3622 fn simplify_with_guarantee(
3623 expr: Expr,
3624 guarantees: Vec<(Expr, NullableInterval)>,
3625 ) -> Expr {
3626 let schema = expr_test_schema();
3627 let simplifier =
3628 ExprSimplifier::new(SimplifyContext::builder().with_schema(schema).build())
3629 .with_guarantees(guarantees);
3630 simplifier.simplify(expr).unwrap()
3631 }
3632
3633 fn expr_test_schema() -> DFSchemaRef {
3634 static EXPR_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
3635 Arc::new(
3636 DFSchema::from_unqualified_fields(
3637 vec![
3638 Field::new("c1", DataType::Utf8, true),
3639 Field::new("c2", DataType::Boolean, true),
3640 Field::new("c3", DataType::Int64, true),
3641 Field::new("c4", DataType::UInt32, true),
3642 Field::new("c1_non_null", DataType::Utf8, false),
3643 Field::new("c2_non_null", DataType::Boolean, false),
3644 Field::new("c3_non_null", DataType::Int64, false),
3645 Field::new("c4_non_null", DataType::UInt32, false),
3646 Field::new("c5", DataType::FixedSizeBinary(3), true),
3647 ]
3648 .into(),
3649 HashMap::new(),
3650 )
3651 .unwrap(),
3652 )
3653 });
3654 Arc::clone(&EXPR_TEST_SCHEMA)
3655 }
3656
3657 #[test]
3658 fn simplify_expr_null_comparison() {
3659 assert_eq!(
3661 simplify(lit(true).eq(lit(ScalarValue::Boolean(None)))),
3662 lit(ScalarValue::Boolean(None)),
3663 );
3664
3665 assert_eq!(
3667 simplify(
3668 lit(ScalarValue::Boolean(None)).not_eq(lit(ScalarValue::Boolean(None)))
3669 ),
3670 lit(ScalarValue::Boolean(None)),
3671 );
3672
3673 assert_eq!(
3675 simplify(col("c2").not_eq(lit(ScalarValue::Boolean(None)))),
3676 lit(ScalarValue::Boolean(None)),
3677 );
3678
3679 assert_eq!(
3681 simplify(lit(ScalarValue::Boolean(None)).eq(col("c2"))),
3682 lit(ScalarValue::Boolean(None)),
3683 );
3684 }
3685
3686 #[test]
3687 fn simplify_expr_is_not_null() {
3688 assert_eq!(
3689 simplify(Expr::IsNotNull(Box::new(col("c1")))),
3690 Expr::IsNotNull(Box::new(col("c1")))
3691 );
3692
3693 assert_eq!(
3695 simplify(Expr::IsNotNull(Box::new(col("c1_non_null")))),
3696 lit(true)
3697 );
3698 }
3699
3700 #[test]
3701 fn simplify_expr_is_null() {
3702 assert_eq!(
3703 simplify(Expr::IsNull(Box::new(col("c1")))),
3704 Expr::IsNull(Box::new(col("c1")))
3705 );
3706
3707 assert_eq!(
3709 simplify(Expr::IsNull(Box::new(col("c1_non_null")))),
3710 lit(false)
3711 );
3712 }
3713
3714 #[test]
3715 fn simplify_expr_is_unknown() {
3716 assert_eq!(simplify(col("c2").is_unknown()), col("c2").is_unknown(),);
3717
3718 assert_eq!(simplify(col("c2_non_null").is_unknown()), lit(false));
3720 }
3721
3722 #[test]
3723 fn simplify_expr_is_not_known() {
3724 assert_eq!(
3725 simplify(col("c2").is_not_unknown()),
3726 col("c2").is_not_unknown()
3727 );
3728
3729 assert_eq!(simplify(col("c2_non_null").is_not_unknown()), lit(true));
3731 }
3732
3733 #[test]
3734 fn simplify_expr_eq() {
3735 let schema = expr_test_schema();
3736 assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3737
3738 assert_eq!(simplify(lit(true).eq(lit(true))), lit(true));
3740
3741 assert_eq!(simplify(lit(true).eq(lit(false))), lit(false),);
3743
3744 assert_eq!(simplify(col("c2").eq(lit(true))), col("c2"));
3746
3747 assert_eq!(simplify(col("c2").eq(lit(false))), col("c2").not(),);
3749 }
3750
3751 #[test]
3752 fn simplify_expr_eq_skip_nonboolean_type() {
3753 let schema = expr_test_schema();
3754
3755 assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3761
3762 assert_eq!(simplify(col("c1").eq(lit("foo"))), col("c1").eq(lit("foo")),);
3764 }
3765
3766 #[test]
3767 fn simplify_expr_not_eq() {
3768 let schema = expr_test_schema();
3769
3770 assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3771
3772 assert_eq!(simplify(col("c2").not_eq(lit(true))), col("c2").not(),);
3774
3775 assert_eq!(simplify(col("c2").not_eq(lit(false))), col("c2"),);
3777
3778 assert_eq!(simplify(lit(true).not_eq(lit(true))), lit(false),);
3780
3781 assert_eq!(simplify(lit(true).not_eq(lit(false))), lit(true),);
3782 }
3783
3784 #[test]
3785 fn simplify_expr_not_eq_skip_nonboolean_type() {
3786 let schema = expr_test_schema();
3787
3788 assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3792
3793 assert_eq!(
3794 simplify(col("c1").not_eq(lit("foo"))),
3795 col("c1").not_eq(lit("foo")),
3796 );
3797 }
3798
3799 #[test]
3800 fn simplify_literal_case_equality() {
3801 let simple_case = Expr::Case(Case::new(
3803 None,
3804 vec![(
3805 Box::new(col("c2_non_null").not_eq(lit(false))),
3806 Box::new(lit("ok")),
3807 )],
3808 Some(Box::new(lit("not_ok"))),
3809 ));
3810
3811 assert_eq!(
3819 simplify(binary_expr(simple_case.clone(), Operator::Eq, lit("ok"),)),
3820 col("c2_non_null"),
3821 );
3822
3823 assert_eq!(
3831 simplify(binary_expr(simple_case, Operator::NotEq, lit("ok"),)),
3832 not(col("c2_non_null")),
3833 );
3834
3835 let complex_case = Expr::Case(Case::new(
3836 None,
3837 vec![
3838 (
3839 Box::new(col("c1").eq(lit("inboxed"))),
3840 Box::new(lit("pending")),
3841 ),
3842 (
3843 Box::new(col("c1").eq(lit("scheduled"))),
3844 Box::new(lit("pending")),
3845 ),
3846 (
3847 Box::new(col("c1").eq(lit("completed"))),
3848 Box::new(lit("completed")),
3849 ),
3850 (
3851 Box::new(col("c1").eq(lit("paused"))),
3852 Box::new(lit("paused")),
3853 ),
3854 (Box::new(col("c2")), Box::new(lit("running"))),
3855 (
3856 Box::new(col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0)))),
3857 Box::new(lit("backing-off")),
3858 ),
3859 ],
3860 Some(Box::new(lit("ready"))),
3861 ));
3862
3863 assert_eq!(
3864 simplify(binary_expr(
3865 complex_case.clone(),
3866 Operator::Eq,
3867 lit("completed"),
3868 )),
3869 not_distinct_from(col("c1").eq(lit("completed")), lit(true)).and(
3870 distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3871 .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3872 )
3873 );
3874
3875 assert_eq!(
3876 simplify(binary_expr(
3877 complex_case.clone(),
3878 Operator::NotEq,
3879 lit("completed"),
3880 )),
3881 distinct_from(col("c1").eq(lit("completed")), lit(true))
3882 .or(not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3883 .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true))))
3884 );
3885
3886 assert_eq!(
3887 simplify(binary_expr(
3888 complex_case.clone(),
3889 Operator::Eq,
3890 lit("running"),
3891 )),
3892 not_distinct_from(col("c2"), lit(true)).and(
3893 distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3894 .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3895 .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
3896 .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
3897 )
3898 );
3899
3900 assert_eq!(
3901 simplify(binary_expr(
3902 complex_case.clone(),
3903 Operator::Eq,
3904 lit("ready"),
3905 )),
3906 distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3907 .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3908 .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
3909 .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
3910 .and(distinct_from(col("c2"), lit(true)))
3911 .and(distinct_from(
3912 col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
3913 lit(true)
3914 ))
3915 );
3916
3917 assert_eq!(
3918 simplify(binary_expr(
3919 complex_case.clone(),
3920 Operator::NotEq,
3921 lit("ready"),
3922 )),
3923 not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3924 .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3925 .or(not_distinct_from(col("c1").eq(lit("completed")), lit(true)))
3926 .or(not_distinct_from(col("c1").eq(lit("paused")), lit(true)))
3927 .or(not_distinct_from(col("c2"), lit(true)))
3928 .or(not_distinct_from(
3929 col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
3930 lit(true)
3931 ))
3932 );
3933 }
3934
3935 #[test]
3936 fn simplify_expr_case_when_then_else() {
3937 assert_eq!(
3943 simplify(Expr::Case(Case::new(
3944 None,
3945 vec![(
3946 Box::new(col("c2_non_null").not_eq(lit(false))),
3947 Box::new(lit("ok").eq(lit("not_ok"))),
3948 )],
3949 Some(Box::new(col("c2_non_null").eq(lit(true)))),
3950 ))),
3951 lit(false) );
3953
3954 assert_eq!(
3963 simplify(simplify(Expr::Case(Case::new(
3964 None,
3965 vec![(
3966 Box::new(col("c2_non_null").not_eq(lit(false))),
3967 Box::new(lit("ok").eq(lit("ok"))),
3968 )],
3969 Some(Box::new(col("c2_non_null").eq(lit(true)))),
3970 )))),
3971 col("c2_non_null")
3972 );
3973
3974 assert_eq!(
3981 simplify(simplify(Expr::Case(Case::new(
3982 None,
3983 vec![(Box::new(col("c2").is_null()), Box::new(lit(true)),)],
3984 Some(Box::new(col("c2"))),
3985 )))),
3986 col("c2")
3987 .is_null()
3988 .or(col("c2").is_not_null().and(col("c2")))
3989 );
3990
3991 assert_eq!(
3999 simplify(simplify(Expr::Case(Case::new(
4000 None,
4001 vec![
4002 (Box::new(col("c1_non_null")), Box::new(lit(true)),),
4003 (Box::new(col("c2_non_null")), Box::new(lit(false)),),
4004 ],
4005 Some(Box::new(lit(true))),
4006 )))),
4007 col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
4008 );
4009
4010 assert_eq!(
4018 simplify(simplify(Expr::Case(Case::new(
4019 None,
4020 vec![
4021 (Box::new(col("c1_non_null")), Box::new(lit(true)),),
4022 (Box::new(col("c2_non_null")), Box::new(lit(false)),),
4023 ],
4024 Some(Box::new(lit(true))),
4025 )))),
4026 col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
4027 );
4028
4029 assert_eq!(
4031 simplify(simplify(Expr::Case(Case::new(
4032 None,
4033 vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
4034 None,
4035 )))),
4036 not_distinct_from(col("c3").gt(lit(0_i64)), lit(true)).or(distinct_from(
4037 col("c3").gt(lit(0_i64)),
4038 lit(true)
4039 )
4040 .and(lit_bool_null()))
4041 );
4042
4043 assert_eq!(
4045 simplify(simplify(Expr::Case(Case::new(
4046 None,
4047 vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
4048 Some(Box::new(lit(false))),
4049 )))),
4050 not_distinct_from(col("c3").gt(lit(0_i64)), lit(true))
4051 );
4052 }
4053
4054 #[test]
4055 fn simplify_expr_case_when_first_true() {
4056 assert_eq!(
4058 simplify(Expr::Case(Case::new(
4059 None,
4060 vec![(Box::new(lit(true)), Box::new(lit(1)),)],
4061 Some(Box::new(col("c1"))),
4062 ))),
4063 lit(1)
4064 );
4065
4066 assert_eq!(
4068 simplify(Expr::Case(Case::new(
4069 None,
4070 vec![(Box::new(lit(true)), Box::new(lit("a")),)],
4071 Some(Box::new(lit("b"))),
4072 ))),
4073 lit("a")
4074 );
4075
4076 assert_eq!(
4078 simplify(Expr::Case(Case::new(
4079 None,
4080 vec![
4081 (Box::new(lit(true)), Box::new(lit("a"))),
4082 (Box::new(lit("x").gt(lit(5))), Box::new(lit("b"))),
4083 ],
4084 Some(Box::new(lit("c"))),
4085 ))),
4086 lit("a")
4087 );
4088
4089 assert_eq!(
4091 simplify(Expr::Case(Case::new(
4092 None,
4093 vec![(Box::new(lit(true)), Box::new(lit("a")),)],
4094 None,
4095 ))),
4096 lit("a")
4097 );
4098
4099 let expr = Expr::Case(Case::new(
4101 None,
4102 vec![(Box::new(col("c2")), Box::new(lit(1)))],
4103 Some(Box::new(lit(2))),
4104 ));
4105 assert_eq!(simplify(expr.clone()), expr);
4106
4107 let expr = Expr::Case(Case::new(
4109 None,
4110 vec![(Box::new(lit(false)), Box::new(lit(1)))],
4111 Some(Box::new(lit(2))),
4112 ));
4113 assert_ne!(simplify(expr), lit(1));
4114
4115 let expr = Expr::Case(Case::new(
4117 None,
4118 vec![(Box::new(col("c1").gt(lit(5))), Box::new(lit(1)))],
4119 Some(Box::new(lit(2))),
4120 ));
4121 assert_eq!(simplify(expr.clone()), expr);
4122 }
4123
4124 #[test]
4125 fn simplify_expr_case_when_any_true() {
4126 assert_eq!(
4128 simplify(Expr::Case(Case::new(
4129 None,
4130 vec![
4131 (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4132 (Box::new(lit(true)), Box::new(lit("b"))),
4133 ],
4134 Some(Box::new(lit("c"))),
4135 ))),
4136 Expr::Case(Case::new(
4137 None,
4138 vec![(Box::new(col("c3").gt(lit(0))), Box::new(lit("a")))],
4139 Some(Box::new(lit("b"))),
4140 ))
4141 );
4142
4143 assert_eq!(
4146 simplify(Expr::Case(Case::new(
4147 None,
4148 vec![
4149 (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4150 (Box::new(col("c4").lt(lit(0))), Box::new(lit("b"))),
4151 (Box::new(lit(true)), Box::new(lit("c"))),
4152 (Box::new(col("c3").eq(lit(0))), Box::new(lit("d"))),
4153 ],
4154 Some(Box::new(lit("e"))),
4155 ))),
4156 Expr::Case(Case::new(
4157 None,
4158 vec![
4159 (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4160 (Box::new(col("c4").lt(lit(0))), Box::new(lit("b"))),
4161 ],
4162 Some(Box::new(lit("c"))),
4163 ))
4164 );
4165
4166 assert_eq!(
4169 simplify(Expr::Case(Case::new(
4170 None,
4171 vec![
4172 (Box::new(col("c3").gt(lit(0))), Box::new(lit(1))),
4173 (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4174 (Box::new(lit(true)), Box::new(lit(3))),
4175 ],
4176 None,
4177 ))),
4178 Expr::Case(Case::new(
4179 None,
4180 vec![
4181 (Box::new(col("c3").gt(lit(0))), Box::new(lit(1))),
4182 (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4183 ],
4184 Some(Box::new(lit(3))),
4185 ))
4186 );
4187
4188 let expr = Expr::Case(Case::new(
4190 None,
4191 vec![
4192 (Box::new(col("c3").gt(lit(0))), Box::new(col("c3"))),
4193 (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4194 ],
4195 Some(Box::new(lit(3))),
4196 ));
4197 assert_eq!(simplify(expr.clone()), expr);
4198 }
4199
4200 #[test]
4201 fn simplify_expr_case_when_any_false() {
4202 assert_eq!(
4204 simplify(Expr::Case(Case::new(
4205 None,
4206 vec![(Box::new(lit(false)), Box::new(lit("a")))],
4207 None,
4208 ))),
4209 Expr::Literal(ScalarValue::Utf8(None), None)
4210 );
4211
4212 assert_eq!(
4214 simplify(Expr::Case(Case::new(
4215 None,
4216 vec![(Box::new(lit(false)), Box::new(lit(2)))],
4217 Some(Box::new(lit(1))),
4218 ))),
4219 lit(1),
4220 );
4221
4222 assert_eq!(
4224 simplify(Expr::Case(Case::new(
4225 None,
4226 vec![
4227 (Box::new(col("c3").lt(lit(10))), Box::new(lit("b"))),
4228 (Box::new(lit(false)), Box::new(col("c3"))),
4229 ],
4230 Some(Box::new(col("c4"))),
4231 ))),
4232 Expr::Case(Case::new(
4233 None,
4234 vec![(Box::new(col("c3").lt(lit(10))), Box::new(lit("b")))],
4235 Some(Box::new(col("c4"))),
4236 ))
4237 );
4238
4239 let expr = Expr::Case(Case::new(
4241 None,
4242 vec![(Box::new(col("c3").eq(lit(4))), Box::new(lit(1)))],
4243 Some(Box::new(lit(2))),
4244 ));
4245 assert_eq!(simplify(expr.clone()), expr);
4246 }
4247
4248 fn distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4249 Expr::BinaryExpr(BinaryExpr {
4250 left: Box::new(left.into()),
4251 op: Operator::IsDistinctFrom,
4252 right: Box::new(right.into()),
4253 })
4254 }
4255
4256 fn not_distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4257 Expr::BinaryExpr(BinaryExpr {
4258 left: Box::new(left.into()),
4259 op: Operator::IsNotDistinctFrom,
4260 right: Box::new(right.into()),
4261 })
4262 }
4263
4264 #[test]
4265 fn simplify_expr_bool_or() {
4266 assert_eq!(simplify(col("c2").or(lit(true))), lit(true),);
4268
4269 assert_eq!(simplify(col("c2").or(lit(false))), col("c2"),);
4271
4272 assert_eq!(simplify(lit(true).or(lit_bool_null())), lit(true),);
4274
4275 assert_eq!(simplify(lit_bool_null().or(lit(true))), lit(true),);
4277
4278 assert_eq!(simplify(lit(false).or(lit_bool_null())), lit_bool_null(),);
4280
4281 assert_eq!(simplify(lit_bool_null().or(lit(false))), lit_bool_null(),);
4283
4284 let expr = col("c1").between(lit(0), lit(10));
4288 let expr = expr.or(lit_bool_null());
4289 let result = simplify(expr);
4290
4291 let expected_expr = or(
4292 and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
4293 lit_bool_null(),
4294 );
4295 assert_eq!(expected_expr, result);
4296 }
4297
4298 #[test]
4299 fn simplify_inlist() {
4300 assert_eq!(simplify(in_list(col("c1"), vec![], false)), lit(false));
4301 assert_eq!(simplify(in_list(col("c1"), vec![], true)), lit(true));
4302
4303 assert_eq!(
4305 simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], false)),
4306 lit_bool_null()
4307 );
4308
4309 assert_eq!(
4311 simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], true)),
4312 lit_bool_null()
4313 );
4314
4315 assert_eq!(
4316 simplify(in_list(col("c1"), vec![lit(1)], false)),
4317 col("c1").eq(lit(1))
4318 );
4319 assert_eq!(
4320 simplify(in_list(col("c1"), vec![lit(1)], true)),
4321 col("c1").not_eq(lit(1))
4322 );
4323
4324 assert_eq!(
4327 simplify(in_list(col("c1") * lit(10), vec![lit(2)], false)),
4328 (col("c1") * lit(10)).eq(lit(2))
4329 );
4330
4331 assert_eq!(
4332 simplify(in_list(col("c1"), vec![lit(1), lit(2)], false)),
4333 col("c1").eq(lit(1)).or(col("c1").eq(lit(2)))
4334 );
4335 assert_eq!(
4336 simplify(in_list(col("c1"), vec![lit(1), lit(2)], true)),
4337 col("c1").not_eq(lit(1)).and(col("c1").not_eq(lit(2)))
4338 );
4339
4340 let subquery = Arc::new(test_table_scan_with_name("test").unwrap());
4341 assert_eq!(
4342 simplify(in_list(
4343 col("c1"),
4344 vec![scalar_subquery(Arc::clone(&subquery))],
4345 false
4346 )),
4347 in_subquery(col("c1"), Arc::clone(&subquery))
4348 );
4349 assert_eq!(
4350 simplify(in_list(
4351 col("c1"),
4352 vec![scalar_subquery(Arc::clone(&subquery))],
4353 true
4354 )),
4355 not_in_subquery(col("c1"), subquery)
4356 );
4357
4358 let subquery1 =
4359 scalar_subquery(Arc::new(test_table_scan_with_name("test1").unwrap()));
4360 let subquery2 =
4361 scalar_subquery(Arc::new(test_table_scan_with_name("test2").unwrap()));
4362
4363 assert_eq!(
4365 simplify(in_list(
4366 col("c1"),
4367 vec![subquery1.clone(), subquery2.clone()],
4368 true
4369 )),
4370 col("c1")
4371 .not_eq(subquery1.clone())
4372 .and(col("c1").not_eq(subquery2.clone()))
4373 );
4374
4375 assert_eq!(
4377 simplify(in_list(
4378 col("c1"),
4379 vec![subquery1.clone(), subquery2.clone()],
4380 false
4381 )),
4382 col("c1").eq(subquery1).or(col("c1").eq(subquery2))
4383 );
4384
4385 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4387 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false),
4388 );
4389 assert_eq!(simplify(expr), lit(false));
4390
4391 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4393 in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], false),
4394 );
4395 assert_eq!(simplify(expr), col("c1").eq(lit(4)));
4396
4397 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
4399 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
4400 );
4401 assert_eq!(simplify(expr), lit(true));
4402
4403 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
4405 in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
4406 );
4407 assert_eq!(simplify(expr), col("c1").not_eq(lit(4)));
4408
4409 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
4411 in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
4412 );
4413 assert_eq!(
4414 simplify(expr),
4415 in_list(
4416 col("c1"),
4417 vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6), lit(7)],
4418 true
4419 )
4420 );
4421
4422 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).or(
4424 in_list(col("c1"), vec![lit(2), lit(3), lit(4), lit(5)], false),
4425 );
4426 assert_eq!(
4427 simplify(expr),
4428 in_list(
4429 col("c1"),
4430 vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4431 false
4432 )
4433 );
4434
4435 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3)], false).and(in_list(
4437 col("c1"),
4438 vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4439 true,
4440 ));
4441 assert_eq!(simplify(expr), lit(false));
4442
4443 let expr =
4445 in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(in_list(
4446 col("c1"),
4447 vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4448 false,
4449 ));
4450 assert_eq!(simplify(expr), col("c1").eq(lit(5)));
4451
4452 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4454 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
4455 );
4456 assert_eq!(
4457 simplify(expr),
4458 in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false)
4459 );
4460
4461 let expr = in_list(
4464 col("c1"),
4465 vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6)],
4466 false,
4467 )
4468 .and(in_list(
4469 col("c1"),
4470 vec![lit(1), lit(3), lit(5), lit(6)],
4471 false,
4472 ))
4473 .and(in_list(col("c1"), vec![lit(3), lit(6)], false));
4474 assert_eq!(
4475 simplify(expr),
4476 col("c1").eq(lit(3)).or(col("c1").eq(lit(6)))
4477 );
4478
4479 let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
4481 in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false)
4482 .and(in_list(
4483 col("c1"),
4484 vec![lit(3), lit(4), lit(5), lit(6)],
4485 true,
4486 ))
4487 .and(in_list(col("c1"), vec![lit(8), lit(9), lit(10)], false)),
4488 );
4489 assert_eq!(simplify(expr), col("c1").eq(lit(8)));
4490
4491 let expr =
4494 in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(col("c1")
4495 .not_eq(lit(5))
4496 .or(in_list(
4497 col("c1"),
4498 vec![lit(6), lit(7), lit(8), lit(9)],
4499 true,
4500 )));
4501 assert_eq!(simplify(expr.clone()), expr);
4505 }
4506
4507 #[test]
4508 fn simplify_null_in_empty_inlist() {
4509 let expr = in_list(lit_bool_null(), vec![], false);
4511 assert_eq!(simplify(expr), lit(false));
4512
4513 let expr = in_list(lit_bool_null(), vec![], true);
4515 assert_eq!(simplify(expr), lit(true));
4516
4517 let null_null = || Expr::Literal(ScalarValue::Null, None);
4519 let expr = in_list(null_null(), vec![], false);
4520 assert_eq!(simplify(expr), lit(false));
4521
4522 let expr = in_list(null_null(), vec![], true);
4524 assert_eq!(simplify(expr), lit(true));
4525 }
4526
4527 #[test]
4528 fn just_simplifier_simplify_null_in_empty_inlist() {
4529 let simplify = |expr: Expr| -> Expr {
4530 let schema = expr_test_schema();
4531 let info = SimplifyContext::builder().with_schema(schema).build();
4532 let simplifier = &mut Simplifier::new(&info);
4533 expr.rewrite(simplifier)
4534 .expect("Failed to simplify expression")
4535 .data
4536 };
4537
4538 let expr = in_list(lit_bool_null(), vec![], false);
4540 assert_eq!(simplify(expr), lit(false));
4541
4542 let expr = in_list(lit_bool_null(), vec![], true);
4544 assert_eq!(simplify(expr), lit(true));
4545
4546 let null_null = || Expr::Literal(ScalarValue::Null, None);
4548 let expr = in_list(null_null(), vec![], false);
4549 assert_eq!(simplify(expr), lit(false));
4550
4551 let expr = in_list(null_null(), vec![], true);
4553 assert_eq!(simplify(expr), lit(true));
4554 }
4555
4556 #[test]
4557 fn simplify_large_or() {
4558 let expr = (0..5)
4559 .map(|i| col("c1").eq(lit(i)))
4560 .fold(lit(false), |acc, e| acc.or(e));
4561 assert_eq!(
4562 simplify(expr),
4563 in_list(col("c1"), (0..5).map(lit).collect(), false),
4564 );
4565 }
4566
4567 #[test]
4568 fn simplify_expr_bool_and() {
4569 assert_eq!(simplify(col("c2").and(lit(true))), col("c2"),);
4571 assert_eq!(simplify(col("c2").and(lit(false))), lit(false),);
4573
4574 assert_eq!(simplify(lit(true).and(lit_bool_null())), lit_bool_null(),);
4576
4577 assert_eq!(simplify(lit_bool_null().and(lit(true))), lit_bool_null(),);
4579
4580 assert_eq!(simplify(lit(false).and(lit_bool_null())), lit(false),);
4582
4583 assert_eq!(simplify(lit_bool_null().and(lit(false))), lit(false),);
4585
4586 let expr = col("c1").between(lit(0), lit(10));
4590 let expr = expr.and(lit_bool_null());
4591 let result = simplify(expr);
4592
4593 let expected_expr = and(
4594 and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
4595 lit_bool_null(),
4596 );
4597 assert_eq!(expected_expr, result);
4598 }
4599
4600 #[test]
4601 fn simplify_expr_between() {
4602 let expr = col("c2").between(lit(3), lit(4));
4604 assert_eq!(
4605 simplify(expr),
4606 and(col("c2").gt_eq(lit(3)), col("c2").lt_eq(lit(4)))
4607 );
4608
4609 let expr = col("c2").not_between(lit(3), lit(4));
4611 assert_eq!(
4612 simplify(expr),
4613 or(col("c2").lt(lit(3)), col("c2").gt(lit(4)))
4614 );
4615 }
4616
4617 #[test]
4618 fn test_like_and_ilike() {
4619 let null = lit(ScalarValue::Utf8(None));
4620
4621 let expr = col("c1").like(null.clone());
4623 assert_eq!(simplify(expr), lit_bool_null());
4624
4625 let expr = col("c1").not_like(null.clone());
4626 assert_eq!(simplify(expr), lit_bool_null());
4627
4628 let expr = col("c1").ilike(null.clone());
4629 assert_eq!(simplify(expr), lit_bool_null());
4630
4631 let expr = col("c1").not_ilike(null.clone());
4632 assert_eq!(simplify(expr), lit_bool_null());
4633
4634 let expr = col("c1").like(lit("%"));
4636 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4637
4638 let expr = col("c1").not_like(lit("%"));
4639 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4640
4641 let expr = col("c1").ilike(lit("%"));
4642 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4643
4644 let expr = col("c1").not_ilike(lit("%"));
4645 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4646
4647 let expr = col("c1").like(lit("%%"));
4649 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4650
4651 let expr = col("c1").not_like(lit("%%"));
4652 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4653
4654 let expr = col("c1").ilike(lit("%%"));
4655 assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4656
4657 let expr = col("c1").not_ilike(lit("%%"));
4658 assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4659
4660 let expr = col("c1_non_null").like(lit("%"));
4662 assert_eq!(simplify(expr), lit(true));
4663
4664 let expr = col("c1_non_null").not_like(lit("%"));
4665 assert_eq!(simplify(expr), lit(false));
4666
4667 let expr = col("c1_non_null").ilike(lit("%"));
4668 assert_eq!(simplify(expr), lit(true));
4669
4670 let expr = col("c1_non_null").not_ilike(lit("%"));
4671 assert_eq!(simplify(expr), lit(false));
4672
4673 let expr = col("c1_non_null").like(lit("%%"));
4675 assert_eq!(simplify(expr), lit(true));
4676
4677 let expr = col("c1_non_null").not_like(lit("%%"));
4678 assert_eq!(simplify(expr), lit(false));
4679
4680 let expr = col("c1_non_null").ilike(lit("%%"));
4681 assert_eq!(simplify(expr), lit(true));
4682
4683 let expr = col("c1_non_null").not_ilike(lit("%%"));
4684 assert_eq!(simplify(expr), lit(false));
4685
4686 let expr = null.clone().like(lit("%"));
4688 assert_eq!(simplify(expr), lit_bool_null());
4689
4690 let expr = null.clone().not_like(lit("%"));
4691 assert_eq!(simplify(expr), lit_bool_null());
4692
4693 let expr = null.clone().ilike(lit("%"));
4694 assert_eq!(simplify(expr), lit_bool_null());
4695
4696 let expr = null.clone().not_ilike(lit("%"));
4697 assert_eq!(simplify(expr), lit_bool_null());
4698
4699 let expr = null.clone().like(lit("%%"));
4701 assert_eq!(simplify(expr), lit_bool_null());
4702
4703 let expr = null.clone().not_like(lit("%%"));
4704 assert_eq!(simplify(expr), lit_bool_null());
4705
4706 let expr = null.clone().ilike(lit("%%"));
4707 assert_eq!(simplify(expr), lit_bool_null());
4708
4709 let expr = null.clone().not_ilike(lit("%%"));
4710 assert_eq!(simplify(expr), lit_bool_null());
4711
4712 let expr = null.clone().like(lit("a%"));
4714 assert_eq!(simplify(expr), lit_bool_null());
4715
4716 let expr = null.clone().not_like(lit("a%"));
4717 assert_eq!(simplify(expr), lit_bool_null());
4718
4719 let expr = null.clone().ilike(lit("a%"));
4720 assert_eq!(simplify(expr), lit_bool_null());
4721
4722 let expr = null.clone().not_ilike(lit("a%"));
4723 assert_eq!(simplify(expr), lit_bool_null());
4724
4725 let expr = col("c1").like(lit("a"));
4727 assert_eq!(simplify(expr), col("c1").eq(lit("a")));
4728 let expr = col("c1").not_like(lit("a"));
4729 assert_eq!(simplify(expr), col("c1").not_eq(lit("a")));
4730 let expr = col("c1").like(lit("a_"));
4731 assert_eq!(simplify(expr), col("c1").like(lit("a_")));
4732 let expr = col("c1").not_like(lit("a_"));
4733 assert_eq!(simplify(expr), col("c1").not_like(lit("a_")));
4734
4735 let expr = col("c1").ilike(lit("a"));
4736 assert_eq!(simplify(expr), col("c1").ilike(lit("a")));
4737 let expr = col("c1").not_ilike(lit("a"));
4738 assert_eq!(simplify(expr), col("c1").not_ilike(lit("a")));
4739 }
4740
4741 #[test]
4742 fn test_simplify_with_guarantee() {
4743 let expr_x = col("c3").gt(lit(3_i64));
4745 let expr_y = (col("c4") + lit(2_u32)).lt(lit(10_u32));
4746 let expr_z = col("c1").in_list(vec![lit("a"), lit("b")], true);
4747 let expr = expr_x.clone().and(expr_y.or(expr_z));
4748
4749 let guarantees = vec![
4751 (col("c3"), NullableInterval::from(ScalarValue::Int64(None))),
4752 (col("c4"), NullableInterval::from(ScalarValue::UInt32(None))),
4753 (col("c1"), NullableInterval::from(ScalarValue::Utf8(None))),
4754 ];
4755
4756 let output = simplify_with_guarantee(expr.clone(), guarantees);
4757 assert_eq!(output, lit_bool_null());
4758
4759 let guarantees = vec![
4761 (
4762 col("c3"),
4763 NullableInterval::NotNull {
4764 values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4765 },
4766 ),
4767 (
4768 col("c4"),
4769 NullableInterval::from(ScalarValue::UInt32(Some(9))),
4770 ),
4771 (col("c1"), NullableInterval::from(ScalarValue::from("a"))),
4772 ];
4773 let output = simplify_with_guarantee(expr.clone(), guarantees);
4774 assert_eq!(output, lit(false));
4775
4776 let guarantees = vec![
4778 (
4779 col("c3"),
4780 NullableInterval::MaybeNull {
4781 values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4782 },
4783 ),
4784 (
4785 col("c4"),
4786 NullableInterval::MaybeNull {
4787 values: Interval::make(Some(9_u32), Some(9_u32)).unwrap(),
4788 },
4789 ),
4790 (
4791 col("c1"),
4792 NullableInterval::NotNull {
4793 values: Interval::try_new(
4794 ScalarValue::from("d"),
4795 ScalarValue::from("f"),
4796 )
4797 .unwrap(),
4798 },
4799 ),
4800 ];
4801 let output = simplify_with_guarantee(expr.clone(), guarantees);
4802 assert_eq!(&output, &expr_x);
4803
4804 let guarantees = vec![
4806 (
4807 col("c3"),
4808 NullableInterval::from(ScalarValue::Int64(Some(9))),
4809 ),
4810 (
4811 col("c4"),
4812 NullableInterval::from(ScalarValue::UInt32(Some(3))),
4813 ),
4814 ];
4815 let output = simplify_with_guarantee(expr.clone(), guarantees);
4816 assert_eq!(output, lit(true));
4817
4818 let guarantees = vec![(
4820 col("c4"),
4821 NullableInterval::from(ScalarValue::UInt32(Some(3))),
4822 )];
4823 let output = simplify_with_guarantee(expr, guarantees);
4824 assert_eq!(&output, &expr_x);
4825 }
4826
4827 #[test]
4828 fn test_expression_partial_simplify_1() {
4829 let expr = (lit(1) + lit(2)) + (lit(4) / lit(0));
4831 let expected = (lit(3)) + (lit(4) / lit(0));
4832
4833 assert_eq!(simplify(expr), expected);
4834 }
4835
4836 #[test]
4837 fn test_expression_partial_simplify_2() {
4838 let expr = (lit(1).gt(lit(2))).and(lit(4) / lit(0));
4840 let expected = lit(false);
4841
4842 assert_eq!(simplify(expr), expected);
4843 }
4844
4845 #[test]
4846 fn test_simplify_cycles() {
4847 let expr = lit(true);
4849 let expected = lit(true);
4850 let (expr, num_iter) = simplify_with_cycle_count(expr);
4851 assert_eq!(expr, expected);
4852 assert_eq!(num_iter, 1);
4853
4854 let expr = lit(true).not_eq(lit_bool_null()).or(lit(5).gt(lit(10)));
4856 let expected = lit_bool_null();
4857 let (expr, num_iter) = simplify_with_cycle_count(expr);
4858 assert_eq!(expr, expected);
4859 assert_eq!(num_iter, 2);
4860
4861 let expr = (((col("c4") - lit(10)) + lit(10)) * lit(100)) / lit(100);
4864 let expected = expr.clone();
4865 let (expr, num_iter) = simplify_with_cycle_count(expr);
4866 assert_eq!(expr, expected);
4867 assert_eq!(num_iter, 1);
4868
4869 let expr = col("c4")
4871 .lt(lit(1))
4872 .or(col("c3").lt(lit(2)))
4873 .and(col("c3_non_null").lt(lit(3)))
4874 .and(lit(false));
4875 let expected = lit(false);
4876 let (expr, num_iter) = simplify_with_cycle_count(expr);
4877 assert_eq!(expr, expected);
4878 assert_eq!(num_iter, 2);
4879 }
4880
4881 fn boolean_test_schema() -> DFSchemaRef {
4882 static BOOLEAN_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
4883 Schema::new(vec![
4884 Field::new("A", DataType::Boolean, false),
4885 Field::new("B", DataType::Boolean, false),
4886 Field::new("C", DataType::Boolean, false),
4887 Field::new("D", DataType::Boolean, false),
4888 ])
4889 .to_dfschema_ref()
4890 .unwrap()
4891 });
4892 Arc::clone(&BOOLEAN_TEST_SCHEMA)
4893 }
4894
4895 #[test]
4896 fn simplify_common_factor_conjunction_in_disjunction() {
4897 let schema = boolean_test_schema();
4898 let simplifier =
4899 ExprSimplifier::new(SimplifyContext::builder().with_schema(schema).build());
4900
4901 let a = || col("A");
4902 let b = || col("B");
4903 let c = || col("C");
4904 let d = || col("D");
4905
4906 let expr = a().and(b()).or(a().and(c()));
4908 let expected = a().and(b().or(c()));
4909
4910 assert_eq!(expected, simplifier.simplify(expr).unwrap());
4911
4912 let expr = a().and(b()).or(a().and(c())).or(a().and(d()));
4914 let expected = a().and(b().or(c()).or(d()));
4915 assert_eq!(expected, simplifier.simplify(expr).unwrap());
4916
4917 let expr = a().or(b().and(c().and(a())));
4919 let expected = a();
4920 assert_eq!(expected, simplifier.simplify(expr).unwrap());
4921 }
4922
4923 #[test]
4924 fn test_simplify_udaf() {
4925 let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_with_simplify());
4926 let aggregate_function_expr =
4927 Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4928 udaf.into(),
4929 vec![],
4930 false,
4931 None,
4932 vec![],
4933 None,
4934 ));
4935
4936 let expected = col("result_column");
4937 assert_eq!(simplify(aggregate_function_expr), expected);
4938
4939 let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_without_simplify());
4940 let aggregate_function_expr =
4941 Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4942 udaf.into(),
4943 vec![],
4944 false,
4945 None,
4946 vec![],
4947 None,
4948 ));
4949
4950 let expected = aggregate_function_expr.clone();
4951 assert_eq!(simplify(aggregate_function_expr), expected);
4952 }
4953
4954 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
4957 struct SimplifyMockUdaf {
4958 simplify: bool,
4959 }
4960
4961 impl SimplifyMockUdaf {
4962 fn new_with_simplify() -> Self {
4964 Self { simplify: true }
4965 }
4966 fn new_without_simplify() -> Self {
4968 Self { simplify: false }
4969 }
4970 }
4971
4972 impl AggregateUDFImpl for SimplifyMockUdaf {
4973 fn name(&self) -> &str {
4974 "mock_simplify"
4975 }
4976
4977 fn signature(&self) -> &Signature {
4978 unimplemented!()
4979 }
4980
4981 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4982 unimplemented!("not needed for tests")
4983 }
4984
4985 fn accumulator(
4986 &self,
4987 _acc_args: AccumulatorArgs,
4988 ) -> Result<Box<dyn Accumulator>> {
4989 unimplemented!("not needed for tests")
4990 }
4991
4992 fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
4993 unimplemented!("not needed for testing")
4994 }
4995
4996 fn create_groups_accumulator(
4997 &self,
4998 _args: AccumulatorArgs,
4999 ) -> Result<Box<dyn GroupsAccumulator>> {
5000 unimplemented!("not needed for testing")
5001 }
5002
5003 fn simplify(&self) -> Option<AggregateFunctionSimplification> {
5004 if self.simplify {
5005 Some(Box::new(|_, _| Ok(col("result_column"))))
5006 } else {
5007 None
5008 }
5009 }
5010 }
5011
5012 #[test]
5013 fn test_simplify_udwf() {
5014 let udwf = WindowFunctionDefinition::WindowUDF(
5015 WindowUDF::new_from_impl(SimplifyMockUdwf::new_with_simplify()).into(),
5016 );
5017 let window_function_expr = Expr::from(WindowFunction::new(udwf, vec![]));
5018
5019 let expected = col("result_column");
5020 assert_eq!(simplify(window_function_expr), expected);
5021
5022 let udwf = WindowFunctionDefinition::WindowUDF(
5023 WindowUDF::new_from_impl(SimplifyMockUdwf::new_without_simplify()).into(),
5024 );
5025 let window_function_expr = Expr::from(WindowFunction::new(udwf, vec![]));
5026
5027 let expected = window_function_expr.clone();
5028 assert_eq!(simplify(window_function_expr), expected);
5029 }
5030
5031 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
5034 struct SimplifyMockUdwf {
5035 simplify: bool,
5036 }
5037
5038 impl SimplifyMockUdwf {
5039 fn new_with_simplify() -> Self {
5041 Self { simplify: true }
5042 }
5043 fn new_without_simplify() -> Self {
5045 Self { simplify: false }
5046 }
5047 }
5048
5049 impl WindowUDFImpl for SimplifyMockUdwf {
5050 fn name(&self) -> &str {
5051 "mock_simplify"
5052 }
5053
5054 fn signature(&self) -> &Signature {
5055 unimplemented!()
5056 }
5057
5058 fn simplify(&self) -> Option<WindowFunctionSimplification> {
5059 if self.simplify {
5060 Some(Box::new(|_, _| Ok(col("result_column"))))
5061 } else {
5062 None
5063 }
5064 }
5065
5066 fn partition_evaluator(
5067 &self,
5068 _partition_evaluator_args: PartitionEvaluatorArgs,
5069 ) -> Result<Box<dyn PartitionEvaluator>> {
5070 unimplemented!("not needed for tests")
5071 }
5072
5073 fn field(&self, _field_args: WindowUDFFieldArgs) -> Result<FieldRef> {
5074 unimplemented!("not needed for tests")
5075 }
5076
5077 fn limit_effect(&self, _args: &[Arc<dyn PhysicalExpr>]) -> LimitEffect {
5078 LimitEffect::Unknown
5079 }
5080 }
5081 #[derive(Debug, PartialEq, Eq, Hash)]
5082 struct VolatileUdf {
5083 signature: Signature,
5084 }
5085
5086 impl VolatileUdf {
5087 pub fn new() -> Self {
5088 Self {
5089 signature: Signature::exact(vec![], Volatility::Volatile),
5090 }
5091 }
5092 }
5093 impl ScalarUDFImpl for VolatileUdf {
5094 fn name(&self) -> &str {
5095 "VolatileUdf"
5096 }
5097
5098 fn signature(&self) -> &Signature {
5099 &self.signature
5100 }
5101
5102 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
5103 Ok(DataType::Int16)
5104 }
5105
5106 fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
5107 panic!("dummy - not implemented")
5108 }
5109 }
5110
5111 #[test]
5112 fn test_optimize_volatile_conditions() {
5113 let fun = Arc::new(ScalarUDF::new_from_impl(VolatileUdf::new()));
5114 let rand = Expr::ScalarFunction(ScalarFunction::new_udf(fun, vec![]));
5115 {
5116 let expr = rand
5117 .clone()
5118 .eq(lit(0))
5119 .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0))));
5120
5121 assert_eq!(simplify(expr.clone()), expr);
5122 }
5123
5124 {
5125 let expr = col("column1")
5126 .eq(lit(2))
5127 .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0))));
5128
5129 assert_eq!(simplify(expr), col("column1").eq(lit(2)));
5130 }
5131
5132 {
5133 let expr = (col("column1").eq(lit(2)).and(rand.clone().eq(lit(0)))).or(col(
5134 "column1",
5135 )
5136 .eq(lit(2))
5137 .and(rand.clone().eq(lit(0))));
5138
5139 assert_eq!(
5140 simplify(expr),
5141 col("column1")
5142 .eq(lit(2))
5143 .and((rand.clone().eq(lit(0))).or(rand.clone().eq(lit(0))))
5144 );
5145 }
5146 }
5147
5148 #[test]
5149 fn simplify_fixed_size_binary_eq_lit() {
5150 let bytes = [1u8, 2, 3].as_slice();
5151
5152 let expr = col("c5").eq(lit(bytes));
5154
5155 let coerced = coerce(expr.clone());
5157 let schema = expr_test_schema();
5158 assert_eq!(
5159 coerced,
5160 col("c5")
5161 .cast_to(&DataType::Binary, schema.as_ref())
5162 .unwrap()
5163 .eq(lit(bytes))
5164 );
5165
5166 assert_eq!(
5168 simplify(coerced),
5169 col("c5").eq(Expr::Literal(
5170 ScalarValue::FixedSizeBinary(3, Some(bytes.to_vec()),),
5171 None
5172 ))
5173 );
5174 }
5175
5176 #[test]
5177 fn simplify_cast_literal() {
5178 let expr = Expr::Cast(Cast::new(Box::new(lit(123i32)), DataType::Int64));
5182 let expected = lit(123i64);
5183 assert_eq!(simplify(expr), expected);
5184
5185 let expr = Expr::Cast(Cast::new(
5188 Box::new(lit(1761630189642i64)),
5189 DataType::Timestamp(
5190 arrow::datatypes::TimeUnit::Nanosecond,
5191 Some("+00:00".into()),
5192 ),
5193 ));
5194 let result = simplify(expr);
5196 match result {
5197 Expr::Literal(ScalarValue::TimestampNanosecond(Some(val), tz), _) => {
5198 assert_eq!(val, 1761630189642i64);
5199 assert_eq!(tz.as_deref(), Some("+00:00"));
5200 }
5201 other => panic!("Expected TimestampNanosecond literal, got: {other:?}"),
5202 }
5203
5204 let expr = Expr::Cast(Cast::new(
5208 Box::new(lit("1761630189642")),
5209 DataType::Timestamp(
5210 arrow::datatypes::TimeUnit::Nanosecond,
5211 Some("+00:00".into()),
5212 ),
5213 ));
5214
5215 let schema = test_schema();
5217 let simplifier =
5218 ExprSimplifier::new(SimplifyContext::builder().with_schema(schema).build());
5219 let result = simplifier.simplify(expr);
5220 assert!(result.is_err(), "Expected error for invalid cast");
5221 let err_msg = result.unwrap_err().to_string();
5222 assert_contains!(err_msg, "Error parsing timestamp");
5223 }
5224
5225 fn if_not_null(expr: Expr, then: bool) -> Expr {
5226 Expr::Case(Case {
5227 expr: Some(expr.is_not_null().into()),
5228 when_then_expr: vec![(lit(true).into(), lit(then).into())],
5229 else_expr: None,
5230 })
5231 }
5232
5233 fn make_struct_cast_expr(source_fields: Fields, target_fields: Fields) -> Expr {
5239 let arrays: Vec<Arc<dyn Array>> = vec![
5241 Arc::new(Int32Array::from(vec![Some(1)])),
5242 Arc::new(Int32Array::from(vec![Some(2)])),
5243 ];
5244 let struct_array = StructArray::try_new(source_fields, arrays, None).unwrap();
5245
5246 Expr::Cast(Cast::new(
5247 Box::new(Expr::Literal(
5248 ScalarValue::Struct(Arc::new(struct_array)),
5249 None,
5250 )),
5251 DataType::Struct(target_fields),
5252 ))
5253 }
5254
5255 #[test]
5256 fn test_struct_cast_different_field_counts_not_foldable() {
5257 let source_fields = Fields::from(vec![
5261 Arc::new(Field::new("a", DataType::Int32, true)),
5262 Arc::new(Field::new("b", DataType::Int32, true)),
5263 ]);
5264
5265 let target_fields = Fields::from(vec![
5266 Arc::new(Field::new("x", DataType::Int32, true)),
5267 Arc::new(Field::new("y", DataType::Int32, true)),
5268 Arc::new(Field::new("z", DataType::Int32, true)),
5269 ]);
5270
5271 let expr = make_struct_cast_expr(source_fields, target_fields);
5272
5273 let simplifier = ExprSimplifier::new(
5274 SimplifyContext::builder()
5275 .with_schema(test_schema())
5276 .build(),
5277 );
5278
5279 let result = simplifier.simplify(expr.clone()).unwrap();
5281 assert_eq!(
5283 result, expr,
5284 "Struct cast with different field counts should remain unchanged (no const-folding)"
5285 );
5286 }
5287
5288 #[test]
5289 fn test_struct_cast_same_field_count_foldable() {
5290 let source_fields = Fields::from(vec![
5293 Arc::new(Field::new("a", DataType::Int32, true)),
5294 Arc::new(Field::new("b", DataType::Int32, true)),
5295 ]);
5296
5297 let target_fields = Fields::from(vec![
5298 Arc::new(Field::new("a", DataType::Int32, true)),
5299 Arc::new(Field::new("b", DataType::Int32, true)),
5300 ]);
5301
5302 let expr = make_struct_cast_expr(source_fields, target_fields);
5303
5304 let simplifier = ExprSimplifier::new(
5305 SimplifyContext::builder()
5306 .with_schema(test_schema())
5307 .build(),
5308 );
5309
5310 let result = simplifier.simplify(expr.clone()).unwrap();
5312 assert!(matches!(result, Expr::Literal(_, _)));
5314 assert_ne!(
5316 result, expr,
5317 "Struct cast with same field count should be simplified (not identical to input)"
5318 );
5319 }
5320
5321 #[test]
5322 fn test_struct_cast_different_names_same_count() {
5323 let source_fields = Fields::from(vec![
5327 Arc::new(Field::new("a", DataType::Int32, true)),
5328 Arc::new(Field::new("b", DataType::Int32, true)),
5329 ]);
5330
5331 let target_fields = Fields::from(vec![
5332 Arc::new(Field::new("x", DataType::Int32, true)),
5333 Arc::new(Field::new("y", DataType::Int32, true)),
5334 ]);
5335
5336 let expr = make_struct_cast_expr(source_fields, target_fields);
5337
5338 let simplifier = ExprSimplifier::new(
5339 SimplifyContext::builder()
5340 .with_schema(test_schema())
5341 .build(),
5342 );
5343
5344 let result = simplifier.simplify(expr.clone()).unwrap();
5346 assert_eq!(
5347 result, expr,
5348 "Struct cast with different names but same field count should not be simplified"
5349 );
5350 }
5351
5352 #[test]
5353 fn test_struct_cast_empty_array_not_foldable() {
5354 let source_fields = Fields::from(vec![
5359 Arc::new(Field::new("a", DataType::Int32, true)),
5360 Arc::new(Field::new("b", DataType::Int32, true)),
5361 ]);
5362
5363 let target_fields = Fields::from(vec![
5364 Arc::new(Field::new("a", DataType::Int32, true)),
5365 Arc::new(Field::new("b", DataType::Int32, true)),
5366 ]);
5367
5368 let arrays: Vec<Arc<dyn Array>> = vec![
5370 Arc::new(Int32Array::new(vec![].into(), None)),
5371 Arc::new(Int32Array::new(vec![].into(), None)),
5372 ];
5373 let struct_array = StructArray::try_new(source_fields, arrays, None).unwrap();
5374
5375 let expr = Expr::Cast(Cast::new(
5376 Box::new(Expr::Literal(
5377 ScalarValue::Struct(Arc::new(struct_array)),
5378 None,
5379 )),
5380 DataType::Struct(target_fields),
5381 ));
5382
5383 let simplifier = ExprSimplifier::new(
5384 SimplifyContext::builder()
5385 .with_schema(test_schema())
5386 .build(),
5387 );
5388
5389 let result = simplifier.simplify(expr.clone()).unwrap();
5391 assert_eq!(
5392 result, expr,
5393 "Struct cast with empty (0-row) array should remain unchanged"
5394 );
5395 }
5396}