Skip to main content

datafusion_optimizer/simplify_expressions/
expr_simplifier.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Expression simplification API
19
20use arrow::{
21    array::{Array, AsArray, new_null_array},
22    datatypes::{DataType, Field, Schema},
23    record_batch::RecordBatch,
24};
25use std::borrow::Cow;
26use std::collections::HashSet;
27use std::ops::Not;
28use std::sync::Arc;
29
30use datafusion_common::config::ConfigOptions;
31use datafusion_common::nested_struct::has_one_of_more_common_fields;
32use datafusion_common::{
33    DFSchema, DataFusionError, Result, ScalarValue, exec_datafusion_err, internal_err,
34};
35use datafusion_common::{
36    HashMap,
37    cast::{as_large_list_array, as_list_array},
38    metadata::FieldMetadata,
39    tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter},
40};
41use datafusion_expr::{
42    BinaryExpr, Case, ColumnarValue, Expr, ExprSchemable, Like, Operator, Volatility,
43    and, binary::BinaryTypeCoercer, lit, or, preimage::PreimageResult,
44};
45use datafusion_expr::{Cast, TryCast, simplify::ExprSimplifyResult};
46use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval};
47use datafusion_expr::{
48    expr::{InList, InSubquery},
49    utils::{iter_conjunction, iter_conjunction_owned},
50};
51use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
52
53use super::inlist_simplifier::ShortenInListSimplifier;
54use super::utils::*;
55use crate::simplify_expressions::SimplifyContext;
56use crate::simplify_expressions::regex::simplify_regex_expr;
57use crate::simplify_expressions::unwrap_cast::{
58    is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary,
59    is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist,
60    unwrap_cast_in_comparison_for_binary,
61};
62use crate::{
63    analyzer::type_coercion::TypeCoercionRewriter,
64    simplify_expressions::udf_preimage::rewrite_with_preimage,
65};
66use datafusion_expr::expr_rewriter::rewrite_with_guarantees_map;
67use datafusion_expr_common::casts::try_cast_literal_to_type;
68use indexmap::IndexSet;
69use regex::Regex;
70
71/// This structure handles API for expression simplification
72///
73/// Provides simplification information based on DFSchema and
74/// [`ExecutionProps`]. This is the default implementation used by DataFusion
75///
76/// For example:
77/// ```
78/// use arrow::datatypes::{DataType, Field, Schema};
79/// use datafusion_common::{DataFusionError, ToDFSchema};
80/// use datafusion_expr::simplify::SimplifyContext;
81/// use datafusion_expr::{col, lit};
82/// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
83///
84/// // Create the schema
85/// let schema = Schema::new(vec![Field::new("i", DataType::Int64, false)])
86///     .to_dfschema_ref()
87///     .unwrap();
88///
89/// // Create the simplifier
90/// let context = SimplifyContext::default().with_schema(schema);
91/// let simplifier = ExprSimplifier::new(context);
92///
93/// // Use the simplifier
94///
95/// // b < 2 or (1 > 3)
96/// let expr = col("b").lt(lit(2)).or(lit(1).gt(lit(3)));
97///
98/// // b < 2
99/// let simplified = simplifier.simplify(expr).unwrap();
100/// assert_eq!(simplified, col("b").lt(lit(2)));
101/// ```
102pub struct ExprSimplifier {
103    info: SimplifyContext,
104    /// Guarantees about the values of columns. This is provided by the user
105    /// in [ExprSimplifier::with_guarantees()].
106    guarantees: Vec<(Expr, NullableInterval)>,
107    /// Should expressions be canonicalized before simplification? Defaults to
108    /// true
109    canonicalize: bool,
110    /// Maximum number of simplifier cycles
111    max_simplifier_cycles: u32,
112}
113
114pub const THRESHOLD_INLINE_INLIST: usize = 3;
115pub const DEFAULT_MAX_SIMPLIFIER_CYCLES: u32 = 3;
116
117impl ExprSimplifier {
118    /// Create a new `ExprSimplifier` with the given [`SimplifyContext`].
119    /// See [`simplify`](Self::simplify) for an example.
120    ///
121    /// [`SimplifyContext`]: datafusion_expr::simplify::SimplifyContext
122    pub fn new(info: SimplifyContext) -> Self {
123        Self {
124            info,
125            guarantees: vec![],
126            canonicalize: true,
127            max_simplifier_cycles: DEFAULT_MAX_SIMPLIFIER_CYCLES,
128        }
129    }
130
131    /// Simplifies this [`Expr`] as much as possible, evaluating
132    /// constants and applying algebraic simplifications.
133    ///
134    /// The types of the expression must match what operators expect,
135    /// or else an error may occur trying to evaluate. See
136    /// [`coerce`](Self::coerce) for a function to help.
137    ///
138    /// # Example:
139    ///
140    /// `b > 2 AND b > 2`
141    ///
142    /// can be written to
143    ///
144    /// `b > 2`
145    ///
146    /// ```
147    /// use arrow::datatypes::{DataType, Field, Schema};
148    /// use datafusion_common::{DFSchema, ToDFSchema};
149    /// use datafusion_common::Result;
150    /// use datafusion_expr::simplify::SimplifyContext;
151    /// use datafusion_expr::{col, lit, Expr};
152    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
153    /// use std::sync::Arc;
154    ///
155    /// // Create a schema and SimplifyContext
156    /// let schema = Schema::new(vec![Field::new("b", DataType::Int32, true)])
157    ///     .to_dfschema_ref()
158    ///     .unwrap();
159    /// // Create the simplifier
160    /// let context = SimplifyContext::default().with_schema(schema);
161    /// let simplifier = ExprSimplifier::new(context);
162    ///
163    /// // b < 2
164    /// let b_lt_2 = col("b").gt(lit(2));
165    ///
166    /// // (b < 2) OR (b < 2)
167    /// let expr = b_lt_2.clone().or(b_lt_2.clone());
168    ///
169    /// // (b < 2) OR (b < 2) --> (b < 2)
170    /// let expr = simplifier.simplify(expr).unwrap();
171    /// assert_eq!(expr, b_lt_2);
172    /// ```
173    pub fn simplify(&self, expr: Expr) -> Result<Expr> {
174        Ok(self.simplify_with_cycle_count_transformed(expr)?.0.data)
175    }
176
177    /// Like [Self::simplify], simplifies this [`Expr`] as much as possible, evaluating
178    /// constants and applying algebraic simplifications. Additionally returns a `u32`
179    /// representing the number of simplification cycles performed, which can be useful for testing
180    /// optimizations.
181    ///
182    /// See [Self::simplify] for details and usage examples.
183    #[deprecated(
184        since = "48.0.0",
185        note = "Use `simplify_with_cycle_count_transformed` instead"
186    )]
187    #[expect(unused_mut)]
188    pub fn simplify_with_cycle_count(&self, mut expr: Expr) -> Result<(Expr, u32)> {
189        let (transformed, cycle_count) =
190            self.simplify_with_cycle_count_transformed(expr)?;
191        Ok((transformed.data, cycle_count))
192    }
193
194    /// Like [Self::simplify], simplifies this [`Expr`] as much as possible, evaluating
195    /// constants and applying algebraic simplifications. Additionally returns a `u32`
196    /// representing the number of simplification cycles performed, which can be useful for testing
197    /// optimizations.
198    ///
199    /// # Returns
200    ///
201    /// A tuple containing:
202    /// - The simplified expression wrapped in a `Transformed<Expr>` indicating if changes were made
203    /// - The number of simplification cycles that were performed
204    ///
205    /// See [Self::simplify] for details and usage examples.
206    pub fn simplify_with_cycle_count_transformed(
207        &self,
208        mut expr: Expr,
209    ) -> Result<(Transformed<Expr>, u32)> {
210        let mut simplifier = Simplifier::new(&self.info);
211        let config_options = Some(Arc::clone(self.info.config_options()));
212        let mut const_evaluator = ConstEvaluator::try_new(config_options)?;
213        let mut shorten_in_list_simplifier = ShortenInListSimplifier::new();
214        let guarantees_map: HashMap<&Expr, &NullableInterval> =
215            self.guarantees.iter().map(|(k, v)| (k, v)).collect();
216
217        if self.canonicalize {
218            expr = expr.rewrite(&mut Canonicalizer::new()).data()?
219        }
220
221        // Evaluating constants can enable new simplifications and
222        // simplifications can enable new constant evaluation
223        // see `Self::with_max_cycles`
224        let mut num_cycles = 0;
225        let mut has_transformed = false;
226        loop {
227            let Transformed {
228                data, transformed, ..
229            } = expr
230                .rewrite(&mut const_evaluator)?
231                .transform_data(|expr| expr.rewrite(&mut simplifier))?
232                .transform_data(|expr| {
233                    rewrite_with_guarantees_map(expr, &guarantees_map)
234                })?;
235            expr = data;
236            num_cycles += 1;
237            // Track if any transformation occurred
238            has_transformed = has_transformed || transformed;
239            if !transformed || num_cycles >= self.max_simplifier_cycles {
240                break;
241            }
242        }
243        // shorten inlist should be started after other inlist rules are applied
244        expr = expr.rewrite(&mut shorten_in_list_simplifier).data()?;
245        Ok((
246            Transformed::new_transformed(expr, has_transformed),
247            num_cycles,
248        ))
249    }
250
251    /// Apply type coercion to an [`Expr`] so that it can be
252    /// evaluated as a [`PhysicalExpr`](datafusion_physical_expr::PhysicalExpr).
253    ///
254    /// See the [type coercion module](datafusion_expr::type_coercion)
255    /// documentation for more details on type coercion
256    pub fn coerce(&self, expr: Expr, schema: &DFSchema) -> Result<Expr> {
257        let mut expr_rewrite = TypeCoercionRewriter { schema };
258        expr.rewrite(&mut expr_rewrite).data()
259    }
260
261    /// Input guarantees about the values of columns.
262    ///
263    /// The guarantees can simplify expressions. For example, if a column `x` is
264    /// guaranteed to be `3`, then the expression `x > 1` can be replaced by the
265    /// literal `true`.
266    ///
267    /// The guarantees are provided as a `Vec<(Expr, NullableInterval)>`,
268    /// where the [Expr] is a column reference and the [NullableInterval]
269    /// is an interval representing the known possible values of that column.
270    ///
271    /// ```rust
272    /// use arrow::datatypes::{DataType, Field, Schema};
273    /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
274    /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
275    /// use datafusion_expr::simplify::SimplifyContext;
276    /// use datafusion_expr::{col, lit, Expr};
277    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
278    ///
279    /// let schema = Schema::new(vec![
280    ///     Field::new("x", DataType::Int64, false),
281    ///     Field::new("y", DataType::UInt32, false),
282    ///     Field::new("z", DataType::Int64, false),
283    /// ])
284    /// .to_dfschema_ref()
285    /// .unwrap();
286    ///
287    /// // Create the simplifier
288    /// let context = SimplifyContext::default().with_schema(schema);
289    ///
290    /// // Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5)
291    /// let expr_x = col("x").gt_eq(lit(3_i64));
292    /// let expr_y = (col("y") + lit(2_u32)).lt(lit(10_u32));
293    /// let expr_z = col("z").gt(lit(5_i64));
294    /// let expr = expr_x.and(expr_y).and(expr_z.clone());
295    ///
296    /// let guarantees = vec![
297    ///     // x ∈ [3, 5]
298    ///     (
299    ///         col("x"),
300    ///         NullableInterval::NotNull {
301    ///             values: Interval::make(Some(3_i64), Some(5_i64)).unwrap(),
302    ///         },
303    ///     ),
304    ///     // y = 3
305    ///     (
306    ///         col("y"),
307    ///         NullableInterval::from(ScalarValue::UInt32(Some(3))),
308    ///     ),
309    /// ];
310    /// let simplifier = ExprSimplifier::new(context).with_guarantees(guarantees);
311    /// let output = simplifier.simplify(expr).unwrap();
312    /// // Expression becomes: true AND true AND (z > 5), which simplifies to
313    /// // z > 5.
314    /// assert_eq!(output, expr_z);
315    /// ```
316    pub fn with_guarantees(mut self, guarantees: Vec<(Expr, NullableInterval)>) -> Self {
317        self.guarantees = guarantees;
318        self
319    }
320
321    /// Should `Canonicalizer` be applied before simplification?
322    ///
323    /// If true (the default), the expression will be rewritten to canonical
324    /// form before simplification. This is useful to ensure that the simplifier
325    /// can apply all possible simplifications.
326    ///
327    /// Some expressions, such as those in some Joins, can not be canonicalized
328    /// without changing their meaning. In these cases, canonicalization should
329    /// be disabled.
330    ///
331    /// ```rust
332    /// use arrow::datatypes::{DataType, Field, Schema};
333    /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
334    /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
335    /// use datafusion_expr::simplify::SimplifyContext;
336    /// use datafusion_expr::{col, lit, Expr};
337    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
338    ///
339    /// let schema = Schema::new(vec![
340    ///     Field::new("a", DataType::Int64, false),
341    ///     Field::new("b", DataType::Int64, false),
342    ///     Field::new("c", DataType::Int64, false),
343    /// ])
344    /// .to_dfschema_ref()
345    /// .unwrap();
346    ///
347    /// // Create the simplifier
348    /// let context = SimplifyContext::default().with_schema(schema);
349    /// let simplifier = ExprSimplifier::new(context);
350    ///
351    /// // Expression: a = c AND 1 = b
352    /// let expr = col("a").eq(col("c")).and(lit(1).eq(col("b")));
353    ///
354    /// // With canonicalization, the expression is rewritten to canonical form
355    /// // (though it is no simpler in this case):
356    /// let canonical = simplifier.simplify(expr.clone()).unwrap();
357    /// // Expression has been rewritten to: (c = a AND b = 1)
358    /// assert_eq!(canonical, col("c").eq(col("a")).and(col("b").eq(lit(1))));
359    ///
360    /// // If canonicalization is disabled, the expression is not changed
361    /// let non_canonicalized = simplifier
362    ///     .with_canonicalize(false)
363    ///     .simplify(expr.clone())
364    ///     .unwrap();
365    ///
366    /// assert_eq!(non_canonicalized, expr);
367    /// ```
368    pub fn with_canonicalize(mut self, canonicalize: bool) -> Self {
369        self.canonicalize = canonicalize;
370        self
371    }
372
373    /// Specifies the maximum number of simplification cycles to run.
374    ///
375    /// The simplifier can perform multiple passes of simplification. This is
376    /// because the output of one simplification step can allow more optimizations
377    /// in another simplification step. For example, constant evaluation can allow more
378    /// expression simplifications, and expression simplifications can allow more constant
379    /// evaluations.
380    ///
381    /// This method specifies the maximum number of allowed iteration cycles before the simplifier
382    /// returns an [Expr] output. However, it does not always perform the maximum number of cycles.
383    /// The simplifier will attempt to detect when an [Expr] is unchanged by all the simplification
384    /// passes, and return early. This avoids wasting time on unnecessary [Expr] tree traversals.
385    ///
386    /// If no maximum is specified, the value of [DEFAULT_MAX_SIMPLIFIER_CYCLES] is used
387    /// instead.
388    ///
389    /// ```rust
390    /// use arrow::datatypes::{DataType, Field, Schema};
391    /// use datafusion_expr::{col, lit, Expr};
392    /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
393    /// use datafusion_expr::simplify::SimplifyContext;
394    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
395    ///
396    /// let schema = Schema::new(vec![
397    ///   Field::new("a", DataType::Int64, false),
398    ///   ])
399    ///   .to_dfschema_ref().unwrap();
400    ///
401    /// // Create the simplifier
402    /// let context = SimplifyContext::default().with_schema(schema);
403    /// let simplifier = ExprSimplifier::new(context);
404    ///
405    /// // Expression: a IS NOT NULL
406    /// let expr = col("a").is_not_null();
407    ///
408    /// // When using default maximum cycles, 2 cycles will be performed.
409    /// let (simplified_expr, count) = simplifier.simplify_with_cycle_count_transformed(expr.clone()).unwrap();
410    /// assert_eq!(simplified_expr.data, lit(true));
411    /// // 2 cycles were executed, but only 1 was needed
412    /// assert_eq!(count, 2);
413    ///
414    /// // Only 1 simplification pass is necessary here, so we can set the maximum cycles to 1.
415    /// let (simplified_expr, count) = simplifier.with_max_cycles(1).simplify_with_cycle_count_transformed(expr.clone()).unwrap();
416    /// // Expression has been rewritten to: (c = a AND b = 1)
417    /// assert_eq!(simplified_expr.data, lit(true));
418    /// // Only 1 cycle was executed
419    /// assert_eq!(count, 1);
420    /// ```
421    pub fn with_max_cycles(mut self, max_simplifier_cycles: u32) -> Self {
422        self.max_simplifier_cycles = max_simplifier_cycles;
423        self
424    }
425}
426
427/// Canonicalize any BinaryExprs that are not in canonical form
428///
429/// `<literal> <op> <col>` is rewritten to `<col> <op> <literal>`
430///
431/// `<col1> <op> <col2>` is rewritten so that the name of `col1` sorts higher
432/// than `col2` (`a > b` would be canonicalized to `b < a`)
433struct Canonicalizer {}
434
435impl Canonicalizer {
436    fn new() -> Self {
437        Self {}
438    }
439}
440
441impl TreeNodeRewriter for Canonicalizer {
442    type Node = Expr;
443
444    fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
445        let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr else {
446            return Ok(Transformed::no(expr));
447        };
448        match (left.as_ref(), right.as_ref(), op.swap()) {
449            // <col1> <op> <col2>
450            (Expr::Column(left_col), Expr::Column(right_col), Some(swapped_op))
451                if right_col > left_col =>
452            {
453                Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
454                    left: right,
455                    op: swapped_op,
456                    right: left,
457                })))
458            }
459            // <literal> <op> <col>
460            (Expr::Literal(_a, _), Expr::Column(_b), Some(swapped_op)) => {
461                Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
462                    left: right,
463                    op: swapped_op,
464                    right: left,
465                })))
466            }
467            _ => Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
468                left,
469                op,
470                right,
471            }))),
472        }
473    }
474}
475
476/// Partially evaluate `Expr`s so constant subtrees are evaluated at plan time.
477///
478/// Note it does not handle algebraic rewrites such as `(a or false)`
479/// --> `a`, which is handled by [`Simplifier`]
480struct ConstEvaluator {
481    /// `can_evaluate` is used during the depth-first-search of the
482    /// `Expr` tree to track if any siblings (or their descendants) were
483    /// non evaluatable (e.g. had a column reference or volatile
484    /// function)
485    ///
486    /// Specifically, `can_evaluate[N]` represents the state of
487    /// traversal when we are N levels deep in the tree, one entry for
488    /// this Expr and each of its parents.
489    ///
490    /// After visiting all siblings if `can_evaluate.top()` is true, that
491    /// means there were no non evaluatable siblings (or their
492    /// descendants) so this `Expr` can be evaluated
493    can_evaluate: Vec<bool>,
494    /// Execution properties needed to call [`create_physical_expr`].
495    /// `ConstEvaluator` only evaluates expressions without column references
496    /// (i.e. constant expressions) and doesn't use the variable binding features
497    /// of `ExecutionProps` (we explicitly filter out [`Expr::ScalarVariable`]).
498    /// The `config_options` are passed from the session to allow scalar functions
499    /// to access configuration like timezone.
500    execution_props: ExecutionProps,
501    input_schema: DFSchema,
502    input_batch: RecordBatch,
503}
504
505/// The simplify result of ConstEvaluator
506enum ConstSimplifyResult {
507    // Expr was simplified and contains the new expression
508    Simplified(ScalarValue, Option<FieldMetadata>),
509    // Expr was not simplified and original value is returned
510    NotSimplified(ScalarValue, Option<FieldMetadata>),
511    // Evaluation encountered an error, contains the original expression
512    SimplifyRuntimeError(DataFusionError, Expr),
513}
514
515impl TreeNodeRewriter for ConstEvaluator {
516    type Node = Expr;
517
518    fn f_down(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
519        // Default to being able to evaluate this node
520        self.can_evaluate.push(true);
521
522        // if this expr is not ok to evaluate, mark entire parent
523        // stack as not ok (as all parents have at least one child or
524        // descendant that can not be evaluated
525
526        if !Self::can_evaluate(&expr) {
527            // walk back up stack, marking first parent that is not mutable
528            let parent_iter = self.can_evaluate.iter_mut().rev();
529            for p in parent_iter {
530                if !*p {
531                    // optimization: if we find an element on the
532                    // stack already marked, know all elements above are also marked
533                    break;
534                }
535                *p = false;
536            }
537        }
538
539        // NB: do not short circuit recursion even if we find a non
540        // evaluatable node (so we can fold other children, args to
541        // functions, etc.)
542        Ok(Transformed::no(expr))
543    }
544
545    fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
546        match self.can_evaluate.pop() {
547            // Certain expressions such as `CASE` and `COALESCE` are short-circuiting
548            // and may not evaluate all their sub expressions. Thus, if
549            // any error is countered during simplification, return the original
550            // so that normal evaluation can occur
551            Some(true) => match self.evaluate_to_scalar(expr) {
552                ConstSimplifyResult::Simplified(s, m) => {
553                    Ok(Transformed::yes(Expr::Literal(s, m)))
554                }
555                ConstSimplifyResult::NotSimplified(s, m) => {
556                    Ok(Transformed::no(Expr::Literal(s, m)))
557                }
558                ConstSimplifyResult::SimplifyRuntimeError(err, expr) => {
559                    // For CAST expressions with literal inputs, propagate the error at plan time rather than deferring to execution time.
560                    // This provides clearer error messages and fails fast.
561                    if let Expr::Cast(Cast { ref expr, .. })
562                    | Expr::TryCast(TryCast { ref expr, .. }) = expr
563                        && matches!(expr.as_ref(), Expr::Literal(_, _))
564                    {
565                        return Err(err);
566                    }
567                    // For other expressions (like CASE, COALESCE), preserve the original
568                    // to allow short-circuit evaluation at execution time
569                    Ok(Transformed::yes(expr))
570                }
571            },
572            Some(false) => Ok(Transformed::no(expr)),
573            _ => internal_err!("Failed to pop can_evaluate"),
574        }
575    }
576}
577
578impl ConstEvaluator {
579    /// Create a new `ConstantEvaluator`.
580    ///
581    /// Note: `ConstEvaluator` filters out expressions with scalar variables
582    /// (like `$var`) and volatile functions, so it creates its own default
583    /// `ExecutionProps` internally. The filtered expressions will be evaluated
584    /// at runtime where proper variable bindings are available.
585    ///
586    /// The `config_options` parameter is used to pass session configuration
587    /// (like timezone) to scalar functions during constant evaluation.
588    pub fn try_new(config_options: Option<Arc<ConfigOptions>>) -> Result<Self> {
589        // The dummy column name is unused and doesn't matter as only
590        // expressions without column references can be evaluated
591        static DUMMY_COL_NAME: &str = ".";
592        let schema = Arc::new(Schema::new(vec![Field::new(
593            DUMMY_COL_NAME,
594            DataType::Null,
595            true,
596        )]));
597        let input_schema = DFSchema::try_from(Arc::clone(&schema))?;
598        // Need a single "input" row to produce a single output row
599        let col = new_null_array(&DataType::Null, 1);
600        let input_batch = RecordBatch::try_new(schema, vec![col])?;
601
602        let mut execution_props = ExecutionProps::new();
603        execution_props.config_options = config_options;
604
605        Ok(Self {
606            can_evaluate: vec![],
607            execution_props,
608            input_schema,
609            input_batch,
610        })
611    }
612
613    /// Can a function of the specified volatility be evaluated?
614    fn volatility_ok(volatility: Volatility) -> bool {
615        match volatility {
616            Volatility::Immutable => true,
617            // Values for functions such as now() are taken from ExecutionProps
618            Volatility::Stable => true,
619            Volatility::Volatile => false,
620        }
621    }
622
623    /// Can the expression be evaluated at plan time, (assuming all of
624    /// its children can also be evaluated)?
625    fn can_evaluate(expr: &Expr) -> bool {
626        // check for reasons we can't evaluate this node
627        //
628        // NOTE all expr types are listed here so when new ones are
629        // added they can be checked for their ability to be evaluated
630        // at plan time
631        match expr {
632            // TODO: remove the next line after `Expr::Wildcard` is removed
633            #[expect(deprecated)]
634            Expr::AggregateFunction { .. }
635            | Expr::ScalarVariable(_, _)
636            | Expr::Column(_)
637            | Expr::OuterReferenceColumn(_, _)
638            | Expr::Exists { .. }
639            | Expr::InSubquery(_)
640            | Expr::SetComparison(_)
641            | Expr::ScalarSubquery(_)
642            | Expr::WindowFunction { .. }
643            | Expr::GroupingSet(_)
644            | Expr::Wildcard { .. }
645            | Expr::Placeholder(_) => false,
646            Expr::ScalarFunction(ScalarFunction { func, .. }) => {
647                Self::volatility_ok(func.signature().volatility)
648            }
649            Expr::Cast(Cast { expr, data_type })
650            | Expr::TryCast(TryCast { expr, data_type }) => {
651                if let (
652                    Ok(DataType::Struct(source_fields)),
653                    DataType::Struct(target_fields),
654                ) = (expr.get_type(&DFSchema::empty()), data_type)
655                {
656                    // Don't const-fold struct casts with different field counts
657                    if source_fields.len() != target_fields.len() {
658                        return false;
659                    }
660
661                    // Skip const-folding when there is no field name overlap
662                    if !has_one_of_more_common_fields(&source_fields, target_fields) {
663                        return false;
664                    }
665
666                    // Don't const-fold struct casts with empty (0-row) literals
667                    // The simplifier uses a 1-row input batch, which causes dimension mismatches
668                    // when evaluating 0-row struct literals
669                    if let Expr::Literal(ScalarValue::Struct(struct_array), _) =
670                        expr.as_ref()
671                        && struct_array.len() == 0
672                    {
673                        return false;
674                    }
675                }
676                true
677            }
678            Expr::Literal(_, _)
679            | Expr::Alias(..)
680            | Expr::Unnest(_)
681            | Expr::BinaryExpr { .. }
682            | Expr::Not(_)
683            | Expr::IsNotNull(_)
684            | Expr::IsNull(_)
685            | Expr::IsTrue(_)
686            | Expr::IsFalse(_)
687            | Expr::IsUnknown(_)
688            | Expr::IsNotTrue(_)
689            | Expr::IsNotFalse(_)
690            | Expr::IsNotUnknown(_)
691            | Expr::Negative(_)
692            | Expr::Between { .. }
693            | Expr::Like { .. }
694            | Expr::SimilarTo { .. }
695            | Expr::Case(_)
696            | Expr::InList { .. } => true,
697        }
698    }
699
700    /// Internal helper to evaluates an Expr
701    pub(crate) fn evaluate_to_scalar(&mut self, expr: Expr) -> ConstSimplifyResult {
702        if let Expr::Literal(s, m) = expr {
703            return ConstSimplifyResult::NotSimplified(s, m);
704        }
705
706        let phys_expr = match create_physical_expr(
707            &expr,
708            &self.input_schema,
709            &self.execution_props,
710        ) {
711            Ok(e) => e,
712            Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
713        };
714        let metadata = phys_expr
715            .return_field(self.input_batch.schema_ref())
716            .ok()
717            .and_then(|f| {
718                let m = f.metadata();
719                match m.is_empty() {
720                    true => None,
721                    false => Some(FieldMetadata::from(m)),
722                }
723            });
724        let col_val = match phys_expr.evaluate(&self.input_batch) {
725            Ok(v) => v,
726            Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
727        };
728        match col_val {
729            ColumnarValue::Array(a) => {
730                if a.len() != 1 {
731                    ConstSimplifyResult::SimplifyRuntimeError(
732                        exec_datafusion_err!(
733                            "Could not evaluate the expression, found a result of length {}",
734                            a.len()
735                        ),
736                        expr,
737                    )
738                } else if as_list_array(&a).is_ok() {
739                    ConstSimplifyResult::Simplified(
740                        ScalarValue::List(a.as_list::<i32>().to_owned().into()),
741                        metadata,
742                    )
743                } else if as_large_list_array(&a).is_ok() {
744                    ConstSimplifyResult::Simplified(
745                        ScalarValue::LargeList(a.as_list::<i64>().to_owned().into()),
746                        metadata,
747                    )
748                } else {
749                    // Non-ListArray
750                    match ScalarValue::try_from_array(&a, 0) {
751                        Ok(s) => ConstSimplifyResult::Simplified(s, metadata),
752                        Err(err) => ConstSimplifyResult::SimplifyRuntimeError(err, expr),
753                    }
754                }
755            }
756            ColumnarValue::Scalar(s) => ConstSimplifyResult::Simplified(s, metadata),
757        }
758    }
759}
760
761/// Simplifies [`Expr`]s by applying algebraic transformation rules
762///
763/// Example transformations that are applied:
764/// * `expr = true` and `expr != false` to `expr` when `expr` is of boolean type
765/// * `expr = false` and `expr != true` to `!expr` when `expr` is of boolean type
766/// * `true = true` and `false = false` to `true`
767/// * `false = true` and `true = false` to `false`
768/// * `!!expr` to `expr`
769/// * `expr = null` and `expr != null` to `null`
770struct Simplifier<'a> {
771    info: &'a SimplifyContext,
772}
773
774impl<'a> Simplifier<'a> {
775    pub fn new(info: &'a SimplifyContext) -> Self {
776        Self { info }
777    }
778}
779
780impl TreeNodeRewriter for Simplifier<'_> {
781    type Node = Expr;
782
783    /// rewrite the expression simplifying any constant expressions
784    fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
785        use datafusion_expr::Operator::{
786            And, BitwiseAnd, BitwiseOr, BitwiseShiftLeft, BitwiseShiftRight, BitwiseXor,
787            Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch,
788            RegexNotIMatch, RegexNotMatch,
789        };
790
791        let info = self.info;
792        Ok(match expr {
793            // `value op NULL` -> `NULL`
794            // `NULL op value` -> `NULL`
795            // except for few operators that can return non-null value even when one of the operands is NULL
796            ref expr @ Expr::BinaryExpr(BinaryExpr {
797                ref left,
798                ref op,
799                ref right,
800            }) if op.returns_null_on_null()
801                && (is_null(left.as_ref()) || is_null(right.as_ref())) =>
802            {
803                Transformed::yes(Expr::Literal(
804                    ScalarValue::try_new_null(&info.get_data_type(expr)?)?,
805                    None,
806                ))
807            }
808
809            // `NULL {AND, OR} NULL` -> `NULL`
810            Expr::BinaryExpr(BinaryExpr {
811                left,
812                op: And | Or,
813                right,
814            }) if is_null(&left) && is_null(&right) => Transformed::yes(lit_bool_null()),
815
816            //
817            // Rules for Eq
818            //
819
820            // true = A  --> A
821            // false = A --> !A
822            // null = A --> null
823            Expr::BinaryExpr(BinaryExpr {
824                left,
825                op: Eq,
826                right,
827            }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
828                Transformed::yes(match as_bool_lit(&left)? {
829                    Some(true) => *right,
830                    Some(false) => Expr::Not(right),
831                    None => lit_bool_null(),
832                })
833            }
834            // A = true  --> A
835            // A = false --> !A
836            // A = null --> null
837            Expr::BinaryExpr(BinaryExpr {
838                left,
839                op: Eq,
840                right,
841            }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
842                Transformed::yes(match as_bool_lit(&right)? {
843                    Some(true) => *left,
844                    Some(false) => Expr::Not(left),
845                    None => lit_bool_null(),
846                })
847            }
848            // According to SQL's null semantics, NULL = NULL evaluates to NULL
849            // Both sides are the same expression (A = A) and A is non-volatile expression
850            // A = A --> A IS NOT NULL OR NULL
851            // A = A --> true (if A not nullable)
852            Expr::BinaryExpr(BinaryExpr {
853                left,
854                op: Eq,
855                right,
856            }) if (left == right) & !left.is_volatile() => {
857                Transformed::yes(match !info.nullable(&left)? {
858                    true => lit(true),
859                    false => Expr::BinaryExpr(BinaryExpr {
860                        left: Box::new(Expr::IsNotNull(left)),
861                        op: Or,
862                        right: Box::new(lit_bool_null()),
863                    }),
864                })
865            }
866
867            // Rules for NotEq
868            //
869
870            // true != A  --> !A
871            // false != A --> A
872            // null != A --> null
873            Expr::BinaryExpr(BinaryExpr {
874                left,
875                op: NotEq,
876                right,
877            }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
878                Transformed::yes(match as_bool_lit(&left)? {
879                    Some(true) => Expr::Not(right),
880                    Some(false) => *right,
881                    None => lit_bool_null(),
882                })
883            }
884            // A != true  --> !A
885            // A != false --> A
886            // A != null --> null,
887            Expr::BinaryExpr(BinaryExpr {
888                left,
889                op: NotEq,
890                right,
891            }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
892                Transformed::yes(match as_bool_lit(&right)? {
893                    Some(true) => Expr::Not(left),
894                    Some(false) => *left,
895                    None => lit_bool_null(),
896                })
897            }
898
899            //
900            // Rules for OR
901            //
902
903            // true OR A --> true (even if A is null)
904            Expr::BinaryExpr(BinaryExpr {
905                left,
906                op: Or,
907                right: _,
908            }) if is_true(&left) => Transformed::yes(*left),
909            // false OR A --> A
910            Expr::BinaryExpr(BinaryExpr {
911                left,
912                op: Or,
913                right,
914            }) if is_false(&left) => Transformed::yes(*right),
915            // A OR true --> true (even if A is null)
916            Expr::BinaryExpr(BinaryExpr {
917                left: _,
918                op: Or,
919                right,
920            }) if is_true(&right) => Transformed::yes(*right),
921            // A OR false --> A
922            Expr::BinaryExpr(BinaryExpr {
923                left,
924                op: Or,
925                right,
926            }) if is_false(&right) => Transformed::yes(*left),
927            // A OR !A ---> true (if A not nullable)
928            Expr::BinaryExpr(BinaryExpr {
929                left,
930                op: Or,
931                right,
932            }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
933                Transformed::yes(lit(true))
934            }
935            // !A OR A ---> true (if A not nullable)
936            Expr::BinaryExpr(BinaryExpr {
937                left,
938                op: Or,
939                right,
940            }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
941                Transformed::yes(lit(true))
942            }
943            // (..A..) OR A --> (..A..)
944            Expr::BinaryExpr(BinaryExpr {
945                left,
946                op: Or,
947                right,
948            }) if expr_contains(&left, &right, Or) => Transformed::yes(*left),
949            // A OR (..A..) --> (..A..)
950            Expr::BinaryExpr(BinaryExpr {
951                left,
952                op: Or,
953                right,
954            }) if expr_contains(&right, &left, Or) => Transformed::yes(*right),
955            // A OR (A AND B) --> A
956            Expr::BinaryExpr(BinaryExpr {
957                left,
958                op: Or,
959                right,
960            }) if is_op_with(And, &right, &left) => Transformed::yes(*left),
961            // (A AND B) OR A --> A
962            Expr::BinaryExpr(BinaryExpr {
963                left,
964                op: Or,
965                right,
966            }) if is_op_with(And, &left, &right) => Transformed::yes(*right),
967            // Eliminate common factors in conjunctions e.g
968            // (A AND B) OR (A AND C) -> A AND (B OR C)
969            Expr::BinaryExpr(BinaryExpr {
970                left,
971                op: Or,
972                right,
973            }) if has_common_conjunction(&left, &right) => {
974                let lhs: IndexSet<Expr> = iter_conjunction_owned(*left).collect();
975                let (common, rhs): (Vec<_>, Vec<_>) = iter_conjunction_owned(*right)
976                    .partition(|e| lhs.contains(e) && !e.is_volatile());
977
978                let new_rhs = rhs.into_iter().reduce(and);
979                let new_lhs = lhs.into_iter().filter(|e| !common.contains(e)).reduce(and);
980                let common_conjunction = common.into_iter().reduce(and).unwrap();
981
982                let new_expr = match (new_lhs, new_rhs) {
983                    (Some(lhs), Some(rhs)) => and(common_conjunction, or(lhs, rhs)),
984                    (_, _) => common_conjunction,
985                };
986                Transformed::yes(new_expr)
987            }
988
989            //
990            // Rules for AND
991            //
992
993            // true AND A --> A
994            Expr::BinaryExpr(BinaryExpr {
995                left,
996                op: And,
997                right,
998            }) if is_true(&left) => Transformed::yes(*right),
999            // false AND A --> false (even if A is null)
1000            Expr::BinaryExpr(BinaryExpr {
1001                left,
1002                op: And,
1003                right: _,
1004            }) if is_false(&left) => Transformed::yes(*left),
1005            // A AND true --> A
1006            Expr::BinaryExpr(BinaryExpr {
1007                left,
1008                op: And,
1009                right,
1010            }) if is_true(&right) => Transformed::yes(*left),
1011            // A AND false --> false (even if A is null)
1012            Expr::BinaryExpr(BinaryExpr {
1013                left: _,
1014                op: And,
1015                right,
1016            }) if is_false(&right) => Transformed::yes(*right),
1017            // A AND !A ---> false (if A not nullable)
1018            Expr::BinaryExpr(BinaryExpr {
1019                left,
1020                op: And,
1021                right,
1022            }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
1023                Transformed::yes(lit(false))
1024            }
1025            // !A AND A ---> false (if A not nullable)
1026            Expr::BinaryExpr(BinaryExpr {
1027                left,
1028                op: And,
1029                right,
1030            }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
1031                Transformed::yes(lit(false))
1032            }
1033            // (..A..) AND A --> (..A..)
1034            Expr::BinaryExpr(BinaryExpr {
1035                left,
1036                op: And,
1037                right,
1038            }) if expr_contains(&left, &right, And) => Transformed::yes(*left),
1039            // A AND (..A..) --> (..A..)
1040            Expr::BinaryExpr(BinaryExpr {
1041                left,
1042                op: And,
1043                right,
1044            }) if expr_contains(&right, &left, And) => Transformed::yes(*right),
1045            // A AND (A OR B) --> A
1046            Expr::BinaryExpr(BinaryExpr {
1047                left,
1048                op: And,
1049                right,
1050            }) if is_op_with(Or, &right, &left) => Transformed::yes(*left),
1051            // (A OR B) AND A --> A
1052            Expr::BinaryExpr(BinaryExpr {
1053                left,
1054                op: And,
1055                right,
1056            }) if is_op_with(Or, &left, &right) => Transformed::yes(*right),
1057            // A >= constant AND constant <= A --> A = constant
1058            Expr::BinaryExpr(BinaryExpr {
1059                left,
1060                op: And,
1061                right,
1062            }) if can_reduce_to_equal_statement(&left, &right) => {
1063                if let Expr::BinaryExpr(BinaryExpr {
1064                    left: left_left,
1065                    right: left_right,
1066                    ..
1067                }) = *left
1068                {
1069                    Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1070                        left: left_left,
1071                        op: Eq,
1072                        right: left_right,
1073                    }))
1074                } else {
1075                    return internal_err!(
1076                        "can_reduce_to_equal_statement should only be called with a BinaryExpr"
1077                    );
1078                }
1079            }
1080            // A = L1 AND A != L2 --> A = L1 (when L1 != L2)
1081            Expr::BinaryExpr(BinaryExpr {
1082                left,
1083                op: And,
1084                right,
1085            }) if is_eq_and_ne_with_different_literal(&left, &right) => {
1086                Transformed::yes(*left)
1087            }
1088            // A != L2 AND A = L1 --> A = L1 (when L1 != L2)
1089            Expr::BinaryExpr(BinaryExpr {
1090                left,
1091                op: And,
1092                right,
1093            }) if is_eq_and_ne_with_different_literal(&right, &left) => {
1094                Transformed::yes(*right)
1095            }
1096
1097            //
1098            // Rules for Multiply
1099            //
1100
1101            // A * 1 --> A (with type coercion if needed)
1102            Expr::BinaryExpr(BinaryExpr {
1103                left,
1104                op: Multiply,
1105                right,
1106            }) if is_one(&right) => {
1107                simplify_right_is_one_case(info, left, &Multiply, &right)?
1108            }
1109            // 1 * A --> A
1110            Expr::BinaryExpr(BinaryExpr {
1111                left,
1112                op: Multiply,
1113                right,
1114            }) if is_one(&left) => {
1115                // 1 * A is equivalent to A * 1
1116                simplify_right_is_one_case(info, right, &Multiply, &left)?
1117            }
1118
1119            // A * 0 --> 0 (if A is not null and not floating, since NAN * 0 -> NAN)
1120            Expr::BinaryExpr(BinaryExpr {
1121                left,
1122                op: Multiply,
1123                right,
1124            }) if !info.nullable(&left)?
1125                && !info.get_data_type(&left)?.is_floating()
1126                && is_zero(&right) =>
1127            {
1128                Transformed::yes(*right)
1129            }
1130            // 0 * A --> 0 (if A is not null and not floating, since 0 * NAN -> NAN)
1131            Expr::BinaryExpr(BinaryExpr {
1132                left,
1133                op: Multiply,
1134                right,
1135            }) if !info.nullable(&right)?
1136                && !info.get_data_type(&right)?.is_floating()
1137                && is_zero(&left) =>
1138            {
1139                Transformed::yes(*left)
1140            }
1141
1142            //
1143            // Rules for Divide
1144            //
1145
1146            // A / 1 --> A
1147            Expr::BinaryExpr(BinaryExpr {
1148                left,
1149                op: Divide,
1150                right,
1151            }) if is_one(&right) => {
1152                simplify_right_is_one_case(info, left, &Divide, &right)?
1153            }
1154
1155            //
1156            // Rules for Modulo
1157            //
1158
1159            // A % 1 --> 0 (if A is not nullable and not floating, since NAN % 1 --> NAN)
1160            Expr::BinaryExpr(BinaryExpr {
1161                left,
1162                op: Modulo,
1163                right,
1164            }) if !info.nullable(&left)?
1165                && !info.get_data_type(&left)?.is_floating()
1166                && is_one(&right) =>
1167            {
1168                Transformed::yes(Expr::Literal(
1169                    ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1170                    None,
1171                ))
1172            }
1173
1174            //
1175            // Rules for BitwiseAnd
1176            //
1177
1178            // A & 0 -> 0 (if A not nullable)
1179            Expr::BinaryExpr(BinaryExpr {
1180                left,
1181                op: BitwiseAnd,
1182                right,
1183            }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*right),
1184
1185            // 0 & A -> 0 (if A not nullable)
1186            Expr::BinaryExpr(BinaryExpr {
1187                left,
1188                op: BitwiseAnd,
1189                right,
1190            }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*left),
1191
1192            // !A & A -> 0 (if A not nullable)
1193            Expr::BinaryExpr(BinaryExpr {
1194                left,
1195                op: BitwiseAnd,
1196                right,
1197            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1198                Transformed::yes(Expr::Literal(
1199                    ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1200                    None,
1201                ))
1202            }
1203
1204            // A & !A -> 0 (if A not nullable)
1205            Expr::BinaryExpr(BinaryExpr {
1206                left,
1207                op: BitwiseAnd,
1208                right,
1209            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1210                Transformed::yes(Expr::Literal(
1211                    ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1212                    None,
1213                ))
1214            }
1215
1216            // (..A..) & A --> (..A..)
1217            Expr::BinaryExpr(BinaryExpr {
1218                left,
1219                op: BitwiseAnd,
1220                right,
1221            }) if expr_contains(&left, &right, BitwiseAnd) => Transformed::yes(*left),
1222
1223            // A & (..A..) --> (..A..)
1224            Expr::BinaryExpr(BinaryExpr {
1225                left,
1226                op: BitwiseAnd,
1227                right,
1228            }) if expr_contains(&right, &left, BitwiseAnd) => Transformed::yes(*right),
1229
1230            // A & (A | B) --> A (if B not null)
1231            Expr::BinaryExpr(BinaryExpr {
1232                left,
1233                op: BitwiseAnd,
1234                right,
1235            }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => {
1236                Transformed::yes(*left)
1237            }
1238
1239            // (A | B) & A --> A (if B not null)
1240            Expr::BinaryExpr(BinaryExpr {
1241                left,
1242                op: BitwiseAnd,
1243                right,
1244            }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => {
1245                Transformed::yes(*right)
1246            }
1247
1248            //
1249            // Rules for BitwiseOr
1250            //
1251
1252            // A | 0 -> A (even if A is null)
1253            Expr::BinaryExpr(BinaryExpr {
1254                left,
1255                op: BitwiseOr,
1256                right,
1257            }) if is_zero(&right) => Transformed::yes(*left),
1258
1259            // 0 | A -> A (even if A is null)
1260            Expr::BinaryExpr(BinaryExpr {
1261                left,
1262                op: BitwiseOr,
1263                right,
1264            }) if is_zero(&left) => Transformed::yes(*right),
1265
1266            // !A | A -> -1 (if A not nullable)
1267            Expr::BinaryExpr(BinaryExpr {
1268                left,
1269                op: BitwiseOr,
1270                right,
1271            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1272                Transformed::yes(Expr::Literal(
1273                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1274                    None,
1275                ))
1276            }
1277
1278            // A | !A -> -1 (if A not nullable)
1279            Expr::BinaryExpr(BinaryExpr {
1280                left,
1281                op: BitwiseOr,
1282                right,
1283            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1284                Transformed::yes(Expr::Literal(
1285                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1286                    None,
1287                ))
1288            }
1289
1290            // (..A..) | A --> (..A..)
1291            Expr::BinaryExpr(BinaryExpr {
1292                left,
1293                op: BitwiseOr,
1294                right,
1295            }) if expr_contains(&left, &right, BitwiseOr) => Transformed::yes(*left),
1296
1297            // A | (..A..) --> (..A..)
1298            Expr::BinaryExpr(BinaryExpr {
1299                left,
1300                op: BitwiseOr,
1301                right,
1302            }) if expr_contains(&right, &left, BitwiseOr) => Transformed::yes(*right),
1303
1304            // A | (A & B) --> A (if B not null)
1305            Expr::BinaryExpr(BinaryExpr {
1306                left,
1307                op: BitwiseOr,
1308                right,
1309            }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => {
1310                Transformed::yes(*left)
1311            }
1312
1313            // (A & B) | A --> A (if B not null)
1314            Expr::BinaryExpr(BinaryExpr {
1315                left,
1316                op: BitwiseOr,
1317                right,
1318            }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => {
1319                Transformed::yes(*right)
1320            }
1321
1322            //
1323            // Rules for BitwiseXor
1324            //
1325
1326            // A ^ 0 -> A (if A not nullable)
1327            Expr::BinaryExpr(BinaryExpr {
1328                left,
1329                op: BitwiseXor,
1330                right,
1331            }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*left),
1332
1333            // 0 ^ A -> A (if A not nullable)
1334            Expr::BinaryExpr(BinaryExpr {
1335                left,
1336                op: BitwiseXor,
1337                right,
1338            }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*right),
1339
1340            // !A ^ A -> -1 (if A not nullable)
1341            Expr::BinaryExpr(BinaryExpr {
1342                left,
1343                op: BitwiseXor,
1344                right,
1345            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1346                Transformed::yes(Expr::Literal(
1347                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1348                    None,
1349                ))
1350            }
1351
1352            // A ^ !A -> -1 (if A not nullable)
1353            Expr::BinaryExpr(BinaryExpr {
1354                left,
1355                op: BitwiseXor,
1356                right,
1357            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1358                Transformed::yes(Expr::Literal(
1359                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1360                    None,
1361                ))
1362            }
1363
1364            // (..A..) ^ A --> (the expression without A, if number of A is odd, otherwise one A)
1365            Expr::BinaryExpr(BinaryExpr {
1366                left,
1367                op: BitwiseXor,
1368                right,
1369            }) if expr_contains(&left, &right, BitwiseXor) => {
1370                let expr = delete_xor_in_complex_expr(&left, &right, false);
1371                Transformed::yes(if expr == *right {
1372                    Expr::Literal(
1373                        ScalarValue::new_zero(&info.get_data_type(&right)?)?,
1374                        None,
1375                    )
1376                } else {
1377                    expr
1378                })
1379            }
1380
1381            // A ^ (..A..) --> (the expression without A, if number of A is odd, otherwise one A)
1382            Expr::BinaryExpr(BinaryExpr {
1383                left,
1384                op: BitwiseXor,
1385                right,
1386            }) if expr_contains(&right, &left, BitwiseXor) => {
1387                let expr = delete_xor_in_complex_expr(&right, &left, true);
1388                Transformed::yes(if expr == *left {
1389                    Expr::Literal(
1390                        ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1391                        None,
1392                    )
1393                } else {
1394                    expr
1395                })
1396            }
1397
1398            //
1399            // Rules for BitwiseShiftRight
1400            //
1401
1402            // A >> 0 -> A (even if A is null)
1403            Expr::BinaryExpr(BinaryExpr {
1404                left,
1405                op: BitwiseShiftRight,
1406                right,
1407            }) if is_zero(&right) => Transformed::yes(*left),
1408
1409            //
1410            // Rules for BitwiseShiftRight
1411            //
1412
1413            // A << 0 -> A (even if A is null)
1414            Expr::BinaryExpr(BinaryExpr {
1415                left,
1416                op: BitwiseShiftLeft,
1417                right,
1418            }) if is_zero(&right) => Transformed::yes(*left),
1419
1420            //
1421            // Rules for Not
1422            //
1423            Expr::Not(inner) => Transformed::yes(negate_clause(*inner)),
1424
1425            //
1426            // Rules for Negative
1427            //
1428            Expr::Negative(inner) => Transformed::yes(distribute_negation(*inner)),
1429
1430            //
1431            // Rules for Case
1432            //
1433
1434            // Inline a comparison to a literal with the case statement into the `THEN` clauses.
1435            // which can enable further simplifications
1436            // CASE WHEN X THEN "a" WHEN Y THEN "b" ... END = "a" --> CASE WHEN X THEN "a" = "a" WHEN Y THEN "b" = "a" END
1437            Expr::BinaryExpr(BinaryExpr {
1438                left,
1439                op: op @ (Eq | NotEq),
1440                right,
1441            }) if is_case_with_literal_outputs(&left) && is_lit(&right) => {
1442                let case = into_case(*left)?;
1443                Transformed::yes(Expr::Case(Case {
1444                    expr: None,
1445                    when_then_expr: case
1446                        .when_then_expr
1447                        .into_iter()
1448                        .map(|(when, then)| {
1449                            (
1450                                when,
1451                                Box::new(Expr::BinaryExpr(BinaryExpr {
1452                                    left: then,
1453                                    op,
1454                                    right: right.clone(),
1455                                })),
1456                            )
1457                        })
1458                        .collect(),
1459                    else_expr: case.else_expr.map(|els| {
1460                        Box::new(Expr::BinaryExpr(BinaryExpr {
1461                            left: els,
1462                            op,
1463                            right,
1464                        }))
1465                    }),
1466                }))
1467            }
1468
1469            // CASE WHEN true THEN A ... END --> A
1470            // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END
1471            // CASE WHEN false THEN A END --> NULL
1472            // CASE WHEN false THEN A ELSE B END --> B
1473            // CASE WHEN X THEN A WHEN false THEN B END --> CASE WHEN X THEN A ELSE B END
1474            Expr::Case(Case {
1475                expr: None,
1476                when_then_expr,
1477                mut else_expr,
1478            }) if when_then_expr
1479                .iter()
1480                .any(|(when, _)| is_true(when.as_ref()) || is_false(when.as_ref())) =>
1481            {
1482                let out_type = info.get_data_type(&when_then_expr[0].1)?;
1483                let mut new_when_then_expr = Vec::with_capacity(when_then_expr.len());
1484
1485                for (when, then) in when_then_expr.into_iter() {
1486                    if is_true(when.as_ref()) {
1487                        // Skip adding the rest of the when-then expressions after WHEN true
1488                        // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END
1489                        else_expr = Some(then);
1490                        break;
1491                    } else if !is_false(when.as_ref()) {
1492                        new_when_then_expr.push((when, then));
1493                    }
1494                    // else: skip WHEN false cases
1495                }
1496
1497                // Exclude CASE statement altogether if there are no when-then expressions left
1498                if new_when_then_expr.is_empty() {
1499                    // CASE WHEN false THEN A ELSE B END --> B
1500                    if let Some(else_expr) = else_expr {
1501                        return Ok(Transformed::yes(*else_expr));
1502                    // CASE WHEN false THEN A END --> NULL
1503                    } else {
1504                        let null =
1505                            Expr::Literal(ScalarValue::try_new_null(&out_type)?, None);
1506                        return Ok(Transformed::yes(null));
1507                    }
1508                }
1509
1510                Transformed::yes(Expr::Case(Case {
1511                    expr: None,
1512                    when_then_expr: new_when_then_expr,
1513                    else_expr,
1514                }))
1515            }
1516
1517            // CASE
1518            //   WHEN X THEN A
1519            //   WHEN Y THEN B
1520            //   ...
1521            //   ELSE Q
1522            // END
1523            //
1524            // ---> (X AND A) OR (Y AND B AND NOT X) OR ... (NOT (X OR Y) AND Q)
1525            //
1526            // Note: the rationale for this rewrite is that the expr can then be further
1527            // simplified using the existing rules for AND/OR
1528            Expr::Case(Case {
1529                expr: None,
1530                when_then_expr,
1531                else_expr,
1532            }) if !when_then_expr.is_empty()
1533                // The rewrite is O(n²) in general so limit to small number of when-thens that can be true
1534                && (when_then_expr.len() < 3 // small number of input whens
1535                    // or all thens are literal bools and a small number of them are true
1536                    || (when_then_expr.iter().all(|(_, then)| is_bool_lit(then))
1537                        && when_then_expr.iter().filter(|(_, then)| is_true(then)).count() < 3))
1538                && info.is_boolean_type(&when_then_expr[0].1)? =>
1539            {
1540                // String disjunction of all the when predicates encountered so far. Not nullable.
1541                let mut filter_expr = lit(false);
1542                // The disjunction of all the cases
1543                let mut out_expr = lit(false);
1544
1545                for (when, then) in when_then_expr {
1546                    let when = is_exactly_true(*when, info)?;
1547                    let case_expr =
1548                        when.clone().and(filter_expr.clone().not()).and(*then);
1549
1550                    out_expr = out_expr.or(case_expr);
1551                    filter_expr = filter_expr.or(when);
1552                }
1553
1554                let else_expr = else_expr.map(|b| *b).unwrap_or_else(lit_bool_null);
1555                let case_expr = filter_expr.not().and(else_expr);
1556                out_expr = out_expr.or(case_expr);
1557
1558                // Do a first pass at simplification
1559                out_expr.rewrite(self)?
1560            }
1561            // CASE
1562            //   WHEN X THEN true
1563            //   WHEN Y THEN true
1564            //   WHEN Z THEN false
1565            //   ...
1566            //   ELSE true
1567            // END
1568            //
1569            // --->
1570            //
1571            // NOT(CASE
1572            //   WHEN X THEN false
1573            //   WHEN Y THEN false
1574            //   WHEN Z THEN true
1575            //   ...
1576            //   ELSE false
1577            // END)
1578            //
1579            // Note: the rationale for this rewrite is that the case can then be further
1580            // simplified into a small number of ANDs and ORs
1581            Expr::Case(Case {
1582                expr: None,
1583                when_then_expr,
1584                else_expr,
1585            }) if !when_then_expr.is_empty()
1586                && when_then_expr
1587                    .iter()
1588                    .all(|(_, then)| is_bool_lit(then)) // all thens are literal bools
1589                // This simplification is only helpful if we end up with a small number of true thens
1590                && when_then_expr
1591                    .iter()
1592                    .filter(|(_, then)| is_false(then))
1593                    .count()
1594                    < 3
1595                && else_expr.as_deref().is_none_or(is_bool_lit) =>
1596            {
1597                Transformed::yes(
1598                    Expr::Case(Case {
1599                        expr: None,
1600                        when_then_expr: when_then_expr
1601                            .into_iter()
1602                            .map(|(when, then)| (when, Box::new(Expr::Not(then))))
1603                            .collect(),
1604                        else_expr: else_expr
1605                            .map(|else_expr| Box::new(Expr::Not(else_expr))),
1606                    })
1607                    .not(),
1608                )
1609            }
1610            Expr::ScalarFunction(ScalarFunction { func: udf, args }) => {
1611                match udf.simplify(args, info)? {
1612                    ExprSimplifyResult::Original(args) => {
1613                        Transformed::no(Expr::ScalarFunction(ScalarFunction {
1614                            func: udf,
1615                            args,
1616                        }))
1617                    }
1618                    ExprSimplifyResult::Simplified(expr) => Transformed::yes(expr),
1619                }
1620            }
1621
1622            Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction {
1623                ref func,
1624                ..
1625            }) => match (func.simplify(), expr) {
1626                (Some(simplify_function), Expr::AggregateFunction(af)) => {
1627                    Transformed::yes(simplify_function(af, info)?)
1628                }
1629                (_, expr) => Transformed::no(expr),
1630            },
1631
1632            Expr::WindowFunction(ref window_fun) => match (window_fun.simplify(), expr) {
1633                (Some(simplify_function), Expr::WindowFunction(wf)) => {
1634                    Transformed::yes(simplify_function(*wf, info)?)
1635                }
1636                (_, expr) => Transformed::no(expr),
1637            },
1638
1639            //
1640            // Rules for Between
1641            //
1642
1643            // a between 3 and 5  -->  a >= 3 AND a <=5
1644            // a not between 3 and 5  -->  a < 3 OR a > 5
1645            Expr::Between(between) => Transformed::yes(if between.negated {
1646                let l = *between.expr.clone();
1647                let r = *between.expr;
1648                or(l.lt(*between.low), r.gt(*between.high))
1649            } else {
1650                and(
1651                    between.expr.clone().gt_eq(*between.low),
1652                    between.expr.lt_eq(*between.high),
1653                )
1654            }),
1655
1656            //
1657            // Rules for regexes
1658            //
1659            Expr::BinaryExpr(BinaryExpr {
1660                left,
1661                op: op @ (RegexMatch | RegexNotMatch | RegexIMatch | RegexNotIMatch),
1662                right,
1663            }) => Transformed::yes(simplify_regex_expr(left, op, right)?),
1664
1665            // Rules for Like
1666            Expr::Like(like) => {
1667                // `\` is implicit escape, see https://github.com/apache/datafusion/issues/13291
1668                let escape_char = like.escape_char.unwrap_or('\\');
1669                match as_string_scalar(&like.pattern) {
1670                    Some((data_type, pattern_str)) => {
1671                        match pattern_str {
1672                            None => return Ok(Transformed::yes(lit_bool_null())),
1673                            Some(pattern_str) if pattern_str == "%" => {
1674                                // exp LIKE '%' is
1675                                //   - when exp is not NULL, it's true
1676                                //   - when exp is NULL, it's NULL
1677                                // exp NOT LIKE '%' is
1678                                //   - when exp is not NULL, it's false
1679                                //   - when exp is NULL, it's NULL
1680                                let result_for_non_null = lit(!like.negated);
1681                                Transformed::yes(if !info.nullable(&like.expr)? {
1682                                    result_for_non_null
1683                                } else {
1684                                    Expr::Case(Case {
1685                                        expr: Some(Box::new(Expr::IsNotNull(like.expr))),
1686                                        when_then_expr: vec![(
1687                                            Box::new(lit(true)),
1688                                            Box::new(result_for_non_null),
1689                                        )],
1690                                        else_expr: None,
1691                                    })
1692                                })
1693                            }
1694                            Some(pattern_str)
1695                                if pattern_str.contains("%%")
1696                                    && !pattern_str.contains(escape_char) =>
1697                            {
1698                                // Repeated occurrences of wildcard are redundant so remove them
1699                                // exp LIKE '%%'  --> exp LIKE '%'
1700                                let simplified_pattern = Regex::new("%%+")
1701                                    .unwrap()
1702                                    .replace_all(pattern_str, "%")
1703                                    .to_string();
1704                                Transformed::yes(Expr::Like(Like {
1705                                    pattern: Box::new(to_string_scalar(
1706                                        &data_type,
1707                                        Some(simplified_pattern),
1708                                    )),
1709                                    ..like
1710                                }))
1711                            }
1712                            Some(pattern_str)
1713                                if !like.case_insensitive
1714                                    && !pattern_str
1715                                        .contains(['%', '_', escape_char].as_ref()) =>
1716                            {
1717                                // If the pattern does not contain any wildcards, we can simplify the like expression to an equality expression
1718                                // TODO: handle escape characters
1719                                Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1720                                    left: like.expr.clone(),
1721                                    op: if like.negated { NotEq } else { Eq },
1722                                    right: like.pattern.clone(),
1723                                }))
1724                            }
1725
1726                            Some(_pattern_str) => Transformed::no(Expr::Like(like)),
1727                        }
1728                    }
1729                    None => Transformed::no(Expr::Like(like)),
1730                }
1731            }
1732
1733            // a is not null/unknown --> true (if a is not nullable)
1734            Expr::IsNotNull(expr) | Expr::IsNotUnknown(expr)
1735                if !info.nullable(&expr)? =>
1736            {
1737                Transformed::yes(lit(true))
1738            }
1739
1740            // a is null/unknown --> false (if a is not nullable)
1741            Expr::IsNull(expr) | Expr::IsUnknown(expr) if !info.nullable(&expr)? => {
1742                Transformed::yes(lit(false))
1743            }
1744
1745            // expr IN () --> false
1746            // expr NOT IN () --> true
1747            Expr::InList(InList {
1748                expr: _,
1749                list,
1750                negated,
1751            }) if list.is_empty() => Transformed::yes(lit(negated)),
1752
1753            // null in (x, y, z) --> null
1754            // null not in (x, y, z) --> null
1755            Expr::InList(InList {
1756                expr,
1757                list,
1758                negated: _,
1759            }) if is_null(expr.as_ref()) && !list.is_empty() => {
1760                Transformed::yes(lit_bool_null())
1761            }
1762
1763            // expr IN ((subquery)) -> expr IN (subquery), see ##5529
1764            Expr::InList(InList {
1765                expr,
1766                mut list,
1767                negated,
1768            }) if list.len() == 1
1769                && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
1770            {
1771                let Expr::ScalarSubquery(subquery) = list.remove(0) else {
1772                    unreachable!()
1773                };
1774
1775                Transformed::yes(Expr::InSubquery(InSubquery::new(
1776                    expr, subquery, negated,
1777                )))
1778            }
1779
1780            // Combine multiple OR expressions into a single IN list expression if possible
1781            //
1782            // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)`
1783            Expr::BinaryExpr(BinaryExpr {
1784                left,
1785                op: Or,
1786                right,
1787            }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
1788                let lhs = to_inlist(*left).unwrap();
1789                let rhs = to_inlist(*right).unwrap();
1790                let mut seen: HashSet<Expr> = HashSet::new();
1791                let list = lhs
1792                    .list
1793                    .into_iter()
1794                    .chain(rhs.list)
1795                    .filter(|e| seen.insert(e.to_owned()))
1796                    .collect::<Vec<_>>();
1797
1798                let merged_inlist = InList {
1799                    expr: lhs.expr,
1800                    list,
1801                    negated: false,
1802                };
1803
1804                Transformed::yes(Expr::InList(merged_inlist))
1805            }
1806
1807            // Simplify expressions that is guaranteed to be true or false to a literal boolean expression
1808            //
1809            // Rules:
1810            // If both expressions are `IN` or `NOT IN`, then we can apply intersection or union on both lists
1811            //   Intersection:
1812            //     1. `a in (1,2,3) AND a in (4,5) -> a in (), which is false`
1813            //     2. `a in (1,2,3) AND a in (2,3,4) -> a in (2,3)`
1814            //     3. `a not in (1,2,3) OR a not in (3,4,5,6) -> a not in (3)`
1815            //   Union:
1816            //     4. `a not int (1,2,3) AND a not in (4,5,6) -> a not in (1,2,3,4,5,6)`
1817            //     # This rule is handled by `or_in_list_simplifier.rs`
1818            //     5. `a in (1,2,3) OR a in (4,5,6) -> a in (1,2,3,4,5,6)`
1819            // If one of the expressions is `IN` and another one is `NOT IN`, then we apply exception on `In` expression
1820            //     6. `a in (1,2,3,4) AND a not in (1,2,3,4,5) -> a in (), which is false`
1821            //     7. `a not in (1,2,3,4) AND a in (1,2,3,4,5) -> a = 5`
1822            //     8. `a in (1,2,3,4) AND a not in (5,6,7,8) -> a in (1,2,3,4)`
1823            Expr::BinaryExpr(BinaryExpr {
1824                left,
1825                op: And,
1826                right,
1827            }) if are_inlist_and_eq_and_match_neg(
1828                left.as_ref(),
1829                right.as_ref(),
1830                false,
1831                false,
1832            ) =>
1833            {
1834                match (*left, *right) {
1835                    (Expr::InList(l1), Expr::InList(l2)) => {
1836                        return inlist_intersection(l1, &l2, false).map(Transformed::yes);
1837                    }
1838                    // Matched previously once
1839                    _ => unreachable!(),
1840                }
1841            }
1842
1843            Expr::BinaryExpr(BinaryExpr {
1844                left,
1845                op: And,
1846                right,
1847            }) if are_inlist_and_eq_and_match_neg(
1848                left.as_ref(),
1849                right.as_ref(),
1850                true,
1851                true,
1852            ) =>
1853            {
1854                match (*left, *right) {
1855                    (Expr::InList(l1), Expr::InList(l2)) => {
1856                        return inlist_union(l1, l2, true).map(Transformed::yes);
1857                    }
1858                    // Matched previously once
1859                    _ => unreachable!(),
1860                }
1861            }
1862
1863            Expr::BinaryExpr(BinaryExpr {
1864                left,
1865                op: And,
1866                right,
1867            }) if are_inlist_and_eq_and_match_neg(
1868                left.as_ref(),
1869                right.as_ref(),
1870                false,
1871                true,
1872            ) =>
1873            {
1874                match (*left, *right) {
1875                    (Expr::InList(l1), Expr::InList(l2)) => {
1876                        return inlist_except(l1, &l2).map(Transformed::yes);
1877                    }
1878                    // Matched previously once
1879                    _ => unreachable!(),
1880                }
1881            }
1882
1883            Expr::BinaryExpr(BinaryExpr {
1884                left,
1885                op: And,
1886                right,
1887            }) if are_inlist_and_eq_and_match_neg(
1888                left.as_ref(),
1889                right.as_ref(),
1890                true,
1891                false,
1892            ) =>
1893            {
1894                match (*left, *right) {
1895                    (Expr::InList(l1), Expr::InList(l2)) => {
1896                        return inlist_except(l2, &l1).map(Transformed::yes);
1897                    }
1898                    // Matched previously once
1899                    _ => unreachable!(),
1900                }
1901            }
1902
1903            Expr::BinaryExpr(BinaryExpr {
1904                left,
1905                op: Or,
1906                right,
1907            }) if are_inlist_and_eq_and_match_neg(
1908                left.as_ref(),
1909                right.as_ref(),
1910                true,
1911                true,
1912            ) =>
1913            {
1914                match (*left, *right) {
1915                    (Expr::InList(l1), Expr::InList(l2)) => {
1916                        return inlist_intersection(l1, &l2, true).map(Transformed::yes);
1917                    }
1918                    // Matched previously once
1919                    _ => unreachable!(),
1920                }
1921            }
1922
1923            // =======================================
1924            // unwrap_cast_in_comparison
1925            // =======================================
1926            //
1927            // For case:
1928            // try_cast/cast(expr as data_type) op literal
1929            Expr::BinaryExpr(BinaryExpr { left, op, right })
1930                if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1931                    info, &left, op, &right,
1932                ) && op.supports_propagation() =>
1933            {
1934                unwrap_cast_in_comparison_for_binary(info, *left, *right, op)?
1935            }
1936            // literal op try_cast/cast(expr as data_type)
1937            // -->
1938            // try_cast/cast(expr as data_type) op_swap literal
1939            Expr::BinaryExpr(BinaryExpr { left, op, right })
1940                if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1941                    info, &right, op, &left,
1942                ) && op.supports_propagation()
1943                    && op.swap().is_some() =>
1944            {
1945                unwrap_cast_in_comparison_for_binary(
1946                    info,
1947                    *right,
1948                    *left,
1949                    op.swap().unwrap(),
1950                )?
1951            }
1952            // For case:
1953            // try_cast/cast(expr as left_type) in (expr1,expr2,expr3)
1954            Expr::InList(InList {
1955                expr: mut left,
1956                list,
1957                negated,
1958            }) if is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist(
1959                info, &left, &list,
1960            ) =>
1961            {
1962                let (Expr::TryCast(TryCast {
1963                    expr: left_expr, ..
1964                })
1965                | Expr::Cast(Cast {
1966                    expr: left_expr, ..
1967                })) = left.as_mut()
1968                else {
1969                    return internal_err!("Expect cast expr, but got {:?}", left)?;
1970                };
1971
1972                let expr_type = info.get_data_type(left_expr)?;
1973                let right_exprs = list
1974                    .into_iter()
1975                    .map(|right| {
1976                        match right {
1977                            Expr::Literal(right_lit_value, _) => {
1978                                // if the right_lit_value can be casted to the type of internal_left_expr
1979                                // we need to unwrap the cast for cast/try_cast expr, and add cast to the literal
1980                                let Some(value) = try_cast_literal_to_type(&right_lit_value, &expr_type) else {
1981                                    internal_err!(
1982                                        "Can't cast the list expr {:?} to type {}",
1983                                        right_lit_value, &expr_type
1984                                    )?
1985                                };
1986                                Ok(lit(value))
1987                            }
1988                            other_expr => internal_err!(
1989                                "Only support literal expr to optimize, but the expr is {:?}",
1990                                &other_expr
1991                            ),
1992                        }
1993                    })
1994                    .collect::<Result<Vec<_>>>()?;
1995
1996                Transformed::yes(Expr::InList(InList {
1997                    expr: std::mem::take(left_expr),
1998                    list: right_exprs,
1999                    negated,
2000                }))
2001            }
2002
2003            // =======================================
2004            // preimage_in_comparison
2005            // =======================================
2006            //
2007            // For case:
2008            // date_part('YEAR', expr) op literal
2009            //
2010            // For details see datafusion_expr::ScalarUDFImpl::preimage
2011            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2012                use datafusion_expr::Operator::*;
2013                let is_preimage_op = matches!(
2014                    op,
2015                    Eq | NotEq
2016                        | Lt
2017                        | LtEq
2018                        | Gt
2019                        | GtEq
2020                        | IsDistinctFrom
2021                        | IsNotDistinctFrom
2022                );
2023                if !is_preimage_op || is_null(&right) {
2024                    return Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
2025                        left,
2026                        op,
2027                        right,
2028                    })));
2029                }
2030
2031                if let PreimageResult::Range { interval, expr } =
2032                    get_preimage(left.as_ref(), right.as_ref(), info)?
2033                {
2034                    rewrite_with_preimage(*interval, op, expr)?
2035                } else if let Some(swapped) = op.swap() {
2036                    if let PreimageResult::Range { interval, expr } =
2037                        get_preimage(right.as_ref(), left.as_ref(), info)?
2038                    {
2039                        rewrite_with_preimage(*interval, swapped, expr)?
2040                    } else {
2041                        Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, right }))
2042                    }
2043                } else {
2044                    Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, right }))
2045                }
2046            }
2047            // For case:
2048            // date_part('YEAR', expr) IN (literal1, literal2, ...)
2049            Expr::InList(InList {
2050                expr,
2051                list,
2052                negated,
2053            }) => {
2054                if list.len() > THRESHOLD_INLINE_INLIST || list.iter().any(is_null) {
2055                    return Ok(Transformed::no(Expr::InList(InList {
2056                        expr,
2057                        list,
2058                        negated,
2059                    })));
2060                }
2061
2062                let (op, combiner): (Operator, fn(Expr, Expr) -> Expr) =
2063                    if negated { (NotEq, and) } else { (Eq, or) };
2064
2065                let mut rewritten: Option<Expr> = None;
2066                for item in &list {
2067                    let PreimageResult::Range { interval, expr } =
2068                        get_preimage(expr.as_ref(), item, info)?
2069                    else {
2070                        return Ok(Transformed::no(Expr::InList(InList {
2071                            expr,
2072                            list,
2073                            negated,
2074                        })));
2075                    };
2076
2077                    let range_expr = rewrite_with_preimage(*interval, op, expr)?.data;
2078                    rewritten = Some(match rewritten {
2079                        None => range_expr,
2080                        Some(acc) => combiner(acc, range_expr),
2081                    });
2082                }
2083
2084                if let Some(rewritten) = rewritten {
2085                    Transformed::yes(rewritten)
2086                } else {
2087                    Transformed::no(Expr::InList(InList {
2088                        expr,
2089                        list,
2090                        negated,
2091                    }))
2092                }
2093            }
2094
2095            // no additional rewrites possible
2096            expr => Transformed::no(expr),
2097        })
2098    }
2099}
2100
2101fn get_preimage(
2102    left_expr: &Expr,
2103    right_expr: &Expr,
2104    info: &SimplifyContext,
2105) -> Result<PreimageResult> {
2106    let Expr::ScalarFunction(ScalarFunction { func, args }) = left_expr else {
2107        return Ok(PreimageResult::None);
2108    };
2109    if !is_literal_or_literal_cast(right_expr) {
2110        return Ok(PreimageResult::None);
2111    }
2112    if func.signature().volatility != Volatility::Immutable {
2113        return Ok(PreimageResult::None);
2114    }
2115    func.preimage(args, right_expr, info)
2116}
2117
2118fn is_literal_or_literal_cast(expr: &Expr) -> bool {
2119    match expr {
2120        Expr::Literal(_, _) => true,
2121        Expr::Cast(Cast { expr, .. }) => matches!(expr.as_ref(), Expr::Literal(_, _)),
2122        Expr::TryCast(TryCast { expr, .. }) => {
2123            matches!(expr.as_ref(), Expr::Literal(_, _))
2124        }
2125        _ => false,
2126    }
2127}
2128
2129fn as_string_scalar(expr: &Expr) -> Option<(DataType, &Option<String>)> {
2130    match expr {
2131        Expr::Literal(ScalarValue::Utf8(s), _) => Some((DataType::Utf8, s)),
2132        Expr::Literal(ScalarValue::LargeUtf8(s), _) => Some((DataType::LargeUtf8, s)),
2133        Expr::Literal(ScalarValue::Utf8View(s), _) => Some((DataType::Utf8View, s)),
2134        _ => None,
2135    }
2136}
2137
2138fn to_string_scalar(data_type: &DataType, value: Option<String>) -> Expr {
2139    match data_type {
2140        DataType::Utf8 => Expr::Literal(ScalarValue::Utf8(value), None),
2141        DataType::LargeUtf8 => Expr::Literal(ScalarValue::LargeUtf8(value), None),
2142        DataType::Utf8View => Expr::Literal(ScalarValue::Utf8View(value), None),
2143        _ => unreachable!(),
2144    }
2145}
2146
2147fn has_common_conjunction(lhs: &Expr, rhs: &Expr) -> bool {
2148    let lhs_set: HashSet<&Expr> = iter_conjunction(lhs).collect();
2149    iter_conjunction(rhs).any(|e| lhs_set.contains(&e) && !e.is_volatile())
2150}
2151
2152// TODO: We might not need this after defer pattern for Box is stabilized. https://github.com/rust-lang/rust/issues/87121
2153fn are_inlist_and_eq_and_match_neg(
2154    left: &Expr,
2155    right: &Expr,
2156    is_left_neg: bool,
2157    is_right_neg: bool,
2158) -> bool {
2159    match (left, right) {
2160        (Expr::InList(l), Expr::InList(r)) => {
2161            l.expr == r.expr && l.negated == is_left_neg && r.negated == is_right_neg
2162        }
2163        _ => false,
2164    }
2165}
2166
2167// TODO: We might not need this after defer pattern for Box is stabilized. https://github.com/rust-lang/rust/issues/87121
2168fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
2169    let left = as_inlist(left);
2170    let right = as_inlist(right);
2171    if let (Some(lhs), Some(rhs)) = (left, right) {
2172        matches!(lhs.expr.as_ref(), Expr::Column(_))
2173            && matches!(rhs.expr.as_ref(), Expr::Column(_))
2174            && lhs.expr == rhs.expr
2175            && !lhs.negated
2176            && !rhs.negated
2177    } else {
2178        false
2179    }
2180}
2181
2182/// Try to convert an expression to an in-list expression
2183fn as_inlist(expr: &'_ Expr) -> Option<Cow<'_, InList>> {
2184    match expr {
2185        Expr::InList(inlist) => Some(Cow::Borrowed(inlist)),
2186        Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::Eq => {
2187            match (left.as_ref(), right.as_ref()) {
2188                (Expr::Column(_), Expr::Literal(_, _)) => Some(Cow::Owned(InList {
2189                    expr: left.clone(),
2190                    list: vec![*right.clone()],
2191                    negated: false,
2192                })),
2193                (Expr::Literal(_, _), Expr::Column(_)) => Some(Cow::Owned(InList {
2194                    expr: right.clone(),
2195                    list: vec![*left.clone()],
2196                    negated: false,
2197                })),
2198                _ => None,
2199            }
2200        }
2201        _ => None,
2202    }
2203}
2204
2205fn to_inlist(expr: Expr) -> Option<InList> {
2206    match expr {
2207        Expr::InList(inlist) => Some(inlist),
2208        Expr::BinaryExpr(BinaryExpr {
2209            left,
2210            op: Operator::Eq,
2211            right,
2212        }) => match (left.as_ref(), right.as_ref()) {
2213            (Expr::Column(_), Expr::Literal(_, _)) => Some(InList {
2214                expr: left,
2215                list: vec![*right],
2216                negated: false,
2217            }),
2218            (Expr::Literal(_, _), Expr::Column(_)) => Some(InList {
2219                expr: right,
2220                list: vec![*left],
2221                negated: false,
2222            }),
2223            _ => None,
2224        },
2225        _ => None,
2226    }
2227}
2228
2229/// Return the union of two inlist expressions
2230/// maintaining the order of the elements in the two lists
2231fn inlist_union(mut l1: InList, l2: InList, negated: bool) -> Result<Expr> {
2232    // extend the list in l1 with the elements in l2 that are not already in l1
2233    let l1_items: HashSet<_> = l1.list.iter().collect();
2234
2235    // keep all l2 items that do not also appear in l1
2236    let keep_l2: Vec<_> = l2
2237        .list
2238        .into_iter()
2239        .filter_map(|e| if l1_items.contains(&e) { None } else { Some(e) })
2240        .collect();
2241
2242    l1.list.extend(keep_l2);
2243    l1.negated = negated;
2244    Ok(Expr::InList(l1))
2245}
2246
2247/// Return the intersection of two inlist expressions
2248/// maintaining the order of the elements in the two lists
2249fn inlist_intersection(mut l1: InList, l2: &InList, negated: bool) -> Result<Expr> {
2250    let l2_items = l2.list.iter().collect::<HashSet<_>>();
2251
2252    // remove all items from l1 that are not in l2
2253    l1.list.retain(|e| l2_items.contains(e));
2254
2255    // e in () is always false
2256    // e not in () is always true
2257    if l1.list.is_empty() {
2258        return Ok(lit(negated));
2259    }
2260    Ok(Expr::InList(l1))
2261}
2262
2263/// Return the all items in l1 that are not in l2
2264/// maintaining the order of the elements in the two lists
2265fn inlist_except(mut l1: InList, l2: &InList) -> Result<Expr> {
2266    let l2_items = l2.list.iter().collect::<HashSet<_>>();
2267
2268    // keep only items from l1 that are not in l2
2269    l1.list.retain(|e| !l2_items.contains(e));
2270
2271    if l1.list.is_empty() {
2272        return Ok(lit(false));
2273    }
2274    Ok(Expr::InList(l1))
2275}
2276
2277/// Returns expression testing a boolean `expr` for being exactly `true` (not `false` or NULL).
2278fn is_exactly_true(expr: Expr, info: &SimplifyContext) -> Result<Expr> {
2279    if !info.nullable(&expr)? {
2280        Ok(expr)
2281    } else {
2282        Ok(Expr::BinaryExpr(BinaryExpr {
2283            left: Box::new(expr),
2284            op: Operator::IsNotDistinctFrom,
2285            right: Box::new(lit(true)),
2286        }))
2287    }
2288}
2289
2290// A * 1 -> A
2291// A / 1 -> A
2292//
2293// Move this function body out of the large match branch avoid stack overflow
2294fn simplify_right_is_one_case(
2295    info: &SimplifyContext,
2296    left: Box<Expr>,
2297    op: &Operator,
2298    right: &Expr,
2299) -> Result<Transformed<Expr>> {
2300    // Check if resulting type would be different due to coercion
2301    let left_type = info.get_data_type(&left)?;
2302    let right_type = info.get_data_type(right)?;
2303    match BinaryTypeCoercer::new(&left_type, op, &right_type).get_result_type() {
2304        Ok(result_type) => {
2305            // Only cast if the types differ
2306            if left_type != result_type {
2307                Ok(Transformed::yes(Expr::Cast(Cast::new(left, result_type))))
2308            } else {
2309                Ok(Transformed::yes(*left))
2310            }
2311        }
2312        Err(_) => Ok(Transformed::yes(*left)),
2313    }
2314}
2315
2316#[cfg(test)]
2317mod tests {
2318    use super::*;
2319    use crate::simplify_expressions::SimplifyContext;
2320    use crate::test::test_table_scan_with_name;
2321    use arrow::{
2322        array::{Int32Array, StructArray},
2323        datatypes::{FieldRef, Fields},
2324    };
2325    use datafusion_common::{DFSchemaRef, ToDFSchema, assert_contains};
2326    use datafusion_expr::{
2327        expr::WindowFunction,
2328        function::{
2329            AccumulatorArgs, AggregateFunctionSimplification,
2330            WindowFunctionSimplification,
2331        },
2332        interval_arithmetic::Interval,
2333        *,
2334    };
2335    use datafusion_functions_window_common::field::WindowUDFFieldArgs;
2336    use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
2337    use datafusion_physical_expr::PhysicalExpr;
2338    use std::hash::Hash;
2339    use std::sync::LazyLock;
2340    use std::{
2341        collections::HashMap,
2342        ops::{BitAnd, BitOr, BitXor},
2343        sync::Arc,
2344    };
2345
2346    // ------------------------------
2347    // --- ExprSimplifier tests -----
2348    // ------------------------------
2349    #[test]
2350    fn api_basic() {
2351        let simplifier =
2352            ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
2353
2354        let expr = lit(1) + lit(2);
2355        let expected = lit(3);
2356        assert_eq!(expected, simplifier.simplify(expr).unwrap());
2357    }
2358
2359    #[test]
2360    fn basic_coercion() {
2361        let schema = test_schema();
2362        let simplifier = ExprSimplifier::new(
2363            SimplifyContext::default().with_schema(Arc::clone(&schema)),
2364        );
2365
2366        // Note expr type is int32 (not int64)
2367        // (1i64 + 2i32) < i
2368        let expr = (lit(1i64) + lit(2i32)).lt(col("i"));
2369        // should fully simplify to 3 < i (though i has been coerced to i64)
2370        let expected = lit(3i64).lt(col("i"));
2371
2372        let expr = simplifier.coerce(expr, &schema).unwrap();
2373
2374        assert_eq!(expected, simplifier.simplify(expr).unwrap());
2375    }
2376
2377    fn test_schema() -> DFSchemaRef {
2378        static TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
2379            Schema::new(vec![
2380                Field::new("i", DataType::Int64, false),
2381                Field::new("b", DataType::Boolean, true),
2382            ])
2383            .to_dfschema_ref()
2384            .unwrap()
2385        });
2386        Arc::clone(&TEST_SCHEMA)
2387    }
2388
2389    #[test]
2390    fn simplify_and_constant_prop() {
2391        let simplifier =
2392            ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
2393
2394        // should be able to simplify to false
2395        // (i * (1 - 2)) > 0
2396        let expr = (col("i") * (lit(1) - lit(1))).gt(lit(0));
2397        let expected = lit(false);
2398        assert_eq!(expected, simplifier.simplify(expr).unwrap());
2399    }
2400
2401    #[test]
2402    fn simplify_and_constant_prop_with_case() {
2403        let simplifier =
2404            ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
2405
2406        //   CASE
2407        //     WHEN i>5 AND false THEN i > 5
2408        //     WHEN i<5 AND true THEN i < 5
2409        //     ELSE false
2410        //   END
2411        //
2412        // Can be simplified to `i < 5`
2413        let expr = when(col("i").gt(lit(5)).and(lit(false)), col("i").gt(lit(5)))
2414            .when(col("i").lt(lit(5)).and(lit(true)), col("i").lt(lit(5)))
2415            .otherwise(lit(false))
2416            .unwrap();
2417        let expected = col("i").lt(lit(5));
2418        assert_eq!(expected, simplifier.simplify(expr).unwrap());
2419    }
2420
2421    // ------------------------------
2422    // --- Simplifier tests -----
2423    // ------------------------------
2424
2425    #[test]
2426    fn test_simplify_canonicalize() {
2427        {
2428            let expr = lit(1).lt(col("c2")).and(col("c2").gt(lit(1)));
2429            let expected = col("c2").gt(lit(1));
2430            assert_eq!(simplify(expr), expected);
2431        }
2432        {
2433            let expr = col("c1").lt(col("c2")).and(col("c2").gt(col("c1")));
2434            let expected = col("c2").gt(col("c1"));
2435            assert_eq!(simplify(expr), expected);
2436        }
2437        {
2438            let expr = col("c1")
2439                .eq(lit(1))
2440                .and(lit(1).eq(col("c1")))
2441                .and(col("c1").eq(lit(3)));
2442            let expected = col("c1").eq(lit(1)).and(col("c1").eq(lit(3)));
2443            assert_eq!(simplify(expr), expected);
2444        }
2445        {
2446            let expr = col("c1")
2447                .eq(col("c2"))
2448                .and(col("c1").gt(lit(5)))
2449                .and(col("c2").eq(col("c1")));
2450            let expected = col("c2").eq(col("c1")).and(col("c1").gt(lit(5)));
2451            assert_eq!(simplify(expr), expected);
2452        }
2453        {
2454            let expr = col("c1")
2455                .eq(lit(1))
2456                .and(col("c2").gt(lit(3)).or(lit(3).lt(col("c2"))));
2457            let expected = col("c1").eq(lit(1)).and(col("c2").gt(lit(3)));
2458            assert_eq!(simplify(expr), expected);
2459        }
2460        {
2461            let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2462            let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2463            assert_eq!(simplify(expr), expected);
2464        }
2465        {
2466            let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2467            let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2468            assert_eq!(simplify(expr), expected);
2469        }
2470        {
2471            let expr = col("c1").gt(col("c2")).and(col("c1").gt(col("c2")));
2472            let expected = col("c2").lt(col("c1"));
2473            assert_eq!(simplify(expr), expected);
2474        }
2475    }
2476
2477    #[test]
2478    fn test_simplify_eq_not_self() {
2479        // `expr_a`: column `c2` is nullable, so `c2 = c2` simplifies to `c2 IS NOT NULL OR NULL`
2480        // This ensures the expression is only true when `c2` is not NULL, accounting for SQL's NULL semantics.
2481        let expr_a = col("c2").eq(col("c2"));
2482        let expected_a = col("c2").is_not_null().or(lit_bool_null());
2483
2484        // `expr_b`: column `c2_non_null` is explicitly non-nullable, so `c2_non_null = c2_non_null` is always true
2485        let expr_b = col("c2_non_null").eq(col("c2_non_null"));
2486        let expected_b = lit(true);
2487
2488        assert_eq!(simplify(expr_a), expected_a);
2489        assert_eq!(simplify(expr_b), expected_b);
2490    }
2491
2492    #[test]
2493    fn test_simplify_or_true() {
2494        let expr_a = col("c2").or(lit(true));
2495        let expr_b = lit(true).or(col("c2"));
2496        let expected = lit(true);
2497
2498        assert_eq!(simplify(expr_a), expected);
2499        assert_eq!(simplify(expr_b), expected);
2500    }
2501
2502    #[test]
2503    fn test_simplify_or_false() {
2504        let expr_a = lit(false).or(col("c2"));
2505        let expr_b = col("c2").or(lit(false));
2506        let expected = col("c2");
2507
2508        assert_eq!(simplify(expr_a), expected);
2509        assert_eq!(simplify(expr_b), expected);
2510    }
2511
2512    #[test]
2513    fn test_simplify_or_same() {
2514        let expr = col("c2").or(col("c2"));
2515        let expected = col("c2");
2516
2517        assert_eq!(simplify(expr), expected);
2518    }
2519
2520    #[test]
2521    fn test_simplify_or_not_self() {
2522        // A OR !A if A is not nullable --> true
2523        // !A OR A if A is not nullable --> true
2524        let expr_a = col("c2_non_null").or(col("c2_non_null").not());
2525        let expr_b = col("c2_non_null").not().or(col("c2_non_null"));
2526        let expected = lit(true);
2527
2528        assert_eq!(simplify(expr_a), expected);
2529        assert_eq!(simplify(expr_b), expected);
2530    }
2531
2532    #[test]
2533    fn test_simplify_and_false() {
2534        let expr_a = lit(false).and(col("c2"));
2535        let expr_b = col("c2").and(lit(false));
2536        let expected = lit(false);
2537
2538        assert_eq!(simplify(expr_a), expected);
2539        assert_eq!(simplify(expr_b), expected);
2540    }
2541
2542    #[test]
2543    fn test_simplify_and_same() {
2544        let expr = col("c2").and(col("c2"));
2545        let expected = col("c2");
2546
2547        assert_eq!(simplify(expr), expected);
2548    }
2549
2550    #[test]
2551    fn test_simplify_and_true() {
2552        let expr_a = lit(true).and(col("c2"));
2553        let expr_b = col("c2").and(lit(true));
2554        let expected = col("c2");
2555
2556        assert_eq!(simplify(expr_a), expected);
2557        assert_eq!(simplify(expr_b), expected);
2558    }
2559
2560    #[test]
2561    fn test_simplify_and_not_self() {
2562        // A AND !A if A is not nullable --> false
2563        // !A AND A if A is not nullable --> false
2564        let expr_a = col("c2_non_null").and(col("c2_non_null").not());
2565        let expr_b = col("c2_non_null").not().and(col("c2_non_null"));
2566        let expected = lit(false);
2567
2568        assert_eq!(simplify(expr_a), expected);
2569        assert_eq!(simplify(expr_b), expected);
2570    }
2571
2572    #[test]
2573    fn test_simplify_eq_and_neq_with_different_literals() {
2574        // A = 1 AND A != 0 --> A = 1 (when 1 != 0)
2575        let expr = col("c2").eq(lit(1)).and(col("c2").not_eq(lit(0)));
2576        let expected = col("c2").eq(lit(1));
2577        assert_eq!(simplify(expr), expected);
2578
2579        // A != 0 AND A = 1 --> A = 1 (when 1 != 0)
2580        let expr = col("c2").not_eq(lit(0)).and(col("c2").eq(lit(1)));
2581        let expected = col("c2").eq(lit(1));
2582        assert_eq!(simplify(expr), expected);
2583
2584        // Should NOT simplify when literals are the same (A = 1 AND A != 1)
2585        // This is a contradiction but handled by other rules
2586        let expr = col("c2").eq(lit(1)).and(col("c2").not_eq(lit(1)));
2587        // Should not be simplified by this rule (left unchanged or handled elsewhere)
2588        let result = simplify(expr.clone());
2589        // The expression should not have been simplified
2590        assert_eq!(result, expr);
2591    }
2592
2593    #[test]
2594    fn test_simplify_multiply_by_one() {
2595        let expr_a = col("c2") * lit(1);
2596        let expr_b = lit(1) * col("c2");
2597        let expected = col("c2");
2598
2599        assert_eq!(simplify(expr_a), expected);
2600        assert_eq!(simplify(expr_b), expected);
2601
2602        let expr = col("c2") * lit(ScalarValue::Decimal128(Some(10000000000), 38, 10));
2603        assert_eq!(simplify(expr), expected);
2604
2605        let expr = lit(ScalarValue::Decimal128(Some(10000000000), 31, 10)) * col("c2");
2606        assert_eq!(simplify(expr), expected);
2607    }
2608
2609    #[test]
2610    fn test_simplify_multiply_by_null() {
2611        let null = lit(ScalarValue::Int64(None));
2612        // A * null --> null
2613        {
2614            let expr = col("c3") * null.clone();
2615            assert_eq!(simplify(expr), null);
2616        }
2617        // null * A --> null
2618        {
2619            let expr = null.clone() * col("c3");
2620            assert_eq!(simplify(expr), null);
2621        }
2622    }
2623
2624    #[test]
2625    fn test_simplify_multiply_by_zero() {
2626        // cannot optimize A * null (null * A) if A is nullable
2627        {
2628            let expr_a = col("c2") * lit(0);
2629            let expr_b = lit(0) * col("c2");
2630
2631            assert_eq!(simplify(expr_a.clone()), expr_a);
2632            assert_eq!(simplify(expr_b.clone()), expr_b);
2633        }
2634        // 0 * A --> 0 if A is not nullable
2635        {
2636            let expr = lit(0) * col("c2_non_null");
2637            assert_eq!(simplify(expr), lit(0));
2638        }
2639        // A * 0 --> 0 if A is not nullable
2640        {
2641            let expr = col("c2_non_null") * lit(0);
2642            assert_eq!(simplify(expr), lit(0));
2643        }
2644        // A * Decimal128(0) --> 0 if A is not nullable
2645        {
2646            let expr = col("c2_non_null") * lit(ScalarValue::Decimal128(Some(0), 31, 10));
2647            assert_eq!(
2648                simplify(expr),
2649                lit(ScalarValue::Decimal128(Some(0), 31, 10))
2650            );
2651            let expr = binary_expr(
2652                lit(ScalarValue::Decimal128(Some(0), 31, 10)),
2653                Operator::Multiply,
2654                col("c2_non_null"),
2655            );
2656            assert_eq!(
2657                simplify(expr),
2658                lit(ScalarValue::Decimal128(Some(0), 31, 10))
2659            );
2660        }
2661    }
2662
2663    #[test]
2664    fn test_simplify_divide_by_one() {
2665        let expr = binary_expr(col("c2"), Operator::Divide, lit(1));
2666        let expected = col("c2");
2667        assert_eq!(simplify(expr), expected);
2668        let expr = col("c2") / lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2669        assert_eq!(simplify(expr), expected);
2670    }
2671
2672    #[test]
2673    fn test_simplify_divide_null() {
2674        // A / null --> null
2675        let null = lit(ScalarValue::Int64(None));
2676        {
2677            let expr = col("c3") / null.clone();
2678            assert_eq!(simplify(expr), null);
2679        }
2680        // null / A --> null
2681        {
2682            let expr = null.clone() / col("c3");
2683            assert_eq!(simplify(expr), null);
2684        }
2685    }
2686
2687    #[test]
2688    fn test_simplify_divide_by_same() {
2689        let expr = col("c2") / col("c2");
2690        // if c2 is null, c2 / c2 = null, so can't simplify
2691        let expected = expr.clone();
2692
2693        assert_eq!(simplify(expr), expected);
2694    }
2695
2696    #[test]
2697    fn test_simplify_modulo_by_null() {
2698        let null = lit(ScalarValue::Int64(None));
2699        // A % null --> null
2700        {
2701            let expr = col("c3") % null.clone();
2702            assert_eq!(simplify(expr), null);
2703        }
2704        // null % A --> null
2705        {
2706            let expr = null.clone() % col("c3");
2707            assert_eq!(simplify(expr), null);
2708        }
2709    }
2710
2711    #[test]
2712    fn test_simplify_modulo_by_one() {
2713        let expr = col("c2") % lit(1);
2714        // if c2 is null, c2 % 1 = null, so can't simplify
2715        let expected = expr.clone();
2716
2717        assert_eq!(simplify(expr), expected);
2718    }
2719
2720    #[test]
2721    fn test_simplify_divide_zero_by_zero() {
2722        // because divide by 0 maybe occur in short-circuit expression
2723        // so we should not simplify this, and throw error in runtime
2724        let expr = lit(0) / lit(0);
2725        let expected = expr.clone();
2726
2727        assert_eq!(simplify(expr), expected);
2728    }
2729
2730    #[test]
2731    fn test_simplify_divide_by_zero() {
2732        // because divide by 0 maybe occur in short-circuit expression
2733        // so we should not simplify this, and throw error in runtime
2734        let expr = col("c2_non_null") / lit(0);
2735        let expected = expr.clone();
2736
2737        assert_eq!(simplify(expr), expected);
2738    }
2739
2740    #[test]
2741    fn test_simplify_modulo_by_one_non_null() {
2742        let expr = col("c3_non_null") % lit(1);
2743        let expected = lit(0_i64);
2744        assert_eq!(simplify(expr), expected);
2745        let expr =
2746            col("c3_non_null") % lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2747        assert_eq!(simplify(expr), expected);
2748    }
2749
2750    #[test]
2751    fn test_simplify_bitwise_xor_by_null() {
2752        let null = lit(ScalarValue::Int64(None));
2753        // A ^ null --> null
2754        {
2755            let expr = col("c3") ^ null.clone();
2756            assert_eq!(simplify(expr), null);
2757        }
2758        // null ^ A --> null
2759        {
2760            let expr = null.clone() ^ col("c3");
2761            assert_eq!(simplify(expr), null);
2762        }
2763    }
2764
2765    #[test]
2766    fn test_simplify_bitwise_shift_right_by_null() {
2767        let null = lit(ScalarValue::Int64(None));
2768        // A >> null --> null
2769        {
2770            let expr = col("c3") >> null.clone();
2771            assert_eq!(simplify(expr), null);
2772        }
2773        // null >> A --> null
2774        {
2775            let expr = null.clone() >> col("c3");
2776            assert_eq!(simplify(expr), null);
2777        }
2778    }
2779
2780    #[test]
2781    fn test_simplify_bitwise_shift_left_by_null() {
2782        let null = lit(ScalarValue::Int64(None));
2783        // A << null --> null
2784        {
2785            let expr = col("c3") << null.clone();
2786            assert_eq!(simplify(expr), null);
2787        }
2788        // null << A --> null
2789        {
2790            let expr = null.clone() << col("c3");
2791            assert_eq!(simplify(expr), null);
2792        }
2793    }
2794
2795    #[test]
2796    fn test_simplify_bitwise_and_by_zero() {
2797        // A & 0 --> 0
2798        {
2799            let expr = col("c2_non_null") & lit(0);
2800            assert_eq!(simplify(expr), lit(0));
2801        }
2802        // 0 & A --> 0
2803        {
2804            let expr = lit(0) & col("c2_non_null");
2805            assert_eq!(simplify(expr), lit(0));
2806        }
2807    }
2808
2809    #[test]
2810    fn test_simplify_bitwise_or_by_zero() {
2811        // A | 0 --> A
2812        {
2813            let expr = col("c2_non_null") | lit(0);
2814            assert_eq!(simplify(expr), col("c2_non_null"));
2815        }
2816        // 0 | A --> A
2817        {
2818            let expr = lit(0) | col("c2_non_null");
2819            assert_eq!(simplify(expr), col("c2_non_null"));
2820        }
2821    }
2822
2823    #[test]
2824    fn test_simplify_bitwise_xor_by_zero() {
2825        // A ^ 0 --> A
2826        {
2827            let expr = col("c2_non_null") ^ lit(0);
2828            assert_eq!(simplify(expr), col("c2_non_null"));
2829        }
2830        // 0 ^ A --> A
2831        {
2832            let expr = lit(0) ^ col("c2_non_null");
2833            assert_eq!(simplify(expr), col("c2_non_null"));
2834        }
2835    }
2836
2837    #[test]
2838    fn test_simplify_bitwise_bitwise_shift_right_by_zero() {
2839        // A >> 0 --> A
2840        {
2841            let expr = col("c2_non_null") >> lit(0);
2842            assert_eq!(simplify(expr), col("c2_non_null"));
2843        }
2844    }
2845
2846    #[test]
2847    fn test_simplify_bitwise_bitwise_shift_left_by_zero() {
2848        // A << 0 --> A
2849        {
2850            let expr = col("c2_non_null") << lit(0);
2851            assert_eq!(simplify(expr), col("c2_non_null"));
2852        }
2853    }
2854
2855    #[test]
2856    fn test_simplify_bitwise_and_by_null() {
2857        let null = Expr::Literal(ScalarValue::Int64(None), None);
2858        // A & null --> null
2859        {
2860            let expr = col("c3") & null.clone();
2861            assert_eq!(simplify(expr), null);
2862        }
2863        // null & A --> null
2864        {
2865            let expr = null.clone() & col("c3");
2866            assert_eq!(simplify(expr), null);
2867        }
2868    }
2869
2870    #[test]
2871    fn test_simplify_composed_bitwise_and() {
2872        // ((c2 > 5) & (c1 < 6)) & (c2 > 5) --> (c2 > 5) & (c1 < 6)
2873
2874        let expr = bitwise_and(
2875            bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2876            col("c2").gt(lit(5)),
2877        );
2878        let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2879
2880        assert_eq!(simplify(expr), expected);
2881
2882        // (c2 > 5) & ((c2 > 5) & (c1 < 6)) --> (c2 > 5) & (c1 < 6)
2883
2884        let expr = bitwise_and(
2885            col("c2").gt(lit(5)),
2886            bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2887        );
2888        let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2889        assert_eq!(simplify(expr), expected);
2890    }
2891
2892    #[test]
2893    fn test_simplify_composed_bitwise_or() {
2894        // ((c2 > 5) | (c1 < 6)) | (c2 > 5) --> (c2 > 5) | (c1 < 6)
2895
2896        let expr = bitwise_or(
2897            bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2898            col("c2").gt(lit(5)),
2899        );
2900        let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2901
2902        assert_eq!(simplify(expr), expected);
2903
2904        // (c2 > 5) | ((c2 > 5) | (c1 < 6)) --> (c2 > 5) | (c1 < 6)
2905
2906        let expr = bitwise_or(
2907            col("c2").gt(lit(5)),
2908            bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2909        );
2910        let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2911
2912        assert_eq!(simplify(expr), expected);
2913    }
2914
2915    #[test]
2916    fn test_simplify_composed_bitwise_xor() {
2917        // with an even number of the column "c2"
2918        // c2 ^ ((c2 ^ (c2 | c1)) ^ (c1 & c2)) --> (c2 | c1) ^ (c1 & c2)
2919
2920        let expr = bitwise_xor(
2921            col("c2"),
2922            bitwise_xor(
2923                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2924                bitwise_and(col("c1"), col("c2")),
2925            ),
2926        );
2927
2928        let expected = bitwise_xor(
2929            bitwise_or(col("c2"), col("c1")),
2930            bitwise_and(col("c1"), col("c2")),
2931        );
2932
2933        assert_eq!(simplify(expr), expected);
2934
2935        // with an odd number of the column "c2"
2936        // c2 ^ (c2 ^ (c2 | c1)) ^ ((c1 & c2) ^ c2) --> c2 ^ ((c2 | c1) ^ (c1 & c2))
2937
2938        let expr = bitwise_xor(
2939            col("c2"),
2940            bitwise_xor(
2941                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2942                bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
2943            ),
2944        );
2945
2946        let expected = bitwise_xor(
2947            col("c2"),
2948            bitwise_xor(
2949                bitwise_or(col("c2"), col("c1")),
2950                bitwise_and(col("c1"), col("c2")),
2951            ),
2952        );
2953
2954        assert_eq!(simplify(expr), expected);
2955
2956        // with an even number of the column "c2"
2957        // ((c2 ^ (c2 | c1)) ^ (c1 & c2)) ^ c2 --> (c2 | c1) ^ (c1 & c2)
2958
2959        let expr = bitwise_xor(
2960            bitwise_xor(
2961                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2962                bitwise_and(col("c1"), col("c2")),
2963            ),
2964            col("c2"),
2965        );
2966
2967        let expected = bitwise_xor(
2968            bitwise_or(col("c2"), col("c1")),
2969            bitwise_and(col("c1"), col("c2")),
2970        );
2971
2972        assert_eq!(simplify(expr), expected);
2973
2974        // with an odd number of the column "c2"
2975        // (c2 ^ (c2 | c1)) ^ ((c1 & c2) ^ c2) ^ c2 --> ((c2 | c1) ^ (c1 & c2)) ^ c2
2976
2977        let expr = bitwise_xor(
2978            bitwise_xor(
2979                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2980                bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
2981            ),
2982            col("c2"),
2983        );
2984
2985        let expected = bitwise_xor(
2986            bitwise_xor(
2987                bitwise_or(col("c2"), col("c1")),
2988                bitwise_and(col("c1"), col("c2")),
2989            ),
2990            col("c2"),
2991        );
2992
2993        assert_eq!(simplify(expr), expected);
2994    }
2995
2996    #[test]
2997    fn test_simplify_negated_bitwise_and() {
2998        // !c4 & c4 --> 0
2999        let expr = (-col("c4_non_null")) & col("c4_non_null");
3000        let expected = lit(0u32);
3001
3002        assert_eq!(simplify(expr), expected);
3003        // c4 & !c4 --> 0
3004        let expr = col("c4_non_null") & (-col("c4_non_null"));
3005        let expected = lit(0u32);
3006
3007        assert_eq!(simplify(expr), expected);
3008
3009        // !c3 & c3 --> 0
3010        let expr = (-col("c3_non_null")) & col("c3_non_null");
3011        let expected = lit(0i64);
3012
3013        assert_eq!(simplify(expr), expected);
3014        // c3 & !c3 --> 0
3015        let expr = col("c3_non_null") & (-col("c3_non_null"));
3016        let expected = lit(0i64);
3017
3018        assert_eq!(simplify(expr), expected);
3019    }
3020
3021    #[test]
3022    fn test_simplify_negated_bitwise_or() {
3023        // !c4 | c4 --> -1
3024        let expr = (-col("c4_non_null")) | col("c4_non_null");
3025        let expected = lit(-1i32);
3026
3027        assert_eq!(simplify(expr), expected);
3028
3029        // c4 | !c4 --> -1
3030        let expr = col("c4_non_null") | (-col("c4_non_null"));
3031        let expected = lit(-1i32);
3032
3033        assert_eq!(simplify(expr), expected);
3034
3035        // !c3 | c3 --> -1
3036        let expr = (-col("c3_non_null")) | col("c3_non_null");
3037        let expected = lit(-1i64);
3038
3039        assert_eq!(simplify(expr), expected);
3040
3041        // c3 | !c3 --> -1
3042        let expr = col("c3_non_null") | (-col("c3_non_null"));
3043        let expected = lit(-1i64);
3044
3045        assert_eq!(simplify(expr), expected);
3046    }
3047
3048    #[test]
3049    fn test_simplify_negated_bitwise_xor() {
3050        // !c4 ^ c4 --> -1
3051        let expr = (-col("c4_non_null")) ^ col("c4_non_null");
3052        let expected = lit(-1i32);
3053
3054        assert_eq!(simplify(expr), expected);
3055
3056        // c4 ^ !c4 --> -1
3057        let expr = col("c4_non_null") ^ (-col("c4_non_null"));
3058        let expected = lit(-1i32);
3059
3060        assert_eq!(simplify(expr), expected);
3061
3062        // !c3 ^ c3 --> -1
3063        let expr = (-col("c3_non_null")) ^ col("c3_non_null");
3064        let expected = lit(-1i64);
3065
3066        assert_eq!(simplify(expr), expected);
3067
3068        // c3 ^ !c3 --> -1
3069        let expr = col("c3_non_null") ^ (-col("c3_non_null"));
3070        let expected = lit(-1i64);
3071
3072        assert_eq!(simplify(expr), expected);
3073    }
3074
3075    #[test]
3076    fn test_simplify_bitwise_and_or() {
3077        // (c2 < 3) & ((c2 < 3) | c1) -> (c2 < 3)
3078        let expr = bitwise_and(
3079            col("c2_non_null").lt(lit(3)),
3080            bitwise_or(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
3081        );
3082        let expected = col("c2_non_null").lt(lit(3));
3083
3084        assert_eq!(simplify(expr), expected);
3085    }
3086
3087    #[test]
3088    fn test_simplify_bitwise_or_and() {
3089        // (c2 < 3) | ((c2 < 3) & c1) -> (c2 < 3)
3090        let expr = bitwise_or(
3091            col("c2_non_null").lt(lit(3)),
3092            bitwise_and(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
3093        );
3094        let expected = col("c2_non_null").lt(lit(3));
3095
3096        assert_eq!(simplify(expr), expected);
3097    }
3098
3099    #[test]
3100    fn test_simplify_simple_bitwise_and() {
3101        // (c2 > 5) & (c2 > 5) -> (c2 > 5)
3102        let expr = (col("c2").gt(lit(5))).bitand(col("c2").gt(lit(5)));
3103        let expected = col("c2").gt(lit(5));
3104
3105        assert_eq!(simplify(expr), expected);
3106    }
3107
3108    #[test]
3109    fn test_simplify_simple_bitwise_or() {
3110        // (c2 > 5) | (c2 > 5) -> (c2 > 5)
3111        let expr = (col("c2").gt(lit(5))).bitor(col("c2").gt(lit(5)));
3112        let expected = col("c2").gt(lit(5));
3113
3114        assert_eq!(simplify(expr), expected);
3115    }
3116
3117    #[test]
3118    fn test_simplify_simple_bitwise_xor() {
3119        // c4 ^ c4 -> 0
3120        let expr = (col("c4")).bitxor(col("c4"));
3121        let expected = lit(0u32);
3122
3123        assert_eq!(simplify(expr), expected);
3124
3125        // c3 ^ c3 -> 0
3126        let expr = col("c3").bitxor(col("c3"));
3127        let expected = lit(0i64);
3128
3129        assert_eq!(simplify(expr), expected);
3130    }
3131
3132    #[test]
3133    fn test_simplify_modulo_by_zero_non_null() {
3134        // because modulo by 0 maybe occur in short-circuit expression
3135        // so we should not simplify this, and throw error in runtime.
3136        let expr = col("c2_non_null") % lit(0);
3137        let expected = expr.clone();
3138
3139        assert_eq!(simplify(expr), expected);
3140    }
3141
3142    #[test]
3143    fn test_simplify_simple_and() {
3144        // (c2 > 5) AND (c2 > 5) -> (c2 > 5)
3145        let expr = (col("c2").gt(lit(5))).and(col("c2").gt(lit(5)));
3146        let expected = col("c2").gt(lit(5));
3147
3148        assert_eq!(simplify(expr), expected);
3149    }
3150
3151    #[test]
3152    fn test_simplify_composed_and() {
3153        // ((c2 > 5) AND (c1 < 6)) AND (c2 > 5)
3154        let expr = and(
3155            and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
3156            col("c2").gt(lit(5)),
3157        );
3158        let expected = and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
3159
3160        assert_eq!(simplify(expr), expected);
3161    }
3162
3163    #[test]
3164    fn test_simplify_negated_and() {
3165        // (c2 > 5) AND !(c2 > 5) --> (c2 > 5) AND (c2 <= 5)
3166        let expr = and(col("c2").gt(lit(5)), Expr::not(col("c2").gt(lit(5))));
3167        let expected = col("c2").gt(lit(5)).and(col("c2").lt_eq(lit(5)));
3168
3169        assert_eq!(simplify(expr), expected);
3170    }
3171
3172    #[test]
3173    fn test_simplify_or_and() {
3174        let l = col("c2").gt(lit(5));
3175        let r = and(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
3176
3177        // (c2 > 5) OR ((c1 < 6) AND (c2 > 5))
3178        let expr = or(l.clone(), r.clone());
3179
3180        let expected = l.clone();
3181        assert_eq!(simplify(expr), expected);
3182
3183        // ((c1 < 6) AND (c2 > 5)) OR (c2 > 5)
3184        let expr = or(r, l);
3185        assert_eq!(simplify(expr), expected);
3186    }
3187
3188    #[test]
3189    fn test_simplify_or_and_non_null() {
3190        let l = col("c2_non_null").gt(lit(5));
3191        let r = and(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
3192
3193        // (c2 > 5) OR ((c1 < 6) AND (c2 > 5)) --> c2 > 5
3194        let expr = or(l.clone(), r.clone());
3195
3196        // This is only true if `c1 < 6` is not nullable / can not be null.
3197        let expected = col("c2_non_null").gt(lit(5));
3198
3199        assert_eq!(simplify(expr), expected);
3200
3201        // ((c1 < 6) AND (c2 > 5)) OR (c2 > 5) --> c2 > 5
3202        let expr = or(l, r);
3203
3204        assert_eq!(simplify(expr), expected);
3205    }
3206
3207    #[test]
3208    fn test_simplify_and_or() {
3209        let l = col("c2").gt(lit(5));
3210        let r = or(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
3211
3212        // (c2 > 5) AND ((c1 < 6) OR (c2 > 5)) --> c2 > 5
3213        let expr = and(l.clone(), r.clone());
3214
3215        let expected = l.clone();
3216        assert_eq!(simplify(expr), expected);
3217
3218        // ((c1 < 6) OR (c2 > 5)) AND (c2 > 5) --> c2 > 5
3219        let expr = and(r, l);
3220        assert_eq!(simplify(expr), expected);
3221    }
3222
3223    #[test]
3224    fn test_simplify_and_or_non_null() {
3225        let l = col("c2_non_null").gt(lit(5));
3226        let r = or(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
3227
3228        // (c2 > 5) AND ((c1 < 6) OR (c2 > 5)) --> c2 > 5
3229        let expr = and(l.clone(), r.clone());
3230
3231        // This is only true if `c1 < 6` is not nullable / can not be null.
3232        let expected = col("c2_non_null").gt(lit(5));
3233
3234        assert_eq!(simplify(expr), expected);
3235
3236        // ((c1 < 6) OR (c2 > 5)) AND (c2 > 5) --> c2 > 5
3237        let expr = and(l, r);
3238
3239        assert_eq!(simplify(expr), expected);
3240    }
3241
3242    #[test]
3243    fn test_simplify_by_de_morgan_laws() {
3244        // Laws with logical operations
3245        // !(c3 AND c4) --> !c3 OR !c4
3246        let expr = and(col("c3"), col("c4")).not();
3247        let expected = or(col("c3").not(), col("c4").not());
3248        assert_eq!(simplify(expr), expected);
3249        // !(c3 OR c4) --> !c3 AND !c4
3250        let expr = or(col("c3"), col("c4")).not();
3251        let expected = and(col("c3").not(), col("c4").not());
3252        assert_eq!(simplify(expr), expected);
3253        // !(!c3) --> c3
3254        let expr = col("c3").not().not();
3255        let expected = col("c3");
3256        assert_eq!(simplify(expr), expected);
3257
3258        // Laws with bitwise operations
3259        // !(c3 & c4) --> !c3 | !c4
3260        let expr = -bitwise_and(col("c3"), col("c4"));
3261        let expected = bitwise_or(-col("c3"), -col("c4"));
3262        assert_eq!(simplify(expr), expected);
3263        // !(c3 | c4) --> !c3 & !c4
3264        let expr = -bitwise_or(col("c3"), col("c4"));
3265        let expected = bitwise_and(-col("c3"), -col("c4"));
3266        assert_eq!(simplify(expr), expected);
3267        // !(!c3) --> c3
3268        let expr = -(-col("c3"));
3269        let expected = col("c3");
3270        assert_eq!(simplify(expr), expected);
3271    }
3272
3273    #[test]
3274    fn test_simplify_null_and_false() {
3275        let expr = and(lit_bool_null(), lit(false));
3276        let expr_eq = lit(false);
3277
3278        assert_eq!(simplify(expr), expr_eq);
3279    }
3280
3281    #[test]
3282    fn test_simplify_divide_null_by_null() {
3283        let null = lit(ScalarValue::Int32(None));
3284        let expr_plus = null.clone() / null.clone();
3285        let expr_eq = null;
3286
3287        assert_eq!(simplify(expr_plus), expr_eq);
3288    }
3289
3290    #[test]
3291    fn test_simplify_simplify_arithmetic_expr() {
3292        let expr_plus = lit(1) + lit(1);
3293
3294        assert_eq!(simplify(expr_plus), lit(2));
3295    }
3296
3297    #[test]
3298    fn test_simplify_simplify_eq_expr() {
3299        let expr_eq = binary_expr(lit(1), Operator::Eq, lit(1));
3300
3301        assert_eq!(simplify(expr_eq), lit(true));
3302    }
3303
3304    #[test]
3305    fn test_simplify_regex() {
3306        // malformed regex
3307        assert_contains!(
3308            try_simplify(regex_match(col("c1"), lit("foo{")))
3309                .unwrap_err()
3310                .to_string(),
3311            "regex parse error"
3312        );
3313
3314        // unsupported cases
3315        assert_no_change(regex_match(col("c1"), lit("foo.*")));
3316        assert_no_change(regex_match(col("c1"), lit("(foo)")));
3317        assert_no_change(regex_match(col("c1"), lit("%")));
3318        assert_no_change(regex_match(col("c1"), lit("_")));
3319        assert_no_change(regex_match(col("c1"), lit("f%o")));
3320        assert_no_change(regex_match(col("c1"), lit("^f%o")));
3321        assert_no_change(regex_match(col("c1"), lit("f_o")));
3322
3323        // empty cases
3324        assert_change(
3325            regex_match(col("c1"), lit("")),
3326            if_not_null(col("c1"), true),
3327        );
3328        assert_change(
3329            regex_not_match(col("c1"), lit("")),
3330            if_not_null(col("c1"), false),
3331        );
3332        assert_change(
3333            regex_imatch(col("c1"), lit("")),
3334            if_not_null(col("c1"), true),
3335        );
3336        assert_change(
3337            regex_not_imatch(col("c1"), lit("")),
3338            if_not_null(col("c1"), false),
3339        );
3340
3341        // single character
3342        assert_change(regex_match(col("c1"), lit("x")), col("c1").like(lit("%x%")));
3343
3344        // single word
3345        assert_change(
3346            regex_match(col("c1"), lit("foo")),
3347            col("c1").like(lit("%foo%")),
3348        );
3349
3350        // regular expressions that match an exact literal
3351        assert_change(regex_match(col("c1"), lit("^$")), col("c1").eq(lit("")));
3352        assert_change(
3353            regex_not_match(col("c1"), lit("^$")),
3354            col("c1").not_eq(lit("")),
3355        );
3356        assert_change(
3357            regex_match(col("c1"), lit("^foo$")),
3358            col("c1").eq(lit("foo")),
3359        );
3360        assert_change(
3361            regex_not_match(col("c1"), lit("^foo$")),
3362            col("c1").not_eq(lit("foo")),
3363        );
3364
3365        // regular expressions that match exact captured literals
3366        assert_change(
3367            regex_match(col("c1"), lit("^(foo|bar)$")),
3368            col("c1").eq(lit("foo")).or(col("c1").eq(lit("bar"))),
3369        );
3370        assert_change(
3371            regex_not_match(col("c1"), lit("^(foo|bar)$")),
3372            col("c1")
3373                .not_eq(lit("foo"))
3374                .and(col("c1").not_eq(lit("bar"))),
3375        );
3376        assert_change(
3377            regex_match(col("c1"), lit("^(foo)$")),
3378            col("c1").eq(lit("foo")),
3379        );
3380        assert_change(
3381            regex_match(col("c1"), lit("^(foo|bar|baz)$")),
3382            ((col("c1").eq(lit("foo"))).or(col("c1").eq(lit("bar"))))
3383                .or(col("c1").eq(lit("baz"))),
3384        );
3385        assert_change(
3386            regex_match(col("c1"), lit("^(foo|bar|baz|qux)$")),
3387            col("c1")
3388                .in_list(vec![lit("foo"), lit("bar"), lit("baz"), lit("qux")], false),
3389        );
3390        assert_change(
3391            regex_match(col("c1"), lit("^(fo_o)$")),
3392            col("c1").eq(lit("fo_o")),
3393        );
3394        assert_change(
3395            regex_match(col("c1"), lit("^(fo_o)$")),
3396            col("c1").eq(lit("fo_o")),
3397        );
3398        assert_change(
3399            regex_match(col("c1"), lit("^(fo_o|ba_r)$")),
3400            col("c1").eq(lit("fo_o")).or(col("c1").eq(lit("ba_r"))),
3401        );
3402        assert_change(
3403            regex_not_match(col("c1"), lit("^(fo_o|ba_r)$")),
3404            col("c1")
3405                .not_eq(lit("fo_o"))
3406                .and(col("c1").not_eq(lit("ba_r"))),
3407        );
3408        assert_change(
3409            regex_match(col("c1"), lit("^(fo_o|ba_r|ba_z)$")),
3410            ((col("c1").eq(lit("fo_o"))).or(col("c1").eq(lit("ba_r"))))
3411                .or(col("c1").eq(lit("ba_z"))),
3412        );
3413        assert_change(
3414            regex_match(col("c1"), lit("^(fo_o|ba_r|baz|qu_x)$")),
3415            col("c1").in_list(
3416                vec![lit("fo_o"), lit("ba_r"), lit("baz"), lit("qu_x")],
3417                false,
3418            ),
3419        );
3420
3421        // regular expressions that mismatch captured literals
3422        assert_no_change(regex_match(col("c1"), lit("(foo|bar)")));
3423        assert_no_change(regex_match(col("c1"), lit("(foo|bar)*")));
3424        assert_no_change(regex_match(col("c1"), lit("(fo_o|b_ar)")));
3425        assert_no_change(regex_match(col("c1"), lit("(foo|ba_r)*")));
3426        assert_no_change(regex_match(col("c1"), lit("(fo_o|ba_r)*")));
3427        assert_no_change(regex_match(col("c1"), lit("^(foo|bar)*")));
3428        assert_no_change(regex_match(col("c1"), lit("^(foo)(bar)$")));
3429        assert_no_change(regex_match(col("c1"), lit("^")));
3430        assert_no_change(regex_match(col("c1"), lit("$")));
3431        assert_no_change(regex_match(col("c1"), lit("$^")));
3432        assert_no_change(regex_match(col("c1"), lit("$foo^")));
3433
3434        // regular expressions that match a partial literal
3435        assert_change(
3436            regex_match(col("c1"), lit("^foo")),
3437            col("c1").like(lit("foo%")),
3438        );
3439        assert_change(
3440            regex_match(col("c1"), lit("foo$")),
3441            col("c1").like(lit("%foo")),
3442        );
3443        assert_change(
3444            regex_match(col("c1"), lit("^foo|bar$")),
3445            col("c1").like(lit("foo%")).or(col("c1").like(lit("%bar"))),
3446        );
3447
3448        // OR-chain
3449        assert_change(
3450            regex_match(col("c1"), lit("foo|bar|baz")),
3451            col("c1")
3452                .like(lit("%foo%"))
3453                .or(col("c1").like(lit("%bar%")))
3454                .or(col("c1").like(lit("%baz%"))),
3455        );
3456        assert_change(
3457            regex_match(col("c1"), lit("foo|x|baz")),
3458            col("c1")
3459                .like(lit("%foo%"))
3460                .or(col("c1").like(lit("%x%")))
3461                .or(col("c1").like(lit("%baz%"))),
3462        );
3463        assert_change(
3464            regex_not_match(col("c1"), lit("foo|bar|baz")),
3465            col("c1")
3466                .not_like(lit("%foo%"))
3467                .and(col("c1").not_like(lit("%bar%")))
3468                .and(col("c1").not_like(lit("%baz%"))),
3469        );
3470        // both anchored expressions (translated to equality) and unanchored
3471        assert_change(
3472            regex_match(col("c1"), lit("foo|^x$|baz")),
3473            col("c1")
3474                .like(lit("%foo%"))
3475                .or(col("c1").eq(lit("x")))
3476                .or(col("c1").like(lit("%baz%"))),
3477        );
3478        assert_change(
3479            regex_not_match(col("c1"), lit("foo|^bar$|baz")),
3480            col("c1")
3481                .not_like(lit("%foo%"))
3482                .and(col("c1").not_eq(lit("bar")))
3483                .and(col("c1").not_like(lit("%baz%"))),
3484        );
3485        // Too many patterns (MAX_REGEX_ALTERNATIONS_EXPANSION)
3486        assert_no_change(regex_match(col("c1"), lit("foo|bar|baz|blarg|bozo|etc")));
3487    }
3488
3489    #[track_caller]
3490    fn assert_no_change(expr: Expr) {
3491        let optimized = simplify(expr.clone());
3492        assert_eq!(expr, optimized);
3493    }
3494
3495    #[track_caller]
3496    fn assert_change(expr: Expr, expected: Expr) {
3497        let optimized = simplify(expr);
3498        assert_eq!(optimized, expected);
3499    }
3500
3501    fn regex_match(left: Expr, right: Expr) -> Expr {
3502        Expr::BinaryExpr(BinaryExpr {
3503            left: Box::new(left),
3504            op: Operator::RegexMatch,
3505            right: Box::new(right),
3506        })
3507    }
3508
3509    fn regex_not_match(left: Expr, right: Expr) -> Expr {
3510        Expr::BinaryExpr(BinaryExpr {
3511            left: Box::new(left),
3512            op: Operator::RegexNotMatch,
3513            right: Box::new(right),
3514        })
3515    }
3516
3517    fn regex_imatch(left: Expr, right: Expr) -> Expr {
3518        Expr::BinaryExpr(BinaryExpr {
3519            left: Box::new(left),
3520            op: Operator::RegexIMatch,
3521            right: Box::new(right),
3522        })
3523    }
3524
3525    fn regex_not_imatch(left: Expr, right: Expr) -> Expr {
3526        Expr::BinaryExpr(BinaryExpr {
3527            left: Box::new(left),
3528            op: Operator::RegexNotIMatch,
3529            right: Box::new(right),
3530        })
3531    }
3532
3533    // ------------------------------
3534    // ----- Simplifier tests -------
3535    // ------------------------------
3536
3537    fn try_simplify(expr: Expr) -> Result<Expr> {
3538        let schema = expr_test_schema();
3539        let simplifier =
3540            ExprSimplifier::new(SimplifyContext::default().with_schema(schema));
3541        simplifier.simplify(expr)
3542    }
3543
3544    fn coerce(expr: Expr) -> Expr {
3545        let schema = expr_test_schema();
3546        let simplifier = ExprSimplifier::new(
3547            SimplifyContext::default().with_schema(Arc::clone(&schema)),
3548        );
3549        simplifier.coerce(expr, schema.as_ref()).unwrap()
3550    }
3551
3552    fn simplify(expr: Expr) -> Expr {
3553        try_simplify(expr).unwrap()
3554    }
3555
3556    fn try_simplify_with_cycle_count(expr: Expr) -> Result<(Expr, u32)> {
3557        let schema = expr_test_schema();
3558        let simplifier =
3559            ExprSimplifier::new(SimplifyContext::default().with_schema(schema));
3560        let (expr, count) = simplifier.simplify_with_cycle_count_transformed(expr)?;
3561        Ok((expr.data, count))
3562    }
3563
3564    fn simplify_with_cycle_count(expr: Expr) -> (Expr, u32) {
3565        try_simplify_with_cycle_count(expr).unwrap()
3566    }
3567
3568    fn simplify_with_guarantee(
3569        expr: Expr,
3570        guarantees: Vec<(Expr, NullableInterval)>,
3571    ) -> Expr {
3572        let schema = expr_test_schema();
3573        let simplifier =
3574            ExprSimplifier::new(SimplifyContext::default().with_schema(schema))
3575                .with_guarantees(guarantees);
3576        simplifier.simplify(expr).unwrap()
3577    }
3578
3579    fn expr_test_schema() -> DFSchemaRef {
3580        static EXPR_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
3581            Arc::new(
3582                DFSchema::from_unqualified_fields(
3583                    vec![
3584                        Field::new("c1", DataType::Utf8, true),
3585                        Field::new("c2", DataType::Boolean, true),
3586                        Field::new("c3", DataType::Int64, true),
3587                        Field::new("c4", DataType::UInt32, true),
3588                        Field::new("c1_non_null", DataType::Utf8, false),
3589                        Field::new("c2_non_null", DataType::Boolean, false),
3590                        Field::new("c3_non_null", DataType::Int64, false),
3591                        Field::new("c4_non_null", DataType::UInt32, false),
3592                        Field::new("c5", DataType::FixedSizeBinary(3), true),
3593                    ]
3594                    .into(),
3595                    HashMap::new(),
3596                )
3597                .unwrap(),
3598            )
3599        });
3600        Arc::clone(&EXPR_TEST_SCHEMA)
3601    }
3602
3603    #[test]
3604    fn simplify_expr_null_comparison() {
3605        // x = null is always null
3606        assert_eq!(
3607            simplify(lit(true).eq(lit(ScalarValue::Boolean(None)))),
3608            lit(ScalarValue::Boolean(None)),
3609        );
3610
3611        // null != null is always null
3612        assert_eq!(
3613            simplify(
3614                lit(ScalarValue::Boolean(None)).not_eq(lit(ScalarValue::Boolean(None)))
3615            ),
3616            lit(ScalarValue::Boolean(None)),
3617        );
3618
3619        // x != null is always null
3620        assert_eq!(
3621            simplify(col("c2").not_eq(lit(ScalarValue::Boolean(None)))),
3622            lit(ScalarValue::Boolean(None)),
3623        );
3624
3625        // null = x is always null
3626        assert_eq!(
3627            simplify(lit(ScalarValue::Boolean(None)).eq(col("c2"))),
3628            lit(ScalarValue::Boolean(None)),
3629        );
3630    }
3631
3632    #[test]
3633    fn simplify_expr_is_not_null() {
3634        assert_eq!(
3635            simplify(Expr::IsNotNull(Box::new(col("c1")))),
3636            Expr::IsNotNull(Box::new(col("c1")))
3637        );
3638
3639        // 'c1_non_null IS NOT NULL' is always true
3640        assert_eq!(
3641            simplify(Expr::IsNotNull(Box::new(col("c1_non_null")))),
3642            lit(true)
3643        );
3644    }
3645
3646    #[test]
3647    fn simplify_expr_is_null() {
3648        assert_eq!(
3649            simplify(Expr::IsNull(Box::new(col("c1")))),
3650            Expr::IsNull(Box::new(col("c1")))
3651        );
3652
3653        // 'c1_non_null IS NULL' is always false
3654        assert_eq!(
3655            simplify(Expr::IsNull(Box::new(col("c1_non_null")))),
3656            lit(false)
3657        );
3658    }
3659
3660    #[test]
3661    fn simplify_expr_is_unknown() {
3662        assert_eq!(simplify(col("c2").is_unknown()), col("c2").is_unknown(),);
3663
3664        // 'c2_non_null is unknown' is always false
3665        assert_eq!(simplify(col("c2_non_null").is_unknown()), lit(false));
3666    }
3667
3668    #[test]
3669    fn simplify_expr_is_not_known() {
3670        assert_eq!(
3671            simplify(col("c2").is_not_unknown()),
3672            col("c2").is_not_unknown()
3673        );
3674
3675        // 'c2_non_null is not unknown' is always true
3676        assert_eq!(simplify(col("c2_non_null").is_not_unknown()), lit(true));
3677    }
3678
3679    #[test]
3680    fn simplify_expr_eq() {
3681        let schema = expr_test_schema();
3682        assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3683
3684        // true = true -> true
3685        assert_eq!(simplify(lit(true).eq(lit(true))), lit(true));
3686
3687        // true = false -> false
3688        assert_eq!(simplify(lit(true).eq(lit(false))), lit(false),);
3689
3690        // c2 = true -> c2
3691        assert_eq!(simplify(col("c2").eq(lit(true))), col("c2"));
3692
3693        // c2 = false => !c2
3694        assert_eq!(simplify(col("c2").eq(lit(false))), col("c2").not(),);
3695    }
3696
3697    #[test]
3698    fn simplify_expr_eq_skip_nonboolean_type() {
3699        let schema = expr_test_schema();
3700
3701        // When one of the operand is not of boolean type, folding the
3702        // other boolean constant will change return type of
3703        // expression to non-boolean.
3704        //
3705        // Make sure c1 column to be used in tests is not boolean type
3706        assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3707
3708        // don't fold c1 = foo
3709        assert_eq!(simplify(col("c1").eq(lit("foo"))), col("c1").eq(lit("foo")),);
3710    }
3711
3712    #[test]
3713    fn simplify_expr_not_eq() {
3714        let schema = expr_test_schema();
3715
3716        assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3717
3718        // c2 != true -> !c2
3719        assert_eq!(simplify(col("c2").not_eq(lit(true))), col("c2").not(),);
3720
3721        // c2 != false -> c2
3722        assert_eq!(simplify(col("c2").not_eq(lit(false))), col("c2"),);
3723
3724        // test constant
3725        assert_eq!(simplify(lit(true).not_eq(lit(true))), lit(false),);
3726
3727        assert_eq!(simplify(lit(true).not_eq(lit(false))), lit(true),);
3728    }
3729
3730    #[test]
3731    fn simplify_expr_not_eq_skip_nonboolean_type() {
3732        let schema = expr_test_schema();
3733
3734        // when one of the operand is not of boolean type, folding the
3735        // other boolean constant will change return type of
3736        // expression to non-boolean.
3737        assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3738
3739        assert_eq!(
3740            simplify(col("c1").not_eq(lit("foo"))),
3741            col("c1").not_eq(lit("foo")),
3742        );
3743    }
3744
3745    #[test]
3746    fn simplify_literal_case_equality() {
3747        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok"
3748        let simple_case = Expr::Case(Case::new(
3749            None,
3750            vec![(
3751                Box::new(col("c2_non_null").not_eq(lit(false))),
3752                Box::new(lit("ok")),
3753            )],
3754            Some(Box::new(lit("not_ok"))),
3755        ));
3756
3757        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok" == "ok"
3758        // -->
3759        // CASE WHEN c2 != false THEN "ok" == "ok" ELSE "not_ok" == "ok"
3760        // -->
3761        // CASE WHEN c2 != false THEN true ELSE false
3762        // -->
3763        // c2
3764        assert_eq!(
3765            simplify(binary_expr(simple_case.clone(), Operator::Eq, lit("ok"),)),
3766            col("c2_non_null"),
3767        );
3768
3769        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok" != "ok"
3770        // -->
3771        // NOT(CASE WHEN c2 != false THEN "ok" == "ok" ELSE "not_ok" == "ok")
3772        // -->
3773        // NOT(CASE WHEN c2 != false THEN true ELSE false)
3774        // -->
3775        // NOT(c2)
3776        assert_eq!(
3777            simplify(binary_expr(simple_case, Operator::NotEq, lit("ok"),)),
3778            not(col("c2_non_null")),
3779        );
3780
3781        let complex_case = Expr::Case(Case::new(
3782            None,
3783            vec![
3784                (
3785                    Box::new(col("c1").eq(lit("inboxed"))),
3786                    Box::new(lit("pending")),
3787                ),
3788                (
3789                    Box::new(col("c1").eq(lit("scheduled"))),
3790                    Box::new(lit("pending")),
3791                ),
3792                (
3793                    Box::new(col("c1").eq(lit("completed"))),
3794                    Box::new(lit("completed")),
3795                ),
3796                (
3797                    Box::new(col("c1").eq(lit("paused"))),
3798                    Box::new(lit("paused")),
3799                ),
3800                (Box::new(col("c2")), Box::new(lit("running"))),
3801                (
3802                    Box::new(col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0)))),
3803                    Box::new(lit("backing-off")),
3804                ),
3805            ],
3806            Some(Box::new(lit("ready"))),
3807        ));
3808
3809        assert_eq!(
3810            simplify(binary_expr(
3811                complex_case.clone(),
3812                Operator::Eq,
3813                lit("completed"),
3814            )),
3815            not_distinct_from(col("c1").eq(lit("completed")), lit(true)).and(
3816                distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3817                    .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3818            )
3819        );
3820
3821        assert_eq!(
3822            simplify(binary_expr(
3823                complex_case.clone(),
3824                Operator::NotEq,
3825                lit("completed"),
3826            )),
3827            distinct_from(col("c1").eq(lit("completed")), lit(true))
3828                .or(not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3829                    .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true))))
3830        );
3831
3832        assert_eq!(
3833            simplify(binary_expr(
3834                complex_case.clone(),
3835                Operator::Eq,
3836                lit("running"),
3837            )),
3838            not_distinct_from(col("c2"), lit(true)).and(
3839                distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3840                    .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3841                    .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
3842                    .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
3843            )
3844        );
3845
3846        assert_eq!(
3847            simplify(binary_expr(
3848                complex_case.clone(),
3849                Operator::Eq,
3850                lit("ready"),
3851            )),
3852            distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3853                .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3854                .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
3855                .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
3856                .and(distinct_from(col("c2"), lit(true)))
3857                .and(distinct_from(
3858                    col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
3859                    lit(true)
3860                ))
3861        );
3862
3863        assert_eq!(
3864            simplify(binary_expr(
3865                complex_case.clone(),
3866                Operator::NotEq,
3867                lit("ready"),
3868            )),
3869            not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3870                .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3871                .or(not_distinct_from(col("c1").eq(lit("completed")), lit(true)))
3872                .or(not_distinct_from(col("c1").eq(lit("paused")), lit(true)))
3873                .or(not_distinct_from(col("c2"), lit(true)))
3874                .or(not_distinct_from(
3875                    col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
3876                    lit(true)
3877                ))
3878        );
3879    }
3880
3881    #[test]
3882    fn simplify_expr_case_when_then_else() {
3883        // CASE WHEN c2 != false THEN "ok" == "not_ok" ELSE c2 == true
3884        // -->
3885        // CASE WHEN c2 THEN false ELSE c2
3886        // -->
3887        // false
3888        assert_eq!(
3889            simplify(Expr::Case(Case::new(
3890                None,
3891                vec![(
3892                    Box::new(col("c2_non_null").not_eq(lit(false))),
3893                    Box::new(lit("ok").eq(lit("not_ok"))),
3894                )],
3895                Some(Box::new(col("c2_non_null").eq(lit(true)))),
3896            ))),
3897            lit(false) // #1716
3898        );
3899
3900        // CASE WHEN c2 != false THEN "ok" == "ok" ELSE c2
3901        // -->
3902        // CASE WHEN c2 THEN true ELSE c2
3903        // -->
3904        // c2
3905        //
3906        // Need to call simplify 2x due to
3907        // https://github.com/apache/datafusion/issues/1160
3908        assert_eq!(
3909            simplify(simplify(Expr::Case(Case::new(
3910                None,
3911                vec![(
3912                    Box::new(col("c2_non_null").not_eq(lit(false))),
3913                    Box::new(lit("ok").eq(lit("ok"))),
3914                )],
3915                Some(Box::new(col("c2_non_null").eq(lit(true)))),
3916            )))),
3917            col("c2_non_null")
3918        );
3919
3920        // CASE WHEN ISNULL(c2) THEN true ELSE c2
3921        // -->
3922        // ISNULL(c2) OR c2
3923        //
3924        // Need to call simplify 2x due to
3925        // https://github.com/apache/datafusion/issues/1160
3926        assert_eq!(
3927            simplify(simplify(Expr::Case(Case::new(
3928                None,
3929                vec![(Box::new(col("c2").is_null()), Box::new(lit(true)),)],
3930                Some(Box::new(col("c2"))),
3931            )))),
3932            col("c2")
3933                .is_null()
3934                .or(col("c2").is_not_null().and(col("c2")))
3935        );
3936
3937        // CASE WHEN c1 then true WHEN c2 then false ELSE true
3938        // --> c1 OR (NOT(c1) AND c2 AND FALSE) OR (NOT(c1 OR c2) AND TRUE)
3939        // --> c1 OR (NOT(c1) AND NOT(c2))
3940        // --> c1 OR NOT(c2)
3941        //
3942        // Need to call simplify 2x due to
3943        // https://github.com/apache/datafusion/issues/1160
3944        assert_eq!(
3945            simplify(simplify(Expr::Case(Case::new(
3946                None,
3947                vec![
3948                    (Box::new(col("c1_non_null")), Box::new(lit(true)),),
3949                    (Box::new(col("c2_non_null")), Box::new(lit(false)),),
3950                ],
3951                Some(Box::new(lit(true))),
3952            )))),
3953            col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
3954        );
3955
3956        // CASE WHEN c1 then true WHEN c2 then true ELSE false
3957        // --> c1 OR (NOT(c1) AND c2 AND TRUE) OR (NOT(c1 OR c2) AND FALSE)
3958        // --> c1 OR (NOT(c1) AND c2)
3959        // --> c1 OR c2
3960        //
3961        // Need to call simplify 2x due to
3962        // https://github.com/apache/datafusion/issues/1160
3963        assert_eq!(
3964            simplify(simplify(Expr::Case(Case::new(
3965                None,
3966                vec![
3967                    (Box::new(col("c1_non_null")), Box::new(lit(true)),),
3968                    (Box::new(col("c2_non_null")), Box::new(lit(false)),),
3969                ],
3970                Some(Box::new(lit(true))),
3971            )))),
3972            col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
3973        );
3974
3975        // CASE WHEN c > 0 THEN true END AS c1
3976        assert_eq!(
3977            simplify(simplify(Expr::Case(Case::new(
3978                None,
3979                vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
3980                None,
3981            )))),
3982            not_distinct_from(col("c3").gt(lit(0_i64)), lit(true)).or(distinct_from(
3983                col("c3").gt(lit(0_i64)),
3984                lit(true)
3985            )
3986            .and(lit_bool_null()))
3987        );
3988
3989        // CASE WHEN c > 0 THEN true ELSE false END AS c1
3990        assert_eq!(
3991            simplify(simplify(Expr::Case(Case::new(
3992                None,
3993                vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
3994                Some(Box::new(lit(false))),
3995            )))),
3996            not_distinct_from(col("c3").gt(lit(0_i64)), lit(true))
3997        );
3998    }
3999
4000    #[test]
4001    fn simplify_expr_case_when_first_true() {
4002        // CASE WHEN true THEN 1 ELSE c1 END --> 1
4003        assert_eq!(
4004            simplify(Expr::Case(Case::new(
4005                None,
4006                vec![(Box::new(lit(true)), Box::new(lit(1)),)],
4007                Some(Box::new(col("c1"))),
4008            ))),
4009            lit(1)
4010        );
4011
4012        // CASE WHEN true THEN col('a') ELSE col('b') END --> col('a')
4013        assert_eq!(
4014            simplify(Expr::Case(Case::new(
4015                None,
4016                vec![(Box::new(lit(true)), Box::new(lit("a")),)],
4017                Some(Box::new(lit("b"))),
4018            ))),
4019            lit("a")
4020        );
4021
4022        // CASE WHEN true THEN col('a') WHEN col('x') > 5 THEN col('b') ELSE col('c') END --> col('a')
4023        assert_eq!(
4024            simplify(Expr::Case(Case::new(
4025                None,
4026                vec![
4027                    (Box::new(lit(true)), Box::new(lit("a"))),
4028                    (Box::new(lit("x").gt(lit(5))), Box::new(lit("b"))),
4029                ],
4030                Some(Box::new(lit("c"))),
4031            ))),
4032            lit("a")
4033        );
4034
4035        // CASE WHEN true THEN col('a') END --> col('a') (no else clause)
4036        assert_eq!(
4037            simplify(Expr::Case(Case::new(
4038                None,
4039                vec![(Box::new(lit(true)), Box::new(lit("a")),)],
4040                None,
4041            ))),
4042            lit("a")
4043        );
4044
4045        // Negative test: CASE WHEN c2 THEN 1 ELSE 2 END should not be simplified
4046        let expr = Expr::Case(Case::new(
4047            None,
4048            vec![(Box::new(col("c2")), Box::new(lit(1)))],
4049            Some(Box::new(lit(2))),
4050        ));
4051        assert_eq!(simplify(expr.clone()), expr);
4052
4053        // Negative test: CASE WHEN false THEN 1 ELSE 2 END should not use this rule
4054        let expr = Expr::Case(Case::new(
4055            None,
4056            vec![(Box::new(lit(false)), Box::new(lit(1)))],
4057            Some(Box::new(lit(2))),
4058        ));
4059        assert_ne!(simplify(expr), lit(1));
4060
4061        // Negative test: CASE WHEN col('c1') > 5 THEN 1 ELSE 2 END should not be simplified
4062        let expr = Expr::Case(Case::new(
4063            None,
4064            vec![(Box::new(col("c1").gt(lit(5))), Box::new(lit(1)))],
4065            Some(Box::new(lit(2))),
4066        ));
4067        assert_eq!(simplify(expr.clone()), expr);
4068    }
4069
4070    #[test]
4071    fn simplify_expr_case_when_any_true() {
4072        // CASE WHEN c3 > 0 THEN 'a' WHEN true THEN 'b' ELSE 'c' END --> CASE WHEN c3 > 0 THEN 'a' ELSE 'b' END
4073        assert_eq!(
4074            simplify(Expr::Case(Case::new(
4075                None,
4076                vec![
4077                    (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4078                    (Box::new(lit(true)), Box::new(lit("b"))),
4079                ],
4080                Some(Box::new(lit("c"))),
4081            ))),
4082            Expr::Case(Case::new(
4083                None,
4084                vec![(Box::new(col("c3").gt(lit(0))), Box::new(lit("a")))],
4085                Some(Box::new(lit("b"))),
4086            ))
4087        );
4088
4089        // CASE WHEN c3 > 0 THEN 'a' WHEN c4 < 0 THEN 'b' WHEN true THEN 'c' WHEN c3 = 0 THEN 'd' ELSE 'e' END
4090        // --> CASE WHEN c3 > 0 THEN 'a' WHEN c4 < 0 THEN 'b' ELSE 'c' END
4091        assert_eq!(
4092            simplify(Expr::Case(Case::new(
4093                None,
4094                vec![
4095                    (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4096                    (Box::new(col("c4").lt(lit(0))), Box::new(lit("b"))),
4097                    (Box::new(lit(true)), Box::new(lit("c"))),
4098                    (Box::new(col("c3").eq(lit(0))), Box::new(lit("d"))),
4099                ],
4100                Some(Box::new(lit("e"))),
4101            ))),
4102            Expr::Case(Case::new(
4103                None,
4104                vec![
4105                    (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4106                    (Box::new(col("c4").lt(lit(0))), Box::new(lit("b"))),
4107                ],
4108                Some(Box::new(lit("c"))),
4109            ))
4110        );
4111
4112        // CASE WHEN c3 > 0 THEN 1 WHEN c4 < 0 THEN 2 WHEN true THEN 3 END (no else)
4113        // --> CASE WHEN c3 > 0 THEN 1 WHEN c4 < 0 THEN 2 ELSE 3 END
4114        assert_eq!(
4115            simplify(Expr::Case(Case::new(
4116                None,
4117                vec![
4118                    (Box::new(col("c3").gt(lit(0))), Box::new(lit(1))),
4119                    (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4120                    (Box::new(lit(true)), Box::new(lit(3))),
4121                ],
4122                None,
4123            ))),
4124            Expr::Case(Case::new(
4125                None,
4126                vec![
4127                    (Box::new(col("c3").gt(lit(0))), Box::new(lit(1))),
4128                    (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4129                ],
4130                Some(Box::new(lit(3))),
4131            ))
4132        );
4133
4134        // Negative test: CASE WHEN c3 > 0 THEN c3 WHEN c4 < 0 THEN 2 ELSE 3 END should not be simplified
4135        let expr = Expr::Case(Case::new(
4136            None,
4137            vec![
4138                (Box::new(col("c3").gt(lit(0))), Box::new(col("c3"))),
4139                (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4140            ],
4141            Some(Box::new(lit(3))),
4142        ));
4143        assert_eq!(simplify(expr.clone()), expr);
4144    }
4145
4146    #[test]
4147    fn simplify_expr_case_when_any_false() {
4148        // CASE WHEN false THEN 'a' END --> NULL
4149        assert_eq!(
4150            simplify(Expr::Case(Case::new(
4151                None,
4152                vec![(Box::new(lit(false)), Box::new(lit("a")))],
4153                None,
4154            ))),
4155            Expr::Literal(ScalarValue::Utf8(None), None)
4156        );
4157
4158        // CASE WHEN false THEN 2 ELSE 1 END --> 1
4159        assert_eq!(
4160            simplify(Expr::Case(Case::new(
4161                None,
4162                vec![(Box::new(lit(false)), Box::new(lit(2)))],
4163                Some(Box::new(lit(1))),
4164            ))),
4165            lit(1),
4166        );
4167
4168        // CASE WHEN c3 < 10 THEN 'b' WHEN false then c3 ELSE c4 END --> CASE WHEN c3 < 10 THEN b ELSE c4 END
4169        assert_eq!(
4170            simplify(Expr::Case(Case::new(
4171                None,
4172                vec![
4173                    (Box::new(col("c3").lt(lit(10))), Box::new(lit("b"))),
4174                    (Box::new(lit(false)), Box::new(col("c3"))),
4175                ],
4176                Some(Box::new(col("c4"))),
4177            ))),
4178            Expr::Case(Case::new(
4179                None,
4180                vec![(Box::new(col("c3").lt(lit(10))), Box::new(lit("b")))],
4181                Some(Box::new(col("c4"))),
4182            ))
4183        );
4184
4185        // Negative test: CASE WHEN c3 = 4 THEN 1 ELSE 2 END should not be simplified
4186        let expr = Expr::Case(Case::new(
4187            None,
4188            vec![(Box::new(col("c3").eq(lit(4))), Box::new(lit(1)))],
4189            Some(Box::new(lit(2))),
4190        ));
4191        assert_eq!(simplify(expr.clone()), expr);
4192    }
4193
4194    fn distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4195        Expr::BinaryExpr(BinaryExpr {
4196            left: Box::new(left.into()),
4197            op: Operator::IsDistinctFrom,
4198            right: Box::new(right.into()),
4199        })
4200    }
4201
4202    fn not_distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4203        Expr::BinaryExpr(BinaryExpr {
4204            left: Box::new(left.into()),
4205            op: Operator::IsNotDistinctFrom,
4206            right: Box::new(right.into()),
4207        })
4208    }
4209
4210    #[test]
4211    fn simplify_expr_bool_or() {
4212        // col || true is always true
4213        assert_eq!(simplify(col("c2").or(lit(true))), lit(true),);
4214
4215        // col || false is always col
4216        assert_eq!(simplify(col("c2").or(lit(false))), col("c2"),);
4217
4218        // true || null is always true
4219        assert_eq!(simplify(lit(true).or(lit_bool_null())), lit(true),);
4220
4221        // null || true is always true
4222        assert_eq!(simplify(lit_bool_null().or(lit(true))), lit(true),);
4223
4224        // false || null is always null
4225        assert_eq!(simplify(lit(false).or(lit_bool_null())), lit_bool_null(),);
4226
4227        // null || false is always null
4228        assert_eq!(simplify(lit_bool_null().or(lit(false))), lit_bool_null(),);
4229
4230        // ( c1 BETWEEN Int32(0) AND Int32(10) ) OR Boolean(NULL)
4231        // it can be either NULL or  TRUE depending on the value of `c1 BETWEEN Int32(0) AND Int32(10)`
4232        // and should not be rewritten
4233        let expr = col("c1").between(lit(0), lit(10));
4234        let expr = expr.or(lit_bool_null());
4235        let result = simplify(expr);
4236
4237        let expected_expr = or(
4238            and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
4239            lit_bool_null(),
4240        );
4241        assert_eq!(expected_expr, result);
4242    }
4243
4244    #[test]
4245    fn simplify_inlist() {
4246        assert_eq!(simplify(in_list(col("c1"), vec![], false)), lit(false));
4247        assert_eq!(simplify(in_list(col("c1"), vec![], true)), lit(true));
4248
4249        // null in (...)  --> null
4250        assert_eq!(
4251            simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], false)),
4252            lit_bool_null()
4253        );
4254
4255        // null not in (...)  --> null
4256        assert_eq!(
4257            simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], true)),
4258            lit_bool_null()
4259        );
4260
4261        assert_eq!(
4262            simplify(in_list(col("c1"), vec![lit(1)], false)),
4263            col("c1").eq(lit(1))
4264        );
4265        assert_eq!(
4266            simplify(in_list(col("c1"), vec![lit(1)], true)),
4267            col("c1").not_eq(lit(1))
4268        );
4269
4270        // more complex expressions can be simplified if list contains
4271        // one element only
4272        assert_eq!(
4273            simplify(in_list(col("c1") * lit(10), vec![lit(2)], false)),
4274            (col("c1") * lit(10)).eq(lit(2))
4275        );
4276
4277        assert_eq!(
4278            simplify(in_list(col("c1"), vec![lit(1), lit(2)], false)),
4279            col("c1").eq(lit(1)).or(col("c1").eq(lit(2)))
4280        );
4281        assert_eq!(
4282            simplify(in_list(col("c1"), vec![lit(1), lit(2)], true)),
4283            col("c1").not_eq(lit(1)).and(col("c1").not_eq(lit(2)))
4284        );
4285
4286        let subquery = Arc::new(test_table_scan_with_name("test").unwrap());
4287        assert_eq!(
4288            simplify(in_list(
4289                col("c1"),
4290                vec![scalar_subquery(Arc::clone(&subquery))],
4291                false
4292            )),
4293            in_subquery(col("c1"), Arc::clone(&subquery))
4294        );
4295        assert_eq!(
4296            simplify(in_list(
4297                col("c1"),
4298                vec![scalar_subquery(Arc::clone(&subquery))],
4299                true
4300            )),
4301            not_in_subquery(col("c1"), subquery)
4302        );
4303
4304        let subquery1 =
4305            scalar_subquery(Arc::new(test_table_scan_with_name("test1").unwrap()));
4306        let subquery2 =
4307            scalar_subquery(Arc::new(test_table_scan_with_name("test2").unwrap()));
4308
4309        // c1 NOT IN (<subquery1>, <subquery2>) -> c1 != <subquery1> AND c1 != <subquery2>
4310        assert_eq!(
4311            simplify(in_list(
4312                col("c1"),
4313                vec![subquery1.clone(), subquery2.clone()],
4314                true
4315            )),
4316            col("c1")
4317                .not_eq(subquery1.clone())
4318                .and(col("c1").not_eq(subquery2.clone()))
4319        );
4320
4321        // c1 IN (<subquery1>, <subquery2>) -> c1 == <subquery1> OR c1 == <subquery2>
4322        assert_eq!(
4323            simplify(in_list(
4324                col("c1"),
4325                vec![subquery1.clone(), subquery2.clone()],
4326                false
4327            )),
4328            col("c1").eq(subquery1).or(col("c1").eq(subquery2))
4329        );
4330
4331        // 1. c1 IN (1,2,3,4) AND c1 IN (5,6,7,8) -> false
4332        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4333            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false),
4334        );
4335        assert_eq!(simplify(expr), lit(false));
4336
4337        // 2. c1 IN (1,2,3,4) AND c1 IN (4,5,6,7) -> c1 = 4
4338        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4339            in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], false),
4340        );
4341        assert_eq!(simplify(expr), col("c1").eq(lit(4)));
4342
4343        // 3. c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (5, 6, 7, 8) -> true
4344        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
4345            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
4346        );
4347        assert_eq!(simplify(expr), lit(true));
4348
4349        // 3.5 c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (4, 5, 6, 7) -> c1 != 4 (4 overlaps)
4350        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
4351            in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
4352        );
4353        assert_eq!(simplify(expr), col("c1").not_eq(lit(4)));
4354
4355        // 4. c1 NOT IN (1,2,3,4) AND c1 NOT IN (4,5,6,7) -> c1 NOT IN (1,2,3,4,5,6,7)
4356        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
4357            in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
4358        );
4359        assert_eq!(
4360            simplify(expr),
4361            in_list(
4362                col("c1"),
4363                vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6), lit(7)],
4364                true
4365            )
4366        );
4367
4368        // 5. c1 IN (1,2,3,4) OR c1 IN (2,3,4,5) -> c1 IN (1,2,3,4,5)
4369        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).or(
4370            in_list(col("c1"), vec![lit(2), lit(3), lit(4), lit(5)], false),
4371        );
4372        assert_eq!(
4373            simplify(expr),
4374            in_list(
4375                col("c1"),
4376                vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4377                false
4378            )
4379        );
4380
4381        // 6. c1 IN (1,2,3) AND c1 NOT INT (1,2,3,4,5) -> false
4382        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3)], false).and(in_list(
4383            col("c1"),
4384            vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4385            true,
4386        ));
4387        assert_eq!(simplify(expr), lit(false));
4388
4389        // 7. c1 NOT IN (1,2,3,4) AND c1 IN (1,2,3,4,5) -> c1 = 5
4390        let expr =
4391            in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(in_list(
4392                col("c1"),
4393                vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4394                false,
4395            ));
4396        assert_eq!(simplify(expr), col("c1").eq(lit(5)));
4397
4398        // 8. c1 IN (1,2,3,4) AND c1 NOT IN (5,6,7,8) -> c1 IN (1,2,3,4)
4399        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4400            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
4401        );
4402        assert_eq!(
4403            simplify(expr),
4404            in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false)
4405        );
4406
4407        // inlist with more than two expressions
4408        // c1 IN (1,2,3,4,5,6) AND c1 IN (1,3,5,6) AND c1 IN (3,6) -> c1 = 3 OR c1 = 6
4409        let expr = in_list(
4410            col("c1"),
4411            vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6)],
4412            false,
4413        )
4414        .and(in_list(
4415            col("c1"),
4416            vec![lit(1), lit(3), lit(5), lit(6)],
4417            false,
4418        ))
4419        .and(in_list(col("c1"), vec![lit(3), lit(6)], false));
4420        assert_eq!(
4421            simplify(expr),
4422            col("c1").eq(lit(3)).or(col("c1").eq(lit(6)))
4423        );
4424
4425        // c1 NOT IN (1,2,3,4) AND c1 IN (5,6,7,8) AND c1 NOT IN (3,4,5,6) AND c1 IN (8,9,10) -> c1 = 8
4426        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
4427            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false)
4428                .and(in_list(
4429                    col("c1"),
4430                    vec![lit(3), lit(4), lit(5), lit(6)],
4431                    true,
4432                ))
4433                .and(in_list(col("c1"), vec![lit(8), lit(9), lit(10)], false)),
4434        );
4435        assert_eq!(simplify(expr), col("c1").eq(lit(8)));
4436
4437        // Contains non-InList expression
4438        // c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9) -> c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9)
4439        let expr =
4440            in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(col("c1")
4441                .not_eq(lit(5))
4442                .or(in_list(
4443                    col("c1"),
4444                    vec![lit(6), lit(7), lit(8), lit(9)],
4445                    true,
4446                )));
4447        // TODO: Further simplify this expression
4448        // https://github.com/apache/datafusion/issues/8970
4449        // assert_eq!(simplify(expr.clone()), lit(true));
4450        assert_eq!(simplify(expr.clone()), expr);
4451    }
4452
4453    #[test]
4454    fn simplify_null_in_empty_inlist() {
4455        // `NULL::boolean IN ()` == `NULL::boolean IN (SELECT foo FROM empty)` == false
4456        let expr = in_list(lit_bool_null(), vec![], false);
4457        assert_eq!(simplify(expr), lit(false));
4458
4459        // `NULL::boolean NOT IN ()` == `NULL::boolean NOT IN (SELECT foo FROM empty)` == true
4460        let expr = in_list(lit_bool_null(), vec![], true);
4461        assert_eq!(simplify(expr), lit(true));
4462
4463        // `NULL IN ()` == `NULL IN (SELECT foo FROM empty)` == false
4464        let null_null = || Expr::Literal(ScalarValue::Null, None);
4465        let expr = in_list(null_null(), vec![], false);
4466        assert_eq!(simplify(expr), lit(false));
4467
4468        // `NULL NOT IN ()` == `NULL NOT IN (SELECT foo FROM empty)` == true
4469        let expr = in_list(null_null(), vec![], true);
4470        assert_eq!(simplify(expr), lit(true));
4471    }
4472
4473    #[test]
4474    fn just_simplifier_simplify_null_in_empty_inlist() {
4475        let simplify = |expr: Expr| -> Expr {
4476            let schema = expr_test_schema();
4477            let info = SimplifyContext::default().with_schema(schema);
4478            let simplifier = &mut Simplifier::new(&info);
4479            expr.rewrite(simplifier)
4480                .expect("Failed to simplify expression")
4481                .data
4482        };
4483
4484        // `NULL::boolean IN ()` == `NULL::boolean IN (SELECT foo FROM empty)` == false
4485        let expr = in_list(lit_bool_null(), vec![], false);
4486        assert_eq!(simplify(expr), lit(false));
4487
4488        // `NULL::boolean NOT IN ()` == `NULL::boolean NOT IN (SELECT foo FROM empty)` == true
4489        let expr = in_list(lit_bool_null(), vec![], true);
4490        assert_eq!(simplify(expr), lit(true));
4491
4492        // `NULL IN ()` == `NULL IN (SELECT foo FROM empty)` == false
4493        let null_null = || Expr::Literal(ScalarValue::Null, None);
4494        let expr = in_list(null_null(), vec![], false);
4495        assert_eq!(simplify(expr), lit(false));
4496
4497        // `NULL NOT IN ()` == `NULL NOT IN (SELECT foo FROM empty)` == true
4498        let expr = in_list(null_null(), vec![], true);
4499        assert_eq!(simplify(expr), lit(true));
4500    }
4501
4502    #[test]
4503    fn simplify_large_or() {
4504        let expr = (0..5)
4505            .map(|i| col("c1").eq(lit(i)))
4506            .fold(lit(false), |acc, e| acc.or(e));
4507        assert_eq!(
4508            simplify(expr),
4509            in_list(col("c1"), (0..5).map(lit).collect(), false),
4510        );
4511    }
4512
4513    #[test]
4514    fn simplify_expr_bool_and() {
4515        // col & true is always col
4516        assert_eq!(simplify(col("c2").and(lit(true))), col("c2"),);
4517        // col & false is always false
4518        assert_eq!(simplify(col("c2").and(lit(false))), lit(false),);
4519
4520        // true && null is always null
4521        assert_eq!(simplify(lit(true).and(lit_bool_null())), lit_bool_null(),);
4522
4523        // null && true is always null
4524        assert_eq!(simplify(lit_bool_null().and(lit(true))), lit_bool_null(),);
4525
4526        // false && null is always false
4527        assert_eq!(simplify(lit(false).and(lit_bool_null())), lit(false),);
4528
4529        // null && false is always false
4530        assert_eq!(simplify(lit_bool_null().and(lit(false))), lit(false),);
4531
4532        // c1 BETWEEN Int32(0) AND Int32(10) AND Boolean(NULL)
4533        // it can be either NULL or FALSE depending on the value of `c1 BETWEEN Int32(0) AND Int32(10)`
4534        // and the Boolean(NULL) should remain
4535        let expr = col("c1").between(lit(0), lit(10));
4536        let expr = expr.and(lit_bool_null());
4537        let result = simplify(expr);
4538
4539        let expected_expr = and(
4540            and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
4541            lit_bool_null(),
4542        );
4543        assert_eq!(expected_expr, result);
4544    }
4545
4546    #[test]
4547    fn simplify_expr_between() {
4548        // c2 between 3 and 4 is c2 >= 3 and c2 <= 4
4549        let expr = col("c2").between(lit(3), lit(4));
4550        assert_eq!(
4551            simplify(expr),
4552            and(col("c2").gt_eq(lit(3)), col("c2").lt_eq(lit(4)))
4553        );
4554
4555        // c2 not between 3 and 4 is c2 < 3 or c2 > 4
4556        let expr = col("c2").not_between(lit(3), lit(4));
4557        assert_eq!(
4558            simplify(expr),
4559            or(col("c2").lt(lit(3)), col("c2").gt(lit(4)))
4560        );
4561    }
4562
4563    #[test]
4564    fn test_like_and_ilike() {
4565        let null = lit(ScalarValue::Utf8(None));
4566
4567        // expr [NOT] [I]LIKE NULL
4568        let expr = col("c1").like(null.clone());
4569        assert_eq!(simplify(expr), lit_bool_null());
4570
4571        let expr = col("c1").not_like(null.clone());
4572        assert_eq!(simplify(expr), lit_bool_null());
4573
4574        let expr = col("c1").ilike(null.clone());
4575        assert_eq!(simplify(expr), lit_bool_null());
4576
4577        let expr = col("c1").not_ilike(null.clone());
4578        assert_eq!(simplify(expr), lit_bool_null());
4579
4580        // expr [NOT] [I]LIKE '%'
4581        let expr = col("c1").like(lit("%"));
4582        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4583
4584        let expr = col("c1").not_like(lit("%"));
4585        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4586
4587        let expr = col("c1").ilike(lit("%"));
4588        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4589
4590        let expr = col("c1").not_ilike(lit("%"));
4591        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4592
4593        // expr [NOT] [I]LIKE '%%'
4594        let expr = col("c1").like(lit("%%"));
4595        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4596
4597        let expr = col("c1").not_like(lit("%%"));
4598        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4599
4600        let expr = col("c1").ilike(lit("%%"));
4601        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4602
4603        let expr = col("c1").not_ilike(lit("%%"));
4604        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4605
4606        // not_null_expr [NOT] [I]LIKE '%'
4607        let expr = col("c1_non_null").like(lit("%"));
4608        assert_eq!(simplify(expr), lit(true));
4609
4610        let expr = col("c1_non_null").not_like(lit("%"));
4611        assert_eq!(simplify(expr), lit(false));
4612
4613        let expr = col("c1_non_null").ilike(lit("%"));
4614        assert_eq!(simplify(expr), lit(true));
4615
4616        let expr = col("c1_non_null").not_ilike(lit("%"));
4617        assert_eq!(simplify(expr), lit(false));
4618
4619        // not_null_expr [NOT] [I]LIKE '%%'
4620        let expr = col("c1_non_null").like(lit("%%"));
4621        assert_eq!(simplify(expr), lit(true));
4622
4623        let expr = col("c1_non_null").not_like(lit("%%"));
4624        assert_eq!(simplify(expr), lit(false));
4625
4626        let expr = col("c1_non_null").ilike(lit("%%"));
4627        assert_eq!(simplify(expr), lit(true));
4628
4629        let expr = col("c1_non_null").not_ilike(lit("%%"));
4630        assert_eq!(simplify(expr), lit(false));
4631
4632        // null_constant [NOT] [I]LIKE '%'
4633        let expr = null.clone().like(lit("%"));
4634        assert_eq!(simplify(expr), lit_bool_null());
4635
4636        let expr = null.clone().not_like(lit("%"));
4637        assert_eq!(simplify(expr), lit_bool_null());
4638
4639        let expr = null.clone().ilike(lit("%"));
4640        assert_eq!(simplify(expr), lit_bool_null());
4641
4642        let expr = null.clone().not_ilike(lit("%"));
4643        assert_eq!(simplify(expr), lit_bool_null());
4644
4645        // null_constant [NOT] [I]LIKE '%%'
4646        let expr = null.clone().like(lit("%%"));
4647        assert_eq!(simplify(expr), lit_bool_null());
4648
4649        let expr = null.clone().not_like(lit("%%"));
4650        assert_eq!(simplify(expr), lit_bool_null());
4651
4652        let expr = null.clone().ilike(lit("%%"));
4653        assert_eq!(simplify(expr), lit_bool_null());
4654
4655        let expr = null.clone().not_ilike(lit("%%"));
4656        assert_eq!(simplify(expr), lit_bool_null());
4657
4658        // null_constant [NOT] [I]LIKE 'a%'
4659        let expr = null.clone().like(lit("a%"));
4660        assert_eq!(simplify(expr), lit_bool_null());
4661
4662        let expr = null.clone().not_like(lit("a%"));
4663        assert_eq!(simplify(expr), lit_bool_null());
4664
4665        let expr = null.clone().ilike(lit("a%"));
4666        assert_eq!(simplify(expr), lit_bool_null());
4667
4668        let expr = null.clone().not_ilike(lit("a%"));
4669        assert_eq!(simplify(expr), lit_bool_null());
4670
4671        // expr [NOT] [I]LIKE with pattern without wildcards
4672        let expr = col("c1").like(lit("a"));
4673        assert_eq!(simplify(expr), col("c1").eq(lit("a")));
4674        let expr = col("c1").not_like(lit("a"));
4675        assert_eq!(simplify(expr), col("c1").not_eq(lit("a")));
4676        let expr = col("c1").like(lit("a_"));
4677        assert_eq!(simplify(expr), col("c1").like(lit("a_")));
4678        let expr = col("c1").not_like(lit("a_"));
4679        assert_eq!(simplify(expr), col("c1").not_like(lit("a_")));
4680
4681        let expr = col("c1").ilike(lit("a"));
4682        assert_eq!(simplify(expr), col("c1").ilike(lit("a")));
4683        let expr = col("c1").not_ilike(lit("a"));
4684        assert_eq!(simplify(expr), col("c1").not_ilike(lit("a")));
4685    }
4686
4687    #[test]
4688    fn test_simplify_with_guarantee() {
4689        // (c3 >= 3) AND (c4 + 2 < 10 OR (c1 NOT IN ("a", "b")))
4690        let expr_x = col("c3").gt(lit(3_i64));
4691        let expr_y = (col("c4") + lit(2_u32)).lt(lit(10_u32));
4692        let expr_z = col("c1").in_list(vec![lit("a"), lit("b")], true);
4693        let expr = expr_x.clone().and(expr_y.or(expr_z));
4694
4695        // All guaranteed null
4696        let guarantees = vec![
4697            (col("c3"), NullableInterval::from(ScalarValue::Int64(None))),
4698            (col("c4"), NullableInterval::from(ScalarValue::UInt32(None))),
4699            (col("c1"), NullableInterval::from(ScalarValue::Utf8(None))),
4700        ];
4701
4702        let output = simplify_with_guarantee(expr.clone(), guarantees);
4703        assert_eq!(output, lit_bool_null());
4704
4705        // All guaranteed false
4706        let guarantees = vec![
4707            (
4708                col("c3"),
4709                NullableInterval::NotNull {
4710                    values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4711                },
4712            ),
4713            (
4714                col("c4"),
4715                NullableInterval::from(ScalarValue::UInt32(Some(9))),
4716            ),
4717            (col("c1"), NullableInterval::from(ScalarValue::from("a"))),
4718        ];
4719        let output = simplify_with_guarantee(expr.clone(), guarantees);
4720        assert_eq!(output, lit(false));
4721
4722        // Guaranteed false or null -> no change.
4723        let guarantees = vec![
4724            (
4725                col("c3"),
4726                NullableInterval::MaybeNull {
4727                    values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4728                },
4729            ),
4730            (
4731                col("c4"),
4732                NullableInterval::MaybeNull {
4733                    values: Interval::make(Some(9_u32), Some(9_u32)).unwrap(),
4734                },
4735            ),
4736            (
4737                col("c1"),
4738                NullableInterval::NotNull {
4739                    values: Interval::try_new(
4740                        ScalarValue::from("d"),
4741                        ScalarValue::from("f"),
4742                    )
4743                    .unwrap(),
4744                },
4745            ),
4746        ];
4747        let output = simplify_with_guarantee(expr.clone(), guarantees);
4748        assert_eq!(&output, &expr_x);
4749
4750        // Sufficient true guarantees
4751        let guarantees = vec![
4752            (
4753                col("c3"),
4754                NullableInterval::from(ScalarValue::Int64(Some(9))),
4755            ),
4756            (
4757                col("c4"),
4758                NullableInterval::from(ScalarValue::UInt32(Some(3))),
4759            ),
4760        ];
4761        let output = simplify_with_guarantee(expr.clone(), guarantees);
4762        assert_eq!(output, lit(true));
4763
4764        // Only partially simplify
4765        let guarantees = vec![(
4766            col("c4"),
4767            NullableInterval::from(ScalarValue::UInt32(Some(3))),
4768        )];
4769        let output = simplify_with_guarantee(expr, guarantees);
4770        assert_eq!(&output, &expr_x);
4771    }
4772
4773    #[test]
4774    fn test_expression_partial_simplify_1() {
4775        // (1 + 2) + (4 / 0) -> 3 + (4 / 0)
4776        let expr = (lit(1) + lit(2)) + (lit(4) / lit(0));
4777        let expected = (lit(3)) + (lit(4) / lit(0));
4778
4779        assert_eq!(simplify(expr), expected);
4780    }
4781
4782    #[test]
4783    fn test_expression_partial_simplify_2() {
4784        // (1 > 2) and (4 / 0) -> false
4785        let expr = (lit(1).gt(lit(2))).and(lit(4) / lit(0));
4786        let expected = lit(false);
4787
4788        assert_eq!(simplify(expr), expected);
4789    }
4790
4791    #[test]
4792    fn test_simplify_cycles() {
4793        // TRUE
4794        let expr = lit(true);
4795        let expected = lit(true);
4796        let (expr, num_iter) = simplify_with_cycle_count(expr);
4797        assert_eq!(expr, expected);
4798        assert_eq!(num_iter, 1);
4799
4800        // (true != NULL) OR (5 > 10)
4801        let expr = lit(true).not_eq(lit_bool_null()).or(lit(5).gt(lit(10)));
4802        let expected = lit_bool_null();
4803        let (expr, num_iter) = simplify_with_cycle_count(expr);
4804        assert_eq!(expr, expected);
4805        assert_eq!(num_iter, 2);
4806
4807        // NOTE: this currently does not simplify
4808        // (((c4 - 10) + 10) *100) / 100
4809        let expr = (((col("c4") - lit(10)) + lit(10)) * lit(100)) / lit(100);
4810        let expected = expr.clone();
4811        let (expr, num_iter) = simplify_with_cycle_count(expr);
4812        assert_eq!(expr, expected);
4813        assert_eq!(num_iter, 1);
4814
4815        // ((c4<1 or c3<2) and c3_non_null<3) and false
4816        let expr = col("c4")
4817            .lt(lit(1))
4818            .or(col("c3").lt(lit(2)))
4819            .and(col("c3_non_null").lt(lit(3)))
4820            .and(lit(false));
4821        let expected = lit(false);
4822        let (expr, num_iter) = simplify_with_cycle_count(expr);
4823        assert_eq!(expr, expected);
4824        assert_eq!(num_iter, 2);
4825    }
4826
4827    fn boolean_test_schema() -> DFSchemaRef {
4828        static BOOLEAN_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
4829            Schema::new(vec![
4830                Field::new("A", DataType::Boolean, false),
4831                Field::new("B", DataType::Boolean, false),
4832                Field::new("C", DataType::Boolean, false),
4833                Field::new("D", DataType::Boolean, false),
4834            ])
4835            .to_dfschema_ref()
4836            .unwrap()
4837        });
4838        Arc::clone(&BOOLEAN_TEST_SCHEMA)
4839    }
4840
4841    #[test]
4842    fn simplify_common_factor_conjunction_in_disjunction() {
4843        let schema = boolean_test_schema();
4844        let simplifier =
4845            ExprSimplifier::new(SimplifyContext::default().with_schema(schema));
4846
4847        let a = || col("A");
4848        let b = || col("B");
4849        let c = || col("C");
4850        let d = || col("D");
4851
4852        // (A AND B) OR (A AND C) -> A AND (B OR C)
4853        let expr = a().and(b()).or(a().and(c()));
4854        let expected = a().and(b().or(c()));
4855
4856        assert_eq!(expected, simplifier.simplify(expr).unwrap());
4857
4858        // (A AND B) OR (A AND C) OR (A AND D) -> A AND (B OR C OR D)
4859        let expr = a().and(b()).or(a().and(c())).or(a().and(d()));
4860        let expected = a().and(b().or(c()).or(d()));
4861        assert_eq!(expected, simplifier.simplify(expr).unwrap());
4862
4863        // A OR (B AND C AND A) -> A
4864        let expr = a().or(b().and(c().and(a())));
4865        let expected = a();
4866        assert_eq!(expected, simplifier.simplify(expr).unwrap());
4867    }
4868
4869    #[test]
4870    fn test_simplify_udaf() {
4871        let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_with_simplify());
4872        let aggregate_function_expr =
4873            Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4874                udaf.into(),
4875                vec![],
4876                false,
4877                None,
4878                vec![],
4879                None,
4880            ));
4881
4882        let expected = col("result_column");
4883        assert_eq!(simplify(aggregate_function_expr), expected);
4884
4885        let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_without_simplify());
4886        let aggregate_function_expr =
4887            Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4888                udaf.into(),
4889                vec![],
4890                false,
4891                None,
4892                vec![],
4893                None,
4894            ));
4895
4896        let expected = aggregate_function_expr.clone();
4897        assert_eq!(simplify(aggregate_function_expr), expected);
4898    }
4899
4900    /// A Mock UDAF which defines `simplify` to be used in tests
4901    /// related to UDAF simplification
4902    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
4903    struct SimplifyMockUdaf {
4904        simplify: bool,
4905    }
4906
4907    impl SimplifyMockUdaf {
4908        /// make simplify method return new expression
4909        fn new_with_simplify() -> Self {
4910            Self { simplify: true }
4911        }
4912        /// make simplify method return no change
4913        fn new_without_simplify() -> Self {
4914            Self { simplify: false }
4915        }
4916    }
4917
4918    impl AggregateUDFImpl for SimplifyMockUdaf {
4919        fn as_any(&self) -> &dyn std::any::Any {
4920            self
4921        }
4922
4923        fn name(&self) -> &str {
4924            "mock_simplify"
4925        }
4926
4927        fn signature(&self) -> &Signature {
4928            unimplemented!()
4929        }
4930
4931        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4932            unimplemented!("not needed for tests")
4933        }
4934
4935        fn accumulator(
4936            &self,
4937            _acc_args: AccumulatorArgs,
4938        ) -> Result<Box<dyn Accumulator>> {
4939            unimplemented!("not needed for tests")
4940        }
4941
4942        fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
4943            unimplemented!("not needed for testing")
4944        }
4945
4946        fn create_groups_accumulator(
4947            &self,
4948            _args: AccumulatorArgs,
4949        ) -> Result<Box<dyn GroupsAccumulator>> {
4950            unimplemented!("not needed for testing")
4951        }
4952
4953        fn simplify(&self) -> Option<AggregateFunctionSimplification> {
4954            if self.simplify {
4955                Some(Box::new(|_, _| Ok(col("result_column"))))
4956            } else {
4957                None
4958            }
4959        }
4960    }
4961
4962    #[test]
4963    fn test_simplify_udwf() {
4964        let udwf = WindowFunctionDefinition::WindowUDF(
4965            WindowUDF::new_from_impl(SimplifyMockUdwf::new_with_simplify()).into(),
4966        );
4967        let window_function_expr = Expr::from(WindowFunction::new(udwf, vec![]));
4968
4969        let expected = col("result_column");
4970        assert_eq!(simplify(window_function_expr), expected);
4971
4972        let udwf = WindowFunctionDefinition::WindowUDF(
4973            WindowUDF::new_from_impl(SimplifyMockUdwf::new_without_simplify()).into(),
4974        );
4975        let window_function_expr = Expr::from(WindowFunction::new(udwf, vec![]));
4976
4977        let expected = window_function_expr.clone();
4978        assert_eq!(simplify(window_function_expr), expected);
4979    }
4980
4981    /// A Mock UDWF which defines `simplify` to be used in tests
4982    /// related to UDWF simplification
4983    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
4984    struct SimplifyMockUdwf {
4985        simplify: bool,
4986    }
4987
4988    impl SimplifyMockUdwf {
4989        /// make simplify method return new expression
4990        fn new_with_simplify() -> Self {
4991            Self { simplify: true }
4992        }
4993        /// make simplify method return no change
4994        fn new_without_simplify() -> Self {
4995            Self { simplify: false }
4996        }
4997    }
4998
4999    impl WindowUDFImpl for SimplifyMockUdwf {
5000        fn as_any(&self) -> &dyn std::any::Any {
5001            self
5002        }
5003
5004        fn name(&self) -> &str {
5005            "mock_simplify"
5006        }
5007
5008        fn signature(&self) -> &Signature {
5009            unimplemented!()
5010        }
5011
5012        fn simplify(&self) -> Option<WindowFunctionSimplification> {
5013            if self.simplify {
5014                Some(Box::new(|_, _| Ok(col("result_column"))))
5015            } else {
5016                None
5017            }
5018        }
5019
5020        fn partition_evaluator(
5021            &self,
5022            _partition_evaluator_args: PartitionEvaluatorArgs,
5023        ) -> Result<Box<dyn PartitionEvaluator>> {
5024            unimplemented!("not needed for tests")
5025        }
5026
5027        fn field(&self, _field_args: WindowUDFFieldArgs) -> Result<FieldRef> {
5028            unimplemented!("not needed for tests")
5029        }
5030
5031        fn limit_effect(&self, _args: &[Arc<dyn PhysicalExpr>]) -> LimitEffect {
5032            LimitEffect::Unknown
5033        }
5034    }
5035    #[derive(Debug, PartialEq, Eq, Hash)]
5036    struct VolatileUdf {
5037        signature: Signature,
5038    }
5039
5040    impl VolatileUdf {
5041        pub fn new() -> Self {
5042            Self {
5043                signature: Signature::exact(vec![], Volatility::Volatile),
5044            }
5045        }
5046    }
5047    impl ScalarUDFImpl for VolatileUdf {
5048        fn as_any(&self) -> &dyn std::any::Any {
5049            self
5050        }
5051
5052        fn name(&self) -> &str {
5053            "VolatileUdf"
5054        }
5055
5056        fn signature(&self) -> &Signature {
5057            &self.signature
5058        }
5059
5060        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
5061            Ok(DataType::Int16)
5062        }
5063
5064        fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
5065            panic!("dummy - not implemented")
5066        }
5067    }
5068
5069    #[test]
5070    fn test_optimize_volatile_conditions() {
5071        let fun = Arc::new(ScalarUDF::new_from_impl(VolatileUdf::new()));
5072        let rand = Expr::ScalarFunction(ScalarFunction::new_udf(fun, vec![]));
5073        {
5074            let expr = rand
5075                .clone()
5076                .eq(lit(0))
5077                .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0))));
5078
5079            assert_eq!(simplify(expr.clone()), expr);
5080        }
5081
5082        {
5083            let expr = col("column1")
5084                .eq(lit(2))
5085                .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0))));
5086
5087            assert_eq!(simplify(expr), col("column1").eq(lit(2)));
5088        }
5089
5090        {
5091            let expr = (col("column1").eq(lit(2)).and(rand.clone().eq(lit(0)))).or(col(
5092                "column1",
5093            )
5094            .eq(lit(2))
5095            .and(rand.clone().eq(lit(0))));
5096
5097            assert_eq!(
5098                simplify(expr),
5099                col("column1")
5100                    .eq(lit(2))
5101                    .and((rand.clone().eq(lit(0))).or(rand.clone().eq(lit(0))))
5102            );
5103        }
5104    }
5105
5106    #[test]
5107    fn simplify_fixed_size_binary_eq_lit() {
5108        let bytes = [1u8, 2, 3].as_slice();
5109
5110        // The expression starts simple.
5111        let expr = col("c5").eq(lit(bytes));
5112
5113        // The type coercer introduces a cast.
5114        let coerced = coerce(expr.clone());
5115        let schema = expr_test_schema();
5116        assert_eq!(
5117            coerced,
5118            col("c5")
5119                .cast_to(&DataType::Binary, schema.as_ref())
5120                .unwrap()
5121                .eq(lit(bytes))
5122        );
5123
5124        // The simplifier removes the cast.
5125        assert_eq!(
5126            simplify(coerced),
5127            col("c5").eq(Expr::Literal(
5128                ScalarValue::FixedSizeBinary(3, Some(bytes.to_vec()),),
5129                None
5130            ))
5131        );
5132    }
5133
5134    #[test]
5135    fn simplify_cast_literal() {
5136        // Test that CAST(literal) expressions are evaluated at plan time
5137
5138        // CAST(123 AS Int64) should become 123i64
5139        let expr = Expr::Cast(Cast::new(Box::new(lit(123i32)), DataType::Int64));
5140        let expected = lit(123i64);
5141        assert_eq!(simplify(expr), expected);
5142
5143        // CAST(1761630189642 AS Timestamp(Nanosecond, Some("+00:00")))
5144        // Integer to timestamp cast
5145        let expr = Expr::Cast(Cast::new(
5146            Box::new(lit(1761630189642i64)),
5147            DataType::Timestamp(
5148                arrow::datatypes::TimeUnit::Nanosecond,
5149                Some("+00:00".into()),
5150            ),
5151        ));
5152        // Should evaluate to a timestamp literal
5153        let result = simplify(expr);
5154        match result {
5155            Expr::Literal(ScalarValue::TimestampNanosecond(Some(val), tz), _) => {
5156                assert_eq!(val, 1761630189642i64);
5157                assert_eq!(tz.as_deref(), Some("+00:00"));
5158            }
5159            other => panic!("Expected TimestampNanosecond literal, got: {other:?}"),
5160        }
5161
5162        // Test CAST of invalid string to timestamp - should return an error at plan time
5163        // This represents the case from the issue: CAST(Utf8("1761630189642") AS Timestamp)
5164        // "1761630189642" is NOT a valid timestamp string format
5165        let expr = Expr::Cast(Cast::new(
5166            Box::new(lit("1761630189642")),
5167            DataType::Timestamp(
5168                arrow::datatypes::TimeUnit::Nanosecond,
5169                Some("+00:00".into()),
5170            ),
5171        ));
5172
5173        // The simplification should now fail with an error at plan time
5174        let schema = test_schema();
5175        let simplifier =
5176            ExprSimplifier::new(SimplifyContext::default().with_schema(schema));
5177        let result = simplifier.simplify(expr);
5178        assert!(result.is_err(), "Expected error for invalid cast");
5179        let err_msg = result.unwrap_err().to_string();
5180        assert_contains!(err_msg, "Error parsing timestamp");
5181    }
5182
5183    fn if_not_null(expr: Expr, then: bool) -> Expr {
5184        Expr::Case(Case {
5185            expr: Some(expr.is_not_null().into()),
5186            when_then_expr: vec![(lit(true).into(), lit(then).into())],
5187            else_expr: None,
5188        })
5189    }
5190
5191    // --------------------------------
5192    // --- Struct Cast Tests -----
5193    // --------------------------------
5194
5195    /// Helper to create a `Struct` literal cast expression from `source_fields` and `target_fields`.
5196    fn make_struct_cast_expr(source_fields: Fields, target_fields: Fields) -> Expr {
5197        // Create 1-row struct array (not 0-row) so it can be evaluated by simplifier
5198        let arrays: Vec<Arc<dyn Array>> = vec![
5199            Arc::new(Int32Array::from(vec![Some(1)])),
5200            Arc::new(Int32Array::from(vec![Some(2)])),
5201        ];
5202        let struct_array = StructArray::try_new(source_fields, arrays, None).unwrap();
5203
5204        Expr::Cast(Cast::new(
5205            Box::new(Expr::Literal(
5206                ScalarValue::Struct(Arc::new(struct_array)),
5207                None,
5208            )),
5209            DataType::Struct(target_fields),
5210        ))
5211    }
5212
5213    #[test]
5214    fn test_struct_cast_different_field_counts_not_foldable() {
5215        // Test that struct casts with different field counts are NOT marked as foldable
5216        // When field counts differ, const-folding should not be attempted
5217
5218        let source_fields = Fields::from(vec![
5219            Arc::new(Field::new("a", DataType::Int32, true)),
5220            Arc::new(Field::new("b", DataType::Int32, true)),
5221        ]);
5222
5223        let target_fields = Fields::from(vec![
5224            Arc::new(Field::new("x", DataType::Int32, true)),
5225            Arc::new(Field::new("y", DataType::Int32, true)),
5226            Arc::new(Field::new("z", DataType::Int32, true)),
5227        ]);
5228
5229        let expr = make_struct_cast_expr(source_fields, target_fields);
5230
5231        let simplifier =
5232            ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
5233
5234        // The cast should remain unchanged since field counts differ
5235        let result = simplifier.simplify(expr.clone()).unwrap();
5236        // Ensure const-folding was not attempted (the expression remains exactly the same)
5237        assert_eq!(
5238            result, expr,
5239            "Struct cast with different field counts should remain unchanged (no const-folding)"
5240        );
5241    }
5242
5243    #[test]
5244    fn test_struct_cast_same_field_count_foldable() {
5245        // Test that struct casts with same field counts can be considered for const-folding
5246
5247        let source_fields = Fields::from(vec![
5248            Arc::new(Field::new("a", DataType::Int32, true)),
5249            Arc::new(Field::new("b", DataType::Int32, true)),
5250        ]);
5251
5252        let target_fields = Fields::from(vec![
5253            Arc::new(Field::new("a", DataType::Int32, true)),
5254            Arc::new(Field::new("b", DataType::Int32, true)),
5255        ]);
5256
5257        let expr = make_struct_cast_expr(source_fields, target_fields);
5258
5259        let simplifier =
5260            ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
5261
5262        // The cast should be simplified
5263        let result = simplifier.simplify(expr.clone()).unwrap();
5264        // Struct casts with same field count should be const-folded to a literal
5265        assert!(matches!(result, Expr::Literal(_, _)));
5266        // Ensure the simplifier made a change (not identical to original)
5267        assert_ne!(
5268            result, expr,
5269            "Struct cast with same field count should be simplified (not identical to input)"
5270        );
5271    }
5272
5273    #[test]
5274    fn test_struct_cast_different_names_same_count() {
5275        // Test struct cast with same field count but different names
5276        // Field count matches; simplification should be skipped because names do not overlap
5277
5278        let source_fields = Fields::from(vec![
5279            Arc::new(Field::new("a", DataType::Int32, true)),
5280            Arc::new(Field::new("b", DataType::Int32, true)),
5281        ]);
5282
5283        let target_fields = Fields::from(vec![
5284            Arc::new(Field::new("x", DataType::Int32, true)),
5285            Arc::new(Field::new("y", DataType::Int32, true)),
5286        ]);
5287
5288        let expr = make_struct_cast_expr(source_fields, target_fields);
5289
5290        let simplifier =
5291            ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
5292
5293        // The cast should remain unchanged because there is no name overlap
5294        let result = simplifier.simplify(expr.clone()).unwrap();
5295        assert_eq!(
5296            result, expr,
5297            "Struct cast with different names but same field count should not be simplified"
5298        );
5299    }
5300
5301    #[test]
5302    fn test_struct_cast_empty_array_not_foldable() {
5303        // Test that struct casts with 0-row (empty) struct arrays are NOT const-folded
5304        // The simplifier uses a 1-row input batch, which causes dimension mismatches
5305        // when evaluating 0-row struct literals
5306
5307        let source_fields = Fields::from(vec![
5308            Arc::new(Field::new("a", DataType::Int32, true)),
5309            Arc::new(Field::new("b", DataType::Int32, true)),
5310        ]);
5311
5312        let target_fields = Fields::from(vec![
5313            Arc::new(Field::new("a", DataType::Int32, true)),
5314            Arc::new(Field::new("b", DataType::Int32, true)),
5315        ]);
5316
5317        // Create a 0-row (empty) struct array
5318        let arrays: Vec<Arc<dyn Array>> = vec![
5319            Arc::new(Int32Array::new(vec![].into(), None)),
5320            Arc::new(Int32Array::new(vec![].into(), None)),
5321        ];
5322        let struct_array = StructArray::try_new(source_fields, arrays, None).unwrap();
5323
5324        let expr = Expr::Cast(Cast::new(
5325            Box::new(Expr::Literal(
5326                ScalarValue::Struct(Arc::new(struct_array)),
5327                None,
5328            )),
5329            DataType::Struct(target_fields),
5330        ));
5331
5332        let simplifier =
5333            ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema()));
5334
5335        // The cast should remain unchanged since the struct array is empty (0-row)
5336        let result = simplifier.simplify(expr.clone()).unwrap();
5337        assert_eq!(
5338            result, expr,
5339            "Struct cast with empty (0-row) array should remain unchanged"
5340        );
5341    }
5342}