Skip to main content

datafusion_optimizer/simplify_expressions/
expr_simplifier.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Expression simplification API
19
20use arrow::{
21    array::{Array, AsArray, new_null_array},
22    datatypes::{DataType, Field, Schema},
23    record_batch::RecordBatch,
24};
25use std::borrow::Cow;
26use std::collections::HashSet;
27use std::ops::Not;
28use std::sync::Arc;
29use std::sync::LazyLock;
30
31use datafusion_common::config::ConfigOptions;
32use datafusion_common::nested_struct::has_one_of_more_common_fields;
33use datafusion_common::{
34    DFSchema, DataFusionError, Result, ScalarValue, exec_datafusion_err, internal_err,
35};
36use datafusion_common::{
37    HashMap,
38    cast::{as_large_list_array, as_list_array},
39    metadata::FieldMetadata,
40    tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter},
41};
42use datafusion_expr::expr::HigherOrderFunction;
43use datafusion_expr::{
44    BinaryExpr, Case, ColumnarValue, Expr, ExprSchemable, Like, Operator, Volatility,
45    and, binary::BinaryTypeCoercer, lit, or, preimage::PreimageResult,
46};
47use datafusion_expr::{Cast, TryCast, simplify::ExprSimplifyResult};
48use datafusion_expr::{expr::ScalarFunction, interval_arithmetic::NullableInterval};
49use datafusion_expr::{
50    expr::{InList, InSubquery},
51    utils::{iter_conjunction, iter_conjunction_owned},
52};
53use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
54
55use super::inlist_simplifier::ShortenInListSimplifier;
56use super::utils::*;
57use crate::simplify_expressions::SimplifyContext;
58use crate::simplify_expressions::regex::simplify_regex_expr;
59use crate::simplify_expressions::unwrap_cast::{
60    is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary,
61    is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist,
62    unwrap_cast_in_comparison_for_binary,
63};
64use crate::{
65    analyzer::type_coercion::TypeCoercionRewriter,
66    simplify_expressions::udf_preimage::rewrite_with_preimage,
67};
68use datafusion_expr::expr_rewriter::rewrite_with_guarantees_map;
69use datafusion_expr_common::casts::try_cast_literal_to_type;
70use indexmap::IndexSet;
71use regex::Regex;
72
73/// This structure handles API for expression simplification
74///
75/// Provides simplification information based on DFSchema and
76/// [`ExecutionProps`]. This is the default implementation used by DataFusion
77///
78/// For example:
79/// ```
80/// use arrow::datatypes::{DataType, Field, Schema};
81/// use datafusion_common::{DataFusionError, ToDFSchema};
82/// use datafusion_expr::simplify::SimplifyContext;
83/// use datafusion_expr::{col, lit};
84/// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
85///
86/// // Create the schema
87/// let schema = Schema::new(vec![Field::new("i", DataType::Int64, false)])
88///     .to_dfschema_ref()
89///     .unwrap();
90///
91/// // Create the simplifier
92/// let context = SimplifyContext::builder().with_schema(schema).build();
93/// let simplifier = ExprSimplifier::new(context);
94///
95/// // Use the simplifier
96///
97/// // b < 2 or (1 > 3)
98/// let expr = col("b").lt(lit(2)).or(lit(1).gt(lit(3)));
99///
100/// // b < 2
101/// let simplified = simplifier.simplify(expr).unwrap();
102/// assert_eq!(simplified, col("b").lt(lit(2)));
103/// ```
104pub struct ExprSimplifier {
105    info: SimplifyContext,
106    /// Guarantees about the values of columns. This is provided by the user
107    /// in [ExprSimplifier::with_guarantees()].
108    guarantees: Vec<(Expr, NullableInterval)>,
109    /// Should expressions be canonicalized before simplification? Defaults to
110    /// true
111    canonicalize: bool,
112    /// Maximum number of simplifier cycles
113    max_simplifier_cycles: u32,
114}
115
116pub const THRESHOLD_INLINE_INLIST: usize = 3;
117pub const DEFAULT_MAX_SIMPLIFIER_CYCLES: u32 = 3;
118
119impl ExprSimplifier {
120    /// Create a new `ExprSimplifier` with the given [`SimplifyContext`].
121    /// See [`simplify`](Self::simplify) for an example.
122    ///
123    /// [`SimplifyContext`]: datafusion_expr::simplify::SimplifyContext
124    pub fn new(info: SimplifyContext) -> Self {
125        Self {
126            info,
127            guarantees: vec![],
128            canonicalize: true,
129            max_simplifier_cycles: DEFAULT_MAX_SIMPLIFIER_CYCLES,
130        }
131    }
132
133    /// Simplifies this [`Expr`] as much as possible, evaluating
134    /// constants and applying algebraic simplifications.
135    ///
136    /// The types of the expression must match what operators expect,
137    /// or else an error may occur trying to evaluate. See
138    /// [`coerce`](Self::coerce) for a function to help.
139    ///
140    /// # Example:
141    ///
142    /// `b > 2 AND b > 2`
143    ///
144    /// can be written to
145    ///
146    /// `b > 2`
147    ///
148    /// ```
149    /// use arrow::datatypes::{DataType, Field, Schema};
150    /// use datafusion_common::{DFSchema, ToDFSchema};
151    /// use datafusion_common::Result;
152    /// use datafusion_expr::simplify::SimplifyContext;
153    /// use datafusion_expr::{col, lit, Expr};
154    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
155    /// use std::sync::Arc;
156    ///
157    /// // Create a schema and SimplifyContext
158    /// let schema = Schema::new(vec![Field::new("b", DataType::Int32, true)])
159    ///     .to_dfschema_ref()
160    ///     .unwrap();
161    /// // Create the simplifier
162    /// let context = SimplifyContext::builder().with_schema(schema).build();
163    /// let simplifier = ExprSimplifier::new(context);
164    ///
165    /// // b < 2
166    /// let b_lt_2 = col("b").gt(lit(2));
167    ///
168    /// // (b < 2) OR (b < 2)
169    /// let expr = b_lt_2.clone().or(b_lt_2.clone());
170    ///
171    /// // (b < 2) OR (b < 2) --> (b < 2)
172    /// let expr = simplifier.simplify(expr).unwrap();
173    /// assert_eq!(expr, b_lt_2);
174    /// ```
175    pub fn simplify(&self, expr: Expr) -> Result<Expr> {
176        Ok(self.simplify_with_cycle_count_transformed(expr)?.0.data)
177    }
178
179    /// Like [Self::simplify], simplifies this [`Expr`] as much as possible, evaluating
180    /// constants and applying algebraic simplifications. Additionally returns a `u32`
181    /// representing the number of simplification cycles performed, which can be useful for testing
182    /// optimizations.
183    ///
184    /// See [Self::simplify] for details and usage examples.
185    #[deprecated(
186        since = "48.0.0",
187        note = "Use `simplify_with_cycle_count_transformed` instead"
188    )]
189    #[expect(unused_mut)]
190    pub fn simplify_with_cycle_count(&self, mut expr: Expr) -> Result<(Expr, u32)> {
191        let (transformed, cycle_count) =
192            self.simplify_with_cycle_count_transformed(expr)?;
193        Ok((transformed.data, cycle_count))
194    }
195
196    /// Like [Self::simplify], simplifies this [`Expr`] as much as possible, evaluating
197    /// constants and applying algebraic simplifications. Additionally returns a `u32`
198    /// representing the number of simplification cycles performed, which can be useful for testing
199    /// optimizations.
200    ///
201    /// # Returns
202    ///
203    /// A tuple containing:
204    /// - The simplified expression wrapped in a `Transformed<Expr>` indicating if changes were made
205    /// - The number of simplification cycles that were performed
206    ///
207    /// See [Self::simplify] for details and usage examples.
208    pub fn simplify_with_cycle_count_transformed(
209        &self,
210        mut expr: Expr,
211    ) -> Result<(Transformed<Expr>, u32)> {
212        let mut simplifier = Simplifier::new(&self.info);
213        let config_options = Some(Arc::clone(self.info.config_options()));
214        let mut const_evaluator = ConstEvaluator::try_new(config_options)?;
215        let mut shorten_in_list_simplifier = ShortenInListSimplifier::new();
216        let guarantees_map: HashMap<&Expr, &NullableInterval> =
217            self.guarantees.iter().map(|(k, v)| (k, v)).collect();
218
219        if self.canonicalize {
220            expr = expr.rewrite(&mut Canonicalizer::new()).data()?
221        }
222
223        // Evaluating constants can enable new simplifications and
224        // simplifications can enable new constant evaluation
225        // see `Self::with_max_cycles`
226        let mut num_cycles = 0;
227        let mut has_transformed = false;
228        loop {
229            let Transformed {
230                data, transformed, ..
231            } = expr
232                .rewrite(&mut const_evaluator)?
233                .transform_data(|expr| expr.rewrite(&mut simplifier))?
234                .transform_data(|expr| {
235                    rewrite_with_guarantees_map(expr, &guarantees_map)
236                })?;
237            expr = data;
238            num_cycles += 1;
239            // Track if any transformation occurred
240            has_transformed = has_transformed || transformed;
241            if !transformed || num_cycles >= self.max_simplifier_cycles {
242                break;
243            }
244        }
245        // shorten inlist should be started after other inlist rules are applied
246        expr = expr.rewrite(&mut shorten_in_list_simplifier).data()?;
247        Ok((
248            Transformed::new_transformed(expr, has_transformed),
249            num_cycles,
250        ))
251    }
252
253    /// Apply type coercion to an [`Expr`] so that it can be
254    /// evaluated as a [`PhysicalExpr`](datafusion_physical_expr::PhysicalExpr).
255    ///
256    /// See the [type coercion module](datafusion_expr::type_coercion)
257    /// documentation for more details on type coercion
258    pub fn coerce(&self, expr: Expr, schema: &DFSchema) -> Result<Expr> {
259        let mut expr_rewrite = TypeCoercionRewriter { schema };
260        expr.rewrite(&mut expr_rewrite).data()
261    }
262
263    /// Input guarantees about the values of columns.
264    ///
265    /// The guarantees can simplify expressions. For example, if a column `x` is
266    /// guaranteed to be `3`, then the expression `x > 1` can be replaced by the
267    /// literal `true`.
268    ///
269    /// The guarantees are provided as a `Vec<(Expr, NullableInterval)>`,
270    /// where the [Expr] is a column reference and the [NullableInterval]
271    /// is an interval representing the known possible values of that column.
272    ///
273    /// ```rust
274    /// use arrow::datatypes::{DataType, Field, Schema};
275    /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
276    /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
277    /// use datafusion_expr::simplify::SimplifyContext;
278    /// use datafusion_expr::{col, lit, Expr};
279    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
280    ///
281    /// let schema = Schema::new(vec![
282    ///     Field::new("x", DataType::Int64, false),
283    ///     Field::new("y", DataType::UInt32, false),
284    ///     Field::new("z", DataType::Int64, false),
285    /// ])
286    /// .to_dfschema_ref()
287    /// .unwrap();
288    ///
289    /// // Create the simplifier
290    /// let context = SimplifyContext::builder().with_schema(schema).build();
291    ///
292    /// // Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5)
293    /// let expr_x = col("x").gt_eq(lit(3_i64));
294    /// let expr_y = (col("y") + lit(2_u32)).lt(lit(10_u32));
295    /// let expr_z = col("z").gt(lit(5_i64));
296    /// let expr = expr_x.and(expr_y).and(expr_z.clone());
297    ///
298    /// let guarantees = vec![
299    ///     // x ∈ [3, 5]
300    ///     (
301    ///         col("x"),
302    ///         NullableInterval::NotNull {
303    ///             values: Interval::make(Some(3_i64), Some(5_i64)).unwrap(),
304    ///         },
305    ///     ),
306    ///     // y = 3
307    ///     (
308    ///         col("y"),
309    ///         NullableInterval::from(ScalarValue::UInt32(Some(3))),
310    ///     ),
311    /// ];
312    /// let simplifier = ExprSimplifier::new(context).with_guarantees(guarantees);
313    /// let output = simplifier.simplify(expr).unwrap();
314    /// // Expression becomes: true AND true AND (z > 5), which simplifies to
315    /// // z > 5.
316    /// assert_eq!(output, expr_z);
317    /// ```
318    pub fn with_guarantees(mut self, guarantees: Vec<(Expr, NullableInterval)>) -> Self {
319        self.guarantees = guarantees;
320        self
321    }
322
323    /// Should `Canonicalizer` be applied before simplification?
324    ///
325    /// If true (the default), the expression will be rewritten to canonical
326    /// form before simplification. This is useful to ensure that the simplifier
327    /// can apply all possible simplifications.
328    ///
329    /// Some expressions, such as those in some Joins, can not be canonicalized
330    /// without changing their meaning. In these cases, canonicalization should
331    /// be disabled.
332    ///
333    /// ```rust
334    /// use arrow::datatypes::{DataType, Field, Schema};
335    /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
336    /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
337    /// use datafusion_expr::simplify::SimplifyContext;
338    /// use datafusion_expr::{col, lit, Expr};
339    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
340    ///
341    /// let schema = Schema::new(vec![
342    ///     Field::new("a", DataType::Int64, false),
343    ///     Field::new("b", DataType::Int64, false),
344    ///     Field::new("c", DataType::Int64, false),
345    /// ])
346    /// .to_dfschema_ref()
347    /// .unwrap();
348    ///
349    /// // Create the simplifier
350    /// let context = SimplifyContext::builder().with_schema(schema).build();
351    /// let simplifier = ExprSimplifier::new(context);
352    ///
353    /// // Expression: a = c AND 1 = b
354    /// let expr = col("a").eq(col("c")).and(lit(1).eq(col("b")));
355    ///
356    /// // With canonicalization, the expression is rewritten to canonical form
357    /// // (though it is no simpler in this case):
358    /// let canonical = simplifier.simplify(expr.clone()).unwrap();
359    /// // Expression has been rewritten to: (c = a AND b = 1)
360    /// assert_eq!(canonical, col("c").eq(col("a")).and(col("b").eq(lit(1))));
361    ///
362    /// // If canonicalization is disabled, the expression is not changed
363    /// let non_canonicalized = simplifier
364    ///     .with_canonicalize(false)
365    ///     .simplify(expr.clone())
366    ///     .unwrap();
367    ///
368    /// assert_eq!(non_canonicalized, expr);
369    /// ```
370    pub fn with_canonicalize(mut self, canonicalize: bool) -> Self {
371        self.canonicalize = canonicalize;
372        self
373    }
374
375    /// Specifies the maximum number of simplification cycles to run.
376    ///
377    /// The simplifier can perform multiple passes of simplification. This is
378    /// because the output of one simplification step can allow more optimizations
379    /// in another simplification step. For example, constant evaluation can allow more
380    /// expression simplifications, and expression simplifications can allow more constant
381    /// evaluations.
382    ///
383    /// This method specifies the maximum number of allowed iteration cycles before the simplifier
384    /// returns an [Expr] output. However, it does not always perform the maximum number of cycles.
385    /// The simplifier will attempt to detect when an [Expr] is unchanged by all the simplification
386    /// passes, and return early. This avoids wasting time on unnecessary [Expr] tree traversals.
387    ///
388    /// If no maximum is specified, the value of [DEFAULT_MAX_SIMPLIFIER_CYCLES] is used
389    /// instead.
390    ///
391    /// ```rust
392    /// use arrow::datatypes::{DataType, Field, Schema};
393    /// use datafusion_expr::{col, lit, Expr};
394    /// use datafusion_common::{Result, ScalarValue, ToDFSchema};
395    /// use datafusion_expr::simplify::SimplifyContext;
396    /// use datafusion_optimizer::simplify_expressions::ExprSimplifier;
397    ///
398    /// let schema = Schema::new(vec![
399    ///   Field::new("a", DataType::Int64, false),
400    ///   ])
401    ///   .to_dfschema_ref().unwrap();
402    ///
403    /// // Create the simplifier
404    /// let context = SimplifyContext::builder().with_schema(schema).build();
405    /// let simplifier = ExprSimplifier::new(context);
406    ///
407    /// // Expression: a IS NOT NULL
408    /// let expr = col("a").is_not_null();
409    ///
410    /// // When using default maximum cycles, 2 cycles will be performed.
411    /// let (simplified_expr, count) = simplifier.simplify_with_cycle_count_transformed(expr.clone()).unwrap();
412    /// assert_eq!(simplified_expr.data, lit(true));
413    /// // 2 cycles were executed, but only 1 was needed
414    /// assert_eq!(count, 2);
415    ///
416    /// // Only 1 simplification pass is necessary here, so we can set the maximum cycles to 1.
417    /// let (simplified_expr, count) = simplifier.with_max_cycles(1).simplify_with_cycle_count_transformed(expr.clone()).unwrap();
418    /// // Expression has been rewritten to: (c = a AND b = 1)
419    /// assert_eq!(simplified_expr.data, lit(true));
420    /// // Only 1 cycle was executed
421    /// assert_eq!(count, 1);
422    /// ```
423    pub fn with_max_cycles(mut self, max_simplifier_cycles: u32) -> Self {
424        self.max_simplifier_cycles = max_simplifier_cycles;
425        self
426    }
427}
428
429/// Canonicalize any BinaryExprs that are not in canonical form
430///
431/// `<literal> <op> <col>` is rewritten to `<col> <op> <literal>`
432///
433/// `<col1> <op> <col2>` is rewritten so that the name of `col1` sorts higher
434/// than `col2` (`a > b` would be canonicalized to `b < a`)
435struct Canonicalizer {}
436
437impl Canonicalizer {
438    fn new() -> Self {
439        Self {}
440    }
441}
442
443impl TreeNodeRewriter for Canonicalizer {
444    type Node = Expr;
445
446    fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
447        let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr else {
448            return Ok(Transformed::no(expr));
449        };
450        match (left.as_ref(), right.as_ref(), op.swap()) {
451            // <col1> <op> <col2>
452            (Expr::Column(left_col), Expr::Column(right_col), Some(swapped_op))
453                if right_col > left_col =>
454            {
455                Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
456                    left: right,
457                    op: swapped_op,
458                    right: left,
459                })))
460            }
461            // <literal> <op> <col>
462            (Expr::Literal(_a, _), Expr::Column(_b), Some(swapped_op)) => {
463                Ok(Transformed::yes(Expr::BinaryExpr(BinaryExpr {
464                    left: right,
465                    op: swapped_op,
466                    right: left,
467                })))
468            }
469            _ => Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
470                left,
471                op,
472                right,
473            }))),
474        }
475    }
476}
477
478/// Partially evaluate `Expr`s so constant subtrees are evaluated at plan time.
479///
480/// Note it does not handle algebraic rewrites such as `(a or false)`
481/// --> `a`, which is handled by [`Simplifier`]
482struct ConstEvaluator {
483    /// `can_evaluate` is used during the depth-first-search of the
484    /// `Expr` tree to track if any siblings (or their descendants) were
485    /// non evaluatable (e.g. had a column reference or volatile
486    /// function)
487    ///
488    /// Specifically, `can_evaluate[N]` represents the state of
489    /// traversal when we are N levels deep in the tree, one entry for
490    /// this Expr and each of its parents.
491    ///
492    /// After visiting all siblings if `can_evaluate.top()` is true, that
493    /// means there were no non evaluatable siblings (or their
494    /// descendants) so this `Expr` can be evaluated
495    can_evaluate: Vec<bool>,
496    /// Execution properties needed to call [`create_physical_expr`].
497    /// `ConstEvaluator` only evaluates expressions without column references
498    /// (i.e. constant expressions) and doesn't use the variable binding features
499    /// of `ExecutionProps` (we explicitly filter out [`Expr::ScalarVariable`]).
500    /// The `config_options` are passed from the session to allow scalar functions
501    /// to access configuration like timezone.
502    execution_props: ExecutionProps,
503}
504
505/// The simplify result of ConstEvaluator
506enum ConstSimplifyResult {
507    // Expr was simplified and contains the new expression
508    Simplified(ScalarValue, Option<FieldMetadata>),
509    // Expr was not simplified and original value is returned
510    NotSimplified(ScalarValue, Option<FieldMetadata>),
511    // Evaluation encountered an error, contains the original expression
512    SimplifyRuntimeError(DataFusionError, Expr),
513}
514
515impl TreeNodeRewriter for ConstEvaluator {
516    type Node = Expr;
517
518    fn f_down(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
519        // Default to being able to evaluate this node
520        self.can_evaluate.push(true);
521
522        // if this expr is not ok to evaluate, mark entire parent
523        // stack as not ok (as all parents have at least one child or
524        // descendant that can not be evaluated
525
526        if !Self::can_evaluate(&expr) {
527            // walk back up stack, marking first parent that is not mutable
528            let parent_iter = self.can_evaluate.iter_mut().rev();
529            for p in parent_iter {
530                if !*p {
531                    // optimization: if we find an element on the
532                    // stack already marked, know all elements above are also marked
533                    break;
534                }
535                *p = false;
536            }
537        }
538
539        // NB: do not short circuit recursion even if we find a non
540        // evaluatable node (so we can fold other children, args to
541        // functions, etc.)
542        Ok(Transformed::no(expr))
543    }
544
545    fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
546        match self.can_evaluate.pop() {
547            // Certain expressions such as `CASE` and `COALESCE` are short-circuiting
548            // and may not evaluate all their sub expressions. Thus, if
549            // any error is countered during simplification, return the original
550            // so that normal evaluation can occur
551            Some(true) => match self.evaluate_to_scalar(expr) {
552                ConstSimplifyResult::Simplified(s, m) => {
553                    Ok(Transformed::yes(Expr::Literal(s, m)))
554                }
555                ConstSimplifyResult::NotSimplified(s, m) => {
556                    Ok(Transformed::no(Expr::Literal(s, m)))
557                }
558                ConstSimplifyResult::SimplifyRuntimeError(err, expr) => {
559                    // For CAST expressions with literal inputs, propagate the error at plan time rather than deferring to execution time.
560                    // This provides clearer error messages and fails fast.
561                    if let Expr::Cast(Cast { ref expr, .. })
562                    | Expr::TryCast(TryCast { ref expr, .. }) = expr
563                        && matches!(expr.as_ref(), Expr::Literal(_, _))
564                    {
565                        return Err(err);
566                    }
567                    // For other expressions (like CASE, COALESCE), preserve the original
568                    // to allow short-circuit evaluation at execution time
569                    Ok(Transformed::yes(expr))
570                }
571            },
572            Some(false) => Ok(Transformed::no(expr)),
573            _ => internal_err!("Failed to pop can_evaluate"),
574        }
575    }
576}
577
578static DUMMY_SCHEMA: LazyLock<Arc<Schema>> =
579    LazyLock::new(|| Arc::new(Schema::new(vec![Field::new(".", DataType::Null, true)])));
580
581static DUMMY_DF_SCHEMA: LazyLock<DFSchema> =
582    LazyLock::new(|| DFSchema::try_from(Arc::clone(&*DUMMY_SCHEMA)).unwrap());
583
584static DUMMY_BATCH: LazyLock<RecordBatch> = LazyLock::new(|| {
585    // Need a single "input" row to produce a single output row
586    let col = new_null_array(&DataType::Null, 1);
587    RecordBatch::try_new(DUMMY_SCHEMA.clone(), vec![col]).unwrap()
588});
589
590impl ConstEvaluator {
591    /// Create a new `ConstantEvaluator`.
592    ///
593    /// Note: `ConstEvaluator` filters out expressions with scalar variables
594    /// (like `$var`) and volatile functions, so it creates its own default
595    /// `ExecutionProps` internally. The filtered expressions will be evaluated
596    /// at runtime where proper variable bindings are available.
597    ///
598    /// The `config_options` parameter is used to pass session configuration
599    /// (like timezone) to scalar functions during constant evaluation.
600    pub fn try_new(config_options: Option<Arc<ConfigOptions>>) -> Result<Self> {
601        // The dummy column name is unused and doesn't matter as only
602        // expressions without column references can be evaluated
603
604        let mut execution_props = ExecutionProps::new();
605        execution_props.config_options = config_options;
606
607        Ok(Self {
608            can_evaluate: vec![],
609            execution_props,
610        })
611    }
612
613    /// Can a function of the specified volatility be evaluated?
614    fn volatility_ok(volatility: Volatility) -> bool {
615        match volatility {
616            Volatility::Immutable => true,
617            // Values for functions such as now() are taken from ExecutionProps
618            Volatility::Stable => true,
619            Volatility::Volatile => false,
620        }
621    }
622
623    /// Can the expression be evaluated at plan time, (assuming all of
624    /// its children can also be evaluated)?
625    fn can_evaluate(expr: &Expr) -> bool {
626        // check for reasons we can't evaluate this node
627        //
628        // NOTE all expr types are listed here so when new ones are
629        // added they can be checked for their ability to be evaluated
630        // at plan time
631        match expr {
632            // TODO: remove the next line after `Expr::Wildcard` is removed
633            #[expect(deprecated)]
634            Expr::AggregateFunction { .. }
635            | Expr::ScalarVariable(_, _)
636            | Expr::Column(_)
637            | Expr::OuterReferenceColumn(_, _)
638            | Expr::Exists { .. }
639            | Expr::InSubquery(_)
640            | Expr::SetComparison(_)
641            | Expr::ScalarSubquery(_)
642            | Expr::WindowFunction { .. }
643            | Expr::GroupingSet(_)
644            | Expr::Wildcard { .. }
645            | Expr::Placeholder(_) => false,
646            Expr::ScalarFunction(ScalarFunction { func, .. }) => {
647                Self::volatility_ok(func.signature().volatility)
648            }
649            Expr::HigherOrderFunction(HigherOrderFunction { func, .. }) => {
650                Self::volatility_ok(func.signature().volatility)
651            }
652            Expr::Cast(Cast { expr, field }) | Expr::TryCast(TryCast { expr, field }) => {
653                if let (
654                    Ok(DataType::Struct(source_fields)),
655                    DataType::Struct(target_fields),
656                ) = (expr.get_type(&DFSchema::empty()), field.data_type())
657                {
658                    // Don't const-fold struct casts with different field counts
659                    if source_fields.len() != target_fields.len() {
660                        return false;
661                    }
662
663                    // Skip const-folding when there is no field name overlap
664                    if !has_one_of_more_common_fields(&source_fields, target_fields) {
665                        return false;
666                    }
667
668                    // Don't const-fold struct casts with empty (0-row) literals
669                    // The simplifier uses a 1-row input batch, which causes dimension mismatches
670                    // when evaluating 0-row struct literals
671                    if let Expr::Literal(ScalarValue::Struct(struct_array), _) =
672                        expr.as_ref()
673                        && struct_array.len() == 0
674                    {
675                        return false;
676                    }
677                }
678                true
679            }
680            Expr::Literal(_, _)
681            | Expr::Alias(..)
682            | Expr::Unnest(_)
683            | Expr::BinaryExpr { .. }
684            | Expr::Not(_)
685            | Expr::IsNotNull(_)
686            | Expr::IsNull(_)
687            | Expr::IsTrue(_)
688            | Expr::IsFalse(_)
689            | Expr::IsUnknown(_)
690            | Expr::IsNotTrue(_)
691            | Expr::IsNotFalse(_)
692            | Expr::IsNotUnknown(_)
693            | Expr::Negative(_)
694            | Expr::Between { .. }
695            | Expr::Like { .. }
696            | Expr::SimilarTo { .. }
697            | Expr::Case(_)
698            | Expr::InList { .. }
699            | Expr::Lambda(_)
700            | Expr::LambdaVariable(_) => true,
701        }
702    }
703
704    /// Internal helper to evaluates an Expr
705    pub(crate) fn evaluate_to_scalar(&mut self, expr: Expr) -> ConstSimplifyResult {
706        if let Expr::Literal(s, m) = expr {
707            return ConstSimplifyResult::NotSimplified(s, m);
708        }
709
710        let phys_expr =
711            match create_physical_expr(&expr, &DUMMY_DF_SCHEMA, &self.execution_props) {
712                Ok(e) => e,
713                Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
714            };
715        let metadata = phys_expr
716            .return_field(DUMMY_BATCH.schema_ref())
717            .ok()
718            .and_then(|f| {
719                let m = f.metadata();
720                match m.is_empty() {
721                    true => None,
722                    false => Some(FieldMetadata::from(m)),
723                }
724            });
725        let col_val = match phys_expr.evaluate(&DUMMY_BATCH) {
726            Ok(v) => v,
727            Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
728        };
729        match col_val {
730            ColumnarValue::Array(a) => {
731                if a.len() != 1 {
732                    ConstSimplifyResult::SimplifyRuntimeError(
733                        exec_datafusion_err!(
734                            "Could not evaluate the expression, found a result of length {}",
735                            a.len()
736                        ),
737                        expr,
738                    )
739                } else if as_list_array(&a).is_ok() {
740                    ConstSimplifyResult::Simplified(
741                        ScalarValue::List(a.as_list::<i32>().to_owned().into()),
742                        metadata,
743                    )
744                } else if as_large_list_array(&a).is_ok() {
745                    ConstSimplifyResult::Simplified(
746                        ScalarValue::LargeList(a.as_list::<i64>().to_owned().into()),
747                        metadata,
748                    )
749                } else {
750                    // Non-ListArray
751                    match ScalarValue::try_from_array(&a, 0) {
752                        Ok(s) => ConstSimplifyResult::Simplified(s, metadata),
753                        Err(err) => ConstSimplifyResult::SimplifyRuntimeError(err, expr),
754                    }
755                }
756            }
757            ColumnarValue::Scalar(s) => ConstSimplifyResult::Simplified(s, metadata),
758        }
759    }
760}
761
762/// Simplifies [`Expr`]s by applying algebraic transformation rules
763///
764/// Example transformations that are applied:
765/// * `expr = true` and `expr != false` to `expr` when `expr` is of boolean type
766/// * `expr = false` and `expr != true` to `!expr` when `expr` is of boolean type
767/// * `true = true` and `false = false` to `true`
768/// * `false = true` and `true = false` to `false`
769/// * `!!expr` to `expr`
770/// * `expr = null` and `expr != null` to `null`
771struct Simplifier<'a> {
772    info: &'a SimplifyContext,
773}
774
775impl<'a> Simplifier<'a> {
776    pub fn new(info: &'a SimplifyContext) -> Self {
777        Self { info }
778    }
779}
780
781impl TreeNodeRewriter for Simplifier<'_> {
782    type Node = Expr;
783
784    /// rewrite the expression simplifying any constant expressions
785    fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
786        use datafusion_expr::Operator::{
787            And, BitwiseAnd, BitwiseOr, BitwiseShiftLeft, BitwiseShiftRight, BitwiseXor,
788            Divide, Eq, Modulo, Multiply, NotEq, Or, RegexIMatch, RegexMatch,
789            RegexNotIMatch, RegexNotMatch,
790        };
791
792        let info = self.info;
793        Ok(match expr {
794            // `value op NULL` -> `NULL`
795            // `NULL op value` -> `NULL`
796            // except for few operators that can return non-null value even when one of the operands is NULL
797            ref expr @ Expr::BinaryExpr(BinaryExpr {
798                ref left,
799                ref op,
800                ref right,
801            }) if op.returns_null_on_null()
802                && (is_null(left.as_ref()) || is_null(right.as_ref())) =>
803            {
804                Transformed::yes(Expr::Literal(
805                    ScalarValue::try_new_null(&info.get_data_type(expr)?)?,
806                    None,
807                ))
808            }
809
810            // `NULL {AND, OR} NULL` -> `NULL`
811            Expr::BinaryExpr(BinaryExpr {
812                left,
813                op: And | Or,
814                right,
815            }) if is_null(&left) && is_null(&right) => Transformed::yes(lit_bool_null()),
816
817            //
818            // Rules for Eq
819            //
820
821            // true = A  --> A
822            // false = A --> !A
823            // null = A --> null
824            Expr::BinaryExpr(BinaryExpr {
825                left,
826                op: Eq,
827                right,
828            }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
829                Transformed::yes(match as_bool_lit(&left)? {
830                    Some(true) => *right,
831                    Some(false) => Expr::Not(right),
832                    None => lit_bool_null(),
833                })
834            }
835            // A = true  --> A
836            // A = false --> !A
837            // A = null --> null
838            Expr::BinaryExpr(BinaryExpr {
839                left,
840                op: Eq,
841                right,
842            }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
843                Transformed::yes(match as_bool_lit(&right)? {
844                    Some(true) => *left,
845                    Some(false) => Expr::Not(left),
846                    None => lit_bool_null(),
847                })
848            }
849            // According to SQL's null semantics, NULL = NULL evaluates to NULL
850            // Both sides are the same expression (A = A) and A is non-volatile expression
851            // A = A --> A IS NOT NULL OR NULL
852            // A = A --> true (if A not nullable)
853            Expr::BinaryExpr(BinaryExpr {
854                left,
855                op: Eq,
856                right,
857            }) if (left == right) & !left.is_volatile() => {
858                Transformed::yes(match !info.nullable(&left)? {
859                    true => lit(true),
860                    false => Expr::BinaryExpr(BinaryExpr {
861                        left: Box::new(Expr::IsNotNull(left)),
862                        op: Or,
863                        right: Box::new(lit_bool_null()),
864                    }),
865                })
866            }
867
868            // Rules for NotEq
869            //
870
871            // true != A  --> !A
872            // false != A --> A
873            // null != A --> null
874            Expr::BinaryExpr(BinaryExpr {
875                left,
876                op: NotEq,
877                right,
878            }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => {
879                Transformed::yes(match as_bool_lit(&left)? {
880                    Some(true) => Expr::Not(right),
881                    Some(false) => *right,
882                    None => lit_bool_null(),
883                })
884            }
885            // A != true  --> !A
886            // A != false --> A
887            // A != null --> null,
888            Expr::BinaryExpr(BinaryExpr {
889                left,
890                op: NotEq,
891                right,
892            }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => {
893                Transformed::yes(match as_bool_lit(&right)? {
894                    Some(true) => Expr::Not(left),
895                    Some(false) => *left,
896                    None => lit_bool_null(),
897                })
898            }
899
900            //
901            // Rules for OR
902            //
903
904            // true OR A --> true (even if A is null)
905            Expr::BinaryExpr(BinaryExpr {
906                left,
907                op: Or,
908                right: _,
909            }) if is_true(&left) => Transformed::yes(*left),
910            // false OR A --> A
911            Expr::BinaryExpr(BinaryExpr {
912                left,
913                op: Or,
914                right,
915            }) if is_false(&left) => Transformed::yes(*right),
916            // A OR true --> true (even if A is null)
917            Expr::BinaryExpr(BinaryExpr {
918                left: _,
919                op: Or,
920                right,
921            }) if is_true(&right) => Transformed::yes(*right),
922            // A OR false --> A
923            Expr::BinaryExpr(BinaryExpr {
924                left,
925                op: Or,
926                right,
927            }) if is_false(&right) => Transformed::yes(*left),
928            // A OR !A ---> true (if A not nullable)
929            Expr::BinaryExpr(BinaryExpr {
930                left,
931                op: Or,
932                right,
933            }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
934                Transformed::yes(lit(true))
935            }
936            // !A OR A ---> true (if A not nullable)
937            Expr::BinaryExpr(BinaryExpr {
938                left,
939                op: Or,
940                right,
941            }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
942                Transformed::yes(lit(true))
943            }
944            // (..A..) OR A --> (..A..)
945            Expr::BinaryExpr(BinaryExpr {
946                left,
947                op: Or,
948                right,
949            }) if expr_contains(&left, &right, Or) => Transformed::yes(*left),
950            // A OR (..A..) --> (..A..)
951            Expr::BinaryExpr(BinaryExpr {
952                left,
953                op: Or,
954                right,
955            }) if expr_contains(&right, &left, Or) => Transformed::yes(*right),
956            // A OR (A AND B) --> A
957            Expr::BinaryExpr(BinaryExpr {
958                left,
959                op: Or,
960                right,
961            }) if is_op_with(And, &right, &left) => Transformed::yes(*left),
962            // (A AND B) OR A --> A
963            Expr::BinaryExpr(BinaryExpr {
964                left,
965                op: Or,
966                right,
967            }) if is_op_with(And, &left, &right) => Transformed::yes(*right),
968            // Eliminate common factors in conjunctions e.g
969            // (A AND B) OR (A AND C) -> A AND (B OR C)
970            Expr::BinaryExpr(BinaryExpr {
971                left,
972                op: Or,
973                right,
974            }) if has_common_conjunction(&left, &right) => {
975                let lhs: IndexSet<Expr> = iter_conjunction_owned(*left).collect();
976                let (common, rhs): (Vec<_>, Vec<_>) = iter_conjunction_owned(*right)
977                    .partition(|e| lhs.contains(e) && !e.is_volatile());
978
979                let new_rhs = rhs.into_iter().reduce(and);
980                let new_lhs = lhs.into_iter().filter(|e| !common.contains(e)).reduce(and);
981                let common_conjunction = common.into_iter().reduce(and).unwrap();
982
983                let new_expr = match (new_lhs, new_rhs) {
984                    (Some(lhs), Some(rhs)) => and(common_conjunction, or(lhs, rhs)),
985                    (_, _) => common_conjunction,
986                };
987                Transformed::yes(new_expr)
988            }
989
990            //
991            // Rules for AND
992            //
993
994            // true AND A --> A
995            Expr::BinaryExpr(BinaryExpr {
996                left,
997                op: And,
998                right,
999            }) if is_true(&left) => Transformed::yes(*right),
1000            // false AND A --> false (even if A is null)
1001            Expr::BinaryExpr(BinaryExpr {
1002                left,
1003                op: And,
1004                right: _,
1005            }) if is_false(&left) => Transformed::yes(*left),
1006            // A AND true --> A
1007            Expr::BinaryExpr(BinaryExpr {
1008                left,
1009                op: And,
1010                right,
1011            }) if is_true(&right) => Transformed::yes(*left),
1012            // A AND false --> false (even if A is null)
1013            Expr::BinaryExpr(BinaryExpr {
1014                left: _,
1015                op: And,
1016                right,
1017            }) if is_false(&right) => Transformed::yes(*right),
1018            // A AND !A ---> false (if A not nullable)
1019            Expr::BinaryExpr(BinaryExpr {
1020                left,
1021                op: And,
1022                right,
1023            }) if is_not_of(&right, &left) && !info.nullable(&left)? => {
1024                Transformed::yes(lit(false))
1025            }
1026            // !A AND A ---> false (if A not nullable)
1027            Expr::BinaryExpr(BinaryExpr {
1028                left,
1029                op: And,
1030                right,
1031            }) if is_not_of(&left, &right) && !info.nullable(&right)? => {
1032                Transformed::yes(lit(false))
1033            }
1034            // (..A..) AND A --> (..A..)
1035            Expr::BinaryExpr(BinaryExpr {
1036                left,
1037                op: And,
1038                right,
1039            }) if expr_contains(&left, &right, And) => Transformed::yes(*left),
1040            // A AND (..A..) --> (..A..)
1041            Expr::BinaryExpr(BinaryExpr {
1042                left,
1043                op: And,
1044                right,
1045            }) if expr_contains(&right, &left, And) => Transformed::yes(*right),
1046            // A AND (A OR B) --> A
1047            Expr::BinaryExpr(BinaryExpr {
1048                left,
1049                op: And,
1050                right,
1051            }) if is_op_with(Or, &right, &left) => Transformed::yes(*left),
1052            // (A OR B) AND A --> A
1053            Expr::BinaryExpr(BinaryExpr {
1054                left,
1055                op: And,
1056                right,
1057            }) if is_op_with(Or, &left, &right) => Transformed::yes(*right),
1058            // A >= constant AND constant <= A --> A = constant
1059            Expr::BinaryExpr(BinaryExpr {
1060                left,
1061                op: And,
1062                right,
1063            }) if can_reduce_to_equal_statement(&left, &right) => {
1064                if let Expr::BinaryExpr(BinaryExpr {
1065                    left: left_left,
1066                    right: left_right,
1067                    ..
1068                }) = *left
1069                {
1070                    Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1071                        left: left_left,
1072                        op: Eq,
1073                        right: left_right,
1074                    }))
1075                } else {
1076                    return internal_err!(
1077                        "can_reduce_to_equal_statement should only be called with a BinaryExpr"
1078                    );
1079                }
1080            }
1081            // A = L1 AND A != L2 --> A = L1 (when L1 != L2)
1082            Expr::BinaryExpr(BinaryExpr {
1083                left,
1084                op: And,
1085                right,
1086            }) if is_eq_and_ne_with_different_literal(&left, &right) => {
1087                Transformed::yes(*left)
1088            }
1089            // A != L2 AND A = L1 --> A = L1 (when L1 != L2)
1090            Expr::BinaryExpr(BinaryExpr {
1091                left,
1092                op: And,
1093                right,
1094            }) if is_eq_and_ne_with_different_literal(&right, &left) => {
1095                Transformed::yes(*right)
1096            }
1097
1098            //
1099            // Rules for Multiply
1100            //
1101
1102            // A * 1 --> A (with type coercion if needed)
1103            Expr::BinaryExpr(BinaryExpr {
1104                left,
1105                op: Multiply,
1106                right,
1107            }) if is_one(&right) => {
1108                simplify_right_is_one_case(info, left, &Multiply, &right)?
1109            }
1110            // 1 * A --> A
1111            Expr::BinaryExpr(BinaryExpr {
1112                left,
1113                op: Multiply,
1114                right,
1115            }) if is_one(&left) => {
1116                // 1 * A is equivalent to A * 1
1117                simplify_right_is_one_case(info, right, &Multiply, &left)?
1118            }
1119
1120            // A * 0 --> 0 (if A is not null and not floating, since NAN * 0 -> NAN)
1121            Expr::BinaryExpr(BinaryExpr {
1122                left,
1123                op: Multiply,
1124                right,
1125            }) if !info.nullable(&left)?
1126                && !info.get_data_type(&left)?.is_floating()
1127                && is_zero(&right) =>
1128            {
1129                Transformed::yes(*right)
1130            }
1131            // 0 * A --> 0 (if A is not null and not floating, since 0 * NAN -> NAN)
1132            Expr::BinaryExpr(BinaryExpr {
1133                left,
1134                op: Multiply,
1135                right,
1136            }) if !info.nullable(&right)?
1137                && !info.get_data_type(&right)?.is_floating()
1138                && is_zero(&left) =>
1139            {
1140                Transformed::yes(*left)
1141            }
1142
1143            //
1144            // Rules for Divide
1145            //
1146
1147            // A / 1 --> A
1148            Expr::BinaryExpr(BinaryExpr {
1149                left,
1150                op: Divide,
1151                right,
1152            }) if is_one(&right) => {
1153                simplify_right_is_one_case(info, left, &Divide, &right)?
1154            }
1155
1156            //
1157            // Rules for Modulo
1158            //
1159
1160            // A % 1 --> 0 (if A is not nullable and not floating, since NAN % 1 --> NAN)
1161            Expr::BinaryExpr(BinaryExpr {
1162                left,
1163                op: Modulo,
1164                right,
1165            }) if !info.nullable(&left)?
1166                && !info.get_data_type(&left)?.is_floating()
1167                && is_one(&right) =>
1168            {
1169                Transformed::yes(Expr::Literal(
1170                    ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1171                    None,
1172                ))
1173            }
1174
1175            //
1176            // Rules for BitwiseAnd
1177            //
1178
1179            // A & 0 -> 0 (if A not nullable)
1180            Expr::BinaryExpr(BinaryExpr {
1181                left,
1182                op: BitwiseAnd,
1183                right,
1184            }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*right),
1185
1186            // 0 & A -> 0 (if A not nullable)
1187            Expr::BinaryExpr(BinaryExpr {
1188                left,
1189                op: BitwiseAnd,
1190                right,
1191            }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*left),
1192
1193            // !A & A -> 0 (if A not nullable)
1194            Expr::BinaryExpr(BinaryExpr {
1195                left,
1196                op: BitwiseAnd,
1197                right,
1198            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1199                Transformed::yes(Expr::Literal(
1200                    ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1201                    None,
1202                ))
1203            }
1204
1205            // A & !A -> 0 (if A not nullable)
1206            Expr::BinaryExpr(BinaryExpr {
1207                left,
1208                op: BitwiseAnd,
1209                right,
1210            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1211                Transformed::yes(Expr::Literal(
1212                    ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1213                    None,
1214                ))
1215            }
1216
1217            // (..A..) & A --> (..A..)
1218            Expr::BinaryExpr(BinaryExpr {
1219                left,
1220                op: BitwiseAnd,
1221                right,
1222            }) if expr_contains(&left, &right, BitwiseAnd) => Transformed::yes(*left),
1223
1224            // A & (..A..) --> (..A..)
1225            Expr::BinaryExpr(BinaryExpr {
1226                left,
1227                op: BitwiseAnd,
1228                right,
1229            }) if expr_contains(&right, &left, BitwiseAnd) => Transformed::yes(*right),
1230
1231            // A & (A | B) --> A (if B not null)
1232            Expr::BinaryExpr(BinaryExpr {
1233                left,
1234                op: BitwiseAnd,
1235                right,
1236            }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => {
1237                Transformed::yes(*left)
1238            }
1239
1240            // (A | B) & A --> A (if B not null)
1241            Expr::BinaryExpr(BinaryExpr {
1242                left,
1243                op: BitwiseAnd,
1244                right,
1245            }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => {
1246                Transformed::yes(*right)
1247            }
1248
1249            //
1250            // Rules for BitwiseOr
1251            //
1252
1253            // A | 0 -> A (even if A is null)
1254            Expr::BinaryExpr(BinaryExpr {
1255                left,
1256                op: BitwiseOr,
1257                right,
1258            }) if is_zero(&right) => Transformed::yes(*left),
1259
1260            // 0 | A -> A (even if A is null)
1261            Expr::BinaryExpr(BinaryExpr {
1262                left,
1263                op: BitwiseOr,
1264                right,
1265            }) if is_zero(&left) => Transformed::yes(*right),
1266
1267            // !A | A -> -1 (if A not nullable)
1268            Expr::BinaryExpr(BinaryExpr {
1269                left,
1270                op: BitwiseOr,
1271                right,
1272            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1273                Transformed::yes(Expr::Literal(
1274                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1275                    None,
1276                ))
1277            }
1278
1279            // A | !A -> -1 (if A not nullable)
1280            Expr::BinaryExpr(BinaryExpr {
1281                left,
1282                op: BitwiseOr,
1283                right,
1284            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1285                Transformed::yes(Expr::Literal(
1286                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1287                    None,
1288                ))
1289            }
1290
1291            // (..A..) | A --> (..A..)
1292            Expr::BinaryExpr(BinaryExpr {
1293                left,
1294                op: BitwiseOr,
1295                right,
1296            }) if expr_contains(&left, &right, BitwiseOr) => Transformed::yes(*left),
1297
1298            // A | (..A..) --> (..A..)
1299            Expr::BinaryExpr(BinaryExpr {
1300                left,
1301                op: BitwiseOr,
1302                right,
1303            }) if expr_contains(&right, &left, BitwiseOr) => Transformed::yes(*right),
1304
1305            // A | (A & B) --> A (if B not null)
1306            Expr::BinaryExpr(BinaryExpr {
1307                left,
1308                op: BitwiseOr,
1309                right,
1310            }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => {
1311                Transformed::yes(*left)
1312            }
1313
1314            // (A & B) | A --> A (if B not null)
1315            Expr::BinaryExpr(BinaryExpr {
1316                left,
1317                op: BitwiseOr,
1318                right,
1319            }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => {
1320                Transformed::yes(*right)
1321            }
1322
1323            //
1324            // Rules for BitwiseXor
1325            //
1326
1327            // A ^ 0 -> A (if A not nullable)
1328            Expr::BinaryExpr(BinaryExpr {
1329                left,
1330                op: BitwiseXor,
1331                right,
1332            }) if !info.nullable(&left)? && is_zero(&right) => Transformed::yes(*left),
1333
1334            // 0 ^ A -> A (if A not nullable)
1335            Expr::BinaryExpr(BinaryExpr {
1336                left,
1337                op: BitwiseXor,
1338                right,
1339            }) if !info.nullable(&right)? && is_zero(&left) => Transformed::yes(*right),
1340
1341            // !A ^ A -> -1 (if A not nullable)
1342            Expr::BinaryExpr(BinaryExpr {
1343                left,
1344                op: BitwiseXor,
1345                right,
1346            }) if is_negative_of(&left, &right) && !info.nullable(&right)? => {
1347                Transformed::yes(Expr::Literal(
1348                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1349                    None,
1350                ))
1351            }
1352
1353            // A ^ !A -> -1 (if A not nullable)
1354            Expr::BinaryExpr(BinaryExpr {
1355                left,
1356                op: BitwiseXor,
1357                right,
1358            }) if is_negative_of(&right, &left) && !info.nullable(&left)? => {
1359                Transformed::yes(Expr::Literal(
1360                    ScalarValue::new_negative_one(&info.get_data_type(&left)?)?,
1361                    None,
1362                ))
1363            }
1364
1365            // (..A..) ^ A --> (the expression without A, if number of A is odd, otherwise one A)
1366            Expr::BinaryExpr(BinaryExpr {
1367                left,
1368                op: BitwiseXor,
1369                right,
1370            }) if expr_contains(&left, &right, BitwiseXor) => {
1371                let expr = delete_xor_in_complex_expr(&left, &right, false);
1372                Transformed::yes(if expr == *right {
1373                    Expr::Literal(
1374                        ScalarValue::new_zero(&info.get_data_type(&right)?)?,
1375                        None,
1376                    )
1377                } else {
1378                    expr
1379                })
1380            }
1381
1382            // A ^ (..A..) --> (the expression without A, if number of A is odd, otherwise one A)
1383            Expr::BinaryExpr(BinaryExpr {
1384                left,
1385                op: BitwiseXor,
1386                right,
1387            }) if expr_contains(&right, &left, BitwiseXor) => {
1388                let expr = delete_xor_in_complex_expr(&right, &left, true);
1389                Transformed::yes(if expr == *left {
1390                    Expr::Literal(
1391                        ScalarValue::new_zero(&info.get_data_type(&left)?)?,
1392                        None,
1393                    )
1394                } else {
1395                    expr
1396                })
1397            }
1398
1399            //
1400            // Rules for BitwiseShiftRight
1401            //
1402
1403            // A >> 0 -> A (even if A is null)
1404            Expr::BinaryExpr(BinaryExpr {
1405                left,
1406                op: BitwiseShiftRight,
1407                right,
1408            }) if is_zero(&right) => Transformed::yes(*left),
1409
1410            //
1411            // Rules for BitwiseShiftRight
1412            //
1413
1414            // A << 0 -> A (even if A is null)
1415            Expr::BinaryExpr(BinaryExpr {
1416                left,
1417                op: BitwiseShiftLeft,
1418                right,
1419            }) if is_zero(&right) => Transformed::yes(*left),
1420
1421            //
1422            // Rules for Not
1423            //
1424            Expr::Not(inner) => Transformed::yes(negate_clause(*inner)),
1425
1426            //
1427            // Rules for Negative
1428            //
1429            Expr::Negative(inner) => Transformed::yes(distribute_negation(*inner)),
1430
1431            //
1432            // Rules for Case
1433            //
1434
1435            // Inline a comparison to a literal with the case statement into the `THEN` clauses.
1436            // which can enable further simplifications
1437            // CASE WHEN X THEN "a" WHEN Y THEN "b" ... END = "a" --> CASE WHEN X THEN "a" = "a" WHEN Y THEN "b" = "a" END
1438            Expr::BinaryExpr(BinaryExpr {
1439                left,
1440                op: op @ (Eq | NotEq),
1441                right,
1442            }) if is_case_with_literal_outputs(&left) && is_lit(&right) => {
1443                let case = into_case(*left)?;
1444                Transformed::yes(Expr::Case(Case {
1445                    expr: None,
1446                    when_then_expr: case
1447                        .when_then_expr
1448                        .into_iter()
1449                        .map(|(when, then)| {
1450                            (
1451                                when,
1452                                Box::new(Expr::BinaryExpr(BinaryExpr {
1453                                    left: then,
1454                                    op,
1455                                    right: right.clone(),
1456                                })),
1457                            )
1458                        })
1459                        .collect(),
1460                    else_expr: case.else_expr.map(|els| {
1461                        Box::new(Expr::BinaryExpr(BinaryExpr {
1462                            left: els,
1463                            op,
1464                            right,
1465                        }))
1466                    }),
1467                }))
1468            }
1469
1470            // CASE WHEN true THEN A ... END --> A
1471            // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END
1472            // CASE WHEN false THEN A END --> NULL
1473            // CASE WHEN false THEN A ELSE B END --> B
1474            // CASE WHEN X THEN A WHEN false THEN B END --> CASE WHEN X THEN A ELSE B END
1475            Expr::Case(Case {
1476                expr: None,
1477                when_then_expr,
1478                mut else_expr,
1479            }) if when_then_expr
1480                .iter()
1481                .any(|(when, _)| is_true(when.as_ref()) || is_false(when.as_ref())) =>
1482            {
1483                let out_type = info.get_data_type(&when_then_expr[0].1)?;
1484                let mut new_when_then_expr = Vec::with_capacity(when_then_expr.len());
1485
1486                for (when, then) in when_then_expr.into_iter() {
1487                    if is_true(when.as_ref()) {
1488                        // Skip adding the rest of the when-then expressions after WHEN true
1489                        // CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X THEN A ELSE B END
1490                        else_expr = Some(then);
1491                        break;
1492                    } else if !is_false(when.as_ref()) {
1493                        new_when_then_expr.push((when, then));
1494                    }
1495                    // else: skip WHEN false cases
1496                }
1497
1498                // Exclude CASE statement altogether if there are no when-then expressions left
1499                if new_when_then_expr.is_empty() {
1500                    // CASE WHEN false THEN A ELSE B END --> B
1501                    if let Some(else_expr) = else_expr {
1502                        return Ok(Transformed::yes(*else_expr));
1503                    // CASE WHEN false THEN A END --> NULL
1504                    } else {
1505                        let null =
1506                            Expr::Literal(ScalarValue::try_new_null(&out_type)?, None);
1507                        return Ok(Transformed::yes(null));
1508                    }
1509                }
1510
1511                Transformed::yes(Expr::Case(Case {
1512                    expr: None,
1513                    when_then_expr: new_when_then_expr,
1514                    else_expr,
1515                }))
1516            }
1517
1518            // CASE
1519            //   WHEN X THEN A
1520            //   WHEN Y THEN B
1521            //   ...
1522            //   ELSE Q
1523            // END
1524            //
1525            // ---> (X AND A) OR (Y AND B AND NOT X) OR ... (NOT (X OR Y) AND Q)
1526            //
1527            // Note: the rationale for this rewrite is that the expr can then be further
1528            // simplified using the existing rules for AND/OR
1529            Expr::Case(Case {
1530                expr: None,
1531                when_then_expr,
1532                else_expr,
1533            }) if !when_then_expr.is_empty()
1534                // The rewrite is O(n²) in general so limit to small number of when-thens that can be true
1535                && (when_then_expr.len() < 3 // small number of input whens
1536                    // or all thens are literal bools and a small number of them are true
1537                    || (when_then_expr.iter().all(|(_, then)| is_bool_lit(then))
1538                        && when_then_expr.iter().filter(|(_, then)| is_true(then)).count() < 3))
1539                && info.is_boolean_type(&when_then_expr[0].1)? =>
1540            {
1541                // String disjunction of all the when predicates encountered so far. Not nullable.
1542                let mut filter_expr = lit(false);
1543                // The disjunction of all the cases
1544                let mut out_expr = lit(false);
1545
1546                for (when, then) in when_then_expr {
1547                    let when = is_exactly_true(*when, info)?;
1548                    let case_expr =
1549                        when.clone().and(filter_expr.clone().not()).and(*then);
1550
1551                    out_expr = out_expr.or(case_expr);
1552                    filter_expr = filter_expr.or(when);
1553                }
1554
1555                let else_expr = else_expr.map(|b| *b).unwrap_or_else(lit_bool_null);
1556                let case_expr = filter_expr.not().and(else_expr);
1557                out_expr = out_expr.or(case_expr);
1558
1559                // Do a first pass at simplification
1560                out_expr.rewrite(self)?
1561            }
1562            // CASE
1563            //   WHEN X THEN true
1564            //   WHEN Y THEN true
1565            //   WHEN Z THEN false
1566            //   ...
1567            //   ELSE true
1568            // END
1569            //
1570            // --->
1571            //
1572            // NOT(CASE
1573            //   WHEN X THEN false
1574            //   WHEN Y THEN false
1575            //   WHEN Z THEN true
1576            //   ...
1577            //   ELSE false
1578            // END)
1579            //
1580            // Note: the rationale for this rewrite is that the case can then be further
1581            // simplified into a small number of ANDs and ORs
1582            Expr::Case(Case {
1583                expr: None,
1584                when_then_expr,
1585                else_expr,
1586            }) if !when_then_expr.is_empty()
1587                && when_then_expr
1588                    .iter()
1589                    .all(|(_, then)| is_bool_lit(then)) // all thens are literal bools
1590                // This simplification is only helpful if we end up with a small number of true thens
1591                && when_then_expr
1592                    .iter()
1593                    .filter(|(_, then)| is_false(then))
1594                    .count()
1595                    < 3
1596                && else_expr.as_deref().is_none_or(is_bool_lit) =>
1597            {
1598                Transformed::yes(
1599                    Expr::Case(Case {
1600                        expr: None,
1601                        when_then_expr: when_then_expr
1602                            .into_iter()
1603                            .map(|(when, then)| (when, Box::new(Expr::Not(then))))
1604                            .collect(),
1605                        else_expr: else_expr
1606                            .map(|else_expr| Box::new(Expr::Not(else_expr))),
1607                    })
1608                    .not(),
1609                )
1610            }
1611            Expr::ScalarFunction(ScalarFunction { func: udf, args }) => {
1612                match udf.simplify(args, info)? {
1613                    ExprSimplifyResult::Original(args) => {
1614                        Transformed::no(Expr::ScalarFunction(ScalarFunction {
1615                            func: udf,
1616                            args,
1617                        }))
1618                    }
1619                    ExprSimplifyResult::Simplified(expr) => Transformed::yes(expr),
1620                }
1621            }
1622
1623            Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction {
1624                ref func,
1625                ..
1626            }) => match (func.simplify(), expr) {
1627                (Some(simplify_function), Expr::AggregateFunction(af)) => {
1628                    Transformed::yes(simplify_function(af, info)?)
1629                }
1630                (_, expr) => Transformed::no(expr),
1631            },
1632
1633            Expr::WindowFunction(ref window_fun) => match (window_fun.simplify(), expr) {
1634                (Some(simplify_function), Expr::WindowFunction(wf)) => {
1635                    Transformed::yes(simplify_function(*wf, info)?)
1636                }
1637                (_, expr) => Transformed::no(expr),
1638            },
1639
1640            //
1641            // Rules for Between
1642            //
1643
1644            // a between 3 and 5  -->  a >= 3 AND a <=5
1645            // a not between 3 and 5  -->  a < 3 OR a > 5
1646            Expr::Between(between) => Transformed::yes(if between.negated {
1647                let l = *between.expr.clone();
1648                let r = *between.expr;
1649                or(l.lt(*between.low), r.gt(*between.high))
1650            } else {
1651                and(
1652                    between.expr.clone().gt_eq(*between.low),
1653                    between.expr.lt_eq(*between.high),
1654                )
1655            }),
1656
1657            //
1658            // Rules for regexes
1659            //
1660            Expr::BinaryExpr(BinaryExpr {
1661                left,
1662                op: op @ (RegexMatch | RegexNotMatch | RegexIMatch | RegexNotIMatch),
1663                right,
1664            }) => simplify_regex_expr(left, op, right)?,
1665
1666            // Rules for Like
1667            Expr::Like(like) => {
1668                // `\` is implicit escape, see https://github.com/apache/datafusion/issues/13291
1669                let escape_char = like.escape_char.unwrap_or('\\');
1670
1671                match StringScalar::try_from_expr(&like.pattern) {
1672                    Some(string_scalar) => {
1673                        let pattern_str = string_scalar.as_str();
1674                        match pattern_str {
1675                            None => return Ok(Transformed::yes(lit_bool_null())),
1676                            Some("%") => {
1677                                // exp LIKE '%' is
1678                                //   - when exp is not NULL, it's true
1679                                //   - when exp is NULL, it's NULL
1680                                // exp NOT LIKE '%' is
1681                                //   - when exp is not NULL, it's false
1682                                //   - when exp is NULL, it's NULL
1683                                let result_for_non_null = lit(!like.negated);
1684                                Transformed::yes(if !info.nullable(&like.expr)? {
1685                                    result_for_non_null
1686                                } else {
1687                                    Expr::Case(Case {
1688                                        expr: Some(Box::new(Expr::IsNotNull(like.expr))),
1689                                        when_then_expr: vec![(
1690                                            Box::new(lit(true)),
1691                                            Box::new(result_for_non_null),
1692                                        )],
1693                                        else_expr: None,
1694                                    })
1695                                })
1696                            }
1697                            Some(pattern_str)
1698                                if pattern_str.contains("%%")
1699                                    && !pattern_str.contains(escape_char) =>
1700                            {
1701                                // Repeated occurrences of wildcard are redundant so remove them
1702                                // exp LIKE '%%'  --> exp LIKE '%'
1703
1704                                static LIKE_REGEX: LazyLock<Regex> =
1705                                    LazyLock::new(|| Regex::new("%%+").unwrap());
1706                                let simplified_pattern =
1707                                    LIKE_REGEX.replace_all(pattern_str, "%").to_string();
1708                                Transformed::yes(Expr::Like(Like {
1709                                    pattern: Box::new(
1710                                        string_scalar.to_expr(&simplified_pattern),
1711                                    ),
1712                                    ..like
1713                                }))
1714                            }
1715                            Some(pattern_str)
1716                                if !like.case_insensitive
1717                                    && !pattern_str
1718                                        .contains(['%', '_', escape_char].as_ref()) =>
1719                            {
1720                                // If the pattern does not contain any wildcards, we can simplify the like expression to an equality expression
1721                                // TODO: handle escape characters
1722                                Transformed::yes(Expr::BinaryExpr(BinaryExpr {
1723                                    left: like.expr.clone(),
1724                                    op: if like.negated { NotEq } else { Eq },
1725                                    right: like.pattern.clone(),
1726                                }))
1727                            }
1728
1729                            Some(_pattern_str) => Transformed::no(Expr::Like(like)),
1730                        }
1731                    }
1732                    None => Transformed::no(Expr::Like(like)),
1733                }
1734            }
1735
1736            // a is not null/unknown --> true (if a is not nullable)
1737            Expr::IsNotNull(expr) | Expr::IsNotUnknown(expr)
1738                if !info.nullable(&expr)? =>
1739            {
1740                Transformed::yes(lit(true))
1741            }
1742
1743            // a is null/unknown --> false (if a is not nullable)
1744            Expr::IsNull(expr) | Expr::IsUnknown(expr) if !info.nullable(&expr)? => {
1745                Transformed::yes(lit(false))
1746            }
1747
1748            // expr IN () --> false
1749            // expr NOT IN () --> true
1750            Expr::InList(InList {
1751                expr: _,
1752                list,
1753                negated,
1754            }) if list.is_empty() => Transformed::yes(lit(negated)),
1755
1756            // null in (x, y, z) --> null
1757            // null not in (x, y, z) --> null
1758            Expr::InList(InList {
1759                expr,
1760                list,
1761                negated: _,
1762            }) if is_null(expr.as_ref()) && !list.is_empty() => {
1763                Transformed::yes(lit_bool_null())
1764            }
1765
1766            // expr IN ((subquery)) -> expr IN (subquery), see ##5529
1767            Expr::InList(InList {
1768                expr,
1769                mut list,
1770                negated,
1771            }) if list.len() == 1
1772                && matches!(list.first(), Some(Expr::ScalarSubquery { .. })) =>
1773            {
1774                let Expr::ScalarSubquery(subquery) = list.remove(0) else {
1775                    unreachable!()
1776                };
1777
1778                Transformed::yes(Expr::InSubquery(InSubquery::new(
1779                    expr, subquery, negated,
1780                )))
1781            }
1782
1783            // Combine multiple OR expressions into a single IN list expression if possible
1784            //
1785            // i.e. `a = 1 OR a = 2 OR a = 3` -> `a IN (1, 2, 3)`
1786            Expr::BinaryExpr(BinaryExpr {
1787                left,
1788                op: Or,
1789                right,
1790            }) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
1791                let lhs = to_inlist(*left).unwrap();
1792                let rhs = to_inlist(*right).unwrap();
1793                #[allow(clippy::allow_attributes, clippy::mutable_key_type)]
1794                // Expr contains Arc with interior mutability but is intentionally used as hash key
1795                let mut seen: HashSet<Expr> = HashSet::new();
1796                let list = lhs
1797                    .list
1798                    .into_iter()
1799                    .chain(rhs.list)
1800                    .filter(|e| seen.insert(e.to_owned()))
1801                    .collect::<Vec<_>>();
1802
1803                let merged_inlist = InList {
1804                    expr: lhs.expr,
1805                    list,
1806                    negated: false,
1807                };
1808
1809                Transformed::yes(Expr::InList(merged_inlist))
1810            }
1811
1812            // Simplify expressions that is guaranteed to be true or false to a literal boolean expression
1813            //
1814            // Rules:
1815            // If both expressions are `IN` or `NOT IN`, then we can apply intersection or union on both lists
1816            //   Intersection:
1817            //     1. `a in (1,2,3) AND a in (4,5) -> a in (), which is false`
1818            //     2. `a in (1,2,3) AND a in (2,3,4) -> a in (2,3)`
1819            //     3. `a not in (1,2,3) OR a not in (3,4,5,6) -> a not in (3)`
1820            //   Union:
1821            //     4. `a not int (1,2,3) AND a not in (4,5,6) -> a not in (1,2,3,4,5,6)`
1822            //     # This rule is handled by `or_in_list_simplifier.rs`
1823            //     5. `a in (1,2,3) OR a in (4,5,6) -> a in (1,2,3,4,5,6)`
1824            // If one of the expressions is `IN` and another one is `NOT IN`, then we apply exception on `In` expression
1825            //     6. `a in (1,2,3,4) AND a not in (1,2,3,4,5) -> a in (), which is false`
1826            //     7. `a not in (1,2,3,4) AND a in (1,2,3,4,5) -> a = 5`
1827            //     8. `a in (1,2,3,4) AND a not in (5,6,7,8) -> a in (1,2,3,4)`
1828            Expr::BinaryExpr(BinaryExpr {
1829                left,
1830                op: And,
1831                right,
1832            }) if are_inlist_and_eq_and_match_neg(
1833                left.as_ref(),
1834                right.as_ref(),
1835                false,
1836                false,
1837            ) =>
1838            {
1839                match (*left, *right) {
1840                    (Expr::InList(l1), Expr::InList(l2)) => {
1841                        return inlist_intersection(l1, &l2, false).map(Transformed::yes);
1842                    }
1843                    // Matched previously once
1844                    _ => unreachable!(),
1845                }
1846            }
1847
1848            Expr::BinaryExpr(BinaryExpr {
1849                left,
1850                op: And,
1851                right,
1852            }) if are_inlist_and_eq_and_match_neg(
1853                left.as_ref(),
1854                right.as_ref(),
1855                true,
1856                true,
1857            ) =>
1858            {
1859                match (*left, *right) {
1860                    (Expr::InList(l1), Expr::InList(l2)) => {
1861                        return inlist_union(l1, l2, true).map(Transformed::yes);
1862                    }
1863                    // Matched previously once
1864                    _ => unreachable!(),
1865                }
1866            }
1867
1868            Expr::BinaryExpr(BinaryExpr {
1869                left,
1870                op: And,
1871                right,
1872            }) if are_inlist_and_eq_and_match_neg(
1873                left.as_ref(),
1874                right.as_ref(),
1875                false,
1876                true,
1877            ) =>
1878            {
1879                match (*left, *right) {
1880                    (Expr::InList(l1), Expr::InList(l2)) => {
1881                        return inlist_except(l1, &l2).map(Transformed::yes);
1882                    }
1883                    // Matched previously once
1884                    _ => unreachable!(),
1885                }
1886            }
1887
1888            Expr::BinaryExpr(BinaryExpr {
1889                left,
1890                op: And,
1891                right,
1892            }) if are_inlist_and_eq_and_match_neg(
1893                left.as_ref(),
1894                right.as_ref(),
1895                true,
1896                false,
1897            ) =>
1898            {
1899                match (*left, *right) {
1900                    (Expr::InList(l1), Expr::InList(l2)) => {
1901                        return inlist_except(l2, &l1).map(Transformed::yes);
1902                    }
1903                    // Matched previously once
1904                    _ => unreachable!(),
1905                }
1906            }
1907
1908            Expr::BinaryExpr(BinaryExpr {
1909                left,
1910                op: Or,
1911                right,
1912            }) if are_inlist_and_eq_and_match_neg(
1913                left.as_ref(),
1914                right.as_ref(),
1915                true,
1916                true,
1917            ) =>
1918            {
1919                match (*left, *right) {
1920                    (Expr::InList(l1), Expr::InList(l2)) => {
1921                        return inlist_intersection(l1, &l2, true).map(Transformed::yes);
1922                    }
1923                    // Matched previously once
1924                    _ => unreachable!(),
1925                }
1926            }
1927
1928            // =======================================
1929            // unwrap_cast_in_comparison
1930            // =======================================
1931            //
1932            // For case:
1933            // try_cast/cast(expr as data_type) op literal
1934            Expr::BinaryExpr(BinaryExpr { left, op, right })
1935                if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1936                    info, &left, op, &right,
1937                ) && op.supports_propagation() =>
1938            {
1939                unwrap_cast_in_comparison_for_binary(info, *left, *right, op)?
1940            }
1941            // literal op try_cast/cast(expr as data_type)
1942            // -->
1943            // try_cast/cast(expr as data_type) op_swap literal
1944            Expr::BinaryExpr(BinaryExpr { left, op, right })
1945                if is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary(
1946                    info, &right, op, &left,
1947                ) && op.supports_propagation()
1948                    && op.swap().is_some() =>
1949            {
1950                unwrap_cast_in_comparison_for_binary(
1951                    info,
1952                    *right,
1953                    *left,
1954                    op.swap().unwrap(),
1955                )?
1956            }
1957            // For case:
1958            // try_cast/cast(expr as left_type) in (expr1,expr2,expr3)
1959            Expr::InList(InList {
1960                expr: mut left,
1961                list,
1962                negated,
1963            }) if is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist(
1964                info, &left, &list,
1965            ) =>
1966            {
1967                let (Expr::TryCast(TryCast {
1968                    expr: left_expr, ..
1969                })
1970                | Expr::Cast(Cast {
1971                    expr: left_expr, ..
1972                })) = left.as_mut()
1973                else {
1974                    return internal_err!("Expect cast expr, but got {:?}", left)?;
1975                };
1976
1977                let expr_type = info.get_data_type(left_expr)?;
1978                let right_exprs = list
1979                    .into_iter()
1980                    .map(|right| {
1981                        match right {
1982                            Expr::Literal(right_lit_value, _) => {
1983                                // if the right_lit_value can be casted to the type of internal_left_expr
1984                                // we need to unwrap the cast for cast/try_cast expr, and add cast to the literal
1985                                let Some(value) = try_cast_literal_to_type(&right_lit_value, &expr_type) else {
1986                                    internal_err!(
1987                                        "Can't cast the list expr {:?} to type {}",
1988                                        right_lit_value, &expr_type
1989                                    )?
1990                                };
1991                                Ok(lit(value))
1992                            }
1993                            other_expr => internal_err!(
1994                                "Only support literal expr to optimize, but the expr is {:?}",
1995                                &other_expr
1996                            ),
1997                        }
1998                    })
1999                    .collect::<Result<Vec<_>>>()?;
2000
2001                Transformed::yes(Expr::InList(InList {
2002                    expr: std::mem::take(left_expr),
2003                    list: right_exprs,
2004                    negated,
2005                }))
2006            }
2007
2008            // =======================================
2009            // preimage_in_comparison
2010            // =======================================
2011            //
2012            // For case:
2013            // date_part('YEAR', expr) op literal
2014            //
2015            // For details see datafusion_expr::ScalarUDFImpl::preimage
2016            Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
2017                use datafusion_expr::Operator::*;
2018                let is_preimage_op = matches!(
2019                    op,
2020                    Eq | NotEq
2021                        | Lt
2022                        | LtEq
2023                        | Gt
2024                        | GtEq
2025                        | IsDistinctFrom
2026                        | IsNotDistinctFrom
2027                );
2028                if !is_preimage_op || is_null(&right) {
2029                    return Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
2030                        left,
2031                        op,
2032                        right,
2033                    })));
2034                }
2035
2036                if let PreimageResult::Range { interval, expr } =
2037                    get_preimage(left.as_ref(), right.as_ref(), info)?
2038                {
2039                    rewrite_with_preimage(*interval, op, expr)?
2040                } else if let Some(swapped) = op.swap() {
2041                    if let PreimageResult::Range { interval, expr } =
2042                        get_preimage(right.as_ref(), left.as_ref(), info)?
2043                    {
2044                        rewrite_with_preimage(*interval, swapped, expr)?
2045                    } else {
2046                        Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, right }))
2047                    }
2048                } else {
2049                    Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, right }))
2050                }
2051            }
2052            // For case:
2053            // date_part('YEAR', expr) IN (literal1, literal2, ...)
2054            Expr::InList(InList {
2055                expr,
2056                list,
2057                negated,
2058            }) => {
2059                if list.len() > THRESHOLD_INLINE_INLIST || list.iter().any(is_null) {
2060                    return Ok(Transformed::no(Expr::InList(InList {
2061                        expr,
2062                        list,
2063                        negated,
2064                    })));
2065                }
2066
2067                let (op, combiner): (Operator, fn(Expr, Expr) -> Expr) =
2068                    if negated { (NotEq, and) } else { (Eq, or) };
2069
2070                let mut rewritten: Option<Expr> = None;
2071                for item in &list {
2072                    let PreimageResult::Range { interval, expr } =
2073                        get_preimage(expr.as_ref(), item, info)?
2074                    else {
2075                        return Ok(Transformed::no(Expr::InList(InList {
2076                            expr,
2077                            list,
2078                            negated,
2079                        })));
2080                    };
2081
2082                    let range_expr = rewrite_with_preimage(*interval, op, expr)?.data;
2083                    rewritten = Some(match rewritten {
2084                        None => range_expr,
2085                        Some(acc) => combiner(acc, range_expr),
2086                    });
2087                }
2088
2089                if let Some(rewritten) = rewritten {
2090                    Transformed::yes(rewritten)
2091                } else {
2092                    Transformed::no(Expr::InList(InList {
2093                        expr,
2094                        list,
2095                        negated,
2096                    }))
2097                }
2098            }
2099
2100            // no additional rewrites possible
2101            expr => Transformed::no(expr),
2102        })
2103    }
2104}
2105
2106fn get_preimage(
2107    left_expr: &Expr,
2108    right_expr: &Expr,
2109    info: &SimplifyContext,
2110) -> Result<PreimageResult> {
2111    let Expr::ScalarFunction(ScalarFunction { func, args }) = left_expr else {
2112        return Ok(PreimageResult::None);
2113    };
2114    if !is_literal_or_literal_cast(right_expr) {
2115        return Ok(PreimageResult::None);
2116    }
2117    if func.signature().volatility != Volatility::Immutable {
2118        return Ok(PreimageResult::None);
2119    }
2120    func.preimage(args, right_expr, info)
2121}
2122
2123fn is_literal_or_literal_cast(expr: &Expr) -> bool {
2124    match expr {
2125        Expr::Literal(_, _) => true,
2126        Expr::Cast(Cast { expr, .. }) => matches!(expr.as_ref(), Expr::Literal(_, _)),
2127        Expr::TryCast(TryCast { expr, .. }) => {
2128            matches!(expr.as_ref(), Expr::Literal(_, _))
2129        }
2130        _ => false,
2131    }
2132}
2133
2134/// Helper for working with string scalar values (Utf8, LargeUtf8, Utf8View)
2135pub(crate) enum StringScalar<'a> {
2136    Utf8(&'a ScalarValue),
2137    LargeUtf8(&'a ScalarValue),
2138    Utf8View(&'a ScalarValue),
2139}
2140
2141impl<'a> StringScalar<'a> {
2142    /// Create a `StringScalar` view from an `Expr` if it is a supported string literal.
2143    /// Returns `None` if the expression is not a string literal.
2144    pub(crate) fn try_from_expr(expr: &'a Expr) -> Option<Self> {
2145        match expr {
2146            Expr::Literal(scalar, _) => Self::try_from_scalar(scalar),
2147            _ => None,
2148        }
2149    }
2150
2151    /// Create a `StringScalar` view from a `ScalarValue` if it is a supported string type.
2152    /// Returns `None` if the scalar value is not a supported string type.
2153    fn try_from_scalar(scalar: &'a ScalarValue) -> Option<Self> {
2154        match scalar {
2155            ScalarValue::Utf8(_) => Some(Self::Utf8(scalar)),
2156            ScalarValue::LargeUtf8(_) => Some(Self::LargeUtf8(scalar)),
2157            ScalarValue::Utf8View(_) => Some(Self::Utf8View(scalar)),
2158            _ => None,
2159        }
2160    }
2161
2162    /// Returns the underlying string slice.
2163    pub(crate) fn as_str(&self) -> Option<&'a str> {
2164        match self {
2165            Self::Utf8(scalar) | Self::LargeUtf8(scalar) | Self::Utf8View(scalar) => {
2166                scalar.try_as_str().flatten()
2167            }
2168        }
2169    }
2170
2171    /// Build a new `Expr` of the same string type with the given value.
2172    pub(crate) fn to_expr(&self, val: &str) -> Expr {
2173        match self {
2174            Self::Utf8(_) => Expr::Literal(ScalarValue::Utf8(Some(val.to_owned())), None),
2175            Self::LargeUtf8(_) => {
2176                Expr::Literal(ScalarValue::LargeUtf8(Some(val.to_owned())), None)
2177            }
2178            Self::Utf8View(_) => {
2179                Expr::Literal(ScalarValue::Utf8View(Some(val.to_owned())), None)
2180            }
2181        }
2182    }
2183}
2184
2185#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
2186fn has_common_conjunction(lhs: &Expr, rhs: &Expr) -> bool {
2187    let lhs_set: HashSet<&Expr> = iter_conjunction(lhs).collect();
2188    iter_conjunction(rhs).any(|e| lhs_set.contains(&e) && !e.is_volatile())
2189}
2190
2191// TODO: We might not need this after defer pattern for Box is stabilized. https://github.com/rust-lang/rust/issues/87121
2192fn are_inlist_and_eq_and_match_neg(
2193    left: &Expr,
2194    right: &Expr,
2195    is_left_neg: bool,
2196    is_right_neg: bool,
2197) -> bool {
2198    match (left, right) {
2199        (Expr::InList(l), Expr::InList(r)) => {
2200            l.expr == r.expr && l.negated == is_left_neg && r.negated == is_right_neg
2201        }
2202        _ => false,
2203    }
2204}
2205
2206// TODO: We might not need this after defer pattern for Box is stabilized. https://github.com/rust-lang/rust/issues/87121
2207fn are_inlist_and_eq(left: &Expr, right: &Expr) -> bool {
2208    let left = as_inlist(left);
2209    let right = as_inlist(right);
2210    if let (Some(lhs), Some(rhs)) = (left, right) {
2211        matches!(lhs.expr.as_ref(), Expr::Column(_))
2212            && matches!(rhs.expr.as_ref(), Expr::Column(_))
2213            && lhs.expr == rhs.expr
2214            && !lhs.negated
2215            && !rhs.negated
2216    } else {
2217        false
2218    }
2219}
2220
2221/// Try to convert an expression to an in-list expression
2222fn as_inlist(expr: &'_ Expr) -> Option<Cow<'_, InList>> {
2223    match expr {
2224        Expr::InList(inlist) => Some(Cow::Borrowed(inlist)),
2225        Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == Operator::Eq => {
2226            match (left.as_ref(), right.as_ref()) {
2227                (Expr::Column(_), Expr::Literal(_, _)) => Some(Cow::Owned(InList {
2228                    expr: left.clone(),
2229                    list: vec![*right.clone()],
2230                    negated: false,
2231                })),
2232                (Expr::Literal(_, _), Expr::Column(_)) => Some(Cow::Owned(InList {
2233                    expr: right.clone(),
2234                    list: vec![*left.clone()],
2235                    negated: false,
2236                })),
2237                _ => None,
2238            }
2239        }
2240        _ => None,
2241    }
2242}
2243
2244fn to_inlist(expr: Expr) -> Option<InList> {
2245    match expr {
2246        Expr::InList(inlist) => Some(inlist),
2247        Expr::BinaryExpr(BinaryExpr {
2248            left,
2249            op: Operator::Eq,
2250            right,
2251        }) => match (left.as_ref(), right.as_ref()) {
2252            (Expr::Column(_), Expr::Literal(_, _)) => Some(InList {
2253                expr: left,
2254                list: vec![*right],
2255                negated: false,
2256            }),
2257            (Expr::Literal(_, _), Expr::Column(_)) => Some(InList {
2258                expr: right,
2259                list: vec![*left],
2260                negated: false,
2261            }),
2262            _ => None,
2263        },
2264        _ => None,
2265    }
2266}
2267
2268/// Return the union of two inlist expressions
2269/// maintaining the order of the elements in the two lists
2270#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
2271fn inlist_union(mut l1: InList, l2: InList, negated: bool) -> Result<Expr> {
2272    // extend the list in l1 with the elements in l2 that are not already in l1
2273    let l1_items: HashSet<_> = l1.list.iter().collect();
2274
2275    // keep all l2 items that do not also appear in l1
2276    let keep_l2: Vec<_> = l2
2277        .list
2278        .into_iter()
2279        .filter_map(|e| if l1_items.contains(&e) { None } else { Some(e) })
2280        .collect();
2281
2282    l1.list.extend(keep_l2);
2283    l1.negated = negated;
2284    Ok(Expr::InList(l1))
2285}
2286
2287/// Return the intersection of two inlist expressions
2288/// maintaining the order of the elements in the two lists
2289#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
2290fn inlist_intersection(mut l1: InList, l2: &InList, negated: bool) -> Result<Expr> {
2291    let l2_items = l2.list.iter().collect::<HashSet<_>>();
2292
2293    // remove all items from l1 that are not in l2
2294    l1.list.retain(|e| l2_items.contains(e));
2295
2296    // e in () is always false
2297    // e not in () is always true
2298    if l1.list.is_empty() {
2299        return Ok(lit(negated));
2300    }
2301    Ok(Expr::InList(l1))
2302}
2303
2304/// Return the all items in l1 that are not in l2
2305/// maintaining the order of the elements in the two lists
2306#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
2307fn inlist_except(mut l1: InList, l2: &InList) -> Result<Expr> {
2308    let l2_items = l2.list.iter().collect::<HashSet<_>>();
2309
2310    // keep only items from l1 that are not in l2
2311    l1.list.retain(|e| !l2_items.contains(e));
2312
2313    if l1.list.is_empty() {
2314        return Ok(lit(false));
2315    }
2316    Ok(Expr::InList(l1))
2317}
2318
2319/// Returns expression testing a boolean `expr` for being exactly `true` (not `false` or NULL).
2320fn is_exactly_true(expr: Expr, info: &SimplifyContext) -> Result<Expr> {
2321    if !info.nullable(&expr)? {
2322        Ok(expr)
2323    } else {
2324        Ok(Expr::BinaryExpr(BinaryExpr {
2325            left: Box::new(expr),
2326            op: Operator::IsNotDistinctFrom,
2327            right: Box::new(lit(true)),
2328        }))
2329    }
2330}
2331
2332// A * 1 -> A
2333// A / 1 -> A
2334//
2335// Move this function body out of the large match branch avoid stack overflow
2336fn simplify_right_is_one_case(
2337    info: &SimplifyContext,
2338    left: Box<Expr>,
2339    op: &Operator,
2340    right: &Expr,
2341) -> Result<Transformed<Expr>> {
2342    // Check if resulting type would be different due to coercion
2343    let left_type = info.get_data_type(&left)?;
2344    let right_type = info.get_data_type(right)?;
2345    match BinaryTypeCoercer::new(&left_type, op, &right_type).get_result_type() {
2346        Ok(result_type) => {
2347            // Only cast if the types differ
2348            if left_type != result_type {
2349                Ok(Transformed::yes(Expr::Cast(Cast::new(left, result_type))))
2350            } else {
2351                Ok(Transformed::yes(*left))
2352            }
2353        }
2354        Err(_) => Ok(Transformed::yes(*left)),
2355    }
2356}
2357
2358#[cfg(test)]
2359mod tests {
2360    use super::*;
2361    use crate::test::test_table_scan_with_name;
2362    use arrow::{
2363        array::{Int32Array, StructArray},
2364        datatypes::{FieldRef, Fields},
2365    };
2366    use datafusion_common::{DFSchemaRef, ToDFSchema, assert_contains};
2367    use datafusion_expr::{
2368        expr::WindowFunction,
2369        function::{
2370            AccumulatorArgs, AggregateFunctionSimplification,
2371            WindowFunctionSimplification,
2372        },
2373        interval_arithmetic::Interval,
2374        *,
2375    };
2376    use datafusion_functions_window_common::field::WindowUDFFieldArgs;
2377    use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
2378    use datafusion_physical_expr::PhysicalExpr;
2379    use std::hash::Hash;
2380    use std::sync::LazyLock;
2381    use std::{
2382        collections::HashMap,
2383        ops::{BitAnd, BitOr, BitXor},
2384        sync::Arc,
2385    };
2386
2387    // ------------------------------
2388    // --- ExprSimplifier tests -----
2389    // ------------------------------
2390    #[test]
2391    fn api_basic() {
2392        let simplifier = ExprSimplifier::new(
2393            SimplifyContext::builder()
2394                .with_schema(test_schema())
2395                .build(),
2396        );
2397
2398        let expr = lit(1) + lit(2);
2399        let expected = lit(3);
2400        assert_eq!(expected, simplifier.simplify(expr).unwrap());
2401    }
2402
2403    #[test]
2404    fn basic_coercion() {
2405        let schema = test_schema();
2406        let simplifier = ExprSimplifier::new(
2407            SimplifyContext::builder()
2408                .with_schema(Arc::clone(&schema))
2409                .build(),
2410        );
2411
2412        // Note expr type is int32 (not int64)
2413        // (1i64 + 2i32) < i
2414        let expr = (lit(1i64) + lit(2i32)).lt(col("i"));
2415        // should fully simplify to 3 < i (though i has been coerced to i64)
2416        let expected = lit(3i64).lt(col("i"));
2417
2418        let expr = simplifier.coerce(expr, &schema).unwrap();
2419
2420        assert_eq!(expected, simplifier.simplify(expr).unwrap());
2421    }
2422
2423    fn test_schema() -> DFSchemaRef {
2424        static TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
2425            Schema::new(vec![
2426                Field::new("i", DataType::Int64, false),
2427                Field::new("b", DataType::Boolean, true),
2428            ])
2429            .to_dfschema_ref()
2430            .unwrap()
2431        });
2432        Arc::clone(&TEST_SCHEMA)
2433    }
2434
2435    #[test]
2436    fn simplify_and_constant_prop() {
2437        let simplifier = ExprSimplifier::new(
2438            SimplifyContext::builder()
2439                .with_schema(test_schema())
2440                .build(),
2441        );
2442
2443        // should be able to simplify to false
2444        // (i * (1 - 2)) > 0
2445        let expr = (col("i") * (lit(1) - lit(1))).gt(lit(0));
2446        let expected = lit(false);
2447        assert_eq!(expected, simplifier.simplify(expr).unwrap());
2448    }
2449
2450    #[test]
2451    fn simplify_and_constant_prop_with_case() {
2452        let simplifier = ExprSimplifier::new(
2453            SimplifyContext::builder()
2454                .with_schema(test_schema())
2455                .build(),
2456        );
2457
2458        //   CASE
2459        //     WHEN i>5 AND false THEN i > 5
2460        //     WHEN i<5 AND true THEN i < 5
2461        //     ELSE false
2462        //   END
2463        //
2464        // Can be simplified to `i < 5`
2465        let expr = when(col("i").gt(lit(5)).and(lit(false)), col("i").gt(lit(5)))
2466            .when(col("i").lt(lit(5)).and(lit(true)), col("i").lt(lit(5)))
2467            .otherwise(lit(false))
2468            .unwrap();
2469        let expected = col("i").lt(lit(5));
2470        assert_eq!(expected, simplifier.simplify(expr).unwrap());
2471    }
2472
2473    // ------------------------------
2474    // --- Simplifier tests -----
2475    // ------------------------------
2476
2477    #[test]
2478    fn test_simplify_canonicalize() {
2479        {
2480            let expr = lit(1).lt(col("c2")).and(col("c2").gt(lit(1)));
2481            let expected = col("c2").gt(lit(1));
2482            assert_eq!(simplify(expr), expected);
2483        }
2484        {
2485            let expr = col("c1").lt(col("c2")).and(col("c2").gt(col("c1")));
2486            let expected = col("c2").gt(col("c1"));
2487            assert_eq!(simplify(expr), expected);
2488        }
2489        {
2490            let expr = col("c1")
2491                .eq(lit(1))
2492                .and(lit(1).eq(col("c1")))
2493                .and(col("c1").eq(lit(3)));
2494            let expected = col("c1").eq(lit(1)).and(col("c1").eq(lit(3)));
2495            assert_eq!(simplify(expr), expected);
2496        }
2497        {
2498            let expr = col("c1")
2499                .eq(col("c2"))
2500                .and(col("c1").gt(lit(5)))
2501                .and(col("c2").eq(col("c1")));
2502            let expected = col("c2").eq(col("c1")).and(col("c1").gt(lit(5)));
2503            assert_eq!(simplify(expr), expected);
2504        }
2505        {
2506            let expr = col("c1")
2507                .eq(lit(1))
2508                .and(col("c2").gt(lit(3)).or(lit(3).lt(col("c2"))));
2509            let expected = col("c1").eq(lit(1)).and(col("c2").gt(lit(3)));
2510            assert_eq!(simplify(expr), expected);
2511        }
2512        {
2513            let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2514            let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2515            assert_eq!(simplify(expr), expected);
2516        }
2517        {
2518            let expr = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2519            let expected = col("c1").lt(lit(5)).and(col("c1").gt_eq(lit(5)));
2520            assert_eq!(simplify(expr), expected);
2521        }
2522        {
2523            let expr = col("c1").gt(col("c2")).and(col("c1").gt(col("c2")));
2524            let expected = col("c2").lt(col("c1"));
2525            assert_eq!(simplify(expr), expected);
2526        }
2527    }
2528
2529    #[test]
2530    fn test_simplify_eq_not_self() {
2531        // `expr_a`: column `c2` is nullable, so `c2 = c2` simplifies to `c2 IS NOT NULL OR NULL`
2532        // This ensures the expression is only true when `c2` is not NULL, accounting for SQL's NULL semantics.
2533        let expr_a = col("c2").eq(col("c2"));
2534        let expected_a = col("c2").is_not_null().or(lit_bool_null());
2535
2536        // `expr_b`: column `c2_non_null` is explicitly non-nullable, so `c2_non_null = c2_non_null` is always true
2537        let expr_b = col("c2_non_null").eq(col("c2_non_null"));
2538        let expected_b = lit(true);
2539
2540        assert_eq!(simplify(expr_a), expected_a);
2541        assert_eq!(simplify(expr_b), expected_b);
2542    }
2543
2544    #[test]
2545    fn test_simplify_or_true() {
2546        let expr_a = col("c2").or(lit(true));
2547        let expr_b = lit(true).or(col("c2"));
2548        let expected = lit(true);
2549
2550        assert_eq!(simplify(expr_a), expected);
2551        assert_eq!(simplify(expr_b), expected);
2552    }
2553
2554    #[test]
2555    fn test_simplify_or_false() {
2556        let expr_a = lit(false).or(col("c2"));
2557        let expr_b = col("c2").or(lit(false));
2558        let expected = col("c2");
2559
2560        assert_eq!(simplify(expr_a), expected);
2561        assert_eq!(simplify(expr_b), expected);
2562    }
2563
2564    #[test]
2565    fn test_simplify_or_same() {
2566        let expr = col("c2").or(col("c2"));
2567        let expected = col("c2");
2568
2569        assert_eq!(simplify(expr), expected);
2570    }
2571
2572    #[test]
2573    fn test_simplify_or_not_self() {
2574        // A OR !A if A is not nullable --> true
2575        // !A OR A if A is not nullable --> true
2576        let expr_a = col("c2_non_null").or(col("c2_non_null").not());
2577        let expr_b = col("c2_non_null").not().or(col("c2_non_null"));
2578        let expected = lit(true);
2579
2580        assert_eq!(simplify(expr_a), expected);
2581        assert_eq!(simplify(expr_b), expected);
2582    }
2583
2584    #[test]
2585    fn test_simplify_and_false() {
2586        let expr_a = lit(false).and(col("c2"));
2587        let expr_b = col("c2").and(lit(false));
2588        let expected = lit(false);
2589
2590        assert_eq!(simplify(expr_a), expected);
2591        assert_eq!(simplify(expr_b), expected);
2592    }
2593
2594    #[test]
2595    fn test_simplify_and_same() {
2596        let expr = col("c2").and(col("c2"));
2597        let expected = col("c2");
2598
2599        assert_eq!(simplify(expr), expected);
2600    }
2601
2602    #[test]
2603    fn test_simplify_and_true() {
2604        let expr_a = lit(true).and(col("c2"));
2605        let expr_b = col("c2").and(lit(true));
2606        let expected = col("c2");
2607
2608        assert_eq!(simplify(expr_a), expected);
2609        assert_eq!(simplify(expr_b), expected);
2610    }
2611
2612    #[test]
2613    fn test_simplify_and_not_self() {
2614        // A AND !A if A is not nullable --> false
2615        // !A AND A if A is not nullable --> false
2616        let expr_a = col("c2_non_null").and(col("c2_non_null").not());
2617        let expr_b = col("c2_non_null").not().and(col("c2_non_null"));
2618        let expected = lit(false);
2619
2620        assert_eq!(simplify(expr_a), expected);
2621        assert_eq!(simplify(expr_b), expected);
2622    }
2623
2624    #[test]
2625    fn test_simplify_eq_and_neq_with_different_literals() {
2626        // A = 1 AND A != 0 --> A = 1 (when 1 != 0)
2627        let expr = col("c2").eq(lit(1)).and(col("c2").not_eq(lit(0)));
2628        let expected = col("c2").eq(lit(1));
2629        assert_eq!(simplify(expr), expected);
2630
2631        // A != 0 AND A = 1 --> A = 1 (when 1 != 0)
2632        let expr = col("c2").not_eq(lit(0)).and(col("c2").eq(lit(1)));
2633        let expected = col("c2").eq(lit(1));
2634        assert_eq!(simplify(expr), expected);
2635
2636        // Should NOT simplify when literals are the same (A = 1 AND A != 1)
2637        // This is a contradiction but handled by other rules
2638        let expr = col("c2").eq(lit(1)).and(col("c2").not_eq(lit(1)));
2639        // Should not be simplified by this rule (left unchanged or handled elsewhere)
2640        let result = simplify(expr.clone());
2641        // The expression should not have been simplified
2642        assert_eq!(result, expr);
2643    }
2644
2645    #[test]
2646    fn test_simplify_multiply_by_one() {
2647        let expr_a = col("c2") * lit(1);
2648        let expr_b = lit(1) * col("c2");
2649        let expected = col("c2");
2650
2651        assert_eq!(simplify(expr_a), expected);
2652        assert_eq!(simplify(expr_b), expected);
2653
2654        let expr = col("c2") * lit(ScalarValue::Decimal128(Some(10000000000), 38, 10));
2655        assert_eq!(simplify(expr), expected);
2656
2657        let expr = lit(ScalarValue::Decimal128(Some(10000000000), 31, 10)) * col("c2");
2658        assert_eq!(simplify(expr), expected);
2659    }
2660
2661    #[test]
2662    fn test_simplify_multiply_by_null() {
2663        let null = lit(ScalarValue::Int64(None));
2664        // A * null --> null
2665        {
2666            let expr = col("c3") * null.clone();
2667            assert_eq!(simplify(expr), null);
2668        }
2669        // null * A --> null
2670        {
2671            let expr = null.clone() * col("c3");
2672            assert_eq!(simplify(expr), null);
2673        }
2674    }
2675
2676    #[test]
2677    fn test_simplify_multiply_by_zero() {
2678        // cannot optimize A * null (null * A) if A is nullable
2679        {
2680            let expr_a = col("c2") * lit(0);
2681            let expr_b = lit(0) * col("c2");
2682
2683            assert_eq!(simplify(expr_a.clone()), expr_a);
2684            assert_eq!(simplify(expr_b.clone()), expr_b);
2685        }
2686        // 0 * A --> 0 if A is not nullable
2687        {
2688            let expr = lit(0) * col("c2_non_null");
2689            assert_eq!(simplify(expr), lit(0));
2690        }
2691        // A * 0 --> 0 if A is not nullable
2692        {
2693            let expr = col("c2_non_null") * lit(0);
2694            assert_eq!(simplify(expr), lit(0));
2695        }
2696        // A * Decimal128(0) --> 0 if A is not nullable
2697        {
2698            let expr = col("c2_non_null") * lit(ScalarValue::Decimal128(Some(0), 31, 10));
2699            assert_eq!(
2700                simplify(expr),
2701                lit(ScalarValue::Decimal128(Some(0), 31, 10))
2702            );
2703            let expr = binary_expr(
2704                lit(ScalarValue::Decimal128(Some(0), 31, 10)),
2705                Operator::Multiply,
2706                col("c2_non_null"),
2707            );
2708            assert_eq!(
2709                simplify(expr),
2710                lit(ScalarValue::Decimal128(Some(0), 31, 10))
2711            );
2712        }
2713    }
2714
2715    #[test]
2716    fn test_simplify_divide_by_one() {
2717        let expr = binary_expr(col("c2"), Operator::Divide, lit(1));
2718        let expected = col("c2");
2719        assert_eq!(simplify(expr), expected);
2720        let expr = col("c2") / lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2721        assert_eq!(simplify(expr), expected);
2722    }
2723
2724    #[test]
2725    fn test_simplify_divide_null() {
2726        // A / null --> null
2727        let null = lit(ScalarValue::Int64(None));
2728        {
2729            let expr = col("c3") / null.clone();
2730            assert_eq!(simplify(expr), null);
2731        }
2732        // null / A --> null
2733        {
2734            let expr = null.clone() / col("c3");
2735            assert_eq!(simplify(expr), null);
2736        }
2737    }
2738
2739    #[test]
2740    fn test_simplify_divide_by_same() {
2741        let expr = col("c2") / col("c2");
2742        // if c2 is null, c2 / c2 = null, so can't simplify
2743        let expected = expr.clone();
2744
2745        assert_eq!(simplify(expr), expected);
2746    }
2747
2748    #[test]
2749    fn test_simplify_modulo_by_null() {
2750        let null = lit(ScalarValue::Int64(None));
2751        // A % null --> null
2752        {
2753            let expr = col("c3") % null.clone();
2754            assert_eq!(simplify(expr), null);
2755        }
2756        // null % A --> null
2757        {
2758            let expr = null.clone() % col("c3");
2759            assert_eq!(simplify(expr), null);
2760        }
2761    }
2762
2763    #[test]
2764    fn test_simplify_modulo_by_one() {
2765        let expr = col("c2") % lit(1);
2766        // if c2 is null, c2 % 1 = null, so can't simplify
2767        let expected = expr.clone();
2768
2769        assert_eq!(simplify(expr), expected);
2770    }
2771
2772    #[test]
2773    fn test_simplify_divide_zero_by_zero() {
2774        // because divide by 0 maybe occur in short-circuit expression
2775        // so we should not simplify this, and throw error in runtime
2776        let expr = lit(0) / lit(0);
2777        let expected = expr.clone();
2778
2779        assert_eq!(simplify(expr), expected);
2780    }
2781
2782    #[test]
2783    fn test_simplify_divide_by_zero() {
2784        // because divide by 0 maybe occur in short-circuit expression
2785        // so we should not simplify this, and throw error in runtime
2786        let expr = col("c2_non_null") / lit(0);
2787        let expected = expr.clone();
2788
2789        assert_eq!(simplify(expr), expected);
2790    }
2791
2792    #[test]
2793    fn test_simplify_modulo_by_one_non_null() {
2794        let expr = col("c3_non_null") % lit(1);
2795        let expected = lit(0_i64);
2796        assert_eq!(simplify(expr), expected);
2797        let expr =
2798            col("c3_non_null") % lit(ScalarValue::Decimal128(Some(10000000000), 31, 10));
2799        assert_eq!(simplify(expr), expected);
2800    }
2801
2802    #[test]
2803    fn test_simplify_bitwise_xor_by_null() {
2804        let null = lit(ScalarValue::Int64(None));
2805        // A ^ null --> null
2806        {
2807            let expr = col("c3") ^ null.clone();
2808            assert_eq!(simplify(expr), null);
2809        }
2810        // null ^ A --> null
2811        {
2812            let expr = null.clone() ^ col("c3");
2813            assert_eq!(simplify(expr), null);
2814        }
2815    }
2816
2817    #[test]
2818    fn test_simplify_bitwise_shift_right_by_null() {
2819        let null = lit(ScalarValue::Int64(None));
2820        // A >> null --> null
2821        {
2822            let expr = col("c3") >> null.clone();
2823            assert_eq!(simplify(expr), null);
2824        }
2825        // null >> A --> null
2826        {
2827            let expr = null.clone() >> col("c3");
2828            assert_eq!(simplify(expr), null);
2829        }
2830    }
2831
2832    #[test]
2833    fn test_simplify_bitwise_shift_left_by_null() {
2834        let null = lit(ScalarValue::Int64(None));
2835        // A << null --> null
2836        {
2837            let expr = col("c3") << null.clone();
2838            assert_eq!(simplify(expr), null);
2839        }
2840        // null << A --> null
2841        {
2842            let expr = null.clone() << col("c3");
2843            assert_eq!(simplify(expr), null);
2844        }
2845    }
2846
2847    #[test]
2848    fn test_simplify_bitwise_and_by_zero() {
2849        // A & 0 --> 0
2850        {
2851            let expr = col("c2_non_null") & lit(0);
2852            assert_eq!(simplify(expr), lit(0));
2853        }
2854        // 0 & A --> 0
2855        {
2856            let expr = lit(0) & col("c2_non_null");
2857            assert_eq!(simplify(expr), lit(0));
2858        }
2859    }
2860
2861    #[test]
2862    fn test_simplify_bitwise_or_by_zero() {
2863        // A | 0 --> A
2864        {
2865            let expr = col("c2_non_null") | lit(0);
2866            assert_eq!(simplify(expr), col("c2_non_null"));
2867        }
2868        // 0 | A --> A
2869        {
2870            let expr = lit(0) | col("c2_non_null");
2871            assert_eq!(simplify(expr), col("c2_non_null"));
2872        }
2873    }
2874
2875    #[test]
2876    fn test_simplify_bitwise_xor_by_zero() {
2877        // A ^ 0 --> A
2878        {
2879            let expr = col("c2_non_null") ^ lit(0);
2880            assert_eq!(simplify(expr), col("c2_non_null"));
2881        }
2882        // 0 ^ A --> A
2883        {
2884            let expr = lit(0) ^ col("c2_non_null");
2885            assert_eq!(simplify(expr), col("c2_non_null"));
2886        }
2887    }
2888
2889    #[test]
2890    fn test_simplify_bitwise_bitwise_shift_right_by_zero() {
2891        // A >> 0 --> A
2892        {
2893            let expr = col("c2_non_null") >> lit(0);
2894            assert_eq!(simplify(expr), col("c2_non_null"));
2895        }
2896    }
2897
2898    #[test]
2899    fn test_simplify_bitwise_bitwise_shift_left_by_zero() {
2900        // A << 0 --> A
2901        {
2902            let expr = col("c2_non_null") << lit(0);
2903            assert_eq!(simplify(expr), col("c2_non_null"));
2904        }
2905    }
2906
2907    #[test]
2908    fn test_simplify_bitwise_and_by_null() {
2909        let null = Expr::Literal(ScalarValue::Int64(None), None);
2910        // A & null --> null
2911        {
2912            let expr = col("c3") & null.clone();
2913            assert_eq!(simplify(expr), null);
2914        }
2915        // null & A --> null
2916        {
2917            let expr = null.clone() & col("c3");
2918            assert_eq!(simplify(expr), null);
2919        }
2920    }
2921
2922    #[test]
2923    fn test_simplify_composed_bitwise_and() {
2924        // ((c2 > 5) & (c1 < 6)) & (c2 > 5) --> (c2 > 5) & (c1 < 6)
2925
2926        let expr = bitwise_and(
2927            bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2928            col("c2").gt(lit(5)),
2929        );
2930        let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2931
2932        assert_eq!(simplify(expr), expected);
2933
2934        // (c2 > 5) & ((c2 > 5) & (c1 < 6)) --> (c2 > 5) & (c1 < 6)
2935
2936        let expr = bitwise_and(
2937            col("c2").gt(lit(5)),
2938            bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2939        );
2940        let expected = bitwise_and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2941        assert_eq!(simplify(expr), expected);
2942    }
2943
2944    #[test]
2945    fn test_simplify_composed_bitwise_or() {
2946        // ((c2 > 5) | (c1 < 6)) | (c2 > 5) --> (c2 > 5) | (c1 < 6)
2947
2948        let expr = bitwise_or(
2949            bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2950            col("c2").gt(lit(5)),
2951        );
2952        let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2953
2954        assert_eq!(simplify(expr), expected);
2955
2956        // (c2 > 5) | ((c2 > 5) | (c1 < 6)) --> (c2 > 5) | (c1 < 6)
2957
2958        let expr = bitwise_or(
2959            col("c2").gt(lit(5)),
2960            bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
2961        );
2962        let expected = bitwise_or(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
2963
2964        assert_eq!(simplify(expr), expected);
2965    }
2966
2967    #[test]
2968    fn test_simplify_composed_bitwise_xor() {
2969        // with an even number of the column "c2"
2970        // c2 ^ ((c2 ^ (c2 | c1)) ^ (c1 & c2)) --> (c2 | c1) ^ (c1 & c2)
2971
2972        let expr = bitwise_xor(
2973            col("c2"),
2974            bitwise_xor(
2975                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2976                bitwise_and(col("c1"), col("c2")),
2977            ),
2978        );
2979
2980        let expected = bitwise_xor(
2981            bitwise_or(col("c2"), col("c1")),
2982            bitwise_and(col("c1"), col("c2")),
2983        );
2984
2985        assert_eq!(simplify(expr), expected);
2986
2987        // with an odd number of the column "c2"
2988        // c2 ^ (c2 ^ (c2 | c1)) ^ ((c1 & c2) ^ c2) --> c2 ^ ((c2 | c1) ^ (c1 & c2))
2989
2990        let expr = bitwise_xor(
2991            col("c2"),
2992            bitwise_xor(
2993                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
2994                bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
2995            ),
2996        );
2997
2998        let expected = bitwise_xor(
2999            col("c2"),
3000            bitwise_xor(
3001                bitwise_or(col("c2"), col("c1")),
3002                bitwise_and(col("c1"), col("c2")),
3003            ),
3004        );
3005
3006        assert_eq!(simplify(expr), expected);
3007
3008        // with an even number of the column "c2"
3009        // ((c2 ^ (c2 | c1)) ^ (c1 & c2)) ^ c2 --> (c2 | c1) ^ (c1 & c2)
3010
3011        let expr = bitwise_xor(
3012            bitwise_xor(
3013                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
3014                bitwise_and(col("c1"), col("c2")),
3015            ),
3016            col("c2"),
3017        );
3018
3019        let expected = bitwise_xor(
3020            bitwise_or(col("c2"), col("c1")),
3021            bitwise_and(col("c1"), col("c2")),
3022        );
3023
3024        assert_eq!(simplify(expr), expected);
3025
3026        // with an odd number of the column "c2"
3027        // (c2 ^ (c2 | c1)) ^ ((c1 & c2) ^ c2) ^ c2 --> ((c2 | c1) ^ (c1 & c2)) ^ c2
3028
3029        let expr = bitwise_xor(
3030            bitwise_xor(
3031                bitwise_xor(col("c2"), bitwise_or(col("c2"), col("c1"))),
3032                bitwise_xor(bitwise_and(col("c1"), col("c2")), col("c2")),
3033            ),
3034            col("c2"),
3035        );
3036
3037        let expected = bitwise_xor(
3038            bitwise_xor(
3039                bitwise_or(col("c2"), col("c1")),
3040                bitwise_and(col("c1"), col("c2")),
3041            ),
3042            col("c2"),
3043        );
3044
3045        assert_eq!(simplify(expr), expected);
3046    }
3047
3048    #[test]
3049    fn test_simplify_negated_bitwise_and() {
3050        // !c4 & c4 --> 0
3051        let expr = (-col("c4_non_null")) & col("c4_non_null");
3052        let expected = lit(0u32);
3053
3054        assert_eq!(simplify(expr), expected);
3055        // c4 & !c4 --> 0
3056        let expr = col("c4_non_null") & (-col("c4_non_null"));
3057        let expected = lit(0u32);
3058
3059        assert_eq!(simplify(expr), expected);
3060
3061        // !c3 & c3 --> 0
3062        let expr = (-col("c3_non_null")) & col("c3_non_null");
3063        let expected = lit(0i64);
3064
3065        assert_eq!(simplify(expr), expected);
3066        // c3 & !c3 --> 0
3067        let expr = col("c3_non_null") & (-col("c3_non_null"));
3068        let expected = lit(0i64);
3069
3070        assert_eq!(simplify(expr), expected);
3071    }
3072
3073    #[test]
3074    fn test_simplify_negated_bitwise_or() {
3075        // !c4 | c4 --> -1
3076        let expr = (-col("c4_non_null")) | col("c4_non_null");
3077        let expected = lit(-1i32);
3078
3079        assert_eq!(simplify(expr), expected);
3080
3081        // c4 | !c4 --> -1
3082        let expr = col("c4_non_null") | (-col("c4_non_null"));
3083        let expected = lit(-1i32);
3084
3085        assert_eq!(simplify(expr), expected);
3086
3087        // !c3 | c3 --> -1
3088        let expr = (-col("c3_non_null")) | col("c3_non_null");
3089        let expected = lit(-1i64);
3090
3091        assert_eq!(simplify(expr), expected);
3092
3093        // c3 | !c3 --> -1
3094        let expr = col("c3_non_null") | (-col("c3_non_null"));
3095        let expected = lit(-1i64);
3096
3097        assert_eq!(simplify(expr), expected);
3098    }
3099
3100    #[test]
3101    fn test_simplify_negated_bitwise_xor() {
3102        // !c4 ^ c4 --> -1
3103        let expr = (-col("c4_non_null")) ^ col("c4_non_null");
3104        let expected = lit(-1i32);
3105
3106        assert_eq!(simplify(expr), expected);
3107
3108        // c4 ^ !c4 --> -1
3109        let expr = col("c4_non_null") ^ (-col("c4_non_null"));
3110        let expected = lit(-1i32);
3111
3112        assert_eq!(simplify(expr), expected);
3113
3114        // !c3 ^ c3 --> -1
3115        let expr = (-col("c3_non_null")) ^ col("c3_non_null");
3116        let expected = lit(-1i64);
3117
3118        assert_eq!(simplify(expr), expected);
3119
3120        // c3 ^ !c3 --> -1
3121        let expr = col("c3_non_null") ^ (-col("c3_non_null"));
3122        let expected = lit(-1i64);
3123
3124        assert_eq!(simplify(expr), expected);
3125    }
3126
3127    #[test]
3128    fn test_simplify_bitwise_and_or() {
3129        // (c2 < 3) & ((c2 < 3) | c1) -> (c2 < 3)
3130        let expr = bitwise_and(
3131            col("c2_non_null").lt(lit(3)),
3132            bitwise_or(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
3133        );
3134        let expected = col("c2_non_null").lt(lit(3));
3135
3136        assert_eq!(simplify(expr), expected);
3137    }
3138
3139    #[test]
3140    fn test_simplify_bitwise_or_and() {
3141        // (c2 < 3) | ((c2 < 3) & c1) -> (c2 < 3)
3142        let expr = bitwise_or(
3143            col("c2_non_null").lt(lit(3)),
3144            bitwise_and(col("c2_non_null").lt(lit(3)), col("c1_non_null")),
3145        );
3146        let expected = col("c2_non_null").lt(lit(3));
3147
3148        assert_eq!(simplify(expr), expected);
3149    }
3150
3151    #[test]
3152    fn test_simplify_simple_bitwise_and() {
3153        // (c2 > 5) & (c2 > 5) -> (c2 > 5)
3154        let expr = (col("c2").gt(lit(5))).bitand(col("c2").gt(lit(5)));
3155        let expected = col("c2").gt(lit(5));
3156
3157        assert_eq!(simplify(expr), expected);
3158    }
3159
3160    #[test]
3161    fn test_simplify_simple_bitwise_or() {
3162        // (c2 > 5) | (c2 > 5) -> (c2 > 5)
3163        let expr = (col("c2").gt(lit(5))).bitor(col("c2").gt(lit(5)));
3164        let expected = col("c2").gt(lit(5));
3165
3166        assert_eq!(simplify(expr), expected);
3167    }
3168
3169    #[test]
3170    fn test_simplify_simple_bitwise_xor() {
3171        // c4 ^ c4 -> 0
3172        let expr = (col("c4")).bitxor(col("c4"));
3173        let expected = lit(0u32);
3174
3175        assert_eq!(simplify(expr), expected);
3176
3177        // c3 ^ c3 -> 0
3178        let expr = col("c3").bitxor(col("c3"));
3179        let expected = lit(0i64);
3180
3181        assert_eq!(simplify(expr), expected);
3182    }
3183
3184    #[test]
3185    fn test_simplify_modulo_by_zero_non_null() {
3186        // because modulo by 0 maybe occur in short-circuit expression
3187        // so we should not simplify this, and throw error in runtime.
3188        let expr = col("c2_non_null") % lit(0);
3189        let expected = expr.clone();
3190
3191        assert_eq!(simplify(expr), expected);
3192    }
3193
3194    #[test]
3195    fn test_simplify_simple_and() {
3196        // (c2 > 5) AND (c2 > 5) -> (c2 > 5)
3197        let expr = (col("c2").gt(lit(5))).and(col("c2").gt(lit(5)));
3198        let expected = col("c2").gt(lit(5));
3199
3200        assert_eq!(simplify(expr), expected);
3201    }
3202
3203    #[test]
3204    fn test_simplify_composed_and() {
3205        // ((c2 > 5) AND (c1 < 6)) AND (c2 > 5)
3206        let expr = and(
3207            and(col("c2").gt(lit(5)), col("c1").lt(lit(6))),
3208            col("c2").gt(lit(5)),
3209        );
3210        let expected = and(col("c2").gt(lit(5)), col("c1").lt(lit(6)));
3211
3212        assert_eq!(simplify(expr), expected);
3213    }
3214
3215    #[test]
3216    fn test_simplify_negated_and() {
3217        // (c2 > 5) AND !(c2 > 5) --> (c2 > 5) AND (c2 <= 5)
3218        let expr = and(col("c2").gt(lit(5)), Expr::not(col("c2").gt(lit(5))));
3219        let expected = col("c2").gt(lit(5)).and(col("c2").lt_eq(lit(5)));
3220
3221        assert_eq!(simplify(expr), expected);
3222    }
3223
3224    #[test]
3225    fn test_simplify_or_and() {
3226        let l = col("c2").gt(lit(5));
3227        let r = and(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
3228
3229        // (c2 > 5) OR ((c1 < 6) AND (c2 > 5))
3230        let expr = or(l.clone(), r.clone());
3231
3232        let expected = l.clone();
3233        assert_eq!(simplify(expr), expected);
3234
3235        // ((c1 < 6) AND (c2 > 5)) OR (c2 > 5)
3236        let expr = or(r, l);
3237        assert_eq!(simplify(expr), expected);
3238    }
3239
3240    #[test]
3241    fn test_simplify_or_and_non_null() {
3242        let l = col("c2_non_null").gt(lit(5));
3243        let r = and(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
3244
3245        // (c2 > 5) OR ((c1 < 6) AND (c2 > 5)) --> c2 > 5
3246        let expr = or(l.clone(), r.clone());
3247
3248        // This is only true if `c1 < 6` is not nullable / can not be null.
3249        let expected = col("c2_non_null").gt(lit(5));
3250
3251        assert_eq!(simplify(expr), expected);
3252
3253        // ((c1 < 6) AND (c2 > 5)) OR (c2 > 5) --> c2 > 5
3254        let expr = or(l, r);
3255
3256        assert_eq!(simplify(expr), expected);
3257    }
3258
3259    #[test]
3260    fn test_simplify_and_or() {
3261        let l = col("c2").gt(lit(5));
3262        let r = or(col("c1").lt(lit(6)), col("c2").gt(lit(5)));
3263
3264        // (c2 > 5) AND ((c1 < 6) OR (c2 > 5)) --> c2 > 5
3265        let expr = and(l.clone(), r.clone());
3266
3267        let expected = l.clone();
3268        assert_eq!(simplify(expr), expected);
3269
3270        // ((c1 < 6) OR (c2 > 5)) AND (c2 > 5) --> c2 > 5
3271        let expr = and(r, l);
3272        assert_eq!(simplify(expr), expected);
3273    }
3274
3275    #[test]
3276    fn test_simplify_and_or_non_null() {
3277        let l = col("c2_non_null").gt(lit(5));
3278        let r = or(col("c1_non_null").lt(lit(6)), col("c2_non_null").gt(lit(5)));
3279
3280        // (c2 > 5) AND ((c1 < 6) OR (c2 > 5)) --> c2 > 5
3281        let expr = and(l.clone(), r.clone());
3282
3283        // This is only true if `c1 < 6` is not nullable / can not be null.
3284        let expected = col("c2_non_null").gt(lit(5));
3285
3286        assert_eq!(simplify(expr), expected);
3287
3288        // ((c1 < 6) OR (c2 > 5)) AND (c2 > 5) --> c2 > 5
3289        let expr = and(l, r);
3290
3291        assert_eq!(simplify(expr), expected);
3292    }
3293
3294    #[test]
3295    fn test_simplify_by_de_morgan_laws() {
3296        // Laws with logical operations
3297        // !(c3 AND c4) --> !c3 OR !c4
3298        let expr = and(col("c3"), col("c4")).not();
3299        let expected = or(col("c3").not(), col("c4").not());
3300        assert_eq!(simplify(expr), expected);
3301        // !(c3 OR c4) --> !c3 AND !c4
3302        let expr = or(col("c3"), col("c4")).not();
3303        let expected = and(col("c3").not(), col("c4").not());
3304        assert_eq!(simplify(expr), expected);
3305        // !(!c3) --> c3
3306        let expr = col("c3").not().not();
3307        let expected = col("c3");
3308        assert_eq!(simplify(expr), expected);
3309
3310        // Laws with bitwise operations
3311        // !(c3 & c4) --> !c3 | !c4
3312        let expr = -bitwise_and(col("c3"), col("c4"));
3313        let expected = bitwise_or(-col("c3"), -col("c4"));
3314        assert_eq!(simplify(expr), expected);
3315        // !(c3 | c4) --> !c3 & !c4
3316        let expr = -bitwise_or(col("c3"), col("c4"));
3317        let expected = bitwise_and(-col("c3"), -col("c4"));
3318        assert_eq!(simplify(expr), expected);
3319        // !(!c3) --> c3
3320        let expr = -(-col("c3"));
3321        let expected = col("c3");
3322        assert_eq!(simplify(expr), expected);
3323    }
3324
3325    #[test]
3326    fn test_simplify_null_and_false() {
3327        let expr = and(lit_bool_null(), lit(false));
3328        let expr_eq = lit(false);
3329
3330        assert_eq!(simplify(expr), expr_eq);
3331    }
3332
3333    #[test]
3334    fn test_simplify_divide_null_by_null() {
3335        let null = lit(ScalarValue::Int32(None));
3336        let expr_plus = null.clone() / null.clone();
3337        let expr_eq = null;
3338
3339        assert_eq!(simplify(expr_plus), expr_eq);
3340    }
3341
3342    #[test]
3343    fn test_simplify_simplify_arithmetic_expr() {
3344        let expr_plus = lit(1) + lit(1);
3345
3346        assert_eq!(simplify(expr_plus), lit(2));
3347    }
3348
3349    #[test]
3350    fn test_simplify_simplify_eq_expr() {
3351        let expr_eq = binary_expr(lit(1), Operator::Eq, lit(1));
3352
3353        assert_eq!(simplify(expr_eq), lit(true));
3354    }
3355
3356    #[test]
3357    fn test_simplify_regex() {
3358        // malformed regex
3359        assert_contains!(
3360            try_simplify(regex_match(col("c1"), lit("foo{")))
3361                .unwrap_err()
3362                .to_string(),
3363            "regex parse error"
3364        );
3365
3366        // unsupported cases
3367        assert_no_change(regex_match(col("c1"), lit("foo.*")));
3368        assert_no_change(regex_match(col("c1"), lit("(foo)")));
3369        assert_no_change(regex_match(col("c1"), lit("%")));
3370        assert_no_change(regex_match(col("c1"), lit("_")));
3371        assert_no_change(regex_match(col("c1"), lit("f%o")));
3372        assert_no_change(regex_match(col("c1"), lit("^f%o")));
3373        assert_no_change(regex_match(col("c1"), lit("f_o")));
3374
3375        // empty cases
3376        assert_change(
3377            regex_match(col("c1"), lit("")),
3378            if_not_null(col("c1"), true),
3379        );
3380        assert_change(
3381            regex_not_match(col("c1"), lit("")),
3382            if_not_null(col("c1"), false),
3383        );
3384        assert_change(
3385            regex_imatch(col("c1"), lit("")),
3386            if_not_null(col("c1"), true),
3387        );
3388        assert_change(
3389            regex_not_imatch(col("c1"), lit("")),
3390            if_not_null(col("c1"), false),
3391        );
3392
3393        // single character
3394        assert_change(regex_match(col("c1"), lit("x")), col("c1").like(lit("%x%")));
3395
3396        // single word
3397        assert_change(
3398            regex_match(col("c1"), lit("foo")),
3399            col("c1").like(lit("%foo%")),
3400        );
3401
3402        // regular expressions that match an exact literal
3403        assert_change(regex_match(col("c1"), lit("^$")), col("c1").eq(lit("")));
3404        assert_change(
3405            regex_not_match(col("c1"), lit("^$")),
3406            col("c1").not_eq(lit("")),
3407        );
3408        assert_change(
3409            regex_match(col("c1"), lit("^foo$")),
3410            col("c1").eq(lit("foo")),
3411        );
3412        assert_change(
3413            regex_not_match(col("c1"), lit("^foo$")),
3414            col("c1").not_eq(lit("foo")),
3415        );
3416
3417        // regular expressions that match exact captured literals
3418        assert_change(
3419            regex_match(col("c1"), lit("^(foo|bar)$")),
3420            col("c1").eq(lit("foo")).or(col("c1").eq(lit("bar"))),
3421        );
3422        assert_change(
3423            regex_not_match(col("c1"), lit("^(foo|bar)$")),
3424            col("c1")
3425                .not_eq(lit("foo"))
3426                .and(col("c1").not_eq(lit("bar"))),
3427        );
3428        assert_change(
3429            regex_match(col("c1"), lit("^(foo)$")),
3430            col("c1").eq(lit("foo")),
3431        );
3432        assert_change(
3433            regex_match(col("c1"), lit("^(foo|bar|baz)$")),
3434            ((col("c1").eq(lit("foo"))).or(col("c1").eq(lit("bar"))))
3435                .or(col("c1").eq(lit("baz"))),
3436        );
3437        assert_change(
3438            regex_match(col("c1"), lit("^(foo|bar|baz|qux)$")),
3439            col("c1")
3440                .in_list(vec![lit("foo"), lit("bar"), lit("baz"), lit("qux")], false),
3441        );
3442        assert_change(
3443            regex_match(col("c1"), lit("^(fo_o)$")),
3444            col("c1").eq(lit("fo_o")),
3445        );
3446        assert_change(
3447            regex_match(col("c1"), lit("^(fo_o)$")),
3448            col("c1").eq(lit("fo_o")),
3449        );
3450        assert_change(
3451            regex_match(col("c1"), lit("^(fo_o|ba_r)$")),
3452            col("c1").eq(lit("fo_o")).or(col("c1").eq(lit("ba_r"))),
3453        );
3454        assert_change(
3455            regex_not_match(col("c1"), lit("^(fo_o|ba_r)$")),
3456            col("c1")
3457                .not_eq(lit("fo_o"))
3458                .and(col("c1").not_eq(lit("ba_r"))),
3459        );
3460        assert_change(
3461            regex_match(col("c1"), lit("^(fo_o|ba_r|ba_z)$")),
3462            ((col("c1").eq(lit("fo_o"))).or(col("c1").eq(lit("ba_r"))))
3463                .or(col("c1").eq(lit("ba_z"))),
3464        );
3465        assert_change(
3466            regex_match(col("c1"), lit("^(fo_o|ba_r|baz|qu_x)$")),
3467            col("c1").in_list(
3468                vec![lit("fo_o"), lit("ba_r"), lit("baz"), lit("qu_x")],
3469                false,
3470            ),
3471        );
3472
3473        // regular expressions that mismatch captured literals
3474        assert_no_change(regex_match(col("c1"), lit("(foo|bar)")));
3475        assert_no_change(regex_match(col("c1"), lit("(foo|bar)*")));
3476        assert_no_change(regex_match(col("c1"), lit("(fo_o|b_ar)")));
3477        assert_no_change(regex_match(col("c1"), lit("(foo|ba_r)*")));
3478        assert_no_change(regex_match(col("c1"), lit("(fo_o|ba_r)*")));
3479        assert_no_change(regex_match(col("c1"), lit("^(foo|bar)*")));
3480        assert_no_change(regex_match(col("c1"), lit("^(foo)(bar)$")));
3481        assert_no_change(regex_match(col("c1"), lit("^")));
3482        assert_no_change(regex_match(col("c1"), lit("$")));
3483        assert_no_change(regex_match(col("c1"), lit("$^")));
3484        assert_no_change(regex_match(col("c1"), lit("$foo^")));
3485
3486        // regular expressions that match a partial literal
3487        assert_change(
3488            regex_match(col("c1"), lit("^foo")),
3489            col("c1").like(lit("foo%")),
3490        );
3491        assert_change(
3492            regex_match(col("c1"), lit("foo$")),
3493            col("c1").like(lit("%foo")),
3494        );
3495        assert_change(
3496            regex_match(col("c1"), lit("^foo|bar$")),
3497            col("c1").like(lit("foo%")).or(col("c1").like(lit("%bar"))),
3498        );
3499
3500        // OR-chain
3501        assert_change(
3502            regex_match(col("c1"), lit("foo|bar|baz")),
3503            col("c1")
3504                .like(lit("%foo%"))
3505                .or(col("c1").like(lit("%bar%")))
3506                .or(col("c1").like(lit("%baz%"))),
3507        );
3508        assert_change(
3509            regex_match(col("c1"), lit("foo|x|baz")),
3510            col("c1")
3511                .like(lit("%foo%"))
3512                .or(col("c1").like(lit("%x%")))
3513                .or(col("c1").like(lit("%baz%"))),
3514        );
3515        assert_change(
3516            regex_not_match(col("c1"), lit("foo|bar|baz")),
3517            col("c1")
3518                .not_like(lit("%foo%"))
3519                .and(col("c1").not_like(lit("%bar%")))
3520                .and(col("c1").not_like(lit("%baz%"))),
3521        );
3522        // both anchored expressions (translated to equality) and unanchored
3523        assert_change(
3524            regex_match(col("c1"), lit("foo|^x$|baz")),
3525            col("c1")
3526                .like(lit("%foo%"))
3527                .or(col("c1").eq(lit("x")))
3528                .or(col("c1").like(lit("%baz%"))),
3529        );
3530        assert_change(
3531            regex_not_match(col("c1"), lit("foo|^bar$|baz")),
3532            col("c1")
3533                .not_like(lit("%foo%"))
3534                .and(col("c1").not_eq(lit("bar")))
3535                .and(col("c1").not_like(lit("%baz%"))),
3536        );
3537        // Too many patterns (MAX_REGEX_ALTERNATIONS_EXPANSION)
3538        assert_no_change(regex_match(col("c1"), lit("foo|bar|baz|blarg|bozo|etc")));
3539    }
3540
3541    #[track_caller]
3542    fn assert_no_change(expr: Expr) {
3543        let optimized = simplify(expr.clone());
3544        assert_eq!(expr, optimized);
3545    }
3546
3547    #[track_caller]
3548    fn assert_change(expr: Expr, expected: Expr) {
3549        let optimized = simplify(expr);
3550        assert_eq!(optimized, expected);
3551    }
3552
3553    fn regex_match(left: Expr, right: Expr) -> Expr {
3554        Expr::BinaryExpr(BinaryExpr {
3555            left: Box::new(left),
3556            op: Operator::RegexMatch,
3557            right: Box::new(right),
3558        })
3559    }
3560
3561    fn regex_not_match(left: Expr, right: Expr) -> Expr {
3562        Expr::BinaryExpr(BinaryExpr {
3563            left: Box::new(left),
3564            op: Operator::RegexNotMatch,
3565            right: Box::new(right),
3566        })
3567    }
3568
3569    fn regex_imatch(left: Expr, right: Expr) -> Expr {
3570        Expr::BinaryExpr(BinaryExpr {
3571            left: Box::new(left),
3572            op: Operator::RegexIMatch,
3573            right: Box::new(right),
3574        })
3575    }
3576
3577    fn regex_not_imatch(left: Expr, right: Expr) -> Expr {
3578        Expr::BinaryExpr(BinaryExpr {
3579            left: Box::new(left),
3580            op: Operator::RegexNotIMatch,
3581            right: Box::new(right),
3582        })
3583    }
3584
3585    // ------------------------------
3586    // ----- Simplifier tests -------
3587    // ------------------------------
3588
3589    fn try_simplify(expr: Expr) -> Result<Expr> {
3590        let schema = expr_test_schema();
3591        let simplifier =
3592            ExprSimplifier::new(SimplifyContext::builder().with_schema(schema).build());
3593        simplifier.simplify(expr)
3594    }
3595
3596    fn coerce(expr: Expr) -> Expr {
3597        let schema = expr_test_schema();
3598        let simplifier = ExprSimplifier::new(
3599            SimplifyContext::builder()
3600                .with_schema(Arc::clone(&schema))
3601                .build(),
3602        );
3603        simplifier.coerce(expr, schema.as_ref()).unwrap()
3604    }
3605
3606    fn simplify(expr: Expr) -> Expr {
3607        try_simplify(expr).unwrap()
3608    }
3609
3610    fn try_simplify_with_cycle_count(expr: Expr) -> Result<(Expr, u32)> {
3611        let schema = expr_test_schema();
3612        let simplifier =
3613            ExprSimplifier::new(SimplifyContext::builder().with_schema(schema).build());
3614        let (expr, count) = simplifier.simplify_with_cycle_count_transformed(expr)?;
3615        Ok((expr.data, count))
3616    }
3617
3618    fn simplify_with_cycle_count(expr: Expr) -> (Expr, u32) {
3619        try_simplify_with_cycle_count(expr).unwrap()
3620    }
3621
3622    fn simplify_with_guarantee(
3623        expr: Expr,
3624        guarantees: Vec<(Expr, NullableInterval)>,
3625    ) -> Expr {
3626        let schema = expr_test_schema();
3627        let simplifier =
3628            ExprSimplifier::new(SimplifyContext::builder().with_schema(schema).build())
3629                .with_guarantees(guarantees);
3630        simplifier.simplify(expr).unwrap()
3631    }
3632
3633    fn expr_test_schema() -> DFSchemaRef {
3634        static EXPR_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
3635            Arc::new(
3636                DFSchema::from_unqualified_fields(
3637                    vec![
3638                        Field::new("c1", DataType::Utf8, true),
3639                        Field::new("c2", DataType::Boolean, true),
3640                        Field::new("c3", DataType::Int64, true),
3641                        Field::new("c4", DataType::UInt32, true),
3642                        Field::new("c1_non_null", DataType::Utf8, false),
3643                        Field::new("c2_non_null", DataType::Boolean, false),
3644                        Field::new("c3_non_null", DataType::Int64, false),
3645                        Field::new("c4_non_null", DataType::UInt32, false),
3646                        Field::new("c5", DataType::FixedSizeBinary(3), true),
3647                    ]
3648                    .into(),
3649                    HashMap::new(),
3650                )
3651                .unwrap(),
3652            )
3653        });
3654        Arc::clone(&EXPR_TEST_SCHEMA)
3655    }
3656
3657    #[test]
3658    fn simplify_expr_null_comparison() {
3659        // x = null is always null
3660        assert_eq!(
3661            simplify(lit(true).eq(lit(ScalarValue::Boolean(None)))),
3662            lit(ScalarValue::Boolean(None)),
3663        );
3664
3665        // null != null is always null
3666        assert_eq!(
3667            simplify(
3668                lit(ScalarValue::Boolean(None)).not_eq(lit(ScalarValue::Boolean(None)))
3669            ),
3670            lit(ScalarValue::Boolean(None)),
3671        );
3672
3673        // x != null is always null
3674        assert_eq!(
3675            simplify(col("c2").not_eq(lit(ScalarValue::Boolean(None)))),
3676            lit(ScalarValue::Boolean(None)),
3677        );
3678
3679        // null = x is always null
3680        assert_eq!(
3681            simplify(lit(ScalarValue::Boolean(None)).eq(col("c2"))),
3682            lit(ScalarValue::Boolean(None)),
3683        );
3684    }
3685
3686    #[test]
3687    fn simplify_expr_is_not_null() {
3688        assert_eq!(
3689            simplify(Expr::IsNotNull(Box::new(col("c1")))),
3690            Expr::IsNotNull(Box::new(col("c1")))
3691        );
3692
3693        // 'c1_non_null IS NOT NULL' is always true
3694        assert_eq!(
3695            simplify(Expr::IsNotNull(Box::new(col("c1_non_null")))),
3696            lit(true)
3697        );
3698    }
3699
3700    #[test]
3701    fn simplify_expr_is_null() {
3702        assert_eq!(
3703            simplify(Expr::IsNull(Box::new(col("c1")))),
3704            Expr::IsNull(Box::new(col("c1")))
3705        );
3706
3707        // 'c1_non_null IS NULL' is always false
3708        assert_eq!(
3709            simplify(Expr::IsNull(Box::new(col("c1_non_null")))),
3710            lit(false)
3711        );
3712    }
3713
3714    #[test]
3715    fn simplify_expr_is_unknown() {
3716        assert_eq!(simplify(col("c2").is_unknown()), col("c2").is_unknown(),);
3717
3718        // 'c2_non_null is unknown' is always false
3719        assert_eq!(simplify(col("c2_non_null").is_unknown()), lit(false));
3720    }
3721
3722    #[test]
3723    fn simplify_expr_is_not_known() {
3724        assert_eq!(
3725            simplify(col("c2").is_not_unknown()),
3726            col("c2").is_not_unknown()
3727        );
3728
3729        // 'c2_non_null is not unknown' is always true
3730        assert_eq!(simplify(col("c2_non_null").is_not_unknown()), lit(true));
3731    }
3732
3733    #[test]
3734    fn simplify_expr_eq() {
3735        let schema = expr_test_schema();
3736        assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3737
3738        // true = true -> true
3739        assert_eq!(simplify(lit(true).eq(lit(true))), lit(true));
3740
3741        // true = false -> false
3742        assert_eq!(simplify(lit(true).eq(lit(false))), lit(false),);
3743
3744        // c2 = true -> c2
3745        assert_eq!(simplify(col("c2").eq(lit(true))), col("c2"));
3746
3747        // c2 = false => !c2
3748        assert_eq!(simplify(col("c2").eq(lit(false))), col("c2").not(),);
3749    }
3750
3751    #[test]
3752    fn simplify_expr_eq_skip_nonboolean_type() {
3753        let schema = expr_test_schema();
3754
3755        // When one of the operand is not of boolean type, folding the
3756        // other boolean constant will change return type of
3757        // expression to non-boolean.
3758        //
3759        // Make sure c1 column to be used in tests is not boolean type
3760        assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3761
3762        // don't fold c1 = foo
3763        assert_eq!(simplify(col("c1").eq(lit("foo"))), col("c1").eq(lit("foo")),);
3764    }
3765
3766    #[test]
3767    fn simplify_expr_not_eq() {
3768        let schema = expr_test_schema();
3769
3770        assert_eq!(col("c2").get_type(&schema).unwrap(), DataType::Boolean);
3771
3772        // c2 != true -> !c2
3773        assert_eq!(simplify(col("c2").not_eq(lit(true))), col("c2").not(),);
3774
3775        // c2 != false -> c2
3776        assert_eq!(simplify(col("c2").not_eq(lit(false))), col("c2"),);
3777
3778        // test constant
3779        assert_eq!(simplify(lit(true).not_eq(lit(true))), lit(false),);
3780
3781        assert_eq!(simplify(lit(true).not_eq(lit(false))), lit(true),);
3782    }
3783
3784    #[test]
3785    fn simplify_expr_not_eq_skip_nonboolean_type() {
3786        let schema = expr_test_schema();
3787
3788        // when one of the operand is not of boolean type, folding the
3789        // other boolean constant will change return type of
3790        // expression to non-boolean.
3791        assert_eq!(col("c1").get_type(&schema).unwrap(), DataType::Utf8);
3792
3793        assert_eq!(
3794            simplify(col("c1").not_eq(lit("foo"))),
3795            col("c1").not_eq(lit("foo")),
3796        );
3797    }
3798
3799    #[test]
3800    fn simplify_literal_case_equality() {
3801        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok"
3802        let simple_case = Expr::Case(Case::new(
3803            None,
3804            vec![(
3805                Box::new(col("c2_non_null").not_eq(lit(false))),
3806                Box::new(lit("ok")),
3807            )],
3808            Some(Box::new(lit("not_ok"))),
3809        ));
3810
3811        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok" == "ok"
3812        // -->
3813        // CASE WHEN c2 != false THEN "ok" == "ok" ELSE "not_ok" == "ok"
3814        // -->
3815        // CASE WHEN c2 != false THEN true ELSE false
3816        // -->
3817        // c2
3818        assert_eq!(
3819            simplify(binary_expr(simple_case.clone(), Operator::Eq, lit("ok"),)),
3820            col("c2_non_null"),
3821        );
3822
3823        // CASE WHEN c2 != false THEN "ok" ELSE "not_ok" != "ok"
3824        // -->
3825        // NOT(CASE WHEN c2 != false THEN "ok" == "ok" ELSE "not_ok" == "ok")
3826        // -->
3827        // NOT(CASE WHEN c2 != false THEN true ELSE false)
3828        // -->
3829        // NOT(c2)
3830        assert_eq!(
3831            simplify(binary_expr(simple_case, Operator::NotEq, lit("ok"),)),
3832            not(col("c2_non_null")),
3833        );
3834
3835        let complex_case = Expr::Case(Case::new(
3836            None,
3837            vec![
3838                (
3839                    Box::new(col("c1").eq(lit("inboxed"))),
3840                    Box::new(lit("pending")),
3841                ),
3842                (
3843                    Box::new(col("c1").eq(lit("scheduled"))),
3844                    Box::new(lit("pending")),
3845                ),
3846                (
3847                    Box::new(col("c1").eq(lit("completed"))),
3848                    Box::new(lit("completed")),
3849                ),
3850                (
3851                    Box::new(col("c1").eq(lit("paused"))),
3852                    Box::new(lit("paused")),
3853                ),
3854                (Box::new(col("c2")), Box::new(lit("running"))),
3855                (
3856                    Box::new(col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0)))),
3857                    Box::new(lit("backing-off")),
3858                ),
3859            ],
3860            Some(Box::new(lit("ready"))),
3861        ));
3862
3863        assert_eq!(
3864            simplify(binary_expr(
3865                complex_case.clone(),
3866                Operator::Eq,
3867                lit("completed"),
3868            )),
3869            not_distinct_from(col("c1").eq(lit("completed")), lit(true)).and(
3870                distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3871                    .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3872            )
3873        );
3874
3875        assert_eq!(
3876            simplify(binary_expr(
3877                complex_case.clone(),
3878                Operator::NotEq,
3879                lit("completed"),
3880            )),
3881            distinct_from(col("c1").eq(lit("completed")), lit(true))
3882                .or(not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3883                    .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true))))
3884        );
3885
3886        assert_eq!(
3887            simplify(binary_expr(
3888                complex_case.clone(),
3889                Operator::Eq,
3890                lit("running"),
3891            )),
3892            not_distinct_from(col("c2"), lit(true)).and(
3893                distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3894                    .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3895                    .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
3896                    .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
3897            )
3898        );
3899
3900        assert_eq!(
3901            simplify(binary_expr(
3902                complex_case.clone(),
3903                Operator::Eq,
3904                lit("ready"),
3905            )),
3906            distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3907                .and(distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3908                .and(distinct_from(col("c1").eq(lit("completed")), lit(true)))
3909                .and(distinct_from(col("c1").eq(lit("paused")), lit(true)))
3910                .and(distinct_from(col("c2"), lit(true)))
3911                .and(distinct_from(
3912                    col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
3913                    lit(true)
3914                ))
3915        );
3916
3917        assert_eq!(
3918            simplify(binary_expr(
3919                complex_case.clone(),
3920                Operator::NotEq,
3921                lit("ready"),
3922            )),
3923            not_distinct_from(col("c1").eq(lit("inboxed")), lit(true))
3924                .or(not_distinct_from(col("c1").eq(lit("scheduled")), lit(true)))
3925                .or(not_distinct_from(col("c1").eq(lit("completed")), lit(true)))
3926                .or(not_distinct_from(col("c1").eq(lit("paused")), lit(true)))
3927                .or(not_distinct_from(col("c2"), lit(true)))
3928                .or(not_distinct_from(
3929                    col("c1").eq(lit("invoked")).and(col("c3").gt(lit(0))),
3930                    lit(true)
3931                ))
3932        );
3933    }
3934
3935    #[test]
3936    fn simplify_expr_case_when_then_else() {
3937        // CASE WHEN c2 != false THEN "ok" == "not_ok" ELSE c2 == true
3938        // -->
3939        // CASE WHEN c2 THEN false ELSE c2
3940        // -->
3941        // false
3942        assert_eq!(
3943            simplify(Expr::Case(Case::new(
3944                None,
3945                vec![(
3946                    Box::new(col("c2_non_null").not_eq(lit(false))),
3947                    Box::new(lit("ok").eq(lit("not_ok"))),
3948                )],
3949                Some(Box::new(col("c2_non_null").eq(lit(true)))),
3950            ))),
3951            lit(false) // #1716
3952        );
3953
3954        // CASE WHEN c2 != false THEN "ok" == "ok" ELSE c2
3955        // -->
3956        // CASE WHEN c2 THEN true ELSE c2
3957        // -->
3958        // c2
3959        //
3960        // Need to call simplify 2x due to
3961        // https://github.com/apache/datafusion/issues/1160
3962        assert_eq!(
3963            simplify(simplify(Expr::Case(Case::new(
3964                None,
3965                vec![(
3966                    Box::new(col("c2_non_null").not_eq(lit(false))),
3967                    Box::new(lit("ok").eq(lit("ok"))),
3968                )],
3969                Some(Box::new(col("c2_non_null").eq(lit(true)))),
3970            )))),
3971            col("c2_non_null")
3972        );
3973
3974        // CASE WHEN ISNULL(c2) THEN true ELSE c2
3975        // -->
3976        // ISNULL(c2) OR c2
3977        //
3978        // Need to call simplify 2x due to
3979        // https://github.com/apache/datafusion/issues/1160
3980        assert_eq!(
3981            simplify(simplify(Expr::Case(Case::new(
3982                None,
3983                vec![(Box::new(col("c2").is_null()), Box::new(lit(true)),)],
3984                Some(Box::new(col("c2"))),
3985            )))),
3986            col("c2")
3987                .is_null()
3988                .or(col("c2").is_not_null().and(col("c2")))
3989        );
3990
3991        // CASE WHEN c1 then true WHEN c2 then false ELSE true
3992        // --> c1 OR (NOT(c1) AND c2 AND FALSE) OR (NOT(c1 OR c2) AND TRUE)
3993        // --> c1 OR (NOT(c1) AND NOT(c2))
3994        // --> c1 OR NOT(c2)
3995        //
3996        // Need to call simplify 2x due to
3997        // https://github.com/apache/datafusion/issues/1160
3998        assert_eq!(
3999            simplify(simplify(Expr::Case(Case::new(
4000                None,
4001                vec![
4002                    (Box::new(col("c1_non_null")), Box::new(lit(true)),),
4003                    (Box::new(col("c2_non_null")), Box::new(lit(false)),),
4004                ],
4005                Some(Box::new(lit(true))),
4006            )))),
4007            col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
4008        );
4009
4010        // CASE WHEN c1 then true WHEN c2 then true ELSE false
4011        // --> c1 OR (NOT(c1) AND c2 AND TRUE) OR (NOT(c1 OR c2) AND FALSE)
4012        // --> c1 OR (NOT(c1) AND c2)
4013        // --> c1 OR c2
4014        //
4015        // Need to call simplify 2x due to
4016        // https://github.com/apache/datafusion/issues/1160
4017        assert_eq!(
4018            simplify(simplify(Expr::Case(Case::new(
4019                None,
4020                vec![
4021                    (Box::new(col("c1_non_null")), Box::new(lit(true)),),
4022                    (Box::new(col("c2_non_null")), Box::new(lit(false)),),
4023                ],
4024                Some(Box::new(lit(true))),
4025            )))),
4026            col("c1_non_null").or(col("c1_non_null").not().and(col("c2_non_null").not()))
4027        );
4028
4029        // CASE WHEN c > 0 THEN true END AS c1
4030        assert_eq!(
4031            simplify(simplify(Expr::Case(Case::new(
4032                None,
4033                vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
4034                None,
4035            )))),
4036            not_distinct_from(col("c3").gt(lit(0_i64)), lit(true)).or(distinct_from(
4037                col("c3").gt(lit(0_i64)),
4038                lit(true)
4039            )
4040            .and(lit_bool_null()))
4041        );
4042
4043        // CASE WHEN c > 0 THEN true ELSE false END AS c1
4044        assert_eq!(
4045            simplify(simplify(Expr::Case(Case::new(
4046                None,
4047                vec![(Box::new(col("c3").gt(lit(0_i64))), Box::new(lit(true)))],
4048                Some(Box::new(lit(false))),
4049            )))),
4050            not_distinct_from(col("c3").gt(lit(0_i64)), lit(true))
4051        );
4052    }
4053
4054    #[test]
4055    fn simplify_expr_case_when_first_true() {
4056        // CASE WHEN true THEN 1 ELSE c1 END --> 1
4057        assert_eq!(
4058            simplify(Expr::Case(Case::new(
4059                None,
4060                vec![(Box::new(lit(true)), Box::new(lit(1)),)],
4061                Some(Box::new(col("c1"))),
4062            ))),
4063            lit(1)
4064        );
4065
4066        // CASE WHEN true THEN col('a') ELSE col('b') END --> col('a')
4067        assert_eq!(
4068            simplify(Expr::Case(Case::new(
4069                None,
4070                vec![(Box::new(lit(true)), Box::new(lit("a")),)],
4071                Some(Box::new(lit("b"))),
4072            ))),
4073            lit("a")
4074        );
4075
4076        // CASE WHEN true THEN col('a') WHEN col('x') > 5 THEN col('b') ELSE col('c') END --> col('a')
4077        assert_eq!(
4078            simplify(Expr::Case(Case::new(
4079                None,
4080                vec![
4081                    (Box::new(lit(true)), Box::new(lit("a"))),
4082                    (Box::new(lit("x").gt(lit(5))), Box::new(lit("b"))),
4083                ],
4084                Some(Box::new(lit("c"))),
4085            ))),
4086            lit("a")
4087        );
4088
4089        // CASE WHEN true THEN col('a') END --> col('a') (no else clause)
4090        assert_eq!(
4091            simplify(Expr::Case(Case::new(
4092                None,
4093                vec![(Box::new(lit(true)), Box::new(lit("a")),)],
4094                None,
4095            ))),
4096            lit("a")
4097        );
4098
4099        // Negative test: CASE WHEN c2 THEN 1 ELSE 2 END should not be simplified
4100        let expr = Expr::Case(Case::new(
4101            None,
4102            vec![(Box::new(col("c2")), Box::new(lit(1)))],
4103            Some(Box::new(lit(2))),
4104        ));
4105        assert_eq!(simplify(expr.clone()), expr);
4106
4107        // Negative test: CASE WHEN false THEN 1 ELSE 2 END should not use this rule
4108        let expr = Expr::Case(Case::new(
4109            None,
4110            vec![(Box::new(lit(false)), Box::new(lit(1)))],
4111            Some(Box::new(lit(2))),
4112        ));
4113        assert_ne!(simplify(expr), lit(1));
4114
4115        // Negative test: CASE WHEN col('c1') > 5 THEN 1 ELSE 2 END should not be simplified
4116        let expr = Expr::Case(Case::new(
4117            None,
4118            vec![(Box::new(col("c1").gt(lit(5))), Box::new(lit(1)))],
4119            Some(Box::new(lit(2))),
4120        ));
4121        assert_eq!(simplify(expr.clone()), expr);
4122    }
4123
4124    #[test]
4125    fn simplify_expr_case_when_any_true() {
4126        // CASE WHEN c3 > 0 THEN 'a' WHEN true THEN 'b' ELSE 'c' END --> CASE WHEN c3 > 0 THEN 'a' ELSE 'b' END
4127        assert_eq!(
4128            simplify(Expr::Case(Case::new(
4129                None,
4130                vec![
4131                    (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4132                    (Box::new(lit(true)), Box::new(lit("b"))),
4133                ],
4134                Some(Box::new(lit("c"))),
4135            ))),
4136            Expr::Case(Case::new(
4137                None,
4138                vec![(Box::new(col("c3").gt(lit(0))), Box::new(lit("a")))],
4139                Some(Box::new(lit("b"))),
4140            ))
4141        );
4142
4143        // CASE WHEN c3 > 0 THEN 'a' WHEN c4 < 0 THEN 'b' WHEN true THEN 'c' WHEN c3 = 0 THEN 'd' ELSE 'e' END
4144        // --> CASE WHEN c3 > 0 THEN 'a' WHEN c4 < 0 THEN 'b' ELSE 'c' END
4145        assert_eq!(
4146            simplify(Expr::Case(Case::new(
4147                None,
4148                vec![
4149                    (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4150                    (Box::new(col("c4").lt(lit(0))), Box::new(lit("b"))),
4151                    (Box::new(lit(true)), Box::new(lit("c"))),
4152                    (Box::new(col("c3").eq(lit(0))), Box::new(lit("d"))),
4153                ],
4154                Some(Box::new(lit("e"))),
4155            ))),
4156            Expr::Case(Case::new(
4157                None,
4158                vec![
4159                    (Box::new(col("c3").gt(lit(0))), Box::new(lit("a"))),
4160                    (Box::new(col("c4").lt(lit(0))), Box::new(lit("b"))),
4161                ],
4162                Some(Box::new(lit("c"))),
4163            ))
4164        );
4165
4166        // CASE WHEN c3 > 0 THEN 1 WHEN c4 < 0 THEN 2 WHEN true THEN 3 END (no else)
4167        // --> CASE WHEN c3 > 0 THEN 1 WHEN c4 < 0 THEN 2 ELSE 3 END
4168        assert_eq!(
4169            simplify(Expr::Case(Case::new(
4170                None,
4171                vec![
4172                    (Box::new(col("c3").gt(lit(0))), Box::new(lit(1))),
4173                    (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4174                    (Box::new(lit(true)), Box::new(lit(3))),
4175                ],
4176                None,
4177            ))),
4178            Expr::Case(Case::new(
4179                None,
4180                vec![
4181                    (Box::new(col("c3").gt(lit(0))), Box::new(lit(1))),
4182                    (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4183                ],
4184                Some(Box::new(lit(3))),
4185            ))
4186        );
4187
4188        // Negative test: CASE WHEN c3 > 0 THEN c3 WHEN c4 < 0 THEN 2 ELSE 3 END should not be simplified
4189        let expr = Expr::Case(Case::new(
4190            None,
4191            vec![
4192                (Box::new(col("c3").gt(lit(0))), Box::new(col("c3"))),
4193                (Box::new(col("c4").lt(lit(0))), Box::new(lit(2))),
4194            ],
4195            Some(Box::new(lit(3))),
4196        ));
4197        assert_eq!(simplify(expr.clone()), expr);
4198    }
4199
4200    #[test]
4201    fn simplify_expr_case_when_any_false() {
4202        // CASE WHEN false THEN 'a' END --> NULL
4203        assert_eq!(
4204            simplify(Expr::Case(Case::new(
4205                None,
4206                vec![(Box::new(lit(false)), Box::new(lit("a")))],
4207                None,
4208            ))),
4209            Expr::Literal(ScalarValue::Utf8(None), None)
4210        );
4211
4212        // CASE WHEN false THEN 2 ELSE 1 END --> 1
4213        assert_eq!(
4214            simplify(Expr::Case(Case::new(
4215                None,
4216                vec![(Box::new(lit(false)), Box::new(lit(2)))],
4217                Some(Box::new(lit(1))),
4218            ))),
4219            lit(1),
4220        );
4221
4222        // CASE WHEN c3 < 10 THEN 'b' WHEN false then c3 ELSE c4 END --> CASE WHEN c3 < 10 THEN b ELSE c4 END
4223        assert_eq!(
4224            simplify(Expr::Case(Case::new(
4225                None,
4226                vec![
4227                    (Box::new(col("c3").lt(lit(10))), Box::new(lit("b"))),
4228                    (Box::new(lit(false)), Box::new(col("c3"))),
4229                ],
4230                Some(Box::new(col("c4"))),
4231            ))),
4232            Expr::Case(Case::new(
4233                None,
4234                vec![(Box::new(col("c3").lt(lit(10))), Box::new(lit("b")))],
4235                Some(Box::new(col("c4"))),
4236            ))
4237        );
4238
4239        // Negative test: CASE WHEN c3 = 4 THEN 1 ELSE 2 END should not be simplified
4240        let expr = Expr::Case(Case::new(
4241            None,
4242            vec![(Box::new(col("c3").eq(lit(4))), Box::new(lit(1)))],
4243            Some(Box::new(lit(2))),
4244        ));
4245        assert_eq!(simplify(expr.clone()), expr);
4246    }
4247
4248    fn distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4249        Expr::BinaryExpr(BinaryExpr {
4250            left: Box::new(left.into()),
4251            op: Operator::IsDistinctFrom,
4252            right: Box::new(right.into()),
4253        })
4254    }
4255
4256    fn not_distinct_from(left: impl Into<Expr>, right: impl Into<Expr>) -> Expr {
4257        Expr::BinaryExpr(BinaryExpr {
4258            left: Box::new(left.into()),
4259            op: Operator::IsNotDistinctFrom,
4260            right: Box::new(right.into()),
4261        })
4262    }
4263
4264    #[test]
4265    fn simplify_expr_bool_or() {
4266        // col || true is always true
4267        assert_eq!(simplify(col("c2").or(lit(true))), lit(true),);
4268
4269        // col || false is always col
4270        assert_eq!(simplify(col("c2").or(lit(false))), col("c2"),);
4271
4272        // true || null is always true
4273        assert_eq!(simplify(lit(true).or(lit_bool_null())), lit(true),);
4274
4275        // null || true is always true
4276        assert_eq!(simplify(lit_bool_null().or(lit(true))), lit(true),);
4277
4278        // false || null is always null
4279        assert_eq!(simplify(lit(false).or(lit_bool_null())), lit_bool_null(),);
4280
4281        // null || false is always null
4282        assert_eq!(simplify(lit_bool_null().or(lit(false))), lit_bool_null(),);
4283
4284        // ( c1 BETWEEN Int32(0) AND Int32(10) ) OR Boolean(NULL)
4285        // it can be either NULL or  TRUE depending on the value of `c1 BETWEEN Int32(0) AND Int32(10)`
4286        // and should not be rewritten
4287        let expr = col("c1").between(lit(0), lit(10));
4288        let expr = expr.or(lit_bool_null());
4289        let result = simplify(expr);
4290
4291        let expected_expr = or(
4292            and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
4293            lit_bool_null(),
4294        );
4295        assert_eq!(expected_expr, result);
4296    }
4297
4298    #[test]
4299    fn simplify_inlist() {
4300        assert_eq!(simplify(in_list(col("c1"), vec![], false)), lit(false));
4301        assert_eq!(simplify(in_list(col("c1"), vec![], true)), lit(true));
4302
4303        // null in (...)  --> null
4304        assert_eq!(
4305            simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], false)),
4306            lit_bool_null()
4307        );
4308
4309        // null not in (...)  --> null
4310        assert_eq!(
4311            simplify(in_list(lit_bool_null(), vec![col("c1"), lit(1)], true)),
4312            lit_bool_null()
4313        );
4314
4315        assert_eq!(
4316            simplify(in_list(col("c1"), vec![lit(1)], false)),
4317            col("c1").eq(lit(1))
4318        );
4319        assert_eq!(
4320            simplify(in_list(col("c1"), vec![lit(1)], true)),
4321            col("c1").not_eq(lit(1))
4322        );
4323
4324        // more complex expressions can be simplified if list contains
4325        // one element only
4326        assert_eq!(
4327            simplify(in_list(col("c1") * lit(10), vec![lit(2)], false)),
4328            (col("c1") * lit(10)).eq(lit(2))
4329        );
4330
4331        assert_eq!(
4332            simplify(in_list(col("c1"), vec![lit(1), lit(2)], false)),
4333            col("c1").eq(lit(1)).or(col("c1").eq(lit(2)))
4334        );
4335        assert_eq!(
4336            simplify(in_list(col("c1"), vec![lit(1), lit(2)], true)),
4337            col("c1").not_eq(lit(1)).and(col("c1").not_eq(lit(2)))
4338        );
4339
4340        let subquery = Arc::new(test_table_scan_with_name("test").unwrap());
4341        assert_eq!(
4342            simplify(in_list(
4343                col("c1"),
4344                vec![scalar_subquery(Arc::clone(&subquery))],
4345                false
4346            )),
4347            in_subquery(col("c1"), Arc::clone(&subquery))
4348        );
4349        assert_eq!(
4350            simplify(in_list(
4351                col("c1"),
4352                vec![scalar_subquery(Arc::clone(&subquery))],
4353                true
4354            )),
4355            not_in_subquery(col("c1"), subquery)
4356        );
4357
4358        let subquery1 =
4359            scalar_subquery(Arc::new(test_table_scan_with_name("test1").unwrap()));
4360        let subquery2 =
4361            scalar_subquery(Arc::new(test_table_scan_with_name("test2").unwrap()));
4362
4363        // c1 NOT IN (<subquery1>, <subquery2>) -> c1 != <subquery1> AND c1 != <subquery2>
4364        assert_eq!(
4365            simplify(in_list(
4366                col("c1"),
4367                vec![subquery1.clone(), subquery2.clone()],
4368                true
4369            )),
4370            col("c1")
4371                .not_eq(subquery1.clone())
4372                .and(col("c1").not_eq(subquery2.clone()))
4373        );
4374
4375        // c1 IN (<subquery1>, <subquery2>) -> c1 == <subquery1> OR c1 == <subquery2>
4376        assert_eq!(
4377            simplify(in_list(
4378                col("c1"),
4379                vec![subquery1.clone(), subquery2.clone()],
4380                false
4381            )),
4382            col("c1").eq(subquery1).or(col("c1").eq(subquery2))
4383        );
4384
4385        // 1. c1 IN (1,2,3,4) AND c1 IN (5,6,7,8) -> false
4386        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4387            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false),
4388        );
4389        assert_eq!(simplify(expr), lit(false));
4390
4391        // 2. c1 IN (1,2,3,4) AND c1 IN (4,5,6,7) -> c1 = 4
4392        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4393            in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], false),
4394        );
4395        assert_eq!(simplify(expr), col("c1").eq(lit(4)));
4396
4397        // 3. c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (5, 6, 7, 8) -> true
4398        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
4399            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
4400        );
4401        assert_eq!(simplify(expr), lit(true));
4402
4403        // 3.5 c1 NOT IN (1, 2, 3, 4) OR c1 NOT IN (4, 5, 6, 7) -> c1 != 4 (4 overlaps)
4404        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(
4405            in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
4406        );
4407        assert_eq!(simplify(expr), col("c1").not_eq(lit(4)));
4408
4409        // 4. c1 NOT IN (1,2,3,4) AND c1 NOT IN (4,5,6,7) -> c1 NOT IN (1,2,3,4,5,6,7)
4410        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
4411            in_list(col("c1"), vec![lit(4), lit(5), lit(6), lit(7)], true),
4412        );
4413        assert_eq!(
4414            simplify(expr),
4415            in_list(
4416                col("c1"),
4417                vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6), lit(7)],
4418                true
4419            )
4420        );
4421
4422        // 5. c1 IN (1,2,3,4) OR c1 IN (2,3,4,5) -> c1 IN (1,2,3,4,5)
4423        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).or(
4424            in_list(col("c1"), vec![lit(2), lit(3), lit(4), lit(5)], false),
4425        );
4426        assert_eq!(
4427            simplify(expr),
4428            in_list(
4429                col("c1"),
4430                vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4431                false
4432            )
4433        );
4434
4435        // 6. c1 IN (1,2,3) AND c1 NOT INT (1,2,3,4,5) -> false
4436        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3)], false).and(in_list(
4437            col("c1"),
4438            vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4439            true,
4440        ));
4441        assert_eq!(simplify(expr), lit(false));
4442
4443        // 7. c1 NOT IN (1,2,3,4) AND c1 IN (1,2,3,4,5) -> c1 = 5
4444        let expr =
4445            in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(in_list(
4446                col("c1"),
4447                vec![lit(1), lit(2), lit(3), lit(4), lit(5)],
4448                false,
4449            ));
4450        assert_eq!(simplify(expr), col("c1").eq(lit(5)));
4451
4452        // 8. c1 IN (1,2,3,4) AND c1 NOT IN (5,6,7,8) -> c1 IN (1,2,3,4)
4453        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false).and(
4454            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], true),
4455        );
4456        assert_eq!(
4457            simplify(expr),
4458            in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], false)
4459        );
4460
4461        // inlist with more than two expressions
4462        // c1 IN (1,2,3,4,5,6) AND c1 IN (1,3,5,6) AND c1 IN (3,6) -> c1 = 3 OR c1 = 6
4463        let expr = in_list(
4464            col("c1"),
4465            vec![lit(1), lit(2), lit(3), lit(4), lit(5), lit(6)],
4466            false,
4467        )
4468        .and(in_list(
4469            col("c1"),
4470            vec![lit(1), lit(3), lit(5), lit(6)],
4471            false,
4472        ))
4473        .and(in_list(col("c1"), vec![lit(3), lit(6)], false));
4474        assert_eq!(
4475            simplify(expr),
4476            col("c1").eq(lit(3)).or(col("c1").eq(lit(6)))
4477        );
4478
4479        // c1 NOT IN (1,2,3,4) AND c1 IN (5,6,7,8) AND c1 NOT IN (3,4,5,6) AND c1 IN (8,9,10) -> c1 = 8
4480        let expr = in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).and(
4481            in_list(col("c1"), vec![lit(5), lit(6), lit(7), lit(8)], false)
4482                .and(in_list(
4483                    col("c1"),
4484                    vec![lit(3), lit(4), lit(5), lit(6)],
4485                    true,
4486                ))
4487                .and(in_list(col("c1"), vec![lit(8), lit(9), lit(10)], false)),
4488        );
4489        assert_eq!(simplify(expr), col("c1").eq(lit(8)));
4490
4491        // Contains non-InList expression
4492        // c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9) -> c1 NOT IN (1,2,3,4) OR c1 != 5 OR c1 NOT IN (6,7,8,9)
4493        let expr =
4494            in_list(col("c1"), vec![lit(1), lit(2), lit(3), lit(4)], true).or(col("c1")
4495                .not_eq(lit(5))
4496                .or(in_list(
4497                    col("c1"),
4498                    vec![lit(6), lit(7), lit(8), lit(9)],
4499                    true,
4500                )));
4501        // TODO: Further simplify this expression
4502        // https://github.com/apache/datafusion/issues/8970
4503        // assert_eq!(simplify(expr.clone()), lit(true));
4504        assert_eq!(simplify(expr.clone()), expr);
4505    }
4506
4507    #[test]
4508    fn simplify_null_in_empty_inlist() {
4509        // `NULL::boolean IN ()` == `NULL::boolean IN (SELECT foo FROM empty)` == false
4510        let expr = in_list(lit_bool_null(), vec![], false);
4511        assert_eq!(simplify(expr), lit(false));
4512
4513        // `NULL::boolean NOT IN ()` == `NULL::boolean NOT IN (SELECT foo FROM empty)` == true
4514        let expr = in_list(lit_bool_null(), vec![], true);
4515        assert_eq!(simplify(expr), lit(true));
4516
4517        // `NULL IN ()` == `NULL IN (SELECT foo FROM empty)` == false
4518        let null_null = || Expr::Literal(ScalarValue::Null, None);
4519        let expr = in_list(null_null(), vec![], false);
4520        assert_eq!(simplify(expr), lit(false));
4521
4522        // `NULL NOT IN ()` == `NULL NOT IN (SELECT foo FROM empty)` == true
4523        let expr = in_list(null_null(), vec![], true);
4524        assert_eq!(simplify(expr), lit(true));
4525    }
4526
4527    #[test]
4528    fn just_simplifier_simplify_null_in_empty_inlist() {
4529        let simplify = |expr: Expr| -> Expr {
4530            let schema = expr_test_schema();
4531            let info = SimplifyContext::builder().with_schema(schema).build();
4532            let simplifier = &mut Simplifier::new(&info);
4533            expr.rewrite(simplifier)
4534                .expect("Failed to simplify expression")
4535                .data
4536        };
4537
4538        // `NULL::boolean IN ()` == `NULL::boolean IN (SELECT foo FROM empty)` == false
4539        let expr = in_list(lit_bool_null(), vec![], false);
4540        assert_eq!(simplify(expr), lit(false));
4541
4542        // `NULL::boolean NOT IN ()` == `NULL::boolean NOT IN (SELECT foo FROM empty)` == true
4543        let expr = in_list(lit_bool_null(), vec![], true);
4544        assert_eq!(simplify(expr), lit(true));
4545
4546        // `NULL IN ()` == `NULL IN (SELECT foo FROM empty)` == false
4547        let null_null = || Expr::Literal(ScalarValue::Null, None);
4548        let expr = in_list(null_null(), vec![], false);
4549        assert_eq!(simplify(expr), lit(false));
4550
4551        // `NULL NOT IN ()` == `NULL NOT IN (SELECT foo FROM empty)` == true
4552        let expr = in_list(null_null(), vec![], true);
4553        assert_eq!(simplify(expr), lit(true));
4554    }
4555
4556    #[test]
4557    fn simplify_large_or() {
4558        let expr = (0..5)
4559            .map(|i| col("c1").eq(lit(i)))
4560            .fold(lit(false), |acc, e| acc.or(e));
4561        assert_eq!(
4562            simplify(expr),
4563            in_list(col("c1"), (0..5).map(lit).collect(), false),
4564        );
4565    }
4566
4567    #[test]
4568    fn simplify_expr_bool_and() {
4569        // col & true is always col
4570        assert_eq!(simplify(col("c2").and(lit(true))), col("c2"),);
4571        // col & false is always false
4572        assert_eq!(simplify(col("c2").and(lit(false))), lit(false),);
4573
4574        // true && null is always null
4575        assert_eq!(simplify(lit(true).and(lit_bool_null())), lit_bool_null(),);
4576
4577        // null && true is always null
4578        assert_eq!(simplify(lit_bool_null().and(lit(true))), lit_bool_null(),);
4579
4580        // false && null is always false
4581        assert_eq!(simplify(lit(false).and(lit_bool_null())), lit(false),);
4582
4583        // null && false is always false
4584        assert_eq!(simplify(lit_bool_null().and(lit(false))), lit(false),);
4585
4586        // c1 BETWEEN Int32(0) AND Int32(10) AND Boolean(NULL)
4587        // it can be either NULL or FALSE depending on the value of `c1 BETWEEN Int32(0) AND Int32(10)`
4588        // and the Boolean(NULL) should remain
4589        let expr = col("c1").between(lit(0), lit(10));
4590        let expr = expr.and(lit_bool_null());
4591        let result = simplify(expr);
4592
4593        let expected_expr = and(
4594            and(col("c1").gt_eq(lit(0)), col("c1").lt_eq(lit(10))),
4595            lit_bool_null(),
4596        );
4597        assert_eq!(expected_expr, result);
4598    }
4599
4600    #[test]
4601    fn simplify_expr_between() {
4602        // c2 between 3 and 4 is c2 >= 3 and c2 <= 4
4603        let expr = col("c2").between(lit(3), lit(4));
4604        assert_eq!(
4605            simplify(expr),
4606            and(col("c2").gt_eq(lit(3)), col("c2").lt_eq(lit(4)))
4607        );
4608
4609        // c2 not between 3 and 4 is c2 < 3 or c2 > 4
4610        let expr = col("c2").not_between(lit(3), lit(4));
4611        assert_eq!(
4612            simplify(expr),
4613            or(col("c2").lt(lit(3)), col("c2").gt(lit(4)))
4614        );
4615    }
4616
4617    #[test]
4618    fn test_like_and_ilike() {
4619        let null = lit(ScalarValue::Utf8(None));
4620
4621        // expr [NOT] [I]LIKE NULL
4622        let expr = col("c1").like(null.clone());
4623        assert_eq!(simplify(expr), lit_bool_null());
4624
4625        let expr = col("c1").not_like(null.clone());
4626        assert_eq!(simplify(expr), lit_bool_null());
4627
4628        let expr = col("c1").ilike(null.clone());
4629        assert_eq!(simplify(expr), lit_bool_null());
4630
4631        let expr = col("c1").not_ilike(null.clone());
4632        assert_eq!(simplify(expr), lit_bool_null());
4633
4634        // expr [NOT] [I]LIKE '%'
4635        let expr = col("c1").like(lit("%"));
4636        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4637
4638        let expr = col("c1").not_like(lit("%"));
4639        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4640
4641        let expr = col("c1").ilike(lit("%"));
4642        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4643
4644        let expr = col("c1").not_ilike(lit("%"));
4645        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4646
4647        // expr [NOT] [I]LIKE '%%'
4648        let expr = col("c1").like(lit("%%"));
4649        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4650
4651        let expr = col("c1").not_like(lit("%%"));
4652        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4653
4654        let expr = col("c1").ilike(lit("%%"));
4655        assert_eq!(simplify(expr), if_not_null(col("c1"), true));
4656
4657        let expr = col("c1").not_ilike(lit("%%"));
4658        assert_eq!(simplify(expr), if_not_null(col("c1"), false));
4659
4660        // not_null_expr [NOT] [I]LIKE '%'
4661        let expr = col("c1_non_null").like(lit("%"));
4662        assert_eq!(simplify(expr), lit(true));
4663
4664        let expr = col("c1_non_null").not_like(lit("%"));
4665        assert_eq!(simplify(expr), lit(false));
4666
4667        let expr = col("c1_non_null").ilike(lit("%"));
4668        assert_eq!(simplify(expr), lit(true));
4669
4670        let expr = col("c1_non_null").not_ilike(lit("%"));
4671        assert_eq!(simplify(expr), lit(false));
4672
4673        // not_null_expr [NOT] [I]LIKE '%%'
4674        let expr = col("c1_non_null").like(lit("%%"));
4675        assert_eq!(simplify(expr), lit(true));
4676
4677        let expr = col("c1_non_null").not_like(lit("%%"));
4678        assert_eq!(simplify(expr), lit(false));
4679
4680        let expr = col("c1_non_null").ilike(lit("%%"));
4681        assert_eq!(simplify(expr), lit(true));
4682
4683        let expr = col("c1_non_null").not_ilike(lit("%%"));
4684        assert_eq!(simplify(expr), lit(false));
4685
4686        // null_constant [NOT] [I]LIKE '%'
4687        let expr = null.clone().like(lit("%"));
4688        assert_eq!(simplify(expr), lit_bool_null());
4689
4690        let expr = null.clone().not_like(lit("%"));
4691        assert_eq!(simplify(expr), lit_bool_null());
4692
4693        let expr = null.clone().ilike(lit("%"));
4694        assert_eq!(simplify(expr), lit_bool_null());
4695
4696        let expr = null.clone().not_ilike(lit("%"));
4697        assert_eq!(simplify(expr), lit_bool_null());
4698
4699        // null_constant [NOT] [I]LIKE '%%'
4700        let expr = null.clone().like(lit("%%"));
4701        assert_eq!(simplify(expr), lit_bool_null());
4702
4703        let expr = null.clone().not_like(lit("%%"));
4704        assert_eq!(simplify(expr), lit_bool_null());
4705
4706        let expr = null.clone().ilike(lit("%%"));
4707        assert_eq!(simplify(expr), lit_bool_null());
4708
4709        let expr = null.clone().not_ilike(lit("%%"));
4710        assert_eq!(simplify(expr), lit_bool_null());
4711
4712        // null_constant [NOT] [I]LIKE 'a%'
4713        let expr = null.clone().like(lit("a%"));
4714        assert_eq!(simplify(expr), lit_bool_null());
4715
4716        let expr = null.clone().not_like(lit("a%"));
4717        assert_eq!(simplify(expr), lit_bool_null());
4718
4719        let expr = null.clone().ilike(lit("a%"));
4720        assert_eq!(simplify(expr), lit_bool_null());
4721
4722        let expr = null.clone().not_ilike(lit("a%"));
4723        assert_eq!(simplify(expr), lit_bool_null());
4724
4725        // expr [NOT] [I]LIKE with pattern without wildcards
4726        let expr = col("c1").like(lit("a"));
4727        assert_eq!(simplify(expr), col("c1").eq(lit("a")));
4728        let expr = col("c1").not_like(lit("a"));
4729        assert_eq!(simplify(expr), col("c1").not_eq(lit("a")));
4730        let expr = col("c1").like(lit("a_"));
4731        assert_eq!(simplify(expr), col("c1").like(lit("a_")));
4732        let expr = col("c1").not_like(lit("a_"));
4733        assert_eq!(simplify(expr), col("c1").not_like(lit("a_")));
4734
4735        let expr = col("c1").ilike(lit("a"));
4736        assert_eq!(simplify(expr), col("c1").ilike(lit("a")));
4737        let expr = col("c1").not_ilike(lit("a"));
4738        assert_eq!(simplify(expr), col("c1").not_ilike(lit("a")));
4739    }
4740
4741    #[test]
4742    fn test_simplify_with_guarantee() {
4743        // (c3 >= 3) AND (c4 + 2 < 10 OR (c1 NOT IN ("a", "b")))
4744        let expr_x = col("c3").gt(lit(3_i64));
4745        let expr_y = (col("c4") + lit(2_u32)).lt(lit(10_u32));
4746        let expr_z = col("c1").in_list(vec![lit("a"), lit("b")], true);
4747        let expr = expr_x.clone().and(expr_y.or(expr_z));
4748
4749        // All guaranteed null
4750        let guarantees = vec![
4751            (col("c3"), NullableInterval::from(ScalarValue::Int64(None))),
4752            (col("c4"), NullableInterval::from(ScalarValue::UInt32(None))),
4753            (col("c1"), NullableInterval::from(ScalarValue::Utf8(None))),
4754        ];
4755
4756        let output = simplify_with_guarantee(expr.clone(), guarantees);
4757        assert_eq!(output, lit_bool_null());
4758
4759        // All guaranteed false
4760        let guarantees = vec![
4761            (
4762                col("c3"),
4763                NullableInterval::NotNull {
4764                    values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4765                },
4766            ),
4767            (
4768                col("c4"),
4769                NullableInterval::from(ScalarValue::UInt32(Some(9))),
4770            ),
4771            (col("c1"), NullableInterval::from(ScalarValue::from("a"))),
4772        ];
4773        let output = simplify_with_guarantee(expr.clone(), guarantees);
4774        assert_eq!(output, lit(false));
4775
4776        // Guaranteed false or null -> no change.
4777        let guarantees = vec![
4778            (
4779                col("c3"),
4780                NullableInterval::MaybeNull {
4781                    values: Interval::make(Some(0_i64), Some(2_i64)).unwrap(),
4782                },
4783            ),
4784            (
4785                col("c4"),
4786                NullableInterval::MaybeNull {
4787                    values: Interval::make(Some(9_u32), Some(9_u32)).unwrap(),
4788                },
4789            ),
4790            (
4791                col("c1"),
4792                NullableInterval::NotNull {
4793                    values: Interval::try_new(
4794                        ScalarValue::from("d"),
4795                        ScalarValue::from("f"),
4796                    )
4797                    .unwrap(),
4798                },
4799            ),
4800        ];
4801        let output = simplify_with_guarantee(expr.clone(), guarantees);
4802        assert_eq!(&output, &expr_x);
4803
4804        // Sufficient true guarantees
4805        let guarantees = vec![
4806            (
4807                col("c3"),
4808                NullableInterval::from(ScalarValue::Int64(Some(9))),
4809            ),
4810            (
4811                col("c4"),
4812                NullableInterval::from(ScalarValue::UInt32(Some(3))),
4813            ),
4814        ];
4815        let output = simplify_with_guarantee(expr.clone(), guarantees);
4816        assert_eq!(output, lit(true));
4817
4818        // Only partially simplify
4819        let guarantees = vec![(
4820            col("c4"),
4821            NullableInterval::from(ScalarValue::UInt32(Some(3))),
4822        )];
4823        let output = simplify_with_guarantee(expr, guarantees);
4824        assert_eq!(&output, &expr_x);
4825    }
4826
4827    #[test]
4828    fn test_expression_partial_simplify_1() {
4829        // (1 + 2) + (4 / 0) -> 3 + (4 / 0)
4830        let expr = (lit(1) + lit(2)) + (lit(4) / lit(0));
4831        let expected = (lit(3)) + (lit(4) / lit(0));
4832
4833        assert_eq!(simplify(expr), expected);
4834    }
4835
4836    #[test]
4837    fn test_expression_partial_simplify_2() {
4838        // (1 > 2) and (4 / 0) -> false
4839        let expr = (lit(1).gt(lit(2))).and(lit(4) / lit(0));
4840        let expected = lit(false);
4841
4842        assert_eq!(simplify(expr), expected);
4843    }
4844
4845    #[test]
4846    fn test_simplify_cycles() {
4847        // TRUE
4848        let expr = lit(true);
4849        let expected = lit(true);
4850        let (expr, num_iter) = simplify_with_cycle_count(expr);
4851        assert_eq!(expr, expected);
4852        assert_eq!(num_iter, 1);
4853
4854        // (true != NULL) OR (5 > 10)
4855        let expr = lit(true).not_eq(lit_bool_null()).or(lit(5).gt(lit(10)));
4856        let expected = lit_bool_null();
4857        let (expr, num_iter) = simplify_with_cycle_count(expr);
4858        assert_eq!(expr, expected);
4859        assert_eq!(num_iter, 2);
4860
4861        // NOTE: this currently does not simplify
4862        // (((c4 - 10) + 10) *100) / 100
4863        let expr = (((col("c4") - lit(10)) + lit(10)) * lit(100)) / lit(100);
4864        let expected = expr.clone();
4865        let (expr, num_iter) = simplify_with_cycle_count(expr);
4866        assert_eq!(expr, expected);
4867        assert_eq!(num_iter, 1);
4868
4869        // ((c4<1 or c3<2) and c3_non_null<3) and false
4870        let expr = col("c4")
4871            .lt(lit(1))
4872            .or(col("c3").lt(lit(2)))
4873            .and(col("c3_non_null").lt(lit(3)))
4874            .and(lit(false));
4875        let expected = lit(false);
4876        let (expr, num_iter) = simplify_with_cycle_count(expr);
4877        assert_eq!(expr, expected);
4878        assert_eq!(num_iter, 2);
4879    }
4880
4881    fn boolean_test_schema() -> DFSchemaRef {
4882        static BOOLEAN_TEST_SCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
4883            Schema::new(vec![
4884                Field::new("A", DataType::Boolean, false),
4885                Field::new("B", DataType::Boolean, false),
4886                Field::new("C", DataType::Boolean, false),
4887                Field::new("D", DataType::Boolean, false),
4888            ])
4889            .to_dfschema_ref()
4890            .unwrap()
4891        });
4892        Arc::clone(&BOOLEAN_TEST_SCHEMA)
4893    }
4894
4895    #[test]
4896    fn simplify_common_factor_conjunction_in_disjunction() {
4897        let schema = boolean_test_schema();
4898        let simplifier =
4899            ExprSimplifier::new(SimplifyContext::builder().with_schema(schema).build());
4900
4901        let a = || col("A");
4902        let b = || col("B");
4903        let c = || col("C");
4904        let d = || col("D");
4905
4906        // (A AND B) OR (A AND C) -> A AND (B OR C)
4907        let expr = a().and(b()).or(a().and(c()));
4908        let expected = a().and(b().or(c()));
4909
4910        assert_eq!(expected, simplifier.simplify(expr).unwrap());
4911
4912        // (A AND B) OR (A AND C) OR (A AND D) -> A AND (B OR C OR D)
4913        let expr = a().and(b()).or(a().and(c())).or(a().and(d()));
4914        let expected = a().and(b().or(c()).or(d()));
4915        assert_eq!(expected, simplifier.simplify(expr).unwrap());
4916
4917        // A OR (B AND C AND A) -> A
4918        let expr = a().or(b().and(c().and(a())));
4919        let expected = a();
4920        assert_eq!(expected, simplifier.simplify(expr).unwrap());
4921    }
4922
4923    #[test]
4924    fn test_simplify_udaf() {
4925        let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_with_simplify());
4926        let aggregate_function_expr =
4927            Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4928                udaf.into(),
4929                vec![],
4930                false,
4931                None,
4932                vec![],
4933                None,
4934            ));
4935
4936        let expected = col("result_column");
4937        assert_eq!(simplify(aggregate_function_expr), expected);
4938
4939        let udaf = AggregateUDF::new_from_impl(SimplifyMockUdaf::new_without_simplify());
4940        let aggregate_function_expr =
4941            Expr::AggregateFunction(expr::AggregateFunction::new_udf(
4942                udaf.into(),
4943                vec![],
4944                false,
4945                None,
4946                vec![],
4947                None,
4948            ));
4949
4950        let expected = aggregate_function_expr.clone();
4951        assert_eq!(simplify(aggregate_function_expr), expected);
4952    }
4953
4954    /// A Mock UDAF which defines `simplify` to be used in tests
4955    /// related to UDAF simplification
4956    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
4957    struct SimplifyMockUdaf {
4958        simplify: bool,
4959    }
4960
4961    impl SimplifyMockUdaf {
4962        /// make simplify method return new expression
4963        fn new_with_simplify() -> Self {
4964            Self { simplify: true }
4965        }
4966        /// make simplify method return no change
4967        fn new_without_simplify() -> Self {
4968            Self { simplify: false }
4969        }
4970    }
4971
4972    impl AggregateUDFImpl for SimplifyMockUdaf {
4973        fn name(&self) -> &str {
4974            "mock_simplify"
4975        }
4976
4977        fn signature(&self) -> &Signature {
4978            unimplemented!()
4979        }
4980
4981        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
4982            unimplemented!("not needed for tests")
4983        }
4984
4985        fn accumulator(
4986            &self,
4987            _acc_args: AccumulatorArgs,
4988        ) -> Result<Box<dyn Accumulator>> {
4989            unimplemented!("not needed for tests")
4990        }
4991
4992        fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
4993            unimplemented!("not needed for testing")
4994        }
4995
4996        fn create_groups_accumulator(
4997            &self,
4998            _args: AccumulatorArgs,
4999        ) -> Result<Box<dyn GroupsAccumulator>> {
5000            unimplemented!("not needed for testing")
5001        }
5002
5003        fn simplify(&self) -> Option<AggregateFunctionSimplification> {
5004            if self.simplify {
5005                Some(Box::new(|_, _| Ok(col("result_column"))))
5006            } else {
5007                None
5008            }
5009        }
5010    }
5011
5012    #[test]
5013    fn test_simplify_udwf() {
5014        let udwf = WindowFunctionDefinition::WindowUDF(
5015            WindowUDF::new_from_impl(SimplifyMockUdwf::new_with_simplify()).into(),
5016        );
5017        let window_function_expr = Expr::from(WindowFunction::new(udwf, vec![]));
5018
5019        let expected = col("result_column");
5020        assert_eq!(simplify(window_function_expr), expected);
5021
5022        let udwf = WindowFunctionDefinition::WindowUDF(
5023            WindowUDF::new_from_impl(SimplifyMockUdwf::new_without_simplify()).into(),
5024        );
5025        let window_function_expr = Expr::from(WindowFunction::new(udwf, vec![]));
5026
5027        let expected = window_function_expr.clone();
5028        assert_eq!(simplify(window_function_expr), expected);
5029    }
5030
5031    /// A Mock UDWF which defines `simplify` to be used in tests
5032    /// related to UDWF simplification
5033    #[derive(Debug, Clone, PartialEq, Eq, Hash)]
5034    struct SimplifyMockUdwf {
5035        simplify: bool,
5036    }
5037
5038    impl SimplifyMockUdwf {
5039        /// make simplify method return new expression
5040        fn new_with_simplify() -> Self {
5041            Self { simplify: true }
5042        }
5043        /// make simplify method return no change
5044        fn new_without_simplify() -> Self {
5045            Self { simplify: false }
5046        }
5047    }
5048
5049    impl WindowUDFImpl for SimplifyMockUdwf {
5050        fn name(&self) -> &str {
5051            "mock_simplify"
5052        }
5053
5054        fn signature(&self) -> &Signature {
5055            unimplemented!()
5056        }
5057
5058        fn simplify(&self) -> Option<WindowFunctionSimplification> {
5059            if self.simplify {
5060                Some(Box::new(|_, _| Ok(col("result_column"))))
5061            } else {
5062                None
5063            }
5064        }
5065
5066        fn partition_evaluator(
5067            &self,
5068            _partition_evaluator_args: PartitionEvaluatorArgs,
5069        ) -> Result<Box<dyn PartitionEvaluator>> {
5070            unimplemented!("not needed for tests")
5071        }
5072
5073        fn field(&self, _field_args: WindowUDFFieldArgs) -> Result<FieldRef> {
5074            unimplemented!("not needed for tests")
5075        }
5076
5077        fn limit_effect(&self, _args: &[Arc<dyn PhysicalExpr>]) -> LimitEffect {
5078            LimitEffect::Unknown
5079        }
5080    }
5081    #[derive(Debug, PartialEq, Eq, Hash)]
5082    struct VolatileUdf {
5083        signature: Signature,
5084    }
5085
5086    impl VolatileUdf {
5087        pub fn new() -> Self {
5088            Self {
5089                signature: Signature::exact(vec![], Volatility::Volatile),
5090            }
5091        }
5092    }
5093    impl ScalarUDFImpl for VolatileUdf {
5094        fn name(&self) -> &str {
5095            "VolatileUdf"
5096        }
5097
5098        fn signature(&self) -> &Signature {
5099            &self.signature
5100        }
5101
5102        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
5103            Ok(DataType::Int16)
5104        }
5105
5106        fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
5107            panic!("dummy - not implemented")
5108        }
5109    }
5110
5111    #[test]
5112    fn test_optimize_volatile_conditions() {
5113        let fun = Arc::new(ScalarUDF::new_from_impl(VolatileUdf::new()));
5114        let rand = Expr::ScalarFunction(ScalarFunction::new_udf(fun, vec![]));
5115        {
5116            let expr = rand
5117                .clone()
5118                .eq(lit(0))
5119                .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0))));
5120
5121            assert_eq!(simplify(expr.clone()), expr);
5122        }
5123
5124        {
5125            let expr = col("column1")
5126                .eq(lit(2))
5127                .or(col("column1").eq(lit(2)).and(rand.clone().eq(lit(0))));
5128
5129            assert_eq!(simplify(expr), col("column1").eq(lit(2)));
5130        }
5131
5132        {
5133            let expr = (col("column1").eq(lit(2)).and(rand.clone().eq(lit(0)))).or(col(
5134                "column1",
5135            )
5136            .eq(lit(2))
5137            .and(rand.clone().eq(lit(0))));
5138
5139            assert_eq!(
5140                simplify(expr),
5141                col("column1")
5142                    .eq(lit(2))
5143                    .and((rand.clone().eq(lit(0))).or(rand.clone().eq(lit(0))))
5144            );
5145        }
5146    }
5147
5148    #[test]
5149    fn simplify_fixed_size_binary_eq_lit() {
5150        let bytes = [1u8, 2, 3].as_slice();
5151
5152        // The expression starts simple.
5153        let expr = col("c5").eq(lit(bytes));
5154
5155        // The type coercer introduces a cast.
5156        let coerced = coerce(expr.clone());
5157        let schema = expr_test_schema();
5158        assert_eq!(
5159            coerced,
5160            col("c5")
5161                .cast_to(&DataType::Binary, schema.as_ref())
5162                .unwrap()
5163                .eq(lit(bytes))
5164        );
5165
5166        // The simplifier removes the cast.
5167        assert_eq!(
5168            simplify(coerced),
5169            col("c5").eq(Expr::Literal(
5170                ScalarValue::FixedSizeBinary(3, Some(bytes.to_vec()),),
5171                None
5172            ))
5173        );
5174    }
5175
5176    #[test]
5177    fn simplify_cast_literal() {
5178        // Test that CAST(literal) expressions are evaluated at plan time
5179
5180        // CAST(123 AS Int64) should become 123i64
5181        let expr = Expr::Cast(Cast::new(Box::new(lit(123i32)), DataType::Int64));
5182        let expected = lit(123i64);
5183        assert_eq!(simplify(expr), expected);
5184
5185        // CAST(1761630189642 AS Timestamp(Nanosecond, Some("+00:00")))
5186        // Integer to timestamp cast
5187        let expr = Expr::Cast(Cast::new(
5188            Box::new(lit(1761630189642i64)),
5189            DataType::Timestamp(
5190                arrow::datatypes::TimeUnit::Nanosecond,
5191                Some("+00:00".into()),
5192            ),
5193        ));
5194        // Should evaluate to a timestamp literal
5195        let result = simplify(expr);
5196        match result {
5197            Expr::Literal(ScalarValue::TimestampNanosecond(Some(val), tz), _) => {
5198                assert_eq!(val, 1761630189642i64);
5199                assert_eq!(tz.as_deref(), Some("+00:00"));
5200            }
5201            other => panic!("Expected TimestampNanosecond literal, got: {other:?}"),
5202        }
5203
5204        // Test CAST of invalid string to timestamp - should return an error at plan time
5205        // This represents the case from the issue: CAST(Utf8("1761630189642") AS Timestamp)
5206        // "1761630189642" is NOT a valid timestamp string format
5207        let expr = Expr::Cast(Cast::new(
5208            Box::new(lit("1761630189642")),
5209            DataType::Timestamp(
5210                arrow::datatypes::TimeUnit::Nanosecond,
5211                Some("+00:00".into()),
5212            ),
5213        ));
5214
5215        // The simplification should now fail with an error at plan time
5216        let schema = test_schema();
5217        let simplifier =
5218            ExprSimplifier::new(SimplifyContext::builder().with_schema(schema).build());
5219        let result = simplifier.simplify(expr);
5220        assert!(result.is_err(), "Expected error for invalid cast");
5221        let err_msg = result.unwrap_err().to_string();
5222        assert_contains!(err_msg, "Error parsing timestamp");
5223    }
5224
5225    fn if_not_null(expr: Expr, then: bool) -> Expr {
5226        Expr::Case(Case {
5227            expr: Some(expr.is_not_null().into()),
5228            when_then_expr: vec![(lit(true).into(), lit(then).into())],
5229            else_expr: None,
5230        })
5231    }
5232
5233    // --------------------------------
5234    // --- Struct Cast Tests -----
5235    // --------------------------------
5236
5237    /// Helper to create a `Struct` literal cast expression from `source_fields` and `target_fields`.
5238    fn make_struct_cast_expr(source_fields: Fields, target_fields: Fields) -> Expr {
5239        // Create 1-row struct array (not 0-row) so it can be evaluated by simplifier
5240        let arrays: Vec<Arc<dyn Array>> = vec![
5241            Arc::new(Int32Array::from(vec![Some(1)])),
5242            Arc::new(Int32Array::from(vec![Some(2)])),
5243        ];
5244        let struct_array = StructArray::try_new(source_fields, arrays, None).unwrap();
5245
5246        Expr::Cast(Cast::new(
5247            Box::new(Expr::Literal(
5248                ScalarValue::Struct(Arc::new(struct_array)),
5249                None,
5250            )),
5251            DataType::Struct(target_fields),
5252        ))
5253    }
5254
5255    #[test]
5256    fn test_struct_cast_different_field_counts_not_foldable() {
5257        // Test that struct casts with different field counts are NOT marked as foldable
5258        // When field counts differ, const-folding should not be attempted
5259
5260        let source_fields = Fields::from(vec![
5261            Arc::new(Field::new("a", DataType::Int32, true)),
5262            Arc::new(Field::new("b", DataType::Int32, true)),
5263        ]);
5264
5265        let target_fields = Fields::from(vec![
5266            Arc::new(Field::new("x", DataType::Int32, true)),
5267            Arc::new(Field::new("y", DataType::Int32, true)),
5268            Arc::new(Field::new("z", DataType::Int32, true)),
5269        ]);
5270
5271        let expr = make_struct_cast_expr(source_fields, target_fields);
5272
5273        let simplifier = ExprSimplifier::new(
5274            SimplifyContext::builder()
5275                .with_schema(test_schema())
5276                .build(),
5277        );
5278
5279        // The cast should remain unchanged since field counts differ
5280        let result = simplifier.simplify(expr.clone()).unwrap();
5281        // Ensure const-folding was not attempted (the expression remains exactly the same)
5282        assert_eq!(
5283            result, expr,
5284            "Struct cast with different field counts should remain unchanged (no const-folding)"
5285        );
5286    }
5287
5288    #[test]
5289    fn test_struct_cast_same_field_count_foldable() {
5290        // Test that struct casts with same field counts can be considered for const-folding
5291
5292        let source_fields = Fields::from(vec![
5293            Arc::new(Field::new("a", DataType::Int32, true)),
5294            Arc::new(Field::new("b", DataType::Int32, true)),
5295        ]);
5296
5297        let target_fields = Fields::from(vec![
5298            Arc::new(Field::new("a", DataType::Int32, true)),
5299            Arc::new(Field::new("b", DataType::Int32, true)),
5300        ]);
5301
5302        let expr = make_struct_cast_expr(source_fields, target_fields);
5303
5304        let simplifier = ExprSimplifier::new(
5305            SimplifyContext::builder()
5306                .with_schema(test_schema())
5307                .build(),
5308        );
5309
5310        // The cast should be simplified
5311        let result = simplifier.simplify(expr.clone()).unwrap();
5312        // Struct casts with same field count should be const-folded to a literal
5313        assert!(matches!(result, Expr::Literal(_, _)));
5314        // Ensure the simplifier made a change (not identical to original)
5315        assert_ne!(
5316            result, expr,
5317            "Struct cast with same field count should be simplified (not identical to input)"
5318        );
5319    }
5320
5321    #[test]
5322    fn test_struct_cast_different_names_same_count() {
5323        // Test struct cast with same field count but different names
5324        // Field count matches; simplification should be skipped because names do not overlap
5325
5326        let source_fields = Fields::from(vec![
5327            Arc::new(Field::new("a", DataType::Int32, true)),
5328            Arc::new(Field::new("b", DataType::Int32, true)),
5329        ]);
5330
5331        let target_fields = Fields::from(vec![
5332            Arc::new(Field::new("x", DataType::Int32, true)),
5333            Arc::new(Field::new("y", DataType::Int32, true)),
5334        ]);
5335
5336        let expr = make_struct_cast_expr(source_fields, target_fields);
5337
5338        let simplifier = ExprSimplifier::new(
5339            SimplifyContext::builder()
5340                .with_schema(test_schema())
5341                .build(),
5342        );
5343
5344        // The cast should remain unchanged because there is no name overlap
5345        let result = simplifier.simplify(expr.clone()).unwrap();
5346        assert_eq!(
5347            result, expr,
5348            "Struct cast with different names but same field count should not be simplified"
5349        );
5350    }
5351
5352    #[test]
5353    fn test_struct_cast_empty_array_not_foldable() {
5354        // Test that struct casts with 0-row (empty) struct arrays are NOT const-folded
5355        // The simplifier uses a 1-row input batch, which causes dimension mismatches
5356        // when evaluating 0-row struct literals
5357
5358        let source_fields = Fields::from(vec![
5359            Arc::new(Field::new("a", DataType::Int32, true)),
5360            Arc::new(Field::new("b", DataType::Int32, true)),
5361        ]);
5362
5363        let target_fields = Fields::from(vec![
5364            Arc::new(Field::new("a", DataType::Int32, true)),
5365            Arc::new(Field::new("b", DataType::Int32, true)),
5366        ]);
5367
5368        // Create a 0-row (empty) struct array
5369        let arrays: Vec<Arc<dyn Array>> = vec![
5370            Arc::new(Int32Array::new(vec![].into(), None)),
5371            Arc::new(Int32Array::new(vec![].into(), None)),
5372        ];
5373        let struct_array = StructArray::try_new(source_fields, arrays, None).unwrap();
5374
5375        let expr = Expr::Cast(Cast::new(
5376            Box::new(Expr::Literal(
5377                ScalarValue::Struct(Arc::new(struct_array)),
5378                None,
5379            )),
5380            DataType::Struct(target_fields),
5381        ));
5382
5383        let simplifier = ExprSimplifier::new(
5384            SimplifyContext::builder()
5385                .with_schema(test_schema())
5386                .build(),
5387        );
5388
5389        // The cast should remain unchanged since the struct array is empty (0-row)
5390        let result = simplifier.simplify(expr.clone()).unwrap();
5391        assert_eq!(
5392            result, expr,
5393            "Struct cast with empty (0-row) array should remain unchanged"
5394        );
5395    }
5396}