datafusion_optimizer/simplify_expressions/
simplify_exprs.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Simplify expressions optimizer rule and implementation
19
20use std::sync::Arc;
21
22use datafusion_common::tree_node::{Transformed, TreeNode};
23use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result};
24use datafusion_expr::execution_props::ExecutionProps;
25use datafusion_expr::logical_plan::LogicalPlan;
26use datafusion_expr::simplify::SimplifyContext;
27use datafusion_expr::utils::merge_schema;
28use datafusion_expr::Expr;
29
30use crate::optimizer::ApplyOrder;
31use crate::utils::NamePreserver;
32use crate::{OptimizerConfig, OptimizerRule};
33
34use super::ExprSimplifier;
35
/// Optimizer pass that simplifies [`LogicalPlan`]s by rewriting their
/// [`Expr`]s: constants are evaluated and algebraic simplifications
/// are applied.
///
/// # Introduction
/// It uses boolean algebra laws to simplify or reduce the number of terms in expressions.
///
/// # Example:
/// `Filter: b > 2 AND b > 2`
/// is optimized to
/// `Filter: b > 2`
///
/// [`Expr`]: datafusion_expr::Expr
#[derive(Default, Debug)]
pub struct SimplifyExpressions {}
51
52impl OptimizerRule for SimplifyExpressions {
53    fn name(&self) -> &str {
54        "simplify_expressions"
55    }
56
57    fn apply_order(&self) -> Option<ApplyOrder> {
58        Some(ApplyOrder::BottomUp)
59    }
60
61    fn supports_rewrite(&self) -> bool {
62        true
63    }
64
65    fn rewrite(
66        &self,
67        plan: LogicalPlan,
68        config: &dyn OptimizerConfig,
69    ) -> Result<Transformed<LogicalPlan>, DataFusionError> {
70        let mut execution_props = ExecutionProps::new();
71        execution_props.query_execution_start_time = config.query_execution_start_time();
72        execution_props.config_options = Some(config.options());
73        Self::optimize_internal(plan, &execution_props)
74    }
75}
76
77impl SimplifyExpressions {
78    fn optimize_internal(
79        plan: LogicalPlan,
80        execution_props: &ExecutionProps,
81    ) -> Result<Transformed<LogicalPlan>> {
82        let schema = if !plan.inputs().is_empty() {
83            DFSchemaRef::new(merge_schema(&plan.inputs()))
84        } else if let LogicalPlan::TableScan(scan) = &plan {
85            // When predicates are pushed into a table scan, there is no input
86            // schema to resolve predicates against, so it must be handled specially
87            //
88            // Note that this is not `plan.schema()` which is the *output*
89            // schema, and reflects any pushed down projection. The output schema
90            // will not contain columns that *only* appear in pushed down predicates
91            // (and no where else) in the plan.
92            //
93            // Thus, use the full schema of the inner provider without any
94            // projection applied for simplification
95            Arc::new(DFSchema::try_from_qualified_schema(
96                scan.table_name.clone(),
97                &scan.source.schema(),
98            )?)
99        } else {
100            Arc::new(DFSchema::empty())
101        };
102
103        let info = SimplifyContext::new(execution_props).with_schema(schema);
104
105        // Inputs have already been rewritten (due to bottom-up traversal handled by Optimizer)
106        // Just need to rewrite our own expressions
107
108        let simplifier = ExprSimplifier::new(info);
109
110        // The left and right expressions in a Join on clause are not
111        // commutative, for reasons that are not entirely clear. Thus, do not
112        // reorder expressions in Join while simplifying.
113        //
114        // This is likely related to the fact that order of the columns must
115        // match the order of the children. see
116        // https://github.com/apache/datafusion/pull/8780 for more details
117        let simplifier = if let LogicalPlan::Join(_) = plan {
118            simplifier.with_canonicalize(false)
119        } else {
120            simplifier
121        };
122
123        // Preserve expression names to avoid changing the schema of the plan.
124        let name_preserver = NamePreserver::new(&plan);
125        let mut rewrite_expr = |expr: Expr| {
126            let name = name_preserver.save(&expr);
127            let expr = simplifier.simplify_with_cycle_count_transformed(expr)?.0;
128            Ok(Transformed::new_transformed(
129                name.restore(expr.data),
130                expr.transformed,
131            ))
132        };
133
134        plan.map_expressions(|expr| {
135            // Preserve the aliasing of grouping sets.
136            if let Expr::GroupingSet(_) = &expr {
137                expr.map_children(&mut rewrite_expr)
138            } else {
139                rewrite_expr(expr)
140            }
141        })
142    }
143}
144
145impl SimplifyExpressions {
146    #[allow(missing_docs)]
147    pub fn new() -> Self {
148        Self {}
149    }
150}
151
152#[cfg(test)]
153mod tests {
154    use std::ops::Not;
155
156    use arrow::datatypes::{DataType, Field, Schema};
157    use chrono::{DateTime, Utc};
158
159    use datafusion_expr::logical_plan::builder::table_scan_with_filters;
160    use datafusion_expr::logical_plan::table_scan;
161    use datafusion_expr::*;
162    use datafusion_functions_aggregate::expr_fn::{max, min};
163
164    use crate::assert_optimized_plan_eq_snapshot;
165    use crate::test::{assert_fields_eq, test_table_scan_with_name};
166    use crate::OptimizerContext;
167
168    use super::*;
169
170    fn test_table_scan() -> LogicalPlan {
171        let schema = Schema::new(vec![
172            Field::new("a", DataType::Boolean, false),
173            Field::new("b", DataType::Boolean, false),
174            Field::new("c", DataType::Boolean, false),
175            Field::new("d", DataType::UInt32, false),
176            Field::new("e", DataType::UInt32, true),
177        ]);
178        table_scan(Some("test"), &schema, None)
179            .expect("creating scan")
180            .build()
181            .expect("building plan")
182    }
183
184    macro_rules! assert_optimized_plan_equal {
185        (
186            $plan:expr,
187            @ $expected:literal $(,)?
188        ) => {{
189            let rules: Vec<Arc<dyn crate::OptimizerRule + Send + Sync>> = vec![Arc::new(SimplifyExpressions::new())];
190            let optimizer_ctx = OptimizerContext::new();
191            assert_optimized_plan_eq_snapshot!(
192                optimizer_ctx,
193                rules,
194                $plan,
195                @ $expected,
196            )
197        }};
198    }
199
200    #[test]
201    fn test_simplify_table_full_filter_in_scan() -> Result<()> {
202        let fields = vec![
203            Field::new("a", DataType::UInt32, false),
204            Field::new("b", DataType::UInt32, false),
205            Field::new("c", DataType::UInt32, false),
206        ];
207
208        let schema = Schema::new(fields);
209
210        let table_scan = table_scan_with_filters(
211            Some("test"),
212            &schema,
213            Some(vec![0]),
214            vec![col("b").is_not_null()],
215        )?
216        .build()?;
217        assert_eq!(1, table_scan.schema().fields().len());
218        assert_fields_eq(&table_scan, vec!["a"]);
219
220        assert_optimized_plan_equal!(
221            table_scan,
222            @ r"TableScan: test projection=[a], full_filters=[Boolean(true)]"
223        )
224    }
225
226    #[test]
227    fn test_simplify_filter_pushdown() -> Result<()> {
228        let table_scan = test_table_scan();
229        let plan = LogicalPlanBuilder::from(table_scan)
230            .project(vec![col("a")])?
231            .filter(and(col("b").gt(lit(1)), col("b").gt(lit(1))))?
232            .build()?;
233
234        assert_optimized_plan_equal!(
235            plan,
236            @ r"
237        Filter: test.b > Int32(1)
238          Projection: test.a
239            TableScan: test
240        "
241        )
242    }
243
244    #[test]
245    fn test_simplify_optimized_plan() -> Result<()> {
246        let table_scan = test_table_scan();
247        let plan = LogicalPlanBuilder::from(table_scan)
248            .project(vec![col("a")])?
249            .filter(and(col("b").gt(lit(1)), col("b").gt(lit(1))))?
250            .build()?;
251
252        assert_optimized_plan_equal!(
253            plan,
254            @ r"
255            Filter: test.b > Int32(1)
256              Projection: test.a
257                TableScan: test
258            "
259        )
260    }
261
262    #[test]
263    fn test_simplify_optimized_plan_with_or() -> Result<()> {
264        let table_scan = test_table_scan();
265        let plan = LogicalPlanBuilder::from(table_scan)
266            .project(vec![col("a")])?
267            .filter(or(col("b").gt(lit(1)), col("b").gt(lit(1))))?
268            .build()?;
269
270        assert_optimized_plan_equal!(
271            plan,
272            @ r"
273            Filter: test.b > Int32(1)
274              Projection: test.a
275                TableScan: test
276            "
277        )
278    }
279
280    #[test]
281    fn test_simplify_optimized_plan_with_composed_and() -> Result<()> {
282        let table_scan = test_table_scan();
283        // ((c > 5) AND (d < 6)) AND (c > 5) --> (c > 5) AND (d < 6)
284        let plan = LogicalPlanBuilder::from(table_scan)
285            .project(vec![col("a"), col("b")])?
286            .filter(and(
287                and(col("a").gt(lit(5)), col("b").lt(lit(6))),
288                col("a").gt(lit(5)),
289            ))?
290            .build()?;
291
292        assert_optimized_plan_equal!(
293            plan,
294            @ r"
295        Filter: test.a > Int32(5) AND test.b < Int32(6)
296          Projection: test.a, test.b
297            TableScan: test
298        "
299        )
300    }
301
302    #[test]
303    fn test_simplify_optimized_plan_eq_expr() -> Result<()> {
304        let table_scan = test_table_scan();
305        let plan = LogicalPlanBuilder::from(table_scan)
306            .filter(col("b").eq(lit(true)))?
307            .filter(col("c").eq(lit(false)))?
308            .project(vec![col("a")])?
309            .build()?;
310
311        assert_optimized_plan_equal!(
312            plan,
313            @ r"
314        Projection: test.a
315          Filter: NOT test.c
316            Filter: test.b
317              TableScan: test
318        "
319        )
320    }
321
322    #[test]
323    fn test_simplify_optimized_plan_not_eq_expr() -> Result<()> {
324        let table_scan = test_table_scan();
325        let plan = LogicalPlanBuilder::from(table_scan)
326            .filter(col("b").not_eq(lit(true)))?
327            .filter(col("c").not_eq(lit(false)))?
328            .limit(0, Some(1))?
329            .project(vec![col("a")])?
330            .build()?;
331
332        assert_optimized_plan_equal!(
333            plan,
334            @ r"
335        Projection: test.a
336          Limit: skip=0, fetch=1
337            Filter: test.c
338              Filter: NOT test.b
339                TableScan: test
340        "
341        )
342    }
343
344    #[test]
345    fn test_simplify_optimized_plan_and_expr() -> Result<()> {
346        let table_scan = test_table_scan();
347        let plan = LogicalPlanBuilder::from(table_scan)
348            .filter(col("b").not_eq(lit(true)).and(col("c").eq(lit(true))))?
349            .project(vec![col("a")])?
350            .build()?;
351
352        assert_optimized_plan_equal!(
353            plan,
354            @ r"
355        Projection: test.a
356          Filter: NOT test.b AND test.c
357            TableScan: test
358        "
359        )
360    }
361
362    #[test]
363    fn test_simplify_optimized_plan_or_expr() -> Result<()> {
364        let table_scan = test_table_scan();
365        let plan = LogicalPlanBuilder::from(table_scan)
366            .filter(col("b").not_eq(lit(true)).or(col("c").eq(lit(false))))?
367            .project(vec![col("a")])?
368            .build()?;
369
370        assert_optimized_plan_equal!(
371            plan,
372            @ r"
373        Projection: test.a
374          Filter: NOT test.b OR NOT test.c
375            TableScan: test
376        "
377        )
378    }
379
380    #[test]
381    fn test_simplify_optimized_plan_not_expr() -> Result<()> {
382        let table_scan = test_table_scan();
383        let plan = LogicalPlanBuilder::from(table_scan)
384            .filter(col("b").eq(lit(false)).not())?
385            .project(vec![col("a")])?
386            .build()?;
387
388        assert_optimized_plan_equal!(
389            plan,
390            @ r"
391        Projection: test.a
392          Filter: test.b
393            TableScan: test
394        "
395        )
396    }
397
398    #[test]
399    fn test_simplify_optimized_plan_support_projection() -> Result<()> {
400        let table_scan = test_table_scan();
401        let plan = LogicalPlanBuilder::from(table_scan)
402            .project(vec![col("a"), col("d"), col("b").eq(lit(false))])?
403            .build()?;
404
405        assert_optimized_plan_equal!(
406            plan,
407            @ r"
408        Projection: test.a, test.d, NOT test.b AS test.b = Boolean(false)
409          TableScan: test
410        "
411        )
412    }
413
414    #[test]
415    fn test_simplify_optimized_plan_support_aggregate() -> Result<()> {
416        let table_scan = test_table_scan();
417        let plan = LogicalPlanBuilder::from(table_scan)
418            .project(vec![col("a"), col("c"), col("b")])?
419            .aggregate(
420                vec![col("a"), col("c")],
421                vec![max(col("b").eq(lit(true))), min(col("b"))],
422            )?
423            .build()?;
424
425        assert_optimized_plan_equal!(
426            plan,
427            @ r"
428        Aggregate: groupBy=[[test.a, test.c]], aggr=[[max(test.b) AS max(test.b = Boolean(true)), min(test.b)]]
429          Projection: test.a, test.c, test.b
430            TableScan: test
431        "
432        )
433    }
434
435    #[test]
436    fn test_simplify_optimized_plan_support_values() -> Result<()> {
437        let expr1 = Expr::BinaryExpr(BinaryExpr::new(
438            Box::new(lit(1)),
439            Operator::Plus,
440            Box::new(lit(2)),
441        ));
442        let expr2 = Expr::BinaryExpr(BinaryExpr::new(
443            Box::new(lit(2)),
444            Operator::Minus,
445            Box::new(lit(1)),
446        ));
447        let values = vec![vec![expr1, expr2]];
448        let plan = LogicalPlanBuilder::values(values)?.build()?;
449
450        assert_optimized_plan_equal!(
451            plan,
452            @ "Values: (Int32(3) AS Int32(1) + Int32(2), Int32(1) AS Int32(2) - Int32(1))"
453        )
454    }
455
456    fn get_optimized_plan_formatted(
457        plan: LogicalPlan,
458        date_time: &DateTime<Utc>,
459    ) -> String {
460        let config = OptimizerContext::new().with_query_execution_start_time(*date_time);
461        let rule = SimplifyExpressions::new();
462
463        let optimized_plan = rule.rewrite(plan, &config).unwrap().data;
464        format!("{optimized_plan}")
465    }
466
467    #[test]
468    fn cast_expr() -> Result<()> {
469        let table_scan = test_table_scan();
470        let proj = vec![Expr::Cast(Cast::new(Box::new(lit("0")), DataType::Int32))];
471        let plan = LogicalPlanBuilder::from(table_scan)
472            .project(proj)?
473            .build()?;
474
475        let expected = "Projection: Int32(0) AS Utf8(\"0\")\
476            \n  TableScan: test";
477        let actual = get_optimized_plan_formatted(plan, &Utc::now());
478        assert_eq!(expected, actual);
479        Ok(())
480    }
481
482    #[test]
483    fn simplify_and_eval() -> Result<()> {
484        // demonstrate a case where the evaluation needs to run prior
485        // to the simplifier for it to work
486        let table_scan = test_table_scan();
487        let time = Utc::now();
488        // (true or false) != col --> !col
489        let proj = vec![lit(true).or(lit(false)).not_eq(col("a"))];
490        let plan = LogicalPlanBuilder::from(table_scan)
491            .project(proj)?
492            .build()?;
493
494        let actual = get_optimized_plan_formatted(plan, &time);
495        let expected =
496            "Projection: NOT test.a AS Boolean(true) OR Boolean(false) != test.a\
497                        \n  TableScan: test";
498
499        assert_eq!(expected, actual);
500        Ok(())
501    }
502
503    #[test]
504    fn simplify_not_binary() -> Result<()> {
505        let table_scan = test_table_scan();
506
507        let plan = LogicalPlanBuilder::from(table_scan)
508            .filter(col("d").gt(lit(10)).not())?
509            .build()?;
510
511        assert_optimized_plan_equal!(
512            plan,
513            @ r"
514        Filter: test.d <= Int32(10)
515          TableScan: test
516        "
517        )
518    }
519
520    #[test]
521    fn simplify_not_bool_and() -> Result<()> {
522        let table_scan = test_table_scan();
523
524        let plan = LogicalPlanBuilder::from(table_scan)
525            .filter(col("d").gt(lit(10)).and(col("d").lt(lit(100))).not())?
526            .build()?;
527
528        assert_optimized_plan_equal!(
529            plan,
530            @ r"
531        Filter: test.d <= Int32(10) OR test.d >= Int32(100)
532          TableScan: test
533        "
534        )
535    }
536
537    #[test]
538    fn simplify_not_bool_or() -> Result<()> {
539        let table_scan = test_table_scan();
540
541        let plan = LogicalPlanBuilder::from(table_scan)
542            .filter(col("d").gt(lit(10)).or(col("d").lt(lit(100))).not())?
543            .build()?;
544
545        assert_optimized_plan_equal!(
546            plan,
547            @ r"
548        Filter: test.d <= Int32(10) AND test.d >= Int32(100)
549          TableScan: test
550        "
551        )
552    }
553
554    #[test]
555    fn simplify_not_not() -> Result<()> {
556        let table_scan = test_table_scan();
557
558        let plan = LogicalPlanBuilder::from(table_scan)
559            .filter(col("d").gt(lit(10)).not().not())?
560            .build()?;
561
562        assert_optimized_plan_equal!(
563            plan,
564            @ r"
565        Filter: test.d > Int32(10)
566          TableScan: test
567        "
568        )
569    }
570
571    #[test]
572    fn simplify_not_null() -> Result<()> {
573        let table_scan = test_table_scan();
574
575        let plan = LogicalPlanBuilder::from(table_scan)
576            .filter(col("e").is_null().not())?
577            .build()?;
578
579        assert_optimized_plan_equal!(
580            plan,
581            @ r"
582        Filter: test.e IS NOT NULL
583          TableScan: test
584        "
585        )
586    }
587
588    #[test]
589    fn simplify_not_not_null() -> Result<()> {
590        let table_scan = test_table_scan();
591
592        let plan = LogicalPlanBuilder::from(table_scan)
593            .filter(col("e").is_not_null().not())?
594            .build()?;
595
596        assert_optimized_plan_equal!(
597            plan,
598            @ r"
599        Filter: test.e IS NULL
600          TableScan: test
601        "
602        )
603    }
604
605    #[test]
606    fn simplify_not_in() -> Result<()> {
607        let table_scan = test_table_scan();
608
609        let plan = LogicalPlanBuilder::from(table_scan)
610            .filter(col("d").in_list(vec![lit(1), lit(2), lit(3)], false).not())?
611            .build()?;
612
613        assert_optimized_plan_equal!(
614            plan,
615            @ r"
616        Filter: test.d != Int32(1) AND test.d != Int32(2) AND test.d != Int32(3)
617          TableScan: test
618        "
619        )
620    }
621
622    #[test]
623    fn simplify_not_not_in() -> Result<()> {
624        let table_scan = test_table_scan();
625
626        let plan = LogicalPlanBuilder::from(table_scan)
627            .filter(col("d").in_list(vec![lit(1), lit(2), lit(3)], true).not())?
628            .build()?;
629
630        assert_optimized_plan_equal!(
631            plan,
632            @ r"
633        Filter: test.d = Int32(1) OR test.d = Int32(2) OR test.d = Int32(3)
634          TableScan: test
635        "
636        )
637    }
638
639    #[test]
640    fn simplify_not_between() -> Result<()> {
641        let table_scan = test_table_scan();
642        let qual = col("d").between(lit(1), lit(10));
643
644        let plan = LogicalPlanBuilder::from(table_scan)
645            .filter(qual.not())?
646            .build()?;
647
648        assert_optimized_plan_equal!(
649            plan,
650            @ r"
651        Filter: test.d < Int32(1) OR test.d > Int32(10)
652          TableScan: test
653        "
654        )
655    }
656
657    #[test]
658    fn simplify_not_not_between() -> Result<()> {
659        let table_scan = test_table_scan();
660        let qual = col("d").not_between(lit(1), lit(10));
661
662        let plan = LogicalPlanBuilder::from(table_scan)
663            .filter(qual.not())?
664            .build()?;
665
666        assert_optimized_plan_equal!(
667            plan,
668            @ r"
669        Filter: test.d >= Int32(1) AND test.d <= Int32(10)
670          TableScan: test
671        "
672        )
673    }
674
675    #[test]
676    fn simplify_not_like() -> Result<()> {
677        let schema = Schema::new(vec![
678            Field::new("a", DataType::Utf8, false),
679            Field::new("b", DataType::Utf8, false),
680        ]);
681        let table_scan = table_scan(Some("test"), &schema, None)
682            .expect("creating scan")
683            .build()
684            .expect("building plan");
685
686        let plan = LogicalPlanBuilder::from(table_scan)
687            .filter(col("a").like(col("b")).not())?
688            .build()?;
689
690        assert_optimized_plan_equal!(
691            plan,
692            @ r"
693        Filter: test.a NOT LIKE test.b
694          TableScan: test
695        "
696        )
697    }
698
699    #[test]
700    fn simplify_not_not_like() -> Result<()> {
701        let schema = Schema::new(vec![
702            Field::new("a", DataType::Utf8, false),
703            Field::new("b", DataType::Utf8, false),
704        ]);
705        let table_scan = table_scan(Some("test"), &schema, None)
706            .expect("creating scan")
707            .build()
708            .expect("building plan");
709
710        let plan = LogicalPlanBuilder::from(table_scan)
711            .filter(col("a").not_like(col("b")).not())?
712            .build()?;
713
714        assert_optimized_plan_equal!(
715            plan,
716            @ r"
717        Filter: test.a LIKE test.b
718          TableScan: test
719        "
720        )
721    }
722
723    #[test]
724    fn simplify_not_ilike() -> Result<()> {
725        let schema = Schema::new(vec![
726            Field::new("a", DataType::Utf8, false),
727            Field::new("b", DataType::Utf8, false),
728        ]);
729        let table_scan = table_scan(Some("test"), &schema, None)
730            .expect("creating scan")
731            .build()
732            .expect("building plan");
733
734        let plan = LogicalPlanBuilder::from(table_scan)
735            .filter(col("a").ilike(col("b")).not())?
736            .build()?;
737
738        assert_optimized_plan_equal!(
739            plan,
740            @ r"
741        Filter: test.a NOT ILIKE test.b
742          TableScan: test
743        "
744        )
745    }
746
747    #[test]
748    fn simplify_not_distinct_from() -> Result<()> {
749        let table_scan = test_table_scan();
750
751        let plan = LogicalPlanBuilder::from(table_scan)
752            .filter(binary_expr(col("d"), Operator::IsDistinctFrom, lit(10)).not())?
753            .build()?;
754
755        assert_optimized_plan_equal!(
756            plan,
757            @ r"
758        Filter: test.d IS NOT DISTINCT FROM Int32(10)
759          TableScan: test
760        "
761        )
762    }
763
764    #[test]
765    fn simplify_not_not_distinct_from() -> Result<()> {
766        let table_scan = test_table_scan();
767
768        let plan = LogicalPlanBuilder::from(table_scan)
769            .filter(binary_expr(col("d"), Operator::IsNotDistinctFrom, lit(10)).not())?
770            .build()?;
771
772        assert_optimized_plan_equal!(
773            plan,
774            @ r"
775        Filter: test.d IS DISTINCT FROM Int32(10)
776          TableScan: test
777        "
778        )
779    }
780
781    #[test]
782    fn simplify_equijoin_predicate() -> Result<()> {
783        let t1 = test_table_scan_with_name("t1")?;
784        let t2 = test_table_scan_with_name("t2")?;
785
786        let left_key = col("t1.a") + lit(1i64).cast_to(&DataType::UInt32, t1.schema())?;
787        let right_key =
788            col("t2.a") + lit(2i64).cast_to(&DataType::UInt32, t2.schema())?;
789        let plan = LogicalPlanBuilder::from(t1)
790            .join_with_expr_keys(
791                t2,
792                JoinType::Inner,
793                (vec![left_key], vec![right_key]),
794                None,
795            )?
796            .build()?;
797
798        // before simplify: t1.a + CAST(Int64(1), UInt32) = t2.a + CAST(Int64(2), UInt32)
799        // after simplify: t1.a + UInt32(1) = t2.a + UInt32(2) AS t1.a + Int64(1) = t2.a + Int64(2)
800        assert_optimized_plan_equal!(
801            plan,
802            @ r"
803        Inner Join: t1.a + UInt32(1) = t2.a + UInt32(2)
804          TableScan: t1
805          TableScan: t2
806        "
807        )
808    }
809
810    #[test]
811    fn simplify_is_not_null() -> Result<()> {
812        let table_scan = test_table_scan();
813
814        let plan = LogicalPlanBuilder::from(table_scan)
815            .filter(col("d").is_not_null())?
816            .build()?;
817
818        assert_optimized_plan_equal!(
819            plan,
820            @ r"
821        Filter: Boolean(true)
822          TableScan: test
823        "
824        )
825    }
826
827    #[test]
828    fn simplify_is_null() -> Result<()> {
829        let table_scan = test_table_scan();
830
831        let plan = LogicalPlanBuilder::from(table_scan)
832            .filter(col("d").is_null())?
833            .build()?;
834
835        assert_optimized_plan_equal!(
836            plan,
837            @ r"
838        Filter: Boolean(false)
839          TableScan: test
840        "
841        )
842    }
843
844    #[test]
845    fn simplify_grouping_sets() -> Result<()> {
846        let table_scan = test_table_scan();
847        let plan = LogicalPlanBuilder::from(table_scan)
848            .aggregate(
849                [grouping_set(vec![
850                    vec![(lit(42).alias("prev") + lit(1)).alias("age"), col("a")],
851                    vec![col("a").or(col("b")).and(lit(1).lt(lit(0))).alias("cond")],
852                    vec![col("d").alias("e"), (lit(1) + lit(2))],
853                ])],
854                [] as [Expr; 0],
855            )?
856            .build()?;
857
858        assert_optimized_plan_equal!(
859            plan,
860            @ r"
861        Aggregate: groupBy=[[GROUPING SETS ((Int32(43) AS age, test.a), (Boolean(false) AS cond), (test.d AS e, Int32(3) AS Int32(1) + Int32(2)))]], aggr=[[]]
862          TableScan: test
863        "
864        )
865    }
866
867    #[test]
868    fn test_simplify_regex_special_cases() -> Result<()> {
869        let schema = Schema::new(vec![
870            Field::new("a", DataType::Utf8, true),
871            Field::new("b", DataType::Utf8, false),
872        ]);
873        let table_scan = table_scan(Some("test"), &schema, None)?.build()?;
874
875        // Test `= ".*"` transforms to true (except for empty strings)
876        let plan = LogicalPlanBuilder::from(table_scan.clone())
877            .filter(binary_expr(col("a"), Operator::RegexMatch, lit(".*")))?
878            .build()?;
879
880        assert_optimized_plan_equal!(
881            plan,
882            @ r"
883        Filter: test.a IS NOT NULL
884          TableScan: test
885        "
886        )?;
887
888        // Test `!= ".*"` transforms to checking if the column is empty
889        let plan = LogicalPlanBuilder::from(table_scan.clone())
890            .filter(binary_expr(col("a"), Operator::RegexNotMatch, lit(".*")))?
891            .build()?;
892
893        assert_optimized_plan_equal!(
894            plan,
895            @ r#"
896        Filter: test.a = Utf8("")
897          TableScan: test
898        "#
899        )?;
900
901        // Test case-insensitive versions
902
903        // Test `=~ ".*"` (case-insensitive) transforms to true (except for empty strings)
904        let plan = LogicalPlanBuilder::from(table_scan.clone())
905            .filter(binary_expr(col("b"), Operator::RegexIMatch, lit(".*")))?
906            .build()?;
907
908        assert_optimized_plan_equal!(
909            plan,
910            @ r"
911        Filter: Boolean(true)
912          TableScan: test
913        "
914        )?;
915
916        // Test `!~ ".*"` (case-insensitive) transforms to checking if the column is empty
917        let plan = LogicalPlanBuilder::from(table_scan.clone())
918            .filter(binary_expr(col("a"), Operator::RegexNotIMatch, lit(".*")))?
919            .build()?;
920
921        assert_optimized_plan_equal!(
922            plan,
923            @ r#"
924        Filter: test.a = Utf8("")
925          TableScan: test
926        "#
927        )
928    }
929}