Skip to main content

datafusion_optimizer/
eliminate_outer_join.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`EliminateOuterJoin`] converts `LEFT/RIGHT/FULL` joins to `INNER` joins
19use crate::{OptimizerConfig, OptimizerRule};
20use datafusion_common::{Column, DFSchema, Result};
21use datafusion_expr::logical_plan::{Join, JoinType, LogicalPlan};
22use datafusion_expr::{Expr, Filter, Operator};
23
24use crate::optimizer::ApplyOrder;
25use datafusion_common::tree_node::Transformed;
26use datafusion_expr::expr::{BinaryExpr, Cast, InList, Like, TryCast};
27use std::sync::Arc;
28
29///
30/// Attempt to replace outer joins with inner joins.
31///
32/// Outer joins are typically more expensive to compute at runtime
33/// than inner joins and prevent various forms of predicate pushdown
34/// and other optimizations, so removing them if possible is beneficial.
35///
36/// Inner joins filter out rows that do match. Outer joins pass rows
37/// that do not match padded with nulls. If there is a filter in the
38/// query that would filter any such null rows after the join the rows
39/// introduced by the outer join are filtered.
40///
41/// For example, in the `select ... from a left join b on ... where b.xx = 100;`
42///
43/// For rows when `b.xx` is null (as it would be after an outer join),
44/// the `b.xx = 100` predicate filters them out and there is no
45/// need to produce null rows for output.
46///
47/// Generally, an outer join can be rewritten to inner join if the
48/// filters from the WHERE clause return false while any inputs are
49/// null and columns of those quals are come from nullable side of
50/// outer join.
51#[derive(Default, Debug)]
52pub struct EliminateOuterJoin;
53
54impl EliminateOuterJoin {
55    #[expect(missing_docs)]
56    pub fn new() -> Self {
57        Self {}
58    }
59}
60
61/// Attempt to eliminate outer joins.
62impl OptimizerRule for EliminateOuterJoin {
63    fn name(&self) -> &str {
64        "eliminate_outer_join"
65    }
66
67    fn apply_order(&self) -> Option<ApplyOrder> {
68        Some(ApplyOrder::TopDown)
69    }
70
71    fn supports_rewrite(&self) -> bool {
72        true
73    }
74
75    fn rewrite(
76        &self,
77        plan: LogicalPlan,
78        _config: &dyn OptimizerConfig,
79    ) -> Result<Transformed<LogicalPlan>> {
80        match plan {
81            LogicalPlan::Filter(mut filter) => match Arc::unwrap_or_clone(filter.input) {
82                LogicalPlan::Join(join) => {
83                    let mut non_nullable_cols: Vec<Column> = vec![];
84
85                    extract_non_nullable_columns(
86                        &filter.predicate,
87                        &mut non_nullable_cols,
88                        join.left.schema(),
89                        join.right.schema(),
90                        true,
91                    );
92
93                    let new_join_type = if join.join_type.is_outer() {
94                        let mut left_non_nullable = false;
95                        let mut right_non_nullable = false;
96                        for col in non_nullable_cols.iter() {
97                            if join.left.schema().has_column(col) {
98                                left_non_nullable = true;
99                            }
100                            if join.right.schema().has_column(col) {
101                                right_non_nullable = true;
102                            }
103                        }
104                        eliminate_outer(
105                            join.join_type,
106                            left_non_nullable,
107                            right_non_nullable,
108                        )
109                    } else {
110                        join.join_type
111                    };
112
113                    let new_join = Arc::new(LogicalPlan::Join(Join {
114                        left: join.left,
115                        right: join.right,
116                        join_type: new_join_type,
117                        join_constraint: join.join_constraint,
118                        on: join.on.clone(),
119                        filter: join.filter.clone(),
120                        schema: Arc::clone(&join.schema),
121                        null_equality: join.null_equality,
122                        null_aware: join.null_aware,
123                    }));
124                    Filter::try_new(filter.predicate, new_join)
125                        .map(|f| Transformed::yes(LogicalPlan::Filter(f)))
126                }
127                filter_input => {
128                    filter.input = Arc::new(filter_input);
129                    Ok(Transformed::no(LogicalPlan::Filter(filter)))
130                }
131            },
132            _ => Ok(Transformed::no(plan)),
133        }
134    }
135}
136
137pub fn eliminate_outer(
138    join_type: JoinType,
139    left_non_nullable: bool,
140    right_non_nullable: bool,
141) -> JoinType {
142    let mut new_join_type = join_type;
143    match join_type {
144        JoinType::Left if right_non_nullable => {
145            new_join_type = JoinType::Inner;
146        }
147        JoinType::Left => {}
148        JoinType::Right if left_non_nullable => {
149            new_join_type = JoinType::Inner;
150        }
151        JoinType::Right => {}
152        JoinType::Full => {
153            if left_non_nullable && right_non_nullable {
154                new_join_type = JoinType::Inner;
155            } else if left_non_nullable {
156                new_join_type = JoinType::Left;
157            } else if right_non_nullable {
158                new_join_type = JoinType::Right;
159            }
160        }
161        _ => {}
162    }
163    new_join_type
164}
165
166/// Recursively traverses expr, if expr returns false when
167/// any inputs are null, treats columns of both sides as non_nullable columns.
168///
169/// For and/or expr, extracts from all sub exprs and merges the columns.
170/// For or expr, if one of sub exprs returns true, discards all columns from or expr.
171/// For IS NOT NULL/NOT expr, always returns false for NULL input.
172///     extracts columns from these exprs.
173/// For all other exprs, fall through
174fn extract_non_nullable_columns(
175    expr: &Expr,
176    non_nullable_cols: &mut Vec<Column>,
177    left_schema: &Arc<DFSchema>,
178    right_schema: &Arc<DFSchema>,
179    top_level: bool,
180) {
181    match expr {
182        Expr::Column(col) => {
183            non_nullable_cols.push(col.clone());
184        }
185        Expr::BinaryExpr(BinaryExpr { left, op, right }) => match op {
186            // If one of the inputs are null for these operators, the results should be false.
187            Operator::Eq
188            | Operator::NotEq
189            | Operator::Lt
190            | Operator::LtEq
191            | Operator::Gt
192            | Operator::GtEq => {
193                extract_non_nullable_columns(
194                    left,
195                    non_nullable_cols,
196                    left_schema,
197                    right_schema,
198                    false,
199                );
200                extract_non_nullable_columns(
201                    right,
202                    non_nullable_cols,
203                    left_schema,
204                    right_schema,
205                    false,
206                )
207            }
208            Operator::And | Operator::Or => {
209                // treat And as Or if does not from top level, such as
210                // not (c1 < 10 and c2 > 100)
211                if top_level && *op == Operator::And {
212                    extract_non_nullable_columns(
213                        left,
214                        non_nullable_cols,
215                        left_schema,
216                        right_schema,
217                        top_level,
218                    );
219                    extract_non_nullable_columns(
220                        right,
221                        non_nullable_cols,
222                        left_schema,
223                        right_schema,
224                        top_level,
225                    );
226                    return;
227                }
228
229                let mut left_non_nullable_cols: Vec<Column> = vec![];
230                let mut right_non_nullable_cols: Vec<Column> = vec![];
231
232                extract_non_nullable_columns(
233                    left,
234                    &mut left_non_nullable_cols,
235                    left_schema,
236                    right_schema,
237                    top_level,
238                );
239                extract_non_nullable_columns(
240                    right,
241                    &mut right_non_nullable_cols,
242                    left_schema,
243                    right_schema,
244                    top_level,
245                );
246
247                // for query: select *** from a left join b where b.c1 ... or b.c2 ...
248                // this can be eliminated to inner join.
249                // for query: select *** from a left join b where a.c1 ... or b.c2 ...
250                // this can not be eliminated.
251                // If columns of relation exist in both sub exprs, any columns of this relation
252                // can be added to non nullable columns.
253                if !left_non_nullable_cols.is_empty()
254                    && !right_non_nullable_cols.is_empty()
255                {
256                    for left_col in &left_non_nullable_cols {
257                        for right_col in &right_non_nullable_cols {
258                            if (left_schema.has_column(left_col)
259                                && left_schema.has_column(right_col))
260                                || (right_schema.has_column(left_col)
261                                    && right_schema.has_column(right_col))
262                            {
263                                non_nullable_cols.push(left_col.clone());
264                                break;
265                            }
266                        }
267                    }
268                }
269            }
270            _ => {}
271        },
272        Expr::Not(arg) => extract_non_nullable_columns(
273            arg,
274            non_nullable_cols,
275            left_schema,
276            right_schema,
277            false,
278        ),
279        Expr::IsNotNull(arg) => {
280            if !top_level {
281                return;
282            }
283            extract_non_nullable_columns(
284                arg,
285                non_nullable_cols,
286                left_schema,
287                right_schema,
288                false,
289            )
290        }
291        Expr::Cast(Cast { expr, field: _ })
292        | Expr::TryCast(TryCast { expr, field: _ }) => extract_non_nullable_columns(
293            expr,
294            non_nullable_cols,
295            left_schema,
296            right_schema,
297            false,
298        ),
299        // IN list and BETWEEN are null-rejecting on the input expression:
300        // if the input column is NULL, the result is NULL (filtered out),
301        // regardless of whether the list/range contains NULLs.
302        Expr::InList(InList { expr, .. }) => extract_non_nullable_columns(
303            expr,
304            non_nullable_cols,
305            left_schema,
306            right_schema,
307            false,
308        ),
309        Expr::Between(between) => extract_non_nullable_columns(
310            &between.expr,
311            non_nullable_cols,
312            left_schema,
313            right_schema,
314            false,
315        ),
316        // LIKE is null-rejecting: if either the input column or the pattern
317        // is NULL, the result is NULL (filtered out by WHERE).
318        Expr::Like(Like { expr, pattern, .. }) => {
319            extract_non_nullable_columns(
320                expr,
321                non_nullable_cols,
322                left_schema,
323                right_schema,
324                false,
325            );
326            extract_non_nullable_columns(
327                pattern,
328                non_nullable_cols,
329                left_schema,
330                right_schema,
331                false,
332            );
333        }
334        // IS TRUE, IS FALSE, and IS NOT UNKNOWN are null-rejecting:
335        // if the input is NULL, they return false (filtered out by WHERE).
336        // Note: IS NOT TRUE, IS NOT FALSE, and IS UNKNOWN are NOT null-rejecting
337        // because they return true for NULL input.
338        Expr::IsTrue(arg) | Expr::IsFalse(arg) | Expr::IsNotUnknown(arg) => {
339            extract_non_nullable_columns(
340                arg,
341                non_nullable_cols,
342                left_schema,
343                right_schema,
344                false,
345            )
346        }
347        _ => {}
348    }
349}
350
351#[cfg(test)]
352mod tests {
353    use super::*;
354    use crate::OptimizerContext;
355    use crate::assert_optimized_plan_eq_snapshot;
356    use crate::test::*;
357    use arrow::datatypes::DataType;
358    use datafusion_common::ScalarValue;
359    use datafusion_expr::{
360        Operator::{And, Or},
361        binary_expr, cast, col, lit,
362        logical_plan::builder::LogicalPlanBuilder,
363        try_cast,
364    };
365
366    macro_rules! assert_optimized_plan_equal {
367        (
368            $plan:expr,
369            @ $expected:literal $(,)?
370        ) => {{
371            let optimizer_ctx = OptimizerContext::new().with_max_passes(1);
372            let rules: Vec<Arc<dyn crate::OptimizerRule + Send + Sync>> = vec![Arc::new(EliminateOuterJoin::new())];
373            assert_optimized_plan_eq_snapshot!(
374                optimizer_ctx,
375                rules,
376                $plan,
377                @ $expected,
378            )
379        }};
380    }
381
382    #[test]
383    fn eliminate_left_with_null() -> Result<()> {
384        let t1 = test_table_scan_with_name("t1")?;
385        let t2 = test_table_scan_with_name("t2")?;
386
387        // could not eliminate to inner join
388        let plan = LogicalPlanBuilder::from(t1)
389            .join(
390                t2,
391                JoinType::Left,
392                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
393                None,
394            )?
395            .filter(col("t2.b").is_null())?
396            .build()?;
397
398        assert_optimized_plan_equal!(plan, @r"
399        Filter: t2.b IS NULL
400          Left Join: t1.a = t2.a
401            TableScan: t1
402            TableScan: t2
403        ")
404    }
405
406    #[test]
407    fn eliminate_left_with_not_null() -> Result<()> {
408        let t1 = test_table_scan_with_name("t1")?;
409        let t2 = test_table_scan_with_name("t2")?;
410
411        // eliminate to inner join
412        let plan = LogicalPlanBuilder::from(t1)
413            .join(
414                t2,
415                JoinType::Left,
416                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
417                None,
418            )?
419            .filter(col("t2.b").is_not_null())?
420            .build()?;
421
422        assert_optimized_plan_equal!(plan, @r"
423        Filter: t2.b IS NOT NULL
424          Inner Join: t1.a = t2.a
425            TableScan: t1
426            TableScan: t2
427        ")
428    }
429
430    #[test]
431    fn eliminate_right_with_or() -> Result<()> {
432        let t1 = test_table_scan_with_name("t1")?;
433        let t2 = test_table_scan_with_name("t2")?;
434
435        // eliminate to inner join
436        let plan = LogicalPlanBuilder::from(t1)
437            .join(
438                t2,
439                JoinType::Right,
440                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
441                None,
442            )?
443            .filter(binary_expr(
444                col("t1.b").gt(lit(10u32)),
445                Or,
446                col("t1.c").lt(lit(20u32)),
447            ))?
448            .build()?;
449
450        assert_optimized_plan_equal!(plan, @r"
451        Filter: t1.b > UInt32(10) OR t1.c < UInt32(20)
452          Inner Join: t1.a = t2.a
453            TableScan: t1
454            TableScan: t2
455        ")
456    }
457
458    #[test]
459    fn eliminate_full_with_and() -> Result<()> {
460        let t1 = test_table_scan_with_name("t1")?;
461        let t2 = test_table_scan_with_name("t2")?;
462
463        // eliminate to inner join
464        let plan = LogicalPlanBuilder::from(t1)
465            .join(
466                t2,
467                JoinType::Full,
468                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
469                None,
470            )?
471            .filter(binary_expr(
472                col("t1.b").gt(lit(10u32)),
473                And,
474                col("t2.c").lt(lit(20u32)),
475            ))?
476            .build()?;
477
478        assert_optimized_plan_equal!(plan, @r"
479        Filter: t1.b > UInt32(10) AND t2.c < UInt32(20)
480          Inner Join: t1.a = t2.a
481            TableScan: t1
482            TableScan: t2
483        ")
484    }
485
486    #[test]
487    fn eliminate_left_with_in_list() -> Result<()> {
488        let t1 = test_table_scan_with_name("t1")?;
489        let t2 = test_table_scan_with_name("t2")?;
490
491        // t2.b IN (1, 2, 3) rejects nulls — if t2.b is NULL the IN returns
492        // NULL which is filtered out. So Left Join should become Inner Join.
493        let plan = LogicalPlanBuilder::from(t1)
494            .join(
495                t2,
496                JoinType::Left,
497                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
498                None,
499            )?
500            .filter(col("t2.b").in_list(vec![lit(1u32), lit(2u32), lit(3u32)], false))?
501            .build()?;
502
503        assert_optimized_plan_equal!(plan, @r"
504        Filter: t2.b IN ([UInt32(1), UInt32(2), UInt32(3)])
505          Inner Join: t1.a = t2.a
506            TableScan: t1
507            TableScan: t2
508        ")
509    }
510
511    #[test]
512    fn eliminate_left_with_in_list_containing_null() -> Result<()> {
513        let t1 = test_table_scan_with_name("t1")?;
514        let t2 = test_table_scan_with_name("t2")?;
515
516        // IN list with NULL still rejects null input columns:
517        // if t2.b is NULL, NULL IN (1, NULL) evaluates to NULL, which is filtered out
518        let plan = LogicalPlanBuilder::from(t1)
519            .join(
520                t2,
521                JoinType::Left,
522                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
523                None,
524            )?
525            .filter(
526                col("t2.b")
527                    .in_list(vec![lit(1u32), lit(ScalarValue::UInt32(None))], false),
528            )?
529            .build()?;
530
531        assert_optimized_plan_equal!(plan, @r"
532        Filter: t2.b IN ([UInt32(1), UInt32(NULL)])
533          Inner Join: t1.a = t2.a
534            TableScan: t1
535            TableScan: t2
536        ")
537    }
538
539    #[test]
540    fn eliminate_left_with_not_in_list() -> Result<()> {
541        let t1 = test_table_scan_with_name("t1")?;
542        let t2 = test_table_scan_with_name("t2")?;
543
544        // NOT IN also rejects nulls: if t2.b is NULL, NOT (NULL IN (...))
545        // evaluates to NULL, which is filtered out
546        let plan = LogicalPlanBuilder::from(t1)
547            .join(
548                t2,
549                JoinType::Left,
550                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
551                None,
552            )?
553            .filter(col("t2.b").in_list(vec![lit(1u32), lit(2u32)], true))?
554            .build()?;
555
556        assert_optimized_plan_equal!(plan, @r"
557        Filter: t2.b NOT IN ([UInt32(1), UInt32(2)])
558          Inner Join: t1.a = t2.a
559            TableScan: t1
560            TableScan: t2
561        ")
562    }
563
564    #[test]
565    fn eliminate_left_with_between() -> Result<()> {
566        let t1 = test_table_scan_with_name("t1")?;
567        let t2 = test_table_scan_with_name("t2")?;
568
569        // BETWEEN rejects nulls: if t2.b is NULL, NULL BETWEEN 1 AND 10
570        // evaluates to NULL, which is filtered out
571        let plan = LogicalPlanBuilder::from(t1)
572            .join(
573                t2,
574                JoinType::Left,
575                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
576                None,
577            )?
578            .filter(col("t2.b").between(lit(1u32), lit(10u32)))?
579            .build()?;
580
581        assert_optimized_plan_equal!(plan, @r"
582        Filter: t2.b BETWEEN UInt32(1) AND UInt32(10)
583          Inner Join: t1.a = t2.a
584            TableScan: t1
585            TableScan: t2
586        ")
587    }
588
589    #[test]
590    fn eliminate_right_with_between() -> Result<()> {
591        let t1 = test_table_scan_with_name("t1")?;
592        let t2 = test_table_scan_with_name("t2")?;
593
594        // Right join: filter on left (nullable) side with BETWEEN should convert to Inner
595        let plan = LogicalPlanBuilder::from(t1)
596            .join(
597                t2,
598                JoinType::Right,
599                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
600                None,
601            )?
602            .filter(col("t1.b").between(lit(1u32), lit(10u32)))?
603            .build()?;
604
605        assert_optimized_plan_equal!(plan, @r"
606        Filter: t1.b BETWEEN UInt32(1) AND UInt32(10)
607          Inner Join: t1.a = t2.a
608            TableScan: t1
609            TableScan: t2
610        ")
611    }
612
613    #[test]
614    fn eliminate_full_with_between() -> Result<()> {
615        let t1 = test_table_scan_with_name("t1")?;
616        let t2 = test_table_scan_with_name("t2")?;
617
618        // Full join with BETWEEN on both sides should become Inner
619        let plan = LogicalPlanBuilder::from(t1)
620            .join(
621                t2,
622                JoinType::Full,
623                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
624                None,
625            )?
626            .filter(binary_expr(
627                col("t1.b").between(lit(1u32), lit(10u32)),
628                And,
629                col("t2.b").between(lit(5u32), lit(20u32)),
630            ))?
631            .build()?;
632
633        assert_optimized_plan_equal!(plan, @r"
634        Filter: t1.b BETWEEN UInt32(1) AND UInt32(10) AND t2.b BETWEEN UInt32(5) AND UInt32(20)
635          Inner Join: t1.a = t2.a
636            TableScan: t1
637            TableScan: t2
638        ")
639    }
640
641    #[test]
642    fn eliminate_full_with_in_list() -> Result<()> {
643        let t1 = test_table_scan_with_name("t1")?;
644        let t2 = test_table_scan_with_name("t2")?;
645
646        // Full join with IN filters on both sides should become Inner
647        let plan = LogicalPlanBuilder::from(t1)
648            .join(
649                t2,
650                JoinType::Full,
651                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
652                None,
653            )?
654            .filter(binary_expr(
655                col("t1.b").in_list(vec![lit(1u32), lit(2u32)], false),
656                And,
657                col("t2.b").in_list(vec![lit(3u32), lit(4u32)], false),
658            ))?
659            .build()?;
660
661        assert_optimized_plan_equal!(plan, @r"
662        Filter: t1.b IN ([UInt32(1), UInt32(2)]) AND t2.b IN ([UInt32(3), UInt32(4)])
663          Inner Join: t1.a = t2.a
664            TableScan: t1
665            TableScan: t2
666        ")
667    }
668
669    #[test]
670    fn no_eliminate_left_with_in_list_or_is_null() -> Result<()> {
671        let t1 = test_table_scan_with_name("t1")?;
672        let t2 = test_table_scan_with_name("t2")?;
673
674        // WHERE (t2.b IN (1, 2)) OR (t2.b IS NULL)
675        // The OR with IS NULL makes the predicate null-tolerant:
676        // when t2.b is NULL, IS NULL returns true, so the whole OR is true.
677        // The outer join must be preserved.
678        let plan = LogicalPlanBuilder::from(t1)
679            .join(
680                t2,
681                JoinType::Left,
682                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
683                None,
684            )?
685            .filter(binary_expr(
686                col("t2.b").in_list(vec![lit(1u32), lit(2u32)], false),
687                Or,
688                col("t2.b").is_null(),
689            ))?
690            .build()?;
691
692        // Should NOT be converted to Inner — OR with IS NULL preserves null rows
693        assert_optimized_plan_equal!(plan, @r"
694        Filter: t2.b IN ([UInt32(1), UInt32(2)]) OR t2.b IS NULL
695          Left Join: t1.a = t2.a
696            TableScan: t1
697            TableScan: t2
698        ")
699    }
700
701    #[test]
702    fn eliminate_left_with_like() -> Result<()> {
703        let t1 = test_table_scan_with_name("t1")?;
704        let t2 = test_table_scan_with_name("t2")?;
705
706        // LIKE rejects nulls: if t2.b is NULL, the result is NULL (filtered out)
707        let plan = LogicalPlanBuilder::from(t1)
708            .join(
709                t2,
710                JoinType::Left,
711                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
712                None,
713            )?
714            .filter(col("t2.b").like(lit("%pattern%")))?
715            .build()?;
716
717        assert_optimized_plan_equal!(plan, @r#"
718        Filter: t2.b LIKE Utf8("%pattern%")
719          Inner Join: t1.a = t2.a
720            TableScan: t1
721            TableScan: t2
722        "#)
723    }
724
725    #[test]
726    fn eliminate_left_with_like_pattern_column() -> Result<()> {
727        let t1 = test_table_scan_with_name("t1")?;
728        let t2 = test_table_scan_with_name("t2")?;
729
730        // LIKE with nullable column on the pattern side:
731        // 'x' LIKE t2.b → if t2.b is NULL, result is NULL (filtered out)
732        let plan = LogicalPlanBuilder::from(t1)
733            .join(
734                t2,
735                JoinType::Left,
736                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
737                None,
738            )?
739            .filter(lit("x").like(col("t2.b")))?
740            .build()?;
741
742        assert_optimized_plan_equal!(plan, @r#"
743        Filter: Utf8("x") LIKE t2.b
744          Inner Join: t1.a = t2.a
745            TableScan: t1
746            TableScan: t2
747        "#)
748    }
749
750    #[test]
751    fn eliminate_full_with_like_cross_side() -> Result<()> {
752        let t1 = test_table_scan_with_name("t1")?;
753        let t2 = test_table_scan_with_name("t2")?;
754
755        // LIKE with columns from both sides: t1.c LIKE t2.b
756        // If t1 is NULL → NULL LIKE t2.b → NULL → filtered out (left non-nullable)
757        // If t2 is NULL → t1.c LIKE NULL → NULL → filtered out (right non-nullable)
758        // Both sides are non-nullable → FULL → INNER
759        let plan = LogicalPlanBuilder::from(t1)
760            .join(
761                t2,
762                JoinType::Full,
763                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
764                None,
765            )?
766            .filter(col("t1.c").like(col("t2.b")))?
767            .build()?;
768
769        assert_optimized_plan_equal!(plan, @r"
770        Filter: t1.c LIKE t2.b
771          Inner Join: t1.a = t2.a
772            TableScan: t1
773            TableScan: t2
774        ")
775    }
776
777    #[test]
778    fn eliminate_left_with_is_true() -> Result<()> {
779        let t1 = test_table_scan_with_name("t1")?;
780        let t2 = test_table_scan_with_name("t2")?;
781
782        // IS TRUE rejects nulls: if the expression is NULL, IS TRUE returns false
783        let plan = LogicalPlanBuilder::from(t1)
784            .join(
785                t2,
786                JoinType::Left,
787                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
788                None,
789            )?
790            .filter(col("t2.b").gt(lit(10u32)).is_true())?
791            .build()?;
792
793        assert_optimized_plan_equal!(plan, @r"
794        Filter: t2.b > UInt32(10) IS TRUE
795          Inner Join: t1.a = t2.a
796            TableScan: t1
797            TableScan: t2
798        ")
799    }
800
801    #[test]
802    fn eliminate_left_with_is_false() -> Result<()> {
803        let t1 = test_table_scan_with_name("t1")?;
804        let t2 = test_table_scan_with_name("t2")?;
805
806        // IS FALSE rejects nulls: if the expression is NULL, IS FALSE returns false
807        let plan = LogicalPlanBuilder::from(t1)
808            .join(
809                t2,
810                JoinType::Left,
811                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
812                None,
813            )?
814            .filter(col("t2.b").gt(lit(10u32)).is_false())?
815            .build()?;
816
817        assert_optimized_plan_equal!(plan, @r"
818        Filter: t2.b > UInt32(10) IS FALSE
819          Inner Join: t1.a = t2.a
820            TableScan: t1
821            TableScan: t2
822        ")
823    }
824
825    #[test]
826    fn eliminate_left_with_is_not_unknown() -> Result<()> {
827        let t1 = test_table_scan_with_name("t1")?;
828        let t2 = test_table_scan_with_name("t2")?;
829
830        // IS NOT UNKNOWN rejects nulls: if the expression is NULL, IS NOT UNKNOWN returns false
831        let plan = LogicalPlanBuilder::from(t1)
832            .join(
833                t2,
834                JoinType::Left,
835                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
836                None,
837            )?
838            .filter(col("t2.b").gt(lit(10u32)).is_not_unknown())?
839            .build()?;
840
841        assert_optimized_plan_equal!(plan, @r"
842        Filter: t2.b > UInt32(10) IS NOT UNKNOWN
843          Inner Join: t1.a = t2.a
844            TableScan: t1
845            TableScan: t2
846        ")
847    }
848
849    #[test]
850    fn no_eliminate_left_with_is_not_true() -> Result<()> {
851        let t1 = test_table_scan_with_name("t1")?;
852        let t2 = test_table_scan_with_name("t2")?;
853
854        // IS NOT TRUE is NOT null-rejecting: if the expression is NULL,
855        // IS NOT TRUE returns true, so null rows pass through
856        let plan = LogicalPlanBuilder::from(t1)
857            .join(
858                t2,
859                JoinType::Left,
860                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
861                None,
862            )?
863            .filter(col("t2.b").gt(lit(10u32)).is_not_true())?
864            .build()?;
865
866        assert_optimized_plan_equal!(plan, @r"
867        Filter: t2.b > UInt32(10) IS NOT TRUE
868          Left Join: t1.a = t2.a
869            TableScan: t1
870            TableScan: t2
871        ")
872    }
873
874    #[test]
875    fn no_eliminate_left_with_is_unknown() -> Result<()> {
876        let t1 = test_table_scan_with_name("t1")?;
877        let t2 = test_table_scan_with_name("t2")?;
878
879        // IS UNKNOWN is NOT null-rejecting: if the expression is NULL,
880        // IS UNKNOWN returns true, so null rows pass through
881        let plan = LogicalPlanBuilder::from(t1)
882            .join(
883                t2,
884                JoinType::Left,
885                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
886                None,
887            )?
888            .filter(col("t2.b").gt(lit(10u32)).is_unknown())?
889            .build()?;
890
891        assert_optimized_plan_equal!(plan, @r"
892        Filter: t2.b > UInt32(10) IS UNKNOWN
893          Left Join: t1.a = t2.a
894            TableScan: t1
895            TableScan: t2
896        ")
897    }
898
899    #[test]
900    fn eliminate_full_with_type_cast() -> Result<()> {
901        let t1 = test_table_scan_with_name("t1")?;
902        let t2 = test_table_scan_with_name("t2")?;
903
904        // eliminate to inner join
905        let plan = LogicalPlanBuilder::from(t1)
906            .join(
907                t2,
908                JoinType::Full,
909                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
910                None,
911            )?
912            .filter(binary_expr(
913                cast(col("t1.b"), DataType::Int64).gt(lit(10u32)),
914                And,
915                try_cast(col("t2.c"), DataType::Int64).lt(lit(20u32)),
916            ))?
917            .build()?;
918
919        assert_optimized_plan_equal!(plan, @r"
920        Filter: CAST(t1.b AS Int64) > UInt32(10) AND TRY_CAST(t2.c AS Int64) < UInt32(20)
921          Inner Join: t1.a = t2.a
922            TableScan: t1
923            TableScan: t2
924        ")
925    }
926
927    // ----- FULL JOIN → LEFT / RIGHT tests -----
928    #[test]
929    fn eliminate_full_to_left_with_left_filter() -> Result<()> {
930        let t1 = test_table_scan_with_name("t1")?;
931        let t2 = test_table_scan_with_name("t2")?;
932
933        // FULL JOIN with null-rejecting filter only on left side → LEFT JOIN
934        // (left side becomes non-nullable, right side stays nullable)
935        let plan = LogicalPlanBuilder::from(t1)
936            .join(
937                t2,
938                JoinType::Full,
939                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
940                None,
941            )?
942            .filter(col("t1.b").gt(lit(10u32)))?
943            .build()?;
944
945        assert_optimized_plan_equal!(plan, @r"
946        Filter: t1.b > UInt32(10)
947          Left Join: t1.a = t2.a
948            TableScan: t1
949            TableScan: t2
950        ")
951    }
952
953    #[test]
954    fn eliminate_full_to_right_with_right_filter() -> Result<()> {
955        let t1 = test_table_scan_with_name("t1")?;
956        let t2 = test_table_scan_with_name("t2")?;
957
958        // FULL JOIN with null-rejecting filter only on right side → RIGHT JOIN
959        let plan = LogicalPlanBuilder::from(t1)
960            .join(
961                t2,
962                JoinType::Full,
963                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
964                None,
965            )?
966            .filter(col("t2.b").in_list(vec![lit(1u32), lit(2u32)], false))?
967            .build()?;
968
969        assert_optimized_plan_equal!(plan, @r"
970        Filter: t2.b IN ([UInt32(1), UInt32(2)])
971          Right Join: t1.a = t2.a
972            TableScan: t1
973            TableScan: t2
974        ")
975    }
976
977    #[test]
978    fn eliminate_full_to_left_with_like() -> Result<()> {
979        let t1 = test_table_scan_with_name("t1")?;
980        let t2 = test_table_scan_with_name("t2")?;
981
982        // FULL JOIN with LIKE on left side only → LEFT JOIN
983        let plan = LogicalPlanBuilder::from(t1)
984            .join(
985                t2,
986                JoinType::Full,
987                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
988                None,
989            )?
990            .filter(col("t1.b").like(lit("%val%")))?
991            .build()?;
992
993        assert_optimized_plan_equal!(plan, @r#"
994        Filter: t1.b LIKE Utf8("%val%")
995          Left Join: t1.a = t2.a
996            TableScan: t1
997            TableScan: t2
998        "#)
999    }
1000
1001    #[test]
1002    fn eliminate_full_to_right_with_is_true() -> Result<()> {
1003        let t1 = test_table_scan_with_name("t1")?;
1004        let t2 = test_table_scan_with_name("t2")?;
1005
1006        // FULL JOIN with IS TRUE on right side only → RIGHT JOIN
1007        let plan = LogicalPlanBuilder::from(t1)
1008            .join(
1009                t2,
1010                JoinType::Full,
1011                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
1012                None,
1013            )?
1014            .filter(col("t2.b").gt(lit(10u32)).is_true())?
1015            .build()?;
1016
1017        assert_optimized_plan_equal!(plan, @r"
1018        Filter: t2.b > UInt32(10) IS TRUE
1019          Right Join: t1.a = t2.a
1020            TableScan: t1
1021            TableScan: t2
1022        ")
1023    }
1024
1025    // ----- Nested AND / OR tests -----
1026
1027    #[test]
1028    fn eliminate_left_with_and_multiple_null_rejecting() -> Result<()> {
1029        let t1 = test_table_scan_with_name("t1")?;
1030        let t2 = test_table_scan_with_name("t2")?;
1031
1032        // Multiple null-rejecting predicates combined with AND on nullable side
1033        let plan = LogicalPlanBuilder::from(t1)
1034            .join(
1035                t2,
1036                JoinType::Left,
1037                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
1038                None,
1039            )?
1040            .filter(binary_expr(
1041                col("t2.b").in_list(vec![lit(1u32), lit(2u32)], false),
1042                And,
1043                col("t2.c").between(lit(5u32), lit(20u32)),
1044            ))?
1045            .build()?;
1046
1047        assert_optimized_plan_equal!(plan, @r"
1048        Filter: t2.b IN ([UInt32(1), UInt32(2)]) AND t2.c BETWEEN UInt32(5) AND UInt32(20)
1049          Inner Join: t1.a = t2.a
1050            TableScan: t1
1051            TableScan: t2
1052        ")
1053    }
1054
1055    #[test]
1056    fn eliminate_left_with_or_same_side() -> Result<()> {
1057        let t1 = test_table_scan_with_name("t1")?;
1058        let t2 = test_table_scan_with_name("t2")?;
1059
1060        // OR of two null-rejecting predicates on different columns of the same
1061        // nullable side. If t2 rows are NULL (from LEFT JOIN), both t2.b and
1062        // t2.c are NULL, so the entire OR evaluates to NULL → filtered out.
1063        // This IS null-rejecting, so join should be eliminated.
1064        let plan = LogicalPlanBuilder::from(t1)
1065            .join(
1066                t2,
1067                JoinType::Left,
1068                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
1069                None,
1070            )?
1071            .filter(binary_expr(
1072                col("t2.b").gt(lit(10u32)),
1073                Or,
1074                col("t2.c").lt(lit(20u32)),
1075            ))?
1076            .build()?;
1077
1078        assert_optimized_plan_equal!(plan, @r"
1079        Filter: t2.b > UInt32(10) OR t2.c < UInt32(20)
1080          Inner Join: t1.a = t2.a
1081            TableScan: t1
1082            TableScan: t2
1083        ")
1084    }
1085
1086    #[test]
1087    fn no_eliminate_left_with_or_cross_side() -> Result<()> {
1088        let t1 = test_table_scan_with_name("t1")?;
1089        let t2 = test_table_scan_with_name("t2")?;
1090
1091        // OR with columns from different sides — t1.b (preserved) OR t2.b
1092        // (nullable). When t2 is NULL, t1.b > 10 can still be true, so the
1093        // OR is NOT null-rejecting. Join must be preserved.
1094        let plan = LogicalPlanBuilder::from(t1)
1095            .join(
1096                t2,
1097                JoinType::Left,
1098                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
1099                None,
1100            )?
1101            .filter(binary_expr(
1102                col("t1.b").gt(lit(10u32)),
1103                Or,
1104                col("t2.b").lt(lit(20u32)),
1105            ))?
1106            .build()?;
1107
1108        assert_optimized_plan_equal!(plan, @r"
1109        Filter: t1.b > UInt32(10) OR t2.b < UInt32(20)
1110          Left Join: t1.a = t2.a
1111            TableScan: t1
1112            TableScan: t2
1113        ")
1114    }
1115
1116    // ----- Mixed predicate tests -----
1117
1118    #[test]
1119    fn eliminate_full_with_mixed_predicates() -> Result<()> {
1120        let t1 = test_table_scan_with_name("t1")?;
1121        let t2 = test_table_scan_with_name("t2")?;
1122
1123        // FULL JOIN with different null-rejecting expr types on each side:
1124        // LIKE on left, BETWEEN on right → INNER JOIN
1125        let plan = LogicalPlanBuilder::from(t1)
1126            .join(
1127                t2,
1128                JoinType::Full,
1129                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
1130                None,
1131            )?
1132            .filter(binary_expr(
1133                col("t1.b").like(lit("%pattern%")),
1134                And,
1135                col("t2.b").between(lit(1u32), lit(10u32)),
1136            ))?
1137            .build()?;
1138
1139        assert_optimized_plan_equal!(plan, @r#"
1140        Filter: t1.b LIKE Utf8("%pattern%") AND t2.b BETWEEN UInt32(1) AND UInt32(10)
1141          Inner Join: t1.a = t2.a
1142            TableScan: t1
1143            TableScan: t2
1144        "#)
1145    }
1146
1147    #[test]
1148    fn eliminate_left_with_is_true_and_in_list() -> Result<()> {
1149        let t1 = test_table_scan_with_name("t1")?;
1150        let t2 = test_table_scan_with_name("t2")?;
1151
1152        // AND of IS TRUE and IN on nullable side — both null-rejecting
1153        let plan = LogicalPlanBuilder::from(t1)
1154            .join(
1155                t2,
1156                JoinType::Left,
1157                (vec![Column::from_name("a")], vec![Column::from_name("a")]),
1158                None,
1159            )?
1160            .filter(binary_expr(
1161                col("t2.b").gt(lit(5u32)).is_true(),
1162                And,
1163                col("t2.c").in_list(vec![lit(1u32), lit(2u32)], false),
1164            ))?
1165            .build()?;
1166
1167        assert_optimized_plan_equal!(plan, @r"
1168        Filter: t2.b > UInt32(5) IS TRUE AND t2.c IN ([UInt32(1), UInt32(2)])
1169          Inner Join: t1.a = t2.a
1170            TableScan: t1
1171            TableScan: t2
1172        ")
1173    }
1174}