Skip to main content

datafusion_common/
join_type.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines the [`JoinType`], [`JoinConstraint`] and [`JoinSide`] types.
19
20use std::{
21    fmt::{self, Display, Formatter},
22    str::FromStr,
23};
24
25use crate::error::_not_impl_err;
26use crate::{DataFusionError, Result};
27
28/// Join type
29#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Hash)]
30pub enum JoinType {
31    /// Inner Join - Returns only rows where there is a matching value in both tables based on the join condition.
32    /// For example, if joining table A and B on A.id = B.id, only rows where A.id equals B.id will be included.
33    /// All columns from both tables are returned for the matching rows. Non-matching rows are excluded entirely.
34    Inner,
35    /// Left Join - Returns all rows from the left table and matching rows from the right table.
36    /// If no match, NULL values are returned for columns from the right table.
37    Left,
38    /// Right Join - Returns all rows from the right table and matching rows from the left table.
39    /// If no match, NULL values are returned for columns from the left table.
40    Right,
41    /// Full Join (also called Full Outer Join) - Returns all rows from both tables, matching rows where possible.
42    /// When a row from either table has no match in the other table, the missing columns are filled with NULL values.
43    /// For example, if table A has row X with no match in table B, the result will contain row X with NULL values for all of table B's columns.
44    /// This join type preserves all records from both tables, making it useful when you need to see all data regardless of matches.
45    Full,
46    /// Left Semi Join - Returns rows from the left table that have matching rows in the right table.
47    /// Only columns from the left table are returned.
48    LeftSemi,
49    /// Right Semi Join - Returns rows from the right table that have matching rows in the left table.
50    /// Only columns from the right table are returned.
51    RightSemi,
52    /// Left Anti Join - Returns rows from the left table that do not have a matching row in the right table.
53    LeftAnti,
54    /// Right Anti Join - Returns rows from the right table that do not have a matching row in the left table.
55    RightAnti,
56    /// Left Mark join
57    ///
58    /// Returns one record for each record from the left input. The output contains an additional
59    /// column "mark" which is true if there is at least one match in the right input where the
60    /// join condition evaluates to true. Otherwise, the mark column is false. For more details see
61    /// [1]. This join type is used to decorrelate EXISTS subqueries used inside disjunctive
62    /// predicates.
63    ///
64    /// Note: This we currently do not implement the full null semantics for the mark join described
65    /// in [1] which will be needed if we and ANY subqueries. In our version the mark column will
66    /// only be true for had a match and false when no match was found, never null.
67    ///
68    /// [1]: http://btw2017.informatik.uni-stuttgart.de/slidesandpapers/F1-10-37/paper_web.pdf
69    LeftMark,
70    /// Right Mark Join
71    ///
72    /// Same logic as the LeftMark Join above, however it returns a record for each record from the
73    /// right input.
74    RightMark,
75}
76
77impl JoinType {
78    pub fn is_outer(self) -> bool {
79        self == JoinType::Left || self == JoinType::Right || self == JoinType::Full
80    }
81
82    /// Returns the `JoinType` if the (2) inputs were swapped
83    ///
84    /// Panics if [`Self::supports_swap`] returns false
85    pub fn swap(&self) -> JoinType {
86        match self {
87            JoinType::Inner => JoinType::Inner,
88            JoinType::Full => JoinType::Full,
89            JoinType::Left => JoinType::Right,
90            JoinType::Right => JoinType::Left,
91            JoinType::LeftSemi => JoinType::RightSemi,
92            JoinType::RightSemi => JoinType::LeftSemi,
93            JoinType::LeftAnti => JoinType::RightAnti,
94            JoinType::RightAnti => JoinType::LeftAnti,
95            JoinType::LeftMark => JoinType::RightMark,
96            JoinType::RightMark => JoinType::LeftMark,
97        }
98    }
99
100    /// Whether each side of the join is preserved for ON-clause filter pushdown.
101    ///
102    /// It is only correct to push ON-clause filters below a join for preserved
103    /// inputs.
104    ///
105    /// # "Preserved" input definition
106    ///
107    /// A join side is preserved if the join returns all or a subset of the rows
108    /// from that side, such that each output row directly maps to an input row.
109    /// If a side is not preserved, the join can produce extra null rows that
110    /// don't map to any input row.
111    ///
112    /// # Return Value
113    ///
114    /// A tuple of booleans - (left_preserved, right_preserved).
115    pub fn on_lr_is_preserved(&self) -> (bool, bool) {
116        match self {
117            JoinType::Inner => (true, true),
118            JoinType::Left => (false, true),
119            JoinType::Right => (true, false),
120            JoinType::Full => (false, false),
121            JoinType::LeftSemi | JoinType::RightSemi => (true, true),
122            JoinType::LeftAnti => (false, true),
123            JoinType::RightAnti => (true, false),
124            JoinType::LeftMark => (false, true),
125            JoinType::RightMark => (true, false),
126        }
127    }
128
129    /// Does the join type support swapping inputs?
130    pub fn supports_swap(&self) -> bool {
131        matches!(
132            self,
133            JoinType::Inner
134                | JoinType::Left
135                | JoinType::Right
136                | JoinType::Full
137                | JoinType::LeftSemi
138                | JoinType::RightSemi
139                | JoinType::LeftAnti
140                | JoinType::RightAnti
141                | JoinType::LeftMark
142                | JoinType::RightMark
143        )
144    }
145
146    /// Returns true when an empty build side necessarily produces an empty
147    /// result for this join type.
148    pub fn empty_build_side_produces_empty_result(self) -> bool {
149        matches!(
150            self,
151            JoinType::Inner
152                | JoinType::Left
153                | JoinType::LeftSemi
154                | JoinType::LeftAnti
155                | JoinType::LeftMark
156                | JoinType::RightSemi
157        )
158    }
159}
160
161impl Display for JoinType {
162    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
163        let join_type = match self {
164            JoinType::Inner => "Inner",
165            JoinType::Left => "Left",
166            JoinType::Right => "Right",
167            JoinType::Full => "Full",
168            JoinType::LeftSemi => "LeftSemi",
169            JoinType::RightSemi => "RightSemi",
170            JoinType::LeftAnti => "LeftAnti",
171            JoinType::RightAnti => "RightAnti",
172            JoinType::LeftMark => "LeftMark",
173            JoinType::RightMark => "RightMark",
174        };
175        write!(f, "{join_type}")
176    }
177}
178
179impl FromStr for JoinType {
180    type Err = DataFusionError;
181
182    fn from_str(s: &str) -> Result<Self> {
183        let s = s.to_uppercase();
184        match s.as_str() {
185            "INNER" => Ok(JoinType::Inner),
186            "LEFT" => Ok(JoinType::Left),
187            "RIGHT" => Ok(JoinType::Right),
188            "FULL" => Ok(JoinType::Full),
189            "LEFTSEMI" => Ok(JoinType::LeftSemi),
190            "RIGHTSEMI" => Ok(JoinType::RightSemi),
191            "LEFTANTI" => Ok(JoinType::LeftAnti),
192            "RIGHTANTI" => Ok(JoinType::RightAnti),
193            "LEFTMARK" => Ok(JoinType::LeftMark),
194            "RIGHTMARK" => Ok(JoinType::RightMark),
195            _ => _not_impl_err!("The join type {s} does not exist or is not implemented"),
196        }
197    }
198}
199
200/// Join constraint
201#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Hash)]
202pub enum JoinConstraint {
203    /// Join ON
204    On,
205    /// Join USING
206    Using,
207}
208
209impl Display for JoinSide {
210    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
211        match self {
212            JoinSide::Left => write!(f, "left"),
213            JoinSide::Right => write!(f, "right"),
214            JoinSide::None => write!(f, "none"),
215        }
216    }
217}
218
219/// Join side.
220/// Stores the referred table side during calculations
221#[derive(Debug, Clone, Copy, PartialEq)]
222pub enum JoinSide {
223    /// Left side of the join
224    Left,
225    /// Right side of the join
226    Right,
227    /// Neither side of the join, used for Mark joins where the mark column does not belong to
228    /// either side of the join
229    None,
230}
231
232impl JoinSide {
233    /// Inverse the join side
234    pub fn negate(&self) -> Self {
235        match self {
236            JoinSide::Left => JoinSide::Right,
237            JoinSide::Right => JoinSide::Left,
238            JoinSide::None => JoinSide::None,
239        }
240    }
241}