datafusion_common/join_type.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines the [`JoinType`], [`JoinConstraint`] and [`JoinSide`] types.
19
20use std::{
21 fmt::{self, Display, Formatter},
22 str::FromStr,
23};
24
25use crate::error::_not_impl_err;
26use crate::{DataFusionError, Result};
27
28/// Join type
29#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Hash)]
30pub enum JoinType {
31 /// Inner Join - Returns only rows where there is a matching value in both tables based on the join condition.
32 /// For example, if joining table A and B on A.id = B.id, only rows where A.id equals B.id will be included.
33 /// All columns from both tables are returned for the matching rows. Non-matching rows are excluded entirely.
34 Inner,
35 /// Left Join - Returns all rows from the left table and matching rows from the right table.
36 /// If no match, NULL values are returned for columns from the right table.
37 Left,
38 /// Right Join - Returns all rows from the right table and matching rows from the left table.
39 /// If no match, NULL values are returned for columns from the left table.
40 Right,
41 /// Full Join (also called Full Outer Join) - Returns all rows from both tables, matching rows where possible.
42 /// When a row from either table has no match in the other table, the missing columns are filled with NULL values.
43 /// For example, if table A has row X with no match in table B, the result will contain row X with NULL values for all of table B's columns.
44 /// This join type preserves all records from both tables, making it useful when you need to see all data regardless of matches.
45 Full,
46 /// Left Semi Join - Returns rows from the left table that have matching rows in the right table.
47 /// Only columns from the left table are returned.
48 LeftSemi,
49 /// Right Semi Join - Returns rows from the right table that have matching rows in the left table.
50 /// Only columns from the right table are returned.
51 RightSemi,
52 /// Left Anti Join - Returns rows from the left table that do not have a matching row in the right table.
53 LeftAnti,
54 /// Right Anti Join - Returns rows from the right table that do not have a matching row in the left table.
55 RightAnti,
56 /// Left Mark join
57 ///
58 /// Returns one record for each record from the left input. The output contains an additional
59 /// column "mark" which is true if there is at least one match in the right input where the
60 /// join condition evaluates to true. Otherwise, the mark column is false. For more details see
61 /// [1]. This join type is used to decorrelate EXISTS subqueries used inside disjunctive
62 /// predicates.
63 ///
64 /// Note: This we currently do not implement the full null semantics for the mark join described
65 /// in [1] which will be needed if we and ANY subqueries. In our version the mark column will
66 /// only be true for had a match and false when no match was found, never null.
67 ///
68 /// [1]: http://btw2017.informatik.uni-stuttgart.de/slidesandpapers/F1-10-37/paper_web.pdf
69 LeftMark,
70 /// Right Mark Join
71 ///
72 /// Same logic as the LeftMark Join above, however it returns a record for each record from the
73 /// right input.
74 RightMark,
75}
76
77impl JoinType {
78 pub fn is_outer(self) -> bool {
79 self == JoinType::Left || self == JoinType::Right || self == JoinType::Full
80 }
81
82 /// Returns the `JoinType` if the (2) inputs were swapped
83 ///
84 /// Panics if [`Self::supports_swap`] returns false
85 pub fn swap(&self) -> JoinType {
86 match self {
87 JoinType::Inner => JoinType::Inner,
88 JoinType::Full => JoinType::Full,
89 JoinType::Left => JoinType::Right,
90 JoinType::Right => JoinType::Left,
91 JoinType::LeftSemi => JoinType::RightSemi,
92 JoinType::RightSemi => JoinType::LeftSemi,
93 JoinType::LeftAnti => JoinType::RightAnti,
94 JoinType::RightAnti => JoinType::LeftAnti,
95 JoinType::LeftMark => JoinType::RightMark,
96 JoinType::RightMark => JoinType::LeftMark,
97 }
98 }
99
100 /// Whether each side of the join is preserved for ON-clause filter pushdown.
101 ///
102 /// It is only correct to push ON-clause filters below a join for preserved
103 /// inputs.
104 ///
105 /// # "Preserved" input definition
106 ///
107 /// A join side is preserved if the join returns all or a subset of the rows
108 /// from that side, such that each output row directly maps to an input row.
109 /// If a side is not preserved, the join can produce extra null rows that
110 /// don't map to any input row.
111 ///
112 /// # Return Value
113 ///
114 /// A tuple of booleans - (left_preserved, right_preserved).
115 pub fn on_lr_is_preserved(&self) -> (bool, bool) {
116 match self {
117 JoinType::Inner => (true, true),
118 JoinType::Left => (false, true),
119 JoinType::Right => (true, false),
120 JoinType::Full => (false, false),
121 JoinType::LeftSemi | JoinType::RightSemi => (true, true),
122 JoinType::LeftAnti => (false, true),
123 JoinType::RightAnti => (true, false),
124 JoinType::LeftMark => (false, true),
125 JoinType::RightMark => (true, false),
126 }
127 }
128
129 /// Does the join type support swapping inputs?
130 pub fn supports_swap(&self) -> bool {
131 matches!(
132 self,
133 JoinType::Inner
134 | JoinType::Left
135 | JoinType::Right
136 | JoinType::Full
137 | JoinType::LeftSemi
138 | JoinType::RightSemi
139 | JoinType::LeftAnti
140 | JoinType::RightAnti
141 | JoinType::LeftMark
142 | JoinType::RightMark
143 )
144 }
145
146 /// Returns true when an empty build side necessarily produces an empty
147 /// result for this join type.
148 pub fn empty_build_side_produces_empty_result(self) -> bool {
149 matches!(
150 self,
151 JoinType::Inner
152 | JoinType::Left
153 | JoinType::LeftSemi
154 | JoinType::LeftAnti
155 | JoinType::LeftMark
156 | JoinType::RightSemi
157 )
158 }
159}
160
161impl Display for JoinType {
162 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
163 let join_type = match self {
164 JoinType::Inner => "Inner",
165 JoinType::Left => "Left",
166 JoinType::Right => "Right",
167 JoinType::Full => "Full",
168 JoinType::LeftSemi => "LeftSemi",
169 JoinType::RightSemi => "RightSemi",
170 JoinType::LeftAnti => "LeftAnti",
171 JoinType::RightAnti => "RightAnti",
172 JoinType::LeftMark => "LeftMark",
173 JoinType::RightMark => "RightMark",
174 };
175 write!(f, "{join_type}")
176 }
177}
178
179impl FromStr for JoinType {
180 type Err = DataFusionError;
181
182 fn from_str(s: &str) -> Result<Self> {
183 let s = s.to_uppercase();
184 match s.as_str() {
185 "INNER" => Ok(JoinType::Inner),
186 "LEFT" => Ok(JoinType::Left),
187 "RIGHT" => Ok(JoinType::Right),
188 "FULL" => Ok(JoinType::Full),
189 "LEFTSEMI" => Ok(JoinType::LeftSemi),
190 "RIGHTSEMI" => Ok(JoinType::RightSemi),
191 "LEFTANTI" => Ok(JoinType::LeftAnti),
192 "RIGHTANTI" => Ok(JoinType::RightAnti),
193 "LEFTMARK" => Ok(JoinType::LeftMark),
194 "RIGHTMARK" => Ok(JoinType::RightMark),
195 _ => _not_impl_err!("The join type {s} does not exist or is not implemented"),
196 }
197 }
198}
199
200/// Join constraint
201#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Hash)]
202pub enum JoinConstraint {
203 /// Join ON
204 On,
205 /// Join USING
206 Using,
207}
208
209impl Display for JoinSide {
210 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
211 match self {
212 JoinSide::Left => write!(f, "left"),
213 JoinSide::Right => write!(f, "right"),
214 JoinSide::None => write!(f, "none"),
215 }
216 }
217}
218
219/// Join side.
220/// Stores the referred table side during calculations
221#[derive(Debug, Clone, Copy, PartialEq)]
222pub enum JoinSide {
223 /// Left side of the join
224 Left,
225 /// Right side of the join
226 Right,
227 /// Neither side of the join, used for Mark joins where the mark column does not belong to
228 /// either side of the join
229 None,
230}
231
232impl JoinSide {
233 /// Inverse the join side
234 pub fn negate(&self) -> Self {
235 match self {
236 JoinSide::Left => JoinSide::Right,
237 JoinSide::Right => JoinSide::Left,
238 JoinSide::None => JoinSide::None,
239 }
240 }
241}