Skip to main content

datafusion_physical_plan/joins/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! DataFusion Join implementations
19
20use arrow::array::BooleanBufferBuilder;
21pub use cross_join::CrossJoinExec;
22use datafusion_physical_expr::PhysicalExprRef;
23pub use hash_join::{
24    HashExpr, HashJoinExec, HashJoinExecBuilder, HashTableLookupExpr, SeededRandomState,
25};
26pub use nested_loop_join::{NestedLoopJoinExec, NestedLoopJoinExecBuilder};
27use parking_lot::Mutex;
28// Note: SortMergeJoin is not used in plans yet
29pub use piecewise_merge_join::PiecewiseMergeJoinExec;
30pub use sort_merge_join::SortMergeJoinExec;
31pub use symmetric_hash_join::SymmetricHashJoinExec;
32pub mod chain;
33mod cross_join;
34mod hash_join;
35mod nested_loop_join;
36mod piecewise_merge_join;
37mod sort_merge_join;
38mod stream_join_utils;
39mod symmetric_hash_join;
40pub mod utils;
41
42mod array_map;
43mod join_filter;
44/// Hash map implementations for join operations.
45///
46/// Note: This module is public for internal testing purposes only
47/// and is not guaranteed to be stable across versions.
48pub mod join_hash_map;
49
50use array_map::ArrayMap;
51use utils::JoinHashMapType;
52
/// Lookup structure used by the join implementations in this module,
/// backed by one of two representations.
pub enum Map {
    /// A boxed `JoinHashMapType` trait object.
    HashMap(Box<dyn JoinHashMapType>),
    /// An `ArrayMap`.
    ArrayMap(ArrayMap),
}
57
58impl Map {
59    /// Returns the number of elements in the map.
60    pub fn num_of_distinct_key(&self) -> usize {
61        match self {
62            Map::HashMap(map) => map.len(),
63            Map::ArrayMap(array_map) => array_map.num_of_distinct_key(),
64        }
65    }
66
67    /// Returns `true` if the map contains no elements.
68    pub fn is_empty(&self) -> bool {
69        self.num_of_distinct_key() == 0
70    }
71}
72
/// Crate-internal alias for a `(usize, Option<u64>)` pair.
// NOTE(review): presumably a resumable position inside a `Map` (an index plus
// an optional follow-up entry) — the meaning of each component is not visible
// in this file; confirm against the join hash map implementation.
pub(crate) type MapOffset = (usize, Option<u64>);
74
75#[cfg(test)]
76pub mod test_utils;
77
/// The `ON` clause of a join, as a vector of `(left, right)` expression pairs.
pub type JoinOn = Vec<(PhysicalExprRef, PhysicalExprRef)>;
/// Borrowed slice of `(left, right)` expression pairs, i.e. the by-reference
/// counterpart of [`JoinOn`].
pub type JoinOnRef<'a> = &'a [(PhysicalExprRef, PhysicalExprRef)];
82
/// Partitioning mode for a hash join.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PartitionMode {
    /// Left/right children are partitioned using the left and right keys.
    Partitioned,
    /// The left side will be collected into one partition.
    CollectLeft,
    /// The DataFusion optimizer decides which mode (`Partitioned` or
    /// `CollectLeft`) is optimal based on statistics. It will also consider
    /// swapping the left and right inputs of the join.
    Auto,
}
95
/// Partitioning mode to use for a symmetric hash join.
#[derive(Hash, Clone, Copy, Debug, PartialEq, Eq)]
pub enum StreamJoinPartitionMode {
    /// Left/right children are partitioned using the left and right keys.
    Partitioned,
    /// Both sides will be collected into one partition.
    SinglePartition,
}
104
/// Shared bitmap for visited left-side indices: a `parking_lot` [`Mutex`]
/// wrapping an Arrow [`BooleanBufferBuilder`].
type SharedBitmapBuilder = Mutex<BooleanBufferBuilder>;