1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
use super::*;
/// Probes the partitioned hash tables with every `(hash, key)` pair and appends
/// the resulting outer-join index tuples to `results`.
///
/// The probe side is walked chunk by chunk while a single running row counter
/// is kept across all chunks, so the n-th probed key overall gets index `n`.
///
/// * A key that is found marks its table entry as "seen" and emits
///   `swap_fn_match(probe_idx, build_idx)` for every build index stored in it.
/// * A key that is not found emits `swap_fn_no_match(probe_idx)`.
/// * Once all keys are probed, every table entry that was never marked emits
///   `swap_fn_drain(build_idx)` for each of its stored build indices.
///
/// The three callbacks decide on which side of the output tuple each index is
/// placed.
///
/// # Panics
///
/// Panics if `n_tables` is not a power of two.
fn probe_outer<T, F, G, H>(
    probe_hashes: &[Vec<(u64, T)>],
    hash_tbls: &mut [PlHashMap<T, (bool, Vec<IdxSize>)>],
    results: &mut Vec<(Option<IdxSize>, Option<IdxSize>)>,
    n_tables: u64,
    swap_fn_match: F,
    swap_fn_no_match: G,
    swap_fn_drain: H,
) where
    T: Send + Hash + Eq + Sync + Copy,
    F: Fn(IdxSize, IdxSize) -> (Option<IdxSize>, Option<IdxSize>),
    G: Fn(IdxSize) -> (Option<IdxSize>, Option<IdxSize>),
    H: Fn(IdxSize) -> (Option<IdxSize>, Option<IdxSize>),
{
    assert!(n_tables.is_power_of_two());

    // Position of the current key within the whole (flattened) probe side.
    let mut row_a: IdxSize = 0;

    for &(hash, key) in probe_hashes.iter().flatten() {
        // SAFETY: NOTE(review) - the helper's contract is not visible from this
        // function. Presumably it needs `n_tables` to be a power of two
        // (asserted above) and to equal `hash_tbls.len()` - confirm against
        // the helper's definition.
        let table =
            unsafe { get_hash_tbl_threaded_join_mut_partitioned(hash, hash_tbls, n_tables) };

        // The hash was computed up front, so it is reused for the lookup
        // instead of hashing the key a second time.
        let slot = table.raw_entry_mut().from_key_hashed_nocheck(hash, &key);

        if let RawEntryMut::Occupied(mut hit) = slot {
            let (was_matched, rows_b) = hit.get_mut();
            // Remember that this build-side key found a partner, so that the
            // final pass below skips it.
            *was_matched = true;
            results.extend(rows_b.iter().map(|&row_b| swap_fn_match(row_a, row_b)));
        } else {
            results.push(swap_fn_no_match(row_a));
        }

        row_a += 1;
    }

    // Emit the build-side rows that no probe key ever matched.
    for table in hash_tbls.iter() {
        for (_, (was_matched, rows_b)) in table.iter() {
            if *was_matched {
                continue;
            }
            results.extend(rows_b.iter().map(|&row_b| swap_fn_drain(row_b)));
        }
    }
}
/// Computes the index tuples of an outer join between the key iterators in `a`
/// and the key iterators in `b`.
///
/// Hash tables are prepared from `b`, and the keys of `a` are hashed with a
/// clone of the first table's hasher state and then probed against those
/// tables via `probe_outer`.
///
/// Each returned tuple holds an optional index from either side; `None` marks
/// a row that has no partner on that side. Without `swap`, indices originating
/// from `a` are placed in the first tuple slot and indices from `b` in the
/// second; with `swap == true` the two slots are exchanged.
///
/// The output buffer is pre-sized to the sum of the lower size hints of all
/// input iterators.
pub(super) fn hash_join_tuples_outer<T, I, J>(
    a: Vec<I>,
    b: Vec<J>,
    swap: bool,
) -> Vec<(Option<IdxSize>, Option<IdxSize>)>
where
    I: Iterator<Item = T> + Send + TrustedLen,
    J: Iterator<Item = T> + Send + TrustedLen,
    T: Hash + Eq + Copy + Sync + Send,
{
    // Both inputs are consumed below, so their lengths are read first.
    let len_a: usize = a.iter().map(|chunk| chunk.size_hint().0).sum();
    let len_b: usize = b.iter().map(|chunk| chunk.size_hint().0).sum();
    let mut results = Vec::with_capacity(len_a + len_b);

    let mut hash_tbls = prepare_hashed_relation_threaded(b);
    // The probe side is hashed with the hasher state taken from the tables,
    // presumably so both sides produce identical hashes for equal keys.
    let random_state = hash_tbls[0].hasher().clone();
    let (probe_hashes, _) = create_hash_and_keys_threaded_vectorized(a, Some(random_state));
    let n_tables = hash_tbls.len() as u64;

    if swap {
        // Swapped orientation: indices from `a` go into the second slot.
        let on_match = |idx_a: IdxSize, idx_b: IdxSize| (Some(idx_b), Some(idx_a));
        let on_miss = |idx_a: IdxSize| (None, Some(idx_a));
        let on_drain = |idx_b: IdxSize| (Some(idx_b), None);
        probe_outer(
            &probe_hashes,
            &mut hash_tbls,
            &mut results,
            n_tables,
            on_match,
            on_miss,
            on_drain,
        );
    } else {
        // Natural orientation: indices from `a` go into the first slot.
        let on_match = |idx_a: IdxSize, idx_b: IdxSize| (Some(idx_a), Some(idx_b));
        let on_miss = |idx_a: IdxSize| (Some(idx_a), None);
        let on_drain = |idx_b: IdxSize| (None, Some(idx_b));
        probe_outer(
            &probe_hashes,
            &mut hash_tbls,
            &mut results,
            n_tables,
            on_match,
            on_miss,
            on_drain,
        );
    }

    results
}