1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
use crate::bit_iterator::BitSliceIterator;
use crate::contains_nulls;
use arrow_buffer::bit_util::get_bit;
use std::mem::size_of;
use crate::data::ArrayData;
use super::utils::equal_len;
/// Null-fraction cutoff used by `primitive_equal` when the compared range contains nulls.
///
/// When `lhs.null_count() / lhs.len()` is at or above this value, elements are compared
/// one at a time using the validity bits; below it, the `(start, end)` ranges yielded by
/// `BitSliceIterator` are compared in bulk instead.
pub(crate) const NULL_SLICES_SELECTIVITY_THRESHOLD: f64 = 0.4;
/// Compares `len` fixed-width elements of `lhs`, beginning at `lhs_start`, with the
/// elements of `rhs` beginning at `rhs_start`.
///
/// `T` is used only for its size: the first buffer of each array is treated as raw bytes
/// holding `size_of::<T>()`-byte elements. Both start indices are relative to the
/// respective array's own offset.
///
/// Three strategies are used:
/// * no nulls in the `lhs` range: a single byte-wise comparison of the whole range;
/// * null fraction of `lhs` at or above [`NULL_SLICES_SELECTIVITY_THRESHOLD`]: element by
///   element, consulting the validity bit of each position;
/// * otherwise: the `(start, end)` ranges produced by `BitSliceIterator` for both sides
///   are paired up and each pair is compared in bulk.
///
/// Only the `lhs` null buffer is inspected to pick a strategy.
/// NOTE(review): presumably callers have already verified that both validity masks
/// agree before calling this — confirm against the call sites.
///
/// # Panics
///
/// Panics if the `lhs` range contains nulls but either array has no null buffer.
pub(super) fn primitive_equal<T>(
    lhs: &ArrayData,
    rhs: &ArrayData,
    lhs_start: usize,
    rhs_start: usize,
    len: usize,
) -> bool {
    let width = size_of::<T>();
    let left = &lhs.buffers()[0].as_slice()[lhs.offset() * width..];
    let right = &rhs.buffers()[0].as_slice()[rhs.offset() * width..];

    // Byte-wise comparison of `count` elements, with positions given in elements.
    let values_match = |l_idx: usize, r_idx: usize, count: usize| {
        equal_len(left, right, l_idx * width, r_idx * width, count * width)
    };

    // Fast path: nothing in the range is null, so the raw bytes decide the answer.
    if !contains_nulls(lhs.null_buffer(), lhs_start + lhs.offset(), len) {
        return values_match(lhs_start, rhs_start, len);
    }

    let null_fraction = lhs.null_count() as f64 / lhs.len() as f64;

    // The comparison is kept as `>=` on purpose: a NaN fraction must take the slice path.
    if null_fraction >= NULL_SLICES_SELECTIVITY_THRESHOLD {
        let left_validity = lhs.null_buffer().as_ref().unwrap().as_slice();
        let right_validity = rhs.null_buffer().as_ref().unwrap().as_slice();

        for i in 0..len {
            let l_idx = lhs_start + i;
            let r_idx = rhs_start + i;
            // Both validity bits are read up front, before any short-circuiting.
            let left_valid = get_bit(left_validity, l_idx + lhs.offset());
            let right_valid = get_bit(right_validity, r_idx + rhs.offset());

            // A null slot on the left is accepted without looking at the right side;
            // a valid one needs a valid counterpart holding the same bytes.
            let slot_matches = !left_valid || (right_valid && values_match(l_idx, r_idx, 1));
            if !slot_matches {
                return false;
            }
        }
        return true;
    }

    let left_runs = BitSliceIterator::new(
        lhs.null_buffer().as_ref().unwrap(),
        lhs_start + lhs.offset(),
        len,
    );
    let right_runs = BitSliceIterator::new(
        rhs.null_buffer().as_ref().unwrap(),
        rhs_start + rhs.offset(),
        len,
    );

    // Pairs are taken in lock-step; iteration stops when either side runs out.
    for ((l_from, l_to), (r_from, r_to)) in left_runs.zip(right_runs) {
        let same_bounds = l_from == r_from && l_to == r_to;
        if !(same_bounds
            && values_match(lhs_start + l_from, rhs_start + r_from, l_to - l_from))
        {
            return false;
        }
    }
    true
}