vortex_array/arrays/varbin/compute/
compare.rs1use arrow_array::{BinaryArray, StringArray};
5use arrow_buffer::BooleanBuffer;
6use arrow_ord::cmp;
7use itertools::Itertools;
8use vortex_dtype::{DType, NativePType, match_each_native_ptype};
9use vortex_error::{VortexExpect as _, VortexResult, vortex_bail, vortex_err};
10
11use crate::arrays::{BoolArray, PrimitiveArray, VarBinArray, VarBinVTable};
12use crate::arrow::{Datum, from_arrow_array_with_len};
13use crate::compute::{
14 CompareKernel, CompareKernelAdapter, Operator, compare, compare_lengths_to_empty,
15};
16use crate::vtable::ValidityHelper;
17use crate::{Array, ArrayRef, IntoArray, ToCanonical, register_kernel};
18
19impl CompareKernel for VarBinVTable {
21 fn compare(
22 &self,
23 lhs: &VarBinArray,
24 rhs: &dyn Array,
25 operator: Operator,
26 ) -> VortexResult<Option<ArrayRef>> {
27 if let Some(rhs_const) = rhs.as_constant() {
28 let nullable = lhs.dtype().is_nullable() || rhs_const.dtype().is_nullable();
29 let len = lhs.len();
30
31 let rhs_is_empty = match rhs_const.dtype() {
32 DType::Binary(_) => rhs_const
33 .as_binary()
34 .is_empty()
35 .vortex_expect("RHS should not be null"),
36 DType::Utf8(_) => rhs_const
37 .as_utf8()
38 .is_empty()
39 .vortex_expect("RHS should not be null"),
40 _ => vortex_bail!("VarBinArray can only have type of Binary or Utf8"),
41 };
42
43 if rhs_is_empty {
44 let buffer = match operator {
45 Operator::Gte => BooleanBuffer::new_set(len),
47 Operator::Lt => BooleanBuffer::new_unset(len),
49 _ => {
50 let lhs_offsets = lhs.offsets().to_canonical()?.into_primitive()?;
51 match_each_native_ptype!(lhs_offsets.ptype(), |P| {
52 compare_offsets_to_empty::<P>(lhs_offsets, operator)
53 })
54 }
55 };
56
57 return Ok(Some(
58 BoolArray::new(
59 buffer,
60 lhs.validity()
61 .clone()
62 .union_nullability(rhs.dtype().nullability()),
63 )
64 .into_array(),
65 ));
66 }
67
68 let lhs = Datum::try_new(lhs.as_ref())?;
69
70 let arrow_rhs: &dyn arrow_array::Datum = match rhs_const.dtype() {
72 DType::Utf8(_) => &rhs_const
73 .as_utf8()
74 .value()
75 .map(StringArray::new_scalar)
76 .unwrap_or_else(|| arrow_array::Scalar::new(StringArray::new_null(1))),
77 DType::Binary(_) => &rhs_const
78 .as_binary()
79 .value()
80 .map(BinaryArray::new_scalar)
81 .unwrap_or_else(|| arrow_array::Scalar::new(BinaryArray::new_null(1))),
82 _ => vortex_bail!(
83 "VarBin array RHS can only be Utf8 or Binary, given {}",
84 rhs_const.dtype()
85 ),
86 };
87
88 let array = match operator {
89 Operator::Eq => cmp::eq(&lhs, arrow_rhs),
90 Operator::NotEq => cmp::neq(&lhs, arrow_rhs),
91 Operator::Gt => cmp::gt(&lhs, arrow_rhs),
92 Operator::Gte => cmp::gt_eq(&lhs, arrow_rhs),
93 Operator::Lt => cmp::lt(&lhs, arrow_rhs),
94 Operator::Lte => cmp::lt_eq(&lhs, arrow_rhs),
95 }
96 .map_err(|err| vortex_err!("Failed to compare VarBin array: {}", err))?;
97
98 Ok(Some(from_arrow_array_with_len(&array, len, nullable)?))
99 } else if !rhs.is::<VarBinVTable>() {
100 return Ok(Some(compare(lhs.to_varbinview()?.as_ref(), rhs, operator)?));
104 } else {
105 Ok(None)
106 }
107 }
108}
109
110register_kernel!(CompareKernelAdapter(VarBinVTable).lift());
111
112fn compare_offsets_to_empty<P: NativePType>(
113 offsets: PrimitiveArray,
114 operator: Operator,
115) -> BooleanBuffer {
116 let lengths_iter = offsets
117 .as_slice::<P>()
118 .iter()
119 .tuple_windows()
120 .map(|(&s, &e)| e - s);
121 compare_lengths_to_empty(lengths_iter, operator)
122}
123
#[cfg(test)]
mod test {
    use arrow_buffer::BooleanBuffer;
    use vortex_buffer::ByteBuffer;
    use vortex_dtype::{DType, Nullability};
    use vortex_scalar::Scalar;

    use crate::ToCanonical;
    use crate::arrays::{ConstantArray, VarBinArray, VarBinViewArray};
    use crate::compute::{Operator, compare};

    /// Equality of a nullable binary array against the constant `b"abc"`.
    #[test]
    fn test_binary_compare() {
        let values = [Some(b"abc".to_vec()), None, Some(b"def".to_vec())];
        let lhs = VarBinArray::from_iter(values, DType::Binary(Nullability::Nullable));

        let needle = Scalar::binary(ByteBuffer::copy_from(b"abc"), Nullability::Nullable);
        let rhs = ConstantArray::new(needle, 3);

        let result = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq)
            .unwrap()
            .to_bool()
            .unwrap();

        // The null slot of the left-hand side stays null in the result.
        let validity = result.validity_mask().unwrap().to_boolean_buffer();
        let expected_validity = [true, false, true]
            .into_iter()
            .collect::<BooleanBuffer>();
        assert_eq!(validity, expected_validity);

        let expected_bits = [true, false, false]
            .into_iter()
            .collect::<BooleanBuffer>();
        assert_eq!(result.boolean_buffer(), &expected_bits);
    }

    /// Equality of a VarBin array against a VarBinView array with nulls on both sides.
    #[test]
    fn varbinview_compare() {
        let lhs_values = [Some(b"abc".to_vec()), None, Some(b"def".to_vec())];
        let lhs = VarBinArray::from_iter(lhs_values, DType::Binary(Nullability::Nullable));

        let rhs_values = [None, None, Some(b"def".to_vec())];
        let rhs = VarBinViewArray::from_iter(rhs_values, DType::Binary(Nullability::Nullable));

        let result = compare(lhs.as_ref(), rhs.as_ref(), Operator::Eq)
            .unwrap()
            .to_bool()
            .unwrap();

        // Only the last slot is non-null on both sides.
        let validity = result.validity_mask().unwrap().to_boolean_buffer();
        let expected_validity = [false, false, true]
            .into_iter()
            .collect::<BooleanBuffer>();
        assert_eq!(validity, expected_validity);

        let expected_bits = [false, true, true]
            .into_iter()
            .collect::<BooleanBuffer>();
        assert_eq!(result.boolean_buffer(), &expected_bits);
    }
}
189
#[cfg(test)]
mod tests {
    use vortex_dtype::{DType, Nullability};
    use vortex_scalar::Scalar;

    use crate::Array;
    use crate::arrays::{ConstantArray, VarBinArray};
    use crate::compute::{Operator, compare};

    /// A non-nullable array compared with a nullable empty-string constant
    /// must produce a nullable boolean result.
    #[test]
    fn test_null_compare() {
        let lhs = VarBinArray::from_iter([Some("h")], DType::Utf8(Nullability::NonNullable));
        let empty_nullable = ConstantArray::new(Scalar::utf8("", Nullability::Nullable), 1);

        let result = compare(lhs.as_ref(), empty_nullable.as_ref(), Operator::Eq).unwrap();

        assert_eq!(result.dtype(), &DType::Bool(Nullability::Nullable));
    }
}