vortex_array/arrays/varbin/compute/
compare.rs

1use arrow_array::{BinaryArray, StringArray};
2use arrow_buffer::BooleanBuffer;
3use arrow_ord::cmp;
4use itertools::Itertools;
5use vortex_dtype::{DType, NativePType, match_each_native_ptype};
6use vortex_error::{VortexExpect as _, VortexResult, vortex_bail, vortex_err};
7
8use crate::arrays::{BoolArray, PrimitiveArray, VarBinArray, VarBinEncoding};
9use crate::arrow::{Datum, from_arrow_array_with_len};
10use crate::compute::{CompareFn, Operator, compare_lengths_to_empty};
11use crate::variants::PrimitiveArrayTrait as _;
12use crate::{Array, ArrayRef};
13
14// This implementation exists so we can have custom translation of RHS to arrow that's not the same as IntoCanonical
15impl CompareFn<&VarBinArray> for VarBinEncoding {
16    fn compare(
17        &self,
18        lhs: &VarBinArray,
19        rhs: &dyn Array,
20        operator: Operator,
21    ) -> VortexResult<Option<ArrayRef>> {
22        if let Some(rhs_const) = rhs.as_constant() {
23            let nullable = lhs.dtype().is_nullable() || rhs_const.dtype().is_nullable();
24            let len = lhs.len();
25
26            let rhs_is_empty = match rhs_const.dtype() {
27                DType::Binary(_) => rhs_const
28                    .as_binary()
29                    .is_empty()
30                    .vortex_expect("RHS should not be null"),
31                DType::Utf8(_) => rhs_const
32                    .as_utf8()
33                    .is_empty()
34                    .vortex_expect("RHS should not be null"),
35                _ => vortex_bail!("VarBinArray can only have type of Binary or Utf8"),
36            };
37
38            if rhs_is_empty {
39                let buffer = match operator {
40                    // Every possible value is gte ""
41                    Operator::Gte => BooleanBuffer::new_set(len),
42                    // No value is lt ""
43                    Operator::Lt => BooleanBuffer::new_unset(len),
44                    _ => {
45                        let lhs_offsets = lhs.offsets().to_canonical()?.into_primitive()?;
46                        match_each_native_ptype!(lhs_offsets.ptype(), |$P| {
47                            compare_offsets_to_empty::<$P>(lhs_offsets, operator)
48                        })
49                    }
50                };
51
52                return Ok(Some(
53                    BoolArray::new(buffer, lhs.validity().clone()).into_array(),
54                ));
55            }
56
57            let lhs = Datum::try_new(lhs)?;
58
59            // TODO(robert): Handle LargeString/Binary arrays
60            let arrow_rhs: &dyn arrow_array::Datum = match rhs_const.dtype() {
61                DType::Utf8(_) => &rhs_const
62                    .as_utf8()
63                    .value()
64                    .map(StringArray::new_scalar)
65                    .unwrap_or_else(|| arrow_array::Scalar::new(StringArray::new_null(1))),
66                DType::Binary(_) => &rhs_const
67                    .as_binary()
68                    .value()
69                    .map(BinaryArray::new_scalar)
70                    .unwrap_or_else(|| arrow_array::Scalar::new(BinaryArray::new_null(1))),
71                _ => vortex_bail!(
72                    "VarBin array RHS can only be Utf8 or Binary, given {}",
73                    rhs_const.dtype()
74                ),
75            };
76
77            let array = match operator {
78                Operator::Eq => cmp::eq(&lhs, arrow_rhs),
79                Operator::NotEq => cmp::neq(&lhs, arrow_rhs),
80                Operator::Gt => cmp::gt(&lhs, arrow_rhs),
81                Operator::Gte => cmp::gt_eq(&lhs, arrow_rhs),
82                Operator::Lt => cmp::lt(&lhs, arrow_rhs),
83                Operator::Lte => cmp::lt_eq(&lhs, arrow_rhs),
84            }
85            .map_err(|err| vortex_err!("Failed to compare VarBin array: {}", err))?;
86
87            Ok(Some(from_arrow_array_with_len(&array, len, nullable)?))
88        } else {
89            Ok(None)
90        }
91    }
92}
93
94fn compare_offsets_to_empty<P: NativePType>(
95    offsets: PrimitiveArray,
96    operator: Operator,
97) -> BooleanBuffer {
98    let lengths_iter = offsets
99        .as_slice::<P>()
100        .iter()
101        .tuple_windows()
102        .map(|(&s, &e)| e - s);
103    compare_lengths_to_empty(lengths_iter, operator)
104}
105
#[cfg(test)]
mod test {
    use arrow_buffer::BooleanBuffer;
    use vortex_buffer::ByteBuffer;
    use vortex_dtype::{DType, Nullability};
    use vortex_scalar::Scalar;

    use crate::ToCanonical;
    use crate::arrays::{ConstantArray, VarBinArray};
    use crate::compute::{Operator, compare};

    /// Equality against a constant binary value: only the first element matches, and the null
    /// slot reads as `false` in the underlying boolean buffer.
    #[test]
    fn test_binary_compare() {
        let values = [Some(b"abc".to_vec()), None, Some(b"def".to_vec())];
        let array = VarBinArray::from_iter(values, DType::Binary(Nullability::Nullable));

        let needle = Scalar::binary(ByteBuffer::copy_from(b"abc"), Nullability::Nullable);
        let rhs = ConstantArray::new(needle, 3);

        let result = compare(&array, &rhs, Operator::Eq)
            .unwrap()
            .to_bool()
            .unwrap();

        let expected = BooleanBuffer::from_iter([true, false, false]);
        assert_eq!(result.boolean_buffer(), &expected);
    }
}