vortex_array/compute/
boolean.rs

1use std::any::Any;
2use std::sync::{Arc, LazyLock};
3
4use arrow_array::ArrayRef as ArrowArrayRef;
5use arrow_array::cast::AsArray;
6use arrow_schema::DataType;
7use vortex_dtype::DType;
8use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
9
10use crate::arcref::ArcRef;
11use crate::arrow::{FromArrowArray, IntoArrowArray};
12use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
13use crate::encoding::Encoding;
14use crate::{Array, ArrayRef};
15
16/// Point-wise logical _and_ between two Boolean arrays.
17///
18/// This method uses Arrow-style null propagation rather than the Kleene logic semantics.
19pub fn and(lhs: &dyn Array, rhs: &dyn Array) -> VortexResult<ArrayRef> {
20    boolean(lhs, rhs, BooleanOperator::And)
21}
22
23/// Point-wise Kleene logical _and_ between two Boolean arrays.
24pub fn and_kleene(lhs: &dyn Array, rhs: &dyn Array) -> VortexResult<ArrayRef> {
25    boolean(lhs, rhs, BooleanOperator::AndKleene)
26}
27
28/// Point-wise logical _or_ between two Boolean arrays.
29///
30/// This method uses Arrow-style null propagation rather than the Kleene logic semantics.
31pub fn or(lhs: &dyn Array, rhs: &dyn Array) -> VortexResult<ArrayRef> {
32    boolean(lhs, rhs, BooleanOperator::Or)
33}
34
35/// Point-wise Kleene logical _or_ between two Boolean arrays.
36pub fn or_kleene(lhs: &dyn Array, rhs: &dyn Array) -> VortexResult<ArrayRef> {
37    boolean(lhs, rhs, BooleanOperator::OrKleene)
38}
39
40/// Point-wise logical operator between two Boolean arrays.
41///
42/// This method uses Arrow-style null propagation rather than the Kleene logic semantics.
43pub fn boolean(lhs: &dyn Array, rhs: &dyn Array, op: BooleanOperator) -> VortexResult<ArrayRef> {
44    BOOLEAN_FN
45        .invoke(&InvocationArgs {
46            inputs: &[lhs.into(), rhs.into()],
47            options: &op,
48        })?
49        .unwrap_array()
50}
51
52pub struct BooleanKernelRef(ArcRef<dyn Kernel>);
53inventory::collect!(BooleanKernelRef);
54
55pub trait BooleanKernel: Encoding {
56    fn boolean(
57        &self,
58        array: &Self::Array,
59        other: &dyn Array,
60        op: BooleanOperator,
61    ) -> VortexResult<Option<ArrayRef>>;
62}
63
64#[derive(Debug)]
65pub struct BooleanKernelAdapter<E: Encoding>(pub E);
66
67impl<E: Encoding + BooleanKernel> BooleanKernelAdapter<E> {
68    pub const fn lift(&'static self) -> BooleanKernelRef {
69        BooleanKernelRef(ArcRef::new_ref(self))
70    }
71}
72
73impl<E: Encoding + BooleanKernel> Kernel for BooleanKernelAdapter<E> {
74    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
75        let inputs = BooleanArgs::try_from(args)?;
76        let Some(array) = inputs.lhs.as_any().downcast_ref::<E::Array>() else {
77            return Ok(None);
78        };
79        Ok(E::boolean(&self.0, array, inputs.rhs, inputs.operator)?.map(|array| array.into()))
80    }
81}
82
83pub static BOOLEAN_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
84    let compute = ComputeFn::new("boolean".into(), ArcRef::new_ref(&Boolean));
85    for kernel in inventory::iter::<BooleanKernelRef> {
86        compute.register_kernel(kernel.0.clone());
87    }
88    compute
89});
90
91struct Boolean;
92
93impl ComputeFnVTable for Boolean {
94    fn invoke(
95        &self,
96        args: &InvocationArgs,
97        kernels: &[ArcRef<dyn Kernel>],
98    ) -> VortexResult<Output> {
99        let BooleanArgs { lhs, rhs, operator } = BooleanArgs::try_from(args)?;
100
101        let rhs_is_constant = rhs.is_constant();
102
103        // If LHS is constant, then we make sure it's on the RHS.
104        if lhs.is_constant() && !rhs_is_constant {
105            return Ok(boolean(rhs, lhs, operator)?.into());
106        }
107
108        // If the RHS is constant and the LHS is Arrow, we can't do any better than arrow_compare.
109        if lhs.is_arrow() && (rhs.is_arrow() || rhs_is_constant) {
110            return Ok(arrow_boolean(lhs.to_array(), rhs.to_array(), operator)?.into());
111        }
112
113        // Check if either LHS or RHS supports the operation directly.
114        for kernel in kernels {
115            if let Some(output) = kernel.invoke(args)? {
116                return Ok(output);
117            }
118        }
119        if let Some(output) = lhs.invoke(&BOOLEAN_FN, args)? {
120            return Ok(output);
121        }
122
123        let inverse_args = InvocationArgs {
124            inputs: &[rhs.into(), lhs.into()],
125            options: &operator,
126        };
127        for kernel in kernels {
128            if let Some(output) = kernel.invoke(&inverse_args)? {
129                return Ok(output);
130            }
131        }
132        if let Some(output) = rhs.invoke(&BOOLEAN_FN, &inverse_args)? {
133            return Ok(output);
134        }
135
136        log::debug!(
137            "No boolean implementation found for LHS {}, RHS {}, and operator {:?} (or inverse)",
138            rhs.encoding(),
139            lhs.encoding(),
140            operator,
141        );
142
143        // If neither side implements the trait, then we delegate to Arrow compute.
144        Ok(arrow_boolean(lhs.to_array(), rhs.to_array(), operator)?.into())
145    }
146
147    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
148        let BooleanArgs { lhs, rhs, .. } = BooleanArgs::try_from(args)?;
149
150        if !lhs.dtype().is_boolean()
151            || !rhs.dtype().is_boolean()
152            || !lhs.dtype().eq_ignore_nullability(rhs.dtype())
153        {
154            vortex_bail!(
155                "Boolean operations are only supported on boolean arrays: {} and {}",
156                lhs.dtype(),
157                rhs.dtype()
158            )
159        }
160
161        Ok(DType::Bool(
162            (lhs.dtype().is_nullable() || rhs.dtype().is_nullable()).into(),
163        ))
164    }
165
166    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
167        let BooleanArgs { lhs, rhs, .. } = BooleanArgs::try_from(args)?;
168
169        if lhs.len() != rhs.len() {
170            vortex_bail!(
171                "Boolean operations aren't supported on arrays of different lengths: {} and {}",
172                lhs.len(),
173                rhs.len()
174            )
175        }
176
177        Ok(lhs.len())
178    }
179
180    fn is_elementwise(&self) -> bool {
181        true
182    }
183}
184
185#[derive(Debug, Clone, Copy, PartialEq, Eq)]
186pub enum BooleanOperator {
187    And,
188    AndKleene,
189    Or,
190    OrKleene,
191    // AndNot,
192    // AndNotKleene,
193    // Xor,
194}
195
196impl Options for BooleanOperator {
197    fn as_any(&self) -> &dyn Any {
198        self
199    }
200}
201
202struct BooleanArgs<'a> {
203    lhs: &'a dyn Array,
204    rhs: &'a dyn Array,
205    operator: BooleanOperator,
206}
207
208impl<'a> TryFrom<&InvocationArgs<'a>> for BooleanArgs<'a> {
209    type Error = VortexError;
210
211    fn try_from(value: &InvocationArgs<'a>) -> VortexResult<Self> {
212        if value.inputs.len() != 2 {
213            vortex_bail!("Expected 2 inputs, found {}", value.inputs.len());
214        }
215        let lhs = value.inputs[0]
216            .array()
217            .ok_or_else(|| vortex_err!("Expected input 0 to be an array"))?;
218        let rhs = value.inputs[1]
219            .array()
220            .ok_or_else(|| vortex_err!("Expected input 1 to be an array"))?;
221        let operator = value
222            .options
223            .as_any()
224            .downcast_ref::<BooleanOperator>()
225            .vortex_expect("Expected options to be an operator");
226
227        Ok(BooleanArgs {
228            lhs,
229            rhs,
230            operator: *operator,
231        })
232    }
233}
234
235/// Implementation of `BinaryBooleanFn` using the Arrow crate.
236///
237/// Note that other encodings should handle a constant RHS value, so we can assume here that
238/// the RHS is not constant and expand to a full array.
239pub(crate) fn arrow_boolean(
240    lhs: ArrayRef,
241    rhs: ArrayRef,
242    operator: BooleanOperator,
243) -> VortexResult<ArrayRef> {
244    let nullable = lhs.dtype().is_nullable() || rhs.dtype().is_nullable();
245
246    let lhs = lhs.into_arrow(&DataType::Boolean)?.as_boolean().clone();
247    let rhs = rhs.into_arrow(&DataType::Boolean)?.as_boolean().clone();
248
249    let array = match operator {
250        BooleanOperator::And => arrow_arith::boolean::and(&lhs, &rhs)?,
251        BooleanOperator::AndKleene => arrow_arith::boolean::and_kleene(&lhs, &rhs)?,
252        BooleanOperator::Or => arrow_arith::boolean::or(&lhs, &rhs)?,
253        BooleanOperator::OrKleene => arrow_arith::boolean::or_kleene(&lhs, &rhs)?,
254    };
255
256    Ok(ArrayRef::from_arrow(
257        Arc::new(array) as ArrowArrayRef,
258        nullable,
259    ))
260}
261
262#[cfg(test)]
263mod tests {
264    use rstest::rstest;
265
266    use super::*;
267    use crate::arrays::BoolArray;
268    use crate::canonical::ToCanonical;
269    use crate::compute::scalar_at;
270
271    #[rstest]
272    #[case(BoolArray::from_iter([Some(true), Some(true), Some(false), Some(false)].into_iter())
273    .into_array(), BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter())
274    .into_array())]
275    #[case(BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array(),
276        BoolArray::from_iter([Some(true), Some(true), Some(false), Some(false)].into_iter()).into_array())]
277    fn test_or(#[case] lhs: ArrayRef, #[case] rhs: ArrayRef) {
278        let r = or(&lhs, &rhs).unwrap();
279
280        let r = r.to_bool().unwrap().into_array();
281
282        let v0 = scalar_at(&r, 0).unwrap().as_bool().value();
283        let v1 = scalar_at(&r, 1).unwrap().as_bool().value();
284        let v2 = scalar_at(&r, 2).unwrap().as_bool().value();
285        let v3 = scalar_at(&r, 3).unwrap().as_bool().value();
286
287        assert!(v0.unwrap());
288        assert!(v1.unwrap());
289        assert!(v2.unwrap());
290        assert!(!v3.unwrap());
291    }
292
293    #[rstest]
294    #[case(BoolArray::from_iter([Some(true), Some(true), Some(false), Some(false)].into_iter())
295    .into_array(), BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter())
296    .into_array())]
297    #[case(BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array(),
298        BoolArray::from_iter([Some(true), Some(true), Some(false), Some(false)].into_iter()).into_array())]
299    fn test_and(#[case] lhs: ArrayRef, #[case] rhs: ArrayRef) {
300        let r = and(&lhs, &rhs).unwrap().to_bool().unwrap().into_array();
301
302        let v0 = scalar_at(&r, 0).unwrap().as_bool().value();
303        let v1 = scalar_at(&r, 1).unwrap().as_bool().value();
304        let v2 = scalar_at(&r, 2).unwrap().as_bool().value();
305        let v3 = scalar_at(&r, 3).unwrap().as_bool().value();
306
307        assert!(v0.unwrap());
308        assert!(!v1.unwrap());
309        assert!(!v2.unwrap());
310        assert!(!v3.unwrap());
311    }
312}