vortex_array/compute/
boolean.rs

1use std::any::Any;
2use std::sync::LazyLock;
3
4use arcref::ArcRef;
5use arrow_array::cast::AsArray;
6use arrow_schema::DataType;
7use vortex_dtype::DType;
8use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
9
10use crate::arrow::{FromArrowArray, IntoArrowArray};
11use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
12use crate::vtable::VTable;
13use crate::{Array, ArrayRef};
14
15/// Point-wise logical _and_ between two Boolean arrays.
16///
17/// This method uses Arrow-style null propagation rather than the Kleene logic semantics.
18pub fn and(lhs: &dyn Array, rhs: &dyn Array) -> VortexResult<ArrayRef> {
19    boolean(lhs, rhs, BooleanOperator::And)
20}
21
22/// Point-wise Kleene logical _and_ between two Boolean arrays.
23pub fn and_kleene(lhs: &dyn Array, rhs: &dyn Array) -> VortexResult<ArrayRef> {
24    boolean(lhs, rhs, BooleanOperator::AndKleene)
25}
26
27/// Point-wise logical _or_ between two Boolean arrays.
28///
29/// This method uses Arrow-style null propagation rather than the Kleene logic semantics.
30pub fn or(lhs: &dyn Array, rhs: &dyn Array) -> VortexResult<ArrayRef> {
31    boolean(lhs, rhs, BooleanOperator::Or)
32}
33
34/// Point-wise Kleene logical _or_ between two Boolean arrays.
35pub fn or_kleene(lhs: &dyn Array, rhs: &dyn Array) -> VortexResult<ArrayRef> {
36    boolean(lhs, rhs, BooleanOperator::OrKleene)
37}
38
39/// Point-wise logical operator between two Boolean arrays.
40///
41/// This method uses Arrow-style null propagation rather than the Kleene logic semantics.
42pub fn boolean(lhs: &dyn Array, rhs: &dyn Array, op: BooleanOperator) -> VortexResult<ArrayRef> {
43    BOOLEAN_FN
44        .invoke(&InvocationArgs {
45            inputs: &[lhs.into(), rhs.into()],
46            options: &op,
47        })?
48        .unwrap_array()
49}
50
51pub struct BooleanKernelRef(ArcRef<dyn Kernel>);
52inventory::collect!(BooleanKernelRef);
53
54pub trait BooleanKernel: VTable {
55    fn boolean(
56        &self,
57        array: &Self::Array,
58        other: &dyn Array,
59        op: BooleanOperator,
60    ) -> VortexResult<Option<ArrayRef>>;
61}
62
63#[derive(Debug)]
64pub struct BooleanKernelAdapter<V: VTable>(pub V);
65
66impl<V: VTable + BooleanKernel> BooleanKernelAdapter<V> {
67    pub const fn lift(&'static self) -> BooleanKernelRef {
68        BooleanKernelRef(ArcRef::new_ref(self))
69    }
70}
71
72impl<V: VTable + BooleanKernel> Kernel for BooleanKernelAdapter<V> {
73    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
74        let inputs = BooleanArgs::try_from(args)?;
75        let Some(array) = inputs.lhs.as_opt::<V>() else {
76            return Ok(None);
77        };
78        Ok(V::boolean(&self.0, array, inputs.rhs, inputs.operator)?.map(|array| array.into()))
79    }
80}
81
82pub static BOOLEAN_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
83    let compute = ComputeFn::new("boolean".into(), ArcRef::new_ref(&Boolean));
84    for kernel in inventory::iter::<BooleanKernelRef> {
85        compute.register_kernel(kernel.0.clone());
86    }
87    compute
88});
89
90struct Boolean;
91
92impl ComputeFnVTable for Boolean {
93    fn invoke(
94        &self,
95        args: &InvocationArgs,
96        kernels: &[ArcRef<dyn Kernel>],
97    ) -> VortexResult<Output> {
98        let BooleanArgs { lhs, rhs, operator } = BooleanArgs::try_from(args)?;
99
100        let rhs_is_constant = rhs.is_constant();
101
102        // If LHS is constant, then we make sure it's on the RHS.
103        if lhs.is_constant() && !rhs_is_constant {
104            return Ok(boolean(rhs, lhs, operator)?.into());
105        }
106
107        // If the RHS is constant and the LHS is Arrow, we can't do any better than arrow_compare.
108        if lhs.is_arrow() && (rhs.is_arrow() || rhs_is_constant) {
109            return Ok(arrow_boolean(lhs.to_array(), rhs.to_array(), operator)?.into());
110        }
111
112        // Check if either LHS or RHS supports the operation directly.
113        for kernel in kernels {
114            if let Some(output) = kernel.invoke(args)? {
115                return Ok(output);
116            }
117        }
118        if let Some(output) = lhs.invoke(&BOOLEAN_FN, args)? {
119            return Ok(output);
120        }
121
122        let inverse_args = InvocationArgs {
123            inputs: &[rhs.into(), lhs.into()],
124            options: &operator,
125        };
126        for kernel in kernels {
127            if let Some(output) = kernel.invoke(&inverse_args)? {
128                return Ok(output);
129            }
130        }
131        if let Some(output) = rhs.invoke(&BOOLEAN_FN, &inverse_args)? {
132            return Ok(output);
133        }
134
135        log::debug!(
136            "No boolean implementation found for LHS {}, RHS {}, and operator {:?} (or inverse)",
137            rhs.encoding_id(),
138            lhs.encoding_id(),
139            operator,
140        );
141
142        // If neither side implements the trait, then we delegate to Arrow compute.
143        Ok(arrow_boolean(lhs.to_array(), rhs.to_array(), operator)?.into())
144    }
145
146    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
147        let BooleanArgs { lhs, rhs, .. } = BooleanArgs::try_from(args)?;
148
149        if !lhs.dtype().is_boolean()
150            || !rhs.dtype().is_boolean()
151            || !lhs.dtype().eq_ignore_nullability(rhs.dtype())
152        {
153            vortex_bail!(
154                "Boolean operations are only supported on boolean arrays: {} and {}",
155                lhs.dtype(),
156                rhs.dtype()
157            )
158        }
159
160        Ok(DType::Bool(
161            (lhs.dtype().is_nullable() || rhs.dtype().is_nullable()).into(),
162        ))
163    }
164
165    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
166        let BooleanArgs { lhs, rhs, .. } = BooleanArgs::try_from(args)?;
167
168        if lhs.len() != rhs.len() {
169            vortex_bail!(
170                "Boolean operations aren't supported on arrays of different lengths: {} and {}",
171                lhs.len(),
172                rhs.len()
173            )
174        }
175
176        Ok(lhs.len())
177    }
178
179    fn is_elementwise(&self) -> bool {
180        true
181    }
182}
183
184#[derive(Debug, Clone, Copy, PartialEq, Eq)]
185pub enum BooleanOperator {
186    And,
187    AndKleene,
188    Or,
189    OrKleene,
190    // AndNot,
191    // AndNotKleene,
192    // Xor,
193}
194
195impl Options for BooleanOperator {
196    fn as_any(&self) -> &dyn Any {
197        self
198    }
199}
200
201struct BooleanArgs<'a> {
202    lhs: &'a dyn Array,
203    rhs: &'a dyn Array,
204    operator: BooleanOperator,
205}
206
207impl<'a> TryFrom<&InvocationArgs<'a>> for BooleanArgs<'a> {
208    type Error = VortexError;
209
210    fn try_from(value: &InvocationArgs<'a>) -> VortexResult<Self> {
211        if value.inputs.len() != 2 {
212            vortex_bail!("Expected 2 inputs, found {}", value.inputs.len());
213        }
214        let lhs = value.inputs[0]
215            .array()
216            .ok_or_else(|| vortex_err!("Expected input 0 to be an array"))?;
217        let rhs = value.inputs[1]
218            .array()
219            .ok_or_else(|| vortex_err!("Expected input 1 to be an array"))?;
220        let operator = value
221            .options
222            .as_any()
223            .downcast_ref::<BooleanOperator>()
224            .vortex_expect("Expected options to be an operator");
225
226        Ok(BooleanArgs {
227            lhs,
228            rhs,
229            operator: *operator,
230        })
231    }
232}
233
234/// Implementation of `BinaryBooleanFn` using the Arrow crate.
235///
236/// Note that other encodings should handle a constant RHS value, so we can assume here that
237/// the RHS is not constant and expand to a full array.
238pub(crate) fn arrow_boolean(
239    lhs: ArrayRef,
240    rhs: ArrayRef,
241    operator: BooleanOperator,
242) -> VortexResult<ArrayRef> {
243    let nullable = lhs.dtype().is_nullable() || rhs.dtype().is_nullable();
244
245    let lhs = lhs.into_arrow(&DataType::Boolean)?.as_boolean().clone();
246    let rhs = rhs.into_arrow(&DataType::Boolean)?.as_boolean().clone();
247
248    let array = match operator {
249        BooleanOperator::And => arrow_arith::boolean::and(&lhs, &rhs)?,
250        BooleanOperator::AndKleene => arrow_arith::boolean::and_kleene(&lhs, &rhs)?,
251        BooleanOperator::Or => arrow_arith::boolean::or(&lhs, &rhs)?,
252        BooleanOperator::OrKleene => arrow_arith::boolean::or_kleene(&lhs, &rhs)?,
253    };
254
255    Ok(ArrayRef::from_arrow(&array, nullable))
256}
257
258#[cfg(test)]
259mod tests {
260    use rstest::rstest;
261
262    use super::*;
263    use crate::IntoArray;
264    use crate::arrays::BoolArray;
265    use crate::canonical::ToCanonical;
266    #[rstest]
267    #[case(BoolArray::from_iter([Some(true), Some(true), Some(false), Some(false)].into_iter())
268    .into_array(), BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter())
269    .into_array())]
270    #[case(BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array(),
271        BoolArray::from_iter([Some(true), Some(true), Some(false), Some(false)].into_iter()).into_array())]
272    fn test_or(#[case] lhs: ArrayRef, #[case] rhs: ArrayRef) {
273        let r = or(&lhs, &rhs).unwrap();
274
275        let r = r.to_bool().unwrap().into_array();
276
277        let v0 = r.scalar_at(0).unwrap().as_bool().value();
278        let v1 = r.scalar_at(1).unwrap().as_bool().value();
279        let v2 = r.scalar_at(2).unwrap().as_bool().value();
280        let v3 = r.scalar_at(3).unwrap().as_bool().value();
281
282        assert!(v0.unwrap());
283        assert!(v1.unwrap());
284        assert!(v2.unwrap());
285        assert!(!v3.unwrap());
286    }
287
288    #[rstest]
289    #[case(BoolArray::from_iter([Some(true), Some(true), Some(false), Some(false)].into_iter())
290    .into_array(), BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter())
291    .into_array())]
292    #[case(BoolArray::from_iter([Some(true), Some(false), Some(true), Some(false)].into_iter()).into_array(),
293        BoolArray::from_iter([Some(true), Some(true), Some(false), Some(false)].into_iter()).into_array())]
294    fn test_and(#[case] lhs: ArrayRef, #[case] rhs: ArrayRef) {
295        let r = and(&lhs, &rhs).unwrap().to_bool().unwrap().into_array();
296
297        let v0 = r.scalar_at(0).unwrap().as_bool().value();
298        let v1 = r.scalar_at(1).unwrap().as_bool().value();
299        let v2 = r.scalar_at(2).unwrap().as_bool().value();
300        let v3 = r.scalar_at(3).unwrap().as_bool().value();
301
302        assert!(v0.unwrap());
303        assert!(!v1.unwrap());
304        assert!(!v2.unwrap());
305        assert!(!v3.unwrap());
306    }
307}