vortex_array/arrays/bool/compute/
take.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use arrow_buffer::BooleanBuffer;
5use itertools::Itertools as _;
6use num_traits::AsPrimitive;
7use vortex_dtype::match_each_integer_ptype;
8use vortex_error::VortexResult;
9use vortex_mask::Mask;
10use vortex_scalar::Scalar;
11
12use crate::arrays::{BoolArray, BoolVTable, ConstantArray};
13use crate::compute::{TakeKernel, TakeKernelAdapter, fill_null};
14use crate::vtable::ValidityHelper;
15use crate::{Array, ArrayRef, IntoArray, ToCanonical, register_kernel};
16
17impl TakeKernel for BoolVTable {
18    fn take(&self, array: &BoolArray, indices: &dyn Array) -> VortexResult<ArrayRef> {
19        let indices_nulls_zeroed = match indices.validity_mask()? {
20            Mask::AllTrue(_) => indices.to_array(),
21            Mask::AllFalse(_) => {
22                return Ok(ConstantArray::new(
23                    Scalar::null(array.dtype().as_nullable()),
24                    indices.len(),
25                )
26                .into_array());
27            }
28            Mask::Values(_) => fill_null(indices, &Scalar::from(0).cast(indices.dtype())?)?,
29        };
30        let indices_nulls_zeroed = indices_nulls_zeroed.to_primitive()?;
31        let buffer = match_each_integer_ptype!(indices_nulls_zeroed.ptype(), |I| {
32            take_valid_indices(array.boolean_buffer(), indices_nulls_zeroed.as_slice::<I>())
33        });
34
35        Ok(BoolArray::new(buffer, array.validity().take(indices)?).to_array())
36    }
37}
38
// Wraps `BoolVTable` in `TakeKernelAdapter`, lifts it, and hands it to `register_kernel!`.
// NOTE(review): presumably this is what lets the generic `take` compute function dispatch
// to the impl above — confirm against the macro definition.
register_kernel!(TakeKernelAdapter(BoolVTable).lift());
40
41fn take_valid_indices<I: AsPrimitive<usize>>(
42    bools: &BooleanBuffer,
43    indices: &[I],
44) -> BooleanBuffer {
45    // For boolean arrays that roughly fit into a single page (at least, on Linux), it's worth
46    // the overhead to convert to a Vec<bool>.
47    if bools.len() <= 4096 {
48        let bools = bools.into_iter().collect_vec();
49        take_byte_bool(bools, indices)
50    } else {
51        take_bool(bools, indices)
52    }
53}
54
55fn take_byte_bool<I: AsPrimitive<usize>>(bools: Vec<bool>, indices: &[I]) -> BooleanBuffer {
56    BooleanBuffer::collect_bool(indices.len(), |idx| {
57        bools[unsafe { indices.get_unchecked(idx).as_() }]
58    })
59}
60
61fn take_bool<I: AsPrimitive<usize>>(bools: &BooleanBuffer, indices: &[I]) -> BooleanBuffer {
62    BooleanBuffer::collect_bool(indices.len(), |idx| {
63        // We can always take from the indices unchecked since collect_bool just iterates len.
64        bools.value(unsafe { indices.get_unchecked(idx).as_() })
65    })
66}
67
#[cfg(test)]
mod test {
    use vortex_buffer::buffer;
    use vortex_dtype::{DType, Nullability};
    use vortex_scalar::Scalar;

    use crate::arrays::BoolArray;
    use crate::arrays::primitive::PrimitiveArray;
    use crate::compute::take;
    use crate::validity::Validity;
    use crate::{Array, ToCanonical};

    /// Builds the index array `[0, 3, 100]` with the given per-slot validity.
    ///
    /// Index `100` is out of range for the five-element fixtures below, so every caller
    /// pairs it with a `false` validity slot.
    fn indices_with_validity(valid: [bool; 3]) -> PrimitiveArray {
        let validity = Validity::Array(BoolArray::from_iter(valid).to_array());
        PrimitiveArray::new(buffer![0, 3, 100], validity)
    }

    /// Five-element nullable fixture with nulls at positions 2 and 3.
    fn nullable_values() -> BoolArray {
        BoolArray::from_iter([Some(false), Some(true), None, None, Some(false)])
    }

    /// Five-element non-nullable fixture.
    fn non_nullable_values() -> BoolArray {
        BoolArray::from_iter([false, true, false, true, false])
    }

    #[test]
    fn take_nullable() {
        let reference = BoolArray::from_iter([
            Some(false),
            Some(true),
            Some(false),
            None,
            Some(false),
        ]);

        // Valid indices: only the value bits are compared here.
        let picks = PrimitiveArray::from_iter([0, 3, 4]);
        let taken = take(reference.as_ref(), picks.as_ref())
            .unwrap()
            .to_bool()
            .unwrap();
        let expected = BoolArray::from_iter([Some(false), None, Some(false)]);
        assert_eq!(taken.boolean_buffer(), expected.boolean_buffer());

        // All-null indices: every output slot is a null of nullable bool dtype.
        let nullable_bool_dtype = DType::Bool(Nullability::Nullable);
        let all_invalid_indices = PrimitiveArray::from_option_iter([None::<u32>, None, None]);
        let all_null = take(reference.as_ref(), all_invalid_indices.as_ref()).unwrap();
        assert_eq!(all_null.dtype(), &nullable_bool_dtype);
        for idx in 0..3 {
            assert_eq!(
                all_null.scalar_at(idx).unwrap(),
                Scalar::null(nullable_bool_dtype.clone())
            );
        }
    }

    #[test]
    fn test_bool_array_take_with_null_out_of_bounds_indices() {
        let values = nullable_values();
        let indices = indices_with_validity([true, true, false]);
        let actual = take(values.as_ref(), indices.as_ref()).unwrap();

        assert_eq!(actual.scalar_at(0).unwrap(), Scalar::from(Some(false)));
        // Slot 1 reads position 3 (null value); slot 2 has a null index.
        for idx in 1..3 {
            assert_eq!(
                actual.scalar_at(idx).unwrap(),
                Scalar::null_typed::<bool>()
            );
        }
    }

    #[test]
    fn test_non_null_bool_array_take_with_null_out_of_bounds_indices() {
        let values = non_nullable_values();
        let indices = indices_with_validity([true, true, false]);
        let actual = take(values.as_ref(), indices.as_ref()).unwrap();

        assert_eq!(actual.scalar_at(0).unwrap(), Scalar::from(Some(false)));
        assert_eq!(actual.scalar_at(1).unwrap(), Scalar::from(Some(true)));
        // The third index is null.
        assert_eq!(actual.scalar_at(2).unwrap(), Scalar::null_typed::<bool>());
    }

    #[test]
    fn test_bool_array_take_all_null_indices() {
        let values = nullable_values();
        let indices = indices_with_validity([false, false, false]);
        let actual = take(values.as_ref(), indices.as_ref()).unwrap();

        for idx in 0..3 {
            assert_eq!(
                actual.scalar_at(idx).unwrap(),
                Scalar::null_typed::<bool>()
            );
        }
    }

    #[test]
    fn test_non_null_bool_array_take_all_null_indices() {
        let values = non_nullable_values();
        let indices = indices_with_validity([false, false, false]);
        let actual = take(values.as_ref(), indices.as_ref()).unwrap();

        for idx in 0..3 {
            assert_eq!(
                actual.scalar_at(idx).unwrap(),
                Scalar::null_typed::<bool>()
            );
        }
    }
}