vortex_runend/compute/
take.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::{AsPrimitive, NumCast};
5use vortex_array::arrays::PrimitiveArray;
6use vortex_array::compute::{TakeKernel, TakeKernelAdapter, take};
7use vortex_array::search_sorted::{SearchResult, SearchSorted, SearchSortedSide};
8use vortex_array::validity::Validity;
9use vortex_array::vtable::ValidityHelper;
10use vortex_array::{Array, ArrayRef, ToCanonical, register_kernel};
11use vortex_buffer::Buffer;
12use vortex_dtype::match_each_integer_ptype;
13use vortex_error::{VortexResult, vortex_bail};
14
15use crate::{RunEndArray, RunEndVTable};
16
17impl TakeKernel for RunEndVTable {
18    #[allow(clippy::cast_possible_truncation)]
19    fn take(&self, array: &RunEndArray, indices: &dyn Array) -> VortexResult<ArrayRef> {
20        let primitive_indices = indices.to_primitive();
21
22        let checked_indices = match_each_integer_ptype!(primitive_indices.ptype(), |P| {
23            primitive_indices
24                .as_slice::<P>()
25                .iter()
26                .copied()
27                .map(|idx| {
28                    let usize_idx = idx as usize;
29                    if usize_idx >= array.len() {
30                        vortex_bail!(OutOfBounds: usize_idx, 0, array.len());
31                    }
32                    Ok(usize_idx)
33                })
34                .collect::<VortexResult<Vec<_>>>()?
35        });
36
37        take_indices_unchecked(array, &checked_indices, primitive_indices.validity())
38    }
39}
40
41register_kernel!(TakeKernelAdapter(RunEndVTable).lift());
42
43/// Perform a take operation on a RunEndArray by binary searching for each of the indices.
44pub fn take_indices_unchecked<T: AsPrimitive<usize>>(
45    array: &RunEndArray,
46    indices: &[T],
47    validity: &Validity,
48) -> VortexResult<ArrayRef> {
49    let ends = array.ends().to_primitive();
50    let ends_len = ends.len();
51
52    // TODO(joe): use the validity mask to skip search sorted.
53    let physical_indices = match_each_integer_ptype!(ends.ptype(), |I| {
54        let end_slices = ends.as_slice::<I>();
55        let buffer = Buffer::from_trusted_len_iter(
56            indices
57                .iter()
58                .map(|idx| idx.as_() + array.offset())
59                .map(|idx| {
60                    match <I as NumCast>::from(idx) {
61                        Some(idx) => end_slices.search_sorted(&idx, SearchSortedSide::Right),
62                        None => {
63                            // The idx is too large for I, therefore it's out of bounds.
64                            SearchResult::NotFound(ends_len)
65                        }
66                    }
67                })
68                .map(|result| result.to_ends_index(ends_len) as u64),
69        );
70
71        PrimitiveArray::new(buffer, validity.clone())
72    });
73
74    take(array.values(), physical_indices.as_ref())
75}
76
#[cfg(test)]
mod test {
    use rstest::rstest;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::compute::conformance::take::test_take_conformance;
    use vortex_array::compute::take;
    use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical};
    use vortex_dtype::{DType, Nullability, PType};
    use vortex_scalar::{Scalar, ScalarValue};

    use crate::RunEndArray;

    /// Shared fixture: twelve logical values (three 1s, three 4s, two 2s, four 5s), run-end
    /// encoded.
    fn ree_array() -> RunEndArray {
        let values = PrimitiveArray::from_iter([1, 1, 1, 4, 4, 4, 2, 2, 5, 5, 5, 5]);
        RunEndArray::encode(values.into_array()).unwrap()
    }

    /// Unordered indices resolve to the values at those logical positions.
    #[test]
    fn ree_take() {
        let array = ree_array();
        let indices = PrimitiveArray::from_iter([9, 8, 1, 3]);

        let taken = take(array.as_ref(), indices.as_ref()).unwrap();

        assert_eq!(taken.to_primitive().as_slice::<i32>(), &[5, 5, 1, 4]);
    }

    /// The last valid logical position (length - 1) can be taken.
    #[test]
    fn ree_take_end() {
        let array = ree_array();
        let last = PrimitiveArray::from_iter([11]);

        let taken = take(array.as_ref(), last.as_ref()).unwrap();

        assert_eq!(taken.to_primitive().as_slice::<i32>(), &[5]);
    }

    /// An index equal to the array length is rejected; unwrapping the result panics.
    #[test]
    #[should_panic]
    fn ree_take_out_of_bounds() {
        let array = ree_array();
        let past_the_end = PrimitiveArray::from_iter([12]);

        take(array.as_ref(), past_the_end.as_ref()).unwrap();
    }

    /// Taking from a slice uses positions relative to the slice, not the original array.
    #[test]
    fn sliced_take() {
        // Logical values of the fixture in 4..9 are [4, 4, 2, 2, 5].
        let sliced = ree_array().slice(4..9);
        let indices = PrimitiveArray::from_iter([1, 3, 4]);

        let taken = take(sliced.as_ref(), indices.as_ref()).unwrap();

        assert_eq!(taken.len(), 3);
        for (position, expected) in [4i32, 2, 5].into_iter().enumerate() {
            assert_eq!(taken.scalar_at(position), Scalar::from(expected));
        }
    }

    /// Null indices produce null outputs and make the result dtype nullable.
    #[test]
    fn ree_take_nullable() {
        let array = ree_array();
        let indices = PrimitiveArray::from_option_iter([Some(1), None]);
        let nullable_i32 = DType::Primitive(PType::I32, Nullability::Nullable);

        let taken = take(array.as_ref(), indices.as_ref()).unwrap();

        assert_eq!(
            taken.scalar_at(0),
            Scalar::new(nullable_i32.clone(), ScalarValue::from(1i32))
        );
        assert_eq!(taken.scalar_at(1), Scalar::null(nullable_i32));
    }

    /// Runs the shared take conformance suite over a variety of run-end encoded inputs.
    #[rstest]
    // The common fixture.
    #[case(ree_array())]
    // Unsigned 8-bit values.
    #[case(RunEndArray::encode(
        PrimitiveArray::from_iter([1u8, 1, 2, 2, 2, 3, 3, 3, 3, 4]).into_array(),
    ).unwrap())]
    // Nullable values, including a run of nulls.
    #[case(RunEndArray::encode(
        PrimitiveArray::from_option_iter([
            Some(10),
            Some(10),
            None,
            None,
            Some(20),
            Some(20),
            Some(20),
        ])
        .into_array(),
    ).unwrap())]
    // Every value identical.
    #[case(RunEndArray::encode(PrimitiveArray::from_iter([42i32, 42, 42, 42, 42]).into_array())
        .unwrap())]
    // Every value distinct.
    #[case(RunEndArray::encode(
        PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10]).into_array(),
    ).unwrap())]
    // Value `i` repeated `i + 1` times, for `i` in 0..20.
    #[case({
        let values: Vec<i32> = (0i32..20)
            .flat_map(|i| std::iter::repeat(i).take(i as usize + 1))
            .collect();
        RunEndArray::encode(PrimitiveArray::from_iter(values).into_array()).unwrap()
    })]
    fn test_take_runend_conformance(#[case] encoded: RunEndArray) {
        test_take_conformance(encoded.as_ref());
    }

    /// Runs the shared take conformance suite over slices of run-end encoded arrays.
    #[rstest]
    #[case(ree_array().slice(3..6))]
    #[case({
        let values = PrimitiveArray::from_iter([1i32, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]);
        let encoded = RunEndArray::encode(values.into_array()).unwrap();
        encoded.slice(2..8)
    })]
    fn test_take_sliced_runend_conformance(#[case] sliced: ArrayRef) {
        test_take_conformance(sliced.as_ref());
    }
}