vortex_array/compute/
take.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::LazyLock;
5
6use arcref::ArcRef;
7use vortex_dtype::DType;
8use vortex_error::{VortexError, VortexResult, vortex_bail, vortex_err};
9use vortex_scalar::Scalar;
10
11use crate::arrays::ConstantArray;
12use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Output};
13use crate::stats::{Precision, Stat, StatsProvider, StatsProviderExt, StatsSet};
14use crate::vtable::VTable;
15use crate::{Array, ArrayRef, Canonical, IntoArray};
16
17static TAKE_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
18    let compute = ComputeFn::new("take".into(), ArcRef::new_ref(&Take));
19    for kernel in inventory::iter::<TakeKernelRef> {
20        compute.register_kernel(kernel.0.clone());
21    }
22    compute
23});
24
25pub(crate) fn warm_up_vtable() -> usize {
26    TAKE_FN.kernels().len() + TAKE_FROM_FN.kernels().len()
27}
28
29/// Creates a new array using the elements from the input `array` indexed by `indices`.
30///
31/// For example, if we have an `array` `[1, 2, 3, 4, 5]` and `indices` `[4, 2]`, the resulting
32/// array would be `[5, 3]`.
33///
34/// The output array will have the same length as the `indices` array.
35pub fn take(array: &dyn Array, indices: &dyn Array) -> VortexResult<ArrayRef> {
36    if indices.is_empty() {
37        return Ok(Canonical::empty(
38            &array
39                .dtype()
40                .union_nullability(indices.dtype().nullability()),
41        )
42        .into_array());
43    }
44
45    TAKE_FN
46        .invoke(&InvocationArgs {
47            inputs: &[array.into(), indices.into()],
48            options: &(),
49        })?
50        .unwrap_array()
51}
52
53#[doc(hidden)]
54pub struct Take;
55
56impl ComputeFnVTable for Take {
57    fn invoke(
58        &self,
59        args: &InvocationArgs,
60        kernels: &[ArcRef<dyn Kernel>],
61    ) -> VortexResult<Output> {
62        let TakeArgs { array, indices } = TakeArgs::try_from(args)?;
63
64        // TODO(ngates): if indices are sorted and unique (strict-sorted), then we should delegate to
65        //  the filter function since they're typically optimised for this case.
66        // TODO(ngates): if indices min is quite high, we could slice self and offset the indices
67        //  such that canonicalize does less work.
68
69        if indices.all_invalid() {
70            return Ok(ConstantArray::new(
71                Scalar::null(array.dtype().as_nullable()),
72                indices.len(),
73            )
74            .into_array()
75            .into());
76        }
77
78        let taken_array = take_impl(array, indices, kernels)?;
79
80        // We know that constant array don't need stats propagation, so we can avoid the overhead of
81        // computing derived stats and merging them in.
82        if !taken_array.is_constant() {
83            propagate_take_stats(array, &taken_array)?;
84        }
85
86        Ok(taken_array.into())
87    }
88
89    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
90        let TakeArgs { array, indices } = TakeArgs::try_from(args)?;
91
92        if !indices.dtype().is_int() {
93            vortex_bail!(
94                "Take indices must be an integer type, got {}",
95                indices.dtype()
96            );
97        }
98
99        Ok(array
100            .dtype()
101            .union_nullability(indices.dtype().nullability()))
102    }
103
104    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
105        let TakeArgs { indices, .. } = TakeArgs::try_from(args)?;
106        Ok(indices.len())
107    }
108
109    fn is_elementwise(&self) -> bool {
110        false
111    }
112}
113
114fn propagate_take_stats(source: &dyn Array, target: &dyn Array) -> VortexResult<()> {
115    target.statistics().with_mut_typed_stats_set(|mut st| {
116        let is_constant = source.statistics().get_as::<bool>(Stat::IsConstant);
117        if is_constant == Some(Precision::Exact(true)) {
118            // Any combination of elements from a constant array is still const
119            st.set(Stat::IsConstant, Precision::exact(true));
120        }
121        let inexact_min_max = [Stat::Min, Stat::Max]
122            .into_iter()
123            .filter_map(|stat| {
124                source
125                    .statistics()
126                    .get(stat)
127                    .map(|v| (stat, v.map(|s| s.into_value()).into_inexact()))
128            })
129            .collect::<Vec<_>>();
130        st.combine_sets(
131            &(unsafe { StatsSet::new_unchecked(inexact_min_max) }).as_typed_ref(source.dtype()),
132        )
133    })
134}
135
136fn take_impl(
137    array: &dyn Array,
138    indices: &dyn Array,
139    kernels: &[ArcRef<dyn Kernel>],
140) -> VortexResult<ArrayRef> {
141    let args = InvocationArgs {
142        inputs: &[array.into(), indices.into()],
143        options: &(),
144    };
145
146    // First look for a TakeFrom specialized on the indices.
147    for kernel in TAKE_FROM_FN.kernels() {
148        if let Some(output) = kernel.invoke(&args)? {
149            return output.unwrap_array();
150        }
151    }
152    if let Some(output) = indices.invoke(&TAKE_FROM_FN, &args)? {
153        return output.unwrap_array();
154    }
155
156    // Then look for a Take kernel
157    for kernel in kernels {
158        if let Some(output) = kernel.invoke(&args)? {
159            return output.unwrap_array();
160        }
161    }
162    if let Some(output) = array.invoke(&TAKE_FN, &args)? {
163        return output.unwrap_array();
164    }
165
166    // Otherwise, canonicalize and try again.
167    if !array.is_canonical() {
168        log::debug!("No take implementation found for {}", array.encoding_id());
169        let canonical = array.to_canonical();
170        return take(canonical.as_ref(), indices);
171    }
172
173    vortex_bail!("No take implementation found for {}", array.encoding_id());
174}
175
176struct TakeArgs<'a> {
177    array: &'a dyn Array,
178    indices: &'a dyn Array,
179}
180
181impl<'a> TryFrom<&InvocationArgs<'a>> for TakeArgs<'a> {
182    type Error = VortexError;
183
184    fn try_from(value: &InvocationArgs<'a>) -> Result<Self, Self::Error> {
185        if value.inputs.len() != 2 {
186            vortex_bail!("Expected 2 inputs, found {}", value.inputs.len());
187        }
188        let array = value.inputs[0]
189            .array()
190            .ok_or_else(|| vortex_err!("Expected first input to be an array"))?;
191        let indices = value.inputs[1]
192            .array()
193            .ok_or_else(|| vortex_err!("Expected second input to be an array"))?;
194        Ok(Self { array, indices })
195    }
196}
197
198pub trait TakeKernel: VTable {
199    /// Create a new array by taking the values from the `array` at the
200    /// given `indices`.
201    ///
202    /// # Panics
203    ///
204    /// Using `indices` that are invalid for the given `array` will cause a panic.
205    fn take(&self, array: &Self::Array, indices: &dyn Array) -> VortexResult<ArrayRef>;
206}
207
208/// A kernel that implements the filter function.
209pub struct TakeKernelRef(pub ArcRef<dyn Kernel>);
210inventory::collect!(TakeKernelRef);
211
212#[derive(Debug)]
213pub struct TakeKernelAdapter<V: VTable>(pub V);
214
215impl<V: VTable + TakeKernel> TakeKernelAdapter<V> {
216    pub const fn lift(&'static self) -> TakeKernelRef {
217        TakeKernelRef(ArcRef::new_ref(self))
218    }
219}
220
221impl<V: VTable + TakeKernel> Kernel for TakeKernelAdapter<V> {
222    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
223        let inputs = TakeArgs::try_from(args)?;
224        let Some(array) = inputs.array.as_opt::<V>() else {
225            return Ok(None);
226        };
227        Ok(Some(V::take(&self.0, array, inputs.indices)?.into()))
228    }
229}
230
231static TAKE_FROM_FN: LazyLock<ComputeFn> = LazyLock::new(|| {
232    let compute = ComputeFn::new("take_from".into(), ArcRef::new_ref(&TakeFrom));
233    for kernel in inventory::iter::<TakeFromKernelRef> {
234        compute.register_kernel(kernel.0.clone());
235    }
236    compute
237});
238
239pub struct TakeFrom;
240
241impl ComputeFnVTable for TakeFrom {
242    fn invoke(
243        &self,
244        _args: &InvocationArgs,
245        _kernels: &[ArcRef<dyn Kernel>],
246    ) -> VortexResult<Output> {
247        vortex_bail!(
248            "TakeFrom should not be invoked directly. Its kernels are used to accelerated the Take function"
249        )
250    }
251
252    fn return_dtype(&self, args: &InvocationArgs) -> VortexResult<DType> {
253        Take.return_dtype(args)
254    }
255
256    fn return_len(&self, args: &InvocationArgs) -> VortexResult<usize> {
257        Take.return_len(args)
258    }
259
260    fn is_elementwise(&self) -> bool {
261        Take.is_elementwise()
262    }
263}
264
265pub trait TakeFromKernel: VTable {
266    /// Create a new array by taking the values from the `array` at the
267    /// given `indices`.
268    fn take_from(&self, indices: &Self::Array, array: &dyn Array)
269    -> VortexResult<Option<ArrayRef>>;
270}
271
272pub struct TakeFromKernelRef(pub ArcRef<dyn Kernel>);
273inventory::collect!(TakeFromKernelRef);
274
275#[derive(Debug)]
276pub struct TakeFromKernelAdapter<V: VTable>(pub V);
277
278impl<V: VTable + TakeFromKernel> TakeFromKernelAdapter<V> {
279    pub const fn lift(&'static self) -> TakeFromKernelRef {
280        TakeFromKernelRef(ArcRef::new_ref(self))
281    }
282}
283
284impl<V: VTable + TakeFromKernel> Kernel for TakeFromKernelAdapter<V> {
285    fn invoke(&self, args: &InvocationArgs) -> VortexResult<Option<Output>> {
286        let inputs = TakeArgs::try_from(args)?;
287        let Some(indices) = inputs.indices.as_opt::<V>() else {
288            return Ok(None);
289        };
290        Ok(V::take_from(&self.0, indices, inputs.array)?.map(Output::from))
291    }
292}