vortex_array/compute/
take.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
use log::info;
use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult};

use crate::encoding::Encoding;
use crate::stats::{ArrayStatistics, Stat};
use crate::{ArrayDType, ArrayData, IntoArrayData, IntoCanonical};

#[derive(Default, Debug, Clone, Copy)]
pub struct TakeOptions {
    pub skip_bounds_check: bool,
}

pub trait TakeFn<Array> {
    fn take(
        &self,
        array: &Array,
        indices: &ArrayData,
        options: TakeOptions,
    ) -> VortexResult<ArrayData>;
}

impl<E: Encoding> TakeFn<ArrayData> for E
where
    E: TakeFn<E::Array>,
    for<'a> &'a E::Array: TryFrom<&'a ArrayData, Error = VortexError>,
{
    fn take(
        &self,
        array: &ArrayData,
        indices: &ArrayData,
        options: TakeOptions,
    ) -> VortexResult<ArrayData> {
        let array_ref = <&E::Array>::try_from(array)?;
        let encoding = array
            .encoding()
            .as_any()
            .downcast_ref::<E>()
            .ok_or_else(|| vortex_err!("Mismatched encoding"))?;
        TakeFn::take(encoding, array_ref, indices, options)
    }
}

pub fn take(
    array: impl AsRef<ArrayData>,
    indices: impl AsRef<ArrayData>,
    mut options: TakeOptions,
) -> VortexResult<ArrayData> {
    let array = array.as_ref();
    let indices = indices.as_ref();

    if !indices.dtype().is_int() || indices.dtype().is_nullable() {
        vortex_bail!(
            "Take indices must be a non-nullable integer type, got {}",
            indices.dtype()
        );
    }

    // TODO(ngates): if indices are sorted and unique (strict-sorted), then we should delegate to
    //  the filter function since they're typically optimised for this case.

    // If the indices are all within bounds, we can skip bounds checking.
    if indices
        .statistics()
        .get_as::<usize>(Stat::Max)
        .is_some_and(|max| max < array.len())
    {
        options.skip_bounds_check = true;
    }

    // TODO(ngates): if indices min is quite high, we could slice self and offset the indices
    //  such that canonicalize does less work.

    if let Some(take_fn) = array.encoding().take_fn() {
        return take_fn.take(array, indices, options);
    }

    // Otherwise, flatten and try again.
    info!("TakeFn not implemented for {}, flattening", array);
    let canonical = array.clone().into_canonical()?.into_array();
    canonical
        .encoding()
        .take_fn()
        .ok_or_else(|| vortex_err!(NotImplemented: "take", canonical.encoding().id()))?
        .take(&canonical, indices, options)
}