vortex_array/arrays/listview/compute/
take.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::Zero;
5use vortex_dtype::{Nullability, match_each_integer_ptype};
6use vortex_error::VortexResult;
7use vortex_scalar::Scalar;
8
9use crate::arrays::{ListViewArray, ListViewRebuildMode, ListViewVTable};
10use crate::compute::{self, TakeKernel, TakeKernelAdapter};
11use crate::vtable::ValidityHelper;
12use crate::{Array, ArrayRef, IntoArray, register_kernel};
13
14/// The threshold for triggering a rebuild of the [`ListViewArray`].
15///
16/// By default, we will not touch the underlying `elements` array of the [`ListViewArray`] since it
17/// can be potentially expensive to reorganize the array based on what views we have into it.
18///
19/// However, we also do not want to carry around a large amount of garbage data. Below this
20/// threshold of the density of the selection mask, we will rebuild the [`ListViewArray`], removing
21/// any garbage data.
22#[allow(unused)]
23const REBUILD_DENSITY_THRESHOLD: f64 = 0.1;
24
25/// [`ListViewArray`] take implementation.
26///
27/// This implementation is deliberately simple and read-optimized. We just take the `offsets` and
28/// `sizes` at the requested indices and reuse the original `elements` array. This works because
29/// `ListView` (unlike `List`) allows non-contiguous and out-of-order lists.
30///
31/// We don't slice the `elements` array because it would require computing min/max offsets and
32/// adjusting all offsets accordingly, which is not really worth the small potential memory we would
33/// be able to get back.
34///
35/// The trade-off is that we may keep unreferenced elements in memory, but this is acceptable since
36/// we're optimizing for read performance and the data isn't being copied.
37impl TakeKernel for ListViewVTable {
38    fn take(&self, array: &ListViewArray, indices: &dyn Array) -> VortexResult<ArrayRef> {
39        let elements = array.elements();
40        let offsets = array.offsets();
41        let sizes = array.sizes();
42
43        // Compute the new validity by combining the array's validity with the indices' validity.
44        let new_validity = array.validity().take(indices)?;
45
46        // Take the offsets and sizes arrays at the requested indices.
47        // Take can reorder offsets, create gaps, and may introduce overlaps if the `indices`
48        // contain duplicates.
49        let nullable_new_offsets = compute::take(offsets.as_ref(), indices)?;
50        let nullable_new_sizes = compute::take(sizes.as_ref(), indices)?;
51
52        // Since `take` returns nullable arrays, we simply cast it back to non-nullable (filled with
53        // zeros to represent null lists).
54        let new_offsets = match_each_integer_ptype!(nullable_new_offsets.dtype().as_ptype(), |O| {
55            compute::fill_null(
56                &nullable_new_offsets,
57                &Scalar::primitive(O::zero(), Nullability::NonNullable),
58            )?
59        });
60        let new_sizes = match_each_integer_ptype!(nullable_new_sizes.dtype().as_ptype(), |S| {
61            compute::fill_null(
62                &nullable_new_sizes,
63                &Scalar::primitive(S::zero(), Nullability::NonNullable),
64            )?
65        });
66
67        // SAFETY: Take operation maintains all `ListViewArray` invariants:
68        // - `new_offsets` and `new_sizes` are derived from existing valid child arrays.
69        // - `new_offsets` and `new_sizes` are non-nullable.
70        // - `new_offsets` and `new_sizes` have the same length (both taken with the same
71        //   `indices`).
72        // - Validity correctly reflects the combination of array and indices validity.
73        let new_array = unsafe {
74            ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity)
75        };
76
77        // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter`
78        // compute functions have run, at the "top" of the operator tree. However, we cannot do this
79        // right now, so we will just rebuild every time (similar to `ListArray`).
80
81        Ok(new_array
82            .rebuild(ListViewRebuildMode::MakeZeroCopyToList)
83            .into_array())
84    }
85}
86
87register_kernel!(TakeKernelAdapter(ListViewVTable).lift());