Skip to main content

vortex_array/arrays/listview/compute/
take.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::Zero;
5use vortex_dtype::Nullability;
6use vortex_dtype::match_each_integer_ptype;
7use vortex_error::VortexResult;
8
9use crate::Array;
10use crate::ArrayRef;
11use crate::IntoArray;
12use crate::arrays::ListViewArray;
13use crate::arrays::ListViewRebuildMode;
14use crate::arrays::ListViewVTable;
15use crate::arrays::TakeExecute;
16use crate::builtins::ArrayBuiltins;
17use crate::executor::ExecutionCtx;
18use crate::scalar::Scalar;
19use crate::vtable::ValidityHelper;
20
21// TODO(connor)[ListView]: Make use of this threshold after we start migrating operators.
22/// The threshold for triggering a rebuild of the [`ListViewArray`].
23///
24/// By default, we will not touch the underlying `elements` array of the [`ListViewArray`] since it
25/// can be potentially expensive to reorganize the array based on what views we have into it.
26///
27/// However, we also do not want to carry around a large amount of garbage data. Below this
28/// threshold of the density of the selection mask, we will rebuild the [`ListViewArray`], removing
29/// any garbage data.
30#[allow(unused)]
31const REBUILD_DENSITY_THRESHOLD: f64 = 0.1;
32
33/// [`ListViewArray`] take implementation.
34///
35/// This implementation is deliberately simple and read-optimized. We just take the `offsets` and
36/// `sizes` at the requested indices and reuse the original `elements` array. This works because
37/// `ListView` (unlike `List`) allows non-contiguous and out-of-order lists.
38///
39/// We don't slice the `elements` array because it would require computing min/max offsets and
40/// adjusting all offsets accordingly, which is not really worth the small potential memory we would
41/// be able to get back.
42///
43/// The trade-off is that we may keep unreferenced elements in memory, but this is acceptable since
44/// we're optimizing for read performance and the data isn't being copied.
45impl TakeExecute for ListViewVTable {
46    fn take(
47        array: &ListViewArray,
48        indices: &dyn Array,
49        _ctx: &mut ExecutionCtx,
50    ) -> VortexResult<Option<ArrayRef>> {
51        let elements = array.elements();
52        let offsets = array.offsets();
53        let sizes = array.sizes();
54
55        // Compute the new validity by combining the array's validity with the indices' validity.
56        let new_validity = array.validity().take(indices)?;
57
58        // Take the offsets and sizes arrays at the requested indices.
59        // Take can reorder offsets, create gaps, and may introduce overlaps if the `indices`
60        // contain duplicates.
61        let nullable_new_offsets = offsets.take(indices.to_array())?;
62        let nullable_new_sizes = sizes.take(indices.to_array())?;
63
64        // Since `take` returns nullable arrays, we simply cast it back to non-nullable (filled with
65        // zeros to represent null lists).
66        let new_offsets = match_each_integer_ptype!(nullable_new_offsets.dtype().as_ptype(), |O| {
67            nullable_new_offsets
68                .fill_null(Scalar::primitive(O::zero(), Nullability::NonNullable))?
69        });
70        let new_sizes = match_each_integer_ptype!(nullable_new_sizes.dtype().as_ptype(), |S| {
71            nullable_new_sizes.fill_null(Scalar::primitive(S::zero(), Nullability::NonNullable))?
72        });
73        // SAFETY: Take operation maintains all `ListViewArray` invariants:
74        // - `new_offsets` and `new_sizes` are derived from existing valid child arrays.
75        // - `new_offsets` and `new_sizes` are non-nullable.
76        // - `new_offsets` and `new_sizes` have the same length (both taken with the same
77        //   `indices`).
78        // - Validity correctly reflects the combination of array and indices validity.
79        let new_array = unsafe {
80            ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity)
81        };
82
83        // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter`
84        // compute functions have run, at the "top" of the operator tree. However, we cannot do this
85        // right now, so we will just rebuild every time (similar to `ListArray`).
86
87        Ok(Some(
88            new_array
89                .rebuild(ListViewRebuildMode::MakeZeroCopyToList)?
90                .into_array(),
91        ))
92    }
93}