vortex_array/arrays/listview/compute/take.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::Zero;
5use vortex_dtype::Nullability;
6use vortex_dtype::match_each_integer_ptype;
7use vortex_error::VortexResult;
8use vortex_scalar::Scalar;
9
10use crate::Array;
11use crate::ArrayRef;
12use crate::IntoArray;
13use crate::arrays::ListViewArray;
14use crate::arrays::ListViewRebuildMode;
15use crate::arrays::ListViewVTable;
16use crate::compute::TakeKernel;
17use crate::compute::TakeKernelAdapter;
18use crate::compute::{self};
19use crate::register_kernel;
20use crate::vtable::ValidityHelper;
21
22// TODO(connor)[ListView]: Make use of this threshold after we start migrating operators.
23/// The threshold for triggering a rebuild of the [`ListViewArray`].
24///
25/// By default, we will not touch the underlying `elements` array of the [`ListViewArray`] since it
26/// can be potentially expensive to reorganize the array based on what views we have into it.
27///
28/// However, we also do not want to carry around a large amount of garbage data. Below this
29/// threshold of the density of the selection mask, we will rebuild the [`ListViewArray`], removing
30/// any garbage data.
31#[allow(unused)]
32const REBUILD_DENSITY_THRESHOLD: f64 = 0.1;
33
34/// [`ListViewArray`] take implementation.
35///
36/// This implementation is deliberately simple and read-optimized. We just take the `offsets` and
37/// `sizes` at the requested indices and reuse the original `elements` array. This works because
38/// `ListView` (unlike `List`) allows non-contiguous and out-of-order lists.
39///
40/// We don't slice the `elements` array because it would require computing min/max offsets and
41/// adjusting all offsets accordingly, which is not really worth the small potential memory we would
42/// be able to get back.
43///
44/// The trade-off is that we may keep unreferenced elements in memory, but this is acceptable since
45/// we're optimizing for read performance and the data isn't being copied.
46impl TakeKernel for ListViewVTable {
47 fn take(&self, array: &ListViewArray, indices: &dyn Array) -> VortexResult<ArrayRef> {
48 let elements = array.elements();
49 let offsets = array.offsets();
50 let sizes = array.sizes();
51
52 // Compute the new validity by combining the array's validity with the indices' validity.
53 let new_validity = array.validity().take(indices)?;
54
55 // Take the offsets and sizes arrays at the requested indices.
56 // Take can reorder offsets, create gaps, and may introduce overlaps if the `indices`
57 // contain duplicates.
58 let nullable_new_offsets = compute::take(offsets.as_ref(), indices)?;
59 let nullable_new_sizes = compute::take(sizes.as_ref(), indices)?;
60
61 // Since `take` returns nullable arrays, we simply cast it back to non-nullable (filled with
62 // zeros to represent null lists).
63 let new_offsets = match_each_integer_ptype!(nullable_new_offsets.dtype().as_ptype(), |O| {
64 compute::fill_null(
65 &nullable_new_offsets,
66 &Scalar::primitive(O::zero(), Nullability::NonNullable),
67 )?
68 });
69 let new_sizes = match_each_integer_ptype!(nullable_new_sizes.dtype().as_ptype(), |S| {
70 compute::fill_null(
71 &nullable_new_sizes,
72 &Scalar::primitive(S::zero(), Nullability::NonNullable),
73 )?
74 });
75 // SAFETY: Take operation maintains all `ListViewArray` invariants:
76 // - `new_offsets` and `new_sizes` are derived from existing valid child arrays.
77 // - `new_offsets` and `new_sizes` are non-nullable.
78 // - `new_offsets` and `new_sizes` have the same length (both taken with the same
79 // `indices`).
80 // - Validity correctly reflects the combination of array and indices validity.
81 let new_array = unsafe {
82 ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity)
83 };
84
85 // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter`
86 // compute functions have run, at the "top" of the operator tree. However, we cannot do this
87 // right now, so we will just rebuild every time (similar to `ListArray`).
88
89 Ok(new_array
90 .rebuild(ListViewRebuildMode::MakeZeroCopyToList)
91 .into_array())
92 }
93}
94
95register_kernel!(TakeKernelAdapter(ListViewVTable).lift());