Skip to main content

vortex_array/arrays/listview/compute/
take.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use num_traits::Zero;
5use vortex_error::VortexResult;
6
7use super::REBUILD_DENSITY_THRESHOLD;
8use crate::ArrayRef;
9use crate::ExecutionCtx;
10use crate::IntoArray;
11use crate::array::ArrayView;
12use crate::arrays::ListView;
13use crate::arrays::ListViewArray;
14use crate::arrays::dict::TakeExecute;
15use crate::arrays::dict::TakeReduce;
16use crate::arrays::listview::ListViewArrayExt;
17use crate::arrays::listview::ListViewRebuildMode;
18use crate::builtins::ArrayBuiltins;
19use crate::dtype::Nullability;
20use crate::match_each_integer_ptype;
21use crate::scalar::Scalar;
22
23/// Metadata-only take for [`ListViewArray`].
24impl TakeReduce for ListView {
25    fn take(array: ArrayView<'_, ListView>, indices: &ArrayRef) -> VortexResult<Option<ArrayRef>> {
26        // Approximate element density by the fraction of list rows retained. Assumes roughly
27        // uniform list sizes; good enough to decide whether dragging along the full `elements`
28        // buffer is worth avoiding a rebuild.
29        let kept_row_fraction = indices.len() as f32 / array.sizes().len() as f32;
30        if kept_row_fraction < REBUILD_DENSITY_THRESHOLD {
31            return Ok(None);
32        }
33
34        Ok(Some(apply_take(array, indices)?.into_array()))
35    }
36}
37
38/// Execution-path take for [`ListViewArray`].
39///
40/// This does the same metadata-only take as [`TakeReduce`], but also rebuilds the array if the
41/// resulting array will be less dense than `REBUILD_DENSITY_THRESHOLD`.
42impl TakeExecute for ListView {
43    fn take(
44        array: ArrayView<'_, ListView>,
45        indices: &ArrayRef,
46        _ctx: &mut ExecutionCtx,
47    ) -> VortexResult<Option<ArrayRef>> {
48        let kept_row_fraction = indices.len() as f32 / array.sizes().len() as f32;
49        let taken = apply_take(array, indices)?;
50
51        if kept_row_fraction < REBUILD_DENSITY_THRESHOLD {
52            // TODO(connor)[ListView]: Ideally, we would only rebuild after all `take`s and `filter`
53            // compute functions have run, at the "top" of the operator tree. However, we cannot do
54            // this right now, so we will just rebuild every time (similar to `ListArray`).
55            Ok(Some(
56                taken
57                    .rebuild(ListViewRebuildMode::MakeZeroCopyToList)?
58                    .into_array(),
59            ))
60        } else {
61            Ok(Some(taken.into_array()))
62        }
63    }
64}
65
66/// Shared metadata-only take: take `offsets`, `sizes` and `validity` at `indices` while reusing
67/// the original `elements` buffer as-is.
68fn apply_take(array: ArrayView<'_, ListView>, indices: &ArrayRef) -> VortexResult<ListViewArray> {
69    let elements = array.elements();
70    let offsets = array.offsets();
71    let sizes = array.sizes();
72
73    // Combine the array's validity with the indices' validity.
74    let new_validity = array.validity()?.take(indices)?;
75
76    // Take can reorder offsets, create gaps, and may introduce overlaps if `indices` contain
77    // duplicates.
78    let nullable_new_offsets = offsets.take(indices.clone())?;
79    let nullable_new_sizes = sizes.take(indices.clone())?;
80
81    // `take` returns nullable arrays; cast back to non-nullable (filling with zeros to represent
82    // the null lists — the validity mask tracks nullness separately).
83    let new_offsets = match_each_integer_ptype!(nullable_new_offsets.dtype().as_ptype(), |O| {
84        nullable_new_offsets.fill_null(Scalar::primitive(O::zero(), Nullability::NonNullable))?
85    });
86    let new_sizes = match_each_integer_ptype!(nullable_new_sizes.dtype().as_ptype(), |S| {
87        nullable_new_sizes.fill_null(Scalar::primitive(S::zero(), Nullability::NonNullable))?
88    });
89
90    // SAFETY: Take operation maintains all `ListViewArray` invariants:
91    // - `new_offsets` and `new_sizes` are derived from existing valid child arrays.
92    // - `new_offsets` and `new_sizes` are non-nullable.
93    // - `new_offsets` and `new_sizes` have the same length (both taken with the same `indices`).
94    // - Validity correctly reflects the combination of array and indices validity.
95    Ok(unsafe {
96        ListViewArray::new_unchecked(elements.clone(), new_offsets, new_sizes, new_validity)
97    })
98}