Skip to main content

vortex_array/arrays/listview/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use smallvec::smallvec;
10use vortex_buffer::BitBufferMut;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_bail;
14use vortex_error::vortex_ensure;
15use vortex_error::vortex_err;
16use vortex_mask::Mask;
17
18use crate::ArrayRef;
19use crate::ArraySlots;
20use crate::ExecutionCtx;
21use crate::LEGACY_SESSION;
22#[expect(deprecated)]
23use crate::ToCanonical as _;
24use crate::VortexSessionExecute;
25use crate::array::Array;
26use crate::array::ArrayParts;
27use crate::array::TypedArrayRef;
28use crate::array::child_to_validity;
29use crate::array::validity_to_child;
30use crate::arrays::ListView;
31use crate::arrays::Primitive;
32use crate::arrays::PrimitiveArray;
33use crate::arrays::bool;
34use crate::dtype::DType;
35use crate::dtype::IntegerPType;
36use crate::expr::stats::Stat;
37use crate::match_each_integer_ptype;
38use crate::validity::Validity;
39
40/// The `elements` data array, where each list scalar is a _slice_ of the `elements` array, and
41/// each inner list element is a _scalar_ of the `elements` array.
42pub(super) const ELEMENTS_SLOT: usize = 0;
43/// The `offsets` array indicating the start position of each list in elements.
44///
45/// Since we also store `sizes`, this `offsets` field is allowed to be stored out-of-order
46/// (which is different from [`ListArray`](crate::arrays::ListArray)).
47pub(super) const OFFSETS_SLOT: usize = 1;
48/// The `sizes` array indicating the length of each list.
49///
50/// This field is intended to be paired with a corresponding offset to determine the list scalar
51/// we want to access.
52pub(super) const SIZES_SLOT: usize = 2;
53/// The validity bitmap indicating which list elements are non-null.
54pub(super) const VALIDITY_SLOT: usize = 3;
55pub(super) const NUM_SLOTS: usize = 4;
56pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "sizes", "validity"];
57
58/// The canonical encoding for variable-length list arrays.
59///
60/// The `ListViewArray` encoding differs from [`ListArray`] in that it stores a child `sizes` array
61/// in addition to a child `offsets` array (which is the _only_ child in [`ListArray`]).
62///
63/// In the past, we used [`ListArray`] as the canonical encoding for [`DType::List`], but we have
64/// since migrated to `ListViewArray` for a few reasons:
65///
66/// - Enables better SIMD vectorization (no sequential dependency when reading `offsets`)
67/// - Allows out-of-order offsets for better compression (we can shuffle the buffers)
68/// - Supports different integer types for offsets vs sizes
69///
70/// It is worth mentioning that this encoding mirrors Apache Arrow's `ListView` array type, but does
71/// not exactly mirror the similar type found in DuckDB and Velox, which stores the pair of offset
72/// and size in a row-major fashion rather than column-major. More specifically, the row-major
73/// layout has a single child array with alternating offset and size next to each other.
74///
75/// We choose the column-major layout as it allows better compressability, as well as using
76/// different (logical) integer widths for our `offsets` and `sizes` buffers (note that the
77/// compressor will likely compress to a different bit-packed width, but this is speaking strictly
78/// about flexibility in the logcial type).
79///
80/// # Examples
81///
82/// ```
83/// # fn main() -> vortex_error::VortexResult<()> {
84/// # use vortex_array::arrays::{ListViewArray, PrimitiveArray};
85/// # use vortex_array::arrays::listview::ListViewArrayExt;
86/// # use vortex_array::validity::Validity;
87/// # use vortex_array::IntoArray;
88/// # use vortex_buffer::buffer;
89/// # use std::sync::Arc;
90/// #
91/// // Create a list view array representing [[3, 4], [1], [2, 3]].
92/// // Note: Unlike `ListArray`, offsets don't need to be monotonic.
93///
94/// let elements = buffer![1i32, 2, 3, 4, 5].into_array();
95/// let offsets = buffer![2u32, 0, 1].into_array();  // Out-of-order offsets
96/// let sizes = buffer![2u32, 1, 2].into_array();  // The sizes cause overlaps
97///
98/// let list_view = ListViewArray::new(
99///     elements.into_array(),
100///     offsets.into_array(),
101///     sizes.into_array(),
102///     Validity::NonNullable,
103/// );
104///
105/// assert_eq!(list_view.len(), 3);
106///
107/// // Access individual lists
108/// let first_list = list_view.list_elements_at(0)?;
109/// assert_eq!(first_list.len(), 2);
110/// // First list contains elements[2..4] = [3, 4]
111///
112/// let first_offset = list_view.offset_at(0);
113/// let first_size = list_view.size_at(0);
114/// assert_eq!(first_offset, 2);
115/// assert_eq!(first_size, 2);
116/// # Ok(())
117/// # }
118/// ```
119///
120/// [`ListArray`]: crate::arrays::ListArray
121#[derive(Clone, Debug)]
122pub struct ListViewData {
123    // TODO(connor)[ListView]: Add the n+1 memory allocation optimization.
124    /// A flag denoting if the array is zero-copyable* to a [`ListArray`](crate::arrays::ListArray).
125    ///
126    /// We use this information to help us more efficiently rebuild / compact our data.
127    ///
128    /// When this flag is true (indicating sorted offsets with no gaps and no overlaps and all
129    /// `offsets[i] + sizes[i]` are in order), conversions can bypass the very expensive rebuild
130    /// process which must rebuild the array from scratch.
131    is_zero_copy_to_list: bool,
132}
133
134impl Display for ListViewData {
135    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
136        write!(f, "is_zero_copy_to_list: {}", self.is_zero_copy_to_list)
137    }
138}
139
140pub struct ListViewDataParts {
141    pub elements_dtype: Arc<DType>,
142
143    /// See `ListViewArray::elements`
144    pub elements: ArrayRef,
145
146    /// See `ListViewArray::offsets`
147    pub offsets: ArrayRef,
148
149    /// See `ListViewArray::sizes`
150    pub sizes: ArrayRef,
151
152    /// See `ListViewArray::validity`
153    pub validity: Validity,
154}
155
156impl ListViewData {
157    pub(crate) fn make_slots(
158        elements: &ArrayRef,
159        offsets: &ArrayRef,
160        sizes: &ArrayRef,
161        validity: &Validity,
162        len: usize,
163    ) -> ArraySlots {
164        smallvec![
165            Some(elements.clone()),
166            Some(offsets.clone()),
167            Some(sizes.clone()),
168            validity_to_child(validity, len),
169        ]
170    }
171
172    /// Creates a new `ListViewArray`.
173    ///
174    /// # Panics
175    ///
176    /// Panics if the provided components do not satisfy the invariants documented
177    /// in `ListViewArray::new_unchecked`.
178    pub fn new() -> Self {
179        Self {
180            is_zero_copy_to_list: false,
181        }
182    }
183
184    /// Constructs a new `ListViewArray`.
185    ///
186    /// # Errors
187    ///
188    /// Returns an error if the provided components do not satisfy the invariants documented
189    /// in `ListViewArray::new_unchecked`.
190    pub fn try_new() -> VortexResult<Self> {
191        Ok(Self::new())
192    }
193
194    /// Creates a new `ListViewArray` without validation.
195    ///
196    /// This unsafe function does not check the validity of the data. Prefer calling [`new()`] or
197    /// [`try_new()`] over this function, as they will check the validity of the data.
198    ///
199    /// [`ListArray`]: crate::arrays::ListArray
200    /// [`new()`]: Self::new
201    /// [`try_new()`]: Self::try_new
202    ///
203    /// # Safety
204    ///
205    /// The caller must ensure all of the following invariants are satisfied:
206    ///
207    /// - `offsets` and `sizes` must be non-nullable integer arrays.
208    /// - `offsets` and `sizes` must have the same length.
209    /// - Size integer width must be smaller than or equal to offset type (to prevent overflow).
210    /// - For each `i`, `offsets[i] + sizes[i]` must not overflow and must be `<= elements.len()`
211    ///   (even if the corresponding view is defined as null by the validity array).
212    /// - If validity is an array, its length must equal `offsets.len()`.
213    pub unsafe fn new_unchecked() -> Self {
214        Self::new()
215    }
216
217    /// Validates the components that would be used to create a `ListViewArray`.
218    pub fn validate(
219        elements: &ArrayRef,
220        offsets: &ArrayRef,
221        sizes: &ArrayRef,
222        validity: &Validity,
223    ) -> VortexResult<()> {
224        // Check that offsets and sizes are integer arrays and non-nullable.
225        vortex_ensure!(
226            offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
227            "offsets must be non-nullable integer array, got {}",
228            offsets.dtype()
229        );
230        vortex_ensure!(
231            sizes.dtype().is_int() && !sizes.dtype().is_nullable(),
232            "sizes must be non-nullable integer array, got {}",
233            sizes.dtype()
234        );
235
236        // Check that they have the same length.
237        vortex_ensure!(
238            offsets.len() == sizes.len(),
239            "offsets and sizes must have the same length, got {} and {}",
240            offsets.len(),
241            sizes.len()
242        );
243
244        // Check that the size type can fit within the offset type to prevent overflows.
245        let size_ptype = sizes.dtype().as_ptype();
246        let offset_ptype = offsets.dtype().as_ptype();
247
248        // If a validity array is present, it must be the same length as the `ListViewArray`.
249        if let Some(validity_len) = validity.maybe_len() {
250            vortex_ensure!(
251                validity_len == offsets.len(),
252                "validity with size {validity_len} does not match array size {}",
253                offsets.len()
254            );
255        }
256
257        // Skip host-only validation when offsets/sizes are not host-resident.
258        if offsets.is_host() && sizes.is_host() {
259            #[expect(deprecated)]
260            let offsets_primitive = offsets.to_primitive();
261            #[expect(deprecated)]
262            let sizes_primitive = sizes.to_primitive();
263
264            // Validate the `offsets` and `sizes` arrays.
265            match_each_integer_ptype!(offset_ptype, |O| {
266                match_each_integer_ptype!(size_ptype, |S| {
267                    let offsets_slice = offsets_primitive.as_slice::<O>();
268                    let sizes_slice = sizes_primitive.as_slice::<S>();
269
270                    validate_offsets_and_sizes::<O, S>(
271                        offsets_slice,
272                        sizes_slice,
273                        elements.len() as u64,
274                    )?;
275                })
276            });
277        }
278
279        Ok(())
280    }
281
282    /// Sets whether this `ListViewArray` is zero-copyable to a [`ListArray`].
283    ///
284    /// This is an optimization flag that enables more efficient conversion to [`ListArray`] without
285    /// needing to copy or reorganize the data.
286    ///
287    /// [`ListArray`]: crate::arrays::ListArray
288    ///
289    /// # Safety
290    ///
291    /// When setting `is_zctl` to `true`, the caller must ensure that the `ListViewArray` is
292    /// actually zero-copyable to a [`ListArray`]. This means:
293    ///
294    /// - Offsets must be sorted (but not strictly sorted, zero-length lists are allowed).
295    /// - `offsets[i] + sizes[i] == offsets[i + 1]` for all `i`.
296    /// - No gaps in elements between first and last referenced elements.
297    /// - No overlapping list views (each element referenced at most once).
298    ///
299    /// Note that leading and trailing unreferenced elements **ARE** allowed.
300    pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
301        self.is_zero_copy_to_list = is_zctl;
302        self
303    }
304
305    /// Returns true if the `ListViewArray` is zero-copyable to a
306    /// [`ListArray`](crate::arrays::ListArray).
307    pub fn is_zero_copy_to_list(&self) -> bool {
308        self.is_zero_copy_to_list
309    }
310}
311
312impl Default for ListViewData {
313    fn default() -> Self {
314        Self::new()
315    }
316}
317
318/// Walks parallel `(offset, size)` slices and sets each range `[offset, offset + size]` in `buf`.
319///
320/// **Preconditions**
321///
322/// `offsets` and `sizes` must be the same length (which is always the case in valid `ListViewArray`s).
323fn fill_referenced_mask<O: IntegerPType, S: IntegerPType>(
324    buf: &mut BitBufferMut,
325    offsets: &[O],
326    sizes: &[S],
327) {
328    let len = offsets.len();
329
330    assert_eq!(
331        len,
332        sizes.len(),
333        "offsets and sizes must be the same length"
334    );
335
336    for i in 0..len {
337        let start: usize = offsets[i].as_();
338        let size: usize = sizes[i].as_();
339        buf.fill_range(start, start + size, true);
340    }
341}
342
343pub trait ListViewArrayExt: TypedArrayRef<ListView> {
344    fn nullability(&self) -> crate::dtype::Nullability {
345        match self.as_ref().dtype() {
346            DType::List(_, nullability) => *nullability,
347            _ => unreachable!("ListViewArrayExt requires a list dtype"),
348        }
349    }
350
351    fn elements(&self) -> &ArrayRef {
352        self.as_ref().slots()[ELEMENTS_SLOT]
353            .as_ref()
354            .vortex_expect("ListViewArray elements slot")
355    }
356
357    fn offsets(&self) -> &ArrayRef {
358        self.as_ref().slots()[OFFSETS_SLOT]
359            .as_ref()
360            .vortex_expect("ListViewArray offsets slot")
361    }
362
363    fn sizes(&self) -> &ArrayRef {
364        self.as_ref().slots()[SIZES_SLOT]
365            .as_ref()
366            .vortex_expect("ListViewArray sizes slot")
367    }
368
369    fn listview_validity(&self) -> Validity {
370        child_to_validity(
371            self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
372            self.nullability(),
373        )
374    }
375
376    fn offset_at(&self, index: usize) -> usize {
377        assert!(
378            index < self.as_ref().len(),
379            "Index {index} out of bounds 0..{}",
380            self.as_ref().len()
381        );
382        self.offsets()
383            .as_opt::<Primitive>()
384            .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
385            .unwrap_or_else(|| {
386                self.offsets()
387                    .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
388                    .vortex_expect("offsets must support execute_scalar")
389                    .as_primitive()
390                    .as_::<usize>()
391                    .vortex_expect("offset must fit in usize")
392            })
393    }
394
395    fn size_at(&self, index: usize) -> usize {
396        assert!(
397            index < self.as_ref().len(),
398            "Index {} out of bounds 0..{}",
399            index,
400            self.as_ref().len()
401        );
402        self.sizes()
403            .as_opt::<Primitive>()
404            .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
405            .unwrap_or_else(|| {
406                self.sizes()
407                    .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
408                    .vortex_expect("sizes must support execute_scalar")
409                    .as_primitive()
410                    .as_::<usize>()
411                    .vortex_expect("size must fit in usize")
412            })
413    }
414
415    fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
416        let offset = self.offset_at(index);
417        let size = self.size_at(index);
418        self.elements().slice(offset..offset + size)
419    }
420
421    fn verify_is_zero_copy_to_list(&self) -> bool {
422        #[expect(deprecated)]
423        let offsets_primitive = self.offsets().to_primitive();
424        #[expect(deprecated)]
425        let sizes_primitive = self.sizes().to_primitive();
426        validate_zctl(self.elements(), offsets_primitive, sizes_primitive).is_ok()
427    }
428
429    /// Returns a [`Mask`] of length `elements.len()` where each bit is set iff that
430    /// position in `elements` is referenced by at least one view. Caller must ensure `elements`
431    /// is non-empty.
432    ///
433    /// Walks every `(offset, size)` pair, canonicalizes both `offsets` and `sizes`,
434    /// and allocates a `BitBuffer` of length `elements.len()`, so it is extremely costly.
435    ///
436    /// **Preconditions**
437    ///
438    /// `self.elements()` must be non-empty.
439    fn compute_referenced_elements_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult<Mask> {
440        assert!(!self.elements().is_empty());
441        let len = self.elements().len();
442
443        let offsets_primitive = self.offsets().clone().execute::<PrimitiveArray>(ctx)?;
444        let sizes_primitive = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
445
446        let mut buf = BitBufferMut::new_unset(len);
447
448        match_each_integer_ptype!(offsets_primitive.ptype(), |O| {
449            match_each_integer_ptype!(sizes_primitive.ptype(), |S| {
450                fill_referenced_mask::<O, S>(
451                    &mut buf,
452                    offsets_primitive.as_slice::<O>(),
453                    sizes_primitive.as_slice::<S>(),
454                );
455            })
456        });
457
458        Ok(Mask::from_buffer(buf.freeze()))
459    }
460
461    /// Exact fraction of `elements` referenced by some view, in `[0.0, 1.0]`. Extremely costly.
462    ///
463    /// Returns `Ok(1.0)` when `elements` is empty instead of dividing by 0.
464    fn compute_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
465        if self.elements().is_empty() {
466            return Ok(1.0);
467        }
468
469        if self.sizes().is_empty() {
470            return Ok(0.0);
471        }
472
473        let density = match self.compute_referenced_elements_mask(ctx)? {
474            Mask::AllTrue(_) => 1.0,
475            Mask::AllFalse(_) => 0.0,
476            Mask::Values(values) => values.true_count() as f32 / self.elements().len() as f32,
477        };
478
479        Ok(density)
480    }
481
482    /// Upper-bound estimate of [`compute_density`](Self::compute_density) via
483    /// `sum(sizes) / elements.len()`, clamped to `[0.0, 1.0]`.
484    ///
485    /// Exact for non-overlapping views, but overcounts when multiple views share the same elements.
486    ///
487    /// Returns `Ok(1.0)` when `elements` is empty instead of dividing by 0.
488    fn upper_bound_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
489        let n_elts = self.elements().len();
490        if n_elts == 0 {
491            return Ok(1.0);
492        }
493
494        let sizes = self.sizes();
495        if sizes.is_empty() {
496            return Ok(0.0);
497        }
498
499        // compute_stat short-circuits on a cached exact Sum and otherwise computes
500        let sizes_sum = sizes
501            .statistics()
502            .compute_stat(Stat::Sum, ctx)?
503            .vortex_expect("sizes array has integer ptype elements")
504            .as_primitive()
505            .as_::<u64>()
506            .vortex_expect("integer ptypes can be upcast to u64");
507
508        // if the same elements are referenced more than once the estimate may be
509        // greater than 1.0, so clamp
510        let estimate = (sizes_sum as f32 / n_elts as f32).min(1.0);
511
512        debug_assert!(estimate >= 0.0);
513
514        Ok(estimate)
515    }
516}
517impl<T: TypedArrayRef<ListView>> ListViewArrayExt for T {}
518
519impl Array<ListView> {
520    /// Creates a new `ListViewArray`.
521    pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
522        let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
523        let len = offsets.len();
524        let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
525        ListViewData::validate(&elements, &offsets, &sizes, &validity)
526            .vortex_expect("`ListViewArray` construction failed");
527        let data = ListViewData::new();
528        unsafe {
529            Array::from_parts_unchecked(
530                ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
531            )
532        }
533    }
534
535    /// Constructs a new `ListViewArray`.
536    pub fn try_new(
537        elements: ArrayRef,
538        offsets: ArrayRef,
539        sizes: ArrayRef,
540        validity: Validity,
541    ) -> VortexResult<Self> {
542        let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
543        let len = offsets.len();
544        let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
545        ListViewData::validate(&elements, &offsets, &sizes, &validity)?;
546        let data = ListViewData::try_new()?;
547        Ok(unsafe {
548            Array::from_parts_unchecked(
549                ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
550            )
551        })
552    }
553
554    /// Creates a new `ListViewArray` without validation.
555    ///
556    /// # Safety
557    ///
558    /// See [`ListViewData::new_unchecked`].
559    pub unsafe fn new_unchecked(
560        elements: ArrayRef,
561        offsets: ArrayRef,
562        sizes: ArrayRef,
563        validity: Validity,
564    ) -> Self {
565        let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
566        let len = offsets.len();
567        let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
568        let data = unsafe { ListViewData::new_unchecked() };
569        unsafe {
570            Array::from_parts_unchecked(
571                ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
572            )
573        }
574    }
575
576    /// Mark whether this list view can be zero-copy converted to a list.
577    ///
578    /// # Safety
579    ///
580    /// See [`ListViewData::with_zero_copy_to_list`].
581    pub unsafe fn with_zero_copy_to_list(self, is_zctl: bool) -> Self {
582        if cfg!(debug_assertions) && is_zctl {
583            #[expect(deprecated)]
584            let offsets_primitive = self.offsets().to_primitive();
585            #[expect(deprecated)]
586            let sizes_primitive = self.sizes().to_primitive();
587            validate_zctl(self.elements(), offsets_primitive, sizes_primitive)
588                .vortex_expect("Failed to validate zero-copy to list flag");
589        }
590        let dtype = self.dtype().clone();
591        let len = self.len();
592        let slots: ArraySlots = self.slots().iter().cloned().collect();
593        let data = unsafe { self.into_data().with_zero_copy_to_list(is_zctl) };
594        unsafe {
595            Array::from_parts_unchecked(
596                ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
597            )
598        }
599    }
600
601    pub fn into_data_parts(self) -> ListViewDataParts {
602        let elements = self.slots()[ELEMENTS_SLOT]
603            .clone()
604            .vortex_expect("ListViewArray elements slot");
605        let offsets = self.slots()[OFFSETS_SLOT]
606            .clone()
607            .vortex_expect("ListViewArray offsets slot");
608        let sizes = self.slots()[SIZES_SLOT]
609            .clone()
610            .vortex_expect("ListViewArray sizes slot");
611        let validity = self.listview_validity();
612        ListViewDataParts {
613            elements_dtype: Arc::new(elements.dtype().clone()),
614            elements,
615            offsets,
616            sizes,
617            validity,
618        }
619    }
620}
621
622/// Helper function to validate `offsets` and `sizes` with specific types.
623fn validate_offsets_and_sizes<O, S>(
624    offsets_slice: &[O],
625    sizes_slice: &[S],
626    elements_len: u64,
627) -> VortexResult<()>
628where
629    O: IntegerPType,
630    S: IntegerPType,
631{
632    debug_assert_eq!(offsets_slice.len(), sizes_slice.len());
633
634    #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
635    for i in 0..offsets_slice.len() {
636        let offset = offsets_slice[i];
637        let size = sizes_slice[i];
638
639        vortex_ensure!(offset >= O::zero(), "cannot have negative offsets");
640        vortex_ensure!(size >= S::zero(), "cannot have negative size");
641
642        let offset_u64 = offset
643            .to_u64()
644            .ok_or_else(|| vortex_err!("offset[{i}] = {offset:?} cannot be converted to u64"))?;
645
646        let size_u64 = size
647            .to_u64()
648            .ok_or_else(|| vortex_err!("size[{i}] = {size:?} cannot be converted to u64"))?;
649
650        // Check for overflow when adding offset + size.
651        let end = offset_u64.checked_add(size_u64).ok_or_else(|| {
652            vortex_err!("offset[{i}] ({offset_u64}) + size[{i}] ({size_u64}) would overflow u64")
653        })?;
654
655        if offset_u64 == elements_len {
656            vortex_ensure!(
657                size_u64 == 0,
658                "views to the end of the elements array (length {elements_len}) must have size 0 \
659                    (had size {size_u64})"
660            );
661        }
662
663        vortex_ensure!(
664            end <= elements_len,
665            "offset[{i}] + size[{i}] = {offset_u64} + {size_u64} = {end} \
666            exceeds elements length {elements_len}",
667        );
668    }
669
670    Ok(())
671}
672
673/// Helper function to validate if the `ListViewArray` components are actually zero-copyable to
674/// [`ListArray`](crate::arrays::ListArray).
675fn validate_zctl(
676    elements: &ArrayRef,
677    offsets_primitive: PrimitiveArray,
678    sizes_primitive: PrimitiveArray,
679) -> VortexResult<()> {
680    // Offsets must be sorted (but not strictly sorted, zero-length lists are allowed), even
681    // if there are null views.
682    let mut ctx = LEGACY_SESSION.create_execution_ctx();
683    if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted(&mut ctx) {
684        vortex_ensure!(is_sorted, "offsets must be sorted");
685    } else {
686        vortex_bail!("offsets must report is_sorted statistic");
687    }
688
689    // Validate that offset[i] + size[i] <= offset[i+1] for all items
690    // This ensures views are non-overlapping and properly ordered for zero-copy-to-list
691    fn validate_monotonic_ends<O: IntegerPType, S: IntegerPType>(
692        offsets_slice: &[O],
693        sizes_slice: &[S],
694        len: usize,
695    ) -> VortexResult<()> {
696        let mut max_end = 0usize;
697
698        for i in 0..len {
699            let offset = offsets_slice[i].to_usize().unwrap_or(usize::MAX);
700            let size = sizes_slice[i].to_usize().unwrap_or(usize::MAX);
701
702            // Check that this view starts at or after the previous view ended
703            vortex_ensure!(
704                offset >= max_end,
705                "Zero-copy-to-list requires views to be non-overlapping and ordered: \
706                 view[{}] starts at {} but previous views extend to {}",
707                i,
708                offset,
709                max_end
710            );
711
712            // Update max_end for the next iteration
713            let end = offset.saturating_add(size);
714            max_end = max_end.max(end);
715        }
716
717        Ok(())
718    }
719
720    let offsets_dtype = offsets_primitive.dtype();
721    let sizes_dtype = sizes_primitive.dtype();
722    let len = offsets_primitive.len();
723
724    // Check that offset + size values are monotonic (no overlaps)
725    match_each_integer_ptype!(offsets_dtype.as_ptype(), |O| {
726        match_each_integer_ptype!(sizes_dtype.as_ptype(), |S| {
727            let offsets_slice = offsets_primitive.as_slice::<O>();
728            let sizes_slice = sizes_primitive.as_slice::<S>();
729
730            validate_monotonic_ends(offsets_slice, sizes_slice, len)?;
731        })
732    });
733
734    // TODO(connor)[ListView]: Making this allocation is expensive, but the more efficient
735    // implementation would be even more complicated than this. We could use a bit buffer denoting
736    // if positions in `elements` are used, and then additionally store a separate flag that tells
737    // us if a position is used more than once.
738    let mut element_references = vec![0u8; elements.len()];
739
740    fn count_references<O: IntegerPType, S: IntegerPType>(
741        element_references: &mut [u8],
742        offsets_primitive: PrimitiveArray,
743        sizes_primitive: PrimitiveArray,
744    ) {
745        let offsets_slice = offsets_primitive.as_slice::<O>();
746        let sizes_slice = sizes_primitive.as_slice::<S>();
747
748        // Note that we ignore nulls here, as the "null" view metadata must still maintain the same
749        // invariants as non-null views, even for a `bool` information.
750        for i in 0..offsets_slice.len() {
751            let offset: usize = offsets_slice[i].as_();
752            let size: usize = sizes_slice[i].as_();
753            for j in offset..offset + size {
754                element_references[j] = element_references[j].saturating_add(1);
755            }
756        }
757    }
758
759    match_each_integer_ptype!(offsets_primitive.ptype(), |O| {
760        match_each_integer_ptype!(sizes_primitive.ptype(), |S| {
761            count_references::<O, S>(&mut element_references, offsets_primitive, sizes_primitive);
762        })
763    });
764
765    // Allow leading and trailing unreferenced elements, but not gaps in the middle.
766    let leftmost_used = element_references
767        .iter()
768        .position(|&references| references != 0);
769    let rightmost_used = element_references
770        .iter()
771        .rposition(|&references| references != 0);
772
773    if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
774        vortex_ensure!(
775            element_references[first_ref..=last_ref]
776                .iter()
777                .all(|&references| references != 0),
778            "found gap in elements array between first and last referenced elements"
779        );
780    }
781
782    vortex_ensure!(element_references.iter().all(|&references| references <= 1));
783
784    Ok(())
785}