Skip to main content

vortex_array/arrays/listview/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use smallvec::smallvec;
10use vortex_buffer::BitBufferMut;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_bail;
14use vortex_error::vortex_ensure;
15use vortex_error::vortex_err;
16use vortex_mask::Mask;
17
18use crate::ArrayRef;
19use crate::ArraySlots;
20use crate::ExecutionCtx;
21use crate::LEGACY_SESSION;
22#[expect(deprecated)]
23use crate::ToCanonical as _;
24use crate::VortexSessionExecute;
25use crate::aggregate_fn::NumericalAggregateOpts;
26use crate::aggregate_fn::fns::min_max::min_max;
27use crate::array::Array;
28use crate::array::ArrayParts;
29use crate::array::TypedArrayRef;
30use crate::array::child_to_validity;
31use crate::array::validity_to_child;
32use crate::arrays::ListView;
33use crate::arrays::Primitive;
34use crate::arrays::PrimitiveArray;
35use crate::arrays::bool;
36use crate::arrays::primitive::PrimitiveArrayExt;
37use crate::builtins::ArrayBuiltins;
38use crate::dtype::DType;
39use crate::dtype::IntegerPType;
40use crate::dtype::PType;
41use crate::expr::stats::Stat;
42use crate::match_each_integer_ptype;
43use crate::match_each_unsigned_integer_ptype;
44use crate::scalar_fn::fns::operators::Operator;
45use crate::validity::Validity;
46
47/// The `elements` data array, where each list scalar is a _slice_ of the `elements` array, and
48/// each inner list element is a _scalar_ of the `elements` array.
49pub(super) const ELEMENTS_SLOT: usize = 0;
50/// The `offsets` array indicating the start position of each list in elements.
51///
52/// Since we also store `sizes`, this `offsets` field is allowed to be stored out-of-order
53/// (which is different from [`ListArray`](crate::arrays::ListArray)).
54pub(super) const OFFSETS_SLOT: usize = 1;
55/// The `sizes` array indicating the length of each list.
56///
57/// This field is intended to be paired with a corresponding offset to determine the list scalar
58/// we want to access.
59pub(super) const SIZES_SLOT: usize = 2;
60/// The validity bitmap indicating which list elements are non-null.
61pub(super) const VALIDITY_SLOT: usize = 3;
62pub(super) const NUM_SLOTS: usize = 4;
63pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "sizes", "validity"];
64
65/// The canonical encoding for variable-length list arrays.
66///
67/// The `ListViewArray` encoding differs from [`ListArray`] in that it stores a child `sizes` array
68/// in addition to a child `offsets` array (which is the _only_ child in [`ListArray`]).
69///
70/// In the past, we used [`ListArray`] as the canonical encoding for [`DType::List`], but we have
71/// since migrated to `ListViewArray` for a few reasons:
72///
73/// - Enables better SIMD vectorization (no sequential dependency when reading `offsets`)
74/// - Allows out-of-order offsets for better compression (we can shuffle the buffers)
75/// - Supports different integer types for offsets vs sizes
76///
77/// It is worth mentioning that this encoding mirrors Apache Arrow's `ListView` array type, but does
78/// not exactly mirror the similar type found in DuckDB and Velox, which stores the pair of offset
79/// and size in a row-major fashion rather than column-major. More specifically, the row-major
80/// layout has a single child array with alternating offset and size next to each other.
81///
82/// We choose the column-major layout as it allows better compressability, as well as using
83/// different (logical) integer widths for our `offsets` and `sizes` buffers (note that the
84/// compressor will likely compress to a different bit-packed width, but this is speaking strictly
85/// about flexibility in the logcial type).
86///
87/// # Examples
88///
89/// ```
90/// # fn main() -> vortex_error::VortexResult<()> {
91/// # use vortex_array::arrays::{ListViewArray, PrimitiveArray};
92/// # use vortex_array::arrays::listview::ListViewArrayExt;
93/// # use vortex_array::validity::Validity;
94/// # use vortex_array::IntoArray;
95/// # use vortex_buffer::buffer;
96/// # use std::sync::Arc;
97/// #
98/// // Create a list view array representing [[3, 4], [1], [2, 3]].
99/// // Note: Unlike `ListArray`, offsets don't need to be monotonic.
100///
101/// let elements = buffer![1i32, 2, 3, 4, 5].into_array();
102/// let offsets = buffer![2u32, 0, 1].into_array();  // Out-of-order offsets
103/// let sizes = buffer![2u32, 1, 2].into_array();  // The sizes cause overlaps
104///
105/// let list_view = ListViewArray::new(
106///     elements.into_array(),
107///     offsets.into_array(),
108///     sizes.into_array(),
109///     Validity::NonNullable,
110/// );
111///
112/// assert_eq!(list_view.len(), 3);
113///
114/// // Access individual lists
115/// let first_list = list_view.list_elements_at(0)?;
116/// assert_eq!(first_list.len(), 2);
117/// // First list contains elements[2..4] = [3, 4]
118///
119/// let first_offset = list_view.offset_at(0);
120/// let first_size = list_view.size_at(0);
121/// assert_eq!(first_offset, 2);
122/// assert_eq!(first_size, 2);
123/// # Ok(())
124/// # }
125/// ```
126///
127/// [`ListArray`]: crate::arrays::ListArray
128#[derive(Clone, Debug)]
129pub struct ListViewData {
130    // TODO(connor)[ListView]: Add the n+1 memory allocation optimization.
131    /// A flag denoting if the array is zero-copyable* to a [`ListArray`](crate::arrays::ListArray).
132    ///
133    /// We use this information to help us more efficiently rebuild / compact our data.
134    ///
135    /// When this flag is true (indicating sorted offsets with no gaps and no overlaps and all
136    /// `offsets[i] + sizes[i]` are in order), conversions can bypass the very expensive rebuild
137    /// process which must rebuild the array from scratch.
138    is_zero_copy_to_list: bool,
139}
140
141impl Display for ListViewData {
142    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
143        write!(f, "is_zero_copy_to_list: {}", self.is_zero_copy_to_list)
144    }
145}
146
147pub struct ListViewDataParts {
148    pub elements_dtype: Arc<DType>,
149
150    /// See `ListViewArray::elements`
151    pub elements: ArrayRef,
152
153    /// See `ListViewArray::offsets`
154    pub offsets: ArrayRef,
155
156    /// See `ListViewArray::sizes`
157    pub sizes: ArrayRef,
158
159    /// See `ListViewArray::validity`
160    pub validity: Validity,
161}
162
163impl ListViewData {
164    pub(crate) fn make_slots(
165        elements: &ArrayRef,
166        offsets: &ArrayRef,
167        sizes: &ArrayRef,
168        validity: &Validity,
169        len: usize,
170    ) -> ArraySlots {
171        smallvec![
172            Some(elements.clone()),
173            Some(offsets.clone()),
174            Some(sizes.clone()),
175            validity_to_child(validity, len),
176        ]
177    }
178
179    /// Creates a new `ListViewArray`.
180    ///
181    /// # Panics
182    ///
183    /// Panics if the provided components do not satisfy the invariants documented
184    /// in `ListViewArray::new_unchecked`.
185    pub fn new() -> Self {
186        Self {
187            is_zero_copy_to_list: false,
188        }
189    }
190
191    /// Constructs a new `ListViewArray`.
192    ///
193    /// # Errors
194    ///
195    /// Returns an error if the provided components do not satisfy the invariants documented
196    /// in `ListViewArray::new_unchecked`.
197    pub fn try_new() -> VortexResult<Self> {
198        Ok(Self::new())
199    }
200
201    /// Creates a new `ListViewArray` without validation.
202    ///
203    /// This unsafe function does not check the validity of the data. Prefer calling [`new()`] or
204    /// [`try_new()`] over this function, as they will check the validity of the data.
205    ///
206    /// [`ListArray`]: crate::arrays::ListArray
207    /// [`new()`]: Self::new
208    /// [`try_new()`]: Self::try_new
209    ///
210    /// # Safety
211    ///
212    /// The caller must ensure all of the following invariants are satisfied:
213    ///
214    /// - `offsets` and `sizes` must be non-nullable integer arrays.
215    /// - `offsets` and `sizes` must have the same length.
216    /// - Size integer width must be smaller than or equal to offset type (to prevent overflow).
217    /// - For each `i`, `offsets[i] + sizes[i]` must not overflow and must be `<= elements.len()`
218    ///   (even if the corresponding view is defined as null by the validity array).
219    /// - If validity is an array, its length must equal `offsets.len()`.
220    pub unsafe fn new_unchecked() -> Self {
221        Self::new()
222    }
223
224    /// Validates the components that would be used to create a `ListViewArray`.
225    pub fn validate(
226        elements: &ArrayRef,
227        offsets: &ArrayRef,
228        sizes: &ArrayRef,
229        validity: &Validity,
230    ) -> VortexResult<()> {
231        // Check that offsets and sizes are integer arrays and non-nullable.
232        vortex_ensure!(
233            offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
234            "offsets must be non-nullable integer array, got {}",
235            offsets.dtype()
236        );
237        vortex_ensure!(
238            sizes.dtype().is_int() && !sizes.dtype().is_nullable(),
239            "sizes must be non-nullable integer array, got {}",
240            sizes.dtype()
241        );
242
243        // Check that they have the same length.
244        vortex_ensure!(
245            offsets.len() == sizes.len(),
246            "offsets and sizes must have the same length, got {} and {}",
247            offsets.len(),
248            sizes.len()
249        );
250
251        // If a validity array is present, it must be the same length as the `ListViewArray`.
252        if let Some(validity_len) = validity.maybe_len() {
253            vortex_ensure!(
254                validity_len == offsets.len(),
255                "validity with size {validity_len} does not match array size {}",
256                offsets.len()
257            );
258        }
259
260        // Skip host-only validation when offsets/sizes are not host-resident.
261        if offsets.is_host() && sizes.is_host() {
262            #[expect(deprecated)]
263            let offsets_primitive = offsets.to_primitive();
264            #[expect(deprecated)]
265            let sizes_primitive = sizes.to_primitive();
266            // Offsets and sizes are non-negative; reinterpret to unsigned to dispatch over 4 widths
267            // each (4x4 instead of 8x8). This is a read-only validation, so result types are moot.
268            let offsets_primitive =
269                offsets_primitive.reinterpret_cast(offsets_primitive.ptype().to_unsigned());
270            let sizes_primitive =
271                sizes_primitive.reinterpret_cast(sizes_primitive.ptype().to_unsigned());
272
273            // Validate the `offsets` and `sizes` arrays.
274            match_each_unsigned_integer_ptype!(offsets_primitive.ptype(), |O| {
275                match_each_unsigned_integer_ptype!(sizes_primitive.ptype(), |S| {
276                    let offsets_slice = offsets_primitive.as_slice::<O>();
277                    let sizes_slice = sizes_primitive.as_slice::<S>();
278
279                    validate_offsets_and_sizes::<O, S>(
280                        offsets_slice,
281                        sizes_slice,
282                        elements.len() as u64,
283                    )?;
284                })
285            });
286        }
287
288        Ok(())
289    }
290
291    /// Sets whether this `ListViewArray` is zero-copyable to a [`ListArray`].
292    ///
293    /// This is an optimization flag that enables more efficient conversion to [`ListArray`] without
294    /// needing to copy or reorganize the data.
295    ///
296    /// [`ListArray`]: crate::arrays::ListArray
297    ///
298    /// # Safety
299    ///
300    /// When setting `is_zctl` to `true`, the caller must ensure that the `ListViewArray` is
301    /// actually zero-copyable to a [`ListArray`]. This means:
302    ///
303    /// - Offsets must be sorted (but not strictly sorted, zero-length lists are allowed).
304    /// - `offsets[i] + sizes[i] == offsets[i + 1]` for all `i`.
305    /// - No gaps in elements between first and last referenced elements.
306    /// - No overlapping list views (each element referenced at most once).
307    ///
308    /// Note that leading and trailing unreferenced elements **ARE** allowed.
309    pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
310        self.is_zero_copy_to_list = is_zctl;
311        self
312    }
313
314    /// Returns true if the `ListViewArray` is zero-copyable to a
315    /// [`ListArray`](crate::arrays::ListArray).
316    pub fn is_zero_copy_to_list(&self) -> bool {
317        self.is_zero_copy_to_list
318    }
319}
320
321impl Default for ListViewData {
322    fn default() -> Self {
323        Self::new()
324    }
325}
326
327/// Walks parallel `(offset, size)` slices and sets each range `[offset, offset + size]` in `buf`.
328///
329/// **Preconditions**
330///
331/// `offsets` and `sizes` must be the same length (which is always the case in valid `ListViewArray`s).
332fn fill_referenced_mask<O: IntegerPType, S: IntegerPType>(
333    buf: &mut BitBufferMut,
334    offsets: &[O],
335    sizes: &[S],
336) {
337    let len = offsets.len();
338
339    assert_eq!(
340        len,
341        sizes.len(),
342        "offsets and sizes must be the same length"
343    );
344
345    for i in 0..len {
346        let start: usize = offsets[i].as_();
347        let size: usize = sizes[i].as_();
348        buf.fill_range(start, start + size, true);
349    }
350}
351
352pub trait ListViewArrayExt: TypedArrayRef<ListView> {
353    fn nullability(&self) -> crate::dtype::Nullability {
354        match self.as_ref().dtype() {
355            DType::List(_, nullability) => *nullability,
356            _ => unreachable!("ListViewArrayExt requires a list dtype"),
357        }
358    }
359
360    fn elements(&self) -> &ArrayRef {
361        self.as_ref().slots()[ELEMENTS_SLOT]
362            .as_ref()
363            .vortex_expect("ListViewArray elements slot")
364    }
365
366    fn offsets(&self) -> &ArrayRef {
367        self.as_ref().slots()[OFFSETS_SLOT]
368            .as_ref()
369            .vortex_expect("ListViewArray offsets slot")
370    }
371
372    fn sizes(&self) -> &ArrayRef {
373        self.as_ref().slots()[SIZES_SLOT]
374            .as_ref()
375            .vortex_expect("ListViewArray sizes slot")
376    }
377
378    fn listview_validity(&self) -> Validity {
379        child_to_validity(
380            self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
381            self.nullability(),
382        )
383    }
384
385    fn offset_at(&self, index: usize) -> usize {
386        assert!(
387            index < self.as_ref().len(),
388            "Index {index} out of bounds 0..{}",
389            self.as_ref().len()
390        );
391        self.offsets()
392            .as_opt::<Primitive>()
393            .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
394            .unwrap_or_else(|| {
395                self.offsets()
396                    .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
397                    .vortex_expect("offsets must support execute_scalar")
398                    .as_primitive()
399                    .as_::<usize>()
400                    .vortex_expect("offset must fit in usize")
401            })
402    }
403
404    fn size_at(&self, index: usize) -> usize {
405        assert!(
406            index < self.as_ref().len(),
407            "Index {} out of bounds 0..{}",
408            index,
409            self.as_ref().len()
410        );
411        self.sizes()
412            .as_opt::<Primitive>()
413            .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
414            .unwrap_or_else(|| {
415                self.sizes()
416                    .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
417                    .vortex_expect("sizes must support execute_scalar")
418                    .as_primitive()
419                    .as_::<usize>()
420                    .vortex_expect("size must fit in usize")
421            })
422    }
423
424    fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
425        let offset = self.offset_at(index);
426        let size = self.size_at(index);
427        self.elements().slice(offset..offset + size)
428    }
429
430    fn verify_is_zero_copy_to_list(&self) -> bool {
431        #[expect(deprecated)]
432        let offsets_primitive = self.offsets().to_primitive();
433        #[expect(deprecated)]
434        let sizes_primitive = self.sizes().to_primitive();
435        validate_zctl(self.elements(), offsets_primitive, sizes_primitive).is_ok()
436    }
437
438    /// Returns a [`Mask`] of length `elements.len()` where each bit is set iff that
439    /// position in `elements` is referenced by at least one view. Caller must ensure `elements`
440    /// is non-empty.
441    ///
442    /// Walks every `(offset, size)` pair, canonicalizes both `offsets` and `sizes`,
443    /// and allocates a `BitBuffer` of length `elements.len()`, so it is extremely costly.
444    ///
445    /// **Preconditions**
446    ///
447    /// `self.elements()` must be non-empty.
448    fn compute_referenced_elements_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult<Mask> {
449        assert!(!self.elements().is_empty());
450        let len = self.elements().len();
451
452        let offsets_primitive = self.offsets().clone().execute::<PrimitiveArray>(ctx)?;
453        let sizes_primitive = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
454
455        let mut buf = BitBufferMut::new_unset(len);
456
457        // Offsets/sizes are non-negative; reinterpret to unsigned (4x4 instead of 8x8).
458        let offsets_primitive =
459            offsets_primitive.reinterpret_cast(offsets_primitive.ptype().to_unsigned());
460        let sizes_primitive =
461            sizes_primitive.reinterpret_cast(sizes_primitive.ptype().to_unsigned());
462        match_each_unsigned_integer_ptype!(offsets_primitive.ptype(), |O| {
463            match_each_unsigned_integer_ptype!(sizes_primitive.ptype(), |S| {
464                fill_referenced_mask::<O, S>(
465                    &mut buf,
466                    offsets_primitive.as_slice::<O>(),
467                    sizes_primitive.as_slice::<S>(),
468                );
469            })
470        });
471
472        Ok(Mask::from_buffer(buf.freeze()))
473    }
474
475    /// Exact fraction of `elements` referenced by some view, in `[0.0, 1.0]`. Extremely costly.
476    ///
477    /// Returns `Ok(1.0)` when `elements` is empty instead of dividing by 0.
478    fn compute_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
479        if self.elements().is_empty() {
480            return Ok(1.0);
481        }
482
483        if self.sizes().is_empty() {
484            return Ok(0.0);
485        }
486
487        let density = match self.compute_referenced_elements_mask(ctx)? {
488            Mask::AllTrue(_) => 1.0,
489            Mask::AllFalse(_) => 0.0,
490            Mask::Values(values) => values.true_count() as f32 / self.elements().len() as f32,
491        };
492
493        Ok(density)
494    }
495
496    /// Upper-bound estimate of [`compute_density`](Self::compute_density) via
497    /// `sum(sizes) / elements.len()`, clamped to `[0.0, 1.0]`.
498    ///
499    /// Exact for non-overlapping views, but overcounts when multiple views share the same elements.
500    ///
501    /// Returns `Ok(1.0)` when `elements` is empty instead of dividing by 0.
502    fn upper_bound_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
503        let n_elts = self.elements().len();
504        if n_elts == 0 {
505            return Ok(1.0);
506        }
507
508        let sizes = self.sizes();
509        if sizes.is_empty() {
510            return Ok(0.0);
511        }
512
513        // compute_stat short-circuits on a cached exact Sum and otherwise computes
514        let sizes_sum = sizes
515            .statistics()
516            .compute_stat(Stat::Sum, ctx)?
517            .vortex_expect("sizes array has integer ptype elements")
518            .as_primitive()
519            .as_::<u64>()
520            .vortex_expect("integer ptypes can be upcast to u64");
521
522        // if the same elements are referenced more than once the estimate may be
523        // greater than 1.0, so clamp
524        let estimate = (sizes_sum as f32 / n_elts as f32).min(1.0);
525
526        debug_assert!(estimate >= 0.0);
527
528        Ok(estimate)
529    }
530
531    /// Returns the half-open range `[start, end)` of `elements` indices referenced by any view:
532    /// the minimum offset and the maximum `offset + size`. Elements outside this range are
533    /// unreferenced leading or trailing slack that a
534    /// [`TrimElements`](super::ListViewRebuildMode::TrimElements) rebuild would reclaim.
535    ///
536    /// For **zero-copy-to-list** arrays this is `O(1)`: views are sorted and non-overlapping with
537    /// no interior gaps, so the bounds are exactly `[first_offset, last_offset + last_size)`.
538    /// Otherwise it computes min/max statistics over `offsets` and `offsets + sizes`.
539    ///
540    /// # Preconditions
541    ///
542    /// The array must contain at least one list (`len() > 0`).
543    fn referenced_element_bounds(&self, ctx: &mut ExecutionCtx) -> VortexResult<(usize, usize)> {
544        let n_lists = self.as_ref().len();
545        vortex_ensure!(
546            n_lists > 0,
547            "referenced_element_bounds requires a non-empty array"
548        );
549
550        if self.is_zero_copy_to_list() {
551            let start = self.offset_at(0);
552            let end = self.offset_at(n_lists - 1) + self.size_at(n_lists - 1);
553            return Ok((start, end));
554        }
555
556        let start = self
557            .offsets()
558            .statistics()
559            .compute_min::<usize>(ctx)
560            .vortex_expect("offsets must report a usize min statistic");
561
562        // Cast offsets and sizes to the widest integer type so that `offset + size` cannot overflow
563        // the narrower input width.
564        let wide_dtype = DType::from(if self.offsets().dtype().as_ptype().is_unsigned_int() {
565            PType::U64
566        } else {
567            PType::I64
568        });
569        let offsets = self.offsets().cast(wide_dtype.clone())?;
570        let sizes = self.sizes().cast(wide_dtype)?;
571        let end = min_max(
572            &offsets.binary(sizes, Operator::Add)?,
573            ctx,
574            NumericalAggregateOpts::default(),
575        )?
576        .vortex_expect("non-empty array must report a min/max")
577        .max
578        .as_primitive()
579        .as_::<usize>()
580        .vortex_expect("max `offset + size` must fit in a usize");
581
582        Ok((start, end))
583    }
584}
585impl<T: TypedArrayRef<ListView>> ListViewArrayExt for T {}
586
587impl Array<ListView> {
588    /// Creates a new `ListViewArray`.
589    pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
590        let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
591        let len = offsets.len();
592        let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
593        ListViewData::validate(&elements, &offsets, &sizes, &validity)
594            .vortex_expect("`ListViewArray` construction failed");
595        let data = ListViewData::new();
596        unsafe {
597            Array::from_parts_unchecked(
598                ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
599            )
600        }
601    }
602
603    /// Constructs a new `ListViewArray`.
604    pub fn try_new(
605        elements: ArrayRef,
606        offsets: ArrayRef,
607        sizes: ArrayRef,
608        validity: Validity,
609    ) -> VortexResult<Self> {
610        let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
611        let len = offsets.len();
612        let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
613        ListViewData::validate(&elements, &offsets, &sizes, &validity)?;
614        let data = ListViewData::try_new()?;
615        Ok(unsafe {
616            Array::from_parts_unchecked(
617                ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
618            )
619        })
620    }
621
622    /// Creates a new `ListViewArray` without validation.
623    ///
624    /// # Safety
625    ///
626    /// See [`ListViewData::new_unchecked`].
627    pub unsafe fn new_unchecked(
628        elements: ArrayRef,
629        offsets: ArrayRef,
630        sizes: ArrayRef,
631        validity: Validity,
632    ) -> Self {
633        let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
634        let len = offsets.len();
635        let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
636        let data = unsafe { ListViewData::new_unchecked() };
637        unsafe {
638            Array::from_parts_unchecked(
639                ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
640            )
641        }
642    }
643
644    /// Mark whether this list view can be zero-copy converted to a list.
645    ///
646    /// # Safety
647    ///
648    /// See [`ListViewData::with_zero_copy_to_list`].
649    pub unsafe fn with_zero_copy_to_list(self, is_zctl: bool) -> Self {
650        if cfg!(debug_assertions) && is_zctl {
651            #[expect(deprecated)]
652            let offsets_primitive = self.offsets().to_primitive();
653            #[expect(deprecated)]
654            let sizes_primitive = self.sizes().to_primitive();
655            validate_zctl(self.elements(), offsets_primitive, sizes_primitive)
656                .vortex_expect("Failed to validate zero-copy to list flag");
657        }
658        let dtype = self.dtype().clone();
659        let len = self.len();
660        let slots: ArraySlots = self.slots().iter().cloned().collect();
661        let data = unsafe { self.into_data().with_zero_copy_to_list(is_zctl) };
662        unsafe {
663            Array::from_parts_unchecked(
664                ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
665            )
666        }
667    }
668
669    pub fn into_data_parts(self) -> ListViewDataParts {
670        let elements = self.slots()[ELEMENTS_SLOT]
671            .clone()
672            .vortex_expect("ListViewArray elements slot");
673        let offsets = self.slots()[OFFSETS_SLOT]
674            .clone()
675            .vortex_expect("ListViewArray offsets slot");
676        let sizes = self.slots()[SIZES_SLOT]
677            .clone()
678            .vortex_expect("ListViewArray sizes slot");
679        let validity = self.listview_validity();
680        ListViewDataParts {
681            elements_dtype: Arc::new(elements.dtype().clone()),
682            elements,
683            offsets,
684            sizes,
685            validity,
686        }
687    }
688}
689
690/// Helper function to validate `offsets` and `sizes` with specific types.
691fn validate_offsets_and_sizes<O, S>(
692    offsets_slice: &[O],
693    sizes_slice: &[S],
694    elements_len: u64,
695) -> VortexResult<()>
696where
697    O: IntegerPType,
698    S: IntegerPType,
699{
700    debug_assert_eq!(offsets_slice.len(), sizes_slice.len());
701
702    #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
703    for i in 0..offsets_slice.len() {
704        let offset = offsets_slice[i];
705        let size = sizes_slice[i];
706
707        vortex_ensure!(offset >= O::zero(), "cannot have negative offsets");
708        vortex_ensure!(size >= S::zero(), "cannot have negative size");
709
710        let offset_u64 = offset
711            .to_u64()
712            .ok_or_else(|| vortex_err!("offset[{i}] = {offset:?} cannot be converted to u64"))?;
713
714        let size_u64 = size
715            .to_u64()
716            .ok_or_else(|| vortex_err!("size[{i}] = {size:?} cannot be converted to u64"))?;
717
718        // Check for overflow when adding offset + size.
719        let end = offset_u64.checked_add(size_u64).ok_or_else(|| {
720            vortex_err!("offset[{i}] ({offset_u64}) + size[{i}] ({size_u64}) would overflow u64")
721        })?;
722
723        if offset_u64 == elements_len {
724            vortex_ensure!(
725                size_u64 == 0,
726                "views to the end of the elements array (length {elements_len}) must have size 0 \
727                    (had size {size_u64})"
728            );
729        }
730
731        vortex_ensure!(
732            end <= elements_len,
733            "offset[{i}] + size[{i}] = {offset_u64} + {size_u64} = {end} \
734            exceeds elements length {elements_len}",
735        );
736    }
737
738    Ok(())
739}
740
741/// Helper function to validate if the `ListViewArray` components are actually zero-copyable to
742/// [`ListArray`](crate::arrays::ListArray).
743fn validate_zctl(
744    elements: &ArrayRef,
745    offsets_primitive: PrimitiveArray,
746    sizes_primitive: PrimitiveArray,
747) -> VortexResult<()> {
748    // Offsets must be sorted (but not strictly sorted, zero-length lists are allowed), even
749    // if there are null views.
750    let mut ctx = LEGACY_SESSION.create_execution_ctx();
751    if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted(&mut ctx) {
752        vortex_ensure!(is_sorted, "offsets must be sorted");
753    } else {
754        vortex_bail!("offsets must report is_sorted statistic");
755    }
756
757    // Validate that offset[i] + size[i] <= offset[i+1] for all items
758    // This ensures views are non-overlapping and properly ordered for zero-copy-to-list
759    fn validate_monotonic_ends<O: IntegerPType, S: IntegerPType>(
760        offsets_slice: &[O],
761        sizes_slice: &[S],
762        len: usize,
763    ) -> VortexResult<()> {
764        let mut max_end = 0usize;
765
766        for i in 0..len {
767            let offset = offsets_slice[i].to_usize().unwrap_or(usize::MAX);
768            let size = sizes_slice[i].to_usize().unwrap_or(usize::MAX);
769
770            // Check that this view starts at or after the previous view ended
771            vortex_ensure!(
772                offset >= max_end,
773                "Zero-copy-to-list requires views to be non-overlapping and ordered: \
774                 view[{}] starts at {} but previous views extend to {}",
775                i,
776                offset,
777                max_end
778            );
779
780            // Update max_end for the next iteration
781            let end = offset.saturating_add(size);
782            max_end = max_end.max(end);
783        }
784
785        Ok(())
786    }
787
788    let offsets_dtype = offsets_primitive.dtype();
789    let sizes_dtype = sizes_primitive.dtype();
790    let len = offsets_primitive.len();
791
792    // Offsets/sizes are non-negative; reinterpret to unsigned (4x4 instead of 8x8).
793    let offsets_unsigned =
794        offsets_primitive.reinterpret_cast(offsets_dtype.as_ptype().to_unsigned());
795    let sizes_unsigned = sizes_primitive.reinterpret_cast(sizes_dtype.as_ptype().to_unsigned());
796
797    // Check that offset + size values are monotonic (no overlaps)
798    match_each_unsigned_integer_ptype!(offsets_unsigned.ptype(), |O| {
799        match_each_unsigned_integer_ptype!(sizes_unsigned.ptype(), |S| {
800            let offsets_slice = offsets_unsigned.as_slice::<O>();
801            let sizes_slice = sizes_unsigned.as_slice::<S>();
802
803            validate_monotonic_ends(offsets_slice, sizes_slice, len)?;
804        })
805    });
806
807    // TODO(connor)[ListView]: Making this allocation is expensive, but the more efficient
808    // implementation would be even more complicated than this. We could use a bit buffer denoting
809    // if positions in `elements` are used, and then additionally store a separate flag that tells
810    // us if a position is used more than once.
811    let mut element_references = vec![0u8; elements.len()];
812
813    fn count_references<O: IntegerPType, S: IntegerPType>(
814        element_references: &mut [u8],
815        offsets_primitive: PrimitiveArray,
816        sizes_primitive: PrimitiveArray,
817    ) {
818        let offsets_slice = offsets_primitive.as_slice::<O>();
819        let sizes_slice = sizes_primitive.as_slice::<S>();
820
821        // Note that we ignore nulls here, as the "null" view metadata must still maintain the same
822        // invariants as non-null views, even for a `bool` information.
823        for i in 0..offsets_slice.len() {
824            let offset: usize = offsets_slice[i].as_();
825            let size: usize = sizes_slice[i].as_();
826            for j in offset..offset + size {
827                element_references[j] = element_references[j].saturating_add(1);
828            }
829        }
830    }
831
832    match_each_unsigned_integer_ptype!(offsets_unsigned.ptype(), |O| {
833        match_each_unsigned_integer_ptype!(sizes_unsigned.ptype(), |S| {
834            count_references::<O, S>(&mut element_references, offsets_unsigned, sizes_unsigned);
835        })
836    });
837
838    // Allow leading and trailing unreferenced elements, but not gaps in the middle.
839    let leftmost_used = element_references
840        .iter()
841        .position(|&references| references != 0);
842    let rightmost_used = element_references
843        .iter()
844        .rposition(|&references| references != 0);
845
846    if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
847        vortex_ensure!(
848            element_references[first_ref..=last_ref]
849                .iter()
850                .all(|&references| references != 0),
851            "found gap in elements array between first and last referenced elements"
852        );
853    }
854
855    vortex_ensure!(element_references.iter().all(|&references| references <= 1));
856
857    Ok(())
858}