1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use smallvec::smallvec;
10use vortex_buffer::BitBufferMut;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_bail;
14use vortex_error::vortex_ensure;
15use vortex_error::vortex_err;
16use vortex_mask::Mask;
17
18use crate::ArrayRef;
19use crate::ArraySlots;
20use crate::ExecutionCtx;
21use crate::LEGACY_SESSION;
22#[expect(deprecated)]
23use crate::ToCanonical as _;
24use crate::VortexSessionExecute;
25use crate::aggregate_fn::NumericalAggregateOpts;
26use crate::aggregate_fn::fns::min_max::min_max;
27use crate::array::Array;
28use crate::array::ArrayParts;
29use crate::array::TypedArrayRef;
30use crate::array::child_to_validity;
31use crate::array::validity_to_child;
32use crate::arrays::ListView;
33use crate::arrays::Primitive;
34use crate::arrays::PrimitiveArray;
35use crate::arrays::bool;
36use crate::arrays::primitive::PrimitiveArrayExt;
37use crate::builtins::ArrayBuiltins;
38use crate::dtype::DType;
39use crate::dtype::IntegerPType;
40use crate::dtype::PType;
41use crate::expr::stats::Stat;
42use crate::match_each_integer_ptype;
43use crate::match_each_unsigned_integer_ptype;
44use crate::scalar_fn::fns::operators::Operator;
45use crate::validity::Validity;
46
47pub(super) const ELEMENTS_SLOT: usize = 0;
50pub(super) const OFFSETS_SLOT: usize = 1;
55pub(super) const SIZES_SLOT: usize = 2;
60pub(super) const VALIDITY_SLOT: usize = 3;
62pub(super) const NUM_SLOTS: usize = 4;
63pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "sizes", "validity"];
64
65#[derive(Clone, Debug)]
129pub struct ListViewData {
130 is_zero_copy_to_list: bool,
139}
140
141impl Display for ListViewData {
142 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
143 write!(f, "is_zero_copy_to_list: {}", self.is_zero_copy_to_list)
144 }
145}
146
147pub struct ListViewDataParts {
148 pub elements_dtype: Arc<DType>,
149
150 pub elements: ArrayRef,
152
153 pub offsets: ArrayRef,
155
156 pub sizes: ArrayRef,
158
159 pub validity: Validity,
161}
162
163impl ListViewData {
164 pub(crate) fn make_slots(
165 elements: &ArrayRef,
166 offsets: &ArrayRef,
167 sizes: &ArrayRef,
168 validity: &Validity,
169 len: usize,
170 ) -> ArraySlots {
171 smallvec![
172 Some(elements.clone()),
173 Some(offsets.clone()),
174 Some(sizes.clone()),
175 validity_to_child(validity, len),
176 ]
177 }
178
179 pub fn new() -> Self {
186 Self {
187 is_zero_copy_to_list: false,
188 }
189 }
190
191 pub fn try_new() -> VortexResult<Self> {
198 Ok(Self::new())
199 }
200
201 pub unsafe fn new_unchecked() -> Self {
221 Self::new()
222 }
223
224 pub fn validate(
226 elements: &ArrayRef,
227 offsets: &ArrayRef,
228 sizes: &ArrayRef,
229 validity: &Validity,
230 ) -> VortexResult<()> {
231 vortex_ensure!(
233 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
234 "offsets must be non-nullable integer array, got {}",
235 offsets.dtype()
236 );
237 vortex_ensure!(
238 sizes.dtype().is_int() && !sizes.dtype().is_nullable(),
239 "sizes must be non-nullable integer array, got {}",
240 sizes.dtype()
241 );
242
243 vortex_ensure!(
245 offsets.len() == sizes.len(),
246 "offsets and sizes must have the same length, got {} and {}",
247 offsets.len(),
248 sizes.len()
249 );
250
251 if let Some(validity_len) = validity.maybe_len() {
253 vortex_ensure!(
254 validity_len == offsets.len(),
255 "validity with size {validity_len} does not match array size {}",
256 offsets.len()
257 );
258 }
259
260 if offsets.is_host() && sizes.is_host() {
262 #[expect(deprecated)]
263 let offsets_primitive = offsets.to_primitive();
264 #[expect(deprecated)]
265 let sizes_primitive = sizes.to_primitive();
266 let offsets_primitive =
269 offsets_primitive.reinterpret_cast(offsets_primitive.ptype().to_unsigned());
270 let sizes_primitive =
271 sizes_primitive.reinterpret_cast(sizes_primitive.ptype().to_unsigned());
272
273 match_each_unsigned_integer_ptype!(offsets_primitive.ptype(), |O| {
275 match_each_unsigned_integer_ptype!(sizes_primitive.ptype(), |S| {
276 let offsets_slice = offsets_primitive.as_slice::<O>();
277 let sizes_slice = sizes_primitive.as_slice::<S>();
278
279 validate_offsets_and_sizes::<O, S>(
280 offsets_slice,
281 sizes_slice,
282 elements.len() as u64,
283 )?;
284 })
285 });
286 }
287
288 Ok(())
289 }
290
291 pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
310 self.is_zero_copy_to_list = is_zctl;
311 self
312 }
313
314 pub fn is_zero_copy_to_list(&self) -> bool {
317 self.is_zero_copy_to_list
318 }
319}
320
321impl Default for ListViewData {
322 fn default() -> Self {
323 Self::new()
324 }
325}
326
327fn fill_referenced_mask<O: IntegerPType, S: IntegerPType>(
333 buf: &mut BitBufferMut,
334 offsets: &[O],
335 sizes: &[S],
336) {
337 let len = offsets.len();
338
339 assert_eq!(
340 len,
341 sizes.len(),
342 "offsets and sizes must be the same length"
343 );
344
345 for i in 0..len {
346 let start: usize = offsets[i].as_();
347 let size: usize = sizes[i].as_();
348 buf.fill_range(start, start + size, true);
349 }
350}
351
352pub trait ListViewArrayExt: TypedArrayRef<ListView> {
353 fn nullability(&self) -> crate::dtype::Nullability {
354 match self.as_ref().dtype() {
355 DType::List(_, nullability) => *nullability,
356 _ => unreachable!("ListViewArrayExt requires a list dtype"),
357 }
358 }
359
360 fn elements(&self) -> &ArrayRef {
361 self.as_ref().slots()[ELEMENTS_SLOT]
362 .as_ref()
363 .vortex_expect("ListViewArray elements slot")
364 }
365
366 fn offsets(&self) -> &ArrayRef {
367 self.as_ref().slots()[OFFSETS_SLOT]
368 .as_ref()
369 .vortex_expect("ListViewArray offsets slot")
370 }
371
372 fn sizes(&self) -> &ArrayRef {
373 self.as_ref().slots()[SIZES_SLOT]
374 .as_ref()
375 .vortex_expect("ListViewArray sizes slot")
376 }
377
378 fn listview_validity(&self) -> Validity {
379 child_to_validity(
380 self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
381 self.nullability(),
382 )
383 }
384
385 fn offset_at(&self, index: usize) -> usize {
386 assert!(
387 index < self.as_ref().len(),
388 "Index {index} out of bounds 0..{}",
389 self.as_ref().len()
390 );
391 self.offsets()
392 .as_opt::<Primitive>()
393 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
394 .unwrap_or_else(|| {
395 self.offsets()
396 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
397 .vortex_expect("offsets must support execute_scalar")
398 .as_primitive()
399 .as_::<usize>()
400 .vortex_expect("offset must fit in usize")
401 })
402 }
403
404 fn size_at(&self, index: usize) -> usize {
405 assert!(
406 index < self.as_ref().len(),
407 "Index {} out of bounds 0..{}",
408 index,
409 self.as_ref().len()
410 );
411 self.sizes()
412 .as_opt::<Primitive>()
413 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
414 .unwrap_or_else(|| {
415 self.sizes()
416 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
417 .vortex_expect("sizes must support execute_scalar")
418 .as_primitive()
419 .as_::<usize>()
420 .vortex_expect("size must fit in usize")
421 })
422 }
423
424 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
425 let offset = self.offset_at(index);
426 let size = self.size_at(index);
427 self.elements().slice(offset..offset + size)
428 }
429
430 fn verify_is_zero_copy_to_list(&self) -> bool {
431 #[expect(deprecated)]
432 let offsets_primitive = self.offsets().to_primitive();
433 #[expect(deprecated)]
434 let sizes_primitive = self.sizes().to_primitive();
435 validate_zctl(self.elements(), offsets_primitive, sizes_primitive).is_ok()
436 }
437
438 fn compute_referenced_elements_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult<Mask> {
449 assert!(!self.elements().is_empty());
450 let len = self.elements().len();
451
452 let offsets_primitive = self.offsets().clone().execute::<PrimitiveArray>(ctx)?;
453 let sizes_primitive = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
454
455 let mut buf = BitBufferMut::new_unset(len);
456
457 let offsets_primitive =
459 offsets_primitive.reinterpret_cast(offsets_primitive.ptype().to_unsigned());
460 let sizes_primitive =
461 sizes_primitive.reinterpret_cast(sizes_primitive.ptype().to_unsigned());
462 match_each_unsigned_integer_ptype!(offsets_primitive.ptype(), |O| {
463 match_each_unsigned_integer_ptype!(sizes_primitive.ptype(), |S| {
464 fill_referenced_mask::<O, S>(
465 &mut buf,
466 offsets_primitive.as_slice::<O>(),
467 sizes_primitive.as_slice::<S>(),
468 );
469 })
470 });
471
472 Ok(Mask::from_buffer(buf.freeze()))
473 }
474
475 fn compute_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
479 if self.elements().is_empty() {
480 return Ok(1.0);
481 }
482
483 if self.sizes().is_empty() {
484 return Ok(0.0);
485 }
486
487 let density = match self.compute_referenced_elements_mask(ctx)? {
488 Mask::AllTrue(_) => 1.0,
489 Mask::AllFalse(_) => 0.0,
490 Mask::Values(values) => values.true_count() as f32 / self.elements().len() as f32,
491 };
492
493 Ok(density)
494 }
495
496 fn upper_bound_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
503 let n_elts = self.elements().len();
504 if n_elts == 0 {
505 return Ok(1.0);
506 }
507
508 let sizes = self.sizes();
509 if sizes.is_empty() {
510 return Ok(0.0);
511 }
512
513 let sizes_sum = sizes
515 .statistics()
516 .compute_stat(Stat::Sum, ctx)?
517 .vortex_expect("sizes array has integer ptype elements")
518 .as_primitive()
519 .as_::<u64>()
520 .vortex_expect("integer ptypes can be upcast to u64");
521
522 let estimate = (sizes_sum as f32 / n_elts as f32).min(1.0);
525
526 debug_assert!(estimate >= 0.0);
527
528 Ok(estimate)
529 }
530
531 fn referenced_element_bounds(&self, ctx: &mut ExecutionCtx) -> VortexResult<(usize, usize)> {
544 let n_lists = self.as_ref().len();
545 vortex_ensure!(
546 n_lists > 0,
547 "referenced_element_bounds requires a non-empty array"
548 );
549
550 if self.is_zero_copy_to_list() {
551 let start = self.offset_at(0);
552 let end = self.offset_at(n_lists - 1) + self.size_at(n_lists - 1);
553 return Ok((start, end));
554 }
555
556 let start = self
557 .offsets()
558 .statistics()
559 .compute_min::<usize>(ctx)
560 .vortex_expect("offsets must report a usize min statistic");
561
562 let wide_dtype = DType::from(if self.offsets().dtype().as_ptype().is_unsigned_int() {
565 PType::U64
566 } else {
567 PType::I64
568 });
569 let offsets = self.offsets().cast(wide_dtype.clone())?;
570 let sizes = self.sizes().cast(wide_dtype)?;
571 let end = min_max(
572 &offsets.binary(sizes, Operator::Add)?,
573 ctx,
574 NumericalAggregateOpts::default(),
575 )?
576 .vortex_expect("non-empty array must report a min/max")
577 .max
578 .as_primitive()
579 .as_::<usize>()
580 .vortex_expect("max `offset + size` must fit in a usize");
581
582 Ok((start, end))
583 }
584}
585impl<T: TypedArrayRef<ListView>> ListViewArrayExt for T {}
586
587impl Array<ListView> {
588 pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
590 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
591 let len = offsets.len();
592 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
593 ListViewData::validate(&elements, &offsets, &sizes, &validity)
594 .vortex_expect("`ListViewArray` construction failed");
595 let data = ListViewData::new();
596 unsafe {
597 Array::from_parts_unchecked(
598 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
599 )
600 }
601 }
602
603 pub fn try_new(
605 elements: ArrayRef,
606 offsets: ArrayRef,
607 sizes: ArrayRef,
608 validity: Validity,
609 ) -> VortexResult<Self> {
610 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
611 let len = offsets.len();
612 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
613 ListViewData::validate(&elements, &offsets, &sizes, &validity)?;
614 let data = ListViewData::try_new()?;
615 Ok(unsafe {
616 Array::from_parts_unchecked(
617 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
618 )
619 })
620 }
621
622 pub unsafe fn new_unchecked(
628 elements: ArrayRef,
629 offsets: ArrayRef,
630 sizes: ArrayRef,
631 validity: Validity,
632 ) -> Self {
633 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
634 let len = offsets.len();
635 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
636 let data = unsafe { ListViewData::new_unchecked() };
637 unsafe {
638 Array::from_parts_unchecked(
639 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
640 )
641 }
642 }
643
644 pub unsafe fn with_zero_copy_to_list(self, is_zctl: bool) -> Self {
650 if cfg!(debug_assertions) && is_zctl {
651 #[expect(deprecated)]
652 let offsets_primitive = self.offsets().to_primitive();
653 #[expect(deprecated)]
654 let sizes_primitive = self.sizes().to_primitive();
655 validate_zctl(self.elements(), offsets_primitive, sizes_primitive)
656 .vortex_expect("Failed to validate zero-copy to list flag");
657 }
658 let dtype = self.dtype().clone();
659 let len = self.len();
660 let slots: ArraySlots = self.slots().iter().cloned().collect();
661 let data = unsafe { self.into_data().with_zero_copy_to_list(is_zctl) };
662 unsafe {
663 Array::from_parts_unchecked(
664 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
665 )
666 }
667 }
668
669 pub fn into_data_parts(self) -> ListViewDataParts {
670 let elements = self.slots()[ELEMENTS_SLOT]
671 .clone()
672 .vortex_expect("ListViewArray elements slot");
673 let offsets = self.slots()[OFFSETS_SLOT]
674 .clone()
675 .vortex_expect("ListViewArray offsets slot");
676 let sizes = self.slots()[SIZES_SLOT]
677 .clone()
678 .vortex_expect("ListViewArray sizes slot");
679 let validity = self.listview_validity();
680 ListViewDataParts {
681 elements_dtype: Arc::new(elements.dtype().clone()),
682 elements,
683 offsets,
684 sizes,
685 validity,
686 }
687 }
688}
689
690fn validate_offsets_and_sizes<O, S>(
692 offsets_slice: &[O],
693 sizes_slice: &[S],
694 elements_len: u64,
695) -> VortexResult<()>
696where
697 O: IntegerPType,
698 S: IntegerPType,
699{
700 debug_assert_eq!(offsets_slice.len(), sizes_slice.len());
701
702 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
703 for i in 0..offsets_slice.len() {
704 let offset = offsets_slice[i];
705 let size = sizes_slice[i];
706
707 vortex_ensure!(offset >= O::zero(), "cannot have negative offsets");
708 vortex_ensure!(size >= S::zero(), "cannot have negative size");
709
710 let offset_u64 = offset
711 .to_u64()
712 .ok_or_else(|| vortex_err!("offset[{i}] = {offset:?} cannot be converted to u64"))?;
713
714 let size_u64 = size
715 .to_u64()
716 .ok_or_else(|| vortex_err!("size[{i}] = {size:?} cannot be converted to u64"))?;
717
718 let end = offset_u64.checked_add(size_u64).ok_or_else(|| {
720 vortex_err!("offset[{i}] ({offset_u64}) + size[{i}] ({size_u64}) would overflow u64")
721 })?;
722
723 if offset_u64 == elements_len {
724 vortex_ensure!(
725 size_u64 == 0,
726 "views to the end of the elements array (length {elements_len}) must have size 0 \
727 (had size {size_u64})"
728 );
729 }
730
731 vortex_ensure!(
732 end <= elements_len,
733 "offset[{i}] + size[{i}] = {offset_u64} + {size_u64} = {end} \
734 exceeds elements length {elements_len}",
735 );
736 }
737
738 Ok(())
739}
740
741fn validate_zctl(
744 elements: &ArrayRef,
745 offsets_primitive: PrimitiveArray,
746 sizes_primitive: PrimitiveArray,
747) -> VortexResult<()> {
748 let mut ctx = LEGACY_SESSION.create_execution_ctx();
751 if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted(&mut ctx) {
752 vortex_ensure!(is_sorted, "offsets must be sorted");
753 } else {
754 vortex_bail!("offsets must report is_sorted statistic");
755 }
756
757 fn validate_monotonic_ends<O: IntegerPType, S: IntegerPType>(
760 offsets_slice: &[O],
761 sizes_slice: &[S],
762 len: usize,
763 ) -> VortexResult<()> {
764 let mut max_end = 0usize;
765
766 for i in 0..len {
767 let offset = offsets_slice[i].to_usize().unwrap_or(usize::MAX);
768 let size = sizes_slice[i].to_usize().unwrap_or(usize::MAX);
769
770 vortex_ensure!(
772 offset >= max_end,
773 "Zero-copy-to-list requires views to be non-overlapping and ordered: \
774 view[{}] starts at {} but previous views extend to {}",
775 i,
776 offset,
777 max_end
778 );
779
780 let end = offset.saturating_add(size);
782 max_end = max_end.max(end);
783 }
784
785 Ok(())
786 }
787
788 let offsets_dtype = offsets_primitive.dtype();
789 let sizes_dtype = sizes_primitive.dtype();
790 let len = offsets_primitive.len();
791
792 let offsets_unsigned =
794 offsets_primitive.reinterpret_cast(offsets_dtype.as_ptype().to_unsigned());
795 let sizes_unsigned = sizes_primitive.reinterpret_cast(sizes_dtype.as_ptype().to_unsigned());
796
797 match_each_unsigned_integer_ptype!(offsets_unsigned.ptype(), |O| {
799 match_each_unsigned_integer_ptype!(sizes_unsigned.ptype(), |S| {
800 let offsets_slice = offsets_unsigned.as_slice::<O>();
801 let sizes_slice = sizes_unsigned.as_slice::<S>();
802
803 validate_monotonic_ends(offsets_slice, sizes_slice, len)?;
804 })
805 });
806
807 let mut element_references = vec![0u8; elements.len()];
812
813 fn count_references<O: IntegerPType, S: IntegerPType>(
814 element_references: &mut [u8],
815 offsets_primitive: PrimitiveArray,
816 sizes_primitive: PrimitiveArray,
817 ) {
818 let offsets_slice = offsets_primitive.as_slice::<O>();
819 let sizes_slice = sizes_primitive.as_slice::<S>();
820
821 for i in 0..offsets_slice.len() {
824 let offset: usize = offsets_slice[i].as_();
825 let size: usize = sizes_slice[i].as_();
826 for j in offset..offset + size {
827 element_references[j] = element_references[j].saturating_add(1);
828 }
829 }
830 }
831
832 match_each_unsigned_integer_ptype!(offsets_unsigned.ptype(), |O| {
833 match_each_unsigned_integer_ptype!(sizes_unsigned.ptype(), |S| {
834 count_references::<O, S>(&mut element_references, offsets_unsigned, sizes_unsigned);
835 })
836 });
837
838 let leftmost_used = element_references
840 .iter()
841 .position(|&references| references != 0);
842 let rightmost_used = element_references
843 .iter()
844 .rposition(|&references| references != 0);
845
846 if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
847 vortex_ensure!(
848 element_references[first_ref..=last_ref]
849 .iter()
850 .all(|&references| references != 0),
851 "found gap in elements array between first and last referenced elements"
852 );
853 }
854
855 vortex_ensure!(element_references.iter().all(|&references| references <= 1));
856
857 Ok(())
858}