1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use smallvec::smallvec;
10use vortex_buffer::BitBufferMut;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_bail;
14use vortex_error::vortex_ensure;
15use vortex_error::vortex_err;
16use vortex_mask::Mask;
17
18use crate::ArrayRef;
19use crate::ArraySlots;
20use crate::ExecutionCtx;
21use crate::LEGACY_SESSION;
22#[expect(deprecated)]
23use crate::ToCanonical as _;
24use crate::VortexSessionExecute;
25use crate::aggregate_fn::fns::min_max::min_max;
26use crate::array::Array;
27use crate::array::ArrayParts;
28use crate::array::TypedArrayRef;
29use crate::array::child_to_validity;
30use crate::array::validity_to_child;
31use crate::arrays::ListView;
32use crate::arrays::Primitive;
33use crate::arrays::PrimitiveArray;
34use crate::arrays::bool;
35use crate::arrays::primitive::PrimitiveArrayExt;
36use crate::builtins::ArrayBuiltins;
37use crate::dtype::DType;
38use crate::dtype::IntegerPType;
39use crate::dtype::PType;
40use crate::expr::stats::Stat;
41use crate::match_each_integer_ptype;
42use crate::match_each_unsigned_integer_ptype;
43use crate::scalar_fn::fns::operators::Operator;
44use crate::validity::Validity;
45
46pub(super) const ELEMENTS_SLOT: usize = 0;
49pub(super) const OFFSETS_SLOT: usize = 1;
54pub(super) const SIZES_SLOT: usize = 2;
59pub(super) const VALIDITY_SLOT: usize = 3;
61pub(super) const NUM_SLOTS: usize = 4;
62pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "sizes", "validity"];
63
64#[derive(Clone, Debug)]
128pub struct ListViewData {
129 is_zero_copy_to_list: bool,
138}
139
140impl Display for ListViewData {
141 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
142 write!(f, "is_zero_copy_to_list: {}", self.is_zero_copy_to_list)
143 }
144}
145
146pub struct ListViewDataParts {
147 pub elements_dtype: Arc<DType>,
148
149 pub elements: ArrayRef,
151
152 pub offsets: ArrayRef,
154
155 pub sizes: ArrayRef,
157
158 pub validity: Validity,
160}
161
162impl ListViewData {
163 pub(crate) fn make_slots(
164 elements: &ArrayRef,
165 offsets: &ArrayRef,
166 sizes: &ArrayRef,
167 validity: &Validity,
168 len: usize,
169 ) -> ArraySlots {
170 smallvec![
171 Some(elements.clone()),
172 Some(offsets.clone()),
173 Some(sizes.clone()),
174 validity_to_child(validity, len),
175 ]
176 }
177
178 pub fn new() -> Self {
185 Self {
186 is_zero_copy_to_list: false,
187 }
188 }
189
190 pub fn try_new() -> VortexResult<Self> {
197 Ok(Self::new())
198 }
199
200 pub unsafe fn new_unchecked() -> Self {
220 Self::new()
221 }
222
223 pub fn validate(
225 elements: &ArrayRef,
226 offsets: &ArrayRef,
227 sizes: &ArrayRef,
228 validity: &Validity,
229 ) -> VortexResult<()> {
230 vortex_ensure!(
232 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
233 "offsets must be non-nullable integer array, got {}",
234 offsets.dtype()
235 );
236 vortex_ensure!(
237 sizes.dtype().is_int() && !sizes.dtype().is_nullable(),
238 "sizes must be non-nullable integer array, got {}",
239 sizes.dtype()
240 );
241
242 vortex_ensure!(
244 offsets.len() == sizes.len(),
245 "offsets and sizes must have the same length, got {} and {}",
246 offsets.len(),
247 sizes.len()
248 );
249
250 if let Some(validity_len) = validity.maybe_len() {
252 vortex_ensure!(
253 validity_len == offsets.len(),
254 "validity with size {validity_len} does not match array size {}",
255 offsets.len()
256 );
257 }
258
259 if offsets.is_host() && sizes.is_host() {
261 #[expect(deprecated)]
262 let offsets_primitive = offsets.to_primitive();
263 #[expect(deprecated)]
264 let sizes_primitive = sizes.to_primitive();
265 let offsets_primitive =
268 offsets_primitive.reinterpret_cast(offsets_primitive.ptype().to_unsigned());
269 let sizes_primitive =
270 sizes_primitive.reinterpret_cast(sizes_primitive.ptype().to_unsigned());
271
272 match_each_unsigned_integer_ptype!(offsets_primitive.ptype(), |O| {
274 match_each_unsigned_integer_ptype!(sizes_primitive.ptype(), |S| {
275 let offsets_slice = offsets_primitive.as_slice::<O>();
276 let sizes_slice = sizes_primitive.as_slice::<S>();
277
278 validate_offsets_and_sizes::<O, S>(
279 offsets_slice,
280 sizes_slice,
281 elements.len() as u64,
282 )?;
283 })
284 });
285 }
286
287 Ok(())
288 }
289
290 pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
309 self.is_zero_copy_to_list = is_zctl;
310 self
311 }
312
313 pub fn is_zero_copy_to_list(&self) -> bool {
316 self.is_zero_copy_to_list
317 }
318}
319
320impl Default for ListViewData {
321 fn default() -> Self {
322 Self::new()
323 }
324}
325
326fn fill_referenced_mask<O: IntegerPType, S: IntegerPType>(
332 buf: &mut BitBufferMut,
333 offsets: &[O],
334 sizes: &[S],
335) {
336 let len = offsets.len();
337
338 assert_eq!(
339 len,
340 sizes.len(),
341 "offsets and sizes must be the same length"
342 );
343
344 for i in 0..len {
345 let start: usize = offsets[i].as_();
346 let size: usize = sizes[i].as_();
347 buf.fill_range(start, start + size, true);
348 }
349}
350
351pub trait ListViewArrayExt: TypedArrayRef<ListView> {
352 fn nullability(&self) -> crate::dtype::Nullability {
353 match self.as_ref().dtype() {
354 DType::List(_, nullability) => *nullability,
355 _ => unreachable!("ListViewArrayExt requires a list dtype"),
356 }
357 }
358
359 fn elements(&self) -> &ArrayRef {
360 self.as_ref().slots()[ELEMENTS_SLOT]
361 .as_ref()
362 .vortex_expect("ListViewArray elements slot")
363 }
364
365 fn offsets(&self) -> &ArrayRef {
366 self.as_ref().slots()[OFFSETS_SLOT]
367 .as_ref()
368 .vortex_expect("ListViewArray offsets slot")
369 }
370
371 fn sizes(&self) -> &ArrayRef {
372 self.as_ref().slots()[SIZES_SLOT]
373 .as_ref()
374 .vortex_expect("ListViewArray sizes slot")
375 }
376
377 fn listview_validity(&self) -> Validity {
378 child_to_validity(
379 self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
380 self.nullability(),
381 )
382 }
383
384 fn offset_at(&self, index: usize) -> usize {
385 assert!(
386 index < self.as_ref().len(),
387 "Index {index} out of bounds 0..{}",
388 self.as_ref().len()
389 );
390 self.offsets()
391 .as_opt::<Primitive>()
392 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
393 .unwrap_or_else(|| {
394 self.offsets()
395 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
396 .vortex_expect("offsets must support execute_scalar")
397 .as_primitive()
398 .as_::<usize>()
399 .vortex_expect("offset must fit in usize")
400 })
401 }
402
403 fn size_at(&self, index: usize) -> usize {
404 assert!(
405 index < self.as_ref().len(),
406 "Index {} out of bounds 0..{}",
407 index,
408 self.as_ref().len()
409 );
410 self.sizes()
411 .as_opt::<Primitive>()
412 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
413 .unwrap_or_else(|| {
414 self.sizes()
415 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
416 .vortex_expect("sizes must support execute_scalar")
417 .as_primitive()
418 .as_::<usize>()
419 .vortex_expect("size must fit in usize")
420 })
421 }
422
423 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
424 let offset = self.offset_at(index);
425 let size = self.size_at(index);
426 self.elements().slice(offset..offset + size)
427 }
428
429 fn verify_is_zero_copy_to_list(&self) -> bool {
430 #[expect(deprecated)]
431 let offsets_primitive = self.offsets().to_primitive();
432 #[expect(deprecated)]
433 let sizes_primitive = self.sizes().to_primitive();
434 validate_zctl(self.elements(), offsets_primitive, sizes_primitive).is_ok()
435 }
436
437 fn compute_referenced_elements_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult<Mask> {
448 assert!(!self.elements().is_empty());
449 let len = self.elements().len();
450
451 let offsets_primitive = self.offsets().clone().execute::<PrimitiveArray>(ctx)?;
452 let sizes_primitive = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
453
454 let mut buf = BitBufferMut::new_unset(len);
455
456 let offsets_primitive =
458 offsets_primitive.reinterpret_cast(offsets_primitive.ptype().to_unsigned());
459 let sizes_primitive =
460 sizes_primitive.reinterpret_cast(sizes_primitive.ptype().to_unsigned());
461 match_each_unsigned_integer_ptype!(offsets_primitive.ptype(), |O| {
462 match_each_unsigned_integer_ptype!(sizes_primitive.ptype(), |S| {
463 fill_referenced_mask::<O, S>(
464 &mut buf,
465 offsets_primitive.as_slice::<O>(),
466 sizes_primitive.as_slice::<S>(),
467 );
468 })
469 });
470
471 Ok(Mask::from_buffer(buf.freeze()))
472 }
473
474 fn compute_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
478 if self.elements().is_empty() {
479 return Ok(1.0);
480 }
481
482 if self.sizes().is_empty() {
483 return Ok(0.0);
484 }
485
486 let density = match self.compute_referenced_elements_mask(ctx)? {
487 Mask::AllTrue(_) => 1.0,
488 Mask::AllFalse(_) => 0.0,
489 Mask::Values(values) => values.true_count() as f32 / self.elements().len() as f32,
490 };
491
492 Ok(density)
493 }
494
495 fn upper_bound_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
502 let n_elts = self.elements().len();
503 if n_elts == 0 {
504 return Ok(1.0);
505 }
506
507 let sizes = self.sizes();
508 if sizes.is_empty() {
509 return Ok(0.0);
510 }
511
512 let sizes_sum = sizes
514 .statistics()
515 .compute_stat(Stat::Sum, ctx)?
516 .vortex_expect("sizes array has integer ptype elements")
517 .as_primitive()
518 .as_::<u64>()
519 .vortex_expect("integer ptypes can be upcast to u64");
520
521 let estimate = (sizes_sum as f32 / n_elts as f32).min(1.0);
524
525 debug_assert!(estimate >= 0.0);
526
527 Ok(estimate)
528 }
529
530 fn referenced_element_bounds(&self, ctx: &mut ExecutionCtx) -> VortexResult<(usize, usize)> {
543 let n_lists = self.as_ref().len();
544 vortex_ensure!(
545 n_lists > 0,
546 "referenced_element_bounds requires a non-empty array"
547 );
548
549 if self.is_zero_copy_to_list() {
550 let start = self.offset_at(0);
551 let end = self.offset_at(n_lists - 1) + self.size_at(n_lists - 1);
552 return Ok((start, end));
553 }
554
555 let start = self
556 .offsets()
557 .statistics()
558 .compute_min::<usize>(ctx)
559 .vortex_expect("offsets must report a usize min statistic");
560
561 let wide_dtype = DType::from(if self.offsets().dtype().as_ptype().is_unsigned_int() {
564 PType::U64
565 } else {
566 PType::I64
567 });
568 let offsets = self.offsets().cast(wide_dtype.clone())?;
569 let sizes = self.sizes().cast(wide_dtype)?;
570 let end = min_max(&offsets.binary(sizes, Operator::Add)?, ctx)?
571 .vortex_expect("non-empty array must report a min/max")
572 .max
573 .as_primitive()
574 .as_::<usize>()
575 .vortex_expect("max `offset + size` must fit in a usize");
576
577 Ok((start, end))
578 }
579}
580impl<T: TypedArrayRef<ListView>> ListViewArrayExt for T {}
581
582impl Array<ListView> {
583 pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
585 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
586 let len = offsets.len();
587 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
588 ListViewData::validate(&elements, &offsets, &sizes, &validity)
589 .vortex_expect("`ListViewArray` construction failed");
590 let data = ListViewData::new();
591 unsafe {
592 Array::from_parts_unchecked(
593 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
594 )
595 }
596 }
597
598 pub fn try_new(
600 elements: ArrayRef,
601 offsets: ArrayRef,
602 sizes: ArrayRef,
603 validity: Validity,
604 ) -> VortexResult<Self> {
605 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
606 let len = offsets.len();
607 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
608 ListViewData::validate(&elements, &offsets, &sizes, &validity)?;
609 let data = ListViewData::try_new()?;
610 Ok(unsafe {
611 Array::from_parts_unchecked(
612 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
613 )
614 })
615 }
616
617 pub unsafe fn new_unchecked(
623 elements: ArrayRef,
624 offsets: ArrayRef,
625 sizes: ArrayRef,
626 validity: Validity,
627 ) -> Self {
628 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
629 let len = offsets.len();
630 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
631 let data = unsafe { ListViewData::new_unchecked() };
632 unsafe {
633 Array::from_parts_unchecked(
634 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
635 )
636 }
637 }
638
639 pub unsafe fn with_zero_copy_to_list(self, is_zctl: bool) -> Self {
645 if cfg!(debug_assertions) && is_zctl {
646 #[expect(deprecated)]
647 let offsets_primitive = self.offsets().to_primitive();
648 #[expect(deprecated)]
649 let sizes_primitive = self.sizes().to_primitive();
650 validate_zctl(self.elements(), offsets_primitive, sizes_primitive)
651 .vortex_expect("Failed to validate zero-copy to list flag");
652 }
653 let dtype = self.dtype().clone();
654 let len = self.len();
655 let slots: ArraySlots = self.slots().iter().cloned().collect();
656 let data = unsafe { self.into_data().with_zero_copy_to_list(is_zctl) };
657 unsafe {
658 Array::from_parts_unchecked(
659 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
660 )
661 }
662 }
663
664 pub fn into_data_parts(self) -> ListViewDataParts {
665 let elements = self.slots()[ELEMENTS_SLOT]
666 .clone()
667 .vortex_expect("ListViewArray elements slot");
668 let offsets = self.slots()[OFFSETS_SLOT]
669 .clone()
670 .vortex_expect("ListViewArray offsets slot");
671 let sizes = self.slots()[SIZES_SLOT]
672 .clone()
673 .vortex_expect("ListViewArray sizes slot");
674 let validity = self.listview_validity();
675 ListViewDataParts {
676 elements_dtype: Arc::new(elements.dtype().clone()),
677 elements,
678 offsets,
679 sizes,
680 validity,
681 }
682 }
683}
684
685fn validate_offsets_and_sizes<O, S>(
687 offsets_slice: &[O],
688 sizes_slice: &[S],
689 elements_len: u64,
690) -> VortexResult<()>
691where
692 O: IntegerPType,
693 S: IntegerPType,
694{
695 debug_assert_eq!(offsets_slice.len(), sizes_slice.len());
696
697 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
698 for i in 0..offsets_slice.len() {
699 let offset = offsets_slice[i];
700 let size = sizes_slice[i];
701
702 vortex_ensure!(offset >= O::zero(), "cannot have negative offsets");
703 vortex_ensure!(size >= S::zero(), "cannot have negative size");
704
705 let offset_u64 = offset
706 .to_u64()
707 .ok_or_else(|| vortex_err!("offset[{i}] = {offset:?} cannot be converted to u64"))?;
708
709 let size_u64 = size
710 .to_u64()
711 .ok_or_else(|| vortex_err!("size[{i}] = {size:?} cannot be converted to u64"))?;
712
713 let end = offset_u64.checked_add(size_u64).ok_or_else(|| {
715 vortex_err!("offset[{i}] ({offset_u64}) + size[{i}] ({size_u64}) would overflow u64")
716 })?;
717
718 if offset_u64 == elements_len {
719 vortex_ensure!(
720 size_u64 == 0,
721 "views to the end of the elements array (length {elements_len}) must have size 0 \
722 (had size {size_u64})"
723 );
724 }
725
726 vortex_ensure!(
727 end <= elements_len,
728 "offset[{i}] + size[{i}] = {offset_u64} + {size_u64} = {end} \
729 exceeds elements length {elements_len}",
730 );
731 }
732
733 Ok(())
734}
735
736fn validate_zctl(
739 elements: &ArrayRef,
740 offsets_primitive: PrimitiveArray,
741 sizes_primitive: PrimitiveArray,
742) -> VortexResult<()> {
743 let mut ctx = LEGACY_SESSION.create_execution_ctx();
746 if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted(&mut ctx) {
747 vortex_ensure!(is_sorted, "offsets must be sorted");
748 } else {
749 vortex_bail!("offsets must report is_sorted statistic");
750 }
751
752 fn validate_monotonic_ends<O: IntegerPType, S: IntegerPType>(
755 offsets_slice: &[O],
756 sizes_slice: &[S],
757 len: usize,
758 ) -> VortexResult<()> {
759 let mut max_end = 0usize;
760
761 for i in 0..len {
762 let offset = offsets_slice[i].to_usize().unwrap_or(usize::MAX);
763 let size = sizes_slice[i].to_usize().unwrap_or(usize::MAX);
764
765 vortex_ensure!(
767 offset >= max_end,
768 "Zero-copy-to-list requires views to be non-overlapping and ordered: \
769 view[{}] starts at {} but previous views extend to {}",
770 i,
771 offset,
772 max_end
773 );
774
775 let end = offset.saturating_add(size);
777 max_end = max_end.max(end);
778 }
779
780 Ok(())
781 }
782
783 let offsets_dtype = offsets_primitive.dtype();
784 let sizes_dtype = sizes_primitive.dtype();
785 let len = offsets_primitive.len();
786
787 let offsets_unsigned =
789 offsets_primitive.reinterpret_cast(offsets_dtype.as_ptype().to_unsigned());
790 let sizes_unsigned = sizes_primitive.reinterpret_cast(sizes_dtype.as_ptype().to_unsigned());
791
792 match_each_unsigned_integer_ptype!(offsets_unsigned.ptype(), |O| {
794 match_each_unsigned_integer_ptype!(sizes_unsigned.ptype(), |S| {
795 let offsets_slice = offsets_unsigned.as_slice::<O>();
796 let sizes_slice = sizes_unsigned.as_slice::<S>();
797
798 validate_monotonic_ends(offsets_slice, sizes_slice, len)?;
799 })
800 });
801
802 let mut element_references = vec![0u8; elements.len()];
807
808 fn count_references<O: IntegerPType, S: IntegerPType>(
809 element_references: &mut [u8],
810 offsets_primitive: PrimitiveArray,
811 sizes_primitive: PrimitiveArray,
812 ) {
813 let offsets_slice = offsets_primitive.as_slice::<O>();
814 let sizes_slice = sizes_primitive.as_slice::<S>();
815
816 for i in 0..offsets_slice.len() {
819 let offset: usize = offsets_slice[i].as_();
820 let size: usize = sizes_slice[i].as_();
821 for j in offset..offset + size {
822 element_references[j] = element_references[j].saturating_add(1);
823 }
824 }
825 }
826
827 match_each_unsigned_integer_ptype!(offsets_unsigned.ptype(), |O| {
828 match_each_unsigned_integer_ptype!(sizes_unsigned.ptype(), |S| {
829 count_references::<O, S>(&mut element_references, offsets_unsigned, sizes_unsigned);
830 })
831 });
832
833 let leftmost_used = element_references
835 .iter()
836 .position(|&references| references != 0);
837 let rightmost_used = element_references
838 .iter()
839 .rposition(|&references| references != 0);
840
841 if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
842 vortex_ensure!(
843 element_references[first_ref..=last_ref]
844 .iter()
845 .all(|&references| references != 0),
846 "found gap in elements array between first and last referenced elements"
847 );
848 }
849
850 vortex_ensure!(element_references.iter().all(|&references| references <= 1));
851
852 Ok(())
853}