1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use smallvec::smallvec;
10use vortex_buffer::BitBufferMut;
11use vortex_error::VortexExpect;
12use vortex_error::VortexResult;
13use vortex_error::vortex_bail;
14use vortex_error::vortex_ensure;
15use vortex_error::vortex_err;
16use vortex_mask::Mask;
17
18use crate::ArrayRef;
19use crate::ArraySlots;
20use crate::ExecutionCtx;
21use crate::LEGACY_SESSION;
22#[expect(deprecated)]
23use crate::ToCanonical as _;
24use crate::VortexSessionExecute;
25use crate::array::Array;
26use crate::array::ArrayParts;
27use crate::array::TypedArrayRef;
28use crate::array::child_to_validity;
29use crate::array::validity_to_child;
30use crate::arrays::ListView;
31use crate::arrays::Primitive;
32use crate::arrays::PrimitiveArray;
33use crate::arrays::bool;
34use crate::dtype::DType;
35use crate::dtype::IntegerPType;
36use crate::expr::stats::Stat;
37use crate::match_each_integer_ptype;
38use crate::validity::Validity;
39
40pub(super) const ELEMENTS_SLOT: usize = 0;
43pub(super) const OFFSETS_SLOT: usize = 1;
48pub(super) const SIZES_SLOT: usize = 2;
53pub(super) const VALIDITY_SLOT: usize = 3;
55pub(super) const NUM_SLOTS: usize = 4;
56pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "sizes", "validity"];
57
58#[derive(Clone, Debug)]
122pub struct ListViewData {
123 is_zero_copy_to_list: bool,
132}
133
134impl Display for ListViewData {
135 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
136 write!(f, "is_zero_copy_to_list: {}", self.is_zero_copy_to_list)
137 }
138}
139
140pub struct ListViewDataParts {
141 pub elements_dtype: Arc<DType>,
142
143 pub elements: ArrayRef,
145
146 pub offsets: ArrayRef,
148
149 pub sizes: ArrayRef,
151
152 pub validity: Validity,
154}
155
156impl ListViewData {
157 pub(crate) fn make_slots(
158 elements: &ArrayRef,
159 offsets: &ArrayRef,
160 sizes: &ArrayRef,
161 validity: &Validity,
162 len: usize,
163 ) -> ArraySlots {
164 smallvec![
165 Some(elements.clone()),
166 Some(offsets.clone()),
167 Some(sizes.clone()),
168 validity_to_child(validity, len),
169 ]
170 }
171
172 pub fn new() -> Self {
179 Self {
180 is_zero_copy_to_list: false,
181 }
182 }
183
184 pub fn try_new() -> VortexResult<Self> {
191 Ok(Self::new())
192 }
193
194 pub unsafe fn new_unchecked() -> Self {
214 Self::new()
215 }
216
217 pub fn validate(
219 elements: &ArrayRef,
220 offsets: &ArrayRef,
221 sizes: &ArrayRef,
222 validity: &Validity,
223 ) -> VortexResult<()> {
224 vortex_ensure!(
226 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
227 "offsets must be non-nullable integer array, got {}",
228 offsets.dtype()
229 );
230 vortex_ensure!(
231 sizes.dtype().is_int() && !sizes.dtype().is_nullable(),
232 "sizes must be non-nullable integer array, got {}",
233 sizes.dtype()
234 );
235
236 vortex_ensure!(
238 offsets.len() == sizes.len(),
239 "offsets and sizes must have the same length, got {} and {}",
240 offsets.len(),
241 sizes.len()
242 );
243
244 let size_ptype = sizes.dtype().as_ptype();
246 let offset_ptype = offsets.dtype().as_ptype();
247
248 if let Some(validity_len) = validity.maybe_len() {
250 vortex_ensure!(
251 validity_len == offsets.len(),
252 "validity with size {validity_len} does not match array size {}",
253 offsets.len()
254 );
255 }
256
257 if offsets.is_host() && sizes.is_host() {
259 #[expect(deprecated)]
260 let offsets_primitive = offsets.to_primitive();
261 #[expect(deprecated)]
262 let sizes_primitive = sizes.to_primitive();
263
264 match_each_integer_ptype!(offset_ptype, |O| {
266 match_each_integer_ptype!(size_ptype, |S| {
267 let offsets_slice = offsets_primitive.as_slice::<O>();
268 let sizes_slice = sizes_primitive.as_slice::<S>();
269
270 validate_offsets_and_sizes::<O, S>(
271 offsets_slice,
272 sizes_slice,
273 elements.len() as u64,
274 )?;
275 })
276 });
277 }
278
279 Ok(())
280 }
281
282 pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
301 self.is_zero_copy_to_list = is_zctl;
302 self
303 }
304
305 pub fn is_zero_copy_to_list(&self) -> bool {
308 self.is_zero_copy_to_list
309 }
310}
311
312impl Default for ListViewData {
313 fn default() -> Self {
314 Self::new()
315 }
316}
317
318fn fill_referenced_mask<O: IntegerPType, S: IntegerPType>(
324 buf: &mut BitBufferMut,
325 offsets: &[O],
326 sizes: &[S],
327) {
328 let len = offsets.len();
329
330 assert_eq!(
331 len,
332 sizes.len(),
333 "offsets and sizes must be the same length"
334 );
335
336 for i in 0..len {
337 let start: usize = offsets[i].as_();
338 let size: usize = sizes[i].as_();
339 buf.fill_range(start, start + size, true);
340 }
341}
342
343pub trait ListViewArrayExt: TypedArrayRef<ListView> {
344 fn nullability(&self) -> crate::dtype::Nullability {
345 match self.as_ref().dtype() {
346 DType::List(_, nullability) => *nullability,
347 _ => unreachable!("ListViewArrayExt requires a list dtype"),
348 }
349 }
350
351 fn elements(&self) -> &ArrayRef {
352 self.as_ref().slots()[ELEMENTS_SLOT]
353 .as_ref()
354 .vortex_expect("ListViewArray elements slot")
355 }
356
357 fn offsets(&self) -> &ArrayRef {
358 self.as_ref().slots()[OFFSETS_SLOT]
359 .as_ref()
360 .vortex_expect("ListViewArray offsets slot")
361 }
362
363 fn sizes(&self) -> &ArrayRef {
364 self.as_ref().slots()[SIZES_SLOT]
365 .as_ref()
366 .vortex_expect("ListViewArray sizes slot")
367 }
368
369 fn listview_validity(&self) -> Validity {
370 child_to_validity(
371 self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
372 self.nullability(),
373 )
374 }
375
376 fn offset_at(&self, index: usize) -> usize {
377 assert!(
378 index < self.as_ref().len(),
379 "Index {index} out of bounds 0..{}",
380 self.as_ref().len()
381 );
382 self.offsets()
383 .as_opt::<Primitive>()
384 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
385 .unwrap_or_else(|| {
386 self.offsets()
387 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
388 .vortex_expect("offsets must support execute_scalar")
389 .as_primitive()
390 .as_::<usize>()
391 .vortex_expect("offset must fit in usize")
392 })
393 }
394
395 fn size_at(&self, index: usize) -> usize {
396 assert!(
397 index < self.as_ref().len(),
398 "Index {} out of bounds 0..{}",
399 index,
400 self.as_ref().len()
401 );
402 self.sizes()
403 .as_opt::<Primitive>()
404 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
405 .unwrap_or_else(|| {
406 self.sizes()
407 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
408 .vortex_expect("sizes must support execute_scalar")
409 .as_primitive()
410 .as_::<usize>()
411 .vortex_expect("size must fit in usize")
412 })
413 }
414
415 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
416 let offset = self.offset_at(index);
417 let size = self.size_at(index);
418 self.elements().slice(offset..offset + size)
419 }
420
421 fn verify_is_zero_copy_to_list(&self) -> bool {
422 #[expect(deprecated)]
423 let offsets_primitive = self.offsets().to_primitive();
424 #[expect(deprecated)]
425 let sizes_primitive = self.sizes().to_primitive();
426 validate_zctl(self.elements(), offsets_primitive, sizes_primitive).is_ok()
427 }
428
429 fn compute_referenced_elements_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult<Mask> {
440 assert!(!self.elements().is_empty());
441 let len = self.elements().len();
442
443 let offsets_primitive = self.offsets().clone().execute::<PrimitiveArray>(ctx)?;
444 let sizes_primitive = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
445
446 let mut buf = BitBufferMut::new_unset(len);
447
448 match_each_integer_ptype!(offsets_primitive.ptype(), |O| {
449 match_each_integer_ptype!(sizes_primitive.ptype(), |S| {
450 fill_referenced_mask::<O, S>(
451 &mut buf,
452 offsets_primitive.as_slice::<O>(),
453 sizes_primitive.as_slice::<S>(),
454 );
455 })
456 });
457
458 Ok(Mask::from_buffer(buf.freeze()))
459 }
460
461 fn compute_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
465 if self.elements().is_empty() {
466 return Ok(1.0);
467 }
468
469 if self.sizes().is_empty() {
470 return Ok(0.0);
471 }
472
473 let density = match self.compute_referenced_elements_mask(ctx)? {
474 Mask::AllTrue(_) => 1.0,
475 Mask::AllFalse(_) => 0.0,
476 Mask::Values(values) => values.true_count() as f32 / self.elements().len() as f32,
477 };
478
479 Ok(density)
480 }
481
482 fn upper_bound_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
489 let n_elts = self.elements().len();
490 if n_elts == 0 {
491 return Ok(1.0);
492 }
493
494 let sizes = self.sizes();
495 if sizes.is_empty() {
496 return Ok(0.0);
497 }
498
499 let sizes_sum = sizes
501 .statistics()
502 .compute_stat(Stat::Sum, ctx)?
503 .vortex_expect("sizes array has integer ptype elements")
504 .as_primitive()
505 .as_::<u64>()
506 .vortex_expect("integer ptypes can be upcast to u64");
507
508 let estimate = (sizes_sum as f32 / n_elts as f32).min(1.0);
511
512 debug_assert!(estimate >= 0.0);
513
514 Ok(estimate)
515 }
516}
517impl<T: TypedArrayRef<ListView>> ListViewArrayExt for T {}
518
519impl Array<ListView> {
520 pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
522 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
523 let len = offsets.len();
524 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
525 ListViewData::validate(&elements, &offsets, &sizes, &validity)
526 .vortex_expect("`ListViewArray` construction failed");
527 let data = ListViewData::new();
528 unsafe {
529 Array::from_parts_unchecked(
530 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
531 )
532 }
533 }
534
535 pub fn try_new(
537 elements: ArrayRef,
538 offsets: ArrayRef,
539 sizes: ArrayRef,
540 validity: Validity,
541 ) -> VortexResult<Self> {
542 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
543 let len = offsets.len();
544 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
545 ListViewData::validate(&elements, &offsets, &sizes, &validity)?;
546 let data = ListViewData::try_new()?;
547 Ok(unsafe {
548 Array::from_parts_unchecked(
549 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
550 )
551 })
552 }
553
554 pub unsafe fn new_unchecked(
560 elements: ArrayRef,
561 offsets: ArrayRef,
562 sizes: ArrayRef,
563 validity: Validity,
564 ) -> Self {
565 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
566 let len = offsets.len();
567 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
568 let data = unsafe { ListViewData::new_unchecked() };
569 unsafe {
570 Array::from_parts_unchecked(
571 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
572 )
573 }
574 }
575
576 pub unsafe fn with_zero_copy_to_list(self, is_zctl: bool) -> Self {
582 if cfg!(debug_assertions) && is_zctl {
583 #[expect(deprecated)]
584 let offsets_primitive = self.offsets().to_primitive();
585 #[expect(deprecated)]
586 let sizes_primitive = self.sizes().to_primitive();
587 validate_zctl(self.elements(), offsets_primitive, sizes_primitive)
588 .vortex_expect("Failed to validate zero-copy to list flag");
589 }
590 let dtype = self.dtype().clone();
591 let len = self.len();
592 let slots: ArraySlots = self.slots().iter().cloned().collect();
593 let data = unsafe { self.into_data().with_zero_copy_to_list(is_zctl) };
594 unsafe {
595 Array::from_parts_unchecked(
596 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
597 )
598 }
599 }
600
601 pub fn into_data_parts(self) -> ListViewDataParts {
602 let elements = self.slots()[ELEMENTS_SLOT]
603 .clone()
604 .vortex_expect("ListViewArray elements slot");
605 let offsets = self.slots()[OFFSETS_SLOT]
606 .clone()
607 .vortex_expect("ListViewArray offsets slot");
608 let sizes = self.slots()[SIZES_SLOT]
609 .clone()
610 .vortex_expect("ListViewArray sizes slot");
611 let validity = self.listview_validity();
612 ListViewDataParts {
613 elements_dtype: Arc::new(elements.dtype().clone()),
614 elements,
615 offsets,
616 sizes,
617 validity,
618 }
619 }
620}
621
622fn validate_offsets_and_sizes<O, S>(
624 offsets_slice: &[O],
625 sizes_slice: &[S],
626 elements_len: u64,
627) -> VortexResult<()>
628where
629 O: IntegerPType,
630 S: IntegerPType,
631{
632 debug_assert_eq!(offsets_slice.len(), sizes_slice.len());
633
634 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
635 for i in 0..offsets_slice.len() {
636 let offset = offsets_slice[i];
637 let size = sizes_slice[i];
638
639 vortex_ensure!(offset >= O::zero(), "cannot have negative offsets");
640 vortex_ensure!(size >= S::zero(), "cannot have negative size");
641
642 let offset_u64 = offset
643 .to_u64()
644 .ok_or_else(|| vortex_err!("offset[{i}] = {offset:?} cannot be converted to u64"))?;
645
646 let size_u64 = size
647 .to_u64()
648 .ok_or_else(|| vortex_err!("size[{i}] = {size:?} cannot be converted to u64"))?;
649
650 let end = offset_u64.checked_add(size_u64).ok_or_else(|| {
652 vortex_err!("offset[{i}] ({offset_u64}) + size[{i}] ({size_u64}) would overflow u64")
653 })?;
654
655 if offset_u64 == elements_len {
656 vortex_ensure!(
657 size_u64 == 0,
658 "views to the end of the elements array (length {elements_len}) must have size 0 \
659 (had size {size_u64})"
660 );
661 }
662
663 vortex_ensure!(
664 end <= elements_len,
665 "offset[{i}] + size[{i}] = {offset_u64} + {size_u64} = {end} \
666 exceeds elements length {elements_len}",
667 );
668 }
669
670 Ok(())
671}
672
673fn validate_zctl(
676 elements: &ArrayRef,
677 offsets_primitive: PrimitiveArray,
678 sizes_primitive: PrimitiveArray,
679) -> VortexResult<()> {
680 let mut ctx = LEGACY_SESSION.create_execution_ctx();
683 if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted(&mut ctx) {
684 vortex_ensure!(is_sorted, "offsets must be sorted");
685 } else {
686 vortex_bail!("offsets must report is_sorted statistic");
687 }
688
689 fn validate_monotonic_ends<O: IntegerPType, S: IntegerPType>(
692 offsets_slice: &[O],
693 sizes_slice: &[S],
694 len: usize,
695 ) -> VortexResult<()> {
696 let mut max_end = 0usize;
697
698 for i in 0..len {
699 let offset = offsets_slice[i].to_usize().unwrap_or(usize::MAX);
700 let size = sizes_slice[i].to_usize().unwrap_or(usize::MAX);
701
702 vortex_ensure!(
704 offset >= max_end,
705 "Zero-copy-to-list requires views to be non-overlapping and ordered: \
706 view[{}] starts at {} but previous views extend to {}",
707 i,
708 offset,
709 max_end
710 );
711
712 let end = offset.saturating_add(size);
714 max_end = max_end.max(end);
715 }
716
717 Ok(())
718 }
719
720 let offsets_dtype = offsets_primitive.dtype();
721 let sizes_dtype = sizes_primitive.dtype();
722 let len = offsets_primitive.len();
723
724 match_each_integer_ptype!(offsets_dtype.as_ptype(), |O| {
726 match_each_integer_ptype!(sizes_dtype.as_ptype(), |S| {
727 let offsets_slice = offsets_primitive.as_slice::<O>();
728 let sizes_slice = sizes_primitive.as_slice::<S>();
729
730 validate_monotonic_ends(offsets_slice, sizes_slice, len)?;
731 })
732 });
733
734 let mut element_references = vec![0u8; elements.len()];
739
740 fn count_references<O: IntegerPType, S: IntegerPType>(
741 element_references: &mut [u8],
742 offsets_primitive: PrimitiveArray,
743 sizes_primitive: PrimitiveArray,
744 ) {
745 let offsets_slice = offsets_primitive.as_slice::<O>();
746 let sizes_slice = sizes_primitive.as_slice::<S>();
747
748 for i in 0..offsets_slice.len() {
751 let offset: usize = offsets_slice[i].as_();
752 let size: usize = sizes_slice[i].as_();
753 for j in offset..offset + size {
754 element_references[j] = element_references[j].saturating_add(1);
755 }
756 }
757 }
758
759 match_each_integer_ptype!(offsets_primitive.ptype(), |O| {
760 match_each_integer_ptype!(sizes_primitive.ptype(), |S| {
761 count_references::<O, S>(&mut element_references, offsets_primitive, sizes_primitive);
762 })
763 });
764
765 let leftmost_used = element_references
767 .iter()
768 .position(|&references| references != 0);
769 let rightmost_used = element_references
770 .iter()
771 .rposition(|&references| references != 0);
772
773 if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
774 vortex_ensure!(
775 element_references[first_ref..=last_ref]
776 .iter()
777 .all(|&references| references != 0),
778 "found gap in elements array between first and last referenced elements"
779 );
780 }
781
782 vortex_ensure!(element_references.iter().all(|&references| references <= 1));
783
784 Ok(())
785}