vortex_array/arrays/listview/
array.rs1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11use vortex_error::vortex_bail;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_err;
14
15use crate::ArrayRef;
16use crate::ToCanonical;
17use crate::array::Array;
18use crate::array::ArrayParts;
19use crate::array::TypedArrayRef;
20use crate::array::child_to_validity;
21use crate::array::validity_to_child;
22use crate::arrays::ListView;
23use crate::arrays::Primitive;
24use crate::arrays::PrimitiveArray;
25use crate::arrays::bool;
26use crate::dtype::DType;
27use crate::dtype::IntegerPType;
28use crate::match_each_integer_ptype;
29use crate::validity::Validity;
30
31pub(super) const ELEMENTS_SLOT: usize = 0;
34pub(super) const OFFSETS_SLOT: usize = 1;
39pub(super) const SIZES_SLOT: usize = 2;
44pub(super) const VALIDITY_SLOT: usize = 3;
46pub(super) const NUM_SLOTS: usize = 4;
47pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "sizes", "validity"];
48
49#[derive(Clone, Debug)]
113pub struct ListViewData {
114 is_zero_copy_to_list: bool,
123}
124
125impl Display for ListViewData {
126 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
127 write!(f, "is_zero_copy_to_list: {}", self.is_zero_copy_to_list)
128 }
129}
130
131pub struct ListViewDataParts {
132 pub elements_dtype: Arc<DType>,
133
134 pub elements: ArrayRef,
136
137 pub offsets: ArrayRef,
139
140 pub sizes: ArrayRef,
142
143 pub validity: Validity,
145}
146
147impl ListViewData {
148 pub(crate) fn make_slots(
149 elements: &ArrayRef,
150 offsets: &ArrayRef,
151 sizes: &ArrayRef,
152 validity: &Validity,
153 len: usize,
154 ) -> Vec<Option<ArrayRef>> {
155 vec![
156 Some(elements.clone()),
157 Some(offsets.clone()),
158 Some(sizes.clone()),
159 validity_to_child(validity, len),
160 ]
161 }
162
163 pub fn new() -> Self {
170 Self {
171 is_zero_copy_to_list: false,
172 }
173 }
174
175 pub fn try_new() -> VortexResult<Self> {
182 Ok(Self::new())
183 }
184
185 pub unsafe fn new_unchecked() -> Self {
205 Self::new()
206 }
207
208 pub fn validate(
210 elements: &ArrayRef,
211 offsets: &ArrayRef,
212 sizes: &ArrayRef,
213 validity: &Validity,
214 ) -> VortexResult<()> {
215 vortex_ensure!(
217 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
218 "offsets must be non-nullable integer array, got {}",
219 offsets.dtype()
220 );
221 vortex_ensure!(
222 sizes.dtype().is_int() && !sizes.dtype().is_nullable(),
223 "sizes must be non-nullable integer array, got {}",
224 sizes.dtype()
225 );
226
227 vortex_ensure!(
229 offsets.len() == sizes.len(),
230 "offsets and sizes must have the same length, got {} and {}",
231 offsets.len(),
232 sizes.len()
233 );
234
235 let size_ptype = sizes.dtype().as_ptype();
237 let offset_ptype = offsets.dtype().as_ptype();
238
239 if let Some(validity_len) = validity.maybe_len() {
241 vortex_ensure!(
242 validity_len == offsets.len(),
243 "validity with size {validity_len} does not match array size {}",
244 offsets.len()
245 );
246 }
247
248 if offsets.is_host() && sizes.is_host() {
250 let offsets_primitive = offsets.to_primitive();
251 let sizes_primitive = sizes.to_primitive();
252
253 match_each_integer_ptype!(offset_ptype, |O| {
255 match_each_integer_ptype!(size_ptype, |S| {
256 let offsets_slice = offsets_primitive.as_slice::<O>();
257 let sizes_slice = sizes_primitive.as_slice::<S>();
258
259 validate_offsets_and_sizes::<O, S>(
260 offsets_slice,
261 sizes_slice,
262 elements.len() as u64,
263 )?;
264 })
265 });
266 }
267
268 Ok(())
269 }
270
271 pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
290 self.is_zero_copy_to_list = is_zctl;
291 self
292 }
293
294 pub fn is_zero_copy_to_list(&self) -> bool {
297 self.is_zero_copy_to_list
298 }
299}
300
301impl Default for ListViewData {
302 fn default() -> Self {
303 Self::new()
304 }
305}
306
307pub trait ListViewArrayExt: TypedArrayRef<ListView> {
308 fn nullability(&self) -> crate::dtype::Nullability {
309 match self.as_ref().dtype() {
310 DType::List(_, nullability) => *nullability,
311 _ => unreachable!("ListViewArrayExt requires a list dtype"),
312 }
313 }
314
315 fn elements(&self) -> &ArrayRef {
316 self.as_ref().slots()[ELEMENTS_SLOT]
317 .as_ref()
318 .vortex_expect("ListViewArray elements slot")
319 }
320
321 fn offsets(&self) -> &ArrayRef {
322 self.as_ref().slots()[OFFSETS_SLOT]
323 .as_ref()
324 .vortex_expect("ListViewArray offsets slot")
325 }
326
327 fn sizes(&self) -> &ArrayRef {
328 self.as_ref().slots()[SIZES_SLOT]
329 .as_ref()
330 .vortex_expect("ListViewArray sizes slot")
331 }
332
333 fn listview_validity(&self) -> Validity {
334 child_to_validity(&self.as_ref().slots()[VALIDITY_SLOT], self.nullability())
335 }
336
337 fn listview_validity_mask(&self) -> vortex_mask::Mask {
338 self.listview_validity().to_mask(self.as_ref().len())
339 }
340
341 fn offset_at(&self, index: usize) -> usize {
342 assert!(
343 index < self.as_ref().len(),
344 "Index {index} out of bounds 0..{}",
345 self.as_ref().len()
346 );
347 self.offsets()
348 .as_opt::<Primitive>()
349 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
350 .unwrap_or_else(|| {
351 self.offsets()
352 .scalar_at(index)
353 .vortex_expect("offsets must support scalar_at")
354 .as_primitive()
355 .as_::<usize>()
356 .vortex_expect("offset must fit in usize")
357 })
358 }
359
360 fn size_at(&self, index: usize) -> usize {
361 assert!(
362 index < self.as_ref().len(),
363 "Index {} out of bounds 0..{}",
364 index,
365 self.as_ref().len()
366 );
367 self.sizes()
368 .as_opt::<Primitive>()
369 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
370 .unwrap_or_else(|| {
371 self.sizes()
372 .scalar_at(index)
373 .vortex_expect("sizes must support scalar_at")
374 .as_primitive()
375 .as_::<usize>()
376 .vortex_expect("size must fit in usize")
377 })
378 }
379
380 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
381 let offset = self.offset_at(index);
382 let size = self.size_at(index);
383 self.elements().slice(offset..offset + size)
384 }
385
386 fn verify_is_zero_copy_to_list(&self) -> bool {
387 validate_zctl(
388 self.elements(),
389 self.offsets().to_primitive(),
390 self.sizes().to_primitive(),
391 )
392 .is_ok()
393 }
394}
395impl<T: TypedArrayRef<ListView>> ListViewArrayExt for T {}
396
397impl Array<ListView> {
398 pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
400 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
401 let len = offsets.len();
402 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
403 ListViewData::validate(&elements, &offsets, &sizes, &validity)
404 .vortex_expect("`ListViewArray` construction failed");
405 let data = ListViewData::new();
406 unsafe {
407 Array::from_parts_unchecked(
408 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
409 )
410 }
411 }
412
413 pub fn try_new(
415 elements: ArrayRef,
416 offsets: ArrayRef,
417 sizes: ArrayRef,
418 validity: Validity,
419 ) -> VortexResult<Self> {
420 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
421 let len = offsets.len();
422 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
423 ListViewData::validate(&elements, &offsets, &sizes, &validity)?;
424 let data = ListViewData::try_new()?;
425 Ok(unsafe {
426 Array::from_parts_unchecked(
427 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
428 )
429 })
430 }
431
432 pub unsafe fn new_unchecked(
438 elements: ArrayRef,
439 offsets: ArrayRef,
440 sizes: ArrayRef,
441 validity: Validity,
442 ) -> Self {
443 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
444 let len = offsets.len();
445 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
446 let data = unsafe { ListViewData::new_unchecked() };
447 unsafe {
448 Array::from_parts_unchecked(
449 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
450 )
451 }
452 }
453
454 pub unsafe fn with_zero_copy_to_list(self, is_zctl: bool) -> Self {
460 if cfg!(debug_assertions) && is_zctl {
461 validate_zctl(
462 self.elements(),
463 self.offsets().to_primitive(),
464 self.sizes().to_primitive(),
465 )
466 .vortex_expect("Failed to validate zero-copy to list flag");
467 }
468 let dtype = self.dtype().clone();
469 let len = self.len();
470 let slots = self.slots().to_vec();
471 let data = unsafe { self.into_data().with_zero_copy_to_list(is_zctl) };
472 unsafe {
473 Array::from_parts_unchecked(
474 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
475 )
476 }
477 }
478
479 pub fn into_data_parts(self) -> ListViewDataParts {
480 let elements = self.slots()[ELEMENTS_SLOT]
481 .clone()
482 .vortex_expect("ListViewArray elements slot");
483 let offsets = self.slots()[OFFSETS_SLOT]
484 .clone()
485 .vortex_expect("ListViewArray offsets slot");
486 let sizes = self.slots()[SIZES_SLOT]
487 .clone()
488 .vortex_expect("ListViewArray sizes slot");
489 let validity = self.listview_validity();
490 ListViewDataParts {
491 elements_dtype: Arc::new(elements.dtype().clone()),
492 elements,
493 offsets,
494 sizes,
495 validity,
496 }
497 }
498}
499
500fn validate_offsets_and_sizes<O, S>(
502 offsets_slice: &[O],
503 sizes_slice: &[S],
504 elements_len: u64,
505) -> VortexResult<()>
506where
507 O: IntegerPType,
508 S: IntegerPType,
509{
510 debug_assert_eq!(offsets_slice.len(), sizes_slice.len());
511
512 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
513 for i in 0..offsets_slice.len() {
514 let offset = offsets_slice[i];
515 let size = sizes_slice[i];
516
517 vortex_ensure!(offset >= O::zero(), "cannot have negative offsets");
518 vortex_ensure!(size >= S::zero(), "cannot have negative size");
519
520 let offset_u64 = offset
521 .to_u64()
522 .ok_or_else(|| vortex_err!("offset[{i}] = {offset:?} cannot be converted to u64"))?;
523
524 let size_u64 = size
525 .to_u64()
526 .ok_or_else(|| vortex_err!("size[{i}] = {size:?} cannot be converted to u64"))?;
527
528 let end = offset_u64.checked_add(size_u64).ok_or_else(|| {
530 vortex_err!("offset[{i}] ({offset_u64}) + size[{i}] ({size_u64}) would overflow u64")
531 })?;
532
533 if offset_u64 == elements_len {
534 vortex_ensure!(
535 size_u64 == 0,
536 "views to the end of the elements array (length {elements_len}) must have size 0 \
537 (had size {size_u64})"
538 );
539 }
540
541 vortex_ensure!(
542 end <= elements_len,
543 "offset[{i}] + size[{i}] = {offset_u64} + {size_u64} = {end} \
544 exceeds elements length {elements_len}",
545 );
546 }
547
548 Ok(())
549}
550
551fn validate_zctl(
554 elements: &ArrayRef,
555 offsets_primitive: PrimitiveArray,
556 sizes_primitive: PrimitiveArray,
557) -> VortexResult<()> {
558 if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted() {
561 vortex_ensure!(is_sorted, "offsets must be sorted");
562 } else {
563 vortex_bail!("offsets must report is_sorted statistic");
564 }
565
566 fn validate_monotonic_ends<O: IntegerPType, S: IntegerPType>(
569 offsets_slice: &[O],
570 sizes_slice: &[S],
571 len: usize,
572 ) -> VortexResult<()> {
573 let mut max_end = 0usize;
574
575 for i in 0..len {
576 let offset = offsets_slice[i].to_usize().unwrap_or(usize::MAX);
577 let size = sizes_slice[i].to_usize().unwrap_or(usize::MAX);
578
579 vortex_ensure!(
581 offset >= max_end,
582 "Zero-copy-to-list requires views to be non-overlapping and ordered: \
583 view[{}] starts at {} but previous views extend to {}",
584 i,
585 offset,
586 max_end
587 );
588
589 let end = offset.saturating_add(size);
591 max_end = max_end.max(end);
592 }
593
594 Ok(())
595 }
596
597 let offsets_dtype = offsets_primitive.dtype();
598 let sizes_dtype = sizes_primitive.dtype();
599 let len = offsets_primitive.len();
600
601 match_each_integer_ptype!(offsets_dtype.as_ptype(), |O| {
603 match_each_integer_ptype!(sizes_dtype.as_ptype(), |S| {
604 let offsets_slice = offsets_primitive.as_slice::<O>();
605 let sizes_slice = sizes_primitive.as_slice::<S>();
606
607 validate_monotonic_ends(offsets_slice, sizes_slice, len)?;
608 })
609 });
610
611 let mut element_references = vec![0u8; elements.len()];
616
617 fn count_references<O: IntegerPType, S: IntegerPType>(
618 element_references: &mut [u8],
619 offsets_primitive: PrimitiveArray,
620 sizes_primitive: PrimitiveArray,
621 ) {
622 let offsets_slice = offsets_primitive.as_slice::<O>();
623 let sizes_slice = sizes_primitive.as_slice::<S>();
624
625 for i in 0..offsets_slice.len() {
628 let offset: usize = offsets_slice[i].as_();
629 let size: usize = sizes_slice[i].as_();
630 for j in offset..offset + size {
631 element_references[j] = element_references[j].saturating_add(1);
632 }
633 }
634 }
635
636 match_each_integer_ptype!(offsets_primitive.ptype(), |O| {
637 match_each_integer_ptype!(sizes_primitive.ptype(), |S| {
638 count_references::<O, S>(&mut element_references, offsets_primitive, sizes_primitive);
639 })
640 });
641
642 let leftmost_used = element_references
644 .iter()
645 .position(|&references| references != 0);
646 let rightmost_used = element_references
647 .iter()
648 .rposition(|&references| references != 0);
649
650 if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
651 vortex_ensure!(
652 element_references[first_ref..=last_ref]
653 .iter()
654 .all(|&references| references != 0),
655 "found gap in elements array between first and last referenced elements"
656 );
657 }
658
659 vortex_ensure!(element_references.iter().all(|&references| references <= 1));
660
661 Ok(())
662}