vortex_array/arrays/listview/
array.rs1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11use vortex_error::vortex_bail;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_err;
14
15use crate::ArrayRef;
16use crate::LEGACY_SESSION;
17#[expect(deprecated)]
18use crate::ToCanonical as _;
19use crate::VortexSessionExecute;
20use crate::array::Array;
21use crate::array::ArrayParts;
22use crate::array::TypedArrayRef;
23use crate::array::child_to_validity;
24use crate::array::validity_to_child;
25use crate::arrays::ListView;
26use crate::arrays::Primitive;
27use crate::arrays::PrimitiveArray;
28use crate::arrays::bool;
29use crate::dtype::DType;
30use crate::dtype::IntegerPType;
31use crate::match_each_integer_ptype;
32use crate::validity::Validity;
33
34pub(super) const ELEMENTS_SLOT: usize = 0;
37pub(super) const OFFSETS_SLOT: usize = 1;
42pub(super) const SIZES_SLOT: usize = 2;
47pub(super) const VALIDITY_SLOT: usize = 3;
49pub(super) const NUM_SLOTS: usize = 4;
50pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "sizes", "validity"];
51
52#[derive(Clone, Debug)]
116pub struct ListViewData {
117 is_zero_copy_to_list: bool,
126}
127
128impl Display for ListViewData {
129 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
130 write!(f, "is_zero_copy_to_list: {}", self.is_zero_copy_to_list)
131 }
132}
133
134pub struct ListViewDataParts {
135 pub elements_dtype: Arc<DType>,
136
137 pub elements: ArrayRef,
139
140 pub offsets: ArrayRef,
142
143 pub sizes: ArrayRef,
145
146 pub validity: Validity,
148}
149
150impl ListViewData {
151 pub(crate) fn make_slots(
152 elements: &ArrayRef,
153 offsets: &ArrayRef,
154 sizes: &ArrayRef,
155 validity: &Validity,
156 len: usize,
157 ) -> Vec<Option<ArrayRef>> {
158 vec![
159 Some(elements.clone()),
160 Some(offsets.clone()),
161 Some(sizes.clone()),
162 validity_to_child(validity, len),
163 ]
164 }
165
166 pub fn new() -> Self {
173 Self {
174 is_zero_copy_to_list: false,
175 }
176 }
177
178 pub fn try_new() -> VortexResult<Self> {
185 Ok(Self::new())
186 }
187
188 pub unsafe fn new_unchecked() -> Self {
208 Self::new()
209 }
210
211 pub fn validate(
213 elements: &ArrayRef,
214 offsets: &ArrayRef,
215 sizes: &ArrayRef,
216 validity: &Validity,
217 ) -> VortexResult<()> {
218 vortex_ensure!(
220 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
221 "offsets must be non-nullable integer array, got {}",
222 offsets.dtype()
223 );
224 vortex_ensure!(
225 sizes.dtype().is_int() && !sizes.dtype().is_nullable(),
226 "sizes must be non-nullable integer array, got {}",
227 sizes.dtype()
228 );
229
230 vortex_ensure!(
232 offsets.len() == sizes.len(),
233 "offsets and sizes must have the same length, got {} and {}",
234 offsets.len(),
235 sizes.len()
236 );
237
238 let size_ptype = sizes.dtype().as_ptype();
240 let offset_ptype = offsets.dtype().as_ptype();
241
242 if let Some(validity_len) = validity.maybe_len() {
244 vortex_ensure!(
245 validity_len == offsets.len(),
246 "validity with size {validity_len} does not match array size {}",
247 offsets.len()
248 );
249 }
250
251 if offsets.is_host() && sizes.is_host() {
253 #[expect(deprecated)]
254 let offsets_primitive = offsets.to_primitive();
255 #[expect(deprecated)]
256 let sizes_primitive = sizes.to_primitive();
257
258 match_each_integer_ptype!(offset_ptype, |O| {
260 match_each_integer_ptype!(size_ptype, |S| {
261 let offsets_slice = offsets_primitive.as_slice::<O>();
262 let sizes_slice = sizes_primitive.as_slice::<S>();
263
264 validate_offsets_and_sizes::<O, S>(
265 offsets_slice,
266 sizes_slice,
267 elements.len() as u64,
268 )?;
269 })
270 });
271 }
272
273 Ok(())
274 }
275
276 pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
295 self.is_zero_copy_to_list = is_zctl;
296 self
297 }
298
299 pub fn is_zero_copy_to_list(&self) -> bool {
302 self.is_zero_copy_to_list
303 }
304}
305
306impl Default for ListViewData {
307 fn default() -> Self {
308 Self::new()
309 }
310}
311
312pub trait ListViewArrayExt: TypedArrayRef<ListView> {
313 fn nullability(&self) -> crate::dtype::Nullability {
314 match self.as_ref().dtype() {
315 DType::List(_, nullability) => *nullability,
316 _ => unreachable!("ListViewArrayExt requires a list dtype"),
317 }
318 }
319
320 fn elements(&self) -> &ArrayRef {
321 self.as_ref().slots()[ELEMENTS_SLOT]
322 .as_ref()
323 .vortex_expect("ListViewArray elements slot")
324 }
325
326 fn offsets(&self) -> &ArrayRef {
327 self.as_ref().slots()[OFFSETS_SLOT]
328 .as_ref()
329 .vortex_expect("ListViewArray offsets slot")
330 }
331
332 fn sizes(&self) -> &ArrayRef {
333 self.as_ref().slots()[SIZES_SLOT]
334 .as_ref()
335 .vortex_expect("ListViewArray sizes slot")
336 }
337
338 fn listview_validity(&self) -> Validity {
339 child_to_validity(
340 self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
341 self.nullability(),
342 )
343 }
344
345 fn offset_at(&self, index: usize) -> usize {
346 assert!(
347 index < self.as_ref().len(),
348 "Index {index} out of bounds 0..{}",
349 self.as_ref().len()
350 );
351 self.offsets()
352 .as_opt::<Primitive>()
353 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
354 .unwrap_or_else(|| {
355 self.offsets()
356 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
357 .vortex_expect("offsets must support execute_scalar")
358 .as_primitive()
359 .as_::<usize>()
360 .vortex_expect("offset must fit in usize")
361 })
362 }
363
364 fn size_at(&self, index: usize) -> usize {
365 assert!(
366 index < self.as_ref().len(),
367 "Index {} out of bounds 0..{}",
368 index,
369 self.as_ref().len()
370 );
371 self.sizes()
372 .as_opt::<Primitive>()
373 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
374 .unwrap_or_else(|| {
375 self.sizes()
376 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
377 .vortex_expect("sizes must support execute_scalar")
378 .as_primitive()
379 .as_::<usize>()
380 .vortex_expect("size must fit in usize")
381 })
382 }
383
384 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
385 let offset = self.offset_at(index);
386 let size = self.size_at(index);
387 self.elements().slice(offset..offset + size)
388 }
389
390 fn verify_is_zero_copy_to_list(&self) -> bool {
391 #[expect(deprecated)]
392 let offsets_primitive = self.offsets().to_primitive();
393 #[expect(deprecated)]
394 let sizes_primitive = self.sizes().to_primitive();
395 validate_zctl(self.elements(), offsets_primitive, sizes_primitive).is_ok()
396 }
397}
398impl<T: TypedArrayRef<ListView>> ListViewArrayExt for T {}
399
400impl Array<ListView> {
401 pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
403 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
404 let len = offsets.len();
405 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
406 ListViewData::validate(&elements, &offsets, &sizes, &validity)
407 .vortex_expect("`ListViewArray` construction failed");
408 let data = ListViewData::new();
409 unsafe {
410 Array::from_parts_unchecked(
411 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
412 )
413 }
414 }
415
416 pub fn try_new(
418 elements: ArrayRef,
419 offsets: ArrayRef,
420 sizes: ArrayRef,
421 validity: Validity,
422 ) -> VortexResult<Self> {
423 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
424 let len = offsets.len();
425 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
426 ListViewData::validate(&elements, &offsets, &sizes, &validity)?;
427 let data = ListViewData::try_new()?;
428 Ok(unsafe {
429 Array::from_parts_unchecked(
430 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
431 )
432 })
433 }
434
435 pub unsafe fn new_unchecked(
441 elements: ArrayRef,
442 offsets: ArrayRef,
443 sizes: ArrayRef,
444 validity: Validity,
445 ) -> Self {
446 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
447 let len = offsets.len();
448 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
449 let data = unsafe { ListViewData::new_unchecked() };
450 unsafe {
451 Array::from_parts_unchecked(
452 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
453 )
454 }
455 }
456
457 pub unsafe fn with_zero_copy_to_list(self, is_zctl: bool) -> Self {
463 if cfg!(debug_assertions) && is_zctl {
464 #[expect(deprecated)]
465 let offsets_primitive = self.offsets().to_primitive();
466 #[expect(deprecated)]
467 let sizes_primitive = self.sizes().to_primitive();
468 validate_zctl(self.elements(), offsets_primitive, sizes_primitive)
469 .vortex_expect("Failed to validate zero-copy to list flag");
470 }
471 let dtype = self.dtype().clone();
472 let len = self.len();
473 let slots = self.slots().to_vec();
474 let data = unsafe { self.into_data().with_zero_copy_to_list(is_zctl) };
475 unsafe {
476 Array::from_parts_unchecked(
477 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
478 )
479 }
480 }
481
482 pub fn into_data_parts(self) -> ListViewDataParts {
483 let elements = self.slots()[ELEMENTS_SLOT]
484 .clone()
485 .vortex_expect("ListViewArray elements slot");
486 let offsets = self.slots()[OFFSETS_SLOT]
487 .clone()
488 .vortex_expect("ListViewArray offsets slot");
489 let sizes = self.slots()[SIZES_SLOT]
490 .clone()
491 .vortex_expect("ListViewArray sizes slot");
492 let validity = self.listview_validity();
493 ListViewDataParts {
494 elements_dtype: Arc::new(elements.dtype().clone()),
495 elements,
496 offsets,
497 sizes,
498 validity,
499 }
500 }
501}
502
503fn validate_offsets_and_sizes<O, S>(
505 offsets_slice: &[O],
506 sizes_slice: &[S],
507 elements_len: u64,
508) -> VortexResult<()>
509where
510 O: IntegerPType,
511 S: IntegerPType,
512{
513 debug_assert_eq!(offsets_slice.len(), sizes_slice.len());
514
515 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
516 for i in 0..offsets_slice.len() {
517 let offset = offsets_slice[i];
518 let size = sizes_slice[i];
519
520 vortex_ensure!(offset >= O::zero(), "cannot have negative offsets");
521 vortex_ensure!(size >= S::zero(), "cannot have negative size");
522
523 let offset_u64 = offset
524 .to_u64()
525 .ok_or_else(|| vortex_err!("offset[{i}] = {offset:?} cannot be converted to u64"))?;
526
527 let size_u64 = size
528 .to_u64()
529 .ok_or_else(|| vortex_err!("size[{i}] = {size:?} cannot be converted to u64"))?;
530
531 let end = offset_u64.checked_add(size_u64).ok_or_else(|| {
533 vortex_err!("offset[{i}] ({offset_u64}) + size[{i}] ({size_u64}) would overflow u64")
534 })?;
535
536 if offset_u64 == elements_len {
537 vortex_ensure!(
538 size_u64 == 0,
539 "views to the end of the elements array (length {elements_len}) must have size 0 \
540 (had size {size_u64})"
541 );
542 }
543
544 vortex_ensure!(
545 end <= elements_len,
546 "offset[{i}] + size[{i}] = {offset_u64} + {size_u64} = {end} \
547 exceeds elements length {elements_len}",
548 );
549 }
550
551 Ok(())
552}
553
554fn validate_zctl(
557 elements: &ArrayRef,
558 offsets_primitive: PrimitiveArray,
559 sizes_primitive: PrimitiveArray,
560) -> VortexResult<()> {
561 let mut ctx = LEGACY_SESSION.create_execution_ctx();
564 if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted(&mut ctx) {
565 vortex_ensure!(is_sorted, "offsets must be sorted");
566 } else {
567 vortex_bail!("offsets must report is_sorted statistic");
568 }
569
570 fn validate_monotonic_ends<O: IntegerPType, S: IntegerPType>(
573 offsets_slice: &[O],
574 sizes_slice: &[S],
575 len: usize,
576 ) -> VortexResult<()> {
577 let mut max_end = 0usize;
578
579 for i in 0..len {
580 let offset = offsets_slice[i].to_usize().unwrap_or(usize::MAX);
581 let size = sizes_slice[i].to_usize().unwrap_or(usize::MAX);
582
583 vortex_ensure!(
585 offset >= max_end,
586 "Zero-copy-to-list requires views to be non-overlapping and ordered: \
587 view[{}] starts at {} but previous views extend to {}",
588 i,
589 offset,
590 max_end
591 );
592
593 let end = offset.saturating_add(size);
595 max_end = max_end.max(end);
596 }
597
598 Ok(())
599 }
600
601 let offsets_dtype = offsets_primitive.dtype();
602 let sizes_dtype = sizes_primitive.dtype();
603 let len = offsets_primitive.len();
604
605 match_each_integer_ptype!(offsets_dtype.as_ptype(), |O| {
607 match_each_integer_ptype!(sizes_dtype.as_ptype(), |S| {
608 let offsets_slice = offsets_primitive.as_slice::<O>();
609 let sizes_slice = sizes_primitive.as_slice::<S>();
610
611 validate_monotonic_ends(offsets_slice, sizes_slice, len)?;
612 })
613 });
614
615 let mut element_references = vec![0u8; elements.len()];
620
621 fn count_references<O: IntegerPType, S: IntegerPType>(
622 element_references: &mut [u8],
623 offsets_primitive: PrimitiveArray,
624 sizes_primitive: PrimitiveArray,
625 ) {
626 let offsets_slice = offsets_primitive.as_slice::<O>();
627 let sizes_slice = sizes_primitive.as_slice::<S>();
628
629 for i in 0..offsets_slice.len() {
632 let offset: usize = offsets_slice[i].as_();
633 let size: usize = sizes_slice[i].as_();
634 for j in offset..offset + size {
635 element_references[j] = element_references[j].saturating_add(1);
636 }
637 }
638 }
639
640 match_each_integer_ptype!(offsets_primitive.ptype(), |O| {
641 match_each_integer_ptype!(sizes_primitive.ptype(), |S| {
642 count_references::<O, S>(&mut element_references, offsets_primitive, sizes_primitive);
643 })
644 });
645
646 let leftmost_used = element_references
648 .iter()
649 .position(|&references| references != 0);
650 let rightmost_used = element_references
651 .iter()
652 .rposition(|&references| references != 0);
653
654 if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
655 vortex_ensure!(
656 element_references[first_ref..=last_ref]
657 .iter()
658 .all(|&references| references != 0),
659 "found gap in elements array between first and last referenced elements"
660 );
661 }
662
663 vortex_ensure!(element_references.iter().all(|&references| references <= 1));
664
665 Ok(())
666}