1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11use vortex_error::vortex_bail;
12use vortex_error::vortex_ensure;
13use vortex_error::vortex_err;
14
15use crate::ArrayRef;
16use crate::LEGACY_SESSION;
17use crate::ToCanonical;
18use crate::VortexSessionExecute;
19use crate::array::Array;
20use crate::array::ArrayParts;
21use crate::array::TypedArrayRef;
22use crate::array::child_to_validity;
23use crate::array::validity_to_child;
24use crate::arrays::ListView;
25use crate::arrays::Primitive;
26use crate::arrays::PrimitiveArray;
27use crate::arrays::bool;
28use crate::dtype::DType;
29use crate::dtype::IntegerPType;
30use crate::match_each_integer_ptype;
31use crate::validity::Validity;
32
33pub(super) const ELEMENTS_SLOT: usize = 0;
36pub(super) const OFFSETS_SLOT: usize = 1;
41pub(super) const SIZES_SLOT: usize = 2;
46pub(super) const VALIDITY_SLOT: usize = 3;
48pub(super) const NUM_SLOTS: usize = 4;
49pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "sizes", "validity"];
50
51#[derive(Clone, Debug)]
115pub struct ListViewData {
116 is_zero_copy_to_list: bool,
125}
126
127impl Display for ListViewData {
128 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
129 write!(f, "is_zero_copy_to_list: {}", self.is_zero_copy_to_list)
130 }
131}
132
133pub struct ListViewDataParts {
134 pub elements_dtype: Arc<DType>,
135
136 pub elements: ArrayRef,
138
139 pub offsets: ArrayRef,
141
142 pub sizes: ArrayRef,
144
145 pub validity: Validity,
147}
148
149impl ListViewData {
150 pub(crate) fn make_slots(
151 elements: &ArrayRef,
152 offsets: &ArrayRef,
153 sizes: &ArrayRef,
154 validity: &Validity,
155 len: usize,
156 ) -> Vec<Option<ArrayRef>> {
157 vec![
158 Some(elements.clone()),
159 Some(offsets.clone()),
160 Some(sizes.clone()),
161 validity_to_child(validity, len),
162 ]
163 }
164
165 pub fn new() -> Self {
172 Self {
173 is_zero_copy_to_list: false,
174 }
175 }
176
177 pub fn try_new() -> VortexResult<Self> {
184 Ok(Self::new())
185 }
186
187 pub unsafe fn new_unchecked() -> Self {
207 Self::new()
208 }
209
210 pub fn validate(
212 elements: &ArrayRef,
213 offsets: &ArrayRef,
214 sizes: &ArrayRef,
215 validity: &Validity,
216 ) -> VortexResult<()> {
217 vortex_ensure!(
219 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
220 "offsets must be non-nullable integer array, got {}",
221 offsets.dtype()
222 );
223 vortex_ensure!(
224 sizes.dtype().is_int() && !sizes.dtype().is_nullable(),
225 "sizes must be non-nullable integer array, got {}",
226 sizes.dtype()
227 );
228
229 vortex_ensure!(
231 offsets.len() == sizes.len(),
232 "offsets and sizes must have the same length, got {} and {}",
233 offsets.len(),
234 sizes.len()
235 );
236
237 let size_ptype = sizes.dtype().as_ptype();
239 let offset_ptype = offsets.dtype().as_ptype();
240
241 if let Some(validity_len) = validity.maybe_len() {
243 vortex_ensure!(
244 validity_len == offsets.len(),
245 "validity with size {validity_len} does not match array size {}",
246 offsets.len()
247 );
248 }
249
250 if offsets.is_host() && sizes.is_host() {
252 let offsets_primitive = offsets.to_primitive();
253 let sizes_primitive = sizes.to_primitive();
254
255 match_each_integer_ptype!(offset_ptype, |O| {
257 match_each_integer_ptype!(size_ptype, |S| {
258 let offsets_slice = offsets_primitive.as_slice::<O>();
259 let sizes_slice = sizes_primitive.as_slice::<S>();
260
261 validate_offsets_and_sizes::<O, S>(
262 offsets_slice,
263 sizes_slice,
264 elements.len() as u64,
265 )?;
266 })
267 });
268 }
269
270 Ok(())
271 }
272
273 pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
292 self.is_zero_copy_to_list = is_zctl;
293 self
294 }
295
296 pub fn is_zero_copy_to_list(&self) -> bool {
299 self.is_zero_copy_to_list
300 }
301}
302
303impl Default for ListViewData {
304 fn default() -> Self {
305 Self::new()
306 }
307}
308
309pub trait ListViewArrayExt: TypedArrayRef<ListView> {
310 fn nullability(&self) -> crate::dtype::Nullability {
311 match self.as_ref().dtype() {
312 DType::List(_, nullability) => *nullability,
313 _ => unreachable!("ListViewArrayExt requires a list dtype"),
314 }
315 }
316
317 fn elements(&self) -> &ArrayRef {
318 self.as_ref().slots()[ELEMENTS_SLOT]
319 .as_ref()
320 .vortex_expect("ListViewArray elements slot")
321 }
322
323 fn offsets(&self) -> &ArrayRef {
324 self.as_ref().slots()[OFFSETS_SLOT]
325 .as_ref()
326 .vortex_expect("ListViewArray offsets slot")
327 }
328
329 fn sizes(&self) -> &ArrayRef {
330 self.as_ref().slots()[SIZES_SLOT]
331 .as_ref()
332 .vortex_expect("ListViewArray sizes slot")
333 }
334
335 fn listview_validity(&self) -> Validity {
336 child_to_validity(&self.as_ref().slots()[VALIDITY_SLOT], self.nullability())
337 }
338
339 fn offset_at(&self, index: usize) -> usize {
340 assert!(
341 index < self.as_ref().len(),
342 "Index {index} out of bounds 0..{}",
343 self.as_ref().len()
344 );
345 self.offsets()
346 .as_opt::<Primitive>()
347 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
348 .unwrap_or_else(|| {
349 self.offsets()
350 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
351 .vortex_expect("offsets must support execute_scalar")
352 .as_primitive()
353 .as_::<usize>()
354 .vortex_expect("offset must fit in usize")
355 })
356 }
357
358 fn size_at(&self, index: usize) -> usize {
359 assert!(
360 index < self.as_ref().len(),
361 "Index {} out of bounds 0..{}",
362 index,
363 self.as_ref().len()
364 );
365 self.sizes()
366 .as_opt::<Primitive>()
367 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
368 .unwrap_or_else(|| {
369 self.sizes()
370 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
371 .vortex_expect("sizes must support execute_scalar")
372 .as_primitive()
373 .as_::<usize>()
374 .vortex_expect("size must fit in usize")
375 })
376 }
377
378 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
379 let offset = self.offset_at(index);
380 let size = self.size_at(index);
381 self.elements().slice(offset..offset + size)
382 }
383
384 fn verify_is_zero_copy_to_list(&self) -> bool {
385 validate_zctl(
386 self.elements(),
387 self.offsets().to_primitive(),
388 self.sizes().to_primitive(),
389 )
390 .is_ok()
391 }
392}
393impl<T: TypedArrayRef<ListView>> ListViewArrayExt for T {}
394
395impl Array<ListView> {
396 pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
398 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
399 let len = offsets.len();
400 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
401 ListViewData::validate(&elements, &offsets, &sizes, &validity)
402 .vortex_expect("`ListViewArray` construction failed");
403 let data = ListViewData::new();
404 unsafe {
405 Array::from_parts_unchecked(
406 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
407 )
408 }
409 }
410
411 pub fn try_new(
413 elements: ArrayRef,
414 offsets: ArrayRef,
415 sizes: ArrayRef,
416 validity: Validity,
417 ) -> VortexResult<Self> {
418 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
419 let len = offsets.len();
420 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
421 ListViewData::validate(&elements, &offsets, &sizes, &validity)?;
422 let data = ListViewData::try_new()?;
423 Ok(unsafe {
424 Array::from_parts_unchecked(
425 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
426 )
427 })
428 }
429
430 pub unsafe fn new_unchecked(
436 elements: ArrayRef,
437 offsets: ArrayRef,
438 sizes: ArrayRef,
439 validity: Validity,
440 ) -> Self {
441 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
442 let len = offsets.len();
443 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
444 let data = unsafe { ListViewData::new_unchecked() };
445 unsafe {
446 Array::from_parts_unchecked(
447 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
448 )
449 }
450 }
451
452 pub unsafe fn with_zero_copy_to_list(self, is_zctl: bool) -> Self {
458 if cfg!(debug_assertions) && is_zctl {
459 validate_zctl(
460 self.elements(),
461 self.offsets().to_primitive(),
462 self.sizes().to_primitive(),
463 )
464 .vortex_expect("Failed to validate zero-copy to list flag");
465 }
466 let dtype = self.dtype().clone();
467 let len = self.len();
468 let slots = self.slots().to_vec();
469 let data = unsafe { self.into_data().with_zero_copy_to_list(is_zctl) };
470 unsafe {
471 Array::from_parts_unchecked(
472 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
473 )
474 }
475 }
476
477 pub fn into_data_parts(self) -> ListViewDataParts {
478 let elements = self.slots()[ELEMENTS_SLOT]
479 .clone()
480 .vortex_expect("ListViewArray elements slot");
481 let offsets = self.slots()[OFFSETS_SLOT]
482 .clone()
483 .vortex_expect("ListViewArray offsets slot");
484 let sizes = self.slots()[SIZES_SLOT]
485 .clone()
486 .vortex_expect("ListViewArray sizes slot");
487 let validity = self.listview_validity();
488 ListViewDataParts {
489 elements_dtype: Arc::new(elements.dtype().clone()),
490 elements,
491 offsets,
492 sizes,
493 validity,
494 }
495 }
496}
497
498fn validate_offsets_and_sizes<O, S>(
500 offsets_slice: &[O],
501 sizes_slice: &[S],
502 elements_len: u64,
503) -> VortexResult<()>
504where
505 O: IntegerPType,
506 S: IntegerPType,
507{
508 debug_assert_eq!(offsets_slice.len(), sizes_slice.len());
509
510 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
511 for i in 0..offsets_slice.len() {
512 let offset = offsets_slice[i];
513 let size = sizes_slice[i];
514
515 vortex_ensure!(offset >= O::zero(), "cannot have negative offsets");
516 vortex_ensure!(size >= S::zero(), "cannot have negative size");
517
518 let offset_u64 = offset
519 .to_u64()
520 .ok_or_else(|| vortex_err!("offset[{i}] = {offset:?} cannot be converted to u64"))?;
521
522 let size_u64 = size
523 .to_u64()
524 .ok_or_else(|| vortex_err!("size[{i}] = {size:?} cannot be converted to u64"))?;
525
526 let end = offset_u64.checked_add(size_u64).ok_or_else(|| {
528 vortex_err!("offset[{i}] ({offset_u64}) + size[{i}] ({size_u64}) would overflow u64")
529 })?;
530
531 if offset_u64 == elements_len {
532 vortex_ensure!(
533 size_u64 == 0,
534 "views to the end of the elements array (length {elements_len}) must have size 0 \
535 (had size {size_u64})"
536 );
537 }
538
539 vortex_ensure!(
540 end <= elements_len,
541 "offset[{i}] + size[{i}] = {offset_u64} + {size_u64} = {end} \
542 exceeds elements length {elements_len}",
543 );
544 }
545
546 Ok(())
547}
548
549fn validate_zctl(
552 elements: &ArrayRef,
553 offsets_primitive: PrimitiveArray,
554 sizes_primitive: PrimitiveArray,
555) -> VortexResult<()> {
556 let mut ctx = LEGACY_SESSION.create_execution_ctx();
559 if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted(&mut ctx) {
560 vortex_ensure!(is_sorted, "offsets must be sorted");
561 } else {
562 vortex_bail!("offsets must report is_sorted statistic");
563 }
564
565 fn validate_monotonic_ends<O: IntegerPType, S: IntegerPType>(
568 offsets_slice: &[O],
569 sizes_slice: &[S],
570 len: usize,
571 ) -> VortexResult<()> {
572 let mut max_end = 0usize;
573
574 for i in 0..len {
575 let offset = offsets_slice[i].to_usize().unwrap_or(usize::MAX);
576 let size = sizes_slice[i].to_usize().unwrap_or(usize::MAX);
577
578 vortex_ensure!(
580 offset >= max_end,
581 "Zero-copy-to-list requires views to be non-overlapping and ordered: \
582 view[{}] starts at {} but previous views extend to {}",
583 i,
584 offset,
585 max_end
586 );
587
588 let end = offset.saturating_add(size);
590 max_end = max_end.max(end);
591 }
592
593 Ok(())
594 }
595
596 let offsets_dtype = offsets_primitive.dtype();
597 let sizes_dtype = sizes_primitive.dtype();
598 let len = offsets_primitive.len();
599
600 match_each_integer_ptype!(offsets_dtype.as_ptype(), |O| {
602 match_each_integer_ptype!(sizes_dtype.as_ptype(), |S| {
603 let offsets_slice = offsets_primitive.as_slice::<O>();
604 let sizes_slice = sizes_primitive.as_slice::<S>();
605
606 validate_monotonic_ends(offsets_slice, sizes_slice, len)?;
607 })
608 });
609
610 let mut element_references = vec![0u8; elements.len()];
615
616 fn count_references<O: IntegerPType, S: IntegerPType>(
617 element_references: &mut [u8],
618 offsets_primitive: PrimitiveArray,
619 sizes_primitive: PrimitiveArray,
620 ) {
621 let offsets_slice = offsets_primitive.as_slice::<O>();
622 let sizes_slice = sizes_primitive.as_slice::<S>();
623
624 for i in 0..offsets_slice.len() {
627 let offset: usize = offsets_slice[i].as_();
628 let size: usize = sizes_slice[i].as_();
629 for j in offset..offset + size {
630 element_references[j] = element_references[j].saturating_add(1);
631 }
632 }
633 }
634
635 match_each_integer_ptype!(offsets_primitive.ptype(), |O| {
636 match_each_integer_ptype!(sizes_primitive.ptype(), |S| {
637 count_references::<O, S>(&mut element_references, offsets_primitive, sizes_primitive);
638 })
639 });
640
641 let leftmost_used = element_references
643 .iter()
644 .position(|&references| references != 0);
645 let rightmost_used = element_references
646 .iter()
647 .rposition(|&references| references != 0);
648
649 if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
650 vortex_ensure!(
651 element_references[first_ref..=last_ref]
652 .iter()
653 .all(|&references| references != 0),
654 "found gap in elements array between first and last referenced elements"
655 );
656 }
657
658 vortex_ensure!(element_references.iter().all(|&references| references <= 1));
659
660 Ok(())
661}