1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::sync::Arc;
7
8use num_traits::AsPrimitive;
9use smallvec::smallvec;
10use vortex_error::VortexExpect;
11use vortex_error::VortexResult;
12use vortex_error::vortex_bail;
13use vortex_error::vortex_ensure;
14use vortex_error::vortex_err;
15
16use crate::ArrayRef;
17use crate::ArraySlots;
18use crate::LEGACY_SESSION;
19#[expect(deprecated)]
20use crate::ToCanonical as _;
21use crate::VortexSessionExecute;
22use crate::array::Array;
23use crate::array::ArrayParts;
24use crate::array::TypedArrayRef;
25use crate::array::child_to_validity;
26use crate::array::validity_to_child;
27use crate::arrays::ListView;
28use crate::arrays::Primitive;
29use crate::arrays::PrimitiveArray;
30use crate::arrays::bool;
31use crate::dtype::DType;
32use crate::dtype::IntegerPType;
33use crate::match_each_integer_ptype;
34use crate::validity::Validity;
35
36pub(super) const ELEMENTS_SLOT: usize = 0;
39pub(super) const OFFSETS_SLOT: usize = 1;
44pub(super) const SIZES_SLOT: usize = 2;
49pub(super) const VALIDITY_SLOT: usize = 3;
51pub(super) const NUM_SLOTS: usize = 4;
52pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["elements", "offsets", "sizes", "validity"];
53
54#[derive(Clone, Debug)]
118pub struct ListViewData {
119 is_zero_copy_to_list: bool,
128}
129
130impl Display for ListViewData {
131 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
132 write!(f, "is_zero_copy_to_list: {}", self.is_zero_copy_to_list)
133 }
134}
135
136pub struct ListViewDataParts {
137 pub elements_dtype: Arc<DType>,
138
139 pub elements: ArrayRef,
141
142 pub offsets: ArrayRef,
144
145 pub sizes: ArrayRef,
147
148 pub validity: Validity,
150}
151
152impl ListViewData {
153 pub(crate) fn make_slots(
154 elements: &ArrayRef,
155 offsets: &ArrayRef,
156 sizes: &ArrayRef,
157 validity: &Validity,
158 len: usize,
159 ) -> ArraySlots {
160 smallvec![
161 Some(elements.clone()),
162 Some(offsets.clone()),
163 Some(sizes.clone()),
164 validity_to_child(validity, len),
165 ]
166 }
167
168 pub fn new() -> Self {
175 Self {
176 is_zero_copy_to_list: false,
177 }
178 }
179
180 pub fn try_new() -> VortexResult<Self> {
187 Ok(Self::new())
188 }
189
190 pub unsafe fn new_unchecked() -> Self {
210 Self::new()
211 }
212
213 pub fn validate(
215 elements: &ArrayRef,
216 offsets: &ArrayRef,
217 sizes: &ArrayRef,
218 validity: &Validity,
219 ) -> VortexResult<()> {
220 vortex_ensure!(
222 offsets.dtype().is_int() && !offsets.dtype().is_nullable(),
223 "offsets must be non-nullable integer array, got {}",
224 offsets.dtype()
225 );
226 vortex_ensure!(
227 sizes.dtype().is_int() && !sizes.dtype().is_nullable(),
228 "sizes must be non-nullable integer array, got {}",
229 sizes.dtype()
230 );
231
232 vortex_ensure!(
234 offsets.len() == sizes.len(),
235 "offsets and sizes must have the same length, got {} and {}",
236 offsets.len(),
237 sizes.len()
238 );
239
240 let size_ptype = sizes.dtype().as_ptype();
242 let offset_ptype = offsets.dtype().as_ptype();
243
244 if let Some(validity_len) = validity.maybe_len() {
246 vortex_ensure!(
247 validity_len == offsets.len(),
248 "validity with size {validity_len} does not match array size {}",
249 offsets.len()
250 );
251 }
252
253 if offsets.is_host() && sizes.is_host() {
255 #[expect(deprecated)]
256 let offsets_primitive = offsets.to_primitive();
257 #[expect(deprecated)]
258 let sizes_primitive = sizes.to_primitive();
259
260 match_each_integer_ptype!(offset_ptype, |O| {
262 match_each_integer_ptype!(size_ptype, |S| {
263 let offsets_slice = offsets_primitive.as_slice::<O>();
264 let sizes_slice = sizes_primitive.as_slice::<S>();
265
266 validate_offsets_and_sizes::<O, S>(
267 offsets_slice,
268 sizes_slice,
269 elements.len() as u64,
270 )?;
271 })
272 });
273 }
274
275 Ok(())
276 }
277
278 pub unsafe fn with_zero_copy_to_list(mut self, is_zctl: bool) -> Self {
297 self.is_zero_copy_to_list = is_zctl;
298 self
299 }
300
301 pub fn is_zero_copy_to_list(&self) -> bool {
304 self.is_zero_copy_to_list
305 }
306}
307
308impl Default for ListViewData {
309 fn default() -> Self {
310 Self::new()
311 }
312}
313
314pub trait ListViewArrayExt: TypedArrayRef<ListView> {
315 fn nullability(&self) -> crate::dtype::Nullability {
316 match self.as_ref().dtype() {
317 DType::List(_, nullability) => *nullability,
318 _ => unreachable!("ListViewArrayExt requires a list dtype"),
319 }
320 }
321
322 fn elements(&self) -> &ArrayRef {
323 self.as_ref().slots()[ELEMENTS_SLOT]
324 .as_ref()
325 .vortex_expect("ListViewArray elements slot")
326 }
327
328 fn offsets(&self) -> &ArrayRef {
329 self.as_ref().slots()[OFFSETS_SLOT]
330 .as_ref()
331 .vortex_expect("ListViewArray offsets slot")
332 }
333
334 fn sizes(&self) -> &ArrayRef {
335 self.as_ref().slots()[SIZES_SLOT]
336 .as_ref()
337 .vortex_expect("ListViewArray sizes slot")
338 }
339
340 fn listview_validity(&self) -> Validity {
341 child_to_validity(
342 self.as_ref().slots()[VALIDITY_SLOT].as_ref(),
343 self.nullability(),
344 )
345 }
346
347 fn offset_at(&self, index: usize) -> usize {
348 assert!(
349 index < self.as_ref().len(),
350 "Index {index} out of bounds 0..{}",
351 self.as_ref().len()
352 );
353 self.offsets()
354 .as_opt::<Primitive>()
355 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
356 .unwrap_or_else(|| {
357 self.offsets()
358 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
359 .vortex_expect("offsets must support execute_scalar")
360 .as_primitive()
361 .as_::<usize>()
362 .vortex_expect("offset must fit in usize")
363 })
364 }
365
366 fn size_at(&self, index: usize) -> usize {
367 assert!(
368 index < self.as_ref().len(),
369 "Index {} out of bounds 0..{}",
370 index,
371 self.as_ref().len()
372 );
373 self.sizes()
374 .as_opt::<Primitive>()
375 .map(|p| match_each_integer_ptype!(p.ptype(), |P| { p.as_slice::<P>()[index].as_() }))
376 .unwrap_or_else(|| {
377 self.sizes()
378 .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx())
379 .vortex_expect("sizes must support execute_scalar")
380 .as_primitive()
381 .as_::<usize>()
382 .vortex_expect("size must fit in usize")
383 })
384 }
385
386 fn list_elements_at(&self, index: usize) -> VortexResult<ArrayRef> {
387 let offset = self.offset_at(index);
388 let size = self.size_at(index);
389 self.elements().slice(offset..offset + size)
390 }
391
392 fn verify_is_zero_copy_to_list(&self) -> bool {
393 #[expect(deprecated)]
394 let offsets_primitive = self.offsets().to_primitive();
395 #[expect(deprecated)]
396 let sizes_primitive = self.sizes().to_primitive();
397 validate_zctl(self.elements(), offsets_primitive, sizes_primitive).is_ok()
398 }
399}
400impl<T: TypedArrayRef<ListView>> ListViewArrayExt for T {}
401
402impl Array<ListView> {
403 pub fn new(elements: ArrayRef, offsets: ArrayRef, sizes: ArrayRef, validity: Validity) -> Self {
405 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
406 let len = offsets.len();
407 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
408 ListViewData::validate(&elements, &offsets, &sizes, &validity)
409 .vortex_expect("`ListViewArray` construction failed");
410 let data = ListViewData::new();
411 unsafe {
412 Array::from_parts_unchecked(
413 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
414 )
415 }
416 }
417
418 pub fn try_new(
420 elements: ArrayRef,
421 offsets: ArrayRef,
422 sizes: ArrayRef,
423 validity: Validity,
424 ) -> VortexResult<Self> {
425 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
426 let len = offsets.len();
427 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
428 ListViewData::validate(&elements, &offsets, &sizes, &validity)?;
429 let data = ListViewData::try_new()?;
430 Ok(unsafe {
431 Array::from_parts_unchecked(
432 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
433 )
434 })
435 }
436
437 pub unsafe fn new_unchecked(
443 elements: ArrayRef,
444 offsets: ArrayRef,
445 sizes: ArrayRef,
446 validity: Validity,
447 ) -> Self {
448 let dtype = DType::List(Arc::new(elements.dtype().clone()), validity.nullability());
449 let len = offsets.len();
450 let slots = ListViewData::make_slots(&elements, &offsets, &sizes, &validity, len);
451 let data = unsafe { ListViewData::new_unchecked() };
452 unsafe {
453 Array::from_parts_unchecked(
454 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
455 )
456 }
457 }
458
459 pub unsafe fn with_zero_copy_to_list(self, is_zctl: bool) -> Self {
465 if cfg!(debug_assertions) && is_zctl {
466 #[expect(deprecated)]
467 let offsets_primitive = self.offsets().to_primitive();
468 #[expect(deprecated)]
469 let sizes_primitive = self.sizes().to_primitive();
470 validate_zctl(self.elements(), offsets_primitive, sizes_primitive)
471 .vortex_expect("Failed to validate zero-copy to list flag");
472 }
473 let dtype = self.dtype().clone();
474 let len = self.len();
475 let slots: ArraySlots = self.slots().iter().cloned().collect();
476 let data = unsafe { self.into_data().with_zero_copy_to_list(is_zctl) };
477 unsafe {
478 Array::from_parts_unchecked(
479 ArrayParts::new(ListView, dtype, len, data).with_slots(slots),
480 )
481 }
482 }
483
484 pub fn into_data_parts(self) -> ListViewDataParts {
485 let elements = self.slots()[ELEMENTS_SLOT]
486 .clone()
487 .vortex_expect("ListViewArray elements slot");
488 let offsets = self.slots()[OFFSETS_SLOT]
489 .clone()
490 .vortex_expect("ListViewArray offsets slot");
491 let sizes = self.slots()[SIZES_SLOT]
492 .clone()
493 .vortex_expect("ListViewArray sizes slot");
494 let validity = self.listview_validity();
495 ListViewDataParts {
496 elements_dtype: Arc::new(elements.dtype().clone()),
497 elements,
498 offsets,
499 sizes,
500 validity,
501 }
502 }
503}
504
505fn validate_offsets_and_sizes<O, S>(
507 offsets_slice: &[O],
508 sizes_slice: &[S],
509 elements_len: u64,
510) -> VortexResult<()>
511where
512 O: IntegerPType,
513 S: IntegerPType,
514{
515 debug_assert_eq!(offsets_slice.len(), sizes_slice.len());
516
517 #[allow(clippy::absurd_extreme_comparisons, unused_comparisons)]
518 for i in 0..offsets_slice.len() {
519 let offset = offsets_slice[i];
520 let size = sizes_slice[i];
521
522 vortex_ensure!(offset >= O::zero(), "cannot have negative offsets");
523 vortex_ensure!(size >= S::zero(), "cannot have negative size");
524
525 let offset_u64 = offset
526 .to_u64()
527 .ok_or_else(|| vortex_err!("offset[{i}] = {offset:?} cannot be converted to u64"))?;
528
529 let size_u64 = size
530 .to_u64()
531 .ok_or_else(|| vortex_err!("size[{i}] = {size:?} cannot be converted to u64"))?;
532
533 let end = offset_u64.checked_add(size_u64).ok_or_else(|| {
535 vortex_err!("offset[{i}] ({offset_u64}) + size[{i}] ({size_u64}) would overflow u64")
536 })?;
537
538 if offset_u64 == elements_len {
539 vortex_ensure!(
540 size_u64 == 0,
541 "views to the end of the elements array (length {elements_len}) must have size 0 \
542 (had size {size_u64})"
543 );
544 }
545
546 vortex_ensure!(
547 end <= elements_len,
548 "offset[{i}] + size[{i}] = {offset_u64} + {size_u64} = {end} \
549 exceeds elements length {elements_len}",
550 );
551 }
552
553 Ok(())
554}
555
556fn validate_zctl(
559 elements: &ArrayRef,
560 offsets_primitive: PrimitiveArray,
561 sizes_primitive: PrimitiveArray,
562) -> VortexResult<()> {
563 let mut ctx = LEGACY_SESSION.create_execution_ctx();
566 if let Some(is_sorted) = offsets_primitive.statistics().compute_is_sorted(&mut ctx) {
567 vortex_ensure!(is_sorted, "offsets must be sorted");
568 } else {
569 vortex_bail!("offsets must report is_sorted statistic");
570 }
571
572 fn validate_monotonic_ends<O: IntegerPType, S: IntegerPType>(
575 offsets_slice: &[O],
576 sizes_slice: &[S],
577 len: usize,
578 ) -> VortexResult<()> {
579 let mut max_end = 0usize;
580
581 for i in 0..len {
582 let offset = offsets_slice[i].to_usize().unwrap_or(usize::MAX);
583 let size = sizes_slice[i].to_usize().unwrap_or(usize::MAX);
584
585 vortex_ensure!(
587 offset >= max_end,
588 "Zero-copy-to-list requires views to be non-overlapping and ordered: \
589 view[{}] starts at {} but previous views extend to {}",
590 i,
591 offset,
592 max_end
593 );
594
595 let end = offset.saturating_add(size);
597 max_end = max_end.max(end);
598 }
599
600 Ok(())
601 }
602
603 let offsets_dtype = offsets_primitive.dtype();
604 let sizes_dtype = sizes_primitive.dtype();
605 let len = offsets_primitive.len();
606
607 match_each_integer_ptype!(offsets_dtype.as_ptype(), |O| {
609 match_each_integer_ptype!(sizes_dtype.as_ptype(), |S| {
610 let offsets_slice = offsets_primitive.as_slice::<O>();
611 let sizes_slice = sizes_primitive.as_slice::<S>();
612
613 validate_monotonic_ends(offsets_slice, sizes_slice, len)?;
614 })
615 });
616
617 let mut element_references = vec![0u8; elements.len()];
622
623 fn count_references<O: IntegerPType, S: IntegerPType>(
624 element_references: &mut [u8],
625 offsets_primitive: PrimitiveArray,
626 sizes_primitive: PrimitiveArray,
627 ) {
628 let offsets_slice = offsets_primitive.as_slice::<O>();
629 let sizes_slice = sizes_primitive.as_slice::<S>();
630
631 for i in 0..offsets_slice.len() {
634 let offset: usize = offsets_slice[i].as_();
635 let size: usize = sizes_slice[i].as_();
636 for j in offset..offset + size {
637 element_references[j] = element_references[j].saturating_add(1);
638 }
639 }
640 }
641
642 match_each_integer_ptype!(offsets_primitive.ptype(), |O| {
643 match_each_integer_ptype!(sizes_primitive.ptype(), |S| {
644 count_references::<O, S>(&mut element_references, offsets_primitive, sizes_primitive);
645 })
646 });
647
648 let leftmost_used = element_references
650 .iter()
651 .position(|&references| references != 0);
652 let rightmost_used = element_references
653 .iter()
654 .rposition(|&references| references != 0);
655
656 if let (Some(first_ref), Some(last_ref)) = (leftmost_used, rightmost_used) {
657 vortex_ensure!(
658 element_references[first_ref..=last_ref]
659 .iter()
660 .all(|&references| references != 0),
661 "found gap in elements array between first and last referenced elements"
662 );
663 }
664
665 vortex_ensure!(element_references.iter().all(|&references| references <= 1));
666
667 Ok(())
668}