1use std::sync::Arc;
14
15use vortex_error::VortexExpect;
16use vortex_error::VortexResult;
17use vortex_error::vortex_ensure;
18use vortex_error::vortex_panic;
19use vortex_mask::Mask;
20
21use crate::ArrayRef;
22use crate::Canonical;
23use crate::ExecutionCtx;
24use crate::LEGACY_SESSION;
25use crate::VortexSessionExecute;
26use crate::array::IntoArray;
27use crate::arrays::ListViewArray;
28use crate::arrays::PrimitiveArray;
29use crate::arrays::listview::ListViewArrayExt;
30use crate::arrays::listview::ListViewRebuildMode;
31use crate::builders::ArrayBuilder;
32use crate::builders::DEFAULT_BUILDER_CAPACITY;
33use crate::builders::PrimitiveBuilder;
34use crate::builders::UninitRange;
35use crate::builders::builder_with_capacity;
36use crate::builders::lazy_null_builder::LazyBitBufferBuilder;
37use crate::builtins::ArrayBuiltins;
38use crate::dtype::DType;
39use crate::dtype::IntegerPType;
40use crate::dtype::Nullability;
41use crate::match_each_integer_ptype;
42use crate::scalar::ListScalar;
43use crate::scalar::Scalar;
44
45pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
55 dtype: DType,
57
58 elements_builder: Box<dyn ArrayBuilder>,
60
61 offsets_builder: PrimitiveBuilder<O>,
63
64 sizes_builder: PrimitiveBuilder<S>,
66
67 nulls: LazyBitBufferBuilder,
69}
70
71impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
72 pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
74 Self::with_capacity(
75 element_dtype,
76 nullability,
77 DEFAULT_BUILDER_CAPACITY * 2,
80 DEFAULT_BUILDER_CAPACITY,
81 )
82 }
83
84 pub fn with_capacity(
92 element_dtype: Arc<DType>,
93 nullability: Nullability,
94 elements_capacity: usize,
95 capacity: usize,
96 ) -> Self {
97 let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
98
99 let offsets_builder =
100 PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
101 let sizes_builder =
102 PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
103
104 let nulls = LazyBitBufferBuilder::new(capacity);
105
106 Self {
107 dtype: DType::List(element_dtype, nullability),
108 elements_builder,
109 offsets_builder,
110 sizes_builder,
111 nulls,
112 }
113 }
114
115 pub fn append_array_as_list(
122 &mut self,
123 array: &ArrayRef,
124 ctx: &mut ExecutionCtx,
125 ) -> VortexResult<()> {
126 vortex_ensure!(
127 array.dtype() == self.element_dtype(),
128 "Array dtype {:?} does not match list element dtype {:?}",
129 array.dtype(),
130 self.element_dtype()
131 );
132
133 let curr_offset = self.elements_builder.len();
134 let num_elements = array.len();
135
136 assert!(
139 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
140 "appending this list would cause an offset overflow"
141 );
142
143 array.append_to_builder(self.elements_builder.as_mut(), ctx)?;
144 self.nulls.append_non_null();
145
146 self.offsets_builder.append_value(
147 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
148 );
149 self.sizes_builder.append_value(
150 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
151 );
152
153 Ok(())
154 }
155
156 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
161 let Some(elements) = value.elements() else {
162 vortex_ensure!(
164 self.dtype.is_nullable(),
165 "Cannot append null value to non-nullable list builder"
166 );
167 self.append_null();
168 return Ok(());
169 };
170
171 let curr_offset = self.elements_builder.len();
172 let num_elements = elements.len();
173
174 assert!(
177 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
178 "appending this list would cause an offset overflow"
179 );
180
181 for scalar in elements {
182 self.elements_builder.append_scalar(&scalar)?;
183 }
184 self.nulls.append_non_null();
185
186 self.offsets_builder.append_value(
187 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
188 );
189 self.sizes_builder.append_value(
190 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
191 );
192
193 Ok(())
194 }
195
196 pub fn finish_into_listview(&mut self) -> ListViewArray {
198 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
199 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
200
201 let elements = self.elements_builder.finish();
202 let offsets = self.offsets_builder.finish();
203 let sizes = self.sizes_builder.finish();
204 let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
205
206 unsafe {
216 ListViewArray::new_unchecked(elements, offsets, sizes, validity)
217 .with_zero_copy_to_list(true)
218 }
219 }
220
221 pub fn element_dtype(&self) -> &DType {
224 let DType::List(element_dtype, ..) = &self.dtype else {
225 vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
226 };
227
228 element_dtype
229 }
230}
231
232impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
233 fn as_any(&self) -> &dyn std::any::Any {
234 self
235 }
236
237 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
238 self
239 }
240
241 fn dtype(&self) -> &DType {
242 &self.dtype
243 }
244
245 fn len(&self) -> usize {
246 self.offsets_builder.len()
247 }
248
249 fn append_zeros(&mut self, n: usize) {
250 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
251 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
252
253 let curr_offset = self.elements_builder.len();
255
256 for _ in 0..n {
259 self.offsets_builder.append_value(
260 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
261 );
262 self.sizes_builder.append_value(S::zero());
263 }
264
265 self.nulls.append_n_non_nulls(n);
266 }
267
268 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
269 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
270 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
271
272 let curr_offset = self.elements_builder.len();
274
275 for _ in 0..n {
277 self.offsets_builder.append_value(
278 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
279 );
280 self.sizes_builder.append_value(S::zero());
281 }
282
283 self.nulls.append_n_nulls(n);
285 }
286
287 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
288 vortex_ensure!(
289 scalar.dtype() == self.dtype(),
290 "ListViewBuilder expected scalar with dtype {}, got {}",
291 self.dtype(),
292 scalar.dtype()
293 );
294
295 let list_scalar = scalar.as_list();
296 self.append_value(list_scalar)
297 }
298
299 unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
300 let mut ctx = LEGACY_SESSION.create_execution_ctx();
305
306 let listview = array
307 .clone()
308 .execute::<ListViewArray>(&mut ctx)
309 .vortex_expect("failed to execute array into ListViewArray in extend_from_array");
310 if listview.is_empty() {
311 return;
312 }
313
314 let listview = listview
317 .rebuild(ListViewRebuildMode::MakeExact, &mut ctx)
318 .vortex_expect("ListViewArray::rebuild(MakeExact) failed in extend_from_array");
319 debug_assert!(listview.is_zero_copy_to_list());
320
321 self.nulls.append_validity_mask(
322 &array
323 .validity()
324 .vortex_expect("validity_mask in extend_from_array_unchecked")
325 .execute_mask(array.len(), &mut ctx)
326 .vortex_expect("Failed to compute validity mask"),
327 );
328
329 let old_elements_len = self.elements_builder.len();
331 self.elements_builder
332 .reserve_exact(listview.elements().len());
333 self.elements_builder.extend_from_array(listview.elements());
334 let new_elements_len = self.elements_builder.len();
335
336 let extend_length = listview.len();
338 self.sizes_builder.reserve_exact(extend_length);
339 self.offsets_builder.reserve_exact(extend_length);
340
341 let cast_sizes = listview
343 .sizes()
344 .clone()
345 .cast(self.sizes_builder.dtype().clone())
346 .vortex_expect(
347 "was somehow unable to cast the new sizes to the type of the builder sizes",
348 );
349 self.sizes_builder.extend_from_array(&cast_sizes);
350
351 let uninit_range = self.offsets_builder.uninit_range(extend_length);
355
356 let new_offsets = listview
358 .offsets()
359 .clone()
360 .execute::<PrimitiveArray>(&mut ctx)
361 .vortex_expect("failed to execute list view offsets into a PrimitiveArray");
362
363 match_each_integer_ptype!(new_offsets.ptype(), |A| {
364 adjust_and_extend_offsets::<O, A>(
365 uninit_range,
366 new_offsets,
367 old_elements_len,
368 new_elements_len,
369 );
370 })
371 }
372
373 fn reserve_exact(&mut self, capacity: usize) {
374 self.elements_builder.reserve_exact(capacity * 2);
375 self.offsets_builder.reserve_exact(capacity);
376 self.sizes_builder.reserve_exact(capacity);
377 self.nulls.reserve_exact(capacity);
378 }
379
380 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
381 self.nulls = LazyBitBufferBuilder::from_validity_mask(validity);
382 }
383
384 fn finish(&mut self) -> ArrayRef {
385 self.finish_into_listview().into_array()
386 }
387
388 fn finish_into_canonical(&mut self) -> Canonical {
389 Canonical::List(self.finish_into_listview())
390 }
391}
392
393fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
396 mut uninit_range: UninitRange<'a, O>,
397 new_offsets: PrimitiveArray,
398 old_elements_len: usize,
399 new_elements_len: usize,
400) {
401 let new_offsets_slice = new_offsets.as_slice::<A>();
402 let old_elements_len = O::from_usize(old_elements_len)
403 .vortex_expect("the old elements length did not fit into the offset type (impossible)");
404 let new_elements_len = O::from_usize(new_elements_len)
405 .vortex_expect("the current elements length did not fit into the offset type (impossible)");
406
407 for i in 0..uninit_range.len() {
408 let new_offset = O::from_usize(
409 new_offsets_slice[i]
410 .to_usize()
411 .vortex_expect("Offsets must always fit in usize"),
412 )
413 .vortex_expect("New offset somehow did not fit into the builder's offset type");
414
415 let adjusted_new_offset = new_offset + old_elements_len;
418 assert!(
419 adjusted_new_offset <= new_elements_len,
420 "[{i}/{}]: {new_offset} + {old_elements_len} \
421 = {adjusted_new_offset} <= {new_elements_len} failed",
422 uninit_range.len()
423 );
424
425 uninit_range.set_value(i, adjusted_new_offset);
426 }
427
428 unsafe { uninit_range.finish() };
431}
432
433#[cfg(test)]
434mod tests {
435 use std::sync::Arc;
436
437 use vortex_buffer::buffer;
438 use vortex_error::VortexExpect;
439
440 use super::ListViewBuilder;
441 use crate::IntoArray;
442 use crate::VortexSessionExecute;
443 use crate::array_session;
444 use crate::arrays::ListArray;
445 use crate::arrays::ListViewArray;
446 use crate::arrays::listview::ListViewArrayExt;
447 use crate::assert_arrays_eq;
448 use crate::builders::ArrayBuilder;
449 use crate::builders::listview::PrimitiveArray;
450 use crate::dtype::DType;
451 use crate::dtype::Nullability::NonNullable;
452 use crate::dtype::Nullability::Nullable;
453 use crate::dtype::PType::I32;
454 use crate::scalar::Scalar;
455 use crate::validity::Validity;
456
457 #[test]
458 fn test_empty() {
459 let mut builder =
460 ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
461
462 let listview = builder.finish();
463 assert_eq!(listview.len(), 0);
464 }
465
466 #[test]
467 fn test_basic_append_and_nulls() {
468 let mut ctx = array_session().create_execution_ctx();
469 let dtype: Arc<DType> = Arc::new(I32.into());
470 let mut builder =
471 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
472
473 builder
475 .append_value(
476 Scalar::list(
477 Arc::clone(&dtype),
478 vec![1i32.into(), 2i32.into(), 3i32.into()],
479 NonNullable,
480 )
481 .as_list(),
482 )
483 .unwrap();
484
485 builder
487 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
488 .unwrap();
489
490 builder.append_null();
492
493 builder
495 .append_value(
496 Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
497 )
498 .unwrap();
499
500 let listview = builder.finish_into_listview();
501 assert_eq!(listview.len(), 4);
502
503 assert_arrays_eq!(
505 listview.list_elements_at(0).unwrap(),
506 PrimitiveArray::from_iter([1i32, 2, 3]),
507 &mut ctx
508 );
509
510 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
512
513 assert!(
515 !listview
516 .validity()
517 .vortex_expect("listview validity should be derivable")
518 .execute_is_valid(2, &mut ctx)
519 .unwrap()
520 );
521
522 assert_arrays_eq!(
524 listview.list_elements_at(3).unwrap(),
525 PrimitiveArray::from_iter([4i32, 5]),
526 &mut ctx
527 );
528 }
529
530 #[test]
531 fn test_different_offset_size_types() {
532 let mut ctx = array_session().create_execution_ctx();
533 let dtype: Arc<DType> = Arc::new(I32.into());
535 let mut builder =
536 ListViewBuilder::<u32, u8>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
537
538 builder
539 .append_value(
540 Scalar::list(
541 Arc::clone(&dtype),
542 vec![1i32.into(), 2i32.into()],
543 NonNullable,
544 )
545 .as_list(),
546 )
547 .unwrap();
548
549 builder
550 .append_value(
551 Scalar::list(
552 dtype,
553 vec![3i32.into(), 4i32.into(), 5i32.into()],
554 NonNullable,
555 )
556 .as_list(),
557 )
558 .unwrap();
559
560 let listview = builder.finish_into_listview();
561 assert_eq!(listview.len(), 2);
562
563 assert_arrays_eq!(
565 listview.list_elements_at(0).unwrap(),
566 PrimitiveArray::from_iter([1i32, 2]),
567 &mut ctx
568 );
569
570 assert_arrays_eq!(
572 listview.list_elements_at(1).unwrap(),
573 PrimitiveArray::from_iter([3i32, 4, 5]),
574 &mut ctx
575 );
576
577 let dtype2: Arc<DType> = Arc::new(I32.into());
579 let mut builder2 =
580 ListViewBuilder::<u64, u16>::with_capacity(Arc::clone(&dtype2), NonNullable, 0, 0);
581
582 for i in 0..5 {
583 builder2
584 .append_value(
585 Scalar::list(Arc::clone(&dtype2), vec![(i * 10).into()], NonNullable).as_list(),
586 )
587 .unwrap();
588 }
589
590 let listview2 = builder2.finish_into_listview();
591 assert_eq!(listview2.len(), 5);
592
593 for i in 0..5i32 {
595 assert_arrays_eq!(
596 listview2.list_elements_at(i as usize).unwrap(),
597 PrimitiveArray::from_iter([i * 10]),
598 &mut ctx
599 );
600 }
601 }
602
603 #[test]
604 fn test_builder_trait_methods() {
605 let mut ctx = array_session().create_execution_ctx();
606 let dtype: Arc<DType> = Arc::new(I32.into());
607 let mut builder =
608 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
609
610 builder.append_zeros(2);
612 assert_eq!(builder.len(), 2);
613
614 unsafe {
616 builder.append_nulls_unchecked(2);
617 }
618 assert_eq!(builder.len(), 4);
619
620 let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
622 builder.append_scalar(&list_scalar).unwrap();
623 assert_eq!(builder.len(), 5);
624
625 let listview = builder.finish_into_listview();
626 assert_eq!(listview.len(), 5);
627
628 assert_eq!(listview.list_elements_at(0).unwrap().len(), 0);
630 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
631
632 assert!(
634 !listview
635 .validity()
636 .vortex_expect("listview validity should be derivable")
637 .execute_is_valid(2, &mut ctx)
638 .unwrap()
639 );
640 assert!(
641 !listview
642 .validity()
643 .vortex_expect("listview validity should be derivable")
644 .execute_is_valid(3, &mut ctx)
645 .unwrap()
646 );
647
648 assert_arrays_eq!(
650 listview.list_elements_at(4).unwrap(),
651 PrimitiveArray::from_iter([10i32, 20]),
652 &mut ctx
653 );
654 }
655
656 #[test]
657 fn test_extend_from_array() {
658 let mut ctx = array_session().create_execution_ctx();
659 let dtype: Arc<DType> = Arc::new(I32.into());
660
661 let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
663 [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
664 Arc::new(I32.into()),
665 )
666 .unwrap();
667
668 let mut builder =
669 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
670
671 builder
673 .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
674 .unwrap();
675
676 builder.extend_from_array(&source.into_array());
678
679 let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
681 std::iter::empty::<Option<Vec<i32>>>(),
682 Arc::new(I32.into()),
683 )
684 .unwrap();
685 builder.extend_from_array(&empty_source.into_array());
686
687 let listview = builder.finish_into_listview();
688 assert_eq!(listview.len(), 4);
689
690 assert_arrays_eq!(
693 listview.list_elements_at(0).unwrap(),
694 PrimitiveArray::from_iter([0i32]),
695 &mut ctx
696 );
697
698 assert_arrays_eq!(
700 listview.list_elements_at(1).unwrap(),
701 PrimitiveArray::from_iter([1i32, 2, 3]),
702 &mut ctx
703 );
704
705 assert!(
707 !listview
708 .validity()
709 .vortex_expect("listview validity should be derivable")
710 .execute_is_valid(2, &mut ctx)
711 .unwrap()
712 );
713
714 assert_arrays_eq!(
716 listview.list_elements_at(3).unwrap(),
717 PrimitiveArray::from_iter([4i32, 5]),
718 &mut ctx
719 );
720 }
721
722 #[test]
723 fn test_extend_from_array_overlapping_listview() {
724 let mut ctx = array_session().create_execution_ctx();
725 let dtype: Arc<DType> = Arc::new(I32.into());
726
727 let source = unsafe {
732 ListViewArray::new_unchecked(
733 buffer![10i32, 20, 30].into_array(),
734 buffer![0u32, 1, 0].into_array(),
735 buffer![2u8, 2, 1].into_array(),
736 Validity::from_iter([true, false, true]),
737 )
738 };
739 assert!(!source.is_zero_copy_to_list());
740
741 let mut builder =
742 ListViewBuilder::<u32, u8>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
743 builder.extend_from_array(&source.into_array());
744
745 let listview = builder.finish_into_listview();
746 assert_eq!(listview.len(), 3);
747 assert!(listview.is_zero_copy_to_list());
748
749 assert_arrays_eq!(
750 listview.list_elements_at(0).unwrap(),
751 PrimitiveArray::from_iter([10i32, 20]),
752 &mut ctx
753 );
754 assert!(
755 !listview
756 .validity()
757 .vortex_expect("listview validity should be derivable")
758 .execute_is_valid(1, &mut ctx)
759 .unwrap()
760 );
761 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
762 assert_arrays_eq!(
763 listview.list_elements_at(2).unwrap(),
764 PrimitiveArray::from_iter([10i32]),
765 &mut ctx
766 );
767 }
768
769 #[test]
770 fn test_error_append_null_to_non_nullable() {
771 let dtype: Arc<DType> = Arc::new(I32.into());
772 let mut builder =
773 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
774
775 let null_scalar = Scalar::null(DType::List(dtype, Nullable));
777 let null_list = null_scalar.as_list();
778
779 let result = builder.append_value(null_list);
781 assert!(result.is_err());
782 assert!(
783 result
784 .unwrap_err()
785 .to_string()
786 .contains("null value to non-nullable")
787 );
788 }
789
790 #[test]
791 fn test_append_array_as_list() {
792 let dtype: Arc<DType> = Arc::new(I32.into());
793 let mut ctx = array_session().create_execution_ctx();
794 let mut builder =
795 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
796
797 let arr1 = buffer![1i32, 2, 3].into_array();
799 builder.append_array_as_list(&arr1, &mut ctx).unwrap();
800
801 builder
803 .append_value(
804 Scalar::list(
805 Arc::clone(&dtype),
806 vec![10i32.into(), 11i32.into()],
807 NonNullable,
808 )
809 .as_list(),
810 )
811 .unwrap();
812
813 let arr2 = buffer![4i32, 5].into_array();
815 builder.append_array_as_list(&arr2, &mut ctx).unwrap();
816
817 let arr3 = buffer![0i32; 0].into_array();
819 builder.append_array_as_list(&arr3, &mut ctx).unwrap();
820
821 builder
823 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
824 .unwrap();
825
826 let listview = builder.finish_into_listview();
827 assert_eq!(listview.len(), 5);
828
829 assert_arrays_eq!(
831 listview.elements(),
832 PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5]),
833 &mut ctx
834 );
835
836 assert_arrays_eq!(
838 listview.offsets(),
839 PrimitiveArray::from_iter([0u32, 3, 5, 7, 7]),
840 &mut ctx
841 );
842
843 assert_arrays_eq!(
845 listview.sizes(),
846 PrimitiveArray::from_iter([3u32, 2, 2, 0, 0]),
847 &mut ctx
848 );
849
850 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype, NonNullable, 20, 10);
852 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
853 assert!(
854 builder
855 .append_array_as_list(&wrong_dtype_arr, &mut ctx)
856 .is_err()
857 );
858 }
859}