1use std::sync::Arc;
14
15use vortex_error::VortexExpect;
16use vortex_error::VortexResult;
17use vortex_error::vortex_ensure;
18use vortex_error::vortex_panic;
19use vortex_mask::Mask;
20
21use crate::ArrayRef;
22use crate::Canonical;
23use crate::LEGACY_SESSION;
24use crate::ToCanonical;
25use crate::VortexSessionExecute;
26use crate::array::IntoArray;
27use crate::arrays::ListViewArray;
28use crate::arrays::PrimitiveArray;
29use crate::arrays::listview::ListViewArrayExt;
30use crate::arrays::listview::ListViewRebuildMode;
31use crate::builders::ArrayBuilder;
32use crate::builders::DEFAULT_BUILDER_CAPACITY;
33use crate::builders::PrimitiveBuilder;
34use crate::builders::UninitRange;
35use crate::builders::builder_with_capacity;
36use crate::builders::lazy_null_builder::LazyBitBufferBuilder;
37use crate::builtins::ArrayBuiltins;
38use crate::dtype::DType;
39use crate::dtype::IntegerPType;
40use crate::dtype::Nullability;
41use crate::match_each_integer_ptype;
42use crate::scalar::ListScalar;
43use crate::scalar::Scalar;
44
45pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
55 dtype: DType,
57
58 elements_builder: Box<dyn ArrayBuilder>,
60
61 offsets_builder: PrimitiveBuilder<O>,
63
64 sizes_builder: PrimitiveBuilder<S>,
66
67 nulls: LazyBitBufferBuilder,
69}
70
71impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
72 pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
74 Self::with_capacity(
75 element_dtype,
76 nullability,
77 DEFAULT_BUILDER_CAPACITY * 2,
80 DEFAULT_BUILDER_CAPACITY,
81 )
82 }
83
84 pub fn with_capacity(
92 element_dtype: Arc<DType>,
93 nullability: Nullability,
94 elements_capacity: usize,
95 capacity: usize,
96 ) -> Self {
97 let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
98
99 let offsets_builder =
100 PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
101 let sizes_builder =
102 PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
103
104 let nulls = LazyBitBufferBuilder::new(capacity);
105
106 Self {
107 dtype: DType::List(element_dtype, nullability),
108 elements_builder,
109 offsets_builder,
110 sizes_builder,
111 nulls,
112 }
113 }
114
115 pub fn append_array_as_list(&mut self, array: &ArrayRef) -> VortexResult<()> {
122 vortex_ensure!(
123 array.dtype() == self.element_dtype(),
124 "Array dtype {:?} does not match list element dtype {:?}",
125 array.dtype(),
126 self.element_dtype()
127 );
128
129 let curr_offset = self.elements_builder.len();
130 let num_elements = array.len();
131
132 assert!(
135 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
136 "appending this list would cause an offset overflow"
137 );
138
139 self.elements_builder.extend_from_array(array);
140 self.nulls.append_non_null();
141
142 self.offsets_builder.append_value(
143 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
144 );
145 self.sizes_builder.append_value(
146 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
147 );
148
149 Ok(())
150 }
151
152 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
157 let Some(elements) = value.elements() else {
158 vortex_ensure!(
160 self.dtype.is_nullable(),
161 "Cannot append null value to non-nullable list builder"
162 );
163 self.append_null();
164 return Ok(());
165 };
166
167 let curr_offset = self.elements_builder.len();
168 let num_elements = elements.len();
169
170 assert!(
173 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
174 "appending this list would cause an offset overflow"
175 );
176
177 for scalar in elements {
178 self.elements_builder.append_scalar(&scalar)?;
179 }
180 self.nulls.append_non_null();
181
182 self.offsets_builder.append_value(
183 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
184 );
185 self.sizes_builder.append_value(
186 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
187 );
188
189 Ok(())
190 }
191
192 pub fn finish_into_listview(&mut self) -> ListViewArray {
194 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
195 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
196
197 let elements = self.elements_builder.finish();
198 let offsets = self.offsets_builder.finish();
199 let sizes = self.sizes_builder.finish();
200 let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
201
202 unsafe {
212 ListViewArray::new_unchecked(elements, offsets, sizes, validity)
213 .with_zero_copy_to_list(true)
214 }
215 }
216
217 pub fn element_dtype(&self) -> &DType {
220 let DType::List(element_dtype, ..) = &self.dtype else {
221 vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
222 };
223
224 element_dtype
225 }
226}
227
228impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
229 fn as_any(&self) -> &dyn std::any::Any {
230 self
231 }
232
233 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
234 self
235 }
236
237 fn dtype(&self) -> &DType {
238 &self.dtype
239 }
240
241 fn len(&self) -> usize {
242 self.offsets_builder.len()
243 }
244
245 fn append_zeros(&mut self, n: usize) {
246 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
247 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
248
249 let curr_offset = self.elements_builder.len();
251
252 for _ in 0..n {
255 self.offsets_builder.append_value(
256 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
257 );
258 self.sizes_builder.append_value(S::zero());
259 }
260
261 self.nulls.append_n_non_nulls(n);
262 }
263
264 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
265 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
266 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
267
268 let curr_offset = self.elements_builder.len();
270
271 for _ in 0..n {
273 self.offsets_builder.append_value(
274 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
275 );
276 self.sizes_builder.append_value(S::zero());
277 }
278
279 self.nulls.append_n_nulls(n);
281 }
282
283 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
284 vortex_ensure!(
285 scalar.dtype() == self.dtype(),
286 "ListViewBuilder expected scalar with dtype {}, got {}",
287 self.dtype(),
288 scalar.dtype()
289 );
290
291 let list_scalar = scalar.as_list();
292 self.append_value(list_scalar)
293 }
294
295 unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
296 let listview = array.to_listview();
297 if listview.is_empty() {
298 return;
299 }
300
301 let listview = listview
304 .rebuild(ListViewRebuildMode::MakeExact)
305 .vortex_expect("ListViewArray::rebuild(MakeExact) failed in extend_from_array");
306 debug_assert!(listview.is_zero_copy_to_list());
307
308 self.nulls.append_validity_mask(
309 array
310 .validity()
311 .vortex_expect("validity_mask in extend_from_array_unchecked")
312 .to_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
313 .vortex_expect("Failed to compute validity mask"),
314 );
315
316 let old_elements_len = self.elements_builder.len();
318 self.elements_builder
319 .reserve_exact(listview.elements().len());
320 self.elements_builder.extend_from_array(listview.elements());
321 let new_elements_len = self.elements_builder.len();
322
323 let extend_length = listview.len();
325 self.sizes_builder.reserve_exact(extend_length);
326 self.offsets_builder.reserve_exact(extend_length);
327
328 let cast_sizes = listview
330 .sizes()
331 .clone()
332 .cast(self.sizes_builder.dtype().clone())
333 .vortex_expect(
334 "was somehow unable to cast the new sizes to the type of the builder sizes",
335 );
336 self.sizes_builder.extend_from_array(&cast_sizes);
337
338 let uninit_range = self.offsets_builder.uninit_range(extend_length);
342
343 let new_offsets = listview.offsets().to_primitive();
345
346 match_each_integer_ptype!(new_offsets.ptype(), |A| {
347 adjust_and_extend_offsets::<O, A>(
348 uninit_range,
349 new_offsets,
350 old_elements_len,
351 new_elements_len,
352 );
353 })
354 }
355
356 fn reserve_exact(&mut self, capacity: usize) {
357 self.elements_builder.reserve_exact(capacity * 2);
358 self.offsets_builder.reserve_exact(capacity);
359 self.sizes_builder.reserve_exact(capacity);
360 self.nulls.reserve_exact(capacity);
361 }
362
363 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
364 self.nulls = LazyBitBufferBuilder::new(validity.len());
365 self.nulls.append_validity_mask(validity);
366 }
367
368 fn finish(&mut self) -> ArrayRef {
369 self.finish_into_listview().into_array()
370 }
371
372 fn finish_into_canonical(&mut self) -> Canonical {
373 Canonical::List(self.finish_into_listview())
374 }
375}
376
377fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
380 mut uninit_range: UninitRange<'a, O>,
381 new_offsets: PrimitiveArray,
382 old_elements_len: usize,
383 new_elements_len: usize,
384) {
385 let new_offsets_slice = new_offsets.as_slice::<A>();
386 let old_elements_len = O::from_usize(old_elements_len)
387 .vortex_expect("the old elements length did not fit into the offset type (impossible)");
388 let new_elements_len = O::from_usize(new_elements_len)
389 .vortex_expect("the current elements length did not fit into the offset type (impossible)");
390
391 for i in 0..uninit_range.len() {
392 let new_offset = O::from_usize(
393 new_offsets_slice[i]
394 .to_usize()
395 .vortex_expect("Offsets must always fit in usize"),
396 )
397 .vortex_expect("New offset somehow did not fit into the builder's offset type");
398
399 let adjusted_new_offset = new_offset + old_elements_len;
402 assert!(
403 adjusted_new_offset <= new_elements_len,
404 "[{i}/{}]: {new_offset} + {old_elements_len} \
405 = {adjusted_new_offset} <= {new_elements_len} failed",
406 uninit_range.len()
407 );
408
409 uninit_range.set_value(i, adjusted_new_offset);
410 }
411
412 unsafe { uninit_range.finish() };
415}
416
417#[cfg(test)]
418mod tests {
419 use std::sync::Arc;
420
421 use vortex_buffer::buffer;
422 use vortex_error::VortexExpect;
423
424 use super::ListViewBuilder;
425 use crate::IntoArray;
426 use crate::arrays::ListArray;
427 use crate::arrays::ListViewArray;
428 use crate::arrays::listview::ListViewArrayExt;
429 use crate::assert_arrays_eq;
430 use crate::builders::ArrayBuilder;
431 use crate::builders::listview::PrimitiveArray;
432 use crate::dtype::DType;
433 use crate::dtype::Nullability::NonNullable;
434 use crate::dtype::Nullability::Nullable;
435 use crate::dtype::PType::I32;
436 use crate::scalar::Scalar;
437 use crate::validity::Validity;
438
439 #[test]
440 fn test_empty() {
441 let mut builder =
442 ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
443
444 let listview = builder.finish();
445 assert_eq!(listview.len(), 0);
446 }
447
448 #[test]
449 fn test_basic_append_and_nulls() {
450 let dtype: Arc<DType> = Arc::new(I32.into());
451 let mut builder =
452 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
453
454 builder
456 .append_value(
457 Scalar::list(
458 Arc::clone(&dtype),
459 vec![1i32.into(), 2i32.into(), 3i32.into()],
460 NonNullable,
461 )
462 .as_list(),
463 )
464 .unwrap();
465
466 builder
468 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
469 .unwrap();
470
471 builder.append_null();
473
474 builder
476 .append_value(
477 Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
478 )
479 .unwrap();
480
481 let listview = builder.finish_into_listview();
482 assert_eq!(listview.len(), 4);
483
484 assert_arrays_eq!(
486 listview.list_elements_at(0).unwrap(),
487 PrimitiveArray::from_iter([1i32, 2, 3])
488 );
489
490 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
492
493 assert!(
495 !listview
496 .validity()
497 .vortex_expect("listview validity should be derivable")
498 .is_valid(2)
499 .unwrap()
500 );
501
502 assert_arrays_eq!(
504 listview.list_elements_at(3).unwrap(),
505 PrimitiveArray::from_iter([4i32, 5])
506 );
507 }
508
509 #[test]
510 fn test_different_offset_size_types() {
511 let dtype: Arc<DType> = Arc::new(I32.into());
513 let mut builder =
514 ListViewBuilder::<u32, u8>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
515
516 builder
517 .append_value(
518 Scalar::list(
519 Arc::clone(&dtype),
520 vec![1i32.into(), 2i32.into()],
521 NonNullable,
522 )
523 .as_list(),
524 )
525 .unwrap();
526
527 builder
528 .append_value(
529 Scalar::list(
530 dtype,
531 vec![3i32.into(), 4i32.into(), 5i32.into()],
532 NonNullable,
533 )
534 .as_list(),
535 )
536 .unwrap();
537
538 let listview = builder.finish_into_listview();
539 assert_eq!(listview.len(), 2);
540
541 assert_arrays_eq!(
543 listview.list_elements_at(0).unwrap(),
544 PrimitiveArray::from_iter([1i32, 2])
545 );
546
547 assert_arrays_eq!(
549 listview.list_elements_at(1).unwrap(),
550 PrimitiveArray::from_iter([3i32, 4, 5])
551 );
552
553 let dtype2: Arc<DType> = Arc::new(I32.into());
555 let mut builder2 =
556 ListViewBuilder::<u64, u16>::with_capacity(Arc::clone(&dtype2), NonNullable, 0, 0);
557
558 for i in 0..5 {
559 builder2
560 .append_value(
561 Scalar::list(Arc::clone(&dtype2), vec![(i * 10).into()], NonNullable).as_list(),
562 )
563 .unwrap();
564 }
565
566 let listview2 = builder2.finish_into_listview();
567 assert_eq!(listview2.len(), 5);
568
569 for i in 0..5i32 {
571 assert_arrays_eq!(
572 listview2.list_elements_at(i as usize).unwrap(),
573 PrimitiveArray::from_iter([i * 10])
574 );
575 }
576 }
577
578 #[test]
579 fn test_builder_trait_methods() {
580 let dtype: Arc<DType> = Arc::new(I32.into());
581 let mut builder =
582 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
583
584 builder.append_zeros(2);
586 assert_eq!(builder.len(), 2);
587
588 unsafe {
590 builder.append_nulls_unchecked(2);
591 }
592 assert_eq!(builder.len(), 4);
593
594 let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
596 builder.append_scalar(&list_scalar).unwrap();
597 assert_eq!(builder.len(), 5);
598
599 let listview = builder.finish_into_listview();
600 assert_eq!(listview.len(), 5);
601
602 assert_eq!(listview.list_elements_at(0).unwrap().len(), 0);
604 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
605
606 assert!(
608 !listview
609 .validity()
610 .vortex_expect("listview validity should be derivable")
611 .is_valid(2)
612 .unwrap()
613 );
614 assert!(
615 !listview
616 .validity()
617 .vortex_expect("listview validity should be derivable")
618 .is_valid(3)
619 .unwrap()
620 );
621
622 assert_arrays_eq!(
624 listview.list_elements_at(4).unwrap(),
625 PrimitiveArray::from_iter([10i32, 20])
626 );
627 }
628
629 #[test]
630 fn test_extend_from_array() {
631 let dtype: Arc<DType> = Arc::new(I32.into());
632
633 let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
635 [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
636 Arc::new(I32.into()),
637 )
638 .unwrap();
639
640 let mut builder =
641 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
642
643 builder
645 .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
646 .unwrap();
647
648 builder.extend_from_array(&source.into_array());
650
651 let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
653 std::iter::empty::<Option<Vec<i32>>>(),
654 Arc::new(I32.into()),
655 )
656 .unwrap();
657 builder.extend_from_array(&empty_source.into_array());
658
659 let listview = builder.finish_into_listview();
660 assert_eq!(listview.len(), 4);
661
662 assert_arrays_eq!(
665 listview.list_elements_at(0).unwrap(),
666 PrimitiveArray::from_iter([0i32])
667 );
668
669 assert_arrays_eq!(
671 listview.list_elements_at(1).unwrap(),
672 PrimitiveArray::from_iter([1i32, 2, 3])
673 );
674
675 assert!(
677 !listview
678 .validity()
679 .vortex_expect("listview validity should be derivable")
680 .is_valid(2)
681 .unwrap()
682 );
683
684 assert_arrays_eq!(
686 listview.list_elements_at(3).unwrap(),
687 PrimitiveArray::from_iter([4i32, 5])
688 );
689 }
690
691 #[test]
692 fn test_extend_from_array_overlapping_listview() {
693 let dtype: Arc<DType> = Arc::new(I32.into());
694
695 let source = unsafe {
700 ListViewArray::new_unchecked(
701 buffer![10i32, 20, 30].into_array(),
702 buffer![0u32, 1, 0].into_array(),
703 buffer![2u8, 2, 1].into_array(),
704 Validity::from_iter([true, false, true]),
705 )
706 };
707 assert!(!source.is_zero_copy_to_list());
708
709 let mut builder =
710 ListViewBuilder::<u32, u8>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
711 builder.extend_from_array(&source.into_array());
712
713 let listview = builder.finish_into_listview();
714 assert_eq!(listview.len(), 3);
715 assert!(listview.is_zero_copy_to_list());
716
717 assert_arrays_eq!(
718 listview.list_elements_at(0).unwrap(),
719 PrimitiveArray::from_iter([10i32, 20])
720 );
721 assert!(
722 !listview
723 .validity()
724 .vortex_expect("listview validity should be derivable")
725 .is_valid(1)
726 .unwrap()
727 );
728 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
729 assert_arrays_eq!(
730 listview.list_elements_at(2).unwrap(),
731 PrimitiveArray::from_iter([10i32])
732 );
733 }
734
735 #[test]
736 fn test_error_append_null_to_non_nullable() {
737 let dtype: Arc<DType> = Arc::new(I32.into());
738 let mut builder =
739 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
740
741 let null_scalar = Scalar::null(DType::List(dtype, Nullable));
743 let null_list = null_scalar.as_list();
744
745 let result = builder.append_value(null_list);
747 assert!(result.is_err());
748 assert!(
749 result
750 .unwrap_err()
751 .to_string()
752 .contains("null value to non-nullable")
753 );
754 }
755
756 #[test]
757 fn test_append_array_as_list() {
758 let dtype: Arc<DType> = Arc::new(I32.into());
759 let mut builder =
760 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
761
762 let arr1 = buffer![1i32, 2, 3].into_array();
764 builder.append_array_as_list(&arr1).unwrap();
765
766 builder
768 .append_value(
769 Scalar::list(
770 Arc::clone(&dtype),
771 vec![10i32.into(), 11i32.into()],
772 NonNullable,
773 )
774 .as_list(),
775 )
776 .unwrap();
777
778 let arr2 = buffer![4i32, 5].into_array();
780 builder.append_array_as_list(&arr2).unwrap();
781
782 let arr3 = buffer![0i32; 0].into_array();
784 builder.append_array_as_list(&arr3).unwrap();
785
786 builder
788 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
789 .unwrap();
790
791 let listview = builder.finish_into_listview();
792 assert_eq!(listview.len(), 5);
793
794 assert_arrays_eq!(
796 listview.elements(),
797 PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
798 );
799
800 assert_arrays_eq!(
802 listview.offsets(),
803 PrimitiveArray::from_iter([0u32, 3, 5, 7, 7])
804 );
805
806 assert_arrays_eq!(
808 listview.sizes(),
809 PrimitiveArray::from_iter([3u32, 2, 2, 0, 0])
810 );
811
812 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype, NonNullable, 20, 10);
814 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
815 assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
816 }
817}