1use std::sync::Arc;
14
15use vortex_error::VortexExpect;
16use vortex_error::VortexResult;
17use vortex_error::vortex_ensure;
18use vortex_error::vortex_panic;
19use vortex_mask::Mask;
20
21use crate::ArrayRef;
22use crate::Canonical;
23use crate::LEGACY_SESSION;
24#[expect(deprecated)]
25use crate::ToCanonical as _;
26use crate::VortexSessionExecute;
27use crate::array::IntoArray;
28use crate::arrays::ListViewArray;
29use crate::arrays::PrimitiveArray;
30use crate::arrays::listview::ListViewArrayExt;
31use crate::arrays::listview::ListViewRebuildMode;
32use crate::builders::ArrayBuilder;
33use crate::builders::DEFAULT_BUILDER_CAPACITY;
34use crate::builders::PrimitiveBuilder;
35use crate::builders::UninitRange;
36use crate::builders::builder_with_capacity;
37use crate::builders::lazy_null_builder::LazyBitBufferBuilder;
38use crate::builtins::ArrayBuiltins;
39use crate::dtype::DType;
40use crate::dtype::IntegerPType;
41use crate::dtype::Nullability;
42use crate::match_each_integer_ptype;
43use crate::scalar::ListScalar;
44use crate::scalar::Scalar;
45
46pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
56 dtype: DType,
58
59 elements_builder: Box<dyn ArrayBuilder>,
61
62 offsets_builder: PrimitiveBuilder<O>,
64
65 sizes_builder: PrimitiveBuilder<S>,
67
68 nulls: LazyBitBufferBuilder,
70}
71
72impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
73 pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
75 Self::with_capacity(
76 element_dtype,
77 nullability,
78 DEFAULT_BUILDER_CAPACITY * 2,
81 DEFAULT_BUILDER_CAPACITY,
82 )
83 }
84
85 pub fn with_capacity(
93 element_dtype: Arc<DType>,
94 nullability: Nullability,
95 elements_capacity: usize,
96 capacity: usize,
97 ) -> Self {
98 let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
99
100 let offsets_builder =
101 PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
102 let sizes_builder =
103 PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
104
105 let nulls = LazyBitBufferBuilder::new(capacity);
106
107 Self {
108 dtype: DType::List(element_dtype, nullability),
109 elements_builder,
110 offsets_builder,
111 sizes_builder,
112 nulls,
113 }
114 }
115
116 pub fn append_array_as_list(&mut self, array: &ArrayRef) -> VortexResult<()> {
123 vortex_ensure!(
124 array.dtype() == self.element_dtype(),
125 "Array dtype {:?} does not match list element dtype {:?}",
126 array.dtype(),
127 self.element_dtype()
128 );
129
130 let curr_offset = self.elements_builder.len();
131 let num_elements = array.len();
132
133 assert!(
136 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
137 "appending this list would cause an offset overflow"
138 );
139
140 self.elements_builder.extend_from_array(array);
141 self.nulls.append_non_null();
142
143 self.offsets_builder.append_value(
144 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
145 );
146 self.sizes_builder.append_value(
147 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
148 );
149
150 Ok(())
151 }
152
153 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
158 let Some(elements) = value.elements() else {
159 vortex_ensure!(
161 self.dtype.is_nullable(),
162 "Cannot append null value to non-nullable list builder"
163 );
164 self.append_null();
165 return Ok(());
166 };
167
168 let curr_offset = self.elements_builder.len();
169 let num_elements = elements.len();
170
171 assert!(
174 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
175 "appending this list would cause an offset overflow"
176 );
177
178 for scalar in elements {
179 self.elements_builder.append_scalar(&scalar)?;
180 }
181 self.nulls.append_non_null();
182
183 self.offsets_builder.append_value(
184 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
185 );
186 self.sizes_builder.append_value(
187 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
188 );
189
190 Ok(())
191 }
192
193 pub fn finish_into_listview(&mut self) -> ListViewArray {
195 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
196 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
197
198 let elements = self.elements_builder.finish();
199 let offsets = self.offsets_builder.finish();
200 let sizes = self.sizes_builder.finish();
201 let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
202
203 unsafe {
213 ListViewArray::new_unchecked(elements, offsets, sizes, validity)
214 .with_zero_copy_to_list(true)
215 }
216 }
217
218 pub fn element_dtype(&self) -> &DType {
221 let DType::List(element_dtype, ..) = &self.dtype else {
222 vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
223 };
224
225 element_dtype
226 }
227}
228
229impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
230 fn as_any(&self) -> &dyn std::any::Any {
231 self
232 }
233
234 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
235 self
236 }
237
238 fn dtype(&self) -> &DType {
239 &self.dtype
240 }
241
242 fn len(&self) -> usize {
243 self.offsets_builder.len()
244 }
245
246 fn append_zeros(&mut self, n: usize) {
247 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
248 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
249
250 let curr_offset = self.elements_builder.len();
252
253 for _ in 0..n {
256 self.offsets_builder.append_value(
257 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
258 );
259 self.sizes_builder.append_value(S::zero());
260 }
261
262 self.nulls.append_n_non_nulls(n);
263 }
264
265 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
266 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
267 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
268
269 let curr_offset = self.elements_builder.len();
271
272 for _ in 0..n {
274 self.offsets_builder.append_value(
275 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
276 );
277 self.sizes_builder.append_value(S::zero());
278 }
279
280 self.nulls.append_n_nulls(n);
282 }
283
284 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
285 vortex_ensure!(
286 scalar.dtype() == self.dtype(),
287 "ListViewBuilder expected scalar with dtype {}, got {}",
288 self.dtype(),
289 scalar.dtype()
290 );
291
292 let list_scalar = scalar.as_list();
293 self.append_value(list_scalar)
294 }
295
296 unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
297 #[expect(deprecated)]
298 let listview = array.to_listview();
299 if listview.is_empty() {
300 return;
301 }
302
303 let listview = listview
306 .rebuild(ListViewRebuildMode::MakeExact)
307 .vortex_expect("ListViewArray::rebuild(MakeExact) failed in extend_from_array");
308 debug_assert!(listview.is_zero_copy_to_list());
309
310 self.nulls.append_validity_mask(
311 array
312 .validity()
313 .vortex_expect("validity_mask in extend_from_array_unchecked")
314 .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
315 .vortex_expect("Failed to compute validity mask"),
316 );
317
318 let old_elements_len = self.elements_builder.len();
320 self.elements_builder
321 .reserve_exact(listview.elements().len());
322 self.elements_builder.extend_from_array(listview.elements());
323 let new_elements_len = self.elements_builder.len();
324
325 let extend_length = listview.len();
327 self.sizes_builder.reserve_exact(extend_length);
328 self.offsets_builder.reserve_exact(extend_length);
329
330 let cast_sizes = listview
332 .sizes()
333 .clone()
334 .cast(self.sizes_builder.dtype().clone())
335 .vortex_expect(
336 "was somehow unable to cast the new sizes to the type of the builder sizes",
337 );
338 self.sizes_builder.extend_from_array(&cast_sizes);
339
340 let uninit_range = self.offsets_builder.uninit_range(extend_length);
344
345 #[expect(deprecated)]
347 let new_offsets = listview.offsets().to_primitive();
348
349 match_each_integer_ptype!(new_offsets.ptype(), |A| {
350 adjust_and_extend_offsets::<O, A>(
351 uninit_range,
352 new_offsets,
353 old_elements_len,
354 new_elements_len,
355 );
356 })
357 }
358
359 fn reserve_exact(&mut self, capacity: usize) {
360 self.elements_builder.reserve_exact(capacity * 2);
361 self.offsets_builder.reserve_exact(capacity);
362 self.sizes_builder.reserve_exact(capacity);
363 self.nulls.reserve_exact(capacity);
364 }
365
366 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
367 self.nulls = LazyBitBufferBuilder::new(validity.len());
368 self.nulls.append_validity_mask(validity);
369 }
370
371 fn finish(&mut self) -> ArrayRef {
372 self.finish_into_listview().into_array()
373 }
374
375 fn finish_into_canonical(&mut self) -> Canonical {
376 Canonical::List(self.finish_into_listview())
377 }
378}
379
380fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
383 mut uninit_range: UninitRange<'a, O>,
384 new_offsets: PrimitiveArray,
385 old_elements_len: usize,
386 new_elements_len: usize,
387) {
388 let new_offsets_slice = new_offsets.as_slice::<A>();
389 let old_elements_len = O::from_usize(old_elements_len)
390 .vortex_expect("the old elements length did not fit into the offset type (impossible)");
391 let new_elements_len = O::from_usize(new_elements_len)
392 .vortex_expect("the current elements length did not fit into the offset type (impossible)");
393
394 for i in 0..uninit_range.len() {
395 let new_offset = O::from_usize(
396 new_offsets_slice[i]
397 .to_usize()
398 .vortex_expect("Offsets must always fit in usize"),
399 )
400 .vortex_expect("New offset somehow did not fit into the builder's offset type");
401
402 let adjusted_new_offset = new_offset + old_elements_len;
405 assert!(
406 adjusted_new_offset <= new_elements_len,
407 "[{i}/{}]: {new_offset} + {old_elements_len} \
408 = {adjusted_new_offset} <= {new_elements_len} failed",
409 uninit_range.len()
410 );
411
412 uninit_range.set_value(i, adjusted_new_offset);
413 }
414
415 unsafe { uninit_range.finish() };
418}
419
420#[cfg(test)]
421mod tests {
422 use std::sync::Arc;
423
424 use vortex_buffer::buffer;
425 use vortex_error::VortexExpect;
426
427 use super::ListViewBuilder;
428 use crate::IntoArray;
429 use crate::arrays::ListArray;
430 use crate::arrays::ListViewArray;
431 use crate::arrays::listview::ListViewArrayExt;
432 use crate::assert_arrays_eq;
433 use crate::builders::ArrayBuilder;
434 use crate::builders::listview::PrimitiveArray;
435 use crate::dtype::DType;
436 use crate::dtype::Nullability::NonNullable;
437 use crate::dtype::Nullability::Nullable;
438 use crate::dtype::PType::I32;
439 use crate::scalar::Scalar;
440 use crate::validity::Validity;
441
442 #[test]
443 fn test_empty() {
444 let mut builder =
445 ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
446
447 let listview = builder.finish();
448 assert_eq!(listview.len(), 0);
449 }
450
451 #[test]
452 fn test_basic_append_and_nulls() {
453 let dtype: Arc<DType> = Arc::new(I32.into());
454 let mut builder =
455 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
456
457 builder
459 .append_value(
460 Scalar::list(
461 Arc::clone(&dtype),
462 vec![1i32.into(), 2i32.into(), 3i32.into()],
463 NonNullable,
464 )
465 .as_list(),
466 )
467 .unwrap();
468
469 builder
471 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
472 .unwrap();
473
474 builder.append_null();
476
477 builder
479 .append_value(
480 Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
481 )
482 .unwrap();
483
484 let listview = builder.finish_into_listview();
485 assert_eq!(listview.len(), 4);
486
487 assert_arrays_eq!(
489 listview.list_elements_at(0).unwrap(),
490 PrimitiveArray::from_iter([1i32, 2, 3])
491 );
492
493 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
495
496 assert!(
498 !listview
499 .validity()
500 .vortex_expect("listview validity should be derivable")
501 .is_valid(2)
502 .unwrap()
503 );
504
505 assert_arrays_eq!(
507 listview.list_elements_at(3).unwrap(),
508 PrimitiveArray::from_iter([4i32, 5])
509 );
510 }
511
512 #[test]
513 fn test_different_offset_size_types() {
514 let dtype: Arc<DType> = Arc::new(I32.into());
516 let mut builder =
517 ListViewBuilder::<u32, u8>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
518
519 builder
520 .append_value(
521 Scalar::list(
522 Arc::clone(&dtype),
523 vec![1i32.into(), 2i32.into()],
524 NonNullable,
525 )
526 .as_list(),
527 )
528 .unwrap();
529
530 builder
531 .append_value(
532 Scalar::list(
533 dtype,
534 vec![3i32.into(), 4i32.into(), 5i32.into()],
535 NonNullable,
536 )
537 .as_list(),
538 )
539 .unwrap();
540
541 let listview = builder.finish_into_listview();
542 assert_eq!(listview.len(), 2);
543
544 assert_arrays_eq!(
546 listview.list_elements_at(0).unwrap(),
547 PrimitiveArray::from_iter([1i32, 2])
548 );
549
550 assert_arrays_eq!(
552 listview.list_elements_at(1).unwrap(),
553 PrimitiveArray::from_iter([3i32, 4, 5])
554 );
555
556 let dtype2: Arc<DType> = Arc::new(I32.into());
558 let mut builder2 =
559 ListViewBuilder::<u64, u16>::with_capacity(Arc::clone(&dtype2), NonNullable, 0, 0);
560
561 for i in 0..5 {
562 builder2
563 .append_value(
564 Scalar::list(Arc::clone(&dtype2), vec![(i * 10).into()], NonNullable).as_list(),
565 )
566 .unwrap();
567 }
568
569 let listview2 = builder2.finish_into_listview();
570 assert_eq!(listview2.len(), 5);
571
572 for i in 0..5i32 {
574 assert_arrays_eq!(
575 listview2.list_elements_at(i as usize).unwrap(),
576 PrimitiveArray::from_iter([i * 10])
577 );
578 }
579 }
580
581 #[test]
582 fn test_builder_trait_methods() {
583 let dtype: Arc<DType> = Arc::new(I32.into());
584 let mut builder =
585 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
586
587 builder.append_zeros(2);
589 assert_eq!(builder.len(), 2);
590
591 unsafe {
593 builder.append_nulls_unchecked(2);
594 }
595 assert_eq!(builder.len(), 4);
596
597 let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
599 builder.append_scalar(&list_scalar).unwrap();
600 assert_eq!(builder.len(), 5);
601
602 let listview = builder.finish_into_listview();
603 assert_eq!(listview.len(), 5);
604
605 assert_eq!(listview.list_elements_at(0).unwrap().len(), 0);
607 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
608
609 assert!(
611 !listview
612 .validity()
613 .vortex_expect("listview validity should be derivable")
614 .is_valid(2)
615 .unwrap()
616 );
617 assert!(
618 !listview
619 .validity()
620 .vortex_expect("listview validity should be derivable")
621 .is_valid(3)
622 .unwrap()
623 );
624
625 assert_arrays_eq!(
627 listview.list_elements_at(4).unwrap(),
628 PrimitiveArray::from_iter([10i32, 20])
629 );
630 }
631
632 #[test]
633 fn test_extend_from_array() {
634 let dtype: Arc<DType> = Arc::new(I32.into());
635
636 let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
638 [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
639 Arc::new(I32.into()),
640 )
641 .unwrap();
642
643 let mut builder =
644 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
645
646 builder
648 .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
649 .unwrap();
650
651 builder.extend_from_array(&source.into_array());
653
654 let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
656 std::iter::empty::<Option<Vec<i32>>>(),
657 Arc::new(I32.into()),
658 )
659 .unwrap();
660 builder.extend_from_array(&empty_source.into_array());
661
662 let listview = builder.finish_into_listview();
663 assert_eq!(listview.len(), 4);
664
665 assert_arrays_eq!(
668 listview.list_elements_at(0).unwrap(),
669 PrimitiveArray::from_iter([0i32])
670 );
671
672 assert_arrays_eq!(
674 listview.list_elements_at(1).unwrap(),
675 PrimitiveArray::from_iter([1i32, 2, 3])
676 );
677
678 assert!(
680 !listview
681 .validity()
682 .vortex_expect("listview validity should be derivable")
683 .is_valid(2)
684 .unwrap()
685 );
686
687 assert_arrays_eq!(
689 listview.list_elements_at(3).unwrap(),
690 PrimitiveArray::from_iter([4i32, 5])
691 );
692 }
693
694 #[test]
695 fn test_extend_from_array_overlapping_listview() {
696 let dtype: Arc<DType> = Arc::new(I32.into());
697
698 let source = unsafe {
703 ListViewArray::new_unchecked(
704 buffer![10i32, 20, 30].into_array(),
705 buffer![0u32, 1, 0].into_array(),
706 buffer![2u8, 2, 1].into_array(),
707 Validity::from_iter([true, false, true]),
708 )
709 };
710 assert!(!source.is_zero_copy_to_list());
711
712 let mut builder =
713 ListViewBuilder::<u32, u8>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
714 builder.extend_from_array(&source.into_array());
715
716 let listview = builder.finish_into_listview();
717 assert_eq!(listview.len(), 3);
718 assert!(listview.is_zero_copy_to_list());
719
720 assert_arrays_eq!(
721 listview.list_elements_at(0).unwrap(),
722 PrimitiveArray::from_iter([10i32, 20])
723 );
724 assert!(
725 !listview
726 .validity()
727 .vortex_expect("listview validity should be derivable")
728 .is_valid(1)
729 .unwrap()
730 );
731 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
732 assert_arrays_eq!(
733 listview.list_elements_at(2).unwrap(),
734 PrimitiveArray::from_iter([10i32])
735 );
736 }
737
738 #[test]
739 fn test_error_append_null_to_non_nullable() {
740 let dtype: Arc<DType> = Arc::new(I32.into());
741 let mut builder =
742 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
743
744 let null_scalar = Scalar::null(DType::List(dtype, Nullable));
746 let null_list = null_scalar.as_list();
747
748 let result = builder.append_value(null_list);
750 assert!(result.is_err());
751 assert!(
752 result
753 .unwrap_err()
754 .to_string()
755 .contains("null value to non-nullable")
756 );
757 }
758
759 #[test]
760 fn test_append_array_as_list() {
761 let dtype: Arc<DType> = Arc::new(I32.into());
762 let mut builder =
763 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
764
765 let arr1 = buffer![1i32, 2, 3].into_array();
767 builder.append_array_as_list(&arr1).unwrap();
768
769 builder
771 .append_value(
772 Scalar::list(
773 Arc::clone(&dtype),
774 vec![10i32.into(), 11i32.into()],
775 NonNullable,
776 )
777 .as_list(),
778 )
779 .unwrap();
780
781 let arr2 = buffer![4i32, 5].into_array();
783 builder.append_array_as_list(&arr2).unwrap();
784
785 let arr3 = buffer![0i32; 0].into_array();
787 builder.append_array_as_list(&arr3).unwrap();
788
789 builder
791 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
792 .unwrap();
793
794 let listview = builder.finish_into_listview();
795 assert_eq!(listview.len(), 5);
796
797 assert_arrays_eq!(
799 listview.elements(),
800 PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
801 );
802
803 assert_arrays_eq!(
805 listview.offsets(),
806 PrimitiveArray::from_iter([0u32, 3, 5, 7, 7])
807 );
808
809 assert_arrays_eq!(
811 listview.sizes(),
812 PrimitiveArray::from_iter([3u32, 2, 2, 0, 0])
813 );
814
815 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype, NonNullable, 20, 10);
817 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
818 assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
819 }
820}