1use std::sync::Arc;
14
15use vortex_error::VortexExpect;
16use vortex_error::VortexResult;
17use vortex_error::vortex_ensure;
18use vortex_error::vortex_panic;
19use vortex_mask::Mask;
20
21use crate::ArrayRef;
22use crate::Canonical;
23use crate::ToCanonical;
24use crate::array::IntoArray;
25use crate::arrays::ListViewArray;
26use crate::arrays::PrimitiveArray;
27use crate::arrays::listview::ListViewArrayExt;
28use crate::arrays::listview::ListViewRebuildMode;
29use crate::builders::ArrayBuilder;
30use crate::builders::DEFAULT_BUILDER_CAPACITY;
31use crate::builders::PrimitiveBuilder;
32use crate::builders::UninitRange;
33use crate::builders::builder_with_capacity;
34use crate::builders::lazy_null_builder::LazyBitBufferBuilder;
35use crate::builtins::ArrayBuiltins;
36use crate::dtype::DType;
37use crate::dtype::IntegerPType;
38use crate::dtype::Nullability;
39use crate::match_each_integer_ptype;
40use crate::scalar::ListScalar;
41use crate::scalar::Scalar;
42
43pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
53 dtype: DType,
55
56 elements_builder: Box<dyn ArrayBuilder>,
58
59 offsets_builder: PrimitiveBuilder<O>,
61
62 sizes_builder: PrimitiveBuilder<S>,
64
65 nulls: LazyBitBufferBuilder,
67}
68
69impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
70 pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
72 Self::with_capacity(
73 element_dtype,
74 nullability,
75 DEFAULT_BUILDER_CAPACITY * 2,
78 DEFAULT_BUILDER_CAPACITY,
79 )
80 }
81
82 pub fn with_capacity(
90 element_dtype: Arc<DType>,
91 nullability: Nullability,
92 elements_capacity: usize,
93 capacity: usize,
94 ) -> Self {
95 let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
96
97 let offsets_builder =
98 PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
99 let sizes_builder =
100 PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
101
102 let nulls = LazyBitBufferBuilder::new(capacity);
103
104 Self {
105 dtype: DType::List(element_dtype, nullability),
106 elements_builder,
107 offsets_builder,
108 sizes_builder,
109 nulls,
110 }
111 }
112
113 pub fn append_array_as_list(&mut self, array: &ArrayRef) -> VortexResult<()> {
120 vortex_ensure!(
121 array.dtype() == self.element_dtype(),
122 "Array dtype {:?} does not match list element dtype {:?}",
123 array.dtype(),
124 self.element_dtype()
125 );
126
127 let curr_offset = self.elements_builder.len();
128 let num_elements = array.len();
129
130 assert!(
133 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
134 "appending this list would cause an offset overflow"
135 );
136
137 self.elements_builder.extend_from_array(array);
138 self.nulls.append_non_null();
139
140 self.offsets_builder.append_value(
141 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
142 );
143 self.sizes_builder.append_value(
144 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
145 );
146
147 Ok(())
148 }
149
150 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
155 let Some(elements) = value.elements() else {
156 vortex_ensure!(
158 self.dtype.is_nullable(),
159 "Cannot append null value to non-nullable list builder"
160 );
161 self.append_null();
162 return Ok(());
163 };
164
165 let curr_offset = self.elements_builder.len();
166 let num_elements = elements.len();
167
168 assert!(
171 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
172 "appending this list would cause an offset overflow"
173 );
174
175 for scalar in elements {
176 self.elements_builder.append_scalar(&scalar)?;
177 }
178 self.nulls.append_non_null();
179
180 self.offsets_builder.append_value(
181 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
182 );
183 self.sizes_builder.append_value(
184 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
185 );
186
187 Ok(())
188 }
189
190 pub fn finish_into_listview(&mut self) -> ListViewArray {
192 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
193 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
194
195 let elements = self.elements_builder.finish();
196 let offsets = self.offsets_builder.finish();
197 let sizes = self.sizes_builder.finish();
198 let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
199
200 unsafe {
210 ListViewArray::new_unchecked(elements, offsets, sizes, validity)
211 .with_zero_copy_to_list(true)
212 }
213 }
214
215 pub fn element_dtype(&self) -> &DType {
218 let DType::List(element_dtype, ..) = &self.dtype else {
219 vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
220 };
221
222 element_dtype
223 }
224}
225
226impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
227 fn as_any(&self) -> &dyn std::any::Any {
228 self
229 }
230
231 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
232 self
233 }
234
235 fn dtype(&self) -> &DType {
236 &self.dtype
237 }
238
239 fn len(&self) -> usize {
240 self.offsets_builder.len()
241 }
242
243 fn append_zeros(&mut self, n: usize) {
244 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
245 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
246
247 let curr_offset = self.elements_builder.len();
249
250 for _ in 0..n {
253 self.offsets_builder.append_value(
254 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
255 );
256 self.sizes_builder.append_value(S::zero());
257 }
258
259 self.nulls.append_n_non_nulls(n);
260 }
261
262 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
263 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
264 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
265
266 let curr_offset = self.elements_builder.len();
268
269 for _ in 0..n {
271 self.offsets_builder.append_value(
272 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
273 );
274 self.sizes_builder.append_value(S::zero());
275 }
276
277 self.nulls.append_n_nulls(n);
279 }
280
281 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
282 vortex_ensure!(
283 scalar.dtype() == self.dtype(),
284 "ListViewBuilder expected scalar with dtype {}, got {}",
285 self.dtype(),
286 scalar.dtype()
287 );
288
289 let list_scalar = scalar.as_list();
290 self.append_value(list_scalar)
291 }
292
293 unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
294 let listview = array.to_listview();
295 if listview.is_empty() {
296 return;
297 }
298
299 if !listview.is_zero_copy_to_list() {
302 for i in 0..listview.len() {
303 let list = listview
304 .scalar_at(i)
305 .vortex_expect("scalar_at failed in extend_from_array_unchecked");
306
307 self.append_scalar(&list)
308 .vortex_expect("was unable to extend the `ListViewBuilder`")
309 }
310
311 return;
312 }
313
314 let listview = listview
317 .rebuild(ListViewRebuildMode::MakeExact)
318 .vortex_expect("ListViewArray::rebuild(MakeExact) failed in extend_from_array");
319 debug_assert!(listview.is_zero_copy_to_list());
320
321 self.nulls.append_validity_mask(
322 array
323 .validity_mask()
324 .vortex_expect("validity_mask in extend_from_array_unchecked"),
325 );
326
327 let old_elements_len = self.elements_builder.len();
329 self.elements_builder
330 .reserve_exact(listview.elements().len());
331 self.elements_builder.extend_from_array(listview.elements());
332 let new_elements_len = self.elements_builder.len();
333
334 let extend_length = listview.len();
336 self.sizes_builder.reserve_exact(extend_length);
337 self.offsets_builder.reserve_exact(extend_length);
338
339 let cast_sizes = listview
341 .sizes()
342 .clone()
343 .cast(self.sizes_builder.dtype().clone())
344 .vortex_expect(
345 "was somehow unable to cast the new sizes to the type of the builder sizes",
346 );
347 self.sizes_builder.extend_from_array(&cast_sizes);
348
349 let uninit_range = self.offsets_builder.uninit_range(extend_length);
353
354 let new_offsets = listview.offsets().to_primitive();
356
357 match_each_integer_ptype!(new_offsets.ptype(), |A| {
358 adjust_and_extend_offsets::<O, A>(
359 uninit_range,
360 new_offsets,
361 old_elements_len,
362 new_elements_len,
363 );
364 })
365 }
366
367 fn reserve_exact(&mut self, capacity: usize) {
368 self.elements_builder.reserve_exact(capacity * 2);
369 self.offsets_builder.reserve_exact(capacity);
370 self.sizes_builder.reserve_exact(capacity);
371 self.nulls.reserve_exact(capacity);
372 }
373
374 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
375 self.nulls = LazyBitBufferBuilder::new(validity.len());
376 self.nulls.append_validity_mask(validity);
377 }
378
379 fn finish(&mut self) -> ArrayRef {
380 self.finish_into_listview().into_array()
381 }
382
383 fn finish_into_canonical(&mut self) -> Canonical {
384 Canonical::List(self.finish_into_listview())
385 }
386}
387
388fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
391 mut uninit_range: UninitRange<'a, O>,
392 new_offsets: PrimitiveArray,
393 old_elements_len: usize,
394 new_elements_len: usize,
395) {
396 let new_offsets_slice = new_offsets.as_slice::<A>();
397 let old_elements_len = O::from_usize(old_elements_len)
398 .vortex_expect("the old elements length did not fit into the offset type (impossible)");
399 let new_elements_len = O::from_usize(new_elements_len)
400 .vortex_expect("the current elements length did not fit into the offset type (impossible)");
401
402 for i in 0..uninit_range.len() {
403 let new_offset = O::from_usize(
404 new_offsets_slice[i]
405 .to_usize()
406 .vortex_expect("Offsets must always fit in usize"),
407 )
408 .vortex_expect("New offset somehow did not fit into the builder's offset type");
409
410 let adjusted_new_offset = new_offset + old_elements_len;
413 assert!(
414 adjusted_new_offset <= new_elements_len,
415 "[{i}/{}]: {new_offset} + {old_elements_len} \
416 = {adjusted_new_offset} <= {new_elements_len} failed",
417 uninit_range.len()
418 );
419
420 uninit_range.set_value(i, adjusted_new_offset);
421 }
422
423 unsafe { uninit_range.finish() };
426}
427
428#[cfg(test)]
429mod tests {
430 use std::sync::Arc;
431
432 use vortex_error::VortexExpect;
433
434 use super::ListViewBuilder;
435 use crate::IntoArray;
436 use crate::arrays::ListArray;
437 use crate::arrays::listview::ListViewArrayExt;
438 use crate::assert_arrays_eq;
439 use crate::builders::ArrayBuilder;
440 use crate::builders::listview::PrimitiveArray;
441 use crate::dtype::DType;
442 use crate::dtype::Nullability::NonNullable;
443 use crate::dtype::Nullability::Nullable;
444 use crate::dtype::PType::I32;
445 use crate::scalar::Scalar;
446
447 #[test]
448 fn test_empty() {
449 let mut builder =
450 ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
451
452 let listview = builder.finish();
453 assert_eq!(listview.len(), 0);
454 }
455
456 #[test]
457 fn test_basic_append_and_nulls() {
458 let dtype: Arc<DType> = Arc::new(I32.into());
459 let mut builder =
460 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
461
462 builder
464 .append_value(
465 Scalar::list(
466 Arc::clone(&dtype),
467 vec![1i32.into(), 2i32.into(), 3i32.into()],
468 NonNullable,
469 )
470 .as_list(),
471 )
472 .unwrap();
473
474 builder
476 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
477 .unwrap();
478
479 builder.append_null();
481
482 builder
484 .append_value(
485 Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
486 )
487 .unwrap();
488
489 let listview = builder.finish_into_listview();
490 assert_eq!(listview.len(), 4);
491
492 assert_arrays_eq!(
494 listview.list_elements_at(0).unwrap(),
495 PrimitiveArray::from_iter([1i32, 2, 3])
496 );
497
498 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
500
501 assert!(
503 !listview
504 .validity()
505 .vortex_expect("listview validity should be derivable")
506 .is_valid(2)
507 .unwrap()
508 );
509
510 assert_arrays_eq!(
512 listview.list_elements_at(3).unwrap(),
513 PrimitiveArray::from_iter([4i32, 5])
514 );
515 }
516
517 #[test]
518 fn test_different_offset_size_types() {
519 let dtype: Arc<DType> = Arc::new(I32.into());
521 let mut builder =
522 ListViewBuilder::<u32, u8>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
523
524 builder
525 .append_value(
526 Scalar::list(
527 Arc::clone(&dtype),
528 vec![1i32.into(), 2i32.into()],
529 NonNullable,
530 )
531 .as_list(),
532 )
533 .unwrap();
534
535 builder
536 .append_value(
537 Scalar::list(
538 dtype,
539 vec![3i32.into(), 4i32.into(), 5i32.into()],
540 NonNullable,
541 )
542 .as_list(),
543 )
544 .unwrap();
545
546 let listview = builder.finish_into_listview();
547 assert_eq!(listview.len(), 2);
548
549 assert_arrays_eq!(
551 listview.list_elements_at(0).unwrap(),
552 PrimitiveArray::from_iter([1i32, 2])
553 );
554
555 assert_arrays_eq!(
557 listview.list_elements_at(1).unwrap(),
558 PrimitiveArray::from_iter([3i32, 4, 5])
559 );
560
561 let dtype2: Arc<DType> = Arc::new(I32.into());
563 let mut builder2 =
564 ListViewBuilder::<u64, u16>::with_capacity(Arc::clone(&dtype2), NonNullable, 0, 0);
565
566 for i in 0..5 {
567 builder2
568 .append_value(
569 Scalar::list(Arc::clone(&dtype2), vec![(i * 10).into()], NonNullable).as_list(),
570 )
571 .unwrap();
572 }
573
574 let listview2 = builder2.finish_into_listview();
575 assert_eq!(listview2.len(), 5);
576
577 for i in 0..5i32 {
579 assert_arrays_eq!(
580 listview2.list_elements_at(i as usize).unwrap(),
581 PrimitiveArray::from_iter([i * 10])
582 );
583 }
584 }
585
586 #[test]
587 fn test_builder_trait_methods() {
588 let dtype: Arc<DType> = Arc::new(I32.into());
589 let mut builder =
590 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
591
592 builder.append_zeros(2);
594 assert_eq!(builder.len(), 2);
595
596 unsafe {
598 builder.append_nulls_unchecked(2);
599 }
600 assert_eq!(builder.len(), 4);
601
602 let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
604 builder.append_scalar(&list_scalar).unwrap();
605 assert_eq!(builder.len(), 5);
606
607 let listview = builder.finish_into_listview();
608 assert_eq!(listview.len(), 5);
609
610 assert_eq!(listview.list_elements_at(0).unwrap().len(), 0);
612 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
613
614 assert!(
616 !listview
617 .validity()
618 .vortex_expect("listview validity should be derivable")
619 .is_valid(2)
620 .unwrap()
621 );
622 assert!(
623 !listview
624 .validity()
625 .vortex_expect("listview validity should be derivable")
626 .is_valid(3)
627 .unwrap()
628 );
629
630 assert_arrays_eq!(
632 listview.list_elements_at(4).unwrap(),
633 PrimitiveArray::from_iter([10i32, 20])
634 );
635 }
636
637 #[test]
638 fn test_extend_from_array() {
639 let dtype: Arc<DType> = Arc::new(I32.into());
640
641 let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
643 [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
644 Arc::new(I32.into()),
645 )
646 .unwrap();
647
648 let mut builder =
649 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
650
651 builder
653 .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
654 .unwrap();
655
656 builder.extend_from_array(&source.into_array());
658
659 let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
661 std::iter::empty::<Option<Vec<i32>>>(),
662 Arc::new(I32.into()),
663 )
664 .unwrap();
665 builder.extend_from_array(&empty_source.into_array());
666
667 let listview = builder.finish_into_listview();
668 assert_eq!(listview.len(), 4);
669
670 assert_arrays_eq!(
673 listview.list_elements_at(0).unwrap(),
674 PrimitiveArray::from_iter([0i32])
675 );
676
677 assert_arrays_eq!(
679 listview.list_elements_at(1).unwrap(),
680 PrimitiveArray::from_iter([1i32, 2, 3])
681 );
682
683 assert!(
685 !listview
686 .validity()
687 .vortex_expect("listview validity should be derivable")
688 .is_valid(2)
689 .unwrap()
690 );
691
692 assert_arrays_eq!(
694 listview.list_elements_at(3).unwrap(),
695 PrimitiveArray::from_iter([4i32, 5])
696 );
697 }
698
699 #[test]
700 fn test_error_append_null_to_non_nullable() {
701 let dtype: Arc<DType> = Arc::new(I32.into());
702 let mut builder =
703 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
704
705 let null_scalar = Scalar::null(DType::List(dtype, Nullable));
707 let null_list = null_scalar.as_list();
708
709 let result = builder.append_value(null_list);
711 assert!(result.is_err());
712 assert!(
713 result
714 .unwrap_err()
715 .to_string()
716 .contains("null value to non-nullable")
717 );
718 }
719
720 #[test]
721 fn test_append_array_as_list() {
722 use vortex_buffer::buffer;
723
724 let dtype: Arc<DType> = Arc::new(I32.into());
725 let mut builder =
726 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
727
728 let arr1 = buffer![1i32, 2, 3].into_array();
730 builder.append_array_as_list(&arr1).unwrap();
731
732 builder
734 .append_value(
735 Scalar::list(
736 Arc::clone(&dtype),
737 vec![10i32.into(), 11i32.into()],
738 NonNullable,
739 )
740 .as_list(),
741 )
742 .unwrap();
743
744 let arr2 = buffer![4i32, 5].into_array();
746 builder.append_array_as_list(&arr2).unwrap();
747
748 let arr3 = buffer![0i32; 0].into_array();
750 builder.append_array_as_list(&arr3).unwrap();
751
752 builder
754 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
755 .unwrap();
756
757 let listview = builder.finish_into_listview();
758 assert_eq!(listview.len(), 5);
759
760 assert_arrays_eq!(
762 listview.elements(),
763 PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
764 );
765
766 assert_arrays_eq!(
768 listview.offsets(),
769 PrimitiveArray::from_iter([0u32, 3, 5, 7, 7])
770 );
771
772 assert_arrays_eq!(
774 listview.sizes(),
775 PrimitiveArray::from_iter([3u32, 2, 2, 0, 0])
776 );
777
778 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype, NonNullable, 20, 10);
780 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
781 assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
782 }
783}