1use std::sync::Arc;
14
15use vortex_error::VortexExpect;
16use vortex_error::VortexResult;
17use vortex_error::vortex_ensure;
18use vortex_error::vortex_panic;
19use vortex_mask::Mask;
20
21use crate::ArrayRef;
22use crate::Canonical;
23use crate::LEGACY_SESSION;
24use crate::VortexSessionExecute;
25use crate::array::IntoArray;
26use crate::arrays::ListViewArray;
27use crate::arrays::PrimitiveArray;
28use crate::arrays::listview::ListViewArrayExt;
29use crate::arrays::listview::ListViewRebuildMode;
30use crate::builders::ArrayBuilder;
31use crate::builders::DEFAULT_BUILDER_CAPACITY;
32use crate::builders::PrimitiveBuilder;
33use crate::builders::UninitRange;
34use crate::builders::builder_with_capacity;
35use crate::builders::lazy_null_builder::LazyBitBufferBuilder;
36use crate::builtins::ArrayBuiltins;
37use crate::dtype::DType;
38use crate::dtype::IntegerPType;
39use crate::dtype::Nullability;
40use crate::match_each_integer_ptype;
41use crate::scalar::ListScalar;
42use crate::scalar::Scalar;
43
44pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
54 dtype: DType,
56
57 elements_builder: Box<dyn ArrayBuilder>,
59
60 offsets_builder: PrimitiveBuilder<O>,
62
63 sizes_builder: PrimitiveBuilder<S>,
65
66 nulls: LazyBitBufferBuilder,
68}
69
70impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
71 pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
73 Self::with_capacity(
74 element_dtype,
75 nullability,
76 DEFAULT_BUILDER_CAPACITY * 2,
79 DEFAULT_BUILDER_CAPACITY,
80 )
81 }
82
83 pub fn with_capacity(
91 element_dtype: Arc<DType>,
92 nullability: Nullability,
93 elements_capacity: usize,
94 capacity: usize,
95 ) -> Self {
96 let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
97
98 let offsets_builder =
99 PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
100 let sizes_builder =
101 PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
102
103 let nulls = LazyBitBufferBuilder::new(capacity);
104
105 Self {
106 dtype: DType::List(element_dtype, nullability),
107 elements_builder,
108 offsets_builder,
109 sizes_builder,
110 nulls,
111 }
112 }
113
114 pub fn append_array_as_list(&mut self, array: &ArrayRef) -> VortexResult<()> {
121 vortex_ensure!(
122 array.dtype() == self.element_dtype(),
123 "Array dtype {:?} does not match list element dtype {:?}",
124 array.dtype(),
125 self.element_dtype()
126 );
127
128 let curr_offset = self.elements_builder.len();
129 let num_elements = array.len();
130
131 assert!(
134 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
135 "appending this list would cause an offset overflow"
136 );
137
138 self.elements_builder.extend_from_array(array);
139 self.nulls.append_non_null();
140
141 self.offsets_builder.append_value(
142 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
143 );
144 self.sizes_builder.append_value(
145 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
146 );
147
148 Ok(())
149 }
150
151 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
156 let Some(elements) = value.elements() else {
157 vortex_ensure!(
159 self.dtype.is_nullable(),
160 "Cannot append null value to non-nullable list builder"
161 );
162 self.append_null();
163 return Ok(());
164 };
165
166 let curr_offset = self.elements_builder.len();
167 let num_elements = elements.len();
168
169 assert!(
172 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
173 "appending this list would cause an offset overflow"
174 );
175
176 for scalar in elements {
177 self.elements_builder.append_scalar(&scalar)?;
178 }
179 self.nulls.append_non_null();
180
181 self.offsets_builder.append_value(
182 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
183 );
184 self.sizes_builder.append_value(
185 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
186 );
187
188 Ok(())
189 }
190
191 pub fn finish_into_listview(&mut self) -> ListViewArray {
193 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
194 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
195
196 let elements = self.elements_builder.finish();
197 let offsets = self.offsets_builder.finish();
198 let sizes = self.sizes_builder.finish();
199 let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
200
201 unsafe {
211 ListViewArray::new_unchecked(elements, offsets, sizes, validity)
212 .with_zero_copy_to_list(true)
213 }
214 }
215
216 pub fn element_dtype(&self) -> &DType {
219 let DType::List(element_dtype, ..) = &self.dtype else {
220 vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
221 };
222
223 element_dtype
224 }
225}
226
227impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
228 fn as_any(&self) -> &dyn std::any::Any {
229 self
230 }
231
232 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
233 self
234 }
235
236 fn dtype(&self) -> &DType {
237 &self.dtype
238 }
239
240 fn len(&self) -> usize {
241 self.offsets_builder.len()
242 }
243
244 fn append_zeros(&mut self, n: usize) {
245 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
246 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
247
248 let curr_offset = self.elements_builder.len();
250
251 for _ in 0..n {
254 self.offsets_builder.append_value(
255 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
256 );
257 self.sizes_builder.append_value(S::zero());
258 }
259
260 self.nulls.append_n_non_nulls(n);
261 }
262
263 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
264 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
265 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
266
267 let curr_offset = self.elements_builder.len();
269
270 for _ in 0..n {
272 self.offsets_builder.append_value(
273 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
274 );
275 self.sizes_builder.append_value(S::zero());
276 }
277
278 self.nulls.append_n_nulls(n);
280 }
281
282 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
283 vortex_ensure!(
284 scalar.dtype() == self.dtype(),
285 "ListViewBuilder expected scalar with dtype {}, got {}",
286 self.dtype(),
287 scalar.dtype()
288 );
289
290 let list_scalar = scalar.as_list();
291 self.append_value(list_scalar)
292 }
293
294 unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
295 let mut ctx = LEGACY_SESSION.create_execution_ctx();
300
301 let listview = array
302 .clone()
303 .execute::<ListViewArray>(&mut ctx)
304 .vortex_expect("failed to execute array into ListViewArray in extend_from_array");
305 if listview.is_empty() {
306 return;
307 }
308
309 let listview = listview
312 .rebuild(ListViewRebuildMode::MakeExact, &mut ctx)
313 .vortex_expect("ListViewArray::rebuild(MakeExact) failed in extend_from_array");
314 debug_assert!(listview.is_zero_copy_to_list());
315
316 self.nulls.append_validity_mask(
317 &array
318 .validity()
319 .vortex_expect("validity_mask in extend_from_array_unchecked")
320 .execute_mask(array.len(), &mut ctx)
321 .vortex_expect("Failed to compute validity mask"),
322 );
323
324 let old_elements_len = self.elements_builder.len();
326 self.elements_builder
327 .reserve_exact(listview.elements().len());
328 self.elements_builder.extend_from_array(listview.elements());
329 let new_elements_len = self.elements_builder.len();
330
331 let extend_length = listview.len();
333 self.sizes_builder.reserve_exact(extend_length);
334 self.offsets_builder.reserve_exact(extend_length);
335
336 let cast_sizes = listview
338 .sizes()
339 .clone()
340 .cast(self.sizes_builder.dtype().clone())
341 .vortex_expect(
342 "was somehow unable to cast the new sizes to the type of the builder sizes",
343 );
344 self.sizes_builder.extend_from_array(&cast_sizes);
345
346 let uninit_range = self.offsets_builder.uninit_range(extend_length);
350
351 let new_offsets = listview
353 .offsets()
354 .clone()
355 .execute::<PrimitiveArray>(&mut ctx)
356 .vortex_expect("failed to execute list view offsets into a PrimitiveArray");
357
358 match_each_integer_ptype!(new_offsets.ptype(), |A| {
359 adjust_and_extend_offsets::<O, A>(
360 uninit_range,
361 new_offsets,
362 old_elements_len,
363 new_elements_len,
364 );
365 })
366 }
367
368 fn reserve_exact(&mut self, capacity: usize) {
369 self.elements_builder.reserve_exact(capacity * 2);
370 self.offsets_builder.reserve_exact(capacity);
371 self.sizes_builder.reserve_exact(capacity);
372 self.nulls.reserve_exact(capacity);
373 }
374
375 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
376 self.nulls = LazyBitBufferBuilder::from_validity_mask(validity);
377 }
378
379 fn finish(&mut self) -> ArrayRef {
380 self.finish_into_listview().into_array()
381 }
382
383 fn finish_into_canonical(&mut self) -> Canonical {
384 Canonical::List(self.finish_into_listview())
385 }
386}
387
388fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
391 mut uninit_range: UninitRange<'a, O>,
392 new_offsets: PrimitiveArray,
393 old_elements_len: usize,
394 new_elements_len: usize,
395) {
396 let new_offsets_slice = new_offsets.as_slice::<A>();
397 let old_elements_len = O::from_usize(old_elements_len)
398 .vortex_expect("the old elements length did not fit into the offset type (impossible)");
399 let new_elements_len = O::from_usize(new_elements_len)
400 .vortex_expect("the current elements length did not fit into the offset type (impossible)");
401
402 for i in 0..uninit_range.len() {
403 let new_offset = O::from_usize(
404 new_offsets_slice[i]
405 .to_usize()
406 .vortex_expect("Offsets must always fit in usize"),
407 )
408 .vortex_expect("New offset somehow did not fit into the builder's offset type");
409
410 let adjusted_new_offset = new_offset + old_elements_len;
413 assert!(
414 adjusted_new_offset <= new_elements_len,
415 "[{i}/{}]: {new_offset} + {old_elements_len} \
416 = {adjusted_new_offset} <= {new_elements_len} failed",
417 uninit_range.len()
418 );
419
420 uninit_range.set_value(i, adjusted_new_offset);
421 }
422
423 unsafe { uninit_range.finish() };
426}
427
428#[cfg(test)]
429mod tests {
430 use std::sync::Arc;
431
432 use vortex_buffer::buffer;
433 use vortex_error::VortexExpect;
434
435 use super::ListViewBuilder;
436 use crate::IntoArray;
437 use crate::LEGACY_SESSION;
438 use crate::VortexSessionExecute;
439 use crate::arrays::ListArray;
440 use crate::arrays::ListViewArray;
441 use crate::arrays::listview::ListViewArrayExt;
442 use crate::assert_arrays_eq;
443 use crate::builders::ArrayBuilder;
444 use crate::builders::listview::PrimitiveArray;
445 use crate::dtype::DType;
446 use crate::dtype::Nullability::NonNullable;
447 use crate::dtype::Nullability::Nullable;
448 use crate::dtype::PType::I32;
449 use crate::scalar::Scalar;
450 use crate::validity::Validity;
451
452 #[test]
453 fn test_empty() {
454 let mut builder =
455 ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
456
457 let listview = builder.finish();
458 assert_eq!(listview.len(), 0);
459 }
460
461 #[test]
462 fn test_basic_append_and_nulls() {
463 let mut ctx = LEGACY_SESSION.create_execution_ctx();
464 let dtype: Arc<DType> = Arc::new(I32.into());
465 let mut builder =
466 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
467
468 builder
470 .append_value(
471 Scalar::list(
472 Arc::clone(&dtype),
473 vec![1i32.into(), 2i32.into(), 3i32.into()],
474 NonNullable,
475 )
476 .as_list(),
477 )
478 .unwrap();
479
480 builder
482 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
483 .unwrap();
484
485 builder.append_null();
487
488 builder
490 .append_value(
491 Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
492 )
493 .unwrap();
494
495 let listview = builder.finish_into_listview();
496 assert_eq!(listview.len(), 4);
497
498 assert_arrays_eq!(
500 listview.list_elements_at(0).unwrap(),
501 PrimitiveArray::from_iter([1i32, 2, 3])
502 );
503
504 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
506
507 assert!(
509 !listview
510 .validity()
511 .vortex_expect("listview validity should be derivable")
512 .execute_is_valid(2, &mut ctx)
513 .unwrap()
514 );
515
516 assert_arrays_eq!(
518 listview.list_elements_at(3).unwrap(),
519 PrimitiveArray::from_iter([4i32, 5])
520 );
521 }
522
523 #[test]
524 fn test_different_offset_size_types() {
525 let dtype: Arc<DType> = Arc::new(I32.into());
527 let mut builder =
528 ListViewBuilder::<u32, u8>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
529
530 builder
531 .append_value(
532 Scalar::list(
533 Arc::clone(&dtype),
534 vec![1i32.into(), 2i32.into()],
535 NonNullable,
536 )
537 .as_list(),
538 )
539 .unwrap();
540
541 builder
542 .append_value(
543 Scalar::list(
544 dtype,
545 vec![3i32.into(), 4i32.into(), 5i32.into()],
546 NonNullable,
547 )
548 .as_list(),
549 )
550 .unwrap();
551
552 let listview = builder.finish_into_listview();
553 assert_eq!(listview.len(), 2);
554
555 assert_arrays_eq!(
557 listview.list_elements_at(0).unwrap(),
558 PrimitiveArray::from_iter([1i32, 2])
559 );
560
561 assert_arrays_eq!(
563 listview.list_elements_at(1).unwrap(),
564 PrimitiveArray::from_iter([3i32, 4, 5])
565 );
566
567 let dtype2: Arc<DType> = Arc::new(I32.into());
569 let mut builder2 =
570 ListViewBuilder::<u64, u16>::with_capacity(Arc::clone(&dtype2), NonNullable, 0, 0);
571
572 for i in 0..5 {
573 builder2
574 .append_value(
575 Scalar::list(Arc::clone(&dtype2), vec![(i * 10).into()], NonNullable).as_list(),
576 )
577 .unwrap();
578 }
579
580 let listview2 = builder2.finish_into_listview();
581 assert_eq!(listview2.len(), 5);
582
583 for i in 0..5i32 {
585 assert_arrays_eq!(
586 listview2.list_elements_at(i as usize).unwrap(),
587 PrimitiveArray::from_iter([i * 10])
588 );
589 }
590 }
591
592 #[test]
593 fn test_builder_trait_methods() {
594 let mut ctx = LEGACY_SESSION.create_execution_ctx();
595 let dtype: Arc<DType> = Arc::new(I32.into());
596 let mut builder =
597 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
598
599 builder.append_zeros(2);
601 assert_eq!(builder.len(), 2);
602
603 unsafe {
605 builder.append_nulls_unchecked(2);
606 }
607 assert_eq!(builder.len(), 4);
608
609 let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
611 builder.append_scalar(&list_scalar).unwrap();
612 assert_eq!(builder.len(), 5);
613
614 let listview = builder.finish_into_listview();
615 assert_eq!(listview.len(), 5);
616
617 assert_eq!(listview.list_elements_at(0).unwrap().len(), 0);
619 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
620
621 assert!(
623 !listview
624 .validity()
625 .vortex_expect("listview validity should be derivable")
626 .execute_is_valid(2, &mut ctx)
627 .unwrap()
628 );
629 assert!(
630 !listview
631 .validity()
632 .vortex_expect("listview validity should be derivable")
633 .execute_is_valid(3, &mut ctx)
634 .unwrap()
635 );
636
637 assert_arrays_eq!(
639 listview.list_elements_at(4).unwrap(),
640 PrimitiveArray::from_iter([10i32, 20])
641 );
642 }
643
644 #[test]
645 fn test_extend_from_array() {
646 let mut ctx = LEGACY_SESSION.create_execution_ctx();
647 let dtype: Arc<DType> = Arc::new(I32.into());
648
649 let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
651 [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
652 Arc::new(I32.into()),
653 )
654 .unwrap();
655
656 let mut builder =
657 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
658
659 builder
661 .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
662 .unwrap();
663
664 builder.extend_from_array(&source.into_array());
666
667 let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
669 std::iter::empty::<Option<Vec<i32>>>(),
670 Arc::new(I32.into()),
671 )
672 .unwrap();
673 builder.extend_from_array(&empty_source.into_array());
674
675 let listview = builder.finish_into_listview();
676 assert_eq!(listview.len(), 4);
677
678 assert_arrays_eq!(
681 listview.list_elements_at(0).unwrap(),
682 PrimitiveArray::from_iter([0i32])
683 );
684
685 assert_arrays_eq!(
687 listview.list_elements_at(1).unwrap(),
688 PrimitiveArray::from_iter([1i32, 2, 3])
689 );
690
691 assert!(
693 !listview
694 .validity()
695 .vortex_expect("listview validity should be derivable")
696 .execute_is_valid(2, &mut ctx)
697 .unwrap()
698 );
699
700 assert_arrays_eq!(
702 listview.list_elements_at(3).unwrap(),
703 PrimitiveArray::from_iter([4i32, 5])
704 );
705 }
706
707 #[test]
708 fn test_extend_from_array_overlapping_listview() {
709 let mut ctx = LEGACY_SESSION.create_execution_ctx();
710 let dtype: Arc<DType> = Arc::new(I32.into());
711
712 let source = unsafe {
717 ListViewArray::new_unchecked(
718 buffer![10i32, 20, 30].into_array(),
719 buffer![0u32, 1, 0].into_array(),
720 buffer![2u8, 2, 1].into_array(),
721 Validity::from_iter([true, false, true]),
722 )
723 };
724 assert!(!source.is_zero_copy_to_list());
725
726 let mut builder =
727 ListViewBuilder::<u32, u8>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
728 builder.extend_from_array(&source.into_array());
729
730 let listview = builder.finish_into_listview();
731 assert_eq!(listview.len(), 3);
732 assert!(listview.is_zero_copy_to_list());
733
734 assert_arrays_eq!(
735 listview.list_elements_at(0).unwrap(),
736 PrimitiveArray::from_iter([10i32, 20])
737 );
738 assert!(
739 !listview
740 .validity()
741 .vortex_expect("listview validity should be derivable")
742 .execute_is_valid(1, &mut ctx)
743 .unwrap()
744 );
745 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
746 assert_arrays_eq!(
747 listview.list_elements_at(2).unwrap(),
748 PrimitiveArray::from_iter([10i32])
749 );
750 }
751
752 #[test]
753 fn test_error_append_null_to_non_nullable() {
754 let dtype: Arc<DType> = Arc::new(I32.into());
755 let mut builder =
756 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
757
758 let null_scalar = Scalar::null(DType::List(dtype, Nullable));
760 let null_list = null_scalar.as_list();
761
762 let result = builder.append_value(null_list);
764 assert!(result.is_err());
765 assert!(
766 result
767 .unwrap_err()
768 .to_string()
769 .contains("null value to non-nullable")
770 );
771 }
772
773 #[test]
774 fn test_append_array_as_list() {
775 let dtype: Arc<DType> = Arc::new(I32.into());
776 let mut builder =
777 ListViewBuilder::<u32, u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
778
779 let arr1 = buffer![1i32, 2, 3].into_array();
781 builder.append_array_as_list(&arr1).unwrap();
782
783 builder
785 .append_value(
786 Scalar::list(
787 Arc::clone(&dtype),
788 vec![10i32.into(), 11i32.into()],
789 NonNullable,
790 )
791 .as_list(),
792 )
793 .unwrap();
794
795 let arr2 = buffer![4i32, 5].into_array();
797 builder.append_array_as_list(&arr2).unwrap();
798
799 let arr3 = buffer![0i32; 0].into_array();
801 builder.append_array_as_list(&arr3).unwrap();
802
803 builder
805 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
806 .unwrap();
807
808 let listview = builder.finish_into_listview();
809 assert_eq!(listview.len(), 5);
810
811 assert_arrays_eq!(
813 listview.elements(),
814 PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
815 );
816
817 assert_arrays_eq!(
819 listview.offsets(),
820 PrimitiveArray::from_iter([0u32, 3, 5, 7, 7])
821 );
822
823 assert_arrays_eq!(
825 listview.sizes(),
826 PrimitiveArray::from_iter([3u32, 2, 2, 0, 0])
827 );
828
829 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype, NonNullable, 20, 10);
831 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
832 assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
833 }
834}