1use std::sync::Arc;
14
15use vortex_dtype::DType;
16use vortex_dtype::IntegerPType;
17use vortex_dtype::Nullability;
18use vortex_dtype::match_each_integer_ptype;
19use vortex_error::VortexExpect;
20use vortex_error::VortexResult;
21use vortex_error::vortex_ensure;
22use vortex_error::vortex_panic;
23use vortex_mask::Mask;
24
25use crate::Canonical;
26use crate::ToCanonical;
27use crate::array::Array;
28use crate::array::ArrayRef;
29use crate::array::IntoArray;
30use crate::arrays::ListViewArray;
31use crate::arrays::ListViewRebuildMode;
32use crate::arrays::PrimitiveArray;
33use crate::builders::ArrayBuilder;
34use crate::builders::DEFAULT_BUILDER_CAPACITY;
35use crate::builders::PrimitiveBuilder;
36use crate::builders::UninitRange;
37use crate::builders::builder_with_capacity;
38use crate::builders::lazy_null_builder::LazyBitBufferBuilder;
39use crate::builtins::ArrayBuiltins;
40use crate::scalar::ListScalar;
41use crate::scalar::Scalar;
42
43pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
53 dtype: DType,
55
56 elements_builder: Box<dyn ArrayBuilder>,
58
59 offsets_builder: PrimitiveBuilder<O>,
61
62 sizes_builder: PrimitiveBuilder<S>,
64
65 nulls: LazyBitBufferBuilder,
67}
68
69impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
70 pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
72 Self::with_capacity(
73 element_dtype,
74 nullability,
75 DEFAULT_BUILDER_CAPACITY * 2,
78 DEFAULT_BUILDER_CAPACITY,
79 )
80 }
81
82 pub fn with_capacity(
90 element_dtype: Arc<DType>,
91 nullability: Nullability,
92 elements_capacity: usize,
93 capacity: usize,
94 ) -> Self {
95 let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
96
97 let offsets_builder =
98 PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
99 let sizes_builder =
100 PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
101
102 let nulls = LazyBitBufferBuilder::new(capacity);
103
104 Self {
105 dtype: DType::List(element_dtype, nullability),
106 elements_builder,
107 offsets_builder,
108 sizes_builder,
109 nulls,
110 }
111 }
112
113 pub fn append_array_as_list(&mut self, array: &dyn Array) -> VortexResult<()> {
120 vortex_ensure!(
121 array.dtype() == self.element_dtype(),
122 "Array dtype {:?} does not match list element dtype {:?}",
123 array.dtype(),
124 self.element_dtype()
125 );
126
127 let curr_offset = self.elements_builder.len();
128 let num_elements = array.len();
129
130 assert!(
133 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
134 "appending this list would cause an offset overflow"
135 );
136
137 self.elements_builder.extend_from_array(array);
138 self.nulls.append_non_null();
139
140 self.offsets_builder.append_value(
141 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
142 );
143 self.sizes_builder.append_value(
144 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
145 );
146
147 Ok(())
148 }
149
150 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
155 let Some(elements) = value.elements() else {
156 vortex_ensure!(
158 self.dtype.is_nullable(),
159 "Cannot append null value to non-nullable list builder"
160 );
161 self.append_null();
162 return Ok(());
163 };
164
165 let curr_offset = self.elements_builder.len();
166 let num_elements = elements.len();
167
168 assert!(
171 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
172 "appending this list would cause an offset overflow"
173 );
174
175 for scalar in elements {
176 self.elements_builder.append_scalar(&scalar)?;
177 }
178 self.nulls.append_non_null();
179
180 self.offsets_builder.append_value(
181 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
182 );
183 self.sizes_builder.append_value(
184 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
185 );
186
187 Ok(())
188 }
189
190 pub fn finish_into_listview(&mut self) -> ListViewArray {
192 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
193 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
194
195 let elements = self.elements_builder.finish();
196 let offsets = self.offsets_builder.finish();
197 let sizes = self.sizes_builder.finish();
198 let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
199
200 unsafe {
210 ListViewArray::new_unchecked(elements, offsets, sizes, validity)
211 .with_zero_copy_to_list(true)
212 }
213 }
214
215 pub fn element_dtype(&self) -> &DType {
218 let DType::List(element_dtype, ..) = &self.dtype else {
219 vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
220 };
221
222 element_dtype
223 }
224}
225
226impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
227 fn as_any(&self) -> &dyn std::any::Any {
228 self
229 }
230
231 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
232 self
233 }
234
235 fn dtype(&self) -> &DType {
236 &self.dtype
237 }
238
239 fn len(&self) -> usize {
240 self.offsets_builder.len()
241 }
242
243 fn append_zeros(&mut self, n: usize) {
244 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
245 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
246
247 let curr_offset = self.elements_builder.len();
249
250 for _ in 0..n {
253 self.offsets_builder.append_value(
254 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
255 );
256 self.sizes_builder.append_value(S::zero());
257 }
258
259 self.nulls.append_n_non_nulls(n);
260 }
261
262 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
263 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
264 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
265
266 let curr_offset = self.elements_builder.len();
268
269 for _ in 0..n {
271 self.offsets_builder.append_value(
272 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
273 );
274 self.sizes_builder.append_value(S::zero());
275 }
276
277 self.nulls.append_n_nulls(n);
279 }
280
281 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
282 vortex_ensure!(
283 scalar.dtype() == self.dtype(),
284 "ListViewBuilder expected scalar with dtype {}, got {}",
285 self.dtype(),
286 scalar.dtype()
287 );
288
289 let list_scalar = scalar.as_list();
290 self.append_value(list_scalar)
291 }
292
293 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
294 let listview = array.to_listview();
295 if listview.is_empty() {
296 return;
297 }
298
299 if !listview.is_zero_copy_to_list() {
302 for i in 0..listview.len() {
303 let list = listview
304 .scalar_at(i)
305 .vortex_expect("scalar_at failed in extend_from_array_unchecked");
306
307 self.append_scalar(&list)
308 .vortex_expect("was unable to extend the `ListViewBuilder`")
309 }
310
311 return;
312 }
313
314 let listview = listview
317 .rebuild(ListViewRebuildMode::MakeExact)
318 .vortex_expect("ListViewArray::rebuild(MakeExact) failed in extend_from_array");
319 debug_assert!(listview.is_zero_copy_to_list());
320
321 self.nulls.append_validity_mask(
322 array
323 .validity_mask()
324 .vortex_expect("validity_mask in extend_from_array_unchecked"),
325 );
326
327 let old_elements_len = self.elements_builder.len();
329 self.elements_builder
330 .reserve_exact(listview.elements().len());
331 self.elements_builder.extend_from_array(listview.elements());
332 let new_elements_len = self.elements_builder.len();
333
334 let extend_length = listview.len();
336 self.sizes_builder.reserve_exact(extend_length);
337 self.offsets_builder.reserve_exact(extend_length);
338
339 let cast_sizes = listview
341 .sizes()
342 .to_array()
343 .cast(self.sizes_builder.dtype().clone())
344 .vortex_expect(
345 "was somehow unable to cast the new sizes to the type of the builder sizes",
346 );
347 self.sizes_builder.extend_from_array(cast_sizes.as_ref());
348
349 let uninit_range = self.offsets_builder.uninit_range(extend_length);
353
354 let new_offsets = listview.offsets().to_primitive();
356
357 match_each_integer_ptype!(new_offsets.ptype(), |A| {
358 adjust_and_extend_offsets::<O, A>(
359 uninit_range,
360 new_offsets,
361 old_elements_len,
362 new_elements_len,
363 );
364 })
365 }
366
367 fn reserve_exact(&mut self, capacity: usize) {
368 self.elements_builder.reserve_exact(capacity * 2);
369 self.offsets_builder.reserve_exact(capacity);
370 self.sizes_builder.reserve_exact(capacity);
371 self.nulls.reserve_exact(capacity);
372 }
373
374 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
375 self.nulls = LazyBitBufferBuilder::new(validity.len());
376 self.nulls.append_validity_mask(validity);
377 }
378
379 fn finish(&mut self) -> ArrayRef {
380 self.finish_into_listview().into_array()
381 }
382
383 fn finish_into_canonical(&mut self) -> Canonical {
384 Canonical::List(self.finish_into_listview())
385 }
386}
387
388fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
391 mut uninit_range: UninitRange<'a, O>,
392 new_offsets: PrimitiveArray,
393 old_elements_len: usize,
394 new_elements_len: usize,
395) {
396 let new_offsets_slice = new_offsets.as_slice::<A>();
397 let old_elements_len = O::from_usize(old_elements_len)
398 .vortex_expect("the old elements length did not fit into the offset type (impossible)");
399 let new_elements_len = O::from_usize(new_elements_len)
400 .vortex_expect("the current elements length did not fit into the offset type (impossible)");
401
402 for i in 0..uninit_range.len() {
403 let new_offset = O::from_usize(
404 new_offsets_slice[i]
405 .to_usize()
406 .vortex_expect("Offsets must always fit in usize"),
407 )
408 .vortex_expect("New offset somehow did not fit into the builder's offset type");
409
410 let adjusted_new_offset = new_offset + old_elements_len;
413 assert!(
414 adjusted_new_offset <= new_elements_len,
415 "[{i}/{}]: {new_offset} + {old_elements_len} \
416 = {adjusted_new_offset} <= {new_elements_len} failed",
417 uninit_range.len()
418 );
419
420 uninit_range.set_value(i, adjusted_new_offset);
421 }
422
423 unsafe { uninit_range.finish() };
426}
427
428#[cfg(test)]
429mod tests {
430 use std::sync::Arc;
431
432 use vortex_dtype::DType;
433 use vortex_dtype::Nullability::NonNullable;
434 use vortex_dtype::Nullability::Nullable;
435 use vortex_dtype::PType::I32;
436
437 use super::ListViewBuilder;
438 use crate::IntoArray;
439 use crate::array::Array;
440 use crate::arrays::ListArray;
441 use crate::arrays::PrimitiveArray;
442 use crate::assert_arrays_eq;
443 use crate::builders::ArrayBuilder;
444 use crate::scalar::Scalar;
445 use crate::vtable::ValidityHelper;
446
447 #[test]
448 fn test_empty() {
449 let mut builder =
450 ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
451
452 let listview = builder.finish();
453 assert_eq!(listview.len(), 0);
454 }
455
456 #[test]
457 fn test_basic_append_and_nulls() {
458 let dtype: Arc<DType> = Arc::new(I32.into());
459 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
460
461 builder
463 .append_value(
464 Scalar::list(
465 dtype.clone(),
466 vec![1i32.into(), 2i32.into(), 3i32.into()],
467 NonNullable,
468 )
469 .as_list(),
470 )
471 .unwrap();
472
473 builder
475 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
476 .unwrap();
477
478 builder.append_null();
480
481 builder
483 .append_value(
484 Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
485 )
486 .unwrap();
487
488 let listview = builder.finish_into_listview();
489 assert_eq!(listview.len(), 4);
490
491 assert_arrays_eq!(
493 listview.list_elements_at(0).unwrap(),
494 PrimitiveArray::from_iter([1i32, 2, 3])
495 );
496
497 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
499
500 assert!(!listview.validity().is_valid(2).unwrap());
502
503 assert_arrays_eq!(
505 listview.list_elements_at(3).unwrap(),
506 PrimitiveArray::from_iter([4i32, 5])
507 );
508 }
509
510 #[test]
511 fn test_different_offset_size_types() {
512 let dtype: Arc<DType> = Arc::new(I32.into());
514 let mut builder =
515 ListViewBuilder::<u32, u8>::with_capacity(dtype.clone(), NonNullable, 0, 0);
516
517 builder
518 .append_value(
519 Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], NonNullable).as_list(),
520 )
521 .unwrap();
522
523 builder
524 .append_value(
525 Scalar::list(
526 dtype,
527 vec![3i32.into(), 4i32.into(), 5i32.into()],
528 NonNullable,
529 )
530 .as_list(),
531 )
532 .unwrap();
533
534 let listview = builder.finish_into_listview();
535 assert_eq!(listview.len(), 2);
536
537 assert_arrays_eq!(
539 listview.list_elements_at(0).unwrap(),
540 PrimitiveArray::from_iter([1i32, 2])
541 );
542
543 assert_arrays_eq!(
545 listview.list_elements_at(1).unwrap(),
546 PrimitiveArray::from_iter([3i32, 4, 5])
547 );
548
549 let dtype2: Arc<DType> = Arc::new(I32.into());
551 let mut builder2 =
552 ListViewBuilder::<u64, u16>::with_capacity(dtype2.clone(), NonNullable, 0, 0);
553
554 for i in 0..5 {
555 builder2
556 .append_value(
557 Scalar::list(dtype2.clone(), vec![(i * 10).into()], NonNullable).as_list(),
558 )
559 .unwrap();
560 }
561
562 let listview2 = builder2.finish_into_listview();
563 assert_eq!(listview2.len(), 5);
564
565 for i in 0..5i32 {
567 assert_arrays_eq!(
568 listview2.list_elements_at(i as usize).unwrap(),
569 PrimitiveArray::from_iter([i * 10])
570 );
571 }
572 }
573
574 #[test]
575 fn test_builder_trait_methods() {
576 let dtype: Arc<DType> = Arc::new(I32.into());
577 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
578
579 builder.append_zeros(2);
581 assert_eq!(builder.len(), 2);
582
583 unsafe {
585 builder.append_nulls_unchecked(2);
586 }
587 assert_eq!(builder.len(), 4);
588
589 let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
591 builder.append_scalar(&list_scalar).unwrap();
592 assert_eq!(builder.len(), 5);
593
594 let listview = builder.finish_into_listview();
595 assert_eq!(listview.len(), 5);
596
597 assert_eq!(listview.list_elements_at(0).unwrap().len(), 0);
599 assert_eq!(listview.list_elements_at(1).unwrap().len(), 0);
600
601 assert!(!listview.validity().is_valid(2).unwrap());
603 assert!(!listview.validity().is_valid(3).unwrap());
604
605 assert_arrays_eq!(
607 listview.list_elements_at(4).unwrap(),
608 PrimitiveArray::from_iter([10i32, 20])
609 );
610 }
611
612 #[test]
613 fn test_extend_from_array() {
614 let dtype: Arc<DType> = Arc::new(I32.into());
615
616 let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
618 [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
619 Arc::new(I32.into()),
620 )
621 .unwrap();
622
623 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
624
625 builder
627 .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
628 .unwrap();
629
630 builder.extend_from_array(&source.into_array());
632
633 let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
635 std::iter::empty::<Option<Vec<i32>>>(),
636 Arc::new(I32.into()),
637 )
638 .unwrap();
639 builder.extend_from_array(&empty_source.into_array());
640
641 let listview = builder.finish_into_listview();
642 assert_eq!(listview.len(), 4);
643
644 assert_arrays_eq!(
647 listview.list_elements_at(0).unwrap(),
648 PrimitiveArray::from_iter([0i32])
649 );
650
651 assert_arrays_eq!(
653 listview.list_elements_at(1).unwrap(),
654 PrimitiveArray::from_iter([1i32, 2, 3])
655 );
656
657 assert!(!listview.validity().is_valid(2).unwrap());
659
660 assert_arrays_eq!(
662 listview.list_elements_at(3).unwrap(),
663 PrimitiveArray::from_iter([4i32, 5])
664 );
665 }
666
667 #[test]
668 fn test_error_append_null_to_non_nullable() {
669 let dtype: Arc<DType> = Arc::new(I32.into());
670 let mut builder =
671 ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
672
673 let null_scalar = Scalar::null(DType::List(dtype, Nullable));
675 let null_list = null_scalar.as_list();
676
677 let result = builder.append_value(null_list);
679 assert!(result.is_err());
680 assert!(
681 result
682 .unwrap_err()
683 .to_string()
684 .contains("null value to non-nullable")
685 );
686 }
687
688 #[test]
689 fn test_append_array_as_list() {
690 use vortex_buffer::buffer;
691
692 let dtype: Arc<DType> = Arc::new(I32.into());
693 let mut builder =
694 ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), NonNullable, 20, 10);
695
696 let arr1 = buffer![1i32, 2, 3].into_array();
698 builder.append_array_as_list(&arr1).unwrap();
699
700 builder
702 .append_value(
703 Scalar::list(dtype.clone(), vec![10i32.into(), 11i32.into()], NonNullable)
704 .as_list(),
705 )
706 .unwrap();
707
708 let arr2 = buffer![4i32, 5].into_array();
710 builder.append_array_as_list(&arr2).unwrap();
711
712 let arr3 = buffer![0i32; 0].into_array();
714 builder.append_array_as_list(&arr3).unwrap();
715
716 builder
718 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
719 .unwrap();
720
721 let listview = builder.finish_into_listview();
722 assert_eq!(listview.len(), 5);
723
724 assert_arrays_eq!(
726 listview.elements(),
727 PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
728 );
729
730 assert_arrays_eq!(
732 listview.offsets(),
733 PrimitiveArray::from_iter([0u32, 3, 5, 7, 7])
734 );
735
736 assert_arrays_eq!(
738 listview.sizes(),
739 PrimitiveArray::from_iter([3u32, 2, 2, 0, 0])
740 );
741
742 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype, NonNullable, 20, 10);
744 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
745 assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
746 }
747}