1use std::sync::Arc;
14
15use vortex_dtype::DType;
16use vortex_dtype::IntegerPType;
17use vortex_dtype::Nullability;
18use vortex_dtype::match_each_integer_ptype;
19use vortex_error::VortexExpect;
20use vortex_error::VortexResult;
21use vortex_error::vortex_ensure;
22use vortex_error::vortex_panic;
23use vortex_mask::Mask;
24use vortex_scalar::ListScalar;
25use vortex_scalar::Scalar;
26
27use crate::Canonical;
28use crate::ToCanonical;
29use crate::array::Array;
30use crate::array::ArrayRef;
31use crate::array::IntoArray;
32use crate::arrays::ListViewArray;
33use crate::arrays::ListViewRebuildMode;
34use crate::arrays::PrimitiveArray;
35use crate::builders::ArrayBuilder;
36use crate::builders::DEFAULT_BUILDER_CAPACITY;
37use crate::builders::PrimitiveBuilder;
38use crate::builders::UninitRange;
39use crate::builders::builder_with_capacity;
40use crate::builders::lazy_null_builder::LazyBitBufferBuilder;
41use crate::compute;
42
43pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
53 dtype: DType,
55
56 elements_builder: Box<dyn ArrayBuilder>,
58
59 offsets_builder: PrimitiveBuilder<O>,
61
62 sizes_builder: PrimitiveBuilder<S>,
64
65 nulls: LazyBitBufferBuilder,
67}
68
69impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
70 pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
72 Self::with_capacity(
73 element_dtype,
74 nullability,
75 DEFAULT_BUILDER_CAPACITY * 2,
78 DEFAULT_BUILDER_CAPACITY,
79 )
80 }
81
82 pub fn with_capacity(
90 element_dtype: Arc<DType>,
91 nullability: Nullability,
92 elements_capacity: usize,
93 capacity: usize,
94 ) -> Self {
95 let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
96
97 let offsets_builder =
98 PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
99 let sizes_builder =
100 PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
101
102 let nulls = LazyBitBufferBuilder::new(capacity);
103
104 Self {
105 dtype: DType::List(element_dtype, nullability),
106 elements_builder,
107 offsets_builder,
108 sizes_builder,
109 nulls,
110 }
111 }
112
113 pub fn append_array_as_list(&mut self, array: &dyn Array) -> VortexResult<()> {
120 vortex_ensure!(
121 array.dtype() == self.element_dtype(),
122 "Array dtype {:?} does not match list element dtype {:?}",
123 array.dtype(),
124 self.element_dtype()
125 );
126
127 let curr_offset = self.elements_builder.len();
128 let num_elements = array.len();
129
130 assert!(
133 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
134 "appending this list would cause an offset overflow"
135 );
136
137 self.elements_builder.extend_from_array(array);
138 self.nulls.append_non_null();
139
140 self.offsets_builder.append_value(
141 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
142 );
143 self.sizes_builder.append_value(
144 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
145 );
146
147 Ok(())
148 }
149
150 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
155 let Some(elements) = value.elements() else {
156 vortex_ensure!(
158 self.dtype.is_nullable(),
159 "Cannot append null value to non-nullable list builder"
160 );
161 self.append_null();
162 return Ok(());
163 };
164
165 let curr_offset = self.elements_builder.len();
166 let num_elements = elements.len();
167
168 assert!(
171 ((curr_offset + num_elements) as u64) < O::max_value_as_u64(),
172 "appending this list would cause an offset overflow"
173 );
174
175 for scalar in elements {
176 self.elements_builder.append_scalar(&scalar)?;
177 }
178 self.nulls.append_non_null();
179
180 self.offsets_builder.append_value(
181 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
182 );
183 self.sizes_builder.append_value(
184 S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
185 );
186
187 Ok(())
188 }
189
190 pub fn finish_into_listview(&mut self) -> ListViewArray {
192 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
193 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
194
195 let elements = self.elements_builder.finish();
196 let offsets = self.offsets_builder.finish();
197 let sizes = self.sizes_builder.finish();
198 let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
199
200 unsafe {
210 ListViewArray::new_unchecked(elements, offsets, sizes, validity)
211 .with_zero_copy_to_list(true)
212 }
213 }
214
215 pub fn element_dtype(&self) -> &DType {
218 let DType::List(element_dtype, ..) = &self.dtype else {
219 vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
220 };
221
222 element_dtype
223 }
224}
225
226impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
227 fn as_any(&self) -> &dyn std::any::Any {
228 self
229 }
230
231 fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
232 self
233 }
234
235 fn dtype(&self) -> &DType {
236 &self.dtype
237 }
238
239 fn len(&self) -> usize {
240 self.offsets_builder.len()
241 }
242
243 fn append_zeros(&mut self, n: usize) {
244 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
245 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
246
247 let curr_offset = self.elements_builder.len();
249
250 for _ in 0..n {
253 self.offsets_builder.append_value(
254 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
255 );
256 self.sizes_builder.append_value(S::zero());
257 }
258
259 self.nulls.append_n_non_nulls(n);
260 }
261
262 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
263 debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
264 debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
265
266 let curr_offset = self.elements_builder.len();
268
269 for _ in 0..n {
271 self.offsets_builder.append_value(
272 O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
273 );
274 self.sizes_builder.append_value(S::zero());
275 }
276
277 self.nulls.append_n_nulls(n);
279 }
280
281 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
282 vortex_ensure!(
283 scalar.dtype() == self.dtype(),
284 "ListViewBuilder expected scalar with dtype {:?}, got {:?}",
285 self.dtype(),
286 scalar.dtype()
287 );
288
289 let list_scalar = scalar.as_list();
290 self.append_value(list_scalar)
291 }
292
293 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
294 let listview = array.to_listview();
295 if listview.is_empty() {
296 return;
297 }
298
299 if !listview.is_zero_copy_to_list() {
302 for i in 0..listview.len() {
303 let list = listview.scalar_at(i);
304
305 self.append_scalar(&list)
306 .vortex_expect("was unable to extend the `ListViewBuilder`")
307 }
308
309 return;
310 }
311
312 let listview = listview.rebuild(ListViewRebuildMode::MakeExact);
315 debug_assert!(listview.is_zero_copy_to_list());
316
317 self.nulls.append_validity_mask(array.validity_mask());
318
319 let old_elements_len = self.elements_builder.len();
321 self.elements_builder
322 .reserve_exact(listview.elements().len());
323 self.elements_builder.extend_from_array(listview.elements());
324 let new_elements_len = self.elements_builder.len();
325
326 let extend_length = listview.len();
328 self.sizes_builder.reserve_exact(extend_length);
329 self.offsets_builder.reserve_exact(extend_length);
330
331 let cast_sizes = compute::cast(listview.sizes(), self.sizes_builder.dtype()).vortex_expect(
333 "was somehow unable to cast the new sizes to the type of the builder sizes",
334 );
335 self.sizes_builder.extend_from_array(cast_sizes.as_ref());
336
337 let uninit_range = self.offsets_builder.uninit_range(extend_length);
341
342 let new_offsets = listview.offsets().to_primitive();
344
345 match_each_integer_ptype!(new_offsets.ptype(), |A| {
346 adjust_and_extend_offsets::<O, A>(
347 uninit_range,
348 new_offsets,
349 old_elements_len,
350 new_elements_len,
351 );
352 })
353 }
354
355 fn reserve_exact(&mut self, capacity: usize) {
356 self.elements_builder.reserve_exact(capacity * 2);
357 self.offsets_builder.reserve_exact(capacity);
358 self.sizes_builder.reserve_exact(capacity);
359 self.nulls.reserve_exact(capacity);
360 }
361
362 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
363 self.nulls = LazyBitBufferBuilder::new(validity.len());
364 self.nulls.append_validity_mask(validity);
365 }
366
367 fn finish(&mut self) -> ArrayRef {
368 self.finish_into_listview().into_array()
369 }
370
371 fn finish_into_canonical(&mut self) -> Canonical {
372 Canonical::List(self.finish_into_listview())
373 }
374}
375
376fn adjust_and_extend_offsets<'a, O: IntegerPType, A: IntegerPType>(
379 mut uninit_range: UninitRange<'a, O>,
380 new_offsets: PrimitiveArray,
381 old_elements_len: usize,
382 new_elements_len: usize,
383) {
384 let new_offsets_slice = new_offsets.as_slice::<A>();
385 let old_elements_len = O::from_usize(old_elements_len)
386 .vortex_expect("the old elements length did not fit into the offset type (impossible)");
387 let new_elements_len = O::from_usize(new_elements_len)
388 .vortex_expect("the current elements length did not fit into the offset type (impossible)");
389
390 for i in 0..uninit_range.len() {
391 let new_offset = O::from_usize(
392 new_offsets_slice[i]
393 .to_usize()
394 .vortex_expect("Offsets must always fit in usize"),
395 )
396 .vortex_expect("New offset somehow did not fit into the builder's offset type");
397
398 let adjusted_new_offset = new_offset + old_elements_len;
401 assert!(
402 adjusted_new_offset <= new_elements_len,
403 "[{i}/{}]: {new_offset} + {old_elements_len} \
404 = {adjusted_new_offset} <= {new_elements_len} failed",
405 uninit_range.len()
406 );
407
408 uninit_range.set_value(i, adjusted_new_offset);
409 }
410
411 unsafe { uninit_range.finish() };
414}
415
416#[cfg(test)]
417mod tests {
418 use std::sync::Arc;
419
420 use vortex_dtype::DType;
421 use vortex_dtype::Nullability::NonNullable;
422 use vortex_dtype::Nullability::Nullable;
423 use vortex_dtype::PType::I32;
424 use vortex_scalar::Scalar;
425
426 use super::ListViewBuilder;
427 use crate::IntoArray;
428 use crate::array::Array;
429 use crate::arrays::ListArray;
430 use crate::builders::ArrayBuilder;
431 use crate::vtable::ValidityHelper;
432
433 #[test]
434 fn test_empty() {
435 let mut builder =
436 ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
437
438 let listview = builder.finish();
439 assert_eq!(listview.len(), 0);
440 }
441
442 #[test]
443 fn test_basic_append_and_nulls() {
444 let dtype: Arc<DType> = Arc::new(I32.into());
445 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
446
447 builder
449 .append_value(
450 Scalar::list(
451 dtype.clone(),
452 vec![1i32.into(), 2i32.into(), 3i32.into()],
453 NonNullable,
454 )
455 .as_list(),
456 )
457 .unwrap();
458
459 builder
461 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
462 .unwrap();
463
464 builder.append_null();
466
467 builder
469 .append_value(
470 Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
471 )
472 .unwrap();
473
474 let listview = builder.finish_into_listview();
475 assert_eq!(listview.len(), 4);
476
477 let first_list = listview.list_elements_at(0);
479 assert_eq!(first_list.len(), 3);
480 assert_eq!(first_list.scalar_at(0), 1i32.into());
481 assert_eq!(first_list.scalar_at(1), 2i32.into());
482 assert_eq!(first_list.scalar_at(2), 3i32.into());
483
484 let empty_list = listview.list_elements_at(1);
486 assert_eq!(empty_list.len(), 0);
487
488 assert!(!listview.validity().is_valid(2));
490
491 let last_list = listview.list_elements_at(3);
493 assert_eq!(last_list.len(), 2);
494 assert_eq!(last_list.scalar_at(0), 4i32.into());
495 assert_eq!(last_list.scalar_at(1), 5i32.into());
496 }
497
498 #[test]
499 fn test_different_offset_size_types() {
500 let dtype: Arc<DType> = Arc::new(I32.into());
502 let mut builder =
503 ListViewBuilder::<u32, u8>::with_capacity(dtype.clone(), NonNullable, 0, 0);
504
505 builder
506 .append_value(
507 Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], NonNullable).as_list(),
508 )
509 .unwrap();
510
511 builder
512 .append_value(
513 Scalar::list(
514 dtype,
515 vec![3i32.into(), 4i32.into(), 5i32.into()],
516 NonNullable,
517 )
518 .as_list(),
519 )
520 .unwrap();
521
522 let listview = builder.finish_into_listview();
523 assert_eq!(listview.len(), 2);
524
525 let first = listview.list_elements_at(0);
527 assert_eq!(first.scalar_at(0), 1i32.into());
528 assert_eq!(first.scalar_at(1), 2i32.into());
529
530 let second = listview.list_elements_at(1);
532 assert_eq!(second.scalar_at(0), 3i32.into());
533 assert_eq!(second.scalar_at(1), 4i32.into());
534 assert_eq!(second.scalar_at(2), 5i32.into());
535
536 let dtype2: Arc<DType> = Arc::new(I32.into());
538 let mut builder2 =
539 ListViewBuilder::<u64, u16>::with_capacity(dtype2.clone(), NonNullable, 0, 0);
540
541 for i in 0..5 {
542 builder2
543 .append_value(
544 Scalar::list(dtype2.clone(), vec![(i * 10).into()], NonNullable).as_list(),
545 )
546 .unwrap();
547 }
548
549 let listview2 = builder2.finish_into_listview();
550 assert_eq!(listview2.len(), 5);
551
552 for i in 0..5i32 {
554 let list = listview2.list_elements_at(i as usize);
555 assert_eq!(list.len(), 1);
556 assert_eq!(list.scalar_at(0), (i * 10).into());
557 }
558 }
559
560 #[test]
561 fn test_builder_trait_methods() {
562 let dtype: Arc<DType> = Arc::new(I32.into());
563 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
564
565 builder.append_zeros(2);
567 assert_eq!(builder.len(), 2);
568
569 unsafe {
571 builder.append_nulls_unchecked(2);
572 }
573 assert_eq!(builder.len(), 4);
574
575 let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
577 builder.append_scalar(&list_scalar).unwrap();
578 assert_eq!(builder.len(), 5);
579
580 let listview = builder.finish_into_listview();
581 assert_eq!(listview.len(), 5);
582
583 assert_eq!(listview.list_elements_at(0).len(), 0);
585 assert_eq!(listview.list_elements_at(1).len(), 0);
586
587 assert!(!listview.validity().is_valid(2));
589 assert!(!listview.validity().is_valid(3));
590
591 let last_list = listview.list_elements_at(4);
593 assert_eq!(last_list.len(), 2);
594 assert_eq!(last_list.scalar_at(0), 10i32.into());
595 assert_eq!(last_list.scalar_at(1), 20i32.into());
596 }
597
598 #[test]
599 fn test_extend_from_array() {
600 let dtype: Arc<DType> = Arc::new(I32.into());
601
602 let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
604 [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
605 Arc::new(I32.into()),
606 )
607 .unwrap();
608
609 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
610
611 builder
613 .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
614 .unwrap();
615
616 builder.extend_from_array(&source.into_array());
618
619 let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
621 std::iter::empty::<Option<Vec<i32>>>(),
622 Arc::new(I32.into()),
623 )
624 .unwrap();
625 builder.extend_from_array(&empty_source.into_array());
626
627 let listview = builder.finish_into_listview();
628 assert_eq!(listview.len(), 4);
629
630 let first = listview.list_elements_at(0);
633 assert_eq!(first.len(), 1);
634 assert_eq!(first.scalar_at(0), 0i32.into());
635
636 let second = listview.list_elements_at(1);
638 assert_eq!(second.len(), 3);
639 assert_eq!(second.scalar_at(0), 1i32.into());
640 assert_eq!(second.scalar_at(1), 2i32.into());
641 assert_eq!(second.scalar_at(2), 3i32.into());
642
643 assert!(!listview.validity().is_valid(2));
645
646 let fourth = listview.list_elements_at(3);
648 assert_eq!(fourth.len(), 2);
649 assert_eq!(fourth.scalar_at(0), 4i32.into());
650 assert_eq!(fourth.scalar_at(1), 5i32.into());
651 }
652
653 #[test]
654 fn test_error_append_null_to_non_nullable() {
655 let dtype: Arc<DType> = Arc::new(I32.into());
656 let mut builder =
657 ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
658
659 let null_scalar = Scalar::null(DType::List(dtype, Nullable));
661 let null_list = null_scalar.as_list();
662
663 let result = builder.append_value(null_list);
665 assert!(result.is_err());
666 assert!(
667 result
668 .unwrap_err()
669 .to_string()
670 .contains("null value to non-nullable")
671 );
672 }
673
674 #[test]
675 fn test_append_array_as_list() {
676 use vortex_buffer::buffer;
677
678 use crate::ToCanonical;
679
680 let dtype: Arc<DType> = Arc::new(I32.into());
681 let mut builder =
682 ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), NonNullable, 20, 10);
683
684 let arr1 = buffer![1i32, 2, 3].into_array();
686 builder.append_array_as_list(&arr1).unwrap();
687
688 builder
690 .append_value(
691 Scalar::list(dtype.clone(), vec![10i32.into(), 11i32.into()], NonNullable)
692 .as_list(),
693 )
694 .unwrap();
695
696 let arr2 = buffer![4i32, 5].into_array();
698 builder.append_array_as_list(&arr2).unwrap();
699
700 let arr3 = buffer![0i32; 0].into_array();
702 builder.append_array_as_list(&arr3).unwrap();
703
704 builder
706 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
707 .unwrap();
708
709 let listview = builder.finish_into_listview();
710 assert_eq!(listview.len(), 5);
711
712 let elements = listview.elements().to_primitive();
714 assert_eq!(elements.as_slice::<i32>(), &[1, 2, 3, 10, 11, 4, 5]);
715
716 let offsets = listview.offsets().to_primitive();
718 assert_eq!(offsets.as_slice::<u32>(), &[0, 3, 5, 7, 7]);
719
720 let sizes = listview.sizes().to_primitive();
722 assert_eq!(sizes.as_slice::<u32>(), &[3, 2, 2, 0, 0]);
723
724 let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype, NonNullable, 20, 10);
726 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
727 assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
728 }
729}