1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_panic;
12use vortex_mask::Mask;
13
14use crate::ArrayRef;
15use crate::Canonical;
16use crate::IntoArray;
17use crate::arrays::ListArray;
18use crate::arrays::listview::ListViewArrayExt;
19use crate::builders::ArrayBuilder;
20use crate::builders::DEFAULT_BUILDER_CAPACITY;
21use crate::builders::LazyBitBufferBuilder;
22use crate::builders::PrimitiveBuilder;
23use crate::builders::builder_with_capacity;
24use crate::canonical::ToCanonical;
25use crate::dtype::DType;
26use crate::dtype::IntegerPType;
27use crate::dtype::Nullability;
28use crate::dtype::Nullability::NonNullable;
29use crate::match_each_integer_ptype;
30use crate::scalar::ListScalar;
31use crate::scalar::Scalar;
32
33pub struct ListBuilder<O: IntegerPType> {
36 dtype: DType,
38
39 elements_builder: Box<dyn ArrayBuilder>,
41
42 offsets_builder: PrimitiveBuilder<O>,
44
45 nulls: LazyBitBufferBuilder,
47}
48
49impl<O: IntegerPType> ListBuilder<O> {
50 pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
52 Self::with_capacity(
53 value_dtype,
54 nullability,
55 DEFAULT_BUILDER_CAPACITY * 2,
58 DEFAULT_BUILDER_CAPACITY,
59 )
60 }
61
62 pub fn with_capacity(
70 value_dtype: Arc<DType>,
71 nullability: Nullability,
72 elements_capacity: usize,
73 capacity: usize,
74 ) -> Self {
75 let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
76 let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
77
78 offsets_builder.append_zero();
80
81 Self {
82 elements_builder,
83 offsets_builder,
84 nulls: LazyBitBufferBuilder::new(capacity),
85 dtype: DType::List(value_dtype, nullability),
86 }
87 }
88
89 pub fn append_array_as_list(&mut self, array: &ArrayRef) -> VortexResult<()> {
96 vortex_ensure!(
97 array.dtype() == self.element_dtype(),
98 "Array dtype {:?} does not match list element dtype {:?}",
99 array.dtype(),
100 self.element_dtype()
101 );
102
103 self.elements_builder.extend_from_array(array);
104 self.nulls.append_non_null();
105 self.offsets_builder.append_value(
106 O::from_usize(self.elements_builder.len())
107 .vortex_expect("Failed to convert from usize to O"),
108 );
109
110 Ok(())
111 }
112
113 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
115 match value.elements() {
116 None => {
117 if self.dtype.nullability() == NonNullable {
118 vortex_bail!("Cannot append null value to non-nullable list");
119 }
120 self.append_null();
121 }
122 Some(elements) => {
123 for scalar in elements {
124 self.elements_builder.append_scalar(&scalar)?;
127 }
128
129 self.nulls.append_non_null();
130 self.offsets_builder.append_value(
131 O::from_usize(self.elements_builder.len())
132 .vortex_expect("Failed to convert from usize to O"),
133 );
134 }
135 }
136
137 Ok(())
138 }
139
140 pub fn finish_into_list(&mut self) -> ListArray {
142 assert_eq!(
143 self.offsets_builder.len(),
144 self.nulls.len() + 1,
145 "offsets length must be one more than nulls length."
146 );
147
148 ListArray::try_new(
149 self.elements_builder.finish(),
150 self.offsets_builder.finish(),
151 self.nulls.finish_with_nullability(self.dtype.nullability()),
152 )
153 .vortex_expect("Buffer, offsets, and validity must have same length.")
154 }
155
156 pub fn element_dtype(&self) -> &DType {
159 let DType::List(element_dtype, _) = &self.dtype else {
160 vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
161 };
162
163 element_dtype
164 }
165}
166
167impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
168 fn as_any(&self) -> &dyn Any {
169 self
170 }
171
172 fn as_any_mut(&mut self) -> &mut dyn Any {
173 self
174 }
175
176 fn dtype(&self) -> &DType {
177 &self.dtype
178 }
179
180 fn len(&self) -> usize {
181 self.nulls.len()
182 }
183
184 fn append_zeros(&mut self, n: usize) {
185 let curr_len = self.elements_builder.len();
186 for _ in 0..n {
187 self.offsets_builder.append_value(
188 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
189 )
190 }
191 self.nulls.append_n_non_nulls(n);
192 }
193
194 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
195 let curr_len = self.elements_builder.len();
196 for _ in 0..n {
197 self.offsets_builder.append_value(
200 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
201 )
202 }
203 self.nulls.append_n_nulls(n);
204 }
205
206 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
207 vortex_ensure!(
208 scalar.dtype() == self.dtype(),
209 "ListBuilder expected scalar with dtype {}, got {}",
210 self.dtype(),
211 scalar.dtype()
212 );
213
214 self.append_value(scalar.as_list())
215 }
216
217 unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
218 let list = array.to_listview();
219 if list.is_empty() {
220 return;
221 }
222
223 self.nulls.append_validity_mask(
225 array
226 .validity_mask()
227 .vortex_expect("validity_mask in extend_from_array_unchecked"),
228 );
229
230 let elements = list.elements();
232 let offsets = list.offsets().to_primitive();
233 let sizes = list.sizes().to_primitive();
234
235 fn extend_inner<O, OffsetType, SizeType>(
236 builder: &mut ListBuilder<O>,
237 new_elements: &ArrayRef,
238 new_offsets: &[OffsetType],
239 new_sizes: &[SizeType],
240 ) where
241 O: IntegerPType,
242 OffsetType: IntegerPType,
243 SizeType: IntegerPType,
244 {
245 let num_lists = new_offsets.len();
246 debug_assert_eq!(num_lists, new_sizes.len());
247
248 let mut curr_offset = builder.elements_builder.len();
249 let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
250
251 for i in 0..new_offsets.len() {
254 let offset: usize = new_offsets[i].as_();
255 let size: usize = new_sizes[i].as_();
256
257 if size > 0 {
258 let list_elements = new_elements
259 .slice(offset..offset + size)
260 .vortex_expect("list builder slice");
261 builder.elements_builder.extend_from_array(&list_elements);
262 curr_offset += size;
263 }
264
265 let new_offset =
266 O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
267
268 offsets_range.set_value(i, new_offset);
269 }
270
271 unsafe { offsets_range.finish() };
274 }
275
276 match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
277 match_each_integer_ptype!(sizes.ptype(), |SizeType| {
278 extend_inner(
279 self,
280 elements,
281 offsets.as_slice::<OffsetType>(),
282 sizes.as_slice::<SizeType>(),
283 )
284 })
285 })
286 }
287
288 fn reserve_exact(&mut self, additional: usize) {
289 self.elements_builder.reserve_exact(additional);
290 self.offsets_builder.reserve_exact(additional);
291 self.nulls.reserve_exact(additional);
292 }
293
294 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
295 self.nulls = LazyBitBufferBuilder::new(validity.len());
296 self.nulls.append_validity_mask(validity);
297 }
298
299 fn finish(&mut self) -> ArrayRef {
300 self.finish_into_list().into_array()
301 }
302
303 fn finish_into_canonical(&mut self) -> Canonical {
304 Canonical::List(self.finish_into_list().into_array().to_listview())
305 }
306}
307
308#[cfg(test)]
309mod tests {
310 use std::sync::Arc;
311
312 use Nullability::NonNullable;
313 use Nullability::Nullable;
314 use vortex_buffer::buffer;
315 use vortex_error::VortexExpect;
316
317 use crate::IntoArray;
318 use crate::LEGACY_SESSION;
319 use crate::ToCanonical;
320 use crate::arrays::ChunkedArray;
321 use crate::arrays::PrimitiveArray;
322 use crate::arrays::list::ListArrayExt;
323 use crate::arrays::listview::ListViewArrayExt;
324 use crate::assert_arrays_eq;
325 use crate::builders::ArrayBuilder;
326 use crate::builders::list::ListArray;
327 use crate::builders::list::ListBuilder;
328 use crate::dtype::DType;
329 use crate::dtype::IntegerPType;
330 use crate::dtype::Nullability;
331 use crate::dtype::PType::I32;
332 use crate::executor::VortexSessionExecute;
333 use crate::scalar::Scalar;
334 use crate::validity::Validity;
335
336 #[test]
337 fn test_empty() {
338 let mut builder =
339 ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
340
341 let list = builder.finish();
342 assert_eq!(list.len(), 0);
343 }
344
345 #[test]
346 fn test_values() {
347 let dtype: Arc<DType> = Arc::new(I32.into());
348 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
349
350 builder
351 .append_value(
352 Scalar::list(
353 Arc::clone(&dtype),
354 vec![1i32.into(), 2i32.into(), 3i32.into()],
355 NonNullable,
356 )
357 .as_list(),
358 )
359 .unwrap();
360
361 builder
362 .append_value(
363 Scalar::list(
364 dtype,
365 vec![4i32.into(), 5i32.into(), 6i32.into()],
366 NonNullable,
367 )
368 .as_list(),
369 )
370 .unwrap();
371
372 let list = builder.finish();
373 assert_eq!(list.len(), 2);
374
375 let list_array = list.to_listview();
376
377 assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
378 assert_eq!(list_array.list_elements_at(1).unwrap().len(), 3);
379 }
380
381 #[test]
382 fn test_append_empty_list() {
383 let dtype: Arc<DType> = Arc::new(I32.into());
384 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
385
386 assert!(
387 builder
388 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
389 .is_ok()
390 )
391 }
392
393 #[test]
394 fn test_nullable_values() {
395 let dtype: Arc<DType> = Arc::new(I32.into());
396 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
397
398 builder
399 .append_value(
400 Scalar::list(
401 Arc::clone(&dtype),
402 vec![1i32.into(), 2i32.into(), 3i32.into()],
403 NonNullable,
404 )
405 .as_list(),
406 )
407 .unwrap();
408
409 builder
410 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
411 .unwrap();
412
413 builder
414 .append_value(
415 Scalar::list(
416 dtype,
417 vec![4i32.into(), 5i32.into(), 6i32.into()],
418 NonNullable,
419 )
420 .as_list(),
421 )
422 .unwrap();
423
424 let list = builder.finish();
425 assert_eq!(list.len(), 3);
426
427 let list_array = list.to_listview();
428
429 assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
430 assert_eq!(list_array.list_elements_at(1).unwrap().len(), 0);
431 assert_eq!(list_array.list_elements_at(2).unwrap().len(), 3);
432 }
433
434 fn test_extend_builder_gen<O: IntegerPType>() {
435 let list = ListArray::from_iter_opt_slow::<O, _, _>(
436 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
437 Arc::new(I32.into()),
438 )
439 .unwrap();
440 assert_eq!(list.len(), 3);
441
442 let mut ctx = LEGACY_SESSION.create_execution_ctx();
443
444 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
445 builder.extend_from_array(&list);
446 builder.extend_from_array(&list);
447 builder.extend_from_array(&list.slice(0..0).unwrap());
448 builder.extend_from_array(&list.slice(1..3).unwrap());
449
450 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
451 [
452 Some(vec![0, 1, 2]),
453 None,
454 Some(vec![4, 5]),
455 Some(vec![0, 1, 2]),
456 None,
457 Some(vec![4, 5]),
458 None,
459 Some(vec![4, 5]),
460 ],
461 Arc::new(DType::Primitive(I32, NonNullable)),
462 )
463 .unwrap()
464 .to_listview();
465
466 let actual = builder.finish_into_canonical().into_listview();
467
468 assert_arrays_eq!(actual.elements(), expected.elements());
469
470 assert_arrays_eq!(actual.offsets(), expected.offsets());
471
472 assert!(
473 actual
474 .validity()
475 .vortex_expect("list validity should be derivable")
476 .mask_eq(
477 &expected
478 .validity()
479 .vortex_expect("list validity should be derivable"),
480 &mut ctx,
481 )
482 .unwrap(),
483 );
484 }
485
486 #[test]
487 fn test_extend_builder() {
488 test_extend_builder_gen::<i8>();
489 test_extend_builder_gen::<i16>();
490 test_extend_builder_gen::<i32>();
491 test_extend_builder_gen::<i64>();
492
493 test_extend_builder_gen::<u8>();
494 test_extend_builder_gen::<u16>();
495 test_extend_builder_gen::<u32>();
496 test_extend_builder_gen::<u64>();
497 }
498
499 #[test]
500 pub fn test_array_with_gap() {
501 let one_trailing_unused_element = ListArray::try_new(
502 buffer![1, 2, 3, 4].into_array(),
503 buffer![0, 3].into_array(),
504 Validity::NonNullable,
505 )
506 .unwrap();
507
508 let second_array = ListArray::try_new(
509 buffer![5, 6].into_array(),
510 buffer![0, 2].into_array(),
511 Validity::NonNullable,
512 )
513 .unwrap();
514
515 let chunked_list = ChunkedArray::try_new(
516 vec![
517 one_trailing_unused_element.clone().into_array(),
518 second_array.clone().into_array(),
519 ],
520 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
521 );
522
523 let canon_values = chunked_list.unwrap().as_array().to_listview();
524
525 assert_eq!(
526 one_trailing_unused_element.scalar_at(0).unwrap(),
527 canon_values.scalar_at(0).unwrap()
528 );
529 assert_eq!(
530 second_array.scalar_at(0).unwrap(),
531 canon_values.scalar_at(1).unwrap()
532 );
533 }
534
535 #[test]
536 fn test_append_scalar() {
537 let dtype: Arc<DType> = Arc::new(I32.into());
538 let mut builder = ListBuilder::<u64>::with_capacity(Arc::clone(&dtype), Nullable, 20, 10);
539
540 let list_scalar1 =
542 Scalar::list(Arc::clone(&dtype), vec![1i32.into(), 2i32.into()], Nullable);
543 builder.append_scalar(&list_scalar1).unwrap();
544
545 let list_scalar2 = Scalar::list(
547 Arc::clone(&dtype),
548 vec![3i32.into(), 4i32.into(), 5i32.into()],
549 Nullable,
550 );
551 builder.append_scalar(&list_scalar2).unwrap();
552
553 let null_scalar = Scalar::null(DType::List(Arc::clone(&dtype), Nullable));
555 builder.append_scalar(&null_scalar).unwrap();
556
557 let array = builder.finish_into_list();
558 assert_eq!(array.len(), 3);
559
560 let scalar0 = array.scalar_at(0).unwrap();
563 let list0 = scalar0.as_list();
564 assert_eq!(list0.len(), 2);
565 if let Some(list0_items) = list0.elements() {
566 assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
567 assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
568 }
569
570 let scalar1 = array.scalar_at(1).unwrap();
571 let list1 = scalar1.as_list();
572 assert_eq!(list1.len(), 3);
573 if let Some(list1_items) = list1.elements() {
574 assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
575 assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
576 assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
577 }
578
579 let scalar2 = array.scalar_at(2).unwrap();
580 let list2 = scalar2.as_list();
581 assert!(list2.is_null()); assert!(
585 array
586 .validity()
587 .vortex_expect("list validity should be derivable")
588 .is_valid(0)
589 .unwrap()
590 );
591 assert!(
592 array
593 .validity()
594 .vortex_expect("list validity should be derivable")
595 .is_valid(1)
596 .unwrap()
597 );
598 assert!(
599 !array
600 .validity()
601 .vortex_expect("list validity should be derivable")
602 .is_valid(2)
603 .unwrap()
604 );
605
606 let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
608 let wrong_scalar = Scalar::from(42i32);
609 assert!(builder.append_scalar(&wrong_scalar).is_err());
610 }
611
612 #[test]
613 fn test_append_array_as_list() {
614 let dtype: Arc<DType> = Arc::new(I32.into());
615 let mut builder =
616 ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
617
618 let arr1 = buffer![1i32, 2, 3].into_array();
620 builder.append_array_as_list(&arr1).unwrap();
621
622 builder
624 .append_value(
625 Scalar::list(
626 Arc::clone(&dtype),
627 vec![10i32.into(), 11i32.into()],
628 NonNullable,
629 )
630 .as_list(),
631 )
632 .unwrap();
633
634 let arr2 = buffer![4i32, 5].into_array();
636 builder.append_array_as_list(&arr2).unwrap();
637
638 let arr3 = buffer![0i32; 0].into_array();
640 builder.append_array_as_list(&arr3).unwrap();
641
642 builder
644 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
645 .unwrap();
646
647 let list = builder.finish_into_list();
648 assert_eq!(list.len(), 5);
649
650 assert_arrays_eq!(
652 list.elements(),
653 PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
654 );
655
656 assert_arrays_eq!(
658 list.offsets(),
659 PrimitiveArray::from_iter([0u32, 3, 5, 7, 7, 7])
660 );
661
662 let mut builder = ListBuilder::<u32>::with_capacity(dtype, NonNullable, 20, 10);
664 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
665 assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
666 }
667}