1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_panic;
12use vortex_mask::Mask;
13
14use crate::ArrayRef;
15use crate::Canonical;
16use crate::ExecutionCtx;
17use crate::IntoArray;
18use crate::LEGACY_SESSION;
19use crate::VortexSessionExecute;
20use crate::arrays::ListArray;
21use crate::arrays::listview::ListViewArrayExt;
22use crate::builders::ArrayBuilder;
23use crate::builders::DEFAULT_BUILDER_CAPACITY;
24use crate::builders::LazyBitBufferBuilder;
25use crate::builders::PrimitiveBuilder;
26use crate::builders::builder_with_capacity;
27#[expect(deprecated)]
28use crate::canonical::ToCanonical as _;
29use crate::dtype::DType;
30use crate::dtype::IntegerPType;
31use crate::dtype::Nullability;
32use crate::dtype::Nullability::NonNullable;
33use crate::match_each_integer_ptype;
34use crate::scalar::ListScalar;
35use crate::scalar::Scalar;
36
37pub struct ListBuilder<O: IntegerPType> {
40 dtype: DType,
42
43 elements_builder: Box<dyn ArrayBuilder>,
45
46 offsets_builder: PrimitiveBuilder<O>,
48
49 nulls: LazyBitBufferBuilder,
51}
52
53impl<O: IntegerPType> ListBuilder<O> {
54 pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
56 Self::with_capacity(
57 value_dtype,
58 nullability,
59 DEFAULT_BUILDER_CAPACITY * 2,
62 DEFAULT_BUILDER_CAPACITY,
63 )
64 }
65
66 pub fn with_capacity(
74 value_dtype: Arc<DType>,
75 nullability: Nullability,
76 elements_capacity: usize,
77 capacity: usize,
78 ) -> Self {
79 let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
80 let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
81
82 offsets_builder.append_zero();
84
85 Self {
86 elements_builder,
87 offsets_builder,
88 nulls: LazyBitBufferBuilder::new(capacity),
89 dtype: DType::List(value_dtype, nullability),
90 }
91 }
92
93 pub fn append_array_as_list(
100 &mut self,
101 array: &ArrayRef,
102 ctx: &mut ExecutionCtx,
103 ) -> VortexResult<()> {
104 vortex_ensure!(
105 array.dtype() == self.element_dtype(),
106 "Array dtype {:?} does not match list element dtype {:?}",
107 array.dtype(),
108 self.element_dtype()
109 );
110
111 array.append_to_builder(self.elements_builder.as_mut(), ctx)?;
112 self.nulls.append_non_null();
113 self.offsets_builder.append_value(
114 O::from_usize(self.elements_builder.len())
115 .vortex_expect("Failed to convert from usize to O"),
116 );
117
118 Ok(())
119 }
120
121 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
123 match value.elements() {
124 None => {
125 if self.dtype.nullability() == NonNullable {
126 vortex_bail!("Cannot append null value to non-nullable list");
127 }
128 self.append_null();
129 }
130 Some(elements) => {
131 for scalar in elements {
132 self.elements_builder.append_scalar(&scalar)?;
135 }
136
137 self.nulls.append_non_null();
138 self.offsets_builder.append_value(
139 O::from_usize(self.elements_builder.len())
140 .vortex_expect("Failed to convert from usize to O"),
141 );
142 }
143 }
144
145 Ok(())
146 }
147
148 pub fn finish_into_list(&mut self) -> ListArray {
150 assert_eq!(
151 self.offsets_builder.len(),
152 self.nulls.len() + 1,
153 "offsets length must be one more than nulls length."
154 );
155
156 ListArray::try_new(
157 self.elements_builder.finish(),
158 self.offsets_builder.finish(),
159 self.nulls.finish_with_nullability(self.dtype.nullability()),
160 )
161 .vortex_expect("Buffer, offsets, and validity must have same length.")
162 }
163
164 pub fn element_dtype(&self) -> &DType {
167 let DType::List(element_dtype, _) = &self.dtype else {
168 vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
169 };
170
171 element_dtype
172 }
173}
174
175impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
176 fn as_any(&self) -> &dyn Any {
177 self
178 }
179
180 fn as_any_mut(&mut self) -> &mut dyn Any {
181 self
182 }
183
184 fn dtype(&self) -> &DType {
185 &self.dtype
186 }
187
188 fn len(&self) -> usize {
189 self.nulls.len()
190 }
191
192 fn append_zeros(&mut self, n: usize) {
193 let curr_len = self.elements_builder.len();
194 for _ in 0..n {
195 self.offsets_builder.append_value(
196 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
197 )
198 }
199 self.nulls.append_n_non_nulls(n);
200 }
201
202 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
203 let curr_len = self.elements_builder.len();
204 for _ in 0..n {
205 self.offsets_builder.append_value(
208 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
209 )
210 }
211 self.nulls.append_n_nulls(n);
212 }
213
214 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
215 vortex_ensure!(
216 scalar.dtype() == self.dtype(),
217 "ListBuilder expected scalar with dtype {}, got {}",
218 self.dtype(),
219 scalar.dtype()
220 );
221
222 self.append_value(scalar.as_list())
223 }
224
225 unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
226 #[expect(deprecated)]
227 let list = array.to_listview();
228 if list.is_empty() {
229 return;
230 }
231
232 self.nulls.append_validity_mask(
234 &array
235 .validity()
236 .vortex_expect("validity_mask in extend_from_array_unchecked")
237 .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
238 .vortex_expect("Failed to compute validity mask"),
239 );
240
241 let elements = list.elements();
243 #[expect(deprecated)]
244 let offsets = list.offsets().to_primitive();
245 #[expect(deprecated)]
246 let sizes = list.sizes().to_primitive();
247
248 fn extend_inner<O, OffsetType, SizeType>(
249 builder: &mut ListBuilder<O>,
250 new_elements: &ArrayRef,
251 new_offsets: &[OffsetType],
252 new_sizes: &[SizeType],
253 ) where
254 O: IntegerPType,
255 OffsetType: IntegerPType,
256 SizeType: IntegerPType,
257 {
258 let num_lists = new_offsets.len();
259 debug_assert_eq!(num_lists, new_sizes.len());
260
261 let mut curr_offset = builder.elements_builder.len();
262 let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
263
264 for i in 0..new_offsets.len() {
267 let offset: usize = new_offsets[i].as_();
268 let size: usize = new_sizes[i].as_();
269
270 if size > 0 {
271 let list_elements = new_elements
272 .slice(offset..offset + size)
273 .vortex_expect("list builder slice");
274 builder.elements_builder.extend_from_array(&list_elements);
275 curr_offset += size;
276 }
277
278 let new_offset =
279 O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
280
281 offsets_range.set_value(i, new_offset);
282 }
283
284 unsafe { offsets_range.finish() };
287 }
288
289 match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
290 match_each_integer_ptype!(sizes.ptype(), |SizeType| {
291 extend_inner(
292 self,
293 elements,
294 offsets.as_slice::<OffsetType>(),
295 sizes.as_slice::<SizeType>(),
296 )
297 })
298 })
299 }
300
301 fn reserve_exact(&mut self, additional: usize) {
302 self.elements_builder.reserve_exact(additional);
303 self.offsets_builder.reserve_exact(additional);
304 self.nulls.reserve_exact(additional);
305 }
306
307 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
308 self.nulls = LazyBitBufferBuilder::from_validity_mask(validity);
309 }
310
311 fn finish(&mut self) -> ArrayRef {
312 self.finish_into_list().into_array()
313 }
314
315 fn finish_into_canonical(&mut self) -> Canonical {
316 #[expect(deprecated)]
317 let listview = self.finish_into_list().into_array().to_listview();
318 Canonical::List(listview)
319 }
320}
321
322#[cfg(test)]
323mod tests {
324 use std::sync::Arc;
325
326 use Nullability::NonNullable;
327 use Nullability::Nullable;
328 use vortex_buffer::buffer;
329 use vortex_error::VortexExpect;
330
331 use crate::IntoArray;
332 #[expect(deprecated)]
333 use crate::ToCanonical as _;
334 use crate::array_session;
335 use crate::arrays::ChunkedArray;
336 use crate::arrays::PrimitiveArray;
337 use crate::arrays::list::ListArrayExt;
338 use crate::arrays::listview::ListViewArrayExt;
339 use crate::assert_arrays_eq;
340 use crate::builders::ArrayBuilder;
341 use crate::builders::list::ListArray;
342 use crate::builders::list::ListBuilder;
343 use crate::dtype::DType;
344 use crate::dtype::IntegerPType;
345 use crate::dtype::Nullability;
346 use crate::dtype::PType::I32;
347 use crate::executor::VortexSessionExecute;
348 use crate::scalar::Scalar;
349 use crate::validity::Validity;
350
351 #[test]
352 fn test_empty() {
353 let mut builder =
354 ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
355
356 let list = builder.finish();
357 assert_eq!(list.len(), 0);
358 }
359
360 #[test]
361 fn test_values() {
362 let dtype: Arc<DType> = Arc::new(I32.into());
363 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
364
365 builder
366 .append_value(
367 Scalar::list(
368 Arc::clone(&dtype),
369 vec![1i32.into(), 2i32.into(), 3i32.into()],
370 NonNullable,
371 )
372 .as_list(),
373 )
374 .unwrap();
375
376 builder
377 .append_value(
378 Scalar::list(
379 dtype,
380 vec![4i32.into(), 5i32.into(), 6i32.into()],
381 NonNullable,
382 )
383 .as_list(),
384 )
385 .unwrap();
386
387 let list = builder.finish();
388 assert_eq!(list.len(), 2);
389
390 #[expect(deprecated)]
391 let list_array = list.to_listview();
392
393 assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
394 assert_eq!(list_array.list_elements_at(1).unwrap().len(), 3);
395 }
396
397 #[test]
398 fn test_append_empty_list() {
399 let dtype: Arc<DType> = Arc::new(I32.into());
400 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
401
402 assert!(
403 builder
404 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
405 .is_ok()
406 )
407 }
408
409 #[test]
410 fn test_nullable_values() {
411 let dtype: Arc<DType> = Arc::new(I32.into());
412 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
413
414 builder
415 .append_value(
416 Scalar::list(
417 Arc::clone(&dtype),
418 vec![1i32.into(), 2i32.into(), 3i32.into()],
419 NonNullable,
420 )
421 .as_list(),
422 )
423 .unwrap();
424
425 builder
426 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
427 .unwrap();
428
429 builder
430 .append_value(
431 Scalar::list(
432 dtype,
433 vec![4i32.into(), 5i32.into(), 6i32.into()],
434 NonNullable,
435 )
436 .as_list(),
437 )
438 .unwrap();
439
440 let list = builder.finish();
441 assert_eq!(list.len(), 3);
442
443 #[expect(deprecated)]
444 let list_array = list.to_listview();
445
446 assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
447 assert_eq!(list_array.list_elements_at(1).unwrap().len(), 0);
448 assert_eq!(list_array.list_elements_at(2).unwrap().len(), 3);
449 }
450
451 fn test_extend_builder_gen<O: IntegerPType>() {
452 let list = ListArray::from_iter_opt_slow::<O, _, _>(
453 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
454 Arc::new(I32.into()),
455 )
456 .unwrap();
457 assert_eq!(list.len(), 3);
458
459 let mut ctx = array_session().create_execution_ctx();
460
461 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
462 builder.extend_from_array(&list);
463 builder.extend_from_array(&list);
464 builder.extend_from_array(&list.slice(0..0).unwrap());
465 builder.extend_from_array(&list.slice(1..3).unwrap());
466
467 #[expect(deprecated)]
468 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
469 [
470 Some(vec![0, 1, 2]),
471 None,
472 Some(vec![4, 5]),
473 Some(vec![0, 1, 2]),
474 None,
475 Some(vec![4, 5]),
476 None,
477 Some(vec![4, 5]),
478 ],
479 Arc::new(DType::Primitive(I32, NonNullable)),
480 )
481 .unwrap()
482 .to_listview();
483
484 let actual = builder.finish_into_canonical().into_listview();
485
486 assert_arrays_eq!(actual.elements(), expected.elements(), &mut ctx);
487
488 assert_arrays_eq!(actual.offsets(), expected.offsets(), &mut ctx);
489
490 assert!(
491 actual
492 .validity()
493 .vortex_expect("list validity should be derivable")
494 .mask_eq(
495 &expected
496 .validity()
497 .vortex_expect("list validity should be derivable"),
498 actual.len(),
499 &mut ctx,
500 )
501 .unwrap(),
502 );
503 }
504
505 #[test]
506 fn test_extend_builder() {
507 test_extend_builder_gen::<i8>();
508 test_extend_builder_gen::<i16>();
509 test_extend_builder_gen::<i32>();
510 test_extend_builder_gen::<i64>();
511
512 test_extend_builder_gen::<u8>();
513 test_extend_builder_gen::<u16>();
514 test_extend_builder_gen::<u32>();
515 test_extend_builder_gen::<u64>();
516 }
517
518 #[test]
519 pub fn test_array_with_gap() {
520 let one_trailing_unused_element = ListArray::try_new(
521 buffer![1, 2, 3, 4].into_array(),
522 buffer![0, 3].into_array(),
523 Validity::NonNullable,
524 )
525 .unwrap();
526
527 let second_array = ListArray::try_new(
528 buffer![5, 6].into_array(),
529 buffer![0, 2].into_array(),
530 Validity::NonNullable,
531 )
532 .unwrap();
533
534 let chunked_list = ChunkedArray::try_new(
535 vec![
536 one_trailing_unused_element.clone().into_array(),
537 second_array.clone().into_array(),
538 ],
539 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
540 );
541
542 #[expect(deprecated)]
543 let canon_values = chunked_list.unwrap().as_array().to_listview();
544
545 assert_eq!(
546 one_trailing_unused_element
547 .execute_scalar(0, &mut array_session().create_execution_ctx())
548 .unwrap(),
549 canon_values
550 .execute_scalar(0, &mut array_session().create_execution_ctx())
551 .unwrap()
552 );
553 assert_eq!(
554 second_array
555 .execute_scalar(0, &mut array_session().create_execution_ctx())
556 .unwrap(),
557 canon_values
558 .execute_scalar(1, &mut array_session().create_execution_ctx())
559 .unwrap()
560 );
561 }
562
563 #[test]
564 fn test_append_scalar() {
565 let dtype: Arc<DType> = Arc::new(I32.into());
566 let mut builder = ListBuilder::<u64>::with_capacity(Arc::clone(&dtype), Nullable, 20, 10);
567
568 let list_scalar1 =
570 Scalar::list(Arc::clone(&dtype), vec![1i32.into(), 2i32.into()], Nullable);
571 builder.append_scalar(&list_scalar1).unwrap();
572
573 let list_scalar2 = Scalar::list(
575 Arc::clone(&dtype),
576 vec![3i32.into(), 4i32.into(), 5i32.into()],
577 Nullable,
578 );
579 builder.append_scalar(&list_scalar2).unwrap();
580
581 let null_scalar = Scalar::null(DType::List(Arc::clone(&dtype), Nullable));
583 builder.append_scalar(&null_scalar).unwrap();
584
585 let array = builder.finish_into_list();
586 assert_eq!(array.len(), 3);
587
588 let mut ctx = array_session().create_execution_ctx();
589
590 let scalar0 = array.execute_scalar(0, &mut ctx).unwrap();
593 let list0 = scalar0.as_list();
594 assert_eq!(list0.len(), 2);
595 if let Some(list0_items) = list0.elements() {
596 assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
597 assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
598 }
599
600 let scalar1 = array.execute_scalar(1, &mut ctx).unwrap();
601 let list1 = scalar1.as_list();
602 assert_eq!(list1.len(), 3);
603 if let Some(list1_items) = list1.elements() {
604 assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
605 assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
606 assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
607 }
608
609 let scalar2 = array.execute_scalar(2, &mut ctx).unwrap();
610 let list2 = scalar2.as_list();
611 assert!(list2.is_null()); assert!(
615 array
616 .validity()
617 .vortex_expect("list validity should be derivable")
618 .execute_is_valid(0, &mut ctx)
619 .unwrap()
620 );
621 assert!(
622 array
623 .validity()
624 .vortex_expect("list validity should be derivable")
625 .execute_is_valid(1, &mut ctx)
626 .unwrap()
627 );
628 assert!(
629 !array
630 .validity()
631 .vortex_expect("list validity should be derivable")
632 .execute_is_valid(2, &mut ctx)
633 .unwrap()
634 );
635
636 let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
638 let wrong_scalar = Scalar::from(42i32);
639 assert!(builder.append_scalar(&wrong_scalar).is_err());
640 }
641
642 #[test]
643 fn test_append_array_as_list() {
644 let dtype: Arc<DType> = Arc::new(I32.into());
645 let mut ctx = array_session().create_execution_ctx();
646 let mut builder =
647 ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
648
649 let arr1 = buffer![1i32, 2, 3].into_array();
651 builder.append_array_as_list(&arr1, &mut ctx).unwrap();
652
653 builder
655 .append_value(
656 Scalar::list(
657 Arc::clone(&dtype),
658 vec![10i32.into(), 11i32.into()],
659 NonNullable,
660 )
661 .as_list(),
662 )
663 .unwrap();
664
665 let arr2 = buffer![4i32, 5].into_array();
667 builder.append_array_as_list(&arr2, &mut ctx).unwrap();
668
669 let arr3 = buffer![0i32; 0].into_array();
671 builder.append_array_as_list(&arr3, &mut ctx).unwrap();
672
673 builder
675 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
676 .unwrap();
677
678 let list = builder.finish_into_list();
679 assert_eq!(list.len(), 5);
680
681 assert_arrays_eq!(
683 list.elements(),
684 PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5]),
685 &mut ctx
686 );
687
688 assert_arrays_eq!(
690 list.offsets(),
691 PrimitiveArray::from_iter([0u32, 3, 5, 7, 7, 7]),
692 &mut ctx
693 );
694
695 let mut builder = ListBuilder::<u32>::with_capacity(dtype, NonNullable, 20, 10);
697 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
698 assert!(
699 builder
700 .append_array_as_list(&wrong_dtype_arr, &mut ctx)
701 .is_err()
702 );
703 }
704}