1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_bail;
10use vortex_error::vortex_ensure;
11use vortex_error::vortex_panic;
12use vortex_mask::Mask;
13
14use crate::ArrayRef;
15use crate::Canonical;
16use crate::IntoArray;
17use crate::LEGACY_SESSION;
18use crate::VortexSessionExecute;
19use crate::arrays::ListArray;
20use crate::arrays::listview::ListViewArrayExt;
21use crate::builders::ArrayBuilder;
22use crate::builders::DEFAULT_BUILDER_CAPACITY;
23use crate::builders::LazyBitBufferBuilder;
24use crate::builders::PrimitiveBuilder;
25use crate::builders::builder_with_capacity;
26#[expect(deprecated)]
27use crate::canonical::ToCanonical as _;
28use crate::dtype::DType;
29use crate::dtype::IntegerPType;
30use crate::dtype::Nullability;
31use crate::dtype::Nullability::NonNullable;
32use crate::match_each_integer_ptype;
33use crate::scalar::ListScalar;
34use crate::scalar::Scalar;
35
36pub struct ListBuilder<O: IntegerPType> {
39 dtype: DType,
41
42 elements_builder: Box<dyn ArrayBuilder>,
44
45 offsets_builder: PrimitiveBuilder<O>,
47
48 nulls: LazyBitBufferBuilder,
50}
51
52impl<O: IntegerPType> ListBuilder<O> {
53 pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
55 Self::with_capacity(
56 value_dtype,
57 nullability,
58 DEFAULT_BUILDER_CAPACITY * 2,
61 DEFAULT_BUILDER_CAPACITY,
62 )
63 }
64
65 pub fn with_capacity(
73 value_dtype: Arc<DType>,
74 nullability: Nullability,
75 elements_capacity: usize,
76 capacity: usize,
77 ) -> Self {
78 let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
79 let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
80
81 offsets_builder.append_zero();
83
84 Self {
85 elements_builder,
86 offsets_builder,
87 nulls: LazyBitBufferBuilder::new(capacity),
88 dtype: DType::List(value_dtype, nullability),
89 }
90 }
91
92 pub fn append_array_as_list(&mut self, array: &ArrayRef) -> VortexResult<()> {
99 vortex_ensure!(
100 array.dtype() == self.element_dtype(),
101 "Array dtype {:?} does not match list element dtype {:?}",
102 array.dtype(),
103 self.element_dtype()
104 );
105
106 self.elements_builder.extend_from_array(array);
107 self.nulls.append_non_null();
108 self.offsets_builder.append_value(
109 O::from_usize(self.elements_builder.len())
110 .vortex_expect("Failed to convert from usize to O"),
111 );
112
113 Ok(())
114 }
115
116 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
118 match value.elements() {
119 None => {
120 if self.dtype.nullability() == NonNullable {
121 vortex_bail!("Cannot append null value to non-nullable list");
122 }
123 self.append_null();
124 }
125 Some(elements) => {
126 for scalar in elements {
127 self.elements_builder.append_scalar(&scalar)?;
130 }
131
132 self.nulls.append_non_null();
133 self.offsets_builder.append_value(
134 O::from_usize(self.elements_builder.len())
135 .vortex_expect("Failed to convert from usize to O"),
136 );
137 }
138 }
139
140 Ok(())
141 }
142
143 pub fn finish_into_list(&mut self) -> ListArray {
145 assert_eq!(
146 self.offsets_builder.len(),
147 self.nulls.len() + 1,
148 "offsets length must be one more than nulls length."
149 );
150
151 ListArray::try_new(
152 self.elements_builder.finish(),
153 self.offsets_builder.finish(),
154 self.nulls.finish_with_nullability(self.dtype.nullability()),
155 )
156 .vortex_expect("Buffer, offsets, and validity must have same length.")
157 }
158
159 pub fn element_dtype(&self) -> &DType {
162 let DType::List(element_dtype, _) = &self.dtype else {
163 vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
164 };
165
166 element_dtype
167 }
168}
169
170impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
171 fn as_any(&self) -> &dyn Any {
172 self
173 }
174
175 fn as_any_mut(&mut self) -> &mut dyn Any {
176 self
177 }
178
179 fn dtype(&self) -> &DType {
180 &self.dtype
181 }
182
183 fn len(&self) -> usize {
184 self.nulls.len()
185 }
186
187 fn append_zeros(&mut self, n: usize) {
188 let curr_len = self.elements_builder.len();
189 for _ in 0..n {
190 self.offsets_builder.append_value(
191 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
192 )
193 }
194 self.nulls.append_n_non_nulls(n);
195 }
196
197 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
198 let curr_len = self.elements_builder.len();
199 for _ in 0..n {
200 self.offsets_builder.append_value(
203 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
204 )
205 }
206 self.nulls.append_n_nulls(n);
207 }
208
209 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
210 vortex_ensure!(
211 scalar.dtype() == self.dtype(),
212 "ListBuilder expected scalar with dtype {}, got {}",
213 self.dtype(),
214 scalar.dtype()
215 );
216
217 self.append_value(scalar.as_list())
218 }
219
220 unsafe fn extend_from_array_unchecked(&mut self, array: &ArrayRef) {
221 #[expect(deprecated)]
222 let list = array.to_listview();
223 if list.is_empty() {
224 return;
225 }
226
227 self.nulls.append_validity_mask(
229 array
230 .validity()
231 .vortex_expect("validity_mask in extend_from_array_unchecked")
232 .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
233 .vortex_expect("Failed to compute validity mask"),
234 );
235
236 let elements = list.elements();
238 #[expect(deprecated)]
239 let offsets = list.offsets().to_primitive();
240 #[expect(deprecated)]
241 let sizes = list.sizes().to_primitive();
242
243 fn extend_inner<O, OffsetType, SizeType>(
244 builder: &mut ListBuilder<O>,
245 new_elements: &ArrayRef,
246 new_offsets: &[OffsetType],
247 new_sizes: &[SizeType],
248 ) where
249 O: IntegerPType,
250 OffsetType: IntegerPType,
251 SizeType: IntegerPType,
252 {
253 let num_lists = new_offsets.len();
254 debug_assert_eq!(num_lists, new_sizes.len());
255
256 let mut curr_offset = builder.elements_builder.len();
257 let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
258
259 for i in 0..new_offsets.len() {
262 let offset: usize = new_offsets[i].as_();
263 let size: usize = new_sizes[i].as_();
264
265 if size > 0 {
266 let list_elements = new_elements
267 .slice(offset..offset + size)
268 .vortex_expect("list builder slice");
269 builder.elements_builder.extend_from_array(&list_elements);
270 curr_offset += size;
271 }
272
273 let new_offset =
274 O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
275
276 offsets_range.set_value(i, new_offset);
277 }
278
279 unsafe { offsets_range.finish() };
282 }
283
284 match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
285 match_each_integer_ptype!(sizes.ptype(), |SizeType| {
286 extend_inner(
287 self,
288 elements,
289 offsets.as_slice::<OffsetType>(),
290 sizes.as_slice::<SizeType>(),
291 )
292 })
293 })
294 }
295
296 fn reserve_exact(&mut self, additional: usize) {
297 self.elements_builder.reserve_exact(additional);
298 self.offsets_builder.reserve_exact(additional);
299 self.nulls.reserve_exact(additional);
300 }
301
302 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
303 self.nulls = LazyBitBufferBuilder::new(validity.len());
304 self.nulls.append_validity_mask(validity);
305 }
306
307 fn finish(&mut self) -> ArrayRef {
308 self.finish_into_list().into_array()
309 }
310
311 fn finish_into_canonical(&mut self) -> Canonical {
312 #[expect(deprecated)]
313 let listview = self.finish_into_list().into_array().to_listview();
314 Canonical::List(listview)
315 }
316}
317
318#[cfg(test)]
319mod tests {
320 use std::sync::Arc;
321
322 use Nullability::NonNullable;
323 use Nullability::Nullable;
324 use vortex_buffer::buffer;
325 use vortex_error::VortexExpect;
326
327 use crate::IntoArray;
328 use crate::LEGACY_SESSION;
329 #[expect(deprecated)]
330 use crate::ToCanonical as _;
331 use crate::arrays::ChunkedArray;
332 use crate::arrays::PrimitiveArray;
333 use crate::arrays::list::ListArrayExt;
334 use crate::arrays::listview::ListViewArrayExt;
335 use crate::assert_arrays_eq;
336 use crate::builders::ArrayBuilder;
337 use crate::builders::list::ListArray;
338 use crate::builders::list::ListBuilder;
339 use crate::dtype::DType;
340 use crate::dtype::IntegerPType;
341 use crate::dtype::Nullability;
342 use crate::dtype::PType::I32;
343 use crate::executor::VortexSessionExecute;
344 use crate::scalar::Scalar;
345 use crate::validity::Validity;
346
347 #[test]
348 fn test_empty() {
349 let mut builder =
350 ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
351
352 let list = builder.finish();
353 assert_eq!(list.len(), 0);
354 }
355
356 #[test]
357 fn test_values() {
358 let dtype: Arc<DType> = Arc::new(I32.into());
359 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
360
361 builder
362 .append_value(
363 Scalar::list(
364 Arc::clone(&dtype),
365 vec![1i32.into(), 2i32.into(), 3i32.into()],
366 NonNullable,
367 )
368 .as_list(),
369 )
370 .unwrap();
371
372 builder
373 .append_value(
374 Scalar::list(
375 dtype,
376 vec![4i32.into(), 5i32.into(), 6i32.into()],
377 NonNullable,
378 )
379 .as_list(),
380 )
381 .unwrap();
382
383 let list = builder.finish();
384 assert_eq!(list.len(), 2);
385
386 #[expect(deprecated)]
387 let list_array = list.to_listview();
388
389 assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
390 assert_eq!(list_array.list_elements_at(1).unwrap().len(), 3);
391 }
392
393 #[test]
394 fn test_append_empty_list() {
395 let dtype: Arc<DType> = Arc::new(I32.into());
396 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 0, 0);
397
398 assert!(
399 builder
400 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
401 .is_ok()
402 )
403 }
404
405 #[test]
406 fn test_nullable_values() {
407 let dtype: Arc<DType> = Arc::new(I32.into());
408 let mut builder = ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), Nullable, 0, 0);
409
410 builder
411 .append_value(
412 Scalar::list(
413 Arc::clone(&dtype),
414 vec![1i32.into(), 2i32.into(), 3i32.into()],
415 NonNullable,
416 )
417 .as_list(),
418 )
419 .unwrap();
420
421 builder
422 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
423 .unwrap();
424
425 builder
426 .append_value(
427 Scalar::list(
428 dtype,
429 vec![4i32.into(), 5i32.into(), 6i32.into()],
430 NonNullable,
431 )
432 .as_list(),
433 )
434 .unwrap();
435
436 let list = builder.finish();
437 assert_eq!(list.len(), 3);
438
439 #[expect(deprecated)]
440 let list_array = list.to_listview();
441
442 assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
443 assert_eq!(list_array.list_elements_at(1).unwrap().len(), 0);
444 assert_eq!(list_array.list_elements_at(2).unwrap().len(), 3);
445 }
446
447 fn test_extend_builder_gen<O: IntegerPType>() {
448 let list = ListArray::from_iter_opt_slow::<O, _, _>(
449 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
450 Arc::new(I32.into()),
451 )
452 .unwrap();
453 assert_eq!(list.len(), 3);
454
455 let mut ctx = LEGACY_SESSION.create_execution_ctx();
456
457 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
458 builder.extend_from_array(&list);
459 builder.extend_from_array(&list);
460 builder.extend_from_array(&list.slice(0..0).unwrap());
461 builder.extend_from_array(&list.slice(1..3).unwrap());
462
463 #[expect(deprecated)]
464 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
465 [
466 Some(vec![0, 1, 2]),
467 None,
468 Some(vec![4, 5]),
469 Some(vec![0, 1, 2]),
470 None,
471 Some(vec![4, 5]),
472 None,
473 Some(vec![4, 5]),
474 ],
475 Arc::new(DType::Primitive(I32, NonNullable)),
476 )
477 .unwrap()
478 .to_listview();
479
480 let actual = builder.finish_into_canonical().into_listview();
481
482 assert_arrays_eq!(actual.elements(), expected.elements());
483
484 assert_arrays_eq!(actual.offsets(), expected.offsets());
485
486 assert!(
487 actual
488 .validity()
489 .vortex_expect("list validity should be derivable")
490 .mask_eq(
491 &expected
492 .validity()
493 .vortex_expect("list validity should be derivable"),
494 &mut ctx,
495 )
496 .unwrap(),
497 );
498 }
499
500 #[test]
501 fn test_extend_builder() {
502 test_extend_builder_gen::<i8>();
503 test_extend_builder_gen::<i16>();
504 test_extend_builder_gen::<i32>();
505 test_extend_builder_gen::<i64>();
506
507 test_extend_builder_gen::<u8>();
508 test_extend_builder_gen::<u16>();
509 test_extend_builder_gen::<u32>();
510 test_extend_builder_gen::<u64>();
511 }
512
513 #[test]
514 pub fn test_array_with_gap() {
515 let one_trailing_unused_element = ListArray::try_new(
516 buffer![1, 2, 3, 4].into_array(),
517 buffer![0, 3].into_array(),
518 Validity::NonNullable,
519 )
520 .unwrap();
521
522 let second_array = ListArray::try_new(
523 buffer![5, 6].into_array(),
524 buffer![0, 2].into_array(),
525 Validity::NonNullable,
526 )
527 .unwrap();
528
529 let chunked_list = ChunkedArray::try_new(
530 vec![
531 one_trailing_unused_element.clone().into_array(),
532 second_array.clone().into_array(),
533 ],
534 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
535 );
536
537 #[expect(deprecated)]
538 let canon_values = chunked_list.unwrap().as_array().to_listview();
539
540 assert_eq!(
541 one_trailing_unused_element
542 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
543 .unwrap(),
544 canon_values
545 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
546 .unwrap()
547 );
548 assert_eq!(
549 second_array
550 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
551 .unwrap(),
552 canon_values
553 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
554 .unwrap()
555 );
556 }
557
558 #[test]
559 fn test_append_scalar() {
560 let dtype: Arc<DType> = Arc::new(I32.into());
561 let mut builder = ListBuilder::<u64>::with_capacity(Arc::clone(&dtype), Nullable, 20, 10);
562
563 let list_scalar1 =
565 Scalar::list(Arc::clone(&dtype), vec![1i32.into(), 2i32.into()], Nullable);
566 builder.append_scalar(&list_scalar1).unwrap();
567
568 let list_scalar2 = Scalar::list(
570 Arc::clone(&dtype),
571 vec![3i32.into(), 4i32.into(), 5i32.into()],
572 Nullable,
573 );
574 builder.append_scalar(&list_scalar2).unwrap();
575
576 let null_scalar = Scalar::null(DType::List(Arc::clone(&dtype), Nullable));
578 builder.append_scalar(&null_scalar).unwrap();
579
580 let array = builder.finish_into_list();
581 assert_eq!(array.len(), 3);
582
583 let scalar0 = array
586 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
587 .unwrap();
588 let list0 = scalar0.as_list();
589 assert_eq!(list0.len(), 2);
590 if let Some(list0_items) = list0.elements() {
591 assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
592 assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
593 }
594
595 let scalar1 = array
596 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
597 .unwrap();
598 let list1 = scalar1.as_list();
599 assert_eq!(list1.len(), 3);
600 if let Some(list1_items) = list1.elements() {
601 assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
602 assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
603 assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
604 }
605
606 let scalar2 = array
607 .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
608 .unwrap();
609 let list2 = scalar2.as_list();
610 assert!(list2.is_null()); assert!(
614 array
615 .validity()
616 .vortex_expect("list validity should be derivable")
617 .is_valid(0)
618 .unwrap()
619 );
620 assert!(
621 array
622 .validity()
623 .vortex_expect("list validity should be derivable")
624 .is_valid(1)
625 .unwrap()
626 );
627 assert!(
628 !array
629 .validity()
630 .vortex_expect("list validity should be derivable")
631 .is_valid(2)
632 .unwrap()
633 );
634
635 let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
637 let wrong_scalar = Scalar::from(42i32);
638 assert!(builder.append_scalar(&wrong_scalar).is_err());
639 }
640
641 #[test]
642 fn test_append_array_as_list() {
643 let dtype: Arc<DType> = Arc::new(I32.into());
644 let mut builder =
645 ListBuilder::<u32>::with_capacity(Arc::clone(&dtype), NonNullable, 20, 10);
646
647 let arr1 = buffer![1i32, 2, 3].into_array();
649 builder.append_array_as_list(&arr1).unwrap();
650
651 builder
653 .append_value(
654 Scalar::list(
655 Arc::clone(&dtype),
656 vec![10i32.into(), 11i32.into()],
657 NonNullable,
658 )
659 .as_list(),
660 )
661 .unwrap();
662
663 let arr2 = buffer![4i32, 5].into_array();
665 builder.append_array_as_list(&arr2).unwrap();
666
667 let arr3 = buffer![0i32; 0].into_array();
669 builder.append_array_as_list(&arr3).unwrap();
670
671 builder
673 .append_value(Scalar::list_empty(Arc::clone(&dtype), NonNullable).as_list())
674 .unwrap();
675
676 let list = builder.finish_into_list();
677 assert_eq!(list.len(), 5);
678
679 assert_arrays_eq!(
681 list.elements(),
682 PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
683 );
684
685 assert_arrays_eq!(
687 list.offsets(),
688 PrimitiveArray::from_iter([0u32, 3, 5, 7, 7, 7])
689 );
690
691 let mut builder = ListBuilder::<u32>::with_capacity(dtype, NonNullable, 20, 10);
693 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
694 assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
695 }
696}