1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::DType;
8use vortex_dtype::IntegerPType;
9use vortex_dtype::Nullability;
10use vortex_dtype::Nullability::NonNullable;
11use vortex_dtype::match_each_integer_ptype;
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_panic;
17use vortex_mask::Mask;
18use vortex_scalar::ListScalar;
19use vortex_scalar::Scalar;
20
21use crate::Array;
22use crate::ArrayRef;
23use crate::IntoArray;
24use crate::arrays::ListArray;
25use crate::arrays::list_view_from_list;
26use crate::builders::ArrayBuilder;
27use crate::builders::DEFAULT_BUILDER_CAPACITY;
28use crate::builders::LazyBitBufferBuilder;
29use crate::builders::PrimitiveBuilder;
30use crate::builders::builder_with_capacity;
31use crate::canonical::Canonical;
32use crate::canonical::ToCanonical;
33
34pub struct ListBuilder<O: IntegerPType> {
37 dtype: DType,
39
40 elements_builder: Box<dyn ArrayBuilder>,
42
43 offsets_builder: PrimitiveBuilder<O>,
45
46 nulls: LazyBitBufferBuilder,
48}
49
50impl<O: IntegerPType> ListBuilder<O> {
51 pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
53 Self::with_capacity(
54 value_dtype,
55 nullability,
56 DEFAULT_BUILDER_CAPACITY * 2,
59 DEFAULT_BUILDER_CAPACITY,
60 )
61 }
62
63 pub fn with_capacity(
71 value_dtype: Arc<DType>,
72 nullability: Nullability,
73 elements_capacity: usize,
74 capacity: usize,
75 ) -> Self {
76 let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
77 let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
78
79 offsets_builder.append_zero();
81
82 Self {
83 elements_builder,
84 offsets_builder,
85 nulls: LazyBitBufferBuilder::new(capacity),
86 dtype: DType::List(value_dtype, nullability),
87 }
88 }
89
90 pub fn append_array_as_list(&mut self, array: &dyn Array) -> VortexResult<()> {
97 vortex_ensure!(
98 array.dtype() == self.element_dtype(),
99 "Array dtype {:?} does not match list element dtype {:?}",
100 array.dtype(),
101 self.element_dtype()
102 );
103
104 self.elements_builder.extend_from_array(array);
105 self.nulls.append_non_null();
106 self.offsets_builder.append_value(
107 O::from_usize(self.elements_builder.len())
108 .vortex_expect("Failed to convert from usize to O"),
109 );
110
111 Ok(())
112 }
113
114 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
116 match value.elements() {
117 None => {
118 if self.dtype.nullability() == NonNullable {
119 vortex_bail!("Cannot append null value to non-nullable list");
120 }
121 self.append_null();
122 }
123 Some(elements) => {
124 for scalar in elements {
125 self.elements_builder.append_scalar(&scalar)?;
128 }
129
130 self.nulls.append_non_null();
131 self.offsets_builder.append_value(
132 O::from_usize(self.elements_builder.len())
133 .vortex_expect("Failed to convert from usize to O"),
134 );
135 }
136 }
137
138 Ok(())
139 }
140
141 pub fn finish_into_list(&mut self) -> ListArray {
143 assert_eq!(
144 self.offsets_builder.len(),
145 self.nulls.len() + 1,
146 "offsets length must be one more than nulls length."
147 );
148
149 ListArray::try_new(
150 self.elements_builder.finish(),
151 self.offsets_builder.finish(),
152 self.nulls.finish_with_nullability(self.dtype.nullability()),
153 )
154 .vortex_expect("Buffer, offsets, and validity must have same length.")
155 }
156
157 pub fn element_dtype(&self) -> &DType {
160 let DType::List(element_dtype, _) = &self.dtype else {
161 vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
162 };
163
164 element_dtype
165 }
166}
167
168impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
169 fn as_any(&self) -> &dyn Any {
170 self
171 }
172
173 fn as_any_mut(&mut self) -> &mut dyn Any {
174 self
175 }
176
177 fn dtype(&self) -> &DType {
178 &self.dtype
179 }
180
181 fn len(&self) -> usize {
182 self.nulls.len()
183 }
184
185 fn append_zeros(&mut self, n: usize) {
186 let curr_len = self.elements_builder.len();
187 for _ in 0..n {
188 self.offsets_builder.append_value(
189 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
190 )
191 }
192 self.nulls.append_n_non_nulls(n);
193 }
194
195 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
196 let curr_len = self.elements_builder.len();
197 for _ in 0..n {
198 self.offsets_builder.append_value(
201 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
202 )
203 }
204 self.nulls.append_n_nulls(n);
205 }
206
207 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
208 vortex_ensure!(
209 scalar.dtype() == self.dtype(),
210 "ListBuilder expected scalar with dtype {:?}, got {:?}",
211 self.dtype(),
212 scalar.dtype()
213 );
214
215 let list_scalar = ListScalar::try_from(scalar)?;
216 self.append_value(list_scalar)
217 }
218
219 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
220 let list = array.to_listview();
221 if list.is_empty() {
222 return;
223 }
224
225 self.nulls.append_validity_mask(array.validity_mask());
227
228 let elements = list.elements();
230 let offsets = list.offsets().to_primitive();
231 let sizes = list.sizes().to_primitive();
232
233 fn extend_inner<O, OffsetType, SizeType>(
234 builder: &mut ListBuilder<O>,
235 new_elements: &ArrayRef,
236 new_offsets: &[OffsetType],
237 new_sizes: &[SizeType],
238 ) where
239 O: IntegerPType,
240 OffsetType: IntegerPType,
241 SizeType: IntegerPType,
242 {
243 let num_lists = new_offsets.len();
244 debug_assert_eq!(num_lists, new_sizes.len());
245
246 let mut curr_offset = builder.elements_builder.len();
247 let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
248
249 for i in 0..new_offsets.len() {
252 let offset: usize = new_offsets[i].as_();
253 let size: usize = new_sizes[i].as_();
254
255 if size > 0 {
256 let list_elements = new_elements.slice(offset..offset + size);
257 builder.elements_builder.extend_from_array(&list_elements);
258 curr_offset += size;
259 }
260
261 let new_offset =
262 O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
263
264 offsets_range.set_value(i, new_offset);
265 }
266
267 unsafe { offsets_range.finish() };
270 }
271
272 match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
273 match_each_integer_ptype!(sizes.ptype(), |SizeType| {
274 extend_inner(
275 self,
276 elements,
277 offsets.as_slice::<OffsetType>(),
278 sizes.as_slice::<SizeType>(),
279 )
280 })
281 })
282 }
283
284 fn reserve_exact(&mut self, additional: usize) {
285 self.elements_builder.reserve_exact(additional);
286 self.offsets_builder.reserve_exact(additional);
287 self.nulls.reserve_exact(additional);
288 }
289
290 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
291 self.nulls = LazyBitBufferBuilder::new(validity.len());
292 self.nulls.append_validity_mask(validity);
293 }
294
295 fn finish(&mut self) -> ArrayRef {
296 self.finish_into_list().into_array()
297 }
298
299 fn finish_into_canonical(&mut self) -> Canonical {
300 Canonical::List(list_view_from_list(self.finish_into_list()))
301 }
302}
303
304#[cfg(test)]
305mod tests {
306 use std::sync::Arc;
307
308 use Nullability::NonNullable;
309 use Nullability::Nullable;
310 use vortex_buffer::buffer;
311 use vortex_dtype::DType;
312 use vortex_dtype::IntegerPType;
313 use vortex_dtype::Nullability;
314 use vortex_dtype::PType::I32;
315 use vortex_scalar::Scalar;
316
317 use crate::IntoArray;
318 use crate::ToCanonical;
319 use crate::array::Array;
320 use crate::arrays::ChunkedArray;
321 use crate::arrays::ListArray;
322 use crate::builders::ArrayBuilder;
323 use crate::builders::list::ListBuilder;
324 use crate::validity::Validity;
325 use crate::vtable::ValidityHelper;
326
327 #[test]
328 fn test_empty() {
329 let mut builder =
330 ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
331
332 let list = builder.finish();
333 assert_eq!(list.len(), 0);
334 }
335
336 #[test]
337 fn test_values() {
338 let dtype: Arc<DType> = Arc::new(I32.into());
339 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
340
341 builder
342 .append_value(
343 Scalar::list(
344 dtype.clone(),
345 vec![1i32.into(), 2i32.into(), 3i32.into()],
346 NonNullable,
347 )
348 .as_list(),
349 )
350 .unwrap();
351
352 builder
353 .append_value(
354 Scalar::list(
355 dtype,
356 vec![4i32.into(), 5i32.into(), 6i32.into()],
357 NonNullable,
358 )
359 .as_list(),
360 )
361 .unwrap();
362
363 let list = builder.finish();
364 assert_eq!(list.len(), 2);
365
366 let list_array = list.to_listview();
367
368 assert_eq!(list_array.list_elements_at(0).len(), 3);
369 assert_eq!(list_array.list_elements_at(1).len(), 3);
370 }
371
372 #[test]
373 fn test_append_empty_list() {
374 let dtype: Arc<DType> = Arc::new(I32.into());
375 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
376
377 assert!(
378 builder
379 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
380 .is_ok()
381 )
382 }
383
384 #[test]
385 fn test_nullable_values() {
386 let dtype: Arc<DType> = Arc::new(I32.into());
387 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
388
389 builder
390 .append_value(
391 Scalar::list(
392 dtype.clone(),
393 vec![1i32.into(), 2i32.into(), 3i32.into()],
394 NonNullable,
395 )
396 .as_list(),
397 )
398 .unwrap();
399
400 builder
401 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
402 .unwrap();
403
404 builder
405 .append_value(
406 Scalar::list(
407 dtype,
408 vec![4i32.into(), 5i32.into(), 6i32.into()],
409 NonNullable,
410 )
411 .as_list(),
412 )
413 .unwrap();
414
415 let list = builder.finish();
416 assert_eq!(list.len(), 3);
417
418 let list_array = list.to_listview();
419
420 assert_eq!(list_array.list_elements_at(0).len(), 3);
421 assert_eq!(list_array.list_elements_at(1).len(), 0);
422 assert_eq!(list_array.list_elements_at(2).len(), 3);
423 }
424
425 fn test_extend_builder_gen<O: IntegerPType>() {
426 let list = ListArray::from_iter_opt_slow::<O, _, _>(
427 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
428 Arc::new(I32.into()),
429 )
430 .unwrap();
431 assert_eq!(list.len(), 3);
432
433 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
434
435 builder.extend_from_array(&list);
436 builder.extend_from_array(&list);
437 builder.extend_from_array(&list.slice(0..0));
438 builder.extend_from_array(&list.slice(1..3));
439
440 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
441 [
442 Some(vec![0, 1, 2]),
443 None,
444 Some(vec![4, 5]),
445 Some(vec![0, 1, 2]),
446 None,
447 Some(vec![4, 5]),
448 None,
449 Some(vec![4, 5]),
450 ],
451 Arc::new(DType::Primitive(I32, NonNullable)),
452 )
453 .unwrap()
454 .to_listview();
455
456 let actual = builder.finish_into_canonical().into_listview();
457
458 assert_eq!(
459 actual.elements().to_primitive().as_slice::<i32>(),
460 expected.elements().to_primitive().as_slice::<i32>()
461 );
462
463 assert_eq!(
464 actual.offsets().to_primitive().as_slice::<O>(),
465 expected.offsets().to_primitive().as_slice::<O>()
466 );
467
468 assert_eq!(actual.validity(), expected.validity())
469 }
470
471 #[test]
472 fn test_extend_builder() {
473 test_extend_builder_gen::<i8>();
474 test_extend_builder_gen::<i16>();
475 test_extend_builder_gen::<i32>();
476 test_extend_builder_gen::<i64>();
477
478 test_extend_builder_gen::<u8>();
479 test_extend_builder_gen::<u16>();
480 test_extend_builder_gen::<u32>();
481 test_extend_builder_gen::<u64>();
482 }
483
484 #[test]
485 pub fn test_array_with_gap() {
486 let one_trailing_unused_element = ListArray::try_new(
487 buffer![1, 2, 3, 4].into_array(),
488 buffer![0, 3].into_array(),
489 Validity::NonNullable,
490 )
491 .unwrap();
492
493 let second_array = ListArray::try_new(
494 buffer![5, 6].into_array(),
495 buffer![0, 2].into_array(),
496 Validity::NonNullable,
497 )
498 .unwrap();
499
500 let chunked_list = ChunkedArray::try_new(
501 vec![
502 one_trailing_unused_element.clone().into_array(),
503 second_array.clone().into_array(),
504 ],
505 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
506 );
507
508 let canon_values = chunked_list.unwrap().to_listview();
509
510 assert_eq!(
511 one_trailing_unused_element.scalar_at(0),
512 canon_values.scalar_at(0)
513 );
514 assert_eq!(second_array.scalar_at(0), canon_values.scalar_at(1));
515 }
516
517 #[test]
518 fn test_append_scalar() {
519 let dtype: Arc<DType> = Arc::new(I32.into());
520 let mut builder = ListBuilder::<u64>::with_capacity(dtype.clone(), Nullable, 20, 10);
521
522 let list_scalar1 = Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable);
524 builder.append_scalar(&list_scalar1).unwrap();
525
526 let list_scalar2 = Scalar::list(
528 dtype.clone(),
529 vec![3i32.into(), 4i32.into(), 5i32.into()],
530 Nullable,
531 );
532 builder.append_scalar(&list_scalar2).unwrap();
533
534 let null_scalar = Scalar::null(DType::List(dtype.clone(), Nullable));
536 builder.append_scalar(&null_scalar).unwrap();
537
538 let array = builder.finish_into_list();
539 assert_eq!(array.len(), 3);
540
541 let scalar0 = array.scalar_at(0);
544 let list0 = scalar0.as_list();
545 assert_eq!(list0.len(), 2);
546 if let Some(list0_items) = list0.elements() {
547 assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
548 assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
549 }
550
551 let scalar1 = array.scalar_at(1);
552 let list1 = scalar1.as_list();
553 assert_eq!(list1.len(), 3);
554 if let Some(list1_items) = list1.elements() {
555 assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
556 assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
557 assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
558 }
559
560 let scalar2 = array.scalar_at(2);
561 let list2 = scalar2.as_list();
562 assert!(list2.is_null()); assert!(array.validity().is_valid(0));
566 assert!(array.validity().is_valid(1));
567 assert!(!array.validity().is_valid(2));
568
569 let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
571 let wrong_scalar = Scalar::from(42i32);
572 assert!(builder.append_scalar(&wrong_scalar).is_err());
573 }
574
575 #[test]
576 fn test_append_array_as_list() {
577 let dtype: Arc<DType> = Arc::new(I32.into());
578 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 20, 10);
579
580 let arr1 = buffer![1i32, 2, 3].into_array();
582 builder.append_array_as_list(&arr1).unwrap();
583
584 builder
586 .append_value(
587 Scalar::list(dtype.clone(), vec![10i32.into(), 11i32.into()], NonNullable)
588 .as_list(),
589 )
590 .unwrap();
591
592 let arr2 = buffer![4i32, 5].into_array();
594 builder.append_array_as_list(&arr2).unwrap();
595
596 let arr3 = buffer![0i32; 0].into_array();
598 builder.append_array_as_list(&arr3).unwrap();
599
600 builder
602 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
603 .unwrap();
604
605 let list = builder.finish_into_list();
606 assert_eq!(list.len(), 5);
607
608 let elements = list.elements().to_primitive();
610 assert_eq!(elements.as_slice::<i32>(), &[1, 2, 3, 10, 11, 4, 5]);
611
612 let offsets = list.offsets().to_primitive();
614 assert_eq!(offsets.as_slice::<u32>(), &[0, 3, 5, 7, 7, 7]);
615
616 let mut builder = ListBuilder::<u32>::with_capacity(dtype, NonNullable, 20, 10);
618 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
619 assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
620 }
621}