1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::DType;
8use vortex_dtype::IntegerPType;
9use vortex_dtype::Nullability;
10use vortex_dtype::Nullability::NonNullable;
11use vortex_dtype::match_each_integer_ptype;
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_panic;
17use vortex_mask::Mask;
18
19use crate::Array;
20use crate::ArrayRef;
21use crate::IntoArray;
22use crate::arrays::ListArray;
23use crate::builders::ArrayBuilder;
24use crate::builders::DEFAULT_BUILDER_CAPACITY;
25use crate::builders::LazyBitBufferBuilder;
26use crate::builders::PrimitiveBuilder;
27use crate::builders::builder_with_capacity;
28use crate::canonical::ToCanonical;
29use crate::scalar::ListScalar;
30use crate::scalar::Scalar;
31
32pub struct ListBuilder<O: IntegerPType> {
35 dtype: DType,
37
38 elements_builder: Box<dyn ArrayBuilder>,
40
41 offsets_builder: PrimitiveBuilder<O>,
43
44 nulls: LazyBitBufferBuilder,
46}
47
48impl<O: IntegerPType> ListBuilder<O> {
49 pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
51 Self::with_capacity(
52 value_dtype,
53 nullability,
54 DEFAULT_BUILDER_CAPACITY * 2,
57 DEFAULT_BUILDER_CAPACITY,
58 )
59 }
60
61 pub fn with_capacity(
69 value_dtype: Arc<DType>,
70 nullability: Nullability,
71 elements_capacity: usize,
72 capacity: usize,
73 ) -> Self {
74 let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
75 let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
76
77 offsets_builder.append_zero();
79
80 Self {
81 elements_builder,
82 offsets_builder,
83 nulls: LazyBitBufferBuilder::new(capacity),
84 dtype: DType::List(value_dtype, nullability),
85 }
86 }
87
88 pub fn append_array_as_list(&mut self, array: &dyn Array) -> VortexResult<()> {
95 vortex_ensure!(
96 array.dtype() == self.element_dtype(),
97 "Array dtype {:?} does not match list element dtype {:?}",
98 array.dtype(),
99 self.element_dtype()
100 );
101
102 self.elements_builder.extend_from_array(array);
103 self.nulls.append_non_null();
104 self.offsets_builder.append_value(
105 O::from_usize(self.elements_builder.len())
106 .vortex_expect("Failed to convert from usize to O"),
107 );
108
109 Ok(())
110 }
111
112 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
114 match value.elements() {
115 None => {
116 if self.dtype.nullability() == NonNullable {
117 vortex_bail!("Cannot append null value to non-nullable list");
118 }
119 self.append_null();
120 }
121 Some(elements) => {
122 for scalar in elements {
123 self.elements_builder.append_scalar(&scalar)?;
126 }
127
128 self.nulls.append_non_null();
129 self.offsets_builder.append_value(
130 O::from_usize(self.elements_builder.len())
131 .vortex_expect("Failed to convert from usize to O"),
132 );
133 }
134 }
135
136 Ok(())
137 }
138
139 pub fn finish_into_list(&mut self) -> ListArray {
141 assert_eq!(
142 self.offsets_builder.len(),
143 self.nulls.len() + 1,
144 "offsets length must be one more than nulls length."
145 );
146
147 ListArray::try_new(
148 self.elements_builder.finish(),
149 self.offsets_builder.finish(),
150 self.nulls.finish_with_nullability(self.dtype.nullability()),
151 )
152 .vortex_expect("Buffer, offsets, and validity must have same length.")
153 }
154
155 pub fn element_dtype(&self) -> &DType {
158 let DType::List(element_dtype, _) = &self.dtype else {
159 vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
160 };
161
162 element_dtype
163 }
164}
165
166impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
167 fn as_any(&self) -> &dyn Any {
168 self
169 }
170
171 fn as_any_mut(&mut self) -> &mut dyn Any {
172 self
173 }
174
175 fn dtype(&self) -> &DType {
176 &self.dtype
177 }
178
179 fn len(&self) -> usize {
180 self.nulls.len()
181 }
182
183 fn append_zeros(&mut self, n: usize) {
184 let curr_len = self.elements_builder.len();
185 for _ in 0..n {
186 self.offsets_builder.append_value(
187 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
188 )
189 }
190 self.nulls.append_n_non_nulls(n);
191 }
192
193 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
194 let curr_len = self.elements_builder.len();
195 for _ in 0..n {
196 self.offsets_builder.append_value(
199 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
200 )
201 }
202 self.nulls.append_n_nulls(n);
203 }
204
205 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
206 vortex_ensure!(
207 scalar.dtype() == self.dtype(),
208 "ListBuilder expected scalar with dtype {}, got {}",
209 self.dtype(),
210 scalar.dtype()
211 );
212
213 self.append_value(scalar.as_list())
214 }
215
216 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
217 let list = array.to_listview();
218 if list.is_empty() {
219 return;
220 }
221
222 self.nulls.append_validity_mask(
224 array
225 .validity_mask()
226 .vortex_expect("validity_mask in extend_from_array_unchecked"),
227 );
228
229 let elements = list.elements();
231 let offsets = list.offsets().to_primitive();
232 let sizes = list.sizes().to_primitive();
233
234 fn extend_inner<O, OffsetType, SizeType>(
235 builder: &mut ListBuilder<O>,
236 new_elements: &ArrayRef,
237 new_offsets: &[OffsetType],
238 new_sizes: &[SizeType],
239 ) where
240 O: IntegerPType,
241 OffsetType: IntegerPType,
242 SizeType: IntegerPType,
243 {
244 let num_lists = new_offsets.len();
245 debug_assert_eq!(num_lists, new_sizes.len());
246
247 let mut curr_offset = builder.elements_builder.len();
248 let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
249
250 for i in 0..new_offsets.len() {
253 let offset: usize = new_offsets[i].as_();
254 let size: usize = new_sizes[i].as_();
255
256 if size > 0 {
257 let list_elements = new_elements
258 .slice(offset..offset + size)
259 .vortex_expect("list builder slice");
260 builder.elements_builder.extend_from_array(&list_elements);
261 curr_offset += size;
262 }
263
264 let new_offset =
265 O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
266
267 offsets_range.set_value(i, new_offset);
268 }
269
270 unsafe { offsets_range.finish() };
273 }
274
275 match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
276 match_each_integer_ptype!(sizes.ptype(), |SizeType| {
277 extend_inner(
278 self,
279 elements,
280 offsets.as_slice::<OffsetType>(),
281 sizes.as_slice::<SizeType>(),
282 )
283 })
284 })
285 }
286
287 fn reserve_exact(&mut self, additional: usize) {
288 self.elements_builder.reserve_exact(additional);
289 self.offsets_builder.reserve_exact(additional);
290 self.nulls.reserve_exact(additional);
291 }
292
293 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
294 self.nulls = LazyBitBufferBuilder::new(validity.len());
295 self.nulls.append_validity_mask(validity);
296 }
297
298 fn finish(&mut self) -> ArrayRef {
299 self.finish_into_list().into_array()
300 }
301}
302
303#[cfg(test)]
304mod tests {
305 use std::sync::Arc;
306
307 use Nullability::NonNullable;
308 use Nullability::Nullable;
309 use vortex_buffer::buffer;
310 use vortex_dtype::DType;
311 use vortex_dtype::IntegerPType;
312 use vortex_dtype::Nullability;
313 use vortex_dtype::PType::I32;
314
315 use crate::IntoArray;
316 use crate::ToCanonical;
317 use crate::array::Array;
318 use crate::arrays::ChunkedArray;
319 use crate::arrays::ListArray;
320 use crate::arrays::PrimitiveArray;
321 use crate::assert_arrays_eq;
322 use crate::builders::ArrayBuilder;
323 use crate::builders::list::ListBuilder;
324 use crate::scalar::Scalar;
325 use crate::validity::Validity;
326 use crate::vtable::ValidityHelper;
327
328 #[test]
329 fn test_empty() {
330 let mut builder =
331 ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
332
333 let list = builder.finish();
334 assert_eq!(list.len(), 0);
335 }
336
337 #[test]
338 fn test_values() {
339 let dtype: Arc<DType> = Arc::new(I32.into());
340 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
341
342 builder
343 .append_value(
344 Scalar::list(
345 dtype.clone(),
346 vec![1i32.into(), 2i32.into(), 3i32.into()],
347 NonNullable,
348 )
349 .as_list(),
350 )
351 .unwrap();
352
353 builder
354 .append_value(
355 Scalar::list(
356 dtype,
357 vec![4i32.into(), 5i32.into(), 6i32.into()],
358 NonNullable,
359 )
360 .as_list(),
361 )
362 .unwrap();
363
364 let list = builder.finish();
365 assert_eq!(list.len(), 2);
366
367 let list_array = list.to_listview();
368
369 assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
370 assert_eq!(list_array.list_elements_at(1).unwrap().len(), 3);
371 }
372
373 #[test]
374 fn test_append_empty_list() {
375 let dtype: Arc<DType> = Arc::new(I32.into());
376 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
377
378 assert!(
379 builder
380 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
381 .is_ok()
382 )
383 }
384
385 #[test]
386 fn test_nullable_values() {
387 let dtype: Arc<DType> = Arc::new(I32.into());
388 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
389
390 builder
391 .append_value(
392 Scalar::list(
393 dtype.clone(),
394 vec![1i32.into(), 2i32.into(), 3i32.into()],
395 NonNullable,
396 )
397 .as_list(),
398 )
399 .unwrap();
400
401 builder
402 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
403 .unwrap();
404
405 builder
406 .append_value(
407 Scalar::list(
408 dtype,
409 vec![4i32.into(), 5i32.into(), 6i32.into()],
410 NonNullable,
411 )
412 .as_list(),
413 )
414 .unwrap();
415
416 let list = builder.finish();
417 assert_eq!(list.len(), 3);
418
419 let list_array = list.to_listview();
420
421 assert_eq!(list_array.list_elements_at(0).unwrap().len(), 3);
422 assert_eq!(list_array.list_elements_at(1).unwrap().len(), 0);
423 assert_eq!(list_array.list_elements_at(2).unwrap().len(), 3);
424 }
425
426 fn test_extend_builder_gen<O: IntegerPType>() {
427 let list = ListArray::from_iter_opt_slow::<O, _, _>(
428 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
429 Arc::new(I32.into()),
430 )
431 .unwrap();
432 assert_eq!(list.len(), 3);
433
434 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
435
436 builder.extend_from_array(&list);
437 builder.extend_from_array(&list);
438 builder.extend_from_array(&list.slice(0..0).unwrap());
439 builder.extend_from_array(&list.slice(1..3).unwrap());
440
441 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
442 [
443 Some(vec![0, 1, 2]),
444 None,
445 Some(vec![4, 5]),
446 Some(vec![0, 1, 2]),
447 None,
448 Some(vec![4, 5]),
449 None,
450 Some(vec![4, 5]),
451 ],
452 Arc::new(DType::Primitive(I32, NonNullable)),
453 )
454 .unwrap()
455 .to_listview();
456
457 let actual = builder.finish_into_canonical().into_listview();
458
459 assert_arrays_eq!(actual.elements(), expected.elements());
460
461 assert_arrays_eq!(actual.offsets(), expected.offsets());
462
463 assert_eq!(actual.validity(), expected.validity())
464 }
465
466 #[test]
467 fn test_extend_builder() {
468 test_extend_builder_gen::<i8>();
469 test_extend_builder_gen::<i16>();
470 test_extend_builder_gen::<i32>();
471 test_extend_builder_gen::<i64>();
472
473 test_extend_builder_gen::<u8>();
474 test_extend_builder_gen::<u16>();
475 test_extend_builder_gen::<u32>();
476 test_extend_builder_gen::<u64>();
477 }
478
479 #[test]
480 pub fn test_array_with_gap() {
481 let one_trailing_unused_element = ListArray::try_new(
482 buffer![1, 2, 3, 4].into_array(),
483 buffer![0, 3].into_array(),
484 Validity::NonNullable,
485 )
486 .unwrap();
487
488 let second_array = ListArray::try_new(
489 buffer![5, 6].into_array(),
490 buffer![0, 2].into_array(),
491 Validity::NonNullable,
492 )
493 .unwrap();
494
495 let chunked_list = ChunkedArray::try_new(
496 vec![
497 one_trailing_unused_element.clone().into_array(),
498 second_array.clone().into_array(),
499 ],
500 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
501 );
502
503 let canon_values = chunked_list.unwrap().to_listview();
504
505 assert_eq!(
506 one_trailing_unused_element.scalar_at(0).unwrap(),
507 canon_values.scalar_at(0).unwrap()
508 );
509 assert_eq!(
510 second_array.scalar_at(0).unwrap(),
511 canon_values.scalar_at(1).unwrap()
512 );
513 }
514
515 #[test]
516 fn test_append_scalar() {
517 let dtype: Arc<DType> = Arc::new(I32.into());
518 let mut builder = ListBuilder::<u64>::with_capacity(dtype.clone(), Nullable, 20, 10);
519
520 let list_scalar1 = Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable);
522 builder.append_scalar(&list_scalar1).unwrap();
523
524 let list_scalar2 = Scalar::list(
526 dtype.clone(),
527 vec![3i32.into(), 4i32.into(), 5i32.into()],
528 Nullable,
529 );
530 builder.append_scalar(&list_scalar2).unwrap();
531
532 let null_scalar = Scalar::null(DType::List(dtype.clone(), Nullable));
534 builder.append_scalar(&null_scalar).unwrap();
535
536 let array = builder.finish_into_list();
537 assert_eq!(array.len(), 3);
538
539 let scalar0 = array.scalar_at(0).unwrap();
542 let list0 = scalar0.as_list();
543 assert_eq!(list0.len(), 2);
544 if let Some(list0_items) = list0.elements() {
545 assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
546 assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
547 }
548
549 let scalar1 = array.scalar_at(1).unwrap();
550 let list1 = scalar1.as_list();
551 assert_eq!(list1.len(), 3);
552 if let Some(list1_items) = list1.elements() {
553 assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
554 assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
555 assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
556 }
557
558 let scalar2 = array.scalar_at(2).unwrap();
559 let list2 = scalar2.as_list();
560 assert!(list2.is_null()); assert!(array.validity().is_valid(0).unwrap());
564 assert!(array.validity().is_valid(1).unwrap());
565 assert!(!array.validity().is_valid(2).unwrap());
566
567 let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
569 let wrong_scalar = Scalar::from(42i32);
570 assert!(builder.append_scalar(&wrong_scalar).is_err());
571 }
572
573 #[test]
574 fn test_append_array_as_list() {
575 let dtype: Arc<DType> = Arc::new(I32.into());
576 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 20, 10);
577
578 let arr1 = buffer![1i32, 2, 3].into_array();
580 builder.append_array_as_list(&arr1).unwrap();
581
582 builder
584 .append_value(
585 Scalar::list(dtype.clone(), vec![10i32.into(), 11i32.into()], NonNullable)
586 .as_list(),
587 )
588 .unwrap();
589
590 let arr2 = buffer![4i32, 5].into_array();
592 builder.append_array_as_list(&arr2).unwrap();
593
594 let arr3 = buffer![0i32; 0].into_array();
596 builder.append_array_as_list(&arr3).unwrap();
597
598 builder
600 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
601 .unwrap();
602
603 let list = builder.finish_into_list();
604 assert_eq!(list.len(), 5);
605
606 assert_arrays_eq!(
608 list.elements(),
609 PrimitiveArray::from_iter([1i32, 2, 3, 10, 11, 4, 5])
610 );
611
612 assert_arrays_eq!(
614 list.offsets(),
615 PrimitiveArray::from_iter([0u32, 3, 5, 7, 7, 7])
616 );
617
618 let mut builder = ListBuilder::<u32>::with_capacity(dtype, NonNullable, 20, 10);
620 let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
621 assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
622 }
623}