vortex_array/builders/
list.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::DType;
8use vortex_dtype::IntegerPType;
9use vortex_dtype::Nullability;
10use vortex_dtype::Nullability::NonNullable;
11use vortex_dtype::match_each_integer_ptype;
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_panic;
17use vortex_mask::Mask;
18use vortex_scalar::ListScalar;
19use vortex_scalar::Scalar;
20
21use crate::Array;
22use crate::ArrayRef;
23use crate::IntoArray;
24use crate::arrays::ListArray;
25use crate::arrays::list_view_from_list;
26use crate::builders::ArrayBuilder;
27use crate::builders::DEFAULT_BUILDER_CAPACITY;
28use crate::builders::LazyBitBufferBuilder;
29use crate::builders::PrimitiveBuilder;
30use crate::builders::builder_with_capacity;
31use crate::canonical::Canonical;
32use crate::canonical::ToCanonical;
33
34/// The builder for building a [`ListArray`], parametrized by the [`IntegerPType`] of the `offsets`
35/// builder.
36pub struct ListBuilder<O: IntegerPType> {
37    /// The [`DType`] of the [`ListArray`]. This **must** be a [`DType::List`].
38    dtype: DType,
39
40    /// The builder for the underlying elements of the [`ListArray`].
41    elements_builder: Box<dyn ArrayBuilder>,
42
43    /// The builder for the `offsets` into the `elements` array.
44    offsets_builder: PrimitiveBuilder<O>,
45
46    /// The null map builder of the [`ListArray`].
47    nulls: LazyBitBufferBuilder,
48}
49
50impl<O: IntegerPType> ListBuilder<O> {
51    /// Creates a new `ListBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
52    pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
53        Self::with_capacity(
54            value_dtype,
55            nullability,
56            // We arbitrarily choose 2 times the number of list scalars for the capacity of the
57            // elements builder since we cannot know this ahead of time.
58            DEFAULT_BUILDER_CAPACITY * 2,
59            DEFAULT_BUILDER_CAPACITY,
60        )
61    }
62
63    /// Create a new [`ListArray`] builder with a with the given `capacity`, as well as an initial
64    /// capacity for the `elements` builder (since we cannot know that ahead of time solely based on
65    /// the outer array `capacity`).
66    ///
67    /// # Notes
68    ///
69    /// The number of offsets is one more than the length (# of list scalars) in the array.
70    pub fn with_capacity(
71        value_dtype: Arc<DType>,
72        nullability: Nullability,
73        elements_capacity: usize,
74        capacity: usize,
75    ) -> Self {
76        let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
77        let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
78
79        // The first offset is always 0 and represents an empty list.
80        offsets_builder.append_zero();
81
82        Self {
83            elements_builder,
84            offsets_builder,
85            nulls: LazyBitBufferBuilder::new(capacity),
86            dtype: DType::List(value_dtype, nullability),
87        }
88    }
89
90    /// Appends an array as a single non-null list entry to the builder.
91    ///
92    /// The input `array` must have the same dtype as the element dtype of this list builder.
93    ///
94    /// Note that the list entry will be non-null but the elements themselves are allowed to be null
95    /// (only if the elements [`DType`] in nullable, of course).
96    pub fn append_array_as_list(&mut self, array: &dyn Array) -> VortexResult<()> {
97        vortex_ensure!(
98            array.dtype() == self.element_dtype(),
99            "Array dtype {:?} does not match list element dtype {:?}",
100            array.dtype(),
101            self.element_dtype()
102        );
103
104        self.elements_builder.extend_from_array(array);
105        self.nulls.append_non_null();
106        self.offsets_builder.append_value(
107            O::from_usize(self.elements_builder.len())
108                .vortex_expect("Failed to convert from usize to O"),
109        );
110
111        Ok(())
112    }
113
114    /// Appends a list `value` to the builder.
115    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
116        match value.elements() {
117            None => {
118                if self.dtype.nullability() == NonNullable {
119                    vortex_bail!("Cannot append null value to non-nullable list");
120                }
121                self.append_null();
122            }
123            Some(elements) => {
124                for scalar in elements {
125                    // TODO(connor): This is slow, we should be able to append multiple values at
126                    // once, or the list scalar should hold an Array
127                    self.elements_builder.append_scalar(&scalar)?;
128                }
129
130                self.nulls.append_non_null();
131                self.offsets_builder.append_value(
132                    O::from_usize(self.elements_builder.len())
133                        .vortex_expect("Failed to convert from usize to O"),
134                );
135            }
136        }
137
138        Ok(())
139    }
140
141    /// Finishes the builder directly into a [`ListArray`].
142    pub fn finish_into_list(&mut self) -> ListArray {
143        assert_eq!(
144            self.offsets_builder.len(),
145            self.nulls.len() + 1,
146            "offsets length must be one more than nulls length."
147        );
148
149        ListArray::try_new(
150            self.elements_builder.finish(),
151            self.offsets_builder.finish(),
152            self.nulls.finish_with_nullability(self.dtype.nullability()),
153        )
154        .vortex_expect("Buffer, offsets, and validity must have same length.")
155    }
156
157    /// The [`DType`] of the inner elements. Note that this is **not** the same as the [`DType`] of
158    /// the outer `List`.
159    pub fn element_dtype(&self) -> &DType {
160        let DType::List(element_dtype, _) = &self.dtype else {
161            vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
162        };
163
164        element_dtype
165    }
166}
167
168impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
169    fn as_any(&self) -> &dyn Any {
170        self
171    }
172
173    fn as_any_mut(&mut self) -> &mut dyn Any {
174        self
175    }
176
177    fn dtype(&self) -> &DType {
178        &self.dtype
179    }
180
181    fn len(&self) -> usize {
182        self.nulls.len()
183    }
184
185    fn append_zeros(&mut self, n: usize) {
186        let curr_len = self.elements_builder.len();
187        for _ in 0..n {
188            self.offsets_builder.append_value(
189                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
190            )
191        }
192        self.nulls.append_n_non_nulls(n);
193    }
194
195    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
196        let curr_len = self.elements_builder.len();
197        for _ in 0..n {
198            // A list with a null element is can be a list with a zero-span offset and a validity
199            // bit set
200            self.offsets_builder.append_value(
201                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
202            )
203        }
204        self.nulls.append_n_nulls(n);
205    }
206
207    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
208        vortex_ensure!(
209            scalar.dtype() == self.dtype(),
210            "ListBuilder expected scalar with dtype {:?}, got {:?}",
211            self.dtype(),
212            scalar.dtype()
213        );
214
215        let list_scalar = ListScalar::try_from(scalar)?;
216        self.append_value(list_scalar)
217    }
218
219    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
220        let list = array.to_listview();
221        if list.is_empty() {
222            return;
223        }
224
225        // Append validity information.
226        self.nulls.append_validity_mask(array.validity_mask());
227
228        // Note that `ListViewArray` has `n` offsets and sizes, not `n+1` offsets like `ListArray`.
229        let elements = list.elements();
230        let offsets = list.offsets().to_primitive();
231        let sizes = list.sizes().to_primitive();
232
233        fn extend_inner<O, OffsetType, SizeType>(
234            builder: &mut ListBuilder<O>,
235            new_elements: &ArrayRef,
236            new_offsets: &[OffsetType],
237            new_sizes: &[SizeType],
238        ) where
239            O: IntegerPType,
240            OffsetType: IntegerPType,
241            SizeType: IntegerPType,
242        {
243            let num_lists = new_offsets.len();
244            debug_assert_eq!(num_lists, new_sizes.len());
245
246            let mut curr_offset = builder.elements_builder.len();
247            let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
248
249            // We need to append each list individually, converting from `ListViewArray` format to
250            // the `ListArray` format that `ListBuilder` expects.
251            for i in 0..new_offsets.len() {
252                let offset: usize = new_offsets[i].as_();
253                let size: usize = new_sizes[i].as_();
254
255                if size > 0 {
256                    let list_elements = new_elements.slice(offset..offset + size);
257                    builder.elements_builder.extend_from_array(&list_elements);
258                    curr_offset += size;
259                }
260
261                let new_offset =
262                    O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
263
264                offsets_range.set_value(i, new_offset);
265            }
266
267            // SAFETY: We have initialized all `num_lists` values, and since the `offsets` array is
268            // non-nullable, we are done.
269            unsafe { offsets_range.finish() };
270        }
271
272        match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
273            match_each_integer_ptype!(sizes.ptype(), |SizeType| {
274                extend_inner(
275                    self,
276                    elements,
277                    offsets.as_slice::<OffsetType>(),
278                    sizes.as_slice::<SizeType>(),
279                )
280            })
281        })
282    }
283
284    fn reserve_exact(&mut self, additional: usize) {
285        self.elements_builder.reserve_exact(additional);
286        self.offsets_builder.reserve_exact(additional);
287        self.nulls.reserve_exact(additional);
288    }
289
290    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
291        self.nulls = LazyBitBufferBuilder::new(validity.len());
292        self.nulls.append_validity_mask(validity);
293    }
294
295    fn finish(&mut self) -> ArrayRef {
296        self.finish_into_list().into_array()
297    }
298
299    fn finish_into_canonical(&mut self) -> Canonical {
300        Canonical::List(list_view_from_list(self.finish_into_list()))
301    }
302}
303
304#[cfg(test)]
305mod tests {
306    use std::sync::Arc;
307
308    use Nullability::NonNullable;
309    use Nullability::Nullable;
310    use vortex_buffer::buffer;
311    use vortex_dtype::DType;
312    use vortex_dtype::IntegerPType;
313    use vortex_dtype::Nullability;
314    use vortex_dtype::PType::I32;
315    use vortex_scalar::Scalar;
316
317    use crate::IntoArray;
318    use crate::ToCanonical;
319    use crate::array::Array;
320    use crate::arrays::ChunkedArray;
321    use crate::arrays::ListArray;
322    use crate::builders::ArrayBuilder;
323    use crate::builders::list::ListBuilder;
324    use crate::validity::Validity;
325    use crate::vtable::ValidityHelper;
326
327    #[test]
328    fn test_empty() {
329        let mut builder =
330            ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
331
332        let list = builder.finish();
333        assert_eq!(list.len(), 0);
334    }
335
336    #[test]
337    fn test_values() {
338        let dtype: Arc<DType> = Arc::new(I32.into());
339        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
340
341        builder
342            .append_value(
343                Scalar::list(
344                    dtype.clone(),
345                    vec![1i32.into(), 2i32.into(), 3i32.into()],
346                    NonNullable,
347                )
348                .as_list(),
349            )
350            .unwrap();
351
352        builder
353            .append_value(
354                Scalar::list(
355                    dtype,
356                    vec![4i32.into(), 5i32.into(), 6i32.into()],
357                    NonNullable,
358                )
359                .as_list(),
360            )
361            .unwrap();
362
363        let list = builder.finish();
364        assert_eq!(list.len(), 2);
365
366        let list_array = list.to_listview();
367
368        assert_eq!(list_array.list_elements_at(0).len(), 3);
369        assert_eq!(list_array.list_elements_at(1).len(), 3);
370    }
371
372    #[test]
373    fn test_append_empty_list() {
374        let dtype: Arc<DType> = Arc::new(I32.into());
375        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
376
377        assert!(
378            builder
379                .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
380                .is_ok()
381        )
382    }
383
384    #[test]
385    fn test_nullable_values() {
386        let dtype: Arc<DType> = Arc::new(I32.into());
387        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
388
389        builder
390            .append_value(
391                Scalar::list(
392                    dtype.clone(),
393                    vec![1i32.into(), 2i32.into(), 3i32.into()],
394                    NonNullable,
395                )
396                .as_list(),
397            )
398            .unwrap();
399
400        builder
401            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
402            .unwrap();
403
404        builder
405            .append_value(
406                Scalar::list(
407                    dtype,
408                    vec![4i32.into(), 5i32.into(), 6i32.into()],
409                    NonNullable,
410                )
411                .as_list(),
412            )
413            .unwrap();
414
415        let list = builder.finish();
416        assert_eq!(list.len(), 3);
417
418        let list_array = list.to_listview();
419
420        assert_eq!(list_array.list_elements_at(0).len(), 3);
421        assert_eq!(list_array.list_elements_at(1).len(), 0);
422        assert_eq!(list_array.list_elements_at(2).len(), 3);
423    }
424
425    fn test_extend_builder_gen<O: IntegerPType>() {
426        let list = ListArray::from_iter_opt_slow::<O, _, _>(
427            [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
428            Arc::new(I32.into()),
429        )
430        .unwrap();
431        assert_eq!(list.len(), 3);
432
433        let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
434
435        builder.extend_from_array(&list);
436        builder.extend_from_array(&list);
437        builder.extend_from_array(&list.slice(0..0));
438        builder.extend_from_array(&list.slice(1..3));
439
440        let expected = ListArray::from_iter_opt_slow::<O, _, _>(
441            [
442                Some(vec![0, 1, 2]),
443                None,
444                Some(vec![4, 5]),
445                Some(vec![0, 1, 2]),
446                None,
447                Some(vec![4, 5]),
448                None,
449                Some(vec![4, 5]),
450            ],
451            Arc::new(DType::Primitive(I32, NonNullable)),
452        )
453        .unwrap()
454        .to_listview();
455
456        let actual = builder.finish_into_canonical().into_listview();
457
458        assert_eq!(
459            actual.elements().to_primitive().as_slice::<i32>(),
460            expected.elements().to_primitive().as_slice::<i32>()
461        );
462
463        assert_eq!(
464            actual.offsets().to_primitive().as_slice::<O>(),
465            expected.offsets().to_primitive().as_slice::<O>()
466        );
467
468        assert_eq!(actual.validity(), expected.validity())
469    }
470
471    #[test]
472    fn test_extend_builder() {
473        test_extend_builder_gen::<i8>();
474        test_extend_builder_gen::<i16>();
475        test_extend_builder_gen::<i32>();
476        test_extend_builder_gen::<i64>();
477
478        test_extend_builder_gen::<u8>();
479        test_extend_builder_gen::<u16>();
480        test_extend_builder_gen::<u32>();
481        test_extend_builder_gen::<u64>();
482    }
483
484    #[test]
485    pub fn test_array_with_gap() {
486        let one_trailing_unused_element = ListArray::try_new(
487            buffer![1, 2, 3, 4].into_array(),
488            buffer![0, 3].into_array(),
489            Validity::NonNullable,
490        )
491        .unwrap();
492
493        let second_array = ListArray::try_new(
494            buffer![5, 6].into_array(),
495            buffer![0, 2].into_array(),
496            Validity::NonNullable,
497        )
498        .unwrap();
499
500        let chunked_list = ChunkedArray::try_new(
501            vec![
502                one_trailing_unused_element.clone().into_array(),
503                second_array.clone().into_array(),
504            ],
505            DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
506        );
507
508        let canon_values = chunked_list.unwrap().to_listview();
509
510        assert_eq!(
511            one_trailing_unused_element.scalar_at(0),
512            canon_values.scalar_at(0)
513        );
514        assert_eq!(second_array.scalar_at(0), canon_values.scalar_at(1));
515    }
516
517    #[test]
518    fn test_append_scalar() {
519        let dtype: Arc<DType> = Arc::new(I32.into());
520        let mut builder = ListBuilder::<u64>::with_capacity(dtype.clone(), Nullable, 20, 10);
521
522        // Test appending a valid list.
523        let list_scalar1 = Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable);
524        builder.append_scalar(&list_scalar1).unwrap();
525
526        // Test appending another list.
527        let list_scalar2 = Scalar::list(
528            dtype.clone(),
529            vec![3i32.into(), 4i32.into(), 5i32.into()],
530            Nullable,
531        );
532        builder.append_scalar(&list_scalar2).unwrap();
533
534        // Test appending null value.
535        let null_scalar = Scalar::null(DType::List(dtype.clone(), Nullable));
536        builder.append_scalar(&null_scalar).unwrap();
537
538        let array = builder.finish_into_list();
539        assert_eq!(array.len(), 3);
540
541        // Check actual values using scalar_at.
542
543        let scalar0 = array.scalar_at(0);
544        let list0 = scalar0.as_list();
545        assert_eq!(list0.len(), 2);
546        if let Some(list0_items) = list0.elements() {
547            assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
548            assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
549        }
550
551        let scalar1 = array.scalar_at(1);
552        let list1 = scalar1.as_list();
553        assert_eq!(list1.len(), 3);
554        if let Some(list1_items) = list1.elements() {
555            assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
556            assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
557            assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
558        }
559
560        let scalar2 = array.scalar_at(2);
561        let list2 = scalar2.as_list();
562        assert!(list2.is_null()); // This should be null.
563
564        // Check validity.
565        assert!(array.validity().is_valid(0));
566        assert!(array.validity().is_valid(1));
567        assert!(!array.validity().is_valid(2));
568
569        // Test wrong dtype error.
570        let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
571        let wrong_scalar = Scalar::from(42i32);
572        assert!(builder.append_scalar(&wrong_scalar).is_err());
573    }
574
575    #[test]
576    fn test_append_array_as_list() {
577        let dtype: Arc<DType> = Arc::new(I32.into());
578        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 20, 10);
579
580        // Append a primitive array as a single list entry.
581        let arr1 = buffer![1i32, 2, 3].into_array();
582        builder.append_array_as_list(&arr1).unwrap();
583
584        // Interleave with a list scalar.
585        builder
586            .append_value(
587                Scalar::list(dtype.clone(), vec![10i32.into(), 11i32.into()], NonNullable)
588                    .as_list(),
589            )
590            .unwrap();
591
592        // Append another primitive array as a single list entry.
593        let arr2 = buffer![4i32, 5].into_array();
594        builder.append_array_as_list(&arr2).unwrap();
595
596        // Append an empty array as a single list entry (empty list).
597        let arr3 = buffer![0i32; 0].into_array();
598        builder.append_array_as_list(&arr3).unwrap();
599
600        // Interleave with another list scalar (empty list).
601        builder
602            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
603            .unwrap();
604
605        let list = builder.finish_into_list();
606        assert_eq!(list.len(), 5);
607
608        // Verify elements array: [1, 2, 3, 10, 11, 4, 5].
609        let elements = list.elements().to_primitive();
610        assert_eq!(elements.as_slice::<i32>(), &[1, 2, 3, 10, 11, 4, 5]);
611
612        // Verify offsets array.
613        let offsets = list.offsets().to_primitive();
614        assert_eq!(offsets.as_slice::<u32>(), &[0, 3, 5, 7, 7, 7]);
615
616        // Test dtype mismatch error.
617        let mut builder = ListBuilder::<u32>::with_capacity(dtype, NonNullable, 20, 10);
618        let wrong_dtype_arr = buffer![1i64, 2, 3].into_array();
619        assert!(builder.append_array_as_list(&wrong_dtype_arr).is_err());
620    }
621}