vortex_array/builders/
list.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::DType;
8use vortex_dtype::IntegerPType;
9use vortex_dtype::Nullability;
10use vortex_dtype::Nullability::NonNullable;
11use vortex_dtype::match_each_integer_ptype;
12use vortex_error::VortexExpect;
13use vortex_error::VortexResult;
14use vortex_error::vortex_bail;
15use vortex_error::vortex_ensure;
16use vortex_error::vortex_panic;
17use vortex_mask::Mask;
18use vortex_scalar::ListScalar;
19use vortex_scalar::Scalar;
20
21use crate::Array;
22use crate::ArrayRef;
23use crate::IntoArray;
24use crate::arrays::ListArray;
25use crate::arrays::list_view_from_list;
26use crate::builders::ArrayBuilder;
27use crate::builders::DEFAULT_BUILDER_CAPACITY;
28use crate::builders::LazyBitBufferBuilder;
29use crate::builders::PrimitiveBuilder;
30use crate::builders::builder_with_capacity;
31use crate::canonical::Canonical;
32use crate::canonical::ToCanonical;
33
34/// The builder for building a [`ListArray`], parametrized by the [`IntegerPType`] of the `offsets`
35/// builder.
36pub struct ListBuilder<O: IntegerPType> {
37    /// The [`DType`] of the [`ListArray`]. This **must** be a [`DType::List`].
38    dtype: DType,
39
40    /// The builder for the underlying elements of the [`ListArray`].
41    elements_builder: Box<dyn ArrayBuilder>,
42
43    /// The builder for the `offsets` into the `elements` array.
44    offsets_builder: PrimitiveBuilder<O>,
45
46    /// The null map builder of the [`ListArray`].
47    nulls: LazyBitBufferBuilder,
48}
49
50impl<O: IntegerPType> ListBuilder<O> {
51    /// Creates a new `ListBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
52    pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
53        Self::with_capacity(
54            value_dtype,
55            nullability,
56            // We arbitrarily choose 2 times the number of list scalars for the capacity of the
57            // elements builder since we cannot know this ahead of time.
58            DEFAULT_BUILDER_CAPACITY * 2,
59            DEFAULT_BUILDER_CAPACITY,
60        )
61    }
62
63    /// Create a new [`ListArray`] builder with a with the given `capacity`, as well as an initial
64    /// capacity for the `elements` builder (since we cannot know that ahead of time solely based on
65    /// the outer array `capacity`).
66    ///
67    /// # Notes
68    ///
69    /// The number of offsets is one more than the length (# of list scalars) in the array.
70    pub fn with_capacity(
71        value_dtype: Arc<DType>,
72        nullability: Nullability,
73        elements_capacity: usize,
74        capacity: usize,
75    ) -> Self {
76        let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
77        let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
78
79        // The first offset is always 0 and represents an empty list.
80        offsets_builder.append_zero();
81
82        Self {
83            elements_builder,
84            offsets_builder,
85            nulls: LazyBitBufferBuilder::new(capacity),
86            dtype: DType::List(value_dtype, nullability),
87        }
88    }
89
90    /// Appends a list `value` to the builder.
91    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
92        match value.elements() {
93            None => {
94                if self.dtype.nullability() == NonNullable {
95                    vortex_bail!("Cannot append null value to non-nullable list");
96                }
97                self.append_null();
98            }
99            Some(elements) => {
100                for scalar in elements {
101                    // TODO(connor): This is slow, we should be able to append multiple values at
102                    // once, or the list scalar should hold an Array
103                    self.elements_builder.append_scalar(&scalar)?;
104                }
105
106                self.nulls.append_non_null();
107                self.offsets_builder.append_value(
108                    O::from_usize(self.elements_builder.len())
109                        .vortex_expect("Failed to convert from usize to O"),
110                );
111            }
112        }
113
114        Ok(())
115    }
116
117    /// Finishes the builder directly into a [`ListArray`].
118    pub fn finish_into_list(&mut self) -> ListArray {
119        assert_eq!(
120            self.offsets_builder.len(),
121            self.nulls.len() + 1,
122            "offsets length must be one more than nulls length."
123        );
124
125        ListArray::try_new(
126            self.elements_builder.finish(),
127            self.offsets_builder.finish(),
128            self.nulls.finish_with_nullability(self.dtype.nullability()),
129        )
130        .vortex_expect("Buffer, offsets, and validity must have same length.")
131    }
132
133    /// The [`DType`] of the inner elements. Note that this is **not** the same as the [`DType`] of
134    /// the outer `List`.
135    pub fn element_dtype(&self) -> &DType {
136        let DType::List(element_dtype, _) = &self.dtype else {
137            vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
138        };
139
140        element_dtype
141    }
142}
143
144impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
145    fn as_any(&self) -> &dyn Any {
146        self
147    }
148
149    fn as_any_mut(&mut self) -> &mut dyn Any {
150        self
151    }
152
153    fn dtype(&self) -> &DType {
154        &self.dtype
155    }
156
157    fn len(&self) -> usize {
158        self.nulls.len()
159    }
160
161    fn append_zeros(&mut self, n: usize) {
162        let curr_len = self.elements_builder.len();
163        for _ in 0..n {
164            self.offsets_builder.append_value(
165                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
166            )
167        }
168        self.nulls.append_n_non_nulls(n);
169    }
170
171    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
172        let curr_len = self.elements_builder.len();
173        for _ in 0..n {
174            // A list with a null element is can be a list with a zero-span offset and a validity
175            // bit set
176            self.offsets_builder.append_value(
177                O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
178            )
179        }
180        self.nulls.append_n_nulls(n);
181    }
182
183    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
184        vortex_ensure!(
185            scalar.dtype() == self.dtype(),
186            "ListBuilder expected scalar with dtype {:?}, got {:?}",
187            self.dtype(),
188            scalar.dtype()
189        );
190
191        let list_scalar = ListScalar::try_from(scalar)?;
192        self.append_value(list_scalar)
193    }
194
195    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
196        let list = array.to_listview();
197        if list.is_empty() {
198            return;
199        }
200
201        // Append validity information.
202        self.nulls.append_validity_mask(array.validity_mask());
203
204        // Note that `ListViewArray` has `n` offsets and sizes, not `n+1` offsets like `ListArray`.
205        let elements = list.elements();
206        let offsets = list.offsets().to_primitive();
207        let sizes = list.sizes().to_primitive();
208
209        fn extend_inner<O, OffsetType, SizeType>(
210            builder: &mut ListBuilder<O>,
211            new_elements: &ArrayRef,
212            new_offsets: &[OffsetType],
213            new_sizes: &[SizeType],
214        ) where
215            O: IntegerPType,
216            OffsetType: IntegerPType,
217            SizeType: IntegerPType,
218        {
219            let num_lists = new_offsets.len();
220            debug_assert_eq!(num_lists, new_sizes.len());
221
222            let mut curr_offset = builder.elements_builder.len();
223            let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
224
225            // We need to append each list individually, converting from `ListViewArray` format to
226            // the `ListArray` format that `ListBuilder` expects.
227            for i in 0..new_offsets.len() {
228                let offset: usize = new_offsets[i].as_();
229                let size: usize = new_sizes[i].as_();
230
231                if size > 0 {
232                    let list_elements = new_elements.slice(offset..offset + size);
233                    builder.elements_builder.extend_from_array(&list_elements);
234                    curr_offset += size;
235                }
236
237                let new_offset =
238                    O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
239
240                offsets_range.set_value(i, new_offset);
241            }
242
243            // SAFETY: We have initialized all `num_lists` values, and since the `offsets` array is
244            // non-nullable, we are done.
245            unsafe { offsets_range.finish() };
246        }
247
248        match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
249            match_each_integer_ptype!(sizes.ptype(), |SizeType| {
250                extend_inner(
251                    self,
252                    elements,
253                    offsets.as_slice::<OffsetType>(),
254                    sizes.as_slice::<SizeType>(),
255                )
256            })
257        })
258    }
259
260    fn reserve_exact(&mut self, additional: usize) {
261        self.elements_builder.reserve_exact(additional);
262        self.offsets_builder.reserve_exact(additional);
263        self.nulls.reserve_exact(additional);
264    }
265
266    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
267        self.nulls = LazyBitBufferBuilder::new(validity.len());
268        self.nulls.append_validity_mask(validity);
269    }
270
271    fn finish(&mut self) -> ArrayRef {
272        self.finish_into_list().into_array()
273    }
274
275    fn finish_into_canonical(&mut self) -> Canonical {
276        Canonical::List(list_view_from_list(self.finish_into_list()))
277    }
278}
279
280#[cfg(test)]
281mod tests {
282    use std::sync::Arc;
283
284    use Nullability::NonNullable;
285    use Nullability::Nullable;
286    use vortex_buffer::buffer;
287    use vortex_dtype::DType;
288    use vortex_dtype::IntegerPType;
289    use vortex_dtype::Nullability;
290    use vortex_dtype::PType::I32;
291    use vortex_scalar::Scalar;
292
293    use crate::IntoArray;
294    use crate::ToCanonical;
295    use crate::array::Array;
296    use crate::arrays::ChunkedArray;
297    use crate::arrays::ListArray;
298    use crate::builders::ArrayBuilder;
299    use crate::builders::list::ListBuilder;
300    use crate::validity::Validity;
301    use crate::vtable::ValidityHelper;
302
303    #[test]
304    fn test_empty() {
305        let mut builder =
306            ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
307
308        let list = builder.finish();
309        assert_eq!(list.len(), 0);
310    }
311
312    #[test]
313    fn test_values() {
314        let dtype: Arc<DType> = Arc::new(I32.into());
315        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
316
317        builder
318            .append_value(
319                Scalar::list(
320                    dtype.clone(),
321                    vec![1i32.into(), 2i32.into(), 3i32.into()],
322                    NonNullable,
323                )
324                .as_list(),
325            )
326            .unwrap();
327
328        builder
329            .append_value(
330                Scalar::list(
331                    dtype,
332                    vec![4i32.into(), 5i32.into(), 6i32.into()],
333                    NonNullable,
334                )
335                .as_list(),
336            )
337            .unwrap();
338
339        let list = builder.finish();
340        assert_eq!(list.len(), 2);
341
342        let list_array = list.to_listview();
343
344        assert_eq!(list_array.list_elements_at(0).len(), 3);
345        assert_eq!(list_array.list_elements_at(1).len(), 3);
346    }
347
348    #[test]
349    fn test_append_empty_list() {
350        let dtype: Arc<DType> = Arc::new(I32.into());
351        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
352
353        assert!(
354            builder
355                .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
356                .is_ok()
357        )
358    }
359
360    #[test]
361    fn test_nullable_values() {
362        let dtype: Arc<DType> = Arc::new(I32.into());
363        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
364
365        builder
366            .append_value(
367                Scalar::list(
368                    dtype.clone(),
369                    vec![1i32.into(), 2i32.into(), 3i32.into()],
370                    NonNullable,
371                )
372                .as_list(),
373            )
374            .unwrap();
375
376        builder
377            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
378            .unwrap();
379
380        builder
381            .append_value(
382                Scalar::list(
383                    dtype,
384                    vec![4i32.into(), 5i32.into(), 6i32.into()],
385                    NonNullable,
386                )
387                .as_list(),
388            )
389            .unwrap();
390
391        let list = builder.finish();
392        assert_eq!(list.len(), 3);
393
394        let list_array = list.to_listview();
395
396        assert_eq!(list_array.list_elements_at(0).len(), 3);
397        assert_eq!(list_array.list_elements_at(1).len(), 0);
398        assert_eq!(list_array.list_elements_at(2).len(), 3);
399    }
400
401    fn test_extend_builder_gen<O: IntegerPType>() {
402        let list = ListArray::from_iter_opt_slow::<O, _, _>(
403            [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
404            Arc::new(I32.into()),
405        )
406        .unwrap();
407        assert_eq!(list.len(), 3);
408
409        let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
410
411        builder.extend_from_array(&list);
412        builder.extend_from_array(&list);
413        builder.extend_from_array(&list.slice(0..0));
414        builder.extend_from_array(&list.slice(1..3));
415
416        let expected = ListArray::from_iter_opt_slow::<O, _, _>(
417            [
418                Some(vec![0, 1, 2]),
419                None,
420                Some(vec![4, 5]),
421                Some(vec![0, 1, 2]),
422                None,
423                Some(vec![4, 5]),
424                None,
425                Some(vec![4, 5]),
426            ],
427            Arc::new(DType::Primitive(I32, NonNullable)),
428        )
429        .unwrap()
430        .to_listview();
431
432        let actual = builder.finish_into_canonical().into_listview();
433
434        assert_eq!(
435            actual.elements().to_primitive().as_slice::<i32>(),
436            expected.elements().to_primitive().as_slice::<i32>()
437        );
438
439        assert_eq!(
440            actual.offsets().to_primitive().as_slice::<O>(),
441            expected.offsets().to_primitive().as_slice::<O>()
442        );
443
444        assert_eq!(actual.validity(), expected.validity())
445    }
446
447    #[test]
448    fn test_extend_builder() {
449        test_extend_builder_gen::<i8>();
450        test_extend_builder_gen::<i16>();
451        test_extend_builder_gen::<i32>();
452        test_extend_builder_gen::<i64>();
453
454        test_extend_builder_gen::<u8>();
455        test_extend_builder_gen::<u16>();
456        test_extend_builder_gen::<u32>();
457        test_extend_builder_gen::<u64>();
458    }
459
460    #[test]
461    pub fn test_array_with_gap() {
462        let one_trailing_unused_element = ListArray::try_new(
463            buffer![1, 2, 3, 4].into_array(),
464            buffer![0, 3].into_array(),
465            Validity::NonNullable,
466        )
467        .unwrap();
468
469        let second_array = ListArray::try_new(
470            buffer![5, 6].into_array(),
471            buffer![0, 2].into_array(),
472            Validity::NonNullable,
473        )
474        .unwrap();
475
476        let chunked_list = ChunkedArray::try_new(
477            vec![
478                one_trailing_unused_element.clone().into_array(),
479                second_array.clone().into_array(),
480            ],
481            DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
482        );
483
484        let canon_values = chunked_list.unwrap().to_listview();
485
486        assert_eq!(
487            one_trailing_unused_element.scalar_at(0),
488            canon_values.scalar_at(0)
489        );
490        assert_eq!(second_array.scalar_at(0), canon_values.scalar_at(1));
491    }
492
493    #[test]
494    fn test_append_scalar() {
495        let dtype: Arc<DType> = Arc::new(I32.into());
496        let mut builder = ListBuilder::<u64>::with_capacity(dtype.clone(), Nullable, 20, 10);
497
498        // Test appending a valid list.
499        let list_scalar1 = Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable);
500        builder.append_scalar(&list_scalar1).unwrap();
501
502        // Test appending another list.
503        let list_scalar2 = Scalar::list(
504            dtype.clone(),
505            vec![3i32.into(), 4i32.into(), 5i32.into()],
506            Nullable,
507        );
508        builder.append_scalar(&list_scalar2).unwrap();
509
510        // Test appending null value.
511        let null_scalar = Scalar::null(DType::List(dtype.clone(), Nullable));
512        builder.append_scalar(&null_scalar).unwrap();
513
514        let array = builder.finish_into_list();
515        assert_eq!(array.len(), 3);
516
517        // Check actual values using scalar_at.
518
519        let scalar0 = array.scalar_at(0);
520        let list0 = scalar0.as_list();
521        assert_eq!(list0.len(), 2);
522        if let Some(list0_items) = list0.elements() {
523            assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
524            assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
525        }
526
527        let scalar1 = array.scalar_at(1);
528        let list1 = scalar1.as_list();
529        assert_eq!(list1.len(), 3);
530        if let Some(list1_items) = list1.elements() {
531            assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
532            assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
533            assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
534        }
535
536        let scalar2 = array.scalar_at(2);
537        let list2 = scalar2.as_list();
538        assert!(list2.is_null()); // This should be null.
539
540        // Check validity.
541        assert!(array.validity().is_valid(0));
542        assert!(array.validity().is_valid(1));
543        assert!(!array.validity().is_valid(2));
544
545        // Test wrong dtype error.
546        let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
547        let wrong_scalar = Scalar::from(42i32);
548        assert!(builder.append_scalar(&wrong_scalar).is_err());
549    }
550}