vortex_array/builders/
listview.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! ListView Builder Implementation.
5//!
6//! A builder for [`ListViewArray`] that tracks both offsets and sizes.
7//!
8//! Unlike [`ListArray`] which only tracks offsets, [`ListViewArray`] stores both offsets and sizes
9//! in separate arrays for better compression.
10
11use std::sync::Arc;
12
13use vortex_dtype::{DType, IntegerPType, Nullability};
14use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic};
15use vortex_mask::Mask;
16use vortex_scalar::{ListScalar, Scalar};
17
18use super::lazy_null_builder::LazyNullBufferBuilder;
19use crate::array::{Array, ArrayRef, IntoArray};
20use crate::arrays::ListViewArray;
21use crate::builders::{
22    ArrayBuilder, DEFAULT_BUILDER_CAPACITY, PrimitiveBuilder, builder_with_capacity,
23};
24use crate::{Canonical, ToCanonical};
25
26/// A builder for creating [`ListViewArray`] instances, parameterized by the [`IntegerPType`] of
27/// the `offsets` and the `sizes` builders.
28///
29/// This builder tracks both offsets and sizes using potentially different integer types for memory
30/// efficiency. For example, you might use `u64` for offsets but only `u8` for sizes if your lists
31/// are small.
32///
33/// Any combination of [`IntegerPType`] types are valid, as long as the type of `sizes` can fit into
34/// the type of `offsets`.
35pub struct ListViewBuilder<O: IntegerPType, S: IntegerPType> {
36    /// The [`DType`] of the [`ListViewArray`]. This **must** be a [`DType::List`].
37    dtype: DType,
38
39    /// The builder for the underlying elements of the [`ListArray`].
40    elements_builder: Box<dyn ArrayBuilder>,
41
42    /// The builder for the `offsets` into the `elements` array.
43    offsets_builder: PrimitiveBuilder<O>,
44
45    /// The builder for the `sizes` of each list view.
46    sizes_builder: PrimitiveBuilder<S>,
47
48    /// The null map builder of the [`ListViewArray`].
49    nulls: LazyNullBufferBuilder,
50}
51
52impl<O: IntegerPType, S: IntegerPType> ListViewBuilder<O, S> {
53    /// Creates a new `ListViewBuilder` with a capacity of [`DEFAULT_BUILDER_CAPACITY`].
54    pub fn new(element_dtype: Arc<DType>, nullability: Nullability) -> Self {
55        Self::with_capacity(
56            element_dtype,
57            nullability,
58            // We arbitrarily choose 2 times the number of list scalars for the capacity of the
59            // elements builder since we cannot know this ahead of time.
60            DEFAULT_BUILDER_CAPACITY * 2,
61            DEFAULT_BUILDER_CAPACITY,
62        )
63    }
64
65    /// Create a new [`ListViewArray`] builder with a with the given `capacity`, as well as an
66    /// initial capacity for the `elements` builder (since we cannot know that ahead of time solely
67    /// based on the outer array `capacity`).
68    ///
69    /// # Panics
70    ///
71    /// Panics if the size type `S` cannot fit within the offset type `O`.
72    pub fn with_capacity(
73        element_dtype: Arc<DType>,
74        nullability: Nullability,
75        elements_capacity: usize,
76        capacity: usize,
77    ) -> Self {
78        // Validate that size type's maximum value fits within offset type's maximum value.
79        // Since offsets are non-negative, we only need to check max values.
80        assert!(
81            S::max_value_as_u64() <= O::max_value_as_u64(),
82            "Size type {:?} (max offset {}) must fit within offset type {:?} (max offset {})",
83            S::PTYPE,
84            S::max_value_as_u64(),
85            O::PTYPE,
86            O::max_value_as_u64()
87        );
88
89        let elements_builder = builder_with_capacity(&element_dtype, elements_capacity);
90
91        let offsets_builder =
92            PrimitiveBuilder::<O>::with_capacity(Nullability::NonNullable, capacity);
93        let sizes_builder =
94            PrimitiveBuilder::<S>::with_capacity(Nullability::NonNullable, capacity);
95
96        let nulls = LazyNullBufferBuilder::new(capacity);
97
98        Self {
99            dtype: DType::List(element_dtype, nullability),
100            elements_builder,
101            offsets_builder,
102            sizes_builder,
103            nulls,
104        }
105    }
106
107    /// Append a list of values to the builder.
108    ///
109    /// This method extends the value builder with the provided values and records
110    /// the offset and size of the new list.
111    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
112        let Some(elements) = value.elements() else {
113            // If `elements` is `None`, then the `value` is a null value.
114            vortex_ensure!(
115                self.dtype.is_nullable(),
116                "Cannot append null value to non-nullable list builder"
117            );
118            self.append_null();
119            return Ok(());
120        };
121
122        let curr_offset = self.elements_builder.len();
123        let num_elements = elements.len();
124
125        for scalar in elements {
126            // TODO(connor): This is slow, we should be able to append multiple values at once, or
127            // the list scalar should hold an Array
128            self.elements_builder.append_scalar(&scalar)?;
129        }
130        self.nulls.append_non_null();
131
132        self.offsets_builder.append_value(
133            O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
134        );
135        self.sizes_builder.append_value(
136            S::from_usize(num_elements).vortex_expect("Failed to convert from usize to `S`"),
137        );
138
139        Ok(())
140    }
141
142    /// Finishes the builder directly into a [`ListViewArray`].
143    pub fn finish_into_listview(&mut self) -> ListViewArray {
144        debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
145        debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
146
147        let elements = self.elements_builder.finish();
148        let offsets = self.offsets_builder.finish();
149        let sizes = self.sizes_builder.finish();
150        let validity = self.nulls.finish_with_nullability(self.dtype.nullability());
151
152        ListViewArray::try_new(elements, offsets, sizes, validity)
153            .vortex_expect("Failed to create ListViewArray")
154    }
155
156    /// The [`DType`] of the inner elements. Note that this is **not** the same as the [`DType`] of
157    /// the outer `FixedSizeList`.
158    pub fn element_dtype(&self) -> &DType {
159        let DType::List(element_dtype, ..) = &self.dtype else {
160            vortex_panic!("`ListViewBuilder` has an incorrect dtype: {}", self.dtype);
161        };
162
163        element_dtype
164    }
165}
166
167impl<O: IntegerPType, S: IntegerPType> ArrayBuilder for ListViewBuilder<O, S> {
168    fn as_any(&self) -> &dyn std::any::Any {
169        self
170    }
171
172    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
173        self
174    }
175
176    fn dtype(&self) -> &DType {
177        &self.dtype
178    }
179
180    fn len(&self) -> usize {
181        self.offsets_builder.len()
182    }
183
184    fn append_zeros(&mut self, n: usize) {
185        debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
186        debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
187
188        // Get the current position in the elements array.
189        let curr_offset = self.elements_builder.len();
190
191        // Since we consider the "zero" element of a list an empty list, we simply update the
192        // `offsets` and `sizes` metadata to add an empty list.
193        for _ in 0..n {
194            self.offsets_builder.append_value(
195                O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
196            );
197            self.sizes_builder.append_value(S::zero());
198        }
199
200        self.nulls.append_n_non_nulls(n);
201    }
202
203    unsafe fn append_nulls_unchecked(&mut self, n: usize) {
204        debug_assert_eq!(self.offsets_builder.len(), self.sizes_builder.len());
205        debug_assert_eq!(self.offsets_builder.len(), self.nulls.len());
206
207        // Get the current position in the elements array.
208        let curr_offset = self.elements_builder.len();
209
210        // A null list can have any representation, but we choose to use the zero representation.
211        for _ in 0..n {
212            self.offsets_builder.append_value(
213                O::from_usize(curr_offset).vortex_expect("Failed to convert from usize to `O`"),
214            );
215            self.sizes_builder.append_value(S::zero());
216        }
217
218        // This is the only difference from `append_zeros`.
219        self.nulls.append_n_nulls(n);
220    }
221
222    fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
223        vortex_ensure!(
224            scalar.dtype() == self.dtype(),
225            "ListViewBuilder expected scalar with dtype {:?}, got {:?}",
226            self.dtype(),
227            scalar.dtype()
228        );
229
230        let list_scalar = scalar.as_list();
231        self.append_value(list_scalar)
232    }
233
234    unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
235        let listview_array = array.to_listview();
236        if listview_array.is_empty() {
237            return;
238        }
239
240        // TODO(connor)[ListView]: We could potentially concatenate the new elements on top of the
241        // existing elements and recalculate offsets (and then use `UninitRange`). However, that
242        // would mean we lose the guarantee that the output `ListViewArray` does not look like a
243        // `ListArray` (because the incoming array could have garbage data).
244
245        // We assume the worst case scenario, where the list view array is stored completely out of
246        // order, with many out-of-order offsets, and lots of garbage data. Thus, we simply iterate
247        // over all of the lists in the array and copy the data into this builder.
248        for i in 0..listview_array.len() {
249            let list = listview_array.scalar_at(i);
250
251            self.append_scalar(&list)
252                .vortex_expect("was unable to extend the `ListViewBuilder`")
253        }
254    }
255
256    fn reserve_exact(&mut self, capacity: usize) {
257        self.elements_builder.reserve_exact(capacity * 2);
258        self.offsets_builder.reserve_exact(capacity);
259        self.sizes_builder.reserve_exact(capacity);
260        self.nulls.reserve_exact(capacity);
261    }
262
263    unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
264        self.nulls = LazyNullBufferBuilder::new(validity.len());
265        self.nulls.append_validity_mask(validity);
266    }
267
268    fn finish(&mut self) -> ArrayRef {
269        self.finish_into_listview().into_array()
270    }
271
272    fn finish_into_canonical(&mut self) -> Canonical {
273        Canonical::List(self.finish_into_listview())
274    }
275}
276
277#[cfg(test)]
278mod tests {
279    use std::sync::Arc;
280
281    use vortex_dtype::DType;
282    use vortex_dtype::Nullability::{NonNullable, Nullable};
283    use vortex_dtype::PType::I32;
284    use vortex_scalar::Scalar;
285
286    use super::ListViewBuilder;
287    use crate::IntoArray;
288    use crate::array::Array;
289    use crate::arrays::ListArray;
290    use crate::builders::ArrayBuilder;
291    use crate::vtable::ValidityHelper;
292
293    #[test]
294    fn test_empty() {
295        let mut builder =
296            ListViewBuilder::<u32, u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
297
298        let listview = builder.finish();
299        assert_eq!(listview.len(), 0);
300    }
301
302    #[test]
303    fn test_basic_append_and_nulls() {
304        let dtype: Arc<DType> = Arc::new(I32.into());
305        let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
306
307        // Append a regular list.
308        builder
309            .append_value(
310                Scalar::list(
311                    dtype.clone(),
312                    vec![1i32.into(), 2i32.into(), 3i32.into()],
313                    NonNullable,
314                )
315                .as_list(),
316            )
317            .unwrap();
318
319        // Append an empty list.
320        builder
321            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
322            .unwrap();
323
324        // Append a null list.
325        builder.append_null();
326
327        // Append another regular list.
328        builder
329            .append_value(
330                Scalar::list(dtype, vec![4i32.into(), 5i32.into()], NonNullable).as_list(),
331            )
332            .unwrap();
333
334        let listview = builder.finish_into_listview();
335        assert_eq!(listview.len(), 4);
336
337        // Check first list: [1, 2, 3].
338        let first_list = listview.list_elements_at(0);
339        assert_eq!(first_list.len(), 3);
340        assert_eq!(first_list.scalar_at(0), 1i32.into());
341        assert_eq!(first_list.scalar_at(1), 2i32.into());
342        assert_eq!(first_list.scalar_at(2), 3i32.into());
343
344        // Check empty list.
345        let empty_list = listview.list_elements_at(1);
346        assert_eq!(empty_list.len(), 0);
347
348        // Check null list.
349        assert!(!listview.validity().is_valid(2));
350
351        // Check last list: [4, 5].
352        let last_list = listview.list_elements_at(3);
353        assert_eq!(last_list.len(), 2);
354        assert_eq!(last_list.scalar_at(0), 4i32.into());
355        assert_eq!(last_list.scalar_at(1), 5i32.into());
356    }
357
358    #[test]
359    fn test_different_offset_size_types() {
360        // Test u32 offsets with u8 sizes.
361        let dtype: Arc<DType> = Arc::new(I32.into());
362        let mut builder =
363            ListViewBuilder::<u32, u8>::with_capacity(dtype.clone(), NonNullable, 0, 0);
364
365        builder
366            .append_value(
367                Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], NonNullable).as_list(),
368            )
369            .unwrap();
370
371        builder
372            .append_value(
373                Scalar::list(
374                    dtype,
375                    vec![3i32.into(), 4i32.into(), 5i32.into()],
376                    NonNullable,
377                )
378                .as_list(),
379            )
380            .unwrap();
381
382        let listview = builder.finish_into_listview();
383        assert_eq!(listview.len(), 2);
384
385        // Verify first list: [1, 2].
386        let first = listview.list_elements_at(0);
387        assert_eq!(first.scalar_at(0), 1i32.into());
388        assert_eq!(first.scalar_at(1), 2i32.into());
389
390        // Verify second list: [3, 4, 5].
391        let second = listview.list_elements_at(1);
392        assert_eq!(second.scalar_at(0), 3i32.into());
393        assert_eq!(second.scalar_at(1), 4i32.into());
394        assert_eq!(second.scalar_at(2), 5i32.into());
395
396        // Test u64 offsets with u16 sizes.
397        let dtype2: Arc<DType> = Arc::new(I32.into());
398        let mut builder2 =
399            ListViewBuilder::<u64, u16>::with_capacity(dtype2.clone(), NonNullable, 0, 0);
400
401        for i in 0..5 {
402            builder2
403                .append_value(
404                    Scalar::list(dtype2.clone(), vec![(i * 10).into()], NonNullable).as_list(),
405                )
406                .unwrap();
407        }
408
409        let listview2 = builder2.finish_into_listview();
410        assert_eq!(listview2.len(), 5);
411
412        // Verify the values: [0], [10], [20], [30], [40].
413        for i in 0..5i32 {
414            let list = listview2.list_elements_at(i as usize);
415            assert_eq!(list.len(), 1);
416            assert_eq!(list.scalar_at(0), (i * 10).into());
417        }
418    }
419
420    #[test]
421    fn test_builder_trait_methods() {
422        let dtype: Arc<DType> = Arc::new(I32.into());
423        let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
424
425        // Test append_zeros (creates empty lists).
426        builder.append_zeros(2);
427        assert_eq!(builder.len(), 2);
428
429        // Test append_nulls.
430        unsafe {
431            builder.append_nulls_unchecked(2);
432        }
433        assert_eq!(builder.len(), 4);
434
435        // Test append_scalar.
436        let list_scalar = Scalar::list(dtype, vec![10i32.into(), 20i32.into()], Nullable);
437        builder.append_scalar(&list_scalar).unwrap();
438        assert_eq!(builder.len(), 5);
439
440        let listview = builder.finish_into_listview();
441        assert_eq!(listview.len(), 5);
442
443        // First two are empty lists (from append_zeros).
444        assert_eq!(listview.list_elements_at(0).len(), 0);
445        assert_eq!(listview.list_elements_at(1).len(), 0);
446
447        // Next two are nulls.
448        assert!(!listview.validity().is_valid(2));
449        assert!(!listview.validity().is_valid(3));
450
451        // Last is the regular list: [10, 20].
452        let last_list = listview.list_elements_at(4);
453        assert_eq!(last_list.len(), 2);
454        assert_eq!(last_list.scalar_at(0), 10i32.into());
455        assert_eq!(last_list.scalar_at(1), 20i32.into());
456    }
457
458    #[test]
459    fn test_extend_from_array() {
460        let dtype: Arc<DType> = Arc::new(I32.into());
461
462        // Create a source ListArray.
463        let source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
464            [Some(vec![1, 2, 3]), None, Some(vec![4, 5])],
465            Arc::new(I32.into()),
466        )
467        .unwrap();
468
469        let mut builder = ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
470
471        // Add initial data.
472        builder
473            .append_value(Scalar::list(dtype, vec![0i32.into()], NonNullable).as_list())
474            .unwrap();
475
476        // Extend from the ListArray.
477        unsafe {
478            builder.extend_from_array_unchecked(&source.into_array());
479        }
480
481        // Extend from empty array (should be no-op).
482        let empty_source = ListArray::from_iter_opt_slow::<u32, _, Vec<i32>>(
483            std::iter::empty::<Option<Vec<i32>>>(),
484            Arc::new(I32.into()),
485        )
486        .unwrap();
487        unsafe {
488            builder.extend_from_array_unchecked(&empty_source.into_array());
489        }
490
491        let listview = builder.finish_into_listview();
492        assert_eq!(listview.len(), 4);
493
494        // Check the extended data.
495        // First list: [0] (initial data).
496        let first = listview.list_elements_at(0);
497        assert_eq!(first.len(), 1);
498        assert_eq!(first.scalar_at(0), 0i32.into());
499
500        // Second list: [1, 2, 3] (from source).
501        let second = listview.list_elements_at(1);
502        assert_eq!(second.len(), 3);
503        assert_eq!(second.scalar_at(0), 1i32.into());
504        assert_eq!(second.scalar_at(1), 2i32.into());
505        assert_eq!(second.scalar_at(2), 3i32.into());
506
507        // Third list: null (from source).
508        assert!(!listview.validity().is_valid(2));
509
510        // Fourth list: [4, 5] (from source).
511        let fourth = listview.list_elements_at(3);
512        assert_eq!(fourth.len(), 2);
513        assert_eq!(fourth.scalar_at(0), 4i32.into());
514        assert_eq!(fourth.scalar_at(1), 5i32.into());
515    }
516
517    #[test]
518    fn test_error_append_null_to_non_nullable() {
519        let dtype: Arc<DType> = Arc::new(I32.into());
520        let mut builder =
521            ListViewBuilder::<u32, u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
522
523        // Create a null list with nullable type (since Scalar::null requires nullable type).
524        let null_scalar = Scalar::null(DType::List(dtype, Nullable));
525        let null_list = null_scalar.as_list();
526
527        // This should fail because we're trying to append a null to a non-nullable builder.
528        let result = builder.append_value(null_list);
529        assert!(result.is_err());
530        assert!(
531            result
532                .unwrap_err()
533                .to_string()
534                .contains("null value to non-nullable")
535        );
536    }
537
538    #[test]
539    #[should_panic(
540        expected = "Size type I32 (max offset 2147483647) must fit within offset type I16 (max offset 32767)"
541    )]
542    fn test_error_invalid_type_combination() {
543        let dtype: Arc<DType> = Arc::new(I32.into());
544        // This should panic because i32 (4 bytes) cannot fit within i16 (2 bytes).
545        let _builder = ListViewBuilder::<i16, i32>::with_capacity(dtype, NonNullable, 0, 0);
546    }
547}