vortex_array/builders/
list.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::Nullability::NonNullable;
8use vortex_dtype::{DType, NativePType, Nullability};
9use vortex_error::{VortexExpect, VortexResult, vortex_bail};
10use vortex_mask::Mask;
11use vortex_scalar::ListScalar;
12
13use crate::arrays::{ListArray, OffsetPType};
14use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
15use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
16use crate::compute::{add_scalar, cast, sub_scalar};
17use crate::{Array, ArrayRef, IntoArray, ToCanonical};
18
19pub struct ListBuilder<O: NativePType> {
20    value_builder: Box<dyn ArrayBuilder>,
21    index_builder: PrimitiveBuilder<O>,
22    nulls: LazyNullBufferBuilder,
23    nullability: Nullability,
24    dtype: DType,
25}
26
27impl<O: OffsetPType> ListBuilder<O> {
28    /// Create a ListBuilder with the specified capacity for indices.
29    ///
30    /// # Notes
31    ///
32    /// The number of indices is one more than the number of lists in the array!
33    ///
34    /// See also: [ListBuilder::with_values_and_index_capacity].
35    pub fn with_capacity(
36        value_dtype: Arc<DType>,
37        nullability: Nullability,
38        index_capacity: usize,
39    ) -> Self {
40        // I would expect the list to have more than one value per index
41        Self::with_values_and_index_capacity(
42            value_dtype,
43            nullability,
44            2 * index_capacity,
45            index_capacity,
46        )
47    }
48
49    /// Create a ListBuilder with the specified capacity for indices and values.
50    ///
51    /// # Notes
52    ///
53    /// The number of indices is one more than the number of lists in the array!
54    pub fn with_values_and_index_capacity(
55        value_dtype: Arc<DType>,
56        nullability: Nullability,
57        values_capacity: usize,
58        index_capacity: usize,
59    ) -> Self {
60        let value_builder = builder_with_capacity(value_dtype.as_ref(), values_capacity);
61        let mut index_builder = PrimitiveBuilder::with_capacity(NonNullable, index_capacity);
62
63        // The first index of the list, which is always 0 and represents an empty list.
64        index_builder.append_zero();
65
66        Self {
67            value_builder,
68            index_builder,
69            nulls: LazyNullBufferBuilder::new(index_capacity),
70            nullability,
71            dtype: DType::List(value_dtype, nullability),
72        }
73    }
74
75    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
76        match value.elements() {
77            None => {
78                if self.nullability == NonNullable {
79                    vortex_bail!("Cannot append null value to non-nullable list");
80                }
81                self.append_null();
82                Ok(())
83            }
84            Some(elements) => {
85                for scalar in elements {
86                    // TODO(joe): This is slow, we should be able to append multiple values at once,
87                    // or the list scalar should hold an Array
88                    self.value_builder.append_scalar(&scalar)?;
89                }
90                self.nulls.append_non_null();
91                self.append_index(
92                    O::from_usize(self.value_builder.len())
93                        .vortex_expect("Failed to convert from usize to O"),
94                )
95            }
96        }
97    }
98
99    fn append_index(&mut self, index: O) -> VortexResult<()> {
100        self.index_builder.append_scalar(&index.into())
101    }
102}
103
104impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
105    fn as_any(&self) -> &dyn Any {
106        self
107    }
108
109    fn as_any_mut(&mut self) -> &mut dyn Any {
110        self
111    }
112
113    fn dtype(&self) -> &DType {
114        &self.dtype
115    }
116
117    fn len(&self) -> usize {
118        self.nulls.len()
119    }
120
121    fn append_zeros(&mut self, n: usize) {
122        let count = self.value_builder.len();
123        self.value_builder.append_zeros(n);
124        for i in 0..n {
125            self.append_index(
126                O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
127            )
128            .vortex_expect("Failed to append index");
129        }
130        self.nulls.append_n_non_nulls(n);
131    }
132
133    fn append_nulls(&mut self, n: usize) {
134        let count = self.value_builder.len();
135        for _ in 0..n {
136            // A list with a null element is can be a list with a zero-span offset and a validity
137            // bit set
138            self.append_index(
139                O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
140            )
141            .vortex_expect("Failed to append index");
142        }
143        self.nulls.append_n_nulls(n);
144    }
145
146    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
147        let list = array.to_list()?;
148        if list.is_empty() {
149            return Ok(());
150        }
151
152        let n_already_added_values = self.value_builder.len();
153        let Some(n_already_added_values) = O::from_usize(n_already_added_values) else {
154            vortex_bail!(
155                "cannot convert length {} to type {:?}",
156                n_already_added_values,
157                O::PTYPE
158            )
159        };
160
161        let offsets = list.offsets();
162        let elements = list.elements();
163
164        let index_dtype = self.index_builder.dtype();
165
166        let n_leading_junk_values_scalar = offsets.scalar_at(0)?.cast(index_dtype)?;
167        let n_leading_junk_values = usize::try_from(&n_leading_junk_values_scalar)?;
168
169        let casted_offsets = cast(&offsets.slice(1, offsets.len())?, index_dtype)?;
170        let offsets_without_leading_junk =
171            sub_scalar(&casted_offsets, n_leading_junk_values_scalar)?;
172        let offsets_into_builder =
173            add_scalar(&offsets_without_leading_junk, n_already_added_values.into())?;
174
175        let last_offset = offsets.scalar_at(offsets.len() - 1)?;
176        let last_offset = usize::try_from(&last_offset)?;
177        let non_junk_values = elements.slice(n_leading_junk_values, last_offset)?;
178
179        self.nulls.append_validity_mask(array.validity_mask()?);
180        self.index_builder
181            .extend_from_array(&offsets_into_builder)?;
182        self.value_builder.ensure_capacity(non_junk_values.len());
183        self.value_builder.extend_from_array(&non_junk_values)?;
184
185        Ok(())
186    }
187
188    fn ensure_capacity(&mut self, capacity: usize) {
189        self.index_builder.ensure_capacity(capacity);
190        self.value_builder.ensure_capacity(capacity);
191        self.nulls.ensure_capacity(capacity);
192    }
193
194    fn set_validity(&mut self, validity: Mask) {
195        self.nulls = LazyNullBufferBuilder::new(validity.len());
196        self.nulls.append_validity_mask(validity);
197    }
198
199    fn finish(&mut self) -> ArrayRef {
200        assert_eq!(
201            self.index_builder.len(),
202            self.nulls.len() + 1,
203            "Indices length must be one more than nulls length."
204        );
205
206        ListArray::try_new(
207            self.value_builder.finish(),
208            self.index_builder.finish(),
209            self.nulls.finish_with_nullability(self.nullability),
210        )
211        .vortex_expect("Buffer, offsets, and validity must have same length.")
212        .into_array()
213    }
214}
215
216#[cfg(test)]
217mod tests {
218    use std::sync::Arc;
219
220    use Nullability::{NonNullable, Nullable};
221    use vortex_buffer::buffer;
222    use vortex_dtype::PType::I32;
223    use vortex_dtype::{DType, Nullability};
224    use vortex_scalar::Scalar;
225
226    use crate::array::Array;
227    use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
228    use crate::builders::ArrayBuilder;
229    use crate::builders::list::ListBuilder;
230    use crate::validity::Validity;
231    use crate::vtable::ValidityHelper;
232    use crate::{IntoArray as _, ToCanonical};
233
234    #[test]
235    fn test_empty() {
236        let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
237
238        let list = builder.finish();
239        assert_eq!(list.len(), 0);
240    }
241
242    #[test]
243    fn test_values() {
244        let dtype: Arc<DType> = Arc::new(I32.into());
245        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
246
247        builder
248            .append_value(
249                Scalar::list(
250                    dtype.clone(),
251                    vec![1i32.into(), 2i32.into(), 3i32.into()],
252                    NonNullable,
253                )
254                .as_list(),
255            )
256            .unwrap();
257
258        builder
259            .append_value(
260                Scalar::list(
261                    dtype,
262                    vec![4i32.into(), 5i32.into(), 6i32.into()],
263                    NonNullable,
264                )
265                .as_list(),
266            )
267            .unwrap();
268
269        let list = builder.finish();
270        assert_eq!(list.len(), 2);
271
272        let list_array = list.to_list().unwrap();
273
274        assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
275        assert_eq!(list_array.elements_at(1).unwrap().len(), 3);
276    }
277
278    #[test]
279    fn test_non_null_fails() {
280        let dtype: Arc<DType> = Arc::new(I32.into());
281        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
282
283        assert!(
284            builder
285                .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
286                .is_err()
287        )
288    }
289
290    #[test]
291    fn test_nullable_values() {
292        let dtype: Arc<DType> = Arc::new(I32.into());
293        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
294
295        builder
296            .append_value(
297                Scalar::list(
298                    dtype.clone(),
299                    vec![1i32.into(), 2i32.into(), 3i32.into()],
300                    NonNullable,
301                )
302                .as_list(),
303            )
304            .unwrap();
305
306        builder
307            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
308            .unwrap();
309
310        builder
311            .append_value(
312                Scalar::list(
313                    dtype,
314                    vec![4i32.into(), 5i32.into(), 6i32.into()],
315                    NonNullable,
316                )
317                .as_list(),
318            )
319            .unwrap();
320
321        let list = builder.finish();
322        assert_eq!(list.len(), 3);
323
324        let list_array = list.to_list().unwrap();
325
326        assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
327        assert_eq!(list_array.elements_at(1).unwrap().len(), 0);
328        assert_eq!(list_array.elements_at(2).unwrap().len(), 3);
329    }
330
331    fn test_extend_builder_gen<O: OffsetPType>() {
332        let list = ListArray::from_iter_opt_slow::<O, _, _>(
333            [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
334            Arc::new(I32.into()),
335        )
336        .unwrap();
337
338        let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
339
340        builder.extend_from_array(&list).unwrap();
341        builder.extend_from_array(&list).unwrap();
342        builder
343            .extend_from_array(&list.slice(0, 0).unwrap())
344            .unwrap();
345        builder
346            .extend_from_array(&list.slice(1, 3).unwrap())
347            .unwrap();
348
349        let expected = ListArray::from_iter_opt_slow::<O, _, _>(
350            [
351                Some(vec![0, 1, 2]),
352                None,
353                Some(vec![4, 5]),
354                Some(vec![0, 1, 2]),
355                None,
356                Some(vec![4, 5]),
357                None,
358                Some(vec![4, 5]),
359            ],
360            Arc::new(DType::Primitive(I32, NonNullable)),
361        )
362        .unwrap()
363        .to_list()
364        .unwrap();
365
366        let actual = builder
367            .finish()
368            .to_canonical()
369            .unwrap()
370            .into_list()
371            .unwrap();
372
373        assert_eq!(
374            actual.elements().to_primitive().unwrap().as_slice::<i32>(),
375            expected
376                .elements()
377                .to_primitive()
378                .unwrap()
379                .as_slice::<i32>()
380        );
381
382        assert_eq!(
383            actual.offsets().to_primitive().unwrap().as_slice::<O>(),
384            expected.offsets().to_primitive().unwrap().as_slice::<O>()
385        );
386
387        assert_eq!(actual.validity(), expected.validity())
388    }
389
390    #[test]
391    fn test_extend_builder() {
392        test_extend_builder_gen::<i8>();
393        test_extend_builder_gen::<i16>();
394        test_extend_builder_gen::<i32>();
395        test_extend_builder_gen::<i64>();
396
397        test_extend_builder_gen::<u8>();
398        test_extend_builder_gen::<u16>();
399        test_extend_builder_gen::<u32>();
400        test_extend_builder_gen::<u64>();
401    }
402
403    #[test]
404    pub fn test_array_with_gap() {
405        let one_trailing_unused_element = ListArray::try_new(
406            buffer![1, 2, 3, 4].into_array(),
407            buffer![0, 3].into_array(),
408            Validity::NonNullable,
409        )
410        .unwrap();
411
412        let second_array = ListArray::try_new(
413            buffer![5, 6].into_array(),
414            buffer![0, 2].into_array(),
415            Validity::NonNullable,
416        )
417        .unwrap();
418
419        let chunked_list = ChunkedArray::try_new(
420            vec![
421                one_trailing_unused_element.clone().into_array(),
422                second_array.clone().into_array(),
423            ],
424            DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
425        );
426
427        let canon_values = chunked_list.unwrap().to_list().unwrap();
428
429        assert_eq!(
430            one_trailing_unused_element.scalar_at(0).unwrap(),
431            canon_values.scalar_at(0).unwrap()
432        );
433        assert_eq!(
434            second_array.scalar_at(0).unwrap(),
435            canon_values.scalar_at(1).unwrap()
436        );
437    }
438}