vortex_array/builders/
list.rs

1use std::any::Any;
2use std::sync::Arc;
3
4use vortex_dtype::Nullability::NonNullable;
5use vortex_dtype::{DType, NativePType, Nullability};
6use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
7use vortex_mask::Mask;
8use vortex_scalar::{ListScalar, NumericOperator};
9
10use crate::arrays::{ConstantArray, ListArray, OffsetPType};
11use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
12use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
13use crate::compute::{cast, numeric, slice};
14use crate::{Array, ArrayRef, ToCanonical};
15
16pub struct ListBuilder<O: NativePType> {
17    value_builder: Box<dyn ArrayBuilder>,
18    index_builder: PrimitiveBuilder<O>,
19    nulls: LazyNullBufferBuilder,
20    nullability: Nullability,
21    dtype: DType,
22}
23
24impl<O: OffsetPType> ListBuilder<O> {
25    // TODO(joe): add value + index capacity ctor.
26    pub fn with_capacity(
27        value_dtype: Arc<DType>,
28        nullability: Nullability,
29        capacity: usize,
30    ) -> Self {
31        // I would expect the list to have more than one value per index
32        let value_builder = builder_with_capacity(value_dtype.as_ref(), 2 * capacity);
33        let mut index_builder = PrimitiveBuilder::with_capacity(NonNullable, capacity);
34
35        // The first index of the list, which is always 0 and represents an empty list.
36        index_builder.append_zero();
37
38        Self {
39            value_builder,
40            index_builder,
41            nulls: LazyNullBufferBuilder::new(capacity),
42            nullability,
43            dtype: DType::List(value_dtype, nullability),
44        }
45    }
46
47    pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
48        match value.elements() {
49            None => {
50                if self.nullability == NonNullable {
51                    vortex_bail!("Cannot append null value to non-nullable list");
52                }
53                self.append_null();
54                Ok(())
55            }
56            Some(elements) => {
57                for scalar in elements {
58                    // TODO(joe): This is slow, we should be able to append multiple values at once,
59                    // or the list scalar should hold an Array
60                    self.value_builder.append_scalar(&scalar)?;
61                }
62                self.nulls.append_non_null();
63                self.append_index(
64                    O::from_usize(self.value_builder.len())
65                        .vortex_expect("Failed to convert from usize to O"),
66                )
67            }
68        }
69    }
70
71    fn append_index(&mut self, index: O) -> VortexResult<()> {
72        self.index_builder.append_scalar(&index.into())
73    }
74}
75
76impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
77    fn as_any(&self) -> &dyn Any {
78        self
79    }
80
81    fn as_any_mut(&mut self) -> &mut dyn Any {
82        self
83    }
84
85    fn dtype(&self) -> &DType {
86        &self.dtype
87    }
88
89    fn len(&self) -> usize {
90        self.nulls.len()
91    }
92
93    fn append_zeros(&mut self, n: usize) {
94        let count = self.value_builder.len();
95        self.value_builder.append_zeros(n);
96        for i in 0..n {
97            self.append_index(
98                O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
99            )
100            .vortex_expect("Failed to append index");
101        }
102        self.nulls.append_n_non_nulls(n);
103    }
104
105    fn append_nulls(&mut self, n: usize) {
106        let count = self.value_builder.len();
107        for _ in 0..n {
108            // A list with a null element is can be a list with a zero-span offset and a validity
109            // bit set
110            self.append_index(
111                O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
112            )
113            .vortex_expect("Failed to append index");
114        }
115        self.nulls.append_n_nulls(n);
116    }
117
118    fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
119        self.nulls.append_validity_mask(array.validity_mask()?);
120
121        let list = array.to_list()?;
122
123        let cursor_usize = self.value_builder.len();
124        let cursor = O::from_usize(cursor_usize).ok_or_else(|| {
125            vortex_err!(
126                "cannot convert length {} to type {:?}",
127                cursor_usize,
128                O::PTYPE
129            )
130        })?;
131
132        let offsets = numeric(
133            &cast(
134                &slice(list.offsets(), 1, list.offsets().len())?,
135                &DType::Primitive(O::PTYPE, NonNullable),
136            )?,
137            &ConstantArray::new(cursor, list.len()),
138            NumericOperator::Add,
139        )?;
140        self.index_builder.extend_from_array(&offsets)?;
141
142        if !list.is_empty() {
143            let last_used_index = self.index_builder.values().last().vortex_expect("there must be at least one index because we just extended a non-zero list of offsets");
144            let sliced_values = slice(list.elements(), 0, last_used_index.as_() - cursor_usize)?;
145            self.value_builder.ensure_capacity(sliced_values.len());
146            self.value_builder.extend_from_array(&sliced_values)?;
147        }
148
149        Ok(())
150    }
151
152    fn ensure_capacity(&mut self, capacity: usize) {
153        self.index_builder.ensure_capacity(capacity);
154        self.value_builder.ensure_capacity(capacity);
155        self.nulls.ensure_capacity(capacity);
156    }
157
158    fn set_validity(&mut self, validity: Mask) {
159        self.nulls = LazyNullBufferBuilder::new(validity.len());
160        self.nulls.append_validity_mask(validity);
161    }
162
163    fn finish(&mut self) -> ArrayRef {
164        assert_eq!(
165            self.index_builder.len(),
166            self.nulls.len() + 1,
167            "Indices length must be one more than nulls length."
168        );
169
170        ListArray::try_new(
171            self.value_builder.finish(),
172            self.index_builder.finish(),
173            self.nulls.finish_with_nullability(self.nullability),
174        )
175        .vortex_expect("Buffer, offsets, and validity must have same length.")
176        .into_array()
177    }
178}
179
180#[cfg(test)]
181mod tests {
182    use std::sync::Arc;
183
184    use Nullability::{NonNullable, Nullable};
185    use vortex_buffer::buffer;
186    use vortex_dtype::PType::I32;
187    use vortex_dtype::{DType, Nullability};
188    use vortex_scalar::Scalar;
189
190    use crate::array::Array;
191    use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
192    use crate::builders::ArrayBuilder;
193    use crate::builders::list::ListBuilder;
194    use crate::compute::scalar_at;
195    use crate::validity::Validity;
196    use crate::{IntoArray as _, ToCanonical};
197
198    #[test]
199    fn test_empty() {
200        let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
201
202        let list = builder.finish();
203        assert_eq!(list.len(), 0);
204    }
205
206    #[test]
207    fn test_values() {
208        let dtype: Arc<DType> = Arc::new(I32.into());
209        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
210
211        builder
212            .append_value(
213                Scalar::list(
214                    dtype.clone(),
215                    vec![1i32.into(), 2i32.into(), 3i32.into()],
216                    NonNullable,
217                )
218                .as_list(),
219            )
220            .unwrap();
221
222        builder
223            .append_value(
224                Scalar::list(
225                    dtype,
226                    vec![4i32.into(), 5i32.into(), 6i32.into()],
227                    NonNullable,
228                )
229                .as_list(),
230            )
231            .unwrap();
232
233        let list = builder.finish();
234        assert_eq!(list.len(), 2);
235
236        let list_array = list.to_list().unwrap();
237
238        assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
239        assert_eq!(list_array.elements_at(1).unwrap().len(), 3);
240    }
241
242    #[test]
243    fn test_non_null_fails() {
244        let dtype: Arc<DType> = Arc::new(I32.into());
245        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
246
247        assert!(
248            builder
249                .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
250                .is_err()
251        )
252    }
253
254    #[test]
255    fn test_nullable_values() {
256        let dtype: Arc<DType> = Arc::new(I32.into());
257        let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
258
259        builder
260            .append_value(
261                Scalar::list(
262                    dtype.clone(),
263                    vec![1i32.into(), 2i32.into(), 3i32.into()],
264                    NonNullable,
265                )
266                .as_list(),
267            )
268            .unwrap();
269
270        builder
271            .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
272            .unwrap();
273
274        builder
275            .append_value(
276                Scalar::list(
277                    dtype,
278                    vec![4i32.into(), 5i32.into(), 6i32.into()],
279                    NonNullable,
280                )
281                .as_list(),
282            )
283            .unwrap();
284
285        let list = builder.finish();
286        assert_eq!(list.len(), 3);
287
288        let list_array = list.to_list().unwrap();
289
290        assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
291        assert_eq!(list_array.elements_at(1).unwrap().len(), 0);
292        assert_eq!(list_array.elements_at(2).unwrap().len(), 3);
293    }
294
295    fn test_extend_builder_gen<O: OffsetPType>() {
296        let list = ListArray::from_iter_opt_slow::<O, _, _>(
297            [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
298            Arc::new(I32.into()),
299        )
300        .unwrap();
301
302        let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
303
304        builder.extend_from_array(&list).unwrap();
305        builder.extend_from_array(&list).unwrap();
306
307        let expect = ListArray::from_iter_opt_slow::<O, _, _>(
308            [
309                Some(vec![0, 1, 2]),
310                None,
311                Some(vec![4, 5]),
312                Some(vec![0, 1, 2]),
313                None,
314                Some(vec![4, 5]),
315            ],
316            Arc::new(DType::Primitive(I32, NonNullable)),
317        )
318        .unwrap()
319        .to_list()
320        .unwrap();
321
322        let res = builder
323            .finish()
324            .to_canonical()
325            .unwrap()
326            .into_list()
327            .unwrap();
328
329        assert_eq!(
330            res.elements().to_primitive().unwrap().as_slice::<i32>(),
331            expect.elements().to_primitive().unwrap().as_slice::<i32>()
332        );
333
334        assert_eq!(
335            res.offsets().to_primitive().unwrap().as_slice::<O>(),
336            expect.offsets().to_primitive().unwrap().as_slice::<O>()
337        );
338
339        assert_eq!(res.validity(), expect.validity())
340    }
341
342    #[test]
343    fn test_extend_builder() {
344        test_extend_builder_gen::<i8>();
345        test_extend_builder_gen::<i16>();
346        test_extend_builder_gen::<i32>();
347        test_extend_builder_gen::<i64>();
348
349        test_extend_builder_gen::<u8>();
350        test_extend_builder_gen::<u16>();
351        test_extend_builder_gen::<u32>();
352        test_extend_builder_gen::<u64>();
353    }
354
355    #[test]
356    pub fn test_array_with_gap() {
357        let one_trailing_unused_element = ListArray::try_new(
358            buffer![1, 2, 3, 4].into_array(),
359            buffer![0, 3].into_array(),
360            Validity::NonNullable,
361        )
362        .unwrap();
363
364        let second_array = ListArray::try_new(
365            buffer![5, 6].into_array(),
366            buffer![0, 2].into_array(),
367            Validity::NonNullable,
368        )
369        .unwrap();
370
371        let chunked_list = ChunkedArray::try_new(
372            vec![
373                one_trailing_unused_element.clone().into_array(),
374                second_array.clone().into_array(),
375            ],
376            DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
377        );
378
379        let canon_values = chunked_list.unwrap().to_list().unwrap();
380
381        assert_eq!(
382            scalar_at(&one_trailing_unused_element, 0).unwrap(),
383            scalar_at(&canon_values, 0).unwrap()
384        );
385        assert_eq!(
386            scalar_at(&second_array, 0).unwrap(),
387            scalar_at(&canon_values, 1).unwrap()
388        );
389    }
390}