1use std::any::Any;
2use std::sync::Arc;
3
4use vortex_dtype::Nullability::NonNullable;
5use vortex_dtype::{DType, NativePType, Nullability};
6use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
7use vortex_mask::Mask;
8use vortex_scalar::{ListScalar, NumericOperator};
9
10use crate::arrays::{ConstantArray, ListArray, OffsetPType};
11use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
12use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
13use crate::compute::{cast, numeric};
14use crate::{Array, ArrayRef, IntoArray, ToCanonical};
15
16pub struct ListBuilder<O: NativePType> {
17 value_builder: Box<dyn ArrayBuilder>,
18 index_builder: PrimitiveBuilder<O>,
19 nulls: LazyNullBufferBuilder,
20 nullability: Nullability,
21 dtype: DType,
22}
23
24impl<O: OffsetPType> ListBuilder<O> {
25 pub fn with_capacity(
27 value_dtype: Arc<DType>,
28 nullability: Nullability,
29 capacity: usize,
30 ) -> Self {
31 let value_builder = builder_with_capacity(value_dtype.as_ref(), 2 * capacity);
33 let mut index_builder = PrimitiveBuilder::with_capacity(NonNullable, capacity);
34
35 index_builder.append_zero();
37
38 Self {
39 value_builder,
40 index_builder,
41 nulls: LazyNullBufferBuilder::new(capacity),
42 nullability,
43 dtype: DType::List(value_dtype, nullability),
44 }
45 }
46
47 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
48 match value.elements() {
49 None => {
50 if self.nullability == NonNullable {
51 vortex_bail!("Cannot append null value to non-nullable list");
52 }
53 self.append_null();
54 Ok(())
55 }
56 Some(elements) => {
57 for scalar in elements {
58 self.value_builder.append_scalar(&scalar)?;
61 }
62 self.nulls.append_non_null();
63 self.append_index(
64 O::from_usize(self.value_builder.len())
65 .vortex_expect("Failed to convert from usize to O"),
66 )
67 }
68 }
69 }
70
71 fn append_index(&mut self, index: O) -> VortexResult<()> {
72 self.index_builder.append_scalar(&index.into())
73 }
74}
75
76impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
77 fn as_any(&self) -> &dyn Any {
78 self
79 }
80
81 fn as_any_mut(&mut self) -> &mut dyn Any {
82 self
83 }
84
85 fn dtype(&self) -> &DType {
86 &self.dtype
87 }
88
89 fn len(&self) -> usize {
90 self.nulls.len()
91 }
92
93 fn append_zeros(&mut self, n: usize) {
94 let count = self.value_builder.len();
95 self.value_builder.append_zeros(n);
96 for i in 0..n {
97 self.append_index(
98 O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
99 )
100 .vortex_expect("Failed to append index");
101 }
102 self.nulls.append_n_non_nulls(n);
103 }
104
105 fn append_nulls(&mut self, n: usize) {
106 let count = self.value_builder.len();
107 for _ in 0..n {
108 self.append_index(
111 O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
112 )
113 .vortex_expect("Failed to append index");
114 }
115 self.nulls.append_n_nulls(n);
116 }
117
118 fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
119 self.nulls.append_validity_mask(array.validity_mask()?);
120
121 let list = array.to_list()?;
122
123 let cursor_usize = self.value_builder.len();
124 let cursor = O::from_usize(cursor_usize).ok_or_else(|| {
125 vortex_err!(
126 "cannot convert length {} to type {:?}",
127 cursor_usize,
128 O::PTYPE
129 )
130 })?;
131
132 let offsets = numeric(
133 &cast(
134 &list.offsets().slice(1, list.offsets().len())?,
135 &DType::Primitive(O::PTYPE, NonNullable),
136 )?,
137 ConstantArray::new(cursor, list.len()).as_ref(),
138 NumericOperator::Add,
139 )?;
140 self.index_builder.extend_from_array(&offsets)?;
141
142 if !list.is_empty() {
143 let last_used_index = self.index_builder.values().last().vortex_expect("there must be at least one index because we just extended a non-zero list of offsets");
144 let sliced_values = list
145 .elements()
146 .slice(0, last_used_index.as_() - cursor_usize)?;
147 self.value_builder.ensure_capacity(sliced_values.len());
148 self.value_builder.extend_from_array(&sliced_values)?;
149 }
150
151 Ok(())
152 }
153
154 fn ensure_capacity(&mut self, capacity: usize) {
155 self.index_builder.ensure_capacity(capacity);
156 self.value_builder.ensure_capacity(capacity);
157 self.nulls.ensure_capacity(capacity);
158 }
159
160 fn set_validity(&mut self, validity: Mask) {
161 self.nulls = LazyNullBufferBuilder::new(validity.len());
162 self.nulls.append_validity_mask(validity);
163 }
164
165 fn finish(&mut self) -> ArrayRef {
166 assert_eq!(
167 self.index_builder.len(),
168 self.nulls.len() + 1,
169 "Indices length must be one more than nulls length."
170 );
171
172 ListArray::try_new(
173 self.value_builder.finish(),
174 self.index_builder.finish(),
175 self.nulls.finish_with_nullability(self.nullability),
176 )
177 .vortex_expect("Buffer, offsets, and validity must have same length.")
178 .into_array()
179 }
180}
181
182#[cfg(test)]
183mod tests {
184 use std::sync::Arc;
185
186 use Nullability::{NonNullable, Nullable};
187 use vortex_buffer::buffer;
188 use vortex_dtype::PType::I32;
189 use vortex_dtype::{DType, Nullability};
190 use vortex_scalar::Scalar;
191
192 use crate::array::Array;
193 use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
194 use crate::builders::ArrayBuilder;
195 use crate::builders::list::ListBuilder;
196 use crate::validity::Validity;
197 use crate::vtable::ValidityHelper;
198 use crate::{IntoArray as _, ToCanonical};
199
200 #[test]
201 fn test_empty() {
202 let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
203
204 let list = builder.finish();
205 assert_eq!(list.len(), 0);
206 }
207
208 #[test]
209 fn test_values() {
210 let dtype: Arc<DType> = Arc::new(I32.into());
211 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
212
213 builder
214 .append_value(
215 Scalar::list(
216 dtype.clone(),
217 vec![1i32.into(), 2i32.into(), 3i32.into()],
218 NonNullable,
219 )
220 .as_list(),
221 )
222 .unwrap();
223
224 builder
225 .append_value(
226 Scalar::list(
227 dtype,
228 vec![4i32.into(), 5i32.into(), 6i32.into()],
229 NonNullable,
230 )
231 .as_list(),
232 )
233 .unwrap();
234
235 let list = builder.finish();
236 assert_eq!(list.len(), 2);
237
238 let list_array = list.to_list().unwrap();
239
240 assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
241 assert_eq!(list_array.elements_at(1).unwrap().len(), 3);
242 }
243
244 #[test]
245 fn test_non_null_fails() {
246 let dtype: Arc<DType> = Arc::new(I32.into());
247 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
248
249 assert!(
250 builder
251 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
252 .is_err()
253 )
254 }
255
256 #[test]
257 fn test_nullable_values() {
258 let dtype: Arc<DType> = Arc::new(I32.into());
259 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
260
261 builder
262 .append_value(
263 Scalar::list(
264 dtype.clone(),
265 vec![1i32.into(), 2i32.into(), 3i32.into()],
266 NonNullable,
267 )
268 .as_list(),
269 )
270 .unwrap();
271
272 builder
273 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
274 .unwrap();
275
276 builder
277 .append_value(
278 Scalar::list(
279 dtype,
280 vec![4i32.into(), 5i32.into(), 6i32.into()],
281 NonNullable,
282 )
283 .as_list(),
284 )
285 .unwrap();
286
287 let list = builder.finish();
288 assert_eq!(list.len(), 3);
289
290 let list_array = list.to_list().unwrap();
291
292 assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
293 assert_eq!(list_array.elements_at(1).unwrap().len(), 0);
294 assert_eq!(list_array.elements_at(2).unwrap().len(), 3);
295 }
296
297 fn test_extend_builder_gen<O: OffsetPType>() {
298 let list = ListArray::from_iter_opt_slow::<O, _, _>(
299 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
300 Arc::new(I32.into()),
301 )
302 .unwrap();
303
304 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
305
306 builder.extend_from_array(&list).unwrap();
307 builder.extend_from_array(&list).unwrap();
308
309 let expect = ListArray::from_iter_opt_slow::<O, _, _>(
310 [
311 Some(vec![0, 1, 2]),
312 None,
313 Some(vec![4, 5]),
314 Some(vec![0, 1, 2]),
315 None,
316 Some(vec![4, 5]),
317 ],
318 Arc::new(DType::Primitive(I32, NonNullable)),
319 )
320 .unwrap()
321 .to_list()
322 .unwrap();
323
324 let res = builder
325 .finish()
326 .to_canonical()
327 .unwrap()
328 .into_list()
329 .unwrap();
330
331 assert_eq!(
332 res.elements().to_primitive().unwrap().as_slice::<i32>(),
333 expect.elements().to_primitive().unwrap().as_slice::<i32>()
334 );
335
336 assert_eq!(
337 res.offsets().to_primitive().unwrap().as_slice::<O>(),
338 expect.offsets().to_primitive().unwrap().as_slice::<O>()
339 );
340
341 assert_eq!(res.validity(), expect.validity())
342 }
343
344 #[test]
345 fn test_extend_builder() {
346 test_extend_builder_gen::<i8>();
347 test_extend_builder_gen::<i16>();
348 test_extend_builder_gen::<i32>();
349 test_extend_builder_gen::<i64>();
350
351 test_extend_builder_gen::<u8>();
352 test_extend_builder_gen::<u16>();
353 test_extend_builder_gen::<u32>();
354 test_extend_builder_gen::<u64>();
355 }
356
357 #[test]
358 pub fn test_array_with_gap() {
359 let one_trailing_unused_element = ListArray::try_new(
360 buffer![1, 2, 3, 4].into_array(),
361 buffer![0, 3].into_array(),
362 Validity::NonNullable,
363 )
364 .unwrap();
365
366 let second_array = ListArray::try_new(
367 buffer![5, 6].into_array(),
368 buffer![0, 2].into_array(),
369 Validity::NonNullable,
370 )
371 .unwrap();
372
373 let chunked_list = ChunkedArray::try_new(
374 vec![
375 one_trailing_unused_element.clone().into_array(),
376 second_array.clone().into_array(),
377 ],
378 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
379 );
380
381 let canon_values = chunked_list.unwrap().to_list().unwrap();
382
383 assert_eq!(
384 one_trailing_unused_element.scalar_at(0).unwrap(),
385 canon_values.scalar_at(0).unwrap()
386 );
387 assert_eq!(
388 second_array.scalar_at(0).unwrap(),
389 canon_values.scalar_at(1).unwrap()
390 );
391 }
392}