1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::Nullability::NonNullable;
8use vortex_dtype::{DType, IntegerPType, Nullability, match_each_integer_ptype};
9use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_ensure, vortex_panic};
10use vortex_mask::Mask;
11use vortex_scalar::{ListScalar, Scalar};
12
13use crate::arrays::{ListArray, list_view_from_list};
14use crate::builders::{
15 ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder, PrimitiveBuilder,
16 builder_with_capacity,
17};
18use crate::canonical::{Canonical, ToCanonical};
19use crate::{Array, ArrayRef, IntoArray};
20
21pub struct ListBuilder<O: IntegerPType> {
24 dtype: DType,
26
27 elements_builder: Box<dyn ArrayBuilder>,
29
30 offsets_builder: PrimitiveBuilder<O>,
32
33 nulls: LazyNullBufferBuilder,
35}
36
37impl<O: IntegerPType> ListBuilder<O> {
38 pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
40 Self::with_capacity(
41 value_dtype,
42 nullability,
43 DEFAULT_BUILDER_CAPACITY * 2,
46 DEFAULT_BUILDER_CAPACITY,
47 )
48 }
49
50 pub fn with_capacity(
58 value_dtype: Arc<DType>,
59 nullability: Nullability,
60 elements_capacity: usize,
61 capacity: usize,
62 ) -> Self {
63 let elements_builder = builder_with_capacity(value_dtype.as_ref(), elements_capacity);
64 let mut offsets_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, capacity + 1);
65
66 offsets_builder.append_zero();
68
69 Self {
70 elements_builder,
71 offsets_builder,
72 nulls: LazyNullBufferBuilder::new(capacity),
73 dtype: DType::List(value_dtype, nullability),
74 }
75 }
76
77 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
79 match value.elements() {
80 None => {
81 if self.dtype.nullability() == NonNullable {
82 vortex_bail!("Cannot append null value to non-nullable list");
83 }
84 self.append_null();
85 }
86 Some(elements) => {
87 for scalar in elements {
88 self.elements_builder.append_scalar(&scalar)?;
91 }
92
93 self.nulls.append_non_null();
94 self.offsets_builder.append_value(
95 O::from_usize(self.elements_builder.len())
96 .vortex_expect("Failed to convert from usize to O"),
97 );
98 }
99 }
100
101 Ok(())
102 }
103
104 pub fn finish_into_list(&mut self) -> ListArray {
106 assert_eq!(
107 self.offsets_builder.len(),
108 self.nulls.len() + 1,
109 "offsets length must be one more than nulls length."
110 );
111
112 ListArray::try_new(
113 self.elements_builder.finish(),
114 self.offsets_builder.finish(),
115 self.nulls.finish_with_nullability(self.dtype.nullability()),
116 )
117 .vortex_expect("Buffer, offsets, and validity must have same length.")
118 }
119
120 pub fn element_dtype(&self) -> &DType {
123 let DType::List(element_dtype, _) = &self.dtype else {
124 vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
125 };
126
127 element_dtype
128 }
129}
130
131impl<O: IntegerPType> ArrayBuilder for ListBuilder<O> {
132 fn as_any(&self) -> &dyn Any {
133 self
134 }
135
136 fn as_any_mut(&mut self) -> &mut dyn Any {
137 self
138 }
139
140 fn dtype(&self) -> &DType {
141 &self.dtype
142 }
143
144 fn len(&self) -> usize {
145 self.nulls.len()
146 }
147
148 fn append_zeros(&mut self, n: usize) {
149 let curr_len = self.elements_builder.len();
150 for _ in 0..n {
151 self.offsets_builder.append_value(
152 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
153 )
154 }
155 self.nulls.append_n_non_nulls(n);
156 }
157
158 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
159 let curr_len = self.elements_builder.len();
160 for _ in 0..n {
161 self.offsets_builder.append_value(
164 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
165 )
166 }
167 self.nulls.append_n_nulls(n);
168 }
169
170 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
171 vortex_ensure!(
172 scalar.dtype() == self.dtype(),
173 "ListBuilder expected scalar with dtype {:?}, got {:?}",
174 self.dtype(),
175 scalar.dtype()
176 );
177
178 let list_scalar = ListScalar::try_from(scalar)?;
179 self.append_value(list_scalar)
180 }
181
182 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
183 let list = array.to_listview();
184 if list.is_empty() {
185 return;
186 }
187
188 self.nulls.append_validity_mask(array.validity_mask());
190
191 let elements = list.elements();
193 let offsets = list.offsets().to_primitive();
194 let sizes = list.sizes().to_primitive();
195
196 fn extend_inner<O, OffsetType, SizeType>(
197 builder: &mut ListBuilder<O>,
198 new_elements: &ArrayRef,
199 new_offsets: &[OffsetType],
200 new_sizes: &[SizeType],
201 ) where
202 O: IntegerPType,
203 OffsetType: IntegerPType,
204 SizeType: IntegerPType,
205 {
206 let num_lists = new_offsets.len();
207 debug_assert_eq!(num_lists, new_sizes.len());
208
209 let mut curr_offset = builder.elements_builder.len();
210 let mut offsets_range = builder.offsets_builder.uninit_range(num_lists);
211
212 for i in 0..new_offsets.len() {
215 let offset: usize = new_offsets[i].as_();
216 let size: usize = new_sizes[i].as_();
217
218 if size > 0 {
219 let list_elements = new_elements.slice(offset..offset + size);
220 builder.elements_builder.extend_from_array(&list_elements);
221 curr_offset += size;
222 }
223
224 let new_offset =
225 O::from_usize(curr_offset).vortex_expect("Failed to convert offset");
226
227 offsets_range.set_value(i, new_offset);
228 }
229
230 unsafe { offsets_range.finish() };
233 }
234
235 match_each_integer_ptype!(offsets.ptype(), |OffsetType| {
236 match_each_integer_ptype!(sizes.ptype(), |SizeType| {
237 extend_inner(
238 self,
239 elements,
240 offsets.as_slice::<OffsetType>(),
241 sizes.as_slice::<SizeType>(),
242 )
243 })
244 })
245 }
246
247 fn reserve_exact(&mut self, additional: usize) {
248 self.elements_builder.reserve_exact(additional);
249 self.offsets_builder.reserve_exact(additional);
250 self.nulls.reserve_exact(additional);
251 }
252
253 unsafe fn set_validity_unchecked(&mut self, validity: Mask) {
254 self.nulls = LazyNullBufferBuilder::new(validity.len());
255 self.nulls.append_validity_mask(validity);
256 }
257
258 fn finish(&mut self) -> ArrayRef {
259 self.finish_into_list().into_array()
260 }
261
262 fn finish_into_canonical(&mut self) -> Canonical {
263 Canonical::List(list_view_from_list(self.finish_into_list()))
264 }
265}
266
267#[cfg(test)]
268mod tests {
269 use std::sync::Arc;
270
271 use Nullability::{NonNullable, Nullable};
272 use vortex_buffer::buffer;
273 use vortex_dtype::PType::I32;
274 use vortex_dtype::{DType, IntegerPType, Nullability};
275 use vortex_scalar::Scalar;
276
277 use crate::array::Array;
278 use crate::arrays::{ChunkedArray, ListArray};
279 use crate::builders::ArrayBuilder;
280 use crate::builders::list::ListBuilder;
281 use crate::validity::Validity;
282 use crate::vtable::ValidityHelper;
283 use crate::{IntoArray, ToCanonical};
284
285 #[test]
286 fn test_empty() {
287 let mut builder =
288 ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0, 0);
289
290 let list = builder.finish();
291 assert_eq!(list.len(), 0);
292 }
293
294 #[test]
295 fn test_values() {
296 let dtype: Arc<DType> = Arc::new(I32.into());
297 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
298
299 builder
300 .append_value(
301 Scalar::list(
302 dtype.clone(),
303 vec![1i32.into(), 2i32.into(), 3i32.into()],
304 NonNullable,
305 )
306 .as_list(),
307 )
308 .unwrap();
309
310 builder
311 .append_value(
312 Scalar::list(
313 dtype,
314 vec![4i32.into(), 5i32.into(), 6i32.into()],
315 NonNullable,
316 )
317 .as_list(),
318 )
319 .unwrap();
320
321 let list = builder.finish();
322 assert_eq!(list.len(), 2);
323
324 let list_array = list.to_listview();
325
326 assert_eq!(list_array.list_elements_at(0).len(), 3);
327 assert_eq!(list_array.list_elements_at(1).len(), 3);
328 }
329
330 #[test]
331 fn test_append_empty_list() {
332 let dtype: Arc<DType> = Arc::new(I32.into());
333 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0, 0);
334
335 assert!(
336 builder
337 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
338 .is_ok()
339 )
340 }
341
342 #[test]
343 fn test_nullable_values() {
344 let dtype: Arc<DType> = Arc::new(I32.into());
345 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0, 0);
346
347 builder
348 .append_value(
349 Scalar::list(
350 dtype.clone(),
351 vec![1i32.into(), 2i32.into(), 3i32.into()],
352 NonNullable,
353 )
354 .as_list(),
355 )
356 .unwrap();
357
358 builder
359 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
360 .unwrap();
361
362 builder
363 .append_value(
364 Scalar::list(
365 dtype,
366 vec![4i32.into(), 5i32.into(), 6i32.into()],
367 NonNullable,
368 )
369 .as_list(),
370 )
371 .unwrap();
372
373 let list = builder.finish();
374 assert_eq!(list.len(), 3);
375
376 let list_array = list.to_listview();
377
378 assert_eq!(list_array.list_elements_at(0).len(), 3);
379 assert_eq!(list_array.list_elements_at(1).len(), 0);
380 assert_eq!(list_array.list_elements_at(2).len(), 3);
381 }
382
383 fn test_extend_builder_gen<O: IntegerPType>() {
384 let list = ListArray::from_iter_opt_slow::<O, _, _>(
385 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
386 Arc::new(I32.into()),
387 )
388 .unwrap();
389 assert_eq!(list.len(), 3);
390
391 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 18, 9);
392
393 builder.extend_from_array(&list);
394 builder.extend_from_array(&list);
395 builder.extend_from_array(&list.slice(0..0));
396 builder.extend_from_array(&list.slice(1..3));
397
398 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
399 [
400 Some(vec![0, 1, 2]),
401 None,
402 Some(vec![4, 5]),
403 Some(vec![0, 1, 2]),
404 None,
405 Some(vec![4, 5]),
406 None,
407 Some(vec![4, 5]),
408 ],
409 Arc::new(DType::Primitive(I32, NonNullable)),
410 )
411 .unwrap()
412 .to_listview();
413
414 let actual = builder.finish_into_canonical().into_listview();
415
416 assert_eq!(
417 actual.elements().to_primitive().as_slice::<i32>(),
418 expected.elements().to_primitive().as_slice::<i32>()
419 );
420
421 assert_eq!(
422 actual.offsets().to_primitive().as_slice::<O>(),
423 expected.offsets().to_primitive().as_slice::<O>()
424 );
425
426 assert_eq!(actual.validity(), expected.validity())
427 }
428
429 #[test]
430 fn test_extend_builder() {
431 test_extend_builder_gen::<i8>();
432 test_extend_builder_gen::<i16>();
433 test_extend_builder_gen::<i32>();
434 test_extend_builder_gen::<i64>();
435
436 test_extend_builder_gen::<u8>();
437 test_extend_builder_gen::<u16>();
438 test_extend_builder_gen::<u32>();
439 test_extend_builder_gen::<u64>();
440 }
441
442 #[test]
443 pub fn test_array_with_gap() {
444 let one_trailing_unused_element = ListArray::try_new(
445 buffer![1, 2, 3, 4].into_array(),
446 buffer![0, 3].into_array(),
447 Validity::NonNullable,
448 )
449 .unwrap();
450
451 let second_array = ListArray::try_new(
452 buffer![5, 6].into_array(),
453 buffer![0, 2].into_array(),
454 Validity::NonNullable,
455 )
456 .unwrap();
457
458 let chunked_list = ChunkedArray::try_new(
459 vec![
460 one_trailing_unused_element.clone().into_array(),
461 second_array.clone().into_array(),
462 ],
463 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
464 );
465
466 let canon_values = chunked_list.unwrap().to_listview();
467
468 assert_eq!(
469 one_trailing_unused_element.scalar_at(0),
470 canon_values.scalar_at(0)
471 );
472 assert_eq!(second_array.scalar_at(0), canon_values.scalar_at(1));
473 }
474
475 #[test]
476 fn test_append_scalar() {
477 let dtype: Arc<DType> = Arc::new(I32.into());
478 let mut builder = ListBuilder::<u64>::with_capacity(dtype.clone(), Nullable, 20, 10);
479
480 let list_scalar1 = Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable);
482 builder.append_scalar(&list_scalar1).unwrap();
483
484 let list_scalar2 = Scalar::list(
486 dtype.clone(),
487 vec![3i32.into(), 4i32.into(), 5i32.into()],
488 Nullable,
489 );
490 builder.append_scalar(&list_scalar2).unwrap();
491
492 let null_scalar = Scalar::null(DType::List(dtype.clone(), Nullable));
494 builder.append_scalar(&null_scalar).unwrap();
495
496 let array = builder.finish_into_list();
497 assert_eq!(array.len(), 3);
498
499 let scalar0 = array.scalar_at(0);
502 let list0 = scalar0.as_list();
503 assert_eq!(list0.len(), 2);
504 if let Some(list0_items) = list0.elements() {
505 assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
506 assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
507 }
508
509 let scalar1 = array.scalar_at(1);
510 let list1 = scalar1.as_list();
511 assert_eq!(list1.len(), 3);
512 if let Some(list1_items) = list1.elements() {
513 assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
514 assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
515 assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
516 }
517
518 let scalar2 = array.scalar_at(2);
519 let list2 = scalar2.as_list();
520 assert!(list2.is_null()); assert!(array.validity().is_valid(0));
524 assert!(array.validity().is_valid(1));
525 assert!(!array.validity().is_valid(2));
526
527 let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 20, 10);
529 let wrong_scalar = Scalar::from(42i32);
530 assert!(builder.append_scalar(&wrong_scalar).is_err());
531 }
532}