1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::Nullability::NonNullable;
8use vortex_dtype::{DType, NativePType, Nullability};
9use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_ensure, vortex_panic};
10use vortex_mask::Mask;
11use vortex_scalar::{ListScalar, Scalar};
12
13use crate::arrays::ListArray;
14use crate::builders::{
15 ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder, PrimitiveBuilder,
16 builder_with_capacity,
17};
18use crate::canonical::{Canonical, ToCanonical};
19use crate::compute::{add_scalar, cast, sub_scalar};
20use crate::{Array, ArrayRef, IntoArray, OffsetPType};
21
22pub struct ListBuilder<O: NativePType> {
24 dtype: DType,
25 value_builder: Box<dyn ArrayBuilder>,
27 index_builder: PrimitiveBuilder<O>,
29 nulls: LazyNullBufferBuilder,
30}
31
32impl<O: OffsetPType> ListBuilder<O> {
33 pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
35 Self::with_capacity(value_dtype, nullability, DEFAULT_BUILDER_CAPACITY)
36 }
37
38 pub fn with_capacity(
46 value_dtype: Arc<DType>,
47 nullability: Nullability,
48 index_capacity: usize,
49 ) -> Self {
50 Self::with_values_and_index_capacity(
51 value_dtype,
52 nullability,
53 2 * index_capacity,
55 index_capacity,
56 )
57 }
58
59 pub fn with_values_and_index_capacity(
65 value_dtype: Arc<DType>,
66 nullability: Nullability,
67 values_capacity: usize,
68 index_capacity: usize,
69 ) -> Self {
70 let value_builder = builder_with_capacity(value_dtype.as_ref(), values_capacity);
71 let mut index_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, index_capacity);
72
73 index_builder.append_zero();
75
76 Self {
77 value_builder,
78 index_builder,
79 nulls: LazyNullBufferBuilder::new(index_capacity),
80 dtype: DType::List(value_dtype, nullability),
81 }
82 }
83
84 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
86 match value.elements() {
87 None => {
88 if self.dtype.nullability() == NonNullable {
89 vortex_bail!("Cannot append null value to non-nullable list");
90 }
91 self.append_null();
92 }
93 Some(elements) => {
94 for scalar in elements {
95 self.value_builder.append_scalar(&scalar)?;
98 }
99
100 self.nulls.append_non_null();
101 self.index_builder.append_value(
102 O::from_usize(self.value_builder.len())
103 .vortex_expect("Failed to convert from usize to O"),
104 );
105 }
106 }
107
108 Ok(())
109 }
110
111 pub fn finish_into_list(&mut self) -> ListArray {
113 assert_eq!(
114 self.index_builder.len(),
115 self.nulls.len() + 1,
116 "Indices length must be one more than nulls length."
117 );
118
119 ListArray::try_new(
120 self.value_builder.finish(),
121 self.index_builder.finish(),
122 self.nulls.finish_with_nullability(self.dtype.nullability()),
123 )
124 .vortex_expect("Buffer, offsets, and validity must have same length.")
125 }
126
127 pub fn element_dtype(&self) -> &DType {
130 let DType::List(element_dtype, _) = &self.dtype else {
131 vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
132 };
133
134 element_dtype
135 }
136}
137
138impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
139 fn as_any(&self) -> &dyn Any {
140 self
141 }
142
143 fn as_any_mut(&mut self) -> &mut dyn Any {
144 self
145 }
146
147 fn dtype(&self) -> &DType {
148 &self.dtype
149 }
150
151 fn len(&self) -> usize {
152 self.nulls.len()
153 }
154
155 fn append_zeros(&mut self, n: usize) {
156 let curr_len = self.value_builder.len();
157 for _ in 0..n {
158 self.index_builder.append_value(
159 O::from_usize(curr_len).vortex_expect("Failed to convert from usize to <O>"),
160 )
161 }
162 self.nulls.append_n_non_nulls(n);
163 }
164
165 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
166 let count = self.value_builder.len();
167 for _ in 0..n {
168 self.index_builder.append_value(
171 O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
172 )
173 }
174 self.nulls.append_n_nulls(n);
175 }
176
177 fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> {
178 vortex_ensure!(
179 scalar.dtype() == self.dtype(),
180 "ListBuilder expected scalar with dtype {:?}, got {:?}",
181 self.dtype(),
182 scalar.dtype()
183 );
184
185 let list_scalar = ListScalar::try_from(scalar)?;
186 self.append_value(list_scalar)
187 }
188
189 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
190 let list = array.to_list();
191 if list.is_empty() {
192 return;
193 }
194
195 let n_already_added_values = self.value_builder.len();
196 let Some(n_already_added_values) = O::from_usize(n_already_added_values) else {
197 vortex_panic!(
198 "cannot convert length {} to type {:?}",
199 n_already_added_values,
200 O::PTYPE
201 )
202 };
203
204 let offsets = list.offsets();
205 let elements = list.elements();
206
207 let index_dtype = self.index_builder.dtype();
208
209 let n_leading_junk_values_scalar = offsets
210 .scalar_at(0)
211 .cast(index_dtype)
212 .vortex_expect("Must cast to index dtype");
213 let n_leading_junk_values =
214 usize::try_from(&n_leading_junk_values_scalar).vortex_expect("Offset must be a usize");
215
216 let casted_offsets = cast(&offsets.slice(1..offsets.len()), index_dtype)
217 .vortex_expect("Offsets must be an index dtype");
218 let offsets_without_leading_junk =
219 sub_scalar(&casted_offsets, n_leading_junk_values_scalar)
220 .vortex_expect("Offsets must be able to subtract leading offset");
221 let offsets_into_builder =
222 add_scalar(&offsets_without_leading_junk, n_already_added_values.into())
223 .vortex_expect("Offsets must be able to add existing values offsets");
224
225 let last_offset = offsets.scalar_at(offsets.len() - 1);
226 let last_offset = usize::try_from(&last_offset).vortex_expect("Offset must be a usize");
227 let non_junk_values = elements.slice(n_leading_junk_values..last_offset);
228
229 self.nulls.append_validity_mask(array.validity_mask());
230 self.index_builder.extend_from_array(&offsets_into_builder);
231 self.value_builder.ensure_capacity(non_junk_values.len());
232 self.value_builder.extend_from_array(&non_junk_values);
233 }
234
235 fn ensure_capacity(&mut self, capacity: usize) {
236 self.value_builder.ensure_capacity(capacity);
237 self.index_builder.ensure_capacity(capacity);
238 self.nulls.ensure_capacity(capacity);
239 }
240
241 fn set_validity(&mut self, validity: Mask) {
242 self.nulls = LazyNullBufferBuilder::new(validity.len());
243 self.nulls.append_validity_mask(validity);
244 }
245
246 fn finish(&mut self) -> ArrayRef {
247 self.finish_into_list().into_array()
248 }
249
250 fn finish_into_canonical(&mut self) -> Canonical {
251 Canonical::List(self.finish_into_list())
252 }
253}
254
255#[cfg(test)]
256mod tests {
257 use std::sync::Arc;
258
259 use Nullability::{NonNullable, Nullable};
260 use vortex_buffer::buffer;
261 use vortex_dtype::PType::I32;
262 use vortex_dtype::{DType, Nullability};
263 use vortex_scalar::Scalar;
264
265 use crate::array::Array;
266 use crate::arrays::{ChunkedArray, ListArray};
267 use crate::builders::ArrayBuilder;
268 use crate::builders::list::ListBuilder;
269 use crate::validity::Validity;
270 use crate::vtable::ValidityHelper;
271 use crate::{IntoArray as _, OffsetPType, ToCanonical};
272
273 #[test]
274 fn test_empty() {
275 let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
276
277 let list = builder.finish();
278 assert_eq!(list.len(), 0);
279 }
280
281 #[test]
282 fn test_values() {
283 let dtype: Arc<DType> = Arc::new(I32.into());
284 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
285
286 builder
287 .append_value(
288 Scalar::list(
289 dtype.clone(),
290 vec![1i32.into(), 2i32.into(), 3i32.into()],
291 NonNullable,
292 )
293 .as_list(),
294 )
295 .unwrap();
296
297 builder
298 .append_value(
299 Scalar::list(
300 dtype,
301 vec![4i32.into(), 5i32.into(), 6i32.into()],
302 NonNullable,
303 )
304 .as_list(),
305 )
306 .unwrap();
307
308 let list = builder.finish();
309 assert_eq!(list.len(), 2);
310
311 let list_array = list.to_list();
312
313 assert_eq!(list_array.list_elements_at(0).len(), 3);
314 assert_eq!(list_array.list_elements_at(1).len(), 3);
315 }
316
317 #[test]
318 fn test_append_empty_list() {
319 let dtype: Arc<DType> = Arc::new(I32.into());
320 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
321
322 assert!(
323 builder
324 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
325 .is_ok()
326 )
327 }
328
329 #[test]
330 fn test_nullable_values() {
331 let dtype: Arc<DType> = Arc::new(I32.into());
332 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
333
334 builder
335 .append_value(
336 Scalar::list(
337 dtype.clone(),
338 vec![1i32.into(), 2i32.into(), 3i32.into()],
339 NonNullable,
340 )
341 .as_list(),
342 )
343 .unwrap();
344
345 builder
346 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
347 .unwrap();
348
349 builder
350 .append_value(
351 Scalar::list(
352 dtype,
353 vec![4i32.into(), 5i32.into(), 6i32.into()],
354 NonNullable,
355 )
356 .as_list(),
357 )
358 .unwrap();
359
360 let list = builder.finish();
361 assert_eq!(list.len(), 3);
362
363 let list_array = list.to_list();
364
365 assert_eq!(list_array.list_elements_at(0).len(), 3);
366 assert_eq!(list_array.list_elements_at(1).len(), 0);
367 assert_eq!(list_array.list_elements_at(2).len(), 3);
368 }
369
370 fn test_extend_builder_gen<O: OffsetPType>() {
371 let list = ListArray::from_iter_opt_slow::<O, _, _>(
372 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
373 Arc::new(I32.into()),
374 )
375 .unwrap();
376
377 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
378
379 builder.extend_from_array(&list);
380 builder.extend_from_array(&list);
381 builder.extend_from_array(&list.slice(0..0));
382 builder.extend_from_array(&list.slice(1..3));
383
384 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
385 [
386 Some(vec![0, 1, 2]),
387 None,
388 Some(vec![4, 5]),
389 Some(vec![0, 1, 2]),
390 None,
391 Some(vec![4, 5]),
392 None,
393 Some(vec![4, 5]),
394 ],
395 Arc::new(DType::Primitive(I32, NonNullable)),
396 )
397 .unwrap()
398 .to_list();
399
400 let actual = builder.finish_into_canonical().into_list();
401
402 assert_eq!(
403 actual.elements().to_primitive().as_slice::<i32>(),
404 expected.elements().to_primitive().as_slice::<i32>()
405 );
406
407 assert_eq!(
408 actual.offsets().to_primitive().as_slice::<O>(),
409 expected.offsets().to_primitive().as_slice::<O>()
410 );
411
412 assert_eq!(actual.validity(), expected.validity())
413 }
414
415 #[test]
416 fn test_extend_builder() {
417 test_extend_builder_gen::<i8>();
418 test_extend_builder_gen::<i16>();
419 test_extend_builder_gen::<i32>();
420 test_extend_builder_gen::<i64>();
421
422 test_extend_builder_gen::<u8>();
423 test_extend_builder_gen::<u16>();
424 test_extend_builder_gen::<u32>();
425 test_extend_builder_gen::<u64>();
426 }
427
428 #[test]
429 pub fn test_array_with_gap() {
430 let one_trailing_unused_element = ListArray::try_new(
431 buffer![1, 2, 3, 4].into_array(),
432 buffer![0, 3].into_array(),
433 Validity::NonNullable,
434 )
435 .unwrap();
436
437 let second_array = ListArray::try_new(
438 buffer![5, 6].into_array(),
439 buffer![0, 2].into_array(),
440 Validity::NonNullable,
441 )
442 .unwrap();
443
444 let chunked_list = ChunkedArray::try_new(
445 vec![
446 one_trailing_unused_element.clone().into_array(),
447 second_array.clone().into_array(),
448 ],
449 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
450 );
451
452 let canon_values = chunked_list.unwrap().to_list();
453
454 assert_eq!(
455 one_trailing_unused_element.scalar_at(0),
456 canon_values.scalar_at(0)
457 );
458 assert_eq!(second_array.scalar_at(0), canon_values.scalar_at(1));
459 }
460
461 #[test]
462 fn test_append_scalar() {
463 let dtype: Arc<DType> = Arc::new(I32.into());
464 let mut builder = ListBuilder::<u64>::with_capacity(dtype.clone(), Nullable, 10);
465
466 let list_scalar1 = Scalar::list(dtype.clone(), vec![1i32.into(), 2i32.into()], Nullable);
468 builder.append_scalar(&list_scalar1).unwrap();
469
470 let list_scalar2 = Scalar::list(
472 dtype.clone(),
473 vec![3i32.into(), 4i32.into(), 5i32.into()],
474 Nullable,
475 );
476 builder.append_scalar(&list_scalar2).unwrap();
477
478 let null_scalar = Scalar::null(DType::List(dtype.clone(), Nullable));
480 builder.append_scalar(&null_scalar).unwrap();
481
482 let array = builder.finish_into_list();
483 assert_eq!(array.len(), 3);
484
485 let scalar0 = array.scalar_at(0);
488 let list0 = scalar0.as_list();
489 assert_eq!(list0.len(), 2);
490 if let Some(list0_items) = list0.elements() {
491 assert_eq!(list0_items[0].as_primitive().typed_value::<i32>(), Some(1));
492 assert_eq!(list0_items[1].as_primitive().typed_value::<i32>(), Some(2));
493 }
494
495 let scalar1 = array.scalar_at(1);
496 let list1 = scalar1.as_list();
497 assert_eq!(list1.len(), 3);
498 if let Some(list1_items) = list1.elements() {
499 assert_eq!(list1_items[0].as_primitive().typed_value::<i32>(), Some(3));
500 assert_eq!(list1_items[1].as_primitive().typed_value::<i32>(), Some(4));
501 assert_eq!(list1_items[2].as_primitive().typed_value::<i32>(), Some(5));
502 }
503
504 let scalar2 = array.scalar_at(2);
505 let list2 = scalar2.as_list();
506 assert!(list2.is_null()); assert!(array.validity().is_valid(0));
510 assert!(array.validity().is_valid(1));
511 assert!(!array.validity().is_valid(2));
512
513 let mut builder = ListBuilder::<u64>::with_capacity(dtype, NonNullable, 10);
515 let wrong_scalar = Scalar::from(42i32);
516 assert!(builder.append_scalar(&wrong_scalar).is_err());
517 }
518}