1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::Nullability::NonNullable;
8use vortex_dtype::{DType, NativePType, Nullability};
9use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
10use vortex_mask::Mask;
11use vortex_scalar::ListScalar;
12
13use crate::arrays::{ListArray, OffsetPType};
14use crate::builders::{
15 ArrayBuilder, DEFAULT_BUILDER_CAPACITY, LazyNullBufferBuilder, PrimitiveBuilder,
16 builder_with_capacity,
17};
18use crate::canonical::{Canonical, ToCanonical};
19use crate::compute::{add_scalar, cast, sub_scalar};
20use crate::{Array, ArrayRef, IntoArray};
21
22pub struct ListBuilder<O: NativePType> {
24 dtype: DType,
25 value_builder: Box<dyn ArrayBuilder>,
27 index_builder: PrimitiveBuilder<O>,
29 nulls: LazyNullBufferBuilder,
30}
31
32impl<O: OffsetPType> ListBuilder<O> {
33 pub fn new(value_dtype: Arc<DType>, nullability: Nullability) -> Self {
35 Self::with_capacity(value_dtype, nullability, DEFAULT_BUILDER_CAPACITY)
36 }
37
38 pub fn with_capacity(
46 value_dtype: Arc<DType>,
47 nullability: Nullability,
48 index_capacity: usize,
49 ) -> Self {
50 Self::with_values_and_index_capacity(
51 value_dtype,
52 nullability,
53 2 * index_capacity,
55 index_capacity,
56 )
57 }
58
59 pub fn with_values_and_index_capacity(
65 value_dtype: Arc<DType>,
66 nullability: Nullability,
67 values_capacity: usize,
68 index_capacity: usize,
69 ) -> Self {
70 let value_builder = builder_with_capacity(value_dtype.as_ref(), values_capacity);
71 let mut index_builder = PrimitiveBuilder::<O>::with_capacity(NonNullable, index_capacity);
72
73 index_builder.append_zero();
75
76 Self {
77 value_builder,
78 index_builder,
79 nulls: LazyNullBufferBuilder::new(index_capacity),
80 dtype: DType::List(value_dtype, nullability),
81 }
82 }
83
84 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
86 match value.elements() {
87 None => {
88 if self.dtype.nullability() == NonNullable {
89 vortex_bail!("Cannot append null value to non-nullable list");
90 }
91 self.append_null();
92 }
93 Some(elements) => {
94 for scalar in elements {
95 self.value_builder.append_scalar(&scalar)?;
98 }
99
100 self.nulls.append_non_null();
101 self.index_builder.append_value(
102 O::from_usize(self.value_builder.len())
103 .vortex_expect("Failed to convert from usize to O"),
104 );
105 }
106 }
107
108 Ok(())
109 }
110
111 pub fn append_option(&mut self, value: Option<ListScalar>) -> VortexResult<()> {
119 match value {
120 Some(value) => self.append_value(value),
121 None => {
122 self.append_null();
123 Ok(())
124 }
125 }
126 }
127
128 pub fn finish_into_list(&mut self) -> ListArray {
130 assert_eq!(
131 self.index_builder.len(),
132 self.nulls.len() + 1,
133 "Indices length must be one more than nulls length."
134 );
135
136 ListArray::try_new(
138 self.value_builder.finish(),
139 self.index_builder.finish(),
140 self.nulls.finish_with_nullability(self.dtype.nullability()),
141 )
142 .vortex_expect("Buffer, offsets, and validity must have same length.")
143 }
144
145 pub fn element_dtype(&self) -> &DType {
148 let DType::List(element_dtype, _) = &self.dtype else {
149 vortex_panic!("`ListBuilder` has an incorrect dtype: {}", self.dtype);
150 };
151
152 element_dtype
153 }
154}
155
156impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
157 fn as_any(&self) -> &dyn Any {
158 self
159 }
160
161 fn as_any_mut(&mut self) -> &mut dyn Any {
162 self
163 }
164
165 fn dtype(&self) -> &DType {
166 &self.dtype
167 }
168
169 fn len(&self) -> usize {
170 self.nulls.len()
171 }
172
173 fn append_zeros(&mut self, n: usize) {
174 let count = self.value_builder.len();
175 self.value_builder.append_zeros(n);
177 for i in 0..n {
178 self.index_builder.append_value(
179 O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
180 )
181 }
182 self.nulls.append_n_non_nulls(n);
183 }
184
185 unsafe fn append_nulls_unchecked(&mut self, n: usize) {
186 let count = self.value_builder.len();
187 for _ in 0..n {
188 self.index_builder.append_value(
191 O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
192 )
193 }
194 self.nulls.append_n_nulls(n);
195 }
196
197 unsafe fn extend_from_array_unchecked(&mut self, array: &dyn Array) {
198 let list = array.to_list();
199 if list.is_empty() {
200 return;
201 }
202
203 let n_already_added_values = self.value_builder.len();
204 let Some(n_already_added_values) = O::from_usize(n_already_added_values) else {
205 vortex_panic!(
206 "cannot convert length {} to type {:?}",
207 n_already_added_values,
208 O::PTYPE
209 )
210 };
211
212 let offsets = list.offsets();
213 let elements = list.elements();
214
215 let index_dtype = self.index_builder.dtype();
216
217 let n_leading_junk_values_scalar = offsets
218 .scalar_at(0)
219 .cast(index_dtype)
220 .vortex_expect("Must cast to index dtype");
221 let n_leading_junk_values =
222 usize::try_from(&n_leading_junk_values_scalar).vortex_expect("Offset must be a usize");
223
224 let casted_offsets = cast(&offsets.slice(1..offsets.len()), index_dtype)
225 .vortex_expect("Offsets must be an index dtype");
226 let offsets_without_leading_junk =
227 sub_scalar(&casted_offsets, n_leading_junk_values_scalar)
228 .vortex_expect("Offsets must be able to subtract leading offset");
229 let offsets_into_builder =
230 add_scalar(&offsets_without_leading_junk, n_already_added_values.into())
231 .vortex_expect("Offsets must be able to add existing values offsets");
232
233 let last_offset = offsets.scalar_at(offsets.len() - 1);
234 let last_offset = usize::try_from(&last_offset).vortex_expect("Offset must be a usize");
235 let non_junk_values = elements.slice(n_leading_junk_values..last_offset);
236
237 self.nulls.append_validity_mask(array.validity_mask());
238 self.index_builder.extend_from_array(&offsets_into_builder);
239 self.value_builder.ensure_capacity(non_junk_values.len());
240 self.value_builder.extend_from_array(&non_junk_values);
241 }
242
243 fn ensure_capacity(&mut self, capacity: usize) {
244 self.value_builder.ensure_capacity(capacity);
245 self.index_builder.ensure_capacity(capacity);
246 self.nulls.ensure_capacity(capacity);
247 }
248
249 fn set_validity(&mut self, validity: Mask) {
250 self.nulls = LazyNullBufferBuilder::new(validity.len());
251 self.nulls.append_validity_mask(validity);
252 }
253
254 fn finish(&mut self) -> ArrayRef {
255 self.finish_into_list().into_array()
256 }
257
258 fn finish_into_canonical(&mut self) -> Canonical {
259 Canonical::List(self.finish_into_list())
260 }
261}
262
263#[cfg(test)]
264mod tests {
265 use std::sync::Arc;
266
267 use Nullability::{NonNullable, Nullable};
268 use vortex_buffer::buffer;
269 use vortex_dtype::PType::I32;
270 use vortex_dtype::{DType, Nullability};
271 use vortex_scalar::Scalar;
272
273 use crate::array::Array;
274 use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
275 use crate::builders::ArrayBuilder;
276 use crate::builders::list::ListBuilder;
277 use crate::validity::Validity;
278 use crate::vtable::ValidityHelper;
279 use crate::{IntoArray as _, ToCanonical};
280
281 #[test]
282 fn test_empty() {
283 let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
284
285 let list = builder.finish();
286 assert_eq!(list.len(), 0);
287 }
288
289 #[test]
290 fn test_values() {
291 let dtype: Arc<DType> = Arc::new(I32.into());
292 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
293
294 builder
295 .append_value(
296 Scalar::list(
297 dtype.clone(),
298 vec![1i32.into(), 2i32.into(), 3i32.into()],
299 NonNullable,
300 )
301 .as_list(),
302 )
303 .unwrap();
304
305 builder
306 .append_value(
307 Scalar::list(
308 dtype,
309 vec![4i32.into(), 5i32.into(), 6i32.into()],
310 NonNullable,
311 )
312 .as_list(),
313 )
314 .unwrap();
315
316 let list = builder.finish();
317 assert_eq!(list.len(), 2);
318
319 let list_array = list.to_list();
320
321 assert_eq!(list_array.elements_at(0).len(), 3);
322 assert_eq!(list_array.elements_at(1).len(), 3);
323 }
324
325 #[test]
326 fn test_append_empty_list() {
327 let dtype: Arc<DType> = Arc::new(I32.into());
328 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
329
330 assert!(
331 builder
332 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
333 .is_ok()
334 )
335 }
336
337 #[test]
338 fn test_nullable_values() {
339 let dtype: Arc<DType> = Arc::new(I32.into());
340 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
341
342 builder
343 .append_value(
344 Scalar::list(
345 dtype.clone(),
346 vec![1i32.into(), 2i32.into(), 3i32.into()],
347 NonNullable,
348 )
349 .as_list(),
350 )
351 .unwrap();
352
353 builder
354 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
355 .unwrap();
356
357 builder
358 .append_value(
359 Scalar::list(
360 dtype,
361 vec![4i32.into(), 5i32.into(), 6i32.into()],
362 NonNullable,
363 )
364 .as_list(),
365 )
366 .unwrap();
367
368 let list = builder.finish();
369 assert_eq!(list.len(), 3);
370
371 let list_array = list.to_list();
372
373 assert_eq!(list_array.elements_at(0).len(), 3);
374 assert_eq!(list_array.elements_at(1).len(), 0);
375 assert_eq!(list_array.elements_at(2).len(), 3);
376 }
377
378 fn test_extend_builder_gen<O: OffsetPType>() {
379 let list = ListArray::from_iter_opt_slow::<O, _, _>(
380 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
381 Arc::new(I32.into()),
382 )
383 .unwrap();
384
385 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
386
387 builder.extend_from_array(&list);
388 builder.extend_from_array(&list);
389 builder.extend_from_array(&list.slice(0..0));
390 builder.extend_from_array(&list.slice(1..3));
391
392 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
393 [
394 Some(vec![0, 1, 2]),
395 None,
396 Some(vec![4, 5]),
397 Some(vec![0, 1, 2]),
398 None,
399 Some(vec![4, 5]),
400 None,
401 Some(vec![4, 5]),
402 ],
403 Arc::new(DType::Primitive(I32, NonNullable)),
404 )
405 .unwrap()
406 .to_list();
407
408 let actual = builder.finish_into_canonical().into_list();
409
410 assert_eq!(
411 actual.elements().to_primitive().as_slice::<i32>(),
412 expected.elements().to_primitive().as_slice::<i32>()
413 );
414
415 assert_eq!(
416 actual.offsets().to_primitive().as_slice::<O>(),
417 expected.offsets().to_primitive().as_slice::<O>()
418 );
419
420 assert_eq!(actual.validity(), expected.validity())
421 }
422
423 #[test]
424 fn test_extend_builder() {
425 test_extend_builder_gen::<i8>();
426 test_extend_builder_gen::<i16>();
427 test_extend_builder_gen::<i32>();
428 test_extend_builder_gen::<i64>();
429
430 test_extend_builder_gen::<u8>();
431 test_extend_builder_gen::<u16>();
432 test_extend_builder_gen::<u32>();
433 test_extend_builder_gen::<u64>();
434 }
435
436 #[test]
437 pub fn test_array_with_gap() {
438 let one_trailing_unused_element = ListArray::try_new(
439 buffer![1, 2, 3, 4].into_array(),
440 buffer![0, 3].into_array(),
441 Validity::NonNullable,
442 )
443 .unwrap();
444
445 let second_array = ListArray::try_new(
446 buffer![5, 6].into_array(),
447 buffer![0, 2].into_array(),
448 Validity::NonNullable,
449 )
450 .unwrap();
451
452 let chunked_list = ChunkedArray::try_new(
453 vec![
454 one_trailing_unused_element.clone().into_array(),
455 second_array.clone().into_array(),
456 ],
457 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
458 );
459
460 let canon_values = chunked_list.unwrap().to_list();
461
462 assert_eq!(
463 one_trailing_unused_element.scalar_at(0),
464 canon_values.scalar_at(0)
465 );
466 assert_eq!(second_array.scalar_at(0), canon_values.scalar_at(1));
467 }
468}