1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::Nullability::NonNullable;
8use vortex_dtype::{DType, NativePType, Nullability};
9use vortex_error::{VortexExpect, VortexResult, vortex_bail};
10use vortex_mask::Mask;
11use vortex_scalar::ListScalar;
12
13use crate::arrays::{ListArray, OffsetPType};
14use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
15use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
16use crate::compute::{add_scalar, cast, sub_scalar};
17use crate::{Array, ArrayRef, IntoArray, ToCanonical};
18
19pub struct ListBuilder<O: NativePType> {
20 value_builder: Box<dyn ArrayBuilder>,
22 index_builder: PrimitiveBuilder<O>,
24 nulls: LazyNullBufferBuilder,
25 nullability: Nullability,
27 dtype: DType,
28}
29
30impl<O: OffsetPType> ListBuilder<O> {
31 pub fn with_capacity(
39 value_dtype: Arc<DType>,
40 nullability: Nullability,
41 index_capacity: usize,
42 ) -> Self {
43 Self::with_values_and_index_capacity(
45 value_dtype,
46 nullability,
47 2 * index_capacity,
48 index_capacity,
49 )
50 }
51
52 pub fn with_values_and_index_capacity(
58 value_dtype: Arc<DType>,
59 nullability: Nullability,
60 values_capacity: usize,
61 index_capacity: usize,
62 ) -> Self {
63 let value_builder = builder_with_capacity(value_dtype.as_ref(), values_capacity);
64 let mut index_builder = PrimitiveBuilder::with_capacity(NonNullable, index_capacity);
65
66 index_builder.append_zero();
68
69 Self {
70 value_builder,
71 index_builder,
72 nulls: LazyNullBufferBuilder::new(index_capacity),
73 nullability,
74 dtype: DType::List(value_dtype, nullability),
75 }
76 }
77
78 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
79 match value.elements() {
80 None => {
81 if self.nullability == NonNullable {
82 vortex_bail!("Cannot append null value to non-nullable list");
83 }
84 self.append_null();
85 Ok(())
86 }
87 Some(elements) => {
88 for scalar in elements {
89 self.value_builder.append_scalar(&scalar)?;
92 }
93 self.nulls.append_non_null();
94 self.append_index(
95 O::from_usize(self.value_builder.len())
96 .vortex_expect("Failed to convert from usize to O"),
97 )
98 }
99 }
100 }
101
102 fn append_index(&mut self, index: O) -> VortexResult<()> {
103 self.index_builder.append_scalar(&index.into())
104 }
105}
106
107impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
108 fn as_any(&self) -> &dyn Any {
109 self
110 }
111
112 fn as_any_mut(&mut self) -> &mut dyn Any {
113 self
114 }
115
116 fn dtype(&self) -> &DType {
117 &self.dtype
118 }
119
120 fn len(&self) -> usize {
121 self.nulls.len()
122 }
123
124 fn append_zeros(&mut self, n: usize) {
125 let count = self.value_builder.len();
126 self.value_builder.append_zeros(n);
128 for i in 0..n {
129 self.append_index(
130 O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
131 )
132 .vortex_expect("Failed to append index");
133 }
134 self.nulls.append_n_non_nulls(n);
135 }
136
137 fn append_nulls(&mut self, n: usize) {
138 let count = self.value_builder.len();
139 for _ in 0..n {
140 self.append_index(
143 O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
144 )
145 .vortex_expect("Failed to append index");
146 }
147 self.nulls.append_n_nulls(n);
148 }
149
150 fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
151 let list = array.to_list()?;
152 if list.is_empty() {
153 return Ok(());
154 }
155
156 let n_already_added_values = self.value_builder.len();
157 let Some(n_already_added_values) = O::from_usize(n_already_added_values) else {
158 vortex_bail!(
159 "cannot convert length {} to type {:?}",
160 n_already_added_values,
161 O::PTYPE
162 )
163 };
164
165 let offsets = list.offsets();
166 let elements = list.elements();
167
168 let index_dtype = self.index_builder.dtype();
169
170 let n_leading_junk_values_scalar = offsets.scalar_at(0).cast(index_dtype)?;
171 let n_leading_junk_values = usize::try_from(&n_leading_junk_values_scalar)?;
172
173 let casted_offsets = cast(&offsets.slice(1..offsets.len()), index_dtype)?;
174 let offsets_without_leading_junk =
175 sub_scalar(&casted_offsets, n_leading_junk_values_scalar)?;
176 let offsets_into_builder =
177 add_scalar(&offsets_without_leading_junk, n_already_added_values.into())?;
178
179 let last_offset = offsets.scalar_at(offsets.len() - 1);
180 let last_offset = usize::try_from(&last_offset)?;
181 let non_junk_values = elements.slice(n_leading_junk_values..last_offset);
182
183 self.nulls.append_validity_mask(array.validity_mask());
184 self.index_builder
185 .extend_from_array(&offsets_into_builder)?;
186 self.value_builder.ensure_capacity(non_junk_values.len());
187 self.value_builder.extend_from_array(&non_junk_values)?;
188
189 Ok(())
190 }
191
192 fn ensure_capacity(&mut self, capacity: usize) {
193 self.index_builder.ensure_capacity(capacity);
194 self.value_builder.ensure_capacity(capacity);
195 self.nulls.ensure_capacity(capacity);
196 }
197
198 fn set_validity(&mut self, validity: Mask) {
199 self.nulls = LazyNullBufferBuilder::new(validity.len());
200 self.nulls.append_validity_mask(validity);
201 }
202
203 fn finish(&mut self) -> ArrayRef {
204 assert_eq!(
205 self.index_builder.len(),
206 self.nulls.len() + 1,
207 "Indices length must be one more than nulls length."
208 );
209
210 ListArray::try_new(
212 self.value_builder.finish(),
213 self.index_builder.finish(),
214 self.nulls.finish_with_nullability(self.nullability),
215 )
216 .vortex_expect("Buffer, offsets, and validity must have same length.")
217 .into_array()
218 }
219}
220
221#[cfg(test)]
222mod tests {
223 use std::sync::Arc;
224
225 use Nullability::{NonNullable, Nullable};
226 use vortex_buffer::buffer;
227 use vortex_dtype::PType::I32;
228 use vortex_dtype::{DType, Nullability};
229 use vortex_scalar::Scalar;
230
231 use crate::array::Array;
232 use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
233 use crate::builders::ArrayBuilder;
234 use crate::builders::list::ListBuilder;
235 use crate::validity::Validity;
236 use crate::vtable::ValidityHelper;
237 use crate::{IntoArray as _, ToCanonical};
238
239 #[test]
240 fn test_empty() {
241 let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
242
243 let list = builder.finish();
244 assert_eq!(list.len(), 0);
245 }
246
247 #[test]
248 fn test_values() {
249 let dtype: Arc<DType> = Arc::new(I32.into());
250 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
251
252 builder
253 .append_value(
254 Scalar::list(
255 dtype.clone(),
256 vec![1i32.into(), 2i32.into(), 3i32.into()],
257 NonNullable,
258 )
259 .as_list(),
260 )
261 .unwrap();
262
263 builder
264 .append_value(
265 Scalar::list(
266 dtype,
267 vec![4i32.into(), 5i32.into(), 6i32.into()],
268 NonNullable,
269 )
270 .as_list(),
271 )
272 .unwrap();
273
274 let list = builder.finish();
275 assert_eq!(list.len(), 2);
276
277 let list_array = list.to_list().unwrap();
278
279 assert_eq!(list_array.elements_at(0).len(), 3);
280 assert_eq!(list_array.elements_at(1).len(), 3);
281 }
282
283 #[test]
284 fn test_append_empty_list() {
285 let dtype: Arc<DType> = Arc::new(I32.into());
286 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
287
288 assert!(
289 builder
290 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
291 .is_ok()
292 )
293 }
294
295 #[test]
296 fn test_nullable_values() {
297 let dtype: Arc<DType> = Arc::new(I32.into());
298 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
299
300 builder
301 .append_value(
302 Scalar::list(
303 dtype.clone(),
304 vec![1i32.into(), 2i32.into(), 3i32.into()],
305 NonNullable,
306 )
307 .as_list(),
308 )
309 .unwrap();
310
311 builder
312 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
313 .unwrap();
314
315 builder
316 .append_value(
317 Scalar::list(
318 dtype,
319 vec![4i32.into(), 5i32.into(), 6i32.into()],
320 NonNullable,
321 )
322 .as_list(),
323 )
324 .unwrap();
325
326 let list = builder.finish();
327 assert_eq!(list.len(), 3);
328
329 let list_array = list.to_list().unwrap();
330
331 assert_eq!(list_array.elements_at(0).len(), 3);
332 assert_eq!(list_array.elements_at(1).len(), 0);
333 assert_eq!(list_array.elements_at(2).len(), 3);
334 }
335
336 fn test_extend_builder_gen<O: OffsetPType>() {
337 let list = ListArray::from_iter_opt_slow::<O, _, _>(
338 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
339 Arc::new(I32.into()),
340 )
341 .unwrap();
342
343 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
344
345 builder.extend_from_array(&list).unwrap();
346 builder.extend_from_array(&list).unwrap();
347 builder.extend_from_array(&list.slice(0..0)).unwrap();
348 builder.extend_from_array(&list.slice(1..3)).unwrap();
349
350 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
351 [
352 Some(vec![0, 1, 2]),
353 None,
354 Some(vec![4, 5]),
355 Some(vec![0, 1, 2]),
356 None,
357 Some(vec![4, 5]),
358 None,
359 Some(vec![4, 5]),
360 ],
361 Arc::new(DType::Primitive(I32, NonNullable)),
362 )
363 .unwrap()
364 .to_list()
365 .unwrap();
366
367 let actual = builder
368 .finish()
369 .to_canonical()
370 .unwrap()
371 .into_list()
372 .unwrap();
373
374 assert_eq!(
375 actual.elements().to_primitive().unwrap().as_slice::<i32>(),
376 expected
377 .elements()
378 .to_primitive()
379 .unwrap()
380 .as_slice::<i32>()
381 );
382
383 assert_eq!(
384 actual.offsets().to_primitive().unwrap().as_slice::<O>(),
385 expected.offsets().to_primitive().unwrap().as_slice::<O>()
386 );
387
388 assert_eq!(actual.validity(), expected.validity())
389 }
390
391 #[test]
392 fn test_extend_builder() {
393 test_extend_builder_gen::<i8>();
394 test_extend_builder_gen::<i16>();
395 test_extend_builder_gen::<i32>();
396 test_extend_builder_gen::<i64>();
397
398 test_extend_builder_gen::<u8>();
399 test_extend_builder_gen::<u16>();
400 test_extend_builder_gen::<u32>();
401 test_extend_builder_gen::<u64>();
402 }
403
404 #[test]
405 pub fn test_array_with_gap() {
406 let one_trailing_unused_element = ListArray::try_new(
407 buffer![1, 2, 3, 4].into_array(),
408 buffer![0, 3].into_array(),
409 Validity::NonNullable,
410 )
411 .unwrap();
412
413 let second_array = ListArray::try_new(
414 buffer![5, 6].into_array(),
415 buffer![0, 2].into_array(),
416 Validity::NonNullable,
417 )
418 .unwrap();
419
420 let chunked_list = ChunkedArray::try_new(
421 vec![
422 one_trailing_unused_element.clone().into_array(),
423 second_array.clone().into_array(),
424 ],
425 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
426 );
427
428 let canon_values = chunked_list.unwrap().to_list().unwrap();
429
430 assert_eq!(
431 one_trailing_unused_element.scalar_at(0),
432 canon_values.scalar_at(0)
433 );
434 assert_eq!(second_array.scalar_at(0), canon_values.scalar_at(1));
435 }
436}