1use std::any::Any;
5use std::sync::Arc;
6
7use vortex_dtype::Nullability::NonNullable;
8use vortex_dtype::{DType, NativePType, Nullability};
9use vortex_error::{VortexExpect, VortexResult, vortex_bail};
10use vortex_mask::Mask;
11use vortex_scalar::ListScalar;
12
13use crate::arrays::{ListArray, OffsetPType};
14use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
15use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
16use crate::compute::{add_scalar, cast, sub_scalar};
17use crate::{Array, ArrayRef, IntoArray, ToCanonical};
18
19pub struct ListBuilder<O: NativePType> {
20 value_builder: Box<dyn ArrayBuilder>,
21 index_builder: PrimitiveBuilder<O>,
22 nulls: LazyNullBufferBuilder,
23 nullability: Nullability,
24 dtype: DType,
25}
26
27impl<O: OffsetPType> ListBuilder<O> {
28 pub fn with_capacity(
36 value_dtype: Arc<DType>,
37 nullability: Nullability,
38 index_capacity: usize,
39 ) -> Self {
40 Self::with_values_and_index_capacity(
42 value_dtype,
43 nullability,
44 2 * index_capacity,
45 index_capacity,
46 )
47 }
48
49 pub fn with_values_and_index_capacity(
55 value_dtype: Arc<DType>,
56 nullability: Nullability,
57 values_capacity: usize,
58 index_capacity: usize,
59 ) -> Self {
60 let value_builder = builder_with_capacity(value_dtype.as_ref(), values_capacity);
61 let mut index_builder = PrimitiveBuilder::with_capacity(NonNullable, index_capacity);
62
63 index_builder.append_zero();
65
66 Self {
67 value_builder,
68 index_builder,
69 nulls: LazyNullBufferBuilder::new(index_capacity),
70 nullability,
71 dtype: DType::List(value_dtype, nullability),
72 }
73 }
74
75 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
76 match value.elements() {
77 None => {
78 if self.nullability == NonNullable {
79 vortex_bail!("Cannot append null value to non-nullable list");
80 }
81 self.append_null();
82 Ok(())
83 }
84 Some(elements) => {
85 for scalar in elements {
86 self.value_builder.append_scalar(&scalar)?;
89 }
90 self.nulls.append_non_null();
91 self.append_index(
92 O::from_usize(self.value_builder.len())
93 .vortex_expect("Failed to convert from usize to O"),
94 )
95 }
96 }
97 }
98
99 fn append_index(&mut self, index: O) -> VortexResult<()> {
100 self.index_builder.append_scalar(&index.into())
101 }
102}
103
104impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
105 fn as_any(&self) -> &dyn Any {
106 self
107 }
108
109 fn as_any_mut(&mut self) -> &mut dyn Any {
110 self
111 }
112
113 fn dtype(&self) -> &DType {
114 &self.dtype
115 }
116
117 fn len(&self) -> usize {
118 self.nulls.len()
119 }
120
121 fn append_zeros(&mut self, n: usize) {
122 let count = self.value_builder.len();
123 self.value_builder.append_zeros(n);
124 for i in 0..n {
125 self.append_index(
126 O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
127 )
128 .vortex_expect("Failed to append index");
129 }
130 self.nulls.append_n_non_nulls(n);
131 }
132
133 fn append_nulls(&mut self, n: usize) {
134 let count = self.value_builder.len();
135 for _ in 0..n {
136 self.append_index(
139 O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
140 )
141 .vortex_expect("Failed to append index");
142 }
143 self.nulls.append_n_nulls(n);
144 }
145
146 fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
147 let list = array.to_list()?;
148 if list.is_empty() {
149 return Ok(());
150 }
151
152 let n_already_added_values = self.value_builder.len();
153 let Some(n_already_added_values) = O::from_usize(n_already_added_values) else {
154 vortex_bail!(
155 "cannot convert length {} to type {:?}",
156 n_already_added_values,
157 O::PTYPE
158 )
159 };
160
161 let offsets = list.offsets();
162 let elements = list.elements();
163
164 let index_dtype = self.index_builder.dtype();
165
166 let n_leading_junk_values_scalar = offsets.scalar_at(0)?.cast(index_dtype)?;
167 let n_leading_junk_values = usize::try_from(&n_leading_junk_values_scalar)?;
168
169 let casted_offsets = cast(&offsets.slice(1, offsets.len())?, index_dtype)?;
170 let offsets_without_leading_junk =
171 sub_scalar(&casted_offsets, n_leading_junk_values_scalar)?;
172 let offsets_into_builder =
173 add_scalar(&offsets_without_leading_junk, n_already_added_values.into())?;
174
175 let last_offset = offsets.scalar_at(offsets.len() - 1)?;
176 let last_offset = usize::try_from(&last_offset)?;
177 let non_junk_values = elements.slice(n_leading_junk_values, last_offset)?;
178
179 self.nulls.append_validity_mask(array.validity_mask()?);
180 self.index_builder
181 .extend_from_array(&offsets_into_builder)?;
182 self.value_builder.ensure_capacity(non_junk_values.len());
183 self.value_builder.extend_from_array(&non_junk_values)?;
184
185 Ok(())
186 }
187
188 fn ensure_capacity(&mut self, capacity: usize) {
189 self.index_builder.ensure_capacity(capacity);
190 self.value_builder.ensure_capacity(capacity);
191 self.nulls.ensure_capacity(capacity);
192 }
193
194 fn set_validity(&mut self, validity: Mask) {
195 self.nulls = LazyNullBufferBuilder::new(validity.len());
196 self.nulls.append_validity_mask(validity);
197 }
198
199 fn finish(&mut self) -> ArrayRef {
200 assert_eq!(
201 self.index_builder.len(),
202 self.nulls.len() + 1,
203 "Indices length must be one more than nulls length."
204 );
205
206 ListArray::try_new(
207 self.value_builder.finish(),
208 self.index_builder.finish(),
209 self.nulls.finish_with_nullability(self.nullability),
210 )
211 .vortex_expect("Buffer, offsets, and validity must have same length.")
212 .into_array()
213 }
214}
215
216#[cfg(test)]
217mod tests {
218 use std::sync::Arc;
219
220 use Nullability::{NonNullable, Nullable};
221 use vortex_buffer::buffer;
222 use vortex_dtype::PType::I32;
223 use vortex_dtype::{DType, Nullability};
224 use vortex_scalar::Scalar;
225
226 use crate::array::Array;
227 use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
228 use crate::builders::ArrayBuilder;
229 use crate::builders::list::ListBuilder;
230 use crate::validity::Validity;
231 use crate::vtable::ValidityHelper;
232 use crate::{IntoArray as _, ToCanonical};
233
234 #[test]
235 fn test_empty() {
236 let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
237
238 let list = builder.finish();
239 assert_eq!(list.len(), 0);
240 }
241
242 #[test]
243 fn test_values() {
244 let dtype: Arc<DType> = Arc::new(I32.into());
245 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
246
247 builder
248 .append_value(
249 Scalar::list(
250 dtype.clone(),
251 vec![1i32.into(), 2i32.into(), 3i32.into()],
252 NonNullable,
253 )
254 .as_list(),
255 )
256 .unwrap();
257
258 builder
259 .append_value(
260 Scalar::list(
261 dtype,
262 vec![4i32.into(), 5i32.into(), 6i32.into()],
263 NonNullable,
264 )
265 .as_list(),
266 )
267 .unwrap();
268
269 let list = builder.finish();
270 assert_eq!(list.len(), 2);
271
272 let list_array = list.to_list().unwrap();
273
274 assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
275 assert_eq!(list_array.elements_at(1).unwrap().len(), 3);
276 }
277
278 #[test]
279 fn test_non_null_fails() {
280 let dtype: Arc<DType> = Arc::new(I32.into());
281 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
282
283 assert!(
284 builder
285 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
286 .is_err()
287 )
288 }
289
290 #[test]
291 fn test_nullable_values() {
292 let dtype: Arc<DType> = Arc::new(I32.into());
293 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
294
295 builder
296 .append_value(
297 Scalar::list(
298 dtype.clone(),
299 vec![1i32.into(), 2i32.into(), 3i32.into()],
300 NonNullable,
301 )
302 .as_list(),
303 )
304 .unwrap();
305
306 builder
307 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
308 .unwrap();
309
310 builder
311 .append_value(
312 Scalar::list(
313 dtype,
314 vec![4i32.into(), 5i32.into(), 6i32.into()],
315 NonNullable,
316 )
317 .as_list(),
318 )
319 .unwrap();
320
321 let list = builder.finish();
322 assert_eq!(list.len(), 3);
323
324 let list_array = list.to_list().unwrap();
325
326 assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
327 assert_eq!(list_array.elements_at(1).unwrap().len(), 0);
328 assert_eq!(list_array.elements_at(2).unwrap().len(), 3);
329 }
330
331 fn test_extend_builder_gen<O: OffsetPType>() {
332 let list = ListArray::from_iter_opt_slow::<O, _, _>(
333 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
334 Arc::new(I32.into()),
335 )
336 .unwrap();
337
338 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
339
340 builder.extend_from_array(&list).unwrap();
341 builder.extend_from_array(&list).unwrap();
342 builder
343 .extend_from_array(&list.slice(0, 0).unwrap())
344 .unwrap();
345 builder
346 .extend_from_array(&list.slice(1, 3).unwrap())
347 .unwrap();
348
349 let expected = ListArray::from_iter_opt_slow::<O, _, _>(
350 [
351 Some(vec![0, 1, 2]),
352 None,
353 Some(vec![4, 5]),
354 Some(vec![0, 1, 2]),
355 None,
356 Some(vec![4, 5]),
357 None,
358 Some(vec![4, 5]),
359 ],
360 Arc::new(DType::Primitive(I32, NonNullable)),
361 )
362 .unwrap()
363 .to_list()
364 .unwrap();
365
366 let actual = builder
367 .finish()
368 .to_canonical()
369 .unwrap()
370 .into_list()
371 .unwrap();
372
373 assert_eq!(
374 actual.elements().to_primitive().unwrap().as_slice::<i32>(),
375 expected
376 .elements()
377 .to_primitive()
378 .unwrap()
379 .as_slice::<i32>()
380 );
381
382 assert_eq!(
383 actual.offsets().to_primitive().unwrap().as_slice::<O>(),
384 expected.offsets().to_primitive().unwrap().as_slice::<O>()
385 );
386
387 assert_eq!(actual.validity(), expected.validity())
388 }
389
390 #[test]
391 fn test_extend_builder() {
392 test_extend_builder_gen::<i8>();
393 test_extend_builder_gen::<i16>();
394 test_extend_builder_gen::<i32>();
395 test_extend_builder_gen::<i64>();
396
397 test_extend_builder_gen::<u8>();
398 test_extend_builder_gen::<u16>();
399 test_extend_builder_gen::<u32>();
400 test_extend_builder_gen::<u64>();
401 }
402
403 #[test]
404 pub fn test_array_with_gap() {
405 let one_trailing_unused_element = ListArray::try_new(
406 buffer![1, 2, 3, 4].into_array(),
407 buffer![0, 3].into_array(),
408 Validity::NonNullable,
409 )
410 .unwrap();
411
412 let second_array = ListArray::try_new(
413 buffer![5, 6].into_array(),
414 buffer![0, 2].into_array(),
415 Validity::NonNullable,
416 )
417 .unwrap();
418
419 let chunked_list = ChunkedArray::try_new(
420 vec![
421 one_trailing_unused_element.clone().into_array(),
422 second_array.clone().into_array(),
423 ],
424 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
425 );
426
427 let canon_values = chunked_list.unwrap().to_list().unwrap();
428
429 assert_eq!(
430 one_trailing_unused_element.scalar_at(0).unwrap(),
431 canon_values.scalar_at(0).unwrap()
432 );
433 assert_eq!(
434 second_array.scalar_at(0).unwrap(),
435 canon_values.scalar_at(1).unwrap()
436 );
437 }
438}