1use std::any::Any;
2use std::sync::Arc;
3
4use vortex_dtype::Nullability::NonNullable;
5use vortex_dtype::{DType, NativePType, Nullability};
6use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
7use vortex_mask::Mask;
8use vortex_scalar::{ListScalar, NumericOperator};
9
10use crate::arrays::{ConstantArray, ListArray, OffsetPType};
11use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
12use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
13use crate::compute::{cast, numeric, slice};
14use crate::{Array, ArrayRef, ToCanonical};
15
16pub struct ListBuilder<O: NativePType> {
17 value_builder: Box<dyn ArrayBuilder>,
18 index_builder: PrimitiveBuilder<O>,
19 nulls: LazyNullBufferBuilder,
20 nullability: Nullability,
21 dtype: DType,
22}
23
24impl<O: OffsetPType> ListBuilder<O> {
25 pub fn with_capacity(
27 value_dtype: Arc<DType>,
28 nullability: Nullability,
29 capacity: usize,
30 ) -> Self {
31 let value_builder = builder_with_capacity(value_dtype.as_ref(), 2 * capacity);
33 let mut index_builder = PrimitiveBuilder::with_capacity(NonNullable, capacity);
34
35 index_builder.append_zero();
37
38 Self {
39 value_builder,
40 index_builder,
41 nulls: LazyNullBufferBuilder::new(capacity),
42 nullability,
43 dtype: DType::List(value_dtype, nullability),
44 }
45 }
46
47 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
48 match value.elements() {
49 None => {
50 if self.nullability == NonNullable {
51 vortex_bail!("Cannot append null value to non-nullable list");
52 }
53 self.append_null();
54 Ok(())
55 }
56 Some(elements) => {
57 for scalar in elements {
58 self.value_builder.append_scalar(&scalar)?;
61 }
62 self.nulls.append_non_null();
63 self.append_index(
64 O::from_usize(self.value_builder.len())
65 .vortex_expect("Failed to convert from usize to O"),
66 )
67 }
68 }
69 }
70
71 fn append_index(&mut self, index: O) -> VortexResult<()> {
72 self.index_builder.append_scalar(&index.into())
73 }
74}
75
76impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
77 fn as_any(&self) -> &dyn Any {
78 self
79 }
80
81 fn as_any_mut(&mut self) -> &mut dyn Any {
82 self
83 }
84
85 fn dtype(&self) -> &DType {
86 &self.dtype
87 }
88
89 fn len(&self) -> usize {
90 self.nulls.len()
91 }
92
93 fn append_zeros(&mut self, n: usize) {
94 let count = self.value_builder.len();
95 self.value_builder.append_zeros(n);
96 for i in 0..n {
97 self.append_index(
98 O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
99 )
100 .vortex_expect("Failed to append index");
101 }
102 self.nulls.append_n_non_nulls(n);
103 }
104
105 fn append_nulls(&mut self, n: usize) {
106 let count = self.value_builder.len();
107 for _ in 0..n {
108 self.append_index(
111 O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
112 )
113 .vortex_expect("Failed to append index");
114 }
115 self.nulls.append_n_nulls(n);
116 }
117
118 fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
119 self.nulls.append_validity_mask(array.validity_mask()?);
120
121 let list = array.to_list()?;
122
123 let cursor_usize = self.value_builder.len();
124 let cursor = O::from_usize(cursor_usize).ok_or_else(|| {
125 vortex_err!(
126 "cannot convert length {} to type {:?}",
127 cursor_usize,
128 O::PTYPE
129 )
130 })?;
131
132 let offsets = numeric(
133 &cast(
134 &slice(list.offsets(), 1, list.offsets().len())?,
135 &DType::Primitive(O::PTYPE, NonNullable),
136 )?,
137 &ConstantArray::new(cursor, list.len()),
138 NumericOperator::Add,
139 )?;
140 self.index_builder.extend_from_array(&offsets)?;
141
142 if !list.is_empty() {
143 let last_used_index = self.index_builder.values().last().vortex_expect("there must be at least one index because we just extended a non-zero list of offsets");
144 let sliced_values = slice(list.elements(), 0, last_used_index.as_() - cursor_usize)?;
145 self.value_builder.ensure_capacity(sliced_values.len());
146 self.value_builder.extend_from_array(&sliced_values)?;
147 }
148
149 Ok(())
150 }
151
152 fn ensure_capacity(&mut self, capacity: usize) {
153 self.index_builder.ensure_capacity(capacity);
154 self.value_builder.ensure_capacity(capacity);
155 self.nulls.ensure_capacity(capacity);
156 }
157
158 fn set_validity(&mut self, validity: Mask) {
159 self.nulls = LazyNullBufferBuilder::new(validity.len());
160 self.nulls.append_validity_mask(validity);
161 }
162
163 fn finish(&mut self) -> ArrayRef {
164 assert_eq!(
165 self.index_builder.len(),
166 self.nulls.len() + 1,
167 "Indices length must be one more than nulls length."
168 );
169
170 ListArray::try_new(
171 self.value_builder.finish(),
172 self.index_builder.finish(),
173 self.nulls.finish_with_nullability(self.nullability),
174 )
175 .vortex_expect("Buffer, offsets, and validity must have same length.")
176 .into_array()
177 }
178}
179
180#[cfg(test)]
181mod tests {
182 use std::sync::Arc;
183
184 use Nullability::{NonNullable, Nullable};
185 use vortex_buffer::buffer;
186 use vortex_dtype::PType::I32;
187 use vortex_dtype::{DType, Nullability};
188 use vortex_scalar::Scalar;
189
190 use crate::array::Array;
191 use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
192 use crate::builders::ArrayBuilder;
193 use crate::builders::list::ListBuilder;
194 use crate::compute::scalar_at;
195 use crate::validity::Validity;
196 use crate::{IntoArray as _, ToCanonical};
197
198 #[test]
199 fn test_empty() {
200 let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
201
202 let list = builder.finish();
203 assert_eq!(list.len(), 0);
204 }
205
206 #[test]
207 fn test_values() {
208 let dtype: Arc<DType> = Arc::new(I32.into());
209 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
210
211 builder
212 .append_value(
213 Scalar::list(
214 dtype.clone(),
215 vec![1i32.into(), 2i32.into(), 3i32.into()],
216 NonNullable,
217 )
218 .as_list(),
219 )
220 .unwrap();
221
222 builder
223 .append_value(
224 Scalar::list(
225 dtype,
226 vec![4i32.into(), 5i32.into(), 6i32.into()],
227 NonNullable,
228 )
229 .as_list(),
230 )
231 .unwrap();
232
233 let list = builder.finish();
234 assert_eq!(list.len(), 2);
235
236 let list_array = list.to_list().unwrap();
237
238 assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
239 assert_eq!(list_array.elements_at(1).unwrap().len(), 3);
240 }
241
242 #[test]
243 fn test_non_null_fails() {
244 let dtype: Arc<DType> = Arc::new(I32.into());
245 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
246
247 assert!(
248 builder
249 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
250 .is_err()
251 )
252 }
253
254 #[test]
255 fn test_nullable_values() {
256 let dtype: Arc<DType> = Arc::new(I32.into());
257 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
258
259 builder
260 .append_value(
261 Scalar::list(
262 dtype.clone(),
263 vec![1i32.into(), 2i32.into(), 3i32.into()],
264 NonNullable,
265 )
266 .as_list(),
267 )
268 .unwrap();
269
270 builder
271 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
272 .unwrap();
273
274 builder
275 .append_value(
276 Scalar::list(
277 dtype,
278 vec![4i32.into(), 5i32.into(), 6i32.into()],
279 NonNullable,
280 )
281 .as_list(),
282 )
283 .unwrap();
284
285 let list = builder.finish();
286 assert_eq!(list.len(), 3);
287
288 let list_array = list.to_list().unwrap();
289
290 assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
291 assert_eq!(list_array.elements_at(1).unwrap().len(), 0);
292 assert_eq!(list_array.elements_at(2).unwrap().len(), 3);
293 }
294
295 fn test_extend_builder_gen<O: OffsetPType>() {
296 let list = ListArray::from_iter_opt_slow::<O, _, _>(
297 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
298 Arc::new(I32.into()),
299 )
300 .unwrap();
301
302 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
303
304 builder.extend_from_array(&list).unwrap();
305 builder.extend_from_array(&list).unwrap();
306
307 let expect = ListArray::from_iter_opt_slow::<O, _, _>(
308 [
309 Some(vec![0, 1, 2]),
310 None,
311 Some(vec![4, 5]),
312 Some(vec![0, 1, 2]),
313 None,
314 Some(vec![4, 5]),
315 ],
316 Arc::new(DType::Primitive(I32, NonNullable)),
317 )
318 .unwrap()
319 .to_list()
320 .unwrap();
321
322 let res = builder
323 .finish()
324 .to_canonical()
325 .unwrap()
326 .into_list()
327 .unwrap();
328
329 assert_eq!(
330 res.elements().to_primitive().unwrap().as_slice::<i32>(),
331 expect.elements().to_primitive().unwrap().as_slice::<i32>()
332 );
333
334 assert_eq!(
335 res.offsets().to_primitive().unwrap().as_slice::<O>(),
336 expect.offsets().to_primitive().unwrap().as_slice::<O>()
337 );
338
339 assert_eq!(res.validity(), expect.validity())
340 }
341
342 #[test]
343 fn test_extend_builder() {
344 test_extend_builder_gen::<i8>();
345 test_extend_builder_gen::<i16>();
346 test_extend_builder_gen::<i32>();
347 test_extend_builder_gen::<i64>();
348
349 test_extend_builder_gen::<u8>();
350 test_extend_builder_gen::<u16>();
351 test_extend_builder_gen::<u32>();
352 test_extend_builder_gen::<u64>();
353 }
354
355 #[test]
356 pub fn test_array_with_gap() {
357 let one_trailing_unused_element = ListArray::try_new(
358 buffer![1, 2, 3, 4].into_array(),
359 buffer![0, 3].into_array(),
360 Validity::NonNullable,
361 )
362 .unwrap();
363
364 let second_array = ListArray::try_new(
365 buffer![5, 6].into_array(),
366 buffer![0, 2].into_array(),
367 Validity::NonNullable,
368 )
369 .unwrap();
370
371 let chunked_list = ChunkedArray::try_new(
372 vec![
373 one_trailing_unused_element.clone().into_array(),
374 second_array.clone().into_array(),
375 ],
376 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
377 );
378
379 let canon_values = chunked_list.unwrap().to_list().unwrap();
380
381 assert_eq!(
382 scalar_at(&one_trailing_unused_element, 0).unwrap(),
383 scalar_at(&canon_values, 0).unwrap()
384 );
385 assert_eq!(
386 scalar_at(&second_array, 0).unwrap(),
387 scalar_at(&canon_values, 1).unwrap()
388 );
389 }
390}