1use std::any::Any;
2use std::sync::Arc;
3
4use vortex_dtype::Nullability::NonNullable;
5use vortex_dtype::{DType, NativePType, Nullability};
6use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
7use vortex_scalar::{BinaryNumericOperator, ListScalar};
8
9use crate::arrays::{ConstantArray, ListArray, OffsetPType};
10use crate::builders::lazy_validity_builder::LazyNullBufferBuilder;
11use crate::builders::{ArrayBuilder, ArrayBuilderExt, PrimitiveBuilder, builder_with_capacity};
12use crate::compute::{binary_numeric, slice, try_cast};
13use crate::{Array, ArrayRef};
14
15pub struct ListBuilder<O: NativePType> {
16 value_builder: Box<dyn ArrayBuilder>,
17 index_builder: PrimitiveBuilder<O>,
18 nulls: LazyNullBufferBuilder,
19 nullability: Nullability,
20 dtype: DType,
21}
22
23impl<O: OffsetPType> ListBuilder<O> {
24 pub fn with_capacity(
26 value_dtype: Arc<DType>,
27 nullability: Nullability,
28 capacity: usize,
29 ) -> Self {
30 let value_builder = builder_with_capacity(value_dtype.as_ref(), 2 * capacity);
32 let mut index_builder = PrimitiveBuilder::with_capacity(NonNullable, capacity);
33
34 index_builder.append_zero();
36
37 Self {
38 value_builder,
39 index_builder,
40 nulls: LazyNullBufferBuilder::new(capacity),
41 nullability,
42 dtype: DType::List(value_dtype, nullability),
43 }
44 }
45
46 pub fn append_value(&mut self, value: ListScalar) -> VortexResult<()> {
47 match value.elements() {
48 None => {
49 if self.nullability == NonNullable {
50 vortex_bail!("Cannot append null value to non-nullable list");
51 }
52 self.append_null();
53 Ok(())
54 }
55 Some(elements) => {
56 for scalar in elements {
57 self.value_builder.append_scalar(&scalar)?;
60 }
61 self.nulls.append_non_null();
62 self.append_index(
63 O::from_usize(self.value_builder.len())
64 .vortex_expect("Failed to convert from usize to O"),
65 )
66 }
67 }
68 }
69
70 fn append_index(&mut self, index: O) -> VortexResult<()> {
71 self.index_builder.append_scalar(&index.into())
72 }
73}
74
75impl<O: OffsetPType> ArrayBuilder for ListBuilder<O> {
76 fn as_any(&self) -> &dyn Any {
77 self
78 }
79
80 fn as_any_mut(&mut self) -> &mut dyn Any {
81 self
82 }
83
84 fn dtype(&self) -> &DType {
85 &self.dtype
86 }
87
88 fn len(&self) -> usize {
89 self.nulls.len()
90 }
91
92 fn append_zeros(&mut self, n: usize) {
93 let count = self.value_builder.len();
94 self.value_builder.append_zeros(n);
95 for i in 0..n {
96 self.append_index(
97 O::from_usize(count + i + 1).vortex_expect("Failed to convert from usize to <O>"),
98 )
99 .vortex_expect("Failed to append index");
100 }
101 self.nulls.append_n_non_nulls(n);
102 }
103
104 fn append_nulls(&mut self, n: usize) {
105 let count = self.value_builder.len();
106 for _ in 0..n {
107 self.append_index(
110 O::from_usize(count).vortex_expect("Failed to convert from usize to <O>"),
111 )
112 .vortex_expect("Failed to append index");
113 }
114 self.nulls.append_n_nulls(n);
115 }
116
117 fn extend_from_array(&mut self, array: &dyn Array) -> VortexResult<()> {
118 self.nulls.append_validity_mask(array.validity_mask()?);
119
120 let list = array.to_canonical()?.into_list()?;
121
122 let cursor_usize = self.value_builder.len();
123 let cursor = O::from_usize(cursor_usize).ok_or_else(|| {
124 vortex_err!(
125 "cannot convert length {} to type {:?}",
126 cursor_usize,
127 O::PTYPE
128 )
129 })?;
130
131 let offsets = binary_numeric(
132 &try_cast(
133 &slice(list.offsets(), 1, list.offsets().len())?,
134 &DType::Primitive(O::PTYPE, NonNullable),
135 )?,
136 &ConstantArray::new(cursor, list.len()),
137 BinaryNumericOperator::Add,
138 )?;
139 self.index_builder.extend_from_array(&offsets)?;
140
141 if !list.is_empty() {
142 let last_used_index = self.index_builder.values().last().vortex_expect("there must be at least one index because we just extended a non-zero list of offsets");
143 self.value_builder.extend_from_array(&slice(
144 list.elements(),
145 0,
146 last_used_index.as_() - cursor_usize,
147 )?)?;
148 }
149
150 Ok(())
151 }
152
153 fn finish(&mut self) -> ArrayRef {
154 assert_eq!(
155 self.index_builder.len(),
156 self.nulls.len() + 1,
157 "Indices length must be one more than nulls length."
158 );
159
160 ListArray::try_new(
161 self.value_builder.finish(),
162 self.index_builder.finish(),
163 self.nulls.finish_with_nullability(self.nullability),
164 )
165 .vortex_expect("Buffer, offsets, and validity must have same length.")
166 .into_array()
167 }
168}
169
170#[cfg(test)]
171mod tests {
172 use std::sync::Arc;
173
174 use Nullability::{NonNullable, Nullable};
175 use vortex_buffer::buffer;
176 use vortex_dtype::PType::I32;
177 use vortex_dtype::{DType, Nullability};
178 use vortex_scalar::Scalar;
179
180 use crate::array::Array;
181 use crate::arrays::{ChunkedArray, ListArray, OffsetPType};
182 use crate::builders::ArrayBuilder;
183 use crate::builders::list::ListBuilder;
184 use crate::compute::scalar_at;
185 use crate::validity::Validity;
186 use crate::{IntoArray as _, ToCanonical};
187
188 #[test]
189 fn test_empty() {
190 let mut builder = ListBuilder::<u32>::with_capacity(Arc::new(I32.into()), NonNullable, 0);
191
192 let list = builder.finish();
193 assert_eq!(list.len(), 0);
194 }
195
196 #[test]
197 fn test_values() {
198 let dtype: Arc<DType> = Arc::new(I32.into());
199 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
200
201 builder
202 .append_value(
203 Scalar::list(
204 dtype.clone(),
205 vec![1i32.into(), 2i32.into(), 3i32.into()],
206 NonNullable,
207 )
208 .as_list(),
209 )
210 .unwrap();
211
212 builder
213 .append_value(
214 Scalar::list(
215 dtype,
216 vec![4i32.into(), 5i32.into(), 6i32.into()],
217 NonNullable,
218 )
219 .as_list(),
220 )
221 .unwrap();
222
223 let list = builder.finish();
224 assert_eq!(list.len(), 2);
225
226 let list_array = list.to_list().unwrap();
227
228 assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
229 assert_eq!(list_array.elements_at(1).unwrap().len(), 3);
230 }
231
232 #[test]
233 fn test_non_null_fails() {
234 let dtype: Arc<DType> = Arc::new(I32.into());
235 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), NonNullable, 0);
236
237 assert!(
238 builder
239 .append_value(Scalar::list_empty(dtype, NonNullable).as_list())
240 .is_err()
241 )
242 }
243
244 #[test]
245 fn test_nullable_values() {
246 let dtype: Arc<DType> = Arc::new(I32.into());
247 let mut builder = ListBuilder::<u32>::with_capacity(dtype.clone(), Nullable, 0);
248
249 builder
250 .append_value(
251 Scalar::list(
252 dtype.clone(),
253 vec![1i32.into(), 2i32.into(), 3i32.into()],
254 NonNullable,
255 )
256 .as_list(),
257 )
258 .unwrap();
259
260 builder
261 .append_value(Scalar::list_empty(dtype.clone(), NonNullable).as_list())
262 .unwrap();
263
264 builder
265 .append_value(
266 Scalar::list(
267 dtype,
268 vec![4i32.into(), 5i32.into(), 6i32.into()],
269 NonNullable,
270 )
271 .as_list(),
272 )
273 .unwrap();
274
275 let list = builder.finish();
276 assert_eq!(list.len(), 3);
277
278 let list_array = list.to_list().unwrap();
279
280 assert_eq!(list_array.elements_at(0).unwrap().len(), 3);
281 assert_eq!(list_array.elements_at(1).unwrap().len(), 0);
282 assert_eq!(list_array.elements_at(2).unwrap().len(), 3);
283 }
284
285 fn test_extend_builder_gen<O: OffsetPType>() {
286 let list = ListArray::from_iter_opt_slow::<O, _, _>(
287 [Some(vec![0, 1, 2]), None, Some(vec![4, 5])],
288 Arc::new(I32.into()),
289 )
290 .unwrap();
291
292 let mut builder = ListBuilder::<O>::with_capacity(Arc::new(I32.into()), Nullable, 6);
293
294 builder.extend_from_array(&list).unwrap();
295 builder.extend_from_array(&list).unwrap();
296
297 let expect = ListArray::from_iter_opt_slow::<O, _, _>(
298 [
299 Some(vec![0, 1, 2]),
300 None,
301 Some(vec![4, 5]),
302 Some(vec![0, 1, 2]),
303 None,
304 Some(vec![4, 5]),
305 ],
306 Arc::new(DType::Primitive(I32, NonNullable)),
307 )
308 .unwrap()
309 .to_list()
310 .unwrap();
311
312 let res = builder
313 .finish()
314 .to_canonical()
315 .unwrap()
316 .into_list()
317 .unwrap();
318
319 assert_eq!(
320 res.elements().to_primitive().unwrap().as_slice::<i32>(),
321 expect.elements().to_primitive().unwrap().as_slice::<i32>()
322 );
323
324 assert_eq!(
325 res.offsets().to_primitive().unwrap().as_slice::<O>(),
326 expect.offsets().to_primitive().unwrap().as_slice::<O>()
327 );
328
329 assert_eq!(res.validity(), expect.validity())
330 }
331
332 #[test]
333 fn test_extend_builder() {
334 test_extend_builder_gen::<i8>();
335 test_extend_builder_gen::<i16>();
336 test_extend_builder_gen::<i32>();
337 test_extend_builder_gen::<i64>();
338
339 test_extend_builder_gen::<u8>();
340 test_extend_builder_gen::<u16>();
341 test_extend_builder_gen::<u32>();
342 test_extend_builder_gen::<u64>();
343 }
344
345 #[test]
346 pub fn test_array_with_gap() {
347 let one_trailing_unused_element = ListArray::try_new(
348 buffer![1, 2, 3, 4].into_array(),
349 buffer![0, 3].into_array(),
350 Validity::NonNullable,
351 )
352 .unwrap();
353
354 let second_array = ListArray::try_new(
355 buffer![5, 6].into_array(),
356 buffer![0, 2].into_array(),
357 Validity::NonNullable,
358 )
359 .unwrap();
360
361 let chunked_list = ChunkedArray::try_new(
362 vec![
363 one_trailing_unused_element.clone().into_array(),
364 second_array.clone().into_array(),
365 ],
366 DType::List(Arc::new(DType::Primitive(I32, NonNullable)), NonNullable),
367 );
368
369 let canon_values = chunked_list.unwrap().to_list().unwrap();
370
371 assert_eq!(
372 scalar_at(&one_trailing_unused_element, 0).unwrap(),
373 scalar_at(&canon_values, 0).unwrap()
374 );
375 assert_eq!(
376 scalar_at(&second_array, 0).unwrap(),
377 scalar_at(&canon_values, 1).unwrap()
378 );
379 }
380}